Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions examples/llm_compression/onnx/tiny_llama/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,10 @@ def main():
ov_model = OVModelForCausalLM.from_pretrained(OUTPUT_DIR, from_onnx=True)

messages = [{"role": "user", "content": "What is PyTorch?"}]
input_ids = tokenizer.apply_chat_template(
batch_feature = tokenizer.apply_chat_template(
messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to(device=model.device)
)
input_ids = batch_feature["input_ids"]

start_t = time.time()
output = ov_model.generate(input_ids, max_new_tokens=100)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
transformers==4.53.0
transformers==5.0.0
openvino==2026.2.0
optimum-intel[openvino]==2.0.0
optimum==2.2.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,10 @@ def main():
# Infer Model.
ov_model = OVModelForCausalLM.from_pretrained(OUTPUT_DIR, from_onnx=True)
messages = [{"role": "user", "content": "What is PyTorch?"}]
input_ids = tokenizer.apply_chat_template(
batch_feature = tokenizer.apply_chat_template(
messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to(device=model.device)
)
input_ids = batch_feature["input_ids"]

start_t = time.time()
output = ov_model.generate(input_ids, max_new_tokens=100)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
torch==2.10.0
transformers==4.53.0
transformers==5.0.0
openvino==2026.2.0
optimum-intel[openvino]==2.0.0
optimum==2.2.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,10 @@ def generate_answers(

for question in questions:
messages.append({"role": "user", "content": question})
input_ids = tokenizer.apply_chat_template(
batch_feature = tokenizer.apply_chat_template(
messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to(device=model.device)
)
input_ids = batch_feature["input_ids"]
input_len = len(input_ids[0])

output = model.generate(input_ids, max_new_tokens=max_new_tokens, do_sample=False)[0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ datasets==5.0.0
openvino==2026.2.0
optimum-intel[openvino]==2.0.0
optimum==2.2.0
transformers==4.53.0
transformers==5.0.0
onnx==1.21.0
torch==2.10.0
torchvision==0.25.0
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,10 @@ def generate_answers(

for question in questions:
messages.append({"role": "user", "content": question})
input_ids = tokenizer.apply_chat_template(
batch_feature = tokenizer.apply_chat_template(
messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to(device=model.device)
)
input_ids = batch_feature["input_ids"]
input_len = len(input_ids[0])

output = model.generate(input_ids, max_new_tokens=max_new_tokens, do_sample=False)[0]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
openvino==2026.2.0
optimum-intel[openvino]==2.0.0
optimum==2.2.0
transformers==4.53.0
transformers==5.0.0
onnx==1.21.0
torch==2.10.0
torchvision==0.25.0
5 changes: 3 additions & 2 deletions examples/llm_compression/openvino/smollm2_360m_fp8/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,10 @@ def generate_answers(questions, model, tokenizer, max_new_tokens=50):

for question in questions:
messages.append({"role": "user", "content": question})
input_ids = tokenizer.apply_chat_template(
batch_feature = tokenizer.apply_chat_template(
messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to(device=model.device)
)
input_ids = batch_feature["input_ids"]
input_len = len(input_ids[0])

output = model.generate(input_ids, max_new_tokens=max_new_tokens, do_sample=False)[0]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ datasets==5.0.0
openvino==2026.2.0
optimum-intel[openvino]==2.0.0
optimum==2.2.0
transformers==4.53.0
transformers==5.0.0
onnx==1.21.0
torch==2.10.0
5 changes: 3 additions & 2 deletions examples/llm_compression/openvino/tiny_llama/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,10 @@ def transform_fn(data, tokenizer):
model = OVModelForCausalLM.from_pretrained(OUTPUT_DIR)

messages = [{"role": "user", "content": "What is PyTorch?"}]
input_ids = tokenizer.apply_chat_template(
batch_feature = tokenizer.apply_chat_template(
messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
).to(device=model.device)
)
input_ids = batch_feature["input_ids"]

start_t = time.time()
output = model.generate(input_ids, max_new_tokens=100)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ openvino==2026.2.0
optimum-intel[openvino]==2.0.0
optimum==2.2.0
torch==2.10.0
transformers==4.53.0
transformers==5.0.0
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
whowhatbench @ git+https://github.com/AlexanderDokuchaev/openvino.genai@light_req#subdirectory=tools/who_what_benchmark
whowhatbench @ git+https://github.com/AlexanderDokuchaev/openvino.genai@a26c9b20d07209f035f6d74aeae94d6a72a132ab#subdirectory=tools/who_what_benchmark
numpy==1.26.4
openvino==2026.2.0
optimum-intel==2.0.0
optimum==2.2.0
transformers==4.53.0
transformers==5.0.0
onnx==1.21.0
torch==2.10.0
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@ numpy>=1.23.5,<2
openvino==2026.2.0
optimum-intel==2.0.0
optimum==2.2.0
transformers==4.53.0
transformers==5.0.0
onnx==1.21.0
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ numpy>=1.23.5,<2
openvino==2026.2.0
optimum-intel==2.0.0
optimum==2.2.0
transformers==4.53.0
lm_eval==0.4.8
transformers==5.0.0
lm_eval[hf]==0.4.12
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ numpy>=1.23.5,<2
openvino==2026.2.0
optimum-intel==2.0.0
optimum==2.2.0
transformers==4.53.0
lm_eval==0.4.8
transformers==5.0.0
lm_eval[hf]==0.4.12
torchao==0.17.0
3 changes: 2 additions & 1 deletion examples/llm_compression/torch_fx/tiny_llama/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,10 @@ def main() -> str:
},
{"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
]
input_ids = tokenizer.apply_chat_template(
batch_feature = tokenizer.apply_chat_template(
messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
)
input_ids = batch_feature["input_ids"]

print("Warmup...")
output = compressed_model_hf.generate(input_ids)
Expand Down
4 changes: 2 additions & 2 deletions examples/llm_compression/torch_fx/tiny_llama/modelling.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from transformers import GenerationMixin
from transformers import PretrainedConfig
from transformers import PreTrainedModel
from transformers.cache_utils import StaticCacheConfig
from transformers.integrations.executorch import TorchExportableModuleWithStaticCache
from transformers.modeling_outputs import CausalLMOutputWithPast
from transformers.models.llama.configuration_llama import LlamaConfig
Expand Down Expand Up @@ -97,8 +96,9 @@ def convert_and_export_with_cache(model: PreTrainedModel) -> tuple[ExportedProgr
example_cache_position = torch.arange(0, 8, dtype=torch.long)
model_config = None
gen_config = None
model.generation_config.use_cache = True
model.generation_config.cache_implementation = "static"
model.generation_config.cache_config = StaticCacheConfig(batch_size=1, max_cache_len=512)
model.generation_config.cache_config = {"batch_size": 1, "max_cache_len": 512}
model.generation_config.max_new_tokens = 100
gen_config = model.generation_config
model_config = model.config
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
transformers==4.53.0
transformers==5.0.0
datasets==5.0.0
openvino==2026.2.0
optimum==2.2.0
Expand Down
2 changes: 1 addition & 1 deletion tests/openvino/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@ addict>=2.4.0
timm==0.9.2
efficientnet_pytorch==0.7.1
datasets
transformers==4.53.0
transformers==5.0.0
optimum-intel==2.0.0
optimum==2.2.0
4 changes: 2 additions & 2 deletions tests/post_training/pipelines/fx_modelling.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from transformers import GenerationMixin
from transformers import PretrainedConfig
from transformers import PreTrainedModel
from transformers.cache_utils import StaticCacheConfig
from transformers.integrations.executorch import TorchExportableModuleWithStaticCache
from transformers.modeling_outputs import CausalLMOutputWithPast

Expand Down Expand Up @@ -84,8 +83,9 @@ def convert_and_export_with_cache(model: PreTrainedModel):

example_input_ids = torch.ones(1, 8, dtype=torch.long)
example_cache_position = torch.arange(0, 8, dtype=torch.long)
model.generation_config.use_cache = True
model.generation_config.cache_implementation = "static"
model.generation_config.cache_config = StaticCacheConfig(batch_size=1, max_cache_len=512)
model.generation_config.cache_config = {"batch_size": 1, "max_cache_len": 512}
model.generation_config.max_new_tokens = 100
gen_config = model.generation_config
model_config = model.config
Expand Down
6 changes: 3 additions & 3 deletions tests/post_training/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ optimum-onnx @ git+https://github.com/AlexanderDokuchaev/optimum-onnx.git@b57739
scikit-learn>=1.2.2,<=1.5.0
soundfile==0.12.1
tensorboard==2.20.0
tensorflow-io==0.37.0
tensorflow-io==0.37.1
timm==0.9.2
accelerate==1.9.0
transformers==4.53.0
whowhatbench @ git+https://github.com/AlexanderDokuchaev/openvino.genai@light_req#subdirectory=tools/who_what_benchmark
transformers==5.0.0
whowhatbench @ git+https://github.com/AlexanderDokuchaev/openvino.genai@a26c9b20d07209f035f6d74aeae94d6a72a132ab#subdirectory=tools/who_what_benchmark
16 changes: 0 additions & 16 deletions tests/torch/function_hook/sparsify_activations/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import openvino as ov
import torch
import torch.nn as nn
import transformers.models

from nncf import IgnoredScope
from nncf.experimental.torch.sparsify_activations import TargetScope
Expand All @@ -36,21 +35,6 @@ def forward(self, input_ids: torch.Tensor):
return y0, y1


def dummy_llama_model():
config = transformers.models.llama.configuration_llama.LlamaConfig(
vocab_size=32,
hidden_size=8,
intermediate_size=14,
num_attention_heads=2,
num_key_value_heads=1,
num_hidden_layers=2,
use_cache=False,
return_dict=False,
)
model = transformers.AutoModelForCausalLM.from_config(config, attn_implementation="eager")
return model


def count_sparsifier_patterns_in_ov(model: ov.Model) -> int:
"""
Counts the number of activation sparsification pattern "Abs -> LessEqual -> Select"
Expand Down
19 changes: 0 additions & 19 deletions tests/torch/function_hook/sparsify_activations/test_algo.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
from tests.cross_fw.shared.paths import TEST_ROOT
from tests.torch.function_hook.sparsify_activations.helpers import ThreeLinearModel
from tests.torch.function_hook.sparsify_activations.helpers import count_sparsifier_patterns_in_ov
from tests.torch.function_hook.sparsify_activations.helpers import dummy_llama_model
from tests.torch.helpers import set_torch_seed
from tests.torch.utils import compare_with_reference_file
from tests.torch.utils import to_comparable_nx_graph
Expand Down Expand Up @@ -100,24 +99,6 @@ def __str__(self) -> str:
ref_num_batches_tracked=3,
ref_num_patterns_in_ov=2,
),
SparsifyActivationsAlgorithmTestDesc(
name="dummy_llama",
model_getter=dummy_llama_model,
dataset_getter=lambda device: nncf.Dataset(torch.randint(0, 30, (3, 2, 8)).to(device)),
target_sparsity_by_scope={
TargetScope(patterns=[".*gate_proj.*"]): 0.2,
TargetScope(patterns=[".*up_proj.*"]): 0.3,
TargetScope(patterns=[".*down_proj.*"]): 0.4,
},
ignored_scope=None,
ref_sparsifier_target_sparsity={
(f"pre_hooks.model/mlp/{name}/linear/{layer_id}__0.0"): sparsity
for name, sparsity in [("gate_proj", 0.2), ("up_proj", 0.3), ("down_proj", 0.4)]
for layer_id in [0, 1]
},
ref_num_batches_tracked=3,
ref_num_patterns_in_ov=6,
),
]
Comment thread
AlexanderDokuchaev marked this conversation as resolved.
Comment thread
AlexanderDokuchaev marked this conversation as resolved.


Expand Down
4 changes: 2 additions & 2 deletions tests/torch/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ torchvision

addict>=2.4.0
efficientnet_pytorch==0.7.1
transformers==4.53.0
transformers==5.0.0

sentence-transformers==4.1.0
sentence-transformers==5.6.0
optimum-intel==2.0.0
optimum==2.2.0
accelerate==1.9.0
Expand Down
Loading