diff --git a/lmdeploy/api.py b/lmdeploy/api.py index 11f31c1de4..d674166ddf 100644 --- a/lmdeploy/api.py +++ b/lmdeploy/api.py @@ -17,6 +17,7 @@ def pipeline(model_path: str, chat_template_config: ChatTemplateConfig | None = None, log_level: str = 'WARNING', max_log_len: int | None = None, + trust_remote_code: bool = False, speculative_config: SpeculativeConfig | None = None, **kwargs): """Create a pipeline for inference. @@ -41,6 +42,7 @@ def pipeline(model_path: str, ``WARNING``, ``INFO``, ``DEBUG``] max_log_len: Max number of prompt characters or prompt tokens being printed in log. + trust_remote_code: whether to trust remote code from model repositories. speculative_config: speculative decoding configuration. **kwargs: additional keyword arguments passed to the pipeline. @@ -73,6 +75,7 @@ def pipeline(model_path: str, chat_template_config=chat_template_config, log_level=log_level, max_log_len=max_log_len, + trust_remote_code=trust_remote_code, speculative_config=speculative_config, **kwargs) diff --git a/lmdeploy/archs.py b/lmdeploy/archs.py index 68fa03a407..f1fddb8f5c 100644 --- a/lmdeploy/archs.py +++ b/lmdeploy/archs.py @@ -128,14 +128,14 @@ def check_vl_llm(backend: str, config: dict) -> bool: return False -def get_task(backend: str, model_path: str): +def get_task(backend: str, model_path: str, trust_remote_code: bool = False): """Get pipeline type and pipeline class from model config.""" from lmdeploy.serve.core import AsyncEngine if os.path.exists(os.path.join(model_path, 'triton_models', 'weights')): # workspace model return 'llm', AsyncEngine - _, config = get_model_arch(model_path) + _, config = get_model_arch(model_path, trust_remote_code=trust_remote_code) if check_vl_llm(backend, config.to_dict()): from lmdeploy.serve.core import VLAsyncEngine return 'vlm', VLAsyncEngine @@ -144,17 +144,17 @@ def get_task(backend: str, model_path: str): return 'llm', AsyncEngine -def get_model_arch(model_path: str): +def get_model_arch(model_path: str, trust_remote_code: bool = False): """Get a model's architecture and configuration. Args: model_path(str): the model path """ try: - cfg = AutoConfig.from_pretrained(model_path, trust_remote_code=True) + cfg = AutoConfig.from_pretrained(model_path, trust_remote_code=trust_remote_code) except Exception as e: # noqa from transformers import PretrainedConfig - cfg = PretrainedConfig.from_pretrained(model_path, trust_remote_code=True) + cfg = PretrainedConfig.from_pretrained(model_path, trust_remote_code=trust_remote_code) _cfg = cfg.to_dict() if _cfg.get('architectures', None): diff --git a/lmdeploy/cli/serve.py b/lmdeploy/cli/serve.py index 155392f4a7..81816729ba 100644 --- a/lmdeploy/cli/serve.py +++ b/lmdeploy/cli/serve.py @@ -63,6 +63,9 @@ def add_parser_api_server(): default=['*'], help='A list of allowed http headers for cors') parser.add_argument('--proxy-url', type=str, default=None, help='The proxy url for api server.') + parser.add_argument('--trust-remote-code', + action='store_true', + help='Whether to trust remote code from model repositories.') parser.add_argument('--max-concurrent-requests', type=int, default=None, @@ -303,6 +306,7 @@ def api_server(args): max_log_len=args.max_log_len, disable_fastapi_docs=args.disable_fastapi_docs, max_concurrent_requests=args.max_concurrent_requests, + trust_remote_code=args.trust_remote_code, reasoning_parser=args.reasoning_parser, tool_call_parser=args.tool_call_parser, speculative_config=speculative_config, @@ -334,6 +338,7 @@ def api_server(args): max_log_len=args.max_log_len, disable_fastapi_docs=args.disable_fastapi_docs, max_concurrent_requests=args.max_concurrent_requests, + trust_remote_code=args.trust_remote_code, reasoning_parser=args.reasoning_parser, tool_call_parser=args.tool_call_parser, speculative_config=speculative_config, diff --git a/lmdeploy/pipeline.py b/lmdeploy/pipeline.py index ca4c42bba0..7e7a3b0029 100644 --- a/lmdeploy/pipeline.py +++ b/lmdeploy/pipeline.py @@ -39,6 +39,7 @@ def __init__(self, chat_template_config: ChatTemplateConfig | None = None, log_level: str = 'WARNING', max_log_len: int | None = None, + trust_remote_code: bool = False, speculative_config: SpeculativeConfig | None = None, **kwargs): """Initialize Pipeline. @@ -49,6 +50,7 @@ def __init__(self, chat_template_config: Chat template configuration. log_level: Log level. max_log_len: Max number of prompt characters or prompt tokens being printed in log. + trust_remote_code: whether to trust remote code from model repositories. speculative_config: Speculative decoding configuration. **kwargs: Additional keyword arguments. """ @@ -69,12 +71,13 @@ def __init__(self, # Create inference engine backend, backend_config = autoget_backend_config(model_path, backend_config) - _, pipeline_class = get_task(backend, model_path) + _, pipeline_class = get_task(backend, model_path, trust_remote_code=trust_remote_code) self.async_engine = pipeline_class(model_path, backend=backend, backend_config=backend_config, chat_template_config=chat_template_config, max_log_len=max_log_len, + trust_remote_code=trust_remote_code, speculative_config=speculative_config, **kwargs) self.internal_thread = _EventLoopThread(daemon=True) diff --git a/lmdeploy/pytorch/config.py b/lmdeploy/pytorch/config.py index 39584ac4b7..d9b63617e2 100644 --- a/lmdeploy/pytorch/config.py +++ b/lmdeploy/pytorch/config.py @@ -365,7 +365,7 @@ def get_head_size(self): def from_pretrained( cls, pretrained_model_name_or_path: str, - trust_remote_code: bool = True, + trust_remote_code: bool = False, dtype: str = 'auto', dist_config: DistConfig = None, hf_overrides: dict[str, Any] = None, @@ -563,10 +563,11 @@ def from_config( target_cache_cfg: CacheConfig, target_model: str = None, dtype: str = 'auto', + trust_remote_code: bool = False, ): model = model or target_model model_config = ModelConfig.from_pretrained(model, - trust_remote_code=True, + trust_remote_code=trust_remote_code, dtype=dtype, is_draft_model=True, spec_method=method, diff --git a/lmdeploy/pytorch/engine/config_builder.py b/lmdeploy/pytorch/engine/config_builder.py index 7c7ab6c3d0..3128a3d986 100644 --- a/lmdeploy/pytorch/engine/config_builder.py +++ b/lmdeploy/pytorch/engine/config_builder.py @@ -98,7 +98,7 @@ def build_misc_config(engine_config: PytorchEngineConfig): @staticmethod def build_specdecode_config(target_model, speculative_config: SpeculativeConfig, engine_config: PytorchEngineConfig, - cache_config: CacheConfig): + cache_config: CacheConfig, trust_remote_code: bool = False): """Build spec decode config.""" specdecode_config = None if speculative_config is not None: @@ -113,5 +113,6 @@ def build_specdecode_config(target_model, speculative_config: SpeculativeConfig, target_model=target_model, target_cache_cfg=cache_config, dtype=engine_config.dtype, + trust_remote_code=trust_remote_code, ) return specdecode_config diff --git a/lmdeploy/pytorch/engine/engine.py b/lmdeploy/pytorch/engine/engine.py index 27848de026..deeb8e77d9 100644 --- a/lmdeploy/pytorch/engine/engine.py +++ b/lmdeploy/pytorch/engine/engine.py @@ -94,7 +94,7 @@ def __init__( self, model_path: str, engine_config: PytorchEngineConfig = None, - trust_remote_code: bool = True, + trust_remote_code: bool = False, speculative_config: SpeculativeConfig = None, ) -> None: # make sure engine config exist @@ -133,7 +133,7 @@ def __init__( misc_config = ConfigBuilder.build_misc_config(engine_config) # spec decode self.specdecode_config = ConfigBuilder.build_specdecode_config(model_path, speculative_config, engine_config, - cache_config) + cache_config, trust_remote_code) # build model agent self.executor = build_executor( @@ -147,6 +147,7 @@ def __init__( distributed_executor_backend=engine_config.distributed_executor_backend, dtype=engine_config.dtype, specdecode_config=self.specdecode_config, + trust_remote_code=trust_remote_code, ) self.executor.init() @@ -198,7 +199,7 @@ def __init__( def from_pretrained(cls, pretrained_model_name_or_path: str, engine_config: PytorchEngineConfig = None, - trust_remote_code: bool = True, + trust_remote_code: bool = False, speculative_config: SpeculativeConfig = None, **kwargs): """Lmdeploy python inference engine. diff --git a/lmdeploy/pytorch/engine/executor/__init__.py b/lmdeploy/pytorch/engine/executor/__init__.py index bd580462b6..497bf9492b 100644 --- a/lmdeploy/pytorch/engine/executor/__init__.py +++ b/lmdeploy/pytorch/engine/executor/__init__.py @@ -63,6 +63,7 @@ def build_executor( distributed_executor_backend: str = None, dtype: str = 'auto', specdecode_config: SpecDecodeConfig = None, + trust_remote_code: bool = False, ) -> ExecutorBase: """Build model agent executor.""" logger = get_logger('lmdeploy') @@ -71,7 +72,7 @@ def build_executor( model_config = ModelConfig.from_pretrained( model_path, - trust_remote_code=True, + trust_remote_code=trust_remote_code, dtype=dtype, hf_overrides=misc_config.hf_overrides, dist_config=dist_config, diff --git a/lmdeploy/serve/core/async_engine.py b/lmdeploy/serve/core/async_engine.py index c5dfcd0364..4d33c12646 100644 --- a/lmdeploy/serve/core/async_engine.py +++ b/lmdeploy/serve/core/async_engine.py @@ -110,6 +110,7 @@ def __init__(self, backend_config: TurbomindEngineConfig | PytorchEngineConfig | None = None, chat_template_config: ChatTemplateConfig | None = None, max_log_len: int | None = None, + trust_remote_code: bool = False, speculative_config: SpeculativeConfig | None = None, **kwargs) -> None: logger.info(f'input backend={backend}, backend_config={backend_config}') @@ -118,10 +119,10 @@ def __init__(self, if backend == 'turbomind' else PytorchEngineConfig()) self.model_name = model_name if model_name else model_path self.chat_template = get_chat_template(model_path, chat_template_config) - self.tokenizer = Tokenizer(model_path) + self.tokenizer = Tokenizer(model_path, trust_remote_code=trust_remote_code) self.prompt_processor = MultimodalProcessor(self.tokenizer, self.chat_template) - self.hf_gen_cfg = get_hf_gen_cfg(model_path) - self.arch, self.hf_cfg = get_model_arch(model_path) + self.hf_gen_cfg = get_hf_gen_cfg(model_path, trust_remote_code=trust_remote_code) + self.arch, self.hf_cfg = get_model_arch(model_path, trust_remote_code=trust_remote_code) self.session_len = (_get_and_verify_max_len(self.hf_cfg, None) if backend_config.session_len is None else backend_config.session_len) backend_config.session_len = self.session_len @@ -129,10 +130,14 @@ def __init__(self, logger.warning('speculative decoding is not supported by turbomind ') # build backend engine if backend == 'turbomind': - self.engine = self._build_turbomind(model_path=model_path, backend_config=backend_config, **kwargs) + self.engine = self._build_turbomind(model_path=model_path, + backend_config=backend_config, + trust_remote_code=trust_remote_code, + **kwargs) elif backend == 'pytorch': self.engine = self._build_pytorch(model_path=model_path, backend_config=backend_config, + trust_remote_code=trust_remote_code, speculative_config=speculative_config, **kwargs) else: @@ -169,19 +174,30 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, traceback): self.close() - def _build_turbomind(self, model_path: str, backend_config: TurbomindEngineConfig | None = None, **kwargs): + def _build_turbomind(self, + model_path: str, + backend_config: TurbomindEngineConfig | None = None, + trust_remote_code: bool = False, + **kwargs): """Inner build method for turbomind backend.""" from lmdeploy import turbomind as tm - return tm.TurboMind.from_pretrained(model_path, engine_config=backend_config, **kwargs) + return tm.TurboMind.from_pretrained(model_path, + engine_config=backend_config, + trust_remote_code=trust_remote_code, + **kwargs) def _build_pytorch(self, model_path: str, backend_config: PytorchEngineConfig | None = None, + trust_remote_code: bool = False, speculative_config: SpeculativeConfig | None = None, **kwargs): """Inner build method for pytorch backend.""" from lmdeploy.pytorch.engine import Engine - return Engine.from_pretrained(model_path, engine_config=backend_config, speculative_config=speculative_config) + return Engine.from_pretrained(model_path, + engine_config=backend_config, + trust_remote_code=trust_remote_code, + speculative_config=speculative_config) def _build_stat_loggers(self): self.stat_loggers = [] diff --git a/lmdeploy/serve/core/vl_async_engine.py b/lmdeploy/serve/core/vl_async_engine.py index 44fd97dac6..9e6c9ac25d 100644 --- a/lmdeploy/serve/core/vl_async_engine.py +++ b/lmdeploy/serve/core/vl_async_engine.py @@ -17,6 +17,7 @@ def __init__(self, backend: Literal['turbomind', 'pytorch'] = 'turbomind', backend_config: TurbomindEngineConfig | PytorchEngineConfig | None = None, vision_config: VisionConfig | None = None, + trust_remote_code: bool = False, **kwargs) -> None: from lmdeploy.serve.processors import MultimodalProcessor from lmdeploy.utils import try_import_deeplink @@ -27,8 +28,16 @@ def __init__(self, if backend_config and backend_config.enable_prefix_caching: backend_config.enable_prefix_caching = False logger.warning('Prefix caching is disabled since LMDeploy hasn\'t support in on VL models yet') - self.vl_encoder = ImageEncoder(model_path, backend, vision_config, backend_config=backend_config) - super().__init__(model_path, backend=backend, backend_config=backend_config, **kwargs) + self.vl_encoder = ImageEncoder(model_path, + backend, + vision_config, + backend_config=backend_config, + trust_remote_code=trust_remote_code) + super().__init__(model_path, + backend=backend, + backend_config=backend_config, + trust_remote_code=trust_remote_code, + **kwargs) # Update prompt_processor to support multimodal processing self.prompt_processor = MultimodalProcessor(self.tokenizer, self.chat_template, diff --git a/lmdeploy/serve/openai/api_server.py b/lmdeploy/serve/openai/api_server.py index 2c552febd0..456bc40912 100644 --- a/lmdeploy/serve/openai/api_server.py +++ b/lmdeploy/serve/openai/api_server.py @@ -1415,6 +1415,7 @@ def serve(model_path: str, max_log_len: int | None = None, disable_fastapi_docs: bool = False, max_concurrent_requests: int | None = None, + trust_remote_code: bool = False, reasoning_parser: str | None = None, tool_call_parser: str | None = None, allow_terminate_by_client: bool = False, @@ -1487,7 +1488,7 @@ def serve(model_path: str, http_or_https = 'https' handle_torchrun() - _, pipeline_class = get_task(backend, model_path) + _, pipeline_class = get_task(backend, model_path, trust_remote_code=trust_remote_code) if isinstance(backend_config, PytorchEngineConfig): backend_config.enable_mp_engine = True # router replay @@ -1499,6 +1500,7 @@ def serve(model_path: str, backend_config=backend_config, chat_template_config=chat_template_config, max_log_len=max_log_len, + trust_remote_code=trust_remote_code, speculative_config=speculative_config, **kwargs) # set reasoning parser and tool parser diff --git a/lmdeploy/tokenizer.py b/lmdeploy/tokenizer.py index c184e53111..df5329f423 100644 --- a/lmdeploy/tokenizer.py +++ b/lmdeploy/tokenizer.py @@ -43,11 +43,11 @@ class HuggingFaceTokenizer: model_dir: the directory of the tokenizer model. """ - def __init__(self, model_dir: str): - self._check_transformers_version(model_dir) + def __init__(self, model_dir: str, trust_remote_code: bool = False): + self._check_transformers_version(model_dir, trust_remote_code=trust_remote_code) from transformers import AutoTokenizer self.logger = get_logger('lmdeploy') - self.model = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True) + self.model = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=trust_remote_code) self._prefix_space_tokens = None if self.model.eos_token_id is None: @@ -67,7 +67,7 @@ def __init__(self, model_dir: str): self.max_indexes_num = 5 self.token2id = {} - def _check_transformers_version(self, model_dir: str): + def _check_transformers_version(self, model_dir: str, trust_remote_code: bool = False): import transformers from packaging import version @@ -76,7 +76,7 @@ def _check_transformers_version(self, model_dir: str): logger = get_logger('lmdeploy') current_transformers_version = version.parse(transformers.__version__) - cfg = get_model_arch(model_dir)[1] + cfg = get_model_arch(model_dir, trust_remote_code=trust_remote_code)[1] cfg_ver = getattr(cfg, 'transformers_version', None) if cfg_ver is None: llm_config = getattr(cfg, 'llm_config', None) @@ -352,8 +352,8 @@ def __call__(self, s: str | Sequence[str]): class ChatGLM4Tokenizer(HuggingFaceTokenizer): """Tokenizer of GLM4.""" - def __init__(self, model_path): - super().__init__(model_path) + def __init__(self, model_path, trust_remote_code: bool = False): + super().__init__(model_path, trust_remote_code=trust_remote_code) original_pad = self.model._pad def __pad(*args, **kwargs): @@ -374,8 +374,8 @@ def encode(self, s: str, add_bos: bool = True, add_special_tokens: bool = True, class ChatGLMTokenizer(HuggingFaceTokenizer): """Tokenizer of GLM2.""" - def __init__(self, model_path): - super().__init__(model_path) + def __init__(self, model_path, trust_remote_code: bool = False): + super().__init__(model_path, trust_remote_code=trust_remote_code) original_pad = self.model._pad def __pad(*args, **kwargs): @@ -390,8 +390,8 @@ def __pad(*args, **kwargs): class GptOssTokenizer(HuggingFaceTokenizer): """Tokenizer of GPT-OSS.""" - def __init__(self, model_dir: str): - super().__init__(model_dir) + def __init__(self, model_dir: str, trust_remote_code: bool = False): + super().__init__(model_dir, trust_remote_code=trust_remote_code) from openai_harmony import HarmonyEncodingName, Role, StreamableParser, load_harmony_encoding encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS) self.role = Role.ASSISTANT @@ -423,24 +423,24 @@ class Tokenizer: model_path: the path of the tokenizer model. """ - def __init__(self, model_path: str): + def __init__(self, model_path: str, trust_remote_code: bool = False): from transformers import AutoConfig, PretrainedConfig try: - model_cfg = AutoConfig.from_pretrained(model_path, trust_remote_code=True) + model_cfg = AutoConfig.from_pretrained(model_path, trust_remote_code=trust_remote_code) except Exception as e: # noqa - model_cfg = PretrainedConfig.from_pretrained(model_path, trust_remote_code=True) + model_cfg = PretrainedConfig.from_pretrained(model_path, trust_remote_code=trust_remote_code) is_gpt_oss = getattr(model_cfg, 'model_type', '') == 'gpt_oss' from transformers.models.auto.tokenization_auto import get_tokenizer_config - tokenizer_config = get_tokenizer_config(model_path, trust_remote_code=True) + tokenizer_config = get_tokenizer_config(model_path, trust_remote_code=trust_remote_code) config_tokenizer_class = tokenizer_config.get('tokenizer_class') if config_tokenizer_class == 'ChatGLM4Tokenizer': - self.model = ChatGLM4Tokenizer(model_path) + self.model = ChatGLM4Tokenizer(model_path, trust_remote_code=trust_remote_code) elif config_tokenizer_class == 'ChatGLMTokenizer': - self.model = ChatGLMTokenizer(model_path) + self.model = ChatGLMTokenizer(model_path, trust_remote_code=trust_remote_code) elif is_gpt_oss: - self.model = GptOssTokenizer(model_path) + self.model = GptOssTokenizer(model_path, trust_remote_code=trust_remote_code) else: - self.model = HuggingFaceTokenizer(model_path) + self.model = HuggingFaceTokenizer(model_path, trust_remote_code=trust_remote_code) self.logger = get_logger('lmdeploy') @property diff --git a/lmdeploy/utils.py b/lmdeploy/utils.py index 9d83dc06b4..6fc451cfa9 100644 --- a/lmdeploy/utils.py +++ b/lmdeploy/utils.py @@ -219,10 +219,10 @@ def _stop_words(stop_words: list[int | str], tokenizer: object): return stop_words -def get_hf_gen_cfg(path: str): +def get_hf_gen_cfg(path: str, trust_remote_code: bool = False): from transformers import GenerationConfig try: - cfg = GenerationConfig.from_pretrained(path, trust_remote_code=True) + cfg = GenerationConfig.from_pretrained(path, trust_remote_code=trust_remote_code) return cfg.to_dict() except OSError: return {} diff --git a/lmdeploy/vl/engine.py b/lmdeploy/vl/engine.py index 8cd179df8a..6fc39630da 100644 --- a/lmdeploy/vl/engine.py +++ b/lmdeploy/vl/engine.py @@ -38,8 +38,12 @@ def __init__( backend: str, vision_config: VisionConfig = None, backend_config: TurbomindEngineConfig | PytorchEngineConfig | None = None, + trust_remote_code: bool = False, ): - self.model = load_vl_model(model_path, backend, backend_config=backend_config) + self.model = load_vl_model(model_path, + backend, + backend_config=backend_config, + trust_remote_code=trust_remote_code) if vision_config is None: vision_config = VisionConfig() self.vision_config = vision_config diff --git a/lmdeploy/vl/model/base.py b/lmdeploy/vl/model/base.py index 51ebb44419..bdad514bbc 100644 --- a/lmdeploy/vl/model/base.py +++ b/lmdeploy/vl/model/base.py @@ -20,14 +20,16 @@ def __init__(self, with_llm: bool = False, max_memory: dict[int, int] = None, hf_config: AutoConfig = None, - backend: str = ''): + backend: str = '', + trust_remote_code: bool = False): """init.""" self.model_path = model_path self.with_llm = with_llm self.max_memory = max_memory self.backend = backend + self.trust_remote_code = trust_remote_code if hf_config is None: - _, hf_config = get_model_arch(model_path) + _, hf_config = get_model_arch(model_path, trust_remote_code=trust_remote_code) self.hf_config = hf_config self.image_token_id = self.get_pad_token_id(model_path, hf_config) or 0 @@ -36,7 +38,7 @@ def get_pad_token_id(self, model_path, hf_config): pad_token_id = getattr(hf_config, 'pad_token_id', None) if pad_token_id is None: try: - tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) + tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=self.trust_remote_code) pad_token_id = getattr(tokenizer, 'pad_token_id', None) except Exception as e: print(e) diff --git a/lmdeploy/vl/model/builder.py b/lmdeploy/vl/model/builder.py index 04ac5ab759..db441d6217 100644 --- a/lmdeploy/vl/model/builder.py +++ b/lmdeploy/vl/model/builder.py @@ -39,7 +39,8 @@ def load_vl_model(model_path: str, backend: str, with_llm: bool = False, - backend_config: TurbomindEngineConfig | PytorchEngineConfig | None = None): + backend_config: TurbomindEngineConfig | PytorchEngineConfig | None = None, + trust_remote_code: bool = False): """Load visual model. Args: @@ -59,8 +60,13 @@ def load_vl_model(model_path: str, tp = getattr(backend_config, 'tp', 1) max_memory = {i: torch.cuda.mem_get_info(i)[0] for i in range(tp)} if backend == 'turbomind' else None - _, hf_config = get_model_arch(model_path) - kwargs = dict(model_path=model_path, with_llm=with_llm, max_memory=max_memory, hf_config=hf_config, backend=backend) + _, hf_config = get_model_arch(model_path, trust_remote_code=trust_remote_code) + kwargs = dict(model_path=model_path, + with_llm=with_llm, + max_memory=max_memory, + hf_config=hf_config, + backend=backend, + trust_remote_code=trust_remote_code) for name, module in VISION_MODELS.module_dict.items(): try: diff --git a/lmdeploy/vl/model/cogvlm.py b/lmdeploy/vl/model/cogvlm.py index 0dbacc5450..dd5a4ef30d 100644 --- a/lmdeploy/vl/model/cogvlm.py +++ b/lmdeploy/vl/model/cogvlm.py @@ -34,7 +34,7 @@ def build_model(self): from transformers import AutoModelForCausalLM self.vl_model = AutoModelForCausalLM.from_pretrained(self.model_path, device_map='cpu', - trust_remote_code=True) + trust_remote_code=self.trust_remote_code) else: raise NotImplementedError('turbomind has not supported cogvlm yet') diff --git a/lmdeploy/vl/model/glm4_v.py b/lmdeploy/vl/model/glm4_v.py index ea837aa3b3..b4de297f0a 100644 --- a/lmdeploy/vl/model/glm4_v.py +++ b/lmdeploy/vl/model/glm4_v.py @@ -39,7 +39,7 @@ def build_model(self): from transformers import AutoModelForCausalLM self.vl_model = AutoModelForCausalLM.from_pretrained(self.model_path, device_map='cpu', - trust_remote_code=True) + trust_remote_code=self.trust_remote_code) else: raise NotImplementedError('turbomind has not supported glm4v yet') diff --git a/lmdeploy/vl/model/internvl.py b/lmdeploy/vl/model/internvl.py index bac11f2251..ce33b0a90a 100644 --- a/lmdeploy/vl/model/internvl.py +++ b/lmdeploy/vl/model/internvl.py @@ -73,10 +73,13 @@ def __init__(self, with_llm: bool = False, max_memory: dict[int, int] = None, hf_config: AutoConfig = None, - backend: str = ''): - super().__init__(model_path, with_llm, max_memory, hf_config, backend) + backend: str = '', + trust_remote_code: bool = False): + super().__init__(model_path, with_llm, max_memory, hf_config, backend, trust_remote_code=trust_remote_code) self.image_token = '' - tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=False) + tokenizer = AutoTokenizer.from_pretrained(model_path, + trust_remote_code=self.trust_remote_code, + use_fast=False) self.image_token_id = tokenizer.convert_tokens_to_ids(self.image_token) def build_preprocessor(self): @@ -120,7 +123,7 @@ def build_model(self): with init_empty_weights(): # transformers below 4.37.0 may raise error about flash_attn self.config.llm_config.attn_implementation = 'eager' - model = AutoModel.from_config(self.config, trust_remote_code=True) + model = AutoModel.from_config(self.config, trust_remote_code=self.trust_remote_code) self.vl_model = model if not self.with_llm: del model.language_model diff --git a/lmdeploy/vl/model/internvl3_hf.py b/lmdeploy/vl/model/internvl3_hf.py index 9f9cce3d34..1448945892 100644 --- a/lmdeploy/vl/model/internvl3_hf.py +++ b/lmdeploy/vl/model/internvl3_hf.py @@ -41,12 +41,13 @@ def __init__(self, with_llm: bool = False, max_memory: dict[int, int] = None, hf_config: AutoConfig = None, - backend: str = ''): - super().__init__(model_path, with_llm, max_memory, hf_config, backend) + backend: str = '', + trust_remote_code: bool = False): + super().__init__(model_path, with_llm, max_memory, hf_config, backend, trust_remote_code=trust_remote_code) self.arch = self.hf_config.architectures[0] def build_preprocessor(self): - self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True) + self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=self.trust_remote_code) tokenizer = self.processor.tokenizer self.image_token = self.processor.image_token self.image_token_id = tokenizer.context_image_token_id @@ -59,11 +60,11 @@ def build_model(self): from accelerate import init_empty_weights with init_empty_weights(): if self.arch == 'InternVLForConditionalGeneration': - model = AutoModel.from_config(self.hf_config, trust_remote_code=True) + model = AutoModel.from_config(self.hf_config, trust_remote_code=self.trust_remote_code) if not self.with_llm: del model.language_model elif self.arch == 'InternS1ForConditionalGeneration': - model = AutoModelForCausalLM.from_config(self.hf_config, trust_remote_code=True) + model = AutoModelForCausalLM.from_config(self.hf_config, trust_remote_code=self.trust_remote_code) if not self.with_llm: del model.model.language_model else: diff --git a/lmdeploy/vl/model/internvl_llava.py b/lmdeploy/vl/model/internvl_llava.py index d521bab9fb..963000d8f0 100644 --- a/lmdeploy/vl/model/internvl_llava.py +++ b/lmdeploy/vl/model/internvl_llava.py @@ -89,7 +89,7 @@ def build_model(self): disable_transformers_logging(): warnings.simplefilter('ignore') self.config.quantization_config = {} # disable vision part quantization - model = AutoModelForCausalLM.from_config(self.config, trust_remote_code=True) + model = AutoModelForCausalLM.from_config(self.config, trust_remote_code=self.trust_remote_code) self.vl_model = model if not self.with_llm: del model.lm_head diff --git a/lmdeploy/vl/model/llava.py b/lmdeploy/vl/model/llava.py index 6dc5eff4c4..1f0a8253ec 100644 --- a/lmdeploy/vl/model/llava.py +++ b/lmdeploy/vl/model/llava.py @@ -256,7 +256,7 @@ def build_model(self): init_llava_vision_tower(self.config): warnings.simplefilter('ignore') self.config.quantization_config = {} # disable vision part quantization - model = AutoModelForCausalLM.from_config(self.config, trust_remote_code=True) + model = AutoModelForCausalLM.from_config(self.config, trust_remote_code=self.trust_remote_code) self.vl_model = model if not self.with_llm: diff --git a/lmdeploy/vl/model/llava_hf.py b/lmdeploy/vl/model/llava_hf.py index 8b865f48b7..60dc991e2a 100644 --- a/lmdeploy/vl/model/llava_hf.py +++ b/lmdeploy/vl/model/llava_hf.py @@ -18,7 +18,7 @@ class LlavaHfVisionModel(VisionModel): _arch = 'LlavaForConditionalGeneration' def build_preprocessor(self): - processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True) + processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=self.trust_remote_code) if hasattr(processor, 'tokenizer'): del processor.tokenizer processor.prtokenizer = None diff --git a/lmdeploy/vl/model/minicpmv.py b/lmdeploy/vl/model/minicpmv.py index 9e6c3e52a0..f0c5f1e4f2 100644 --- a/lmdeploy/vl/model/minicpmv.py +++ b/lmdeploy/vl/model/minicpmv.py @@ -24,8 +24,9 @@ def __init__(self, with_llm: bool = False, max_memory: dict[int, int] = None, hf_config: AutoConfig = None, - backend: str = ''): - super().__init__(model_path, with_llm, max_memory, hf_config, backend) + backend: str = '', + trust_remote_code: bool = False): + super().__init__(model_path, with_llm, max_memory, hf_config, backend, trust_remote_code=trust_remote_code) if not hasattr(self.hf_config, 'version'): raise ValueError('Can not find `version` in config.json. ' 'Please checkout the latest model') @@ -36,7 +37,7 @@ def __init__(self, def build_preprocessor(self): from transformers import AutoProcessor - self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True) + self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=self.trust_remote_code) self.image_processor = self.processor.image_processor self._preprocess_func = (self._preprocess_v2_5 if self.version == '2.5' else self._preprocess_v2_6) @@ -49,7 +50,7 @@ def build_model(self): config = self.hf_config assert config.slice_mode is True, 'only support slice mode' config.quantization_config = {} # disable vision part quantization - model = AutoModelForCausalLM.from_config(config, trust_remote_code=True) + model = AutoModelForCausalLM.from_config(config, trust_remote_code=self.trust_remote_code) self.vl_model = model if not self.with_llm: del model.llm diff --git a/lmdeploy/vl/model/molmo.py b/lmdeploy/vl/model/molmo.py index 90b8cb932a..68696ccb68 100644 --- a/lmdeploy/vl/model/molmo.py +++ b/lmdeploy/vl/model/molmo.py @@ -19,7 +19,7 @@ class MolmoVisionModel(VisionModel): def build_preprocessor(self): self.processor = AutoProcessor.from_pretrained(self.model_path, - trust_remote_code=True, + trust_remote_code=self.trust_remote_code, torch_dtype=torch.half, device_map='auto') @@ -28,7 +28,7 @@ def build_model(self): load the whole VLM model when `self.with_llm==True`""" from accelerate import init_empty_weights, load_checkpoint_and_dispatch with init_empty_weights(): - model = AutoModelForCausalLM.from_config(self.hf_config, trust_remote_code=True) + model = AutoModelForCausalLM.from_config(self.hf_config, trust_remote_code=self.trust_remote_code) self.vl_model = model if not self.with_llm: diff --git a/lmdeploy/vl/model/phi3_vision.py b/lmdeploy/vl/model/phi3_vision.py index b48302371f..3cff6ca79c 100644 --- a/lmdeploy/vl/model/phi3_vision.py +++ b/lmdeploy/vl/model/phi3_vision.py @@ -13,7 +13,7 @@ class Phi3VisionModel(LlavaHfVisionModel): _arch = 'Phi3VForCausalLM' def build_preprocessor(self): - processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True) + processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=self.trust_remote_code) if hasattr(processor, 'tokenizer'): del processor.tokenizer processor.tokenizer = None @@ -24,7 +24,7 @@ def build_model(self): from transformers import AutoModelForCausalLM self.vl_model = AutoModelForCausalLM.from_pretrained(self.model_path, device_map='cpu', - trust_remote_code=True) + trust_remote_code=self.trust_remote_code) else: raise NotImplementedError('turbomind has not supported phi3v yet') diff --git a/lmdeploy/vl/model/qwen.py b/lmdeploy/vl/model/qwen.py index e7bee48bfc..5bb28cbe3d 100644 --- a/lmdeploy/vl/model/qwen.py +++ b/lmdeploy/vl/model/qwen.py @@ -36,7 +36,7 @@ def build_model(self): with init_empty_weights(): config = self.hf_config config.quantization_config = {} # disable vision part quantization - model = AutoModelForCausalLM.from_config(config, trust_remote_code=True) + model = AutoModelForCausalLM.from_config(config, trust_remote_code=self.trust_remote_code) self.vl_model = model if not self.with_llm: del model.lm_head diff --git a/lmdeploy/vl/model/qwen3.py b/lmdeploy/vl/model/qwen3.py index e43dad838c..5986258eb7 100644 --- a/lmdeploy/vl/model/qwen3.py +++ b/lmdeploy/vl/model/qwen3.py @@ -27,7 +27,7 @@ class Qwen3VLModel(VisionModel): def build_preprocessor(self): check_transformers() - self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True) + self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=self.trust_remote_code) # image tokens self.image_token = self.processor.image_token diff --git a/lmdeploy/vl/model/xcomposer2.py b/lmdeploy/vl/model/xcomposer2.py index 89eaa7659a..cc26e3e3b5 100644 --- a/lmdeploy/vl/model/xcomposer2.py +++ b/lmdeploy/vl/model/xcomposer2.py @@ -92,9 +92,10 @@ def __init__(self, with_llm: bool = False, max_memory: dict[int, int] = None, hf_config: AutoConfig = None, - backend: str = ''): + backend: str = '', + trust_remote_code: bool = False): model_path = model_path.rstrip(os.sep) - super().__init__(model_path, with_llm, max_memory, hf_config, backend) + super().__init__(model_path, with_llm, max_memory, hf_config, backend, trust_remote_code=trust_remote_code) check_xcomposer_install() self.model_type, self.module = get_xcomposer_type(self.model_path) logger.info(f'matching type of {self.model_type}') @@ -141,7 +142,7 @@ def build_model(self): init_empty_vit(self.model_path): warnings.simplefilter('ignore') config = self.hf_config - model = AutoModelForCausalLM.from_config(config, trust_remote_code=True) + model = AutoModelForCausalLM.from_config(config, trust_remote_code=self.trust_remote_code) model.vit.load_model() model.vit.resize_pos() if hasattr(self.hf_config, 'img_size'):