diff --git a/lmdeploy/api.py b/lmdeploy/api.py
index 11f31c1de4..d674166ddf 100644
--- a/lmdeploy/api.py
+++ b/lmdeploy/api.py
@@ -17,6 +17,7 @@ def pipeline(model_path: str,
              chat_template_config: ChatTemplateConfig | None = None,
              log_level: str = 'WARNING',
              max_log_len: int | None = None,
+             trust_remote_code: bool = False,
              speculative_config: SpeculativeConfig | None = None,
              **kwargs):
     """Create a pipeline for inference.
@@ -41,6 +42,7 @@ def pipeline(model_path: str,
             ``WARNING``, ``INFO``, ``DEBUG``]
         max_log_len: Max number of prompt characters or prompt tokens
             being printed in log.
+        trust_remote_code: Whether to trust and execute custom code shipped in model repositories.
         speculative_config: speculative decoding configuration.
         **kwargs: additional keyword arguments passed to the pipeline.
 
@@ -73,6 +75,7 @@ def pipeline(model_path: str,
                     chat_template_config=chat_template_config,
                     log_level=log_level,
                     max_log_len=max_log_len,
+                    trust_remote_code=trust_remote_code,
                     speculative_config=speculative_config,
                     **kwargs)
 
diff --git a/lmdeploy/archs.py b/lmdeploy/archs.py
index 68fa03a407..f1fddb8f5c 100644
--- a/lmdeploy/archs.py
+++ b/lmdeploy/archs.py
@@ -128,14 +128,14 @@ def check_vl_llm(backend: str, config: dict) -> bool:
     return False
 
 
-def get_task(backend: str, model_path: str):
+def get_task(backend: str, model_path: str, trust_remote_code: bool = False):
     """Get pipeline type and pipeline class from model config."""
     from lmdeploy.serve.core import AsyncEngine
 
     if os.path.exists(os.path.join(model_path, 'triton_models', 'weights')):
         # workspace model
         return 'llm', AsyncEngine
-    _, config = get_model_arch(model_path)
+    _, config = get_model_arch(model_path, trust_remote_code=trust_remote_code)
     if check_vl_llm(backend, config.to_dict()):
         from lmdeploy.serve.core import VLAsyncEngine
         return 'vlm', VLAsyncEngine
@@ -144,17 +144,17 @@ def get_task(backend: str, model_path: str):
     return 'llm', AsyncEngine
 
 
-def get_model_arch(model_path: str):
+def get_model_arch(model_path: str, trust_remote_code: bool = False):
     """Get a model's architecture and configuration.
 
     Args:
         model_path(str): the model path
     """
     try:
-        cfg = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
+        cfg = AutoConfig.from_pretrained(model_path, trust_remote_code=trust_remote_code)
     except Exception as e:  # noqa
         from transformers import PretrainedConfig
-        cfg = PretrainedConfig.from_pretrained(model_path, trust_remote_code=True)
+        cfg = PretrainedConfig.from_pretrained(model_path, trust_remote_code=trust_remote_code)
 
     _cfg = cfg.to_dict()
     if _cfg.get('architectures', None):
diff --git a/lmdeploy/cli/serve.py b/lmdeploy/cli/serve.py
index 155392f4a7..81816729ba 100644
--- a/lmdeploy/cli/serve.py
+++ b/lmdeploy/cli/serve.py
@@ -63,6 +63,9 @@ def add_parser_api_server():
                             default=['*'],
                             help='A list of allowed http headers for cors')
         parser.add_argument('--proxy-url', type=str, default=None, help='The proxy url for api server.')
+        parser.add_argument('--trust-remote-code',
+                            action='store_true',
+                            help='Whether to trust remote code from model repositories.')
         parser.add_argument('--max-concurrent-requests',
                             type=int,
                             default=None,
@@ -303,6 +306,7 @@ def api_server(args):
                 max_log_len=args.max_log_len,
                 disable_fastapi_docs=args.disable_fastapi_docs,
                 max_concurrent_requests=args.max_concurrent_requests,
+                trust_remote_code=args.trust_remote_code,
                 reasoning_parser=args.reasoning_parser,
                 tool_call_parser=args.tool_call_parser,
                 speculative_config=speculative_config,
@@ -334,6 +338,7 @@ def api_server(args):
                 max_log_len=args.max_log_len,
                 disable_fastapi_docs=args.disable_fastapi_docs,
                 max_concurrent_requests=args.max_concurrent_requests,
+                trust_remote_code=args.trust_remote_code,
                 reasoning_parser=args.reasoning_parser,
                 tool_call_parser=args.tool_call_parser,
                 speculative_config=speculative_config,
diff --git a/lmdeploy/pipeline.py b/lmdeploy/pipeline.py
index ca4c42bba0..7e7a3b0029 100644
--- a/lmdeploy/pipeline.py
+++ b/lmdeploy/pipeline.py
@@ -39,6 +39,7 @@ def __init__(self,
                  chat_template_config: ChatTemplateConfig | None = None,
                  log_level: str = 'WARNING',
                  max_log_len: int | None = None,
+                 trust_remote_code: bool = False,
                  speculative_config: SpeculativeConfig | None = None,
                  **kwargs):
         """Initialize Pipeline.
@@ -49,6 +50,7 @@ def __init__(self,
             chat_template_config: Chat template configuration.
             log_level: Log level.
             max_log_len: Max number of prompt characters or prompt tokens being printed in log.
+            trust_remote_code: Whether to trust and execute custom code shipped in model repositories.
             speculative_config: Speculative decoding configuration.
             **kwargs: Additional keyword arguments.
         """
@@ -69,12 +71,13 @@ def __init__(self,
 
         # Create inference engine
         backend, backend_config = autoget_backend_config(model_path, backend_config)
-        _, pipeline_class = get_task(backend, model_path)
+        _, pipeline_class = get_task(backend, model_path, trust_remote_code=trust_remote_code)
         self.async_engine = pipeline_class(model_path,
                                            backend=backend,
                                            backend_config=backend_config,
                                            chat_template_config=chat_template_config,
                                            max_log_len=max_log_len,
+                                           trust_remote_code=trust_remote_code,
                                            speculative_config=speculative_config,
                                            **kwargs)
         self.internal_thread = _EventLoopThread(daemon=True)
diff --git a/lmdeploy/pytorch/config.py b/lmdeploy/pytorch/config.py
index 39584ac4b7..d9b63617e2 100644
--- a/lmdeploy/pytorch/config.py
+++ b/lmdeploy/pytorch/config.py
@@ -365,7 +365,7 @@ def get_head_size(self):
     def from_pretrained(
         cls,
         pretrained_model_name_or_path: str,
-        trust_remote_code: bool = True,
+        trust_remote_code: bool = False,
         dtype: str = 'auto',
         dist_config: DistConfig = None,
         hf_overrides: dict[str, Any] = None,
@@ -563,10 +563,11 @@ def from_config(
         target_cache_cfg: CacheConfig,
         target_model: str = None,
         dtype: str = 'auto',
+        trust_remote_code: bool = False,
     ):
         model = model or target_model
         model_config = ModelConfig.from_pretrained(model,
-                                                   trust_remote_code=True,
+                                                   trust_remote_code=trust_remote_code,
                                                    dtype=dtype,
                                                    is_draft_model=True,
                                                    spec_method=method,
diff --git a/lmdeploy/pytorch/engine/config_builder.py b/lmdeploy/pytorch/engine/config_builder.py
index 7c7ab6c3d0..3128a3d986 100644
--- a/lmdeploy/pytorch/engine/config_builder.py
+++ b/lmdeploy/pytorch/engine/config_builder.py
@@ -98,7 +98,7 @@ def build_misc_config(engine_config: PytorchEngineConfig):
 
     @staticmethod
     def build_specdecode_config(target_model, speculative_config: SpeculativeConfig, engine_config: PytorchEngineConfig,
-                                cache_config: CacheConfig):
+                                cache_config: CacheConfig, trust_remote_code: bool = False):
         """Build spec decode config."""
         specdecode_config = None
         if speculative_config is not None:
@@ -113,5 +113,6 @@ def build_specdecode_config(target_model, speculative_config: SpeculativeConfig,
                 target_model=target_model,
                 target_cache_cfg=cache_config,
                 dtype=engine_config.dtype,
+                trust_remote_code=trust_remote_code,
             )
         return specdecode_config
diff --git a/lmdeploy/pytorch/engine/engine.py b/lmdeploy/pytorch/engine/engine.py
index 27848de026..deeb8e77d9 100644
--- a/lmdeploy/pytorch/engine/engine.py
+++ b/lmdeploy/pytorch/engine/engine.py
@@ -94,7 +94,7 @@ def __init__(
         self,
         model_path: str,
         engine_config: PytorchEngineConfig = None,
-        trust_remote_code: bool = True,
+        trust_remote_code: bool = False,
         speculative_config: SpeculativeConfig = None,
     ) -> None:
         # make sure engine config exist
@@ -133,7 +133,7 @@ def __init__(
         misc_config = ConfigBuilder.build_misc_config(engine_config)
         # spec decode
         self.specdecode_config = ConfigBuilder.build_specdecode_config(model_path, speculative_config, engine_config,
-                                                                       cache_config)
+                                                                       cache_config, trust_remote_code)
 
         # build model agent
         self.executor = build_executor(
@@ -147,6 +147,7 @@ def __init__(
             distributed_executor_backend=engine_config.distributed_executor_backend,
             dtype=engine_config.dtype,
             specdecode_config=self.specdecode_config,
+            trust_remote_code=trust_remote_code,
         )
         self.executor.init()
 
@@ -198,7 +199,7 @@ def __init__(
     def from_pretrained(cls,
                         pretrained_model_name_or_path: str,
                         engine_config: PytorchEngineConfig = None,
-                        trust_remote_code: bool = True,
+                        trust_remote_code: bool = False,
                         speculative_config: SpeculativeConfig = None,
                         **kwargs):
         """Lmdeploy python inference engine.
diff --git a/lmdeploy/pytorch/engine/executor/__init__.py b/lmdeploy/pytorch/engine/executor/__init__.py
index bd580462b6..497bf9492b 100644
--- a/lmdeploy/pytorch/engine/executor/__init__.py
+++ b/lmdeploy/pytorch/engine/executor/__init__.py
@@ -63,6 +63,7 @@ def build_executor(
     distributed_executor_backend: str = None,
     dtype: str = 'auto',
     specdecode_config: SpecDecodeConfig = None,
+    trust_remote_code: bool = False,
 ) -> ExecutorBase:
     """Build model agent executor."""
     logger = get_logger('lmdeploy')
@@ -71,7 +72,7 @@ def build_executor(
 
     model_config = ModelConfig.from_pretrained(
         model_path,
-        trust_remote_code=True,
+        trust_remote_code=trust_remote_code,
         dtype=dtype,
         hf_overrides=misc_config.hf_overrides,
         dist_config=dist_config,
diff --git a/lmdeploy/serve/core/async_engine.py b/lmdeploy/serve/core/async_engine.py
index c5dfcd0364..4d33c12646 100644
--- a/lmdeploy/serve/core/async_engine.py
+++ b/lmdeploy/serve/core/async_engine.py
@@ -110,6 +110,7 @@ def __init__(self,
                  backend_config: TurbomindEngineConfig | PytorchEngineConfig | None = None,
                  chat_template_config: ChatTemplateConfig | None = None,
                  max_log_len: int | None = None,
+                 trust_remote_code: bool = False,
                  speculative_config: SpeculativeConfig | None = None,
                  **kwargs) -> None:
         logger.info(f'input backend={backend}, backend_config={backend_config}')
@@ -118,10 +119,10 @@ def __init__(self,
                                             if backend == 'turbomind' else PytorchEngineConfig())
         self.model_name = model_name if model_name else model_path
         self.chat_template = get_chat_template(model_path, chat_template_config)
-        self.tokenizer = Tokenizer(model_path)
+        self.tokenizer = Tokenizer(model_path, trust_remote_code=trust_remote_code)
         self.prompt_processor = MultimodalProcessor(self.tokenizer, self.chat_template)
-        self.hf_gen_cfg = get_hf_gen_cfg(model_path)
-        self.arch, self.hf_cfg = get_model_arch(model_path)
+        self.hf_gen_cfg = get_hf_gen_cfg(model_path, trust_remote_code=trust_remote_code)
+        self.arch, self.hf_cfg = get_model_arch(model_path, trust_remote_code=trust_remote_code)
         self.session_len = (_get_and_verify_max_len(self.hf_cfg, None)
                             if backend_config.session_len is None else backend_config.session_len)
         backend_config.session_len = self.session_len
@@ -129,10 +130,14 @@ def __init__(self,
             logger.warning('speculative decoding is not supported by turbomind ')
         # build backend engine
         if backend == 'turbomind':
-            self.engine = self._build_turbomind(model_path=model_path, backend_config=backend_config, **kwargs)
+            self.engine = self._build_turbomind(model_path=model_path,
+                                                backend_config=backend_config,
+                                                trust_remote_code=trust_remote_code,
+                                                **kwargs)
         elif backend == 'pytorch':
             self.engine = self._build_pytorch(model_path=model_path,
                                               backend_config=backend_config,
+                                              trust_remote_code=trust_remote_code,
                                               speculative_config=speculative_config,
                                               **kwargs)
         else:
@@ -169,19 +174,30 @@ def __enter__(self):
     def __exit__(self, exc_type, exc_value, traceback):
         self.close()
 
-    def _build_turbomind(self, model_path: str, backend_config: TurbomindEngineConfig | None = None, **kwargs):
+    def _build_turbomind(self,
+                         model_path: str,
+                         backend_config: TurbomindEngineConfig | None = None,
+                         trust_remote_code: bool = False,
+                         **kwargs):
         """Inner build method for turbomind backend."""
         from lmdeploy import turbomind as tm
-        return tm.TurboMind.from_pretrained(model_path, engine_config=backend_config, **kwargs)
+        return tm.TurboMind.from_pretrained(model_path,
+                                            engine_config=backend_config,
+                                            trust_remote_code=trust_remote_code,
+                                            **kwargs)
 
     def _build_pytorch(self,
                        model_path: str,
                        backend_config: PytorchEngineConfig | None = None,
+                       trust_remote_code: bool = False,
                        speculative_config: SpeculativeConfig | None = None,
                        **kwargs):
         """Inner build method for pytorch backend."""
         from lmdeploy.pytorch.engine import Engine
-        return Engine.from_pretrained(model_path, engine_config=backend_config, speculative_config=speculative_config)
+        return Engine.from_pretrained(model_path,
+                                      engine_config=backend_config,
+                                      trust_remote_code=trust_remote_code,
+                                      speculative_config=speculative_config)
 
     def _build_stat_loggers(self):
         self.stat_loggers = []
diff --git a/lmdeploy/serve/core/vl_async_engine.py b/lmdeploy/serve/core/vl_async_engine.py
index 44fd97dac6..9e6c9ac25d 100644
--- a/lmdeploy/serve/core/vl_async_engine.py
+++ b/lmdeploy/serve/core/vl_async_engine.py
@@ -17,6 +17,7 @@ def __init__(self,
                  backend: Literal['turbomind', 'pytorch'] = 'turbomind',
                  backend_config: TurbomindEngineConfig | PytorchEngineConfig | None = None,
                  vision_config: VisionConfig | None = None,
+                 trust_remote_code: bool = False,
                  **kwargs) -> None:
         from lmdeploy.serve.processors import MultimodalProcessor
         from lmdeploy.utils import try_import_deeplink
@@ -27,8 +28,16 @@ def __init__(self,
         if backend_config and backend_config.enable_prefix_caching:
             backend_config.enable_prefix_caching = False
             logger.warning('Prefix caching is disabled since LMDeploy hasn\'t support in on VL models yet')
-        self.vl_encoder = ImageEncoder(model_path, backend, vision_config, backend_config=backend_config)
-        super().__init__(model_path, backend=backend, backend_config=backend_config, **kwargs)
+        self.vl_encoder = ImageEncoder(model_path,
+                                       backend,
+                                       vision_config,
+                                       backend_config=backend_config,
+                                       trust_remote_code=trust_remote_code)
+        super().__init__(model_path,
+                         backend=backend,
+                         backend_config=backend_config,
+                         trust_remote_code=trust_remote_code,
+                         **kwargs)
         # Update prompt_processor to support multimodal processing
         self.prompt_processor = MultimodalProcessor(self.tokenizer,
                                                     self.chat_template,
diff --git a/lmdeploy/serve/openai/api_server.py b/lmdeploy/serve/openai/api_server.py
index 2c552febd0..456bc40912 100644
--- a/lmdeploy/serve/openai/api_server.py
+++ b/lmdeploy/serve/openai/api_server.py
@@ -1415,6 +1415,7 @@ def serve(model_path: str,
           max_log_len: int | None = None,
           disable_fastapi_docs: bool = False,
           max_concurrent_requests: int | None = None,
+          trust_remote_code: bool = False,
           reasoning_parser: str | None = None,
           tool_call_parser: str | None = None,
           allow_terminate_by_client: bool = False,
@@ -1487,7 +1488,7 @@ def serve(model_path: str,
         http_or_https = 'https'
 
     handle_torchrun()
-    _, pipeline_class = get_task(backend, model_path)
+    _, pipeline_class = get_task(backend, model_path, trust_remote_code=trust_remote_code)
     if isinstance(backend_config, PytorchEngineConfig):
         backend_config.enable_mp_engine = True
         # router replay
@@ -1499,6 +1500,7 @@ def serve(model_path: str,
                                                     backend_config=backend_config,
                                                     chat_template_config=chat_template_config,
                                                     max_log_len=max_log_len,
+                                                    trust_remote_code=trust_remote_code,
                                                     speculative_config=speculative_config,
                                                     **kwargs)
     # set reasoning parser and tool parser
diff --git a/lmdeploy/tokenizer.py b/lmdeploy/tokenizer.py
index c184e53111..df5329f423 100644
--- a/lmdeploy/tokenizer.py
+++ b/lmdeploy/tokenizer.py
@@ -43,11 +43,11 @@ class HuggingFaceTokenizer:
         model_dir: the directory of the tokenizer model.
     """
 
-    def __init__(self, model_dir: str):
-        self._check_transformers_version(model_dir)
+    def __init__(self, model_dir: str, trust_remote_code: bool = False):
+        self._check_transformers_version(model_dir, trust_remote_code=trust_remote_code)
         from transformers import AutoTokenizer
         self.logger = get_logger('lmdeploy')
-        self.model = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
+        self.model = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=trust_remote_code)
         self._prefix_space_tokens = None
 
         if self.model.eos_token_id is None:
@@ -67,7 +67,7 @@ def __init__(self, model_dir: str):
         self.max_indexes_num = 5
         self.token2id = {}
 
-    def _check_transformers_version(self, model_dir: str):
+    def _check_transformers_version(self, model_dir: str, trust_remote_code: bool = False):
         import transformers
         from packaging import version
 
@@ -76,7 +76,7 @@ def _check_transformers_version(self, model_dir: str):
         logger = get_logger('lmdeploy')
 
         current_transformers_version = version.parse(transformers.__version__)
-        cfg = get_model_arch(model_dir)[1]
+        cfg = get_model_arch(model_dir, trust_remote_code=trust_remote_code)[1]
         cfg_ver = getattr(cfg, 'transformers_version', None)
         if cfg_ver is None:
             llm_config = getattr(cfg, 'llm_config', None)
@@ -352,8 +352,8 @@ def __call__(self, s: str | Sequence[str]):
 class ChatGLM4Tokenizer(HuggingFaceTokenizer):
     """Tokenizer of GLM4."""
 
-    def __init__(self, model_path):
-        super().__init__(model_path)
+    def __init__(self, model_path, trust_remote_code: bool = False):
+        super().__init__(model_path, trust_remote_code=trust_remote_code)
         original_pad = self.model._pad
 
         def __pad(*args, **kwargs):
@@ -374,8 +374,8 @@ def encode(self, s: str, add_bos: bool = True, add_special_tokens: bool = True,
 class ChatGLMTokenizer(HuggingFaceTokenizer):
     """Tokenizer of GLM2."""
 
-    def __init__(self, model_path):
-        super().__init__(model_path)
+    def __init__(self, model_path, trust_remote_code: bool = False):
+        super().__init__(model_path, trust_remote_code=trust_remote_code)
         original_pad = self.model._pad
 
         def __pad(*args, **kwargs):
@@ -390,8 +390,8 @@ def __pad(*args, **kwargs):
 class GptOssTokenizer(HuggingFaceTokenizer):
     """Tokenizer of GPT-OSS."""
 
-    def __init__(self, model_dir: str):
-        super().__init__(model_dir)
+    def __init__(self, model_dir: str, trust_remote_code: bool = False):
+        super().__init__(model_dir, trust_remote_code=trust_remote_code)
         from openai_harmony import HarmonyEncodingName, Role, StreamableParser, load_harmony_encoding
         encoding = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
         self.role = Role.ASSISTANT
@@ -423,24 +423,24 @@ class Tokenizer:
         model_path: the path of the tokenizer model.
     """
 
-    def __init__(self, model_path: str):
+    def __init__(self, model_path: str, trust_remote_code: bool = False):
         from transformers import AutoConfig, PretrainedConfig
         try:
-            model_cfg = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
+            model_cfg = AutoConfig.from_pretrained(model_path, trust_remote_code=trust_remote_code)
         except Exception as e:  # noqa
-            model_cfg = PretrainedConfig.from_pretrained(model_path, trust_remote_code=True)
+            model_cfg = PretrainedConfig.from_pretrained(model_path, trust_remote_code=trust_remote_code)
         is_gpt_oss = getattr(model_cfg, 'model_type', '') == 'gpt_oss'
         from transformers.models.auto.tokenization_auto import get_tokenizer_config
-        tokenizer_config = get_tokenizer_config(model_path, trust_remote_code=True)
+        tokenizer_config = get_tokenizer_config(model_path, trust_remote_code=trust_remote_code)
         config_tokenizer_class = tokenizer_config.get('tokenizer_class')
         if config_tokenizer_class == 'ChatGLM4Tokenizer':
-            self.model = ChatGLM4Tokenizer(model_path)
+            self.model = ChatGLM4Tokenizer(model_path, trust_remote_code=trust_remote_code)
         elif config_tokenizer_class == 'ChatGLMTokenizer':
-            self.model = ChatGLMTokenizer(model_path)
+            self.model = ChatGLMTokenizer(model_path, trust_remote_code=trust_remote_code)
         elif is_gpt_oss:
-            self.model = GptOssTokenizer(model_path)
+            self.model = GptOssTokenizer(model_path, trust_remote_code=trust_remote_code)
         else:
-            self.model = HuggingFaceTokenizer(model_path)
+            self.model = HuggingFaceTokenizer(model_path, trust_remote_code=trust_remote_code)
         self.logger = get_logger('lmdeploy')
 
     @property
diff --git a/lmdeploy/utils.py b/lmdeploy/utils.py
index 9d83dc06b4..6fc451cfa9 100644
--- a/lmdeploy/utils.py
+++ b/lmdeploy/utils.py
@@ -219,10 +219,10 @@ def _stop_words(stop_words: list[int | str], tokenizer: object):
     return stop_words
 
 
-def get_hf_gen_cfg(path: str):
+def get_hf_gen_cfg(path: str, trust_remote_code: bool = False):
     from transformers import GenerationConfig
     try:
-        cfg = GenerationConfig.from_pretrained(path, trust_remote_code=True)
+        cfg = GenerationConfig.from_pretrained(path, trust_remote_code=trust_remote_code)
         return cfg.to_dict()
     except OSError:
         return {}
diff --git a/lmdeploy/vl/engine.py b/lmdeploy/vl/engine.py
index 8cd179df8a..6fc39630da 100644
--- a/lmdeploy/vl/engine.py
+++ b/lmdeploy/vl/engine.py
@@ -38,8 +38,12 @@ def __init__(
         backend: str,
         vision_config: VisionConfig = None,
         backend_config: TurbomindEngineConfig | PytorchEngineConfig | None = None,
+        trust_remote_code: bool = False,
     ):
-        self.model = load_vl_model(model_path, backend, backend_config=backend_config)
+        self.model = load_vl_model(model_path,
+                                   backend,
+                                   backend_config=backend_config,
+                                   trust_remote_code=trust_remote_code)
         if vision_config is None:
             vision_config = VisionConfig()
         self.vision_config = vision_config
diff --git a/lmdeploy/vl/model/base.py b/lmdeploy/vl/model/base.py
index 51ebb44419..bdad514bbc 100644
--- a/lmdeploy/vl/model/base.py
+++ b/lmdeploy/vl/model/base.py
@@ -20,14 +20,16 @@ def __init__(self,
                  with_llm: bool = False,
                  max_memory: dict[int, int] = None,
                  hf_config: AutoConfig = None,
-                 backend: str = ''):
+                 backend: str = '',
+                 trust_remote_code: bool = False):
         """init."""
         self.model_path = model_path
         self.with_llm = with_llm
         self.max_memory = max_memory
         self.backend = backend
+        self.trust_remote_code = trust_remote_code
         if hf_config is None:
-            _, hf_config = get_model_arch(model_path)
+            _, hf_config = get_model_arch(model_path, trust_remote_code=trust_remote_code)
         self.hf_config = hf_config
         self.image_token_id = self.get_pad_token_id(model_path, hf_config) or 0
 
@@ -36,7 +38,7 @@ def get_pad_token_id(self, model_path, hf_config):
         pad_token_id = getattr(hf_config, 'pad_token_id', None)
         if pad_token_id is None:
             try:
-                tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+                tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=self.trust_remote_code)
                 pad_token_id = getattr(tokenizer, 'pad_token_id', None)
             except Exception as e:
                 print(e)
diff --git a/lmdeploy/vl/model/builder.py b/lmdeploy/vl/model/builder.py
index 04ac5ab759..db441d6217 100644
--- a/lmdeploy/vl/model/builder.py
+++ b/lmdeploy/vl/model/builder.py
@@ -39,7 +39,8 @@
 def load_vl_model(model_path: str,
                   backend: str,
                   with_llm: bool = False,
-                  backend_config: TurbomindEngineConfig | PytorchEngineConfig | None = None):
+                  backend_config: TurbomindEngineConfig | PytorchEngineConfig | None = None,
+                  trust_remote_code: bool = False):
     """Load visual model.
 
     Args:
@@ -59,8 +60,13 @@ def load_vl_model(model_path: str,
         tp = getattr(backend_config, 'tp', 1)
         max_memory = {i: torch.cuda.mem_get_info(i)[0] for i in range(tp)} if backend == 'turbomind' else None
 
-    _, hf_config = get_model_arch(model_path)
-    kwargs = dict(model_path=model_path, with_llm=with_llm, max_memory=max_memory, hf_config=hf_config, backend=backend)
+    _, hf_config = get_model_arch(model_path, trust_remote_code=trust_remote_code)
+    kwargs = dict(model_path=model_path,
+                  with_llm=with_llm,
+                  max_memory=max_memory,
+                  hf_config=hf_config,
+                  backend=backend,
+                  trust_remote_code=trust_remote_code)
 
     for name, module in VISION_MODELS.module_dict.items():
         try:
diff --git a/lmdeploy/vl/model/cogvlm.py b/lmdeploy/vl/model/cogvlm.py
index 0dbacc5450..dd5a4ef30d 100644
--- a/lmdeploy/vl/model/cogvlm.py
+++ b/lmdeploy/vl/model/cogvlm.py
@@ -34,7 +34,7 @@ def build_model(self):
             from transformers import AutoModelForCausalLM
             self.vl_model = AutoModelForCausalLM.from_pretrained(self.model_path,
                                                                  device_map='cpu',
-                                                                 trust_remote_code=True)
+                                                                 trust_remote_code=self.trust_remote_code)
         else:
             raise NotImplementedError('turbomind has not supported cogvlm yet')
 
diff --git a/lmdeploy/vl/model/glm4_v.py b/lmdeploy/vl/model/glm4_v.py
index ea837aa3b3..b4de297f0a 100644
--- a/lmdeploy/vl/model/glm4_v.py
+++ b/lmdeploy/vl/model/glm4_v.py
@@ -39,7 +39,7 @@ def build_model(self):
             from transformers import AutoModelForCausalLM
             self.vl_model = AutoModelForCausalLM.from_pretrained(self.model_path,
                                                                  device_map='cpu',
-                                                                 trust_remote_code=True)
+                                                                 trust_remote_code=self.trust_remote_code)
         else:
             raise NotImplementedError('turbomind has not supported glm4v yet')
 
diff --git a/lmdeploy/vl/model/internvl.py b/lmdeploy/vl/model/internvl.py
index bac11f2251..ce33b0a90a 100644
--- a/lmdeploy/vl/model/internvl.py
+++ b/lmdeploy/vl/model/internvl.py
@@ -73,10 +73,13 @@ def __init__(self,
                  with_llm: bool = False,
                  max_memory: dict[int, int] = None,
                  hf_config: AutoConfig = None,
-                 backend: str = ''):
-        super().__init__(model_path, with_llm, max_memory, hf_config, backend)
+                 backend: str = '',
+                 trust_remote_code: bool = False):
+        super().__init__(model_path, with_llm, max_memory, hf_config, backend, trust_remote_code=trust_remote_code)
         self.image_token = '<IMG_CONTEXT>'
-        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True, use_fast=False)
+        tokenizer = AutoTokenizer.from_pretrained(model_path,
+                                                  trust_remote_code=self.trust_remote_code,
+                                                  use_fast=False)
         self.image_token_id = tokenizer.convert_tokens_to_ids(self.image_token)
 
     def build_preprocessor(self):
@@ -120,7 +123,7 @@ def build_model(self):
         with init_empty_weights():
             # transformers below 4.37.0 may raise error about flash_attn
             self.config.llm_config.attn_implementation = 'eager'
-            model = AutoModel.from_config(self.config, trust_remote_code=True)
+            model = AutoModel.from_config(self.config, trust_remote_code=self.trust_remote_code)
             self.vl_model = model
             if not self.with_llm:
                 del model.language_model
diff --git a/lmdeploy/vl/model/internvl3_hf.py b/lmdeploy/vl/model/internvl3_hf.py
index 9f9cce3d34..1448945892 100644
--- a/lmdeploy/vl/model/internvl3_hf.py
+++ b/lmdeploy/vl/model/internvl3_hf.py
@@ -41,12 +41,13 @@ def __init__(self,
                  with_llm: bool = False,
                  max_memory: dict[int, int] = None,
                  hf_config: AutoConfig = None,
-                 backend: str = ''):
-        super().__init__(model_path, with_llm, max_memory, hf_config, backend)
+                 backend: str = '',
+                 trust_remote_code: bool = False):
+        super().__init__(model_path, with_llm, max_memory, hf_config, backend, trust_remote_code=trust_remote_code)
         self.arch = self.hf_config.architectures[0]
 
     def build_preprocessor(self):
-        self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True)
+        self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=self.trust_remote_code)
         tokenizer = self.processor.tokenizer
         self.image_token = self.processor.image_token
         self.image_token_id = tokenizer.context_image_token_id
@@ -59,11 +60,11 @@ def build_model(self):
         from accelerate import init_empty_weights
         with init_empty_weights():
             if self.arch == 'InternVLForConditionalGeneration':
-                model = AutoModel.from_config(self.hf_config, trust_remote_code=True)
+                model = AutoModel.from_config(self.hf_config, trust_remote_code=self.trust_remote_code)
                 if not self.with_llm:
                     del model.language_model
             elif self.arch == 'InternS1ForConditionalGeneration':
-                model = AutoModelForCausalLM.from_config(self.hf_config, trust_remote_code=True)
+                model = AutoModelForCausalLM.from_config(self.hf_config, trust_remote_code=self.trust_remote_code)
                 if not self.with_llm:
                     del model.model.language_model
             else:
diff --git a/lmdeploy/vl/model/internvl_llava.py b/lmdeploy/vl/model/internvl_llava.py
index d521bab9fb..963000d8f0 100644
--- a/lmdeploy/vl/model/internvl_llava.py
+++ b/lmdeploy/vl/model/internvl_llava.py
@@ -89,7 +89,7 @@ def build_model(self):
                 disable_transformers_logging():
             warnings.simplefilter('ignore')
             self.config.quantization_config = {}  # disable vision part quantization
-            model = AutoModelForCausalLM.from_config(self.config, trust_remote_code=True)
+            model = AutoModelForCausalLM.from_config(self.config, trust_remote_code=self.trust_remote_code)
             self.vl_model = model
             if not self.with_llm:
                 del model.lm_head
diff --git a/lmdeploy/vl/model/llava.py b/lmdeploy/vl/model/llava.py
index 6dc5eff4c4..1f0a8253ec 100644
--- a/lmdeploy/vl/model/llava.py
+++ b/lmdeploy/vl/model/llava.py
@@ -256,7 +256,7 @@ def build_model(self):
                 init_llava_vision_tower(self.config):
             warnings.simplefilter('ignore')
             self.config.quantization_config = {}  # disable vision part quantization
-            model = AutoModelForCausalLM.from_config(self.config, trust_remote_code=True)
+            model = AutoModelForCausalLM.from_config(self.config, trust_remote_code=self.trust_remote_code)
 
         self.vl_model = model
         if not self.with_llm:
diff --git a/lmdeploy/vl/model/llava_hf.py b/lmdeploy/vl/model/llava_hf.py
index 8b865f48b7..60dc991e2a 100644
--- a/lmdeploy/vl/model/llava_hf.py
+++ b/lmdeploy/vl/model/llava_hf.py
@@ -18,7 +18,7 @@ class LlavaHfVisionModel(VisionModel):
     _arch = 'LlavaForConditionalGeneration'
 
     def build_preprocessor(self):
-        processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True)
+        processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=self.trust_remote_code)
         if hasattr(processor, 'tokenizer'):
             del processor.tokenizer
             processor.prtokenizer = None
diff --git a/lmdeploy/vl/model/minicpmv.py b/lmdeploy/vl/model/minicpmv.py
index 9e6c3e52a0..f0c5f1e4f2 100644
--- a/lmdeploy/vl/model/minicpmv.py
+++ b/lmdeploy/vl/model/minicpmv.py
@@ -24,8 +24,9 @@ def __init__(self,
                  with_llm: bool = False,
                  max_memory: dict[int, int] = None,
                  hf_config: AutoConfig = None,
-                 backend: str = ''):
-        super().__init__(model_path, with_llm, max_memory, hf_config, backend)
+                 backend: str = '',
+                 trust_remote_code: bool = False):
+        super().__init__(model_path, with_llm, max_memory, hf_config, backend, trust_remote_code=trust_remote_code)
         if not hasattr(self.hf_config, 'version'):
             raise ValueError('Can not find `version` in config.json. '
                              'Please checkout the latest model')
@@ -36,7 +37,7 @@ def __init__(self,
 
     def build_preprocessor(self):
         from transformers import AutoProcessor
-        self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True)
+        self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=self.trust_remote_code)
         self.image_processor = self.processor.image_processor
         self._preprocess_func = (self._preprocess_v2_5 if self.version == '2.5' else self._preprocess_v2_6)
 
@@ -49,7 +50,7 @@ def build_model(self):
             config = self.hf_config
             assert config.slice_mode is True, 'only support slice mode'
             config.quantization_config = {}  # disable vision part quantization
-            model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
+            model = AutoModelForCausalLM.from_config(config, trust_remote_code=self.trust_remote_code)
         self.vl_model = model
         if not self.with_llm:
             del model.llm
diff --git a/lmdeploy/vl/model/molmo.py b/lmdeploy/vl/model/molmo.py
index 90b8cb932a..68696ccb68 100644
--- a/lmdeploy/vl/model/molmo.py
+++ b/lmdeploy/vl/model/molmo.py
@@ -19,7 +19,7 @@ class MolmoVisionModel(VisionModel):
 
     def build_preprocessor(self):
         self.processor = AutoProcessor.from_pretrained(self.model_path,
-                                                       trust_remote_code=True,
+                                                       trust_remote_code=self.trust_remote_code,
                                                        torch_dtype=torch.half,
                                                        device_map='auto')
 
@@ -28,7 +28,7 @@ def build_model(self):
         load the whole VLM model when `self.with_llm==True`"""
         from accelerate import init_empty_weights, load_checkpoint_and_dispatch
         with init_empty_weights():
-            model = AutoModelForCausalLM.from_config(self.hf_config, trust_remote_code=True)
+            model = AutoModelForCausalLM.from_config(self.hf_config, trust_remote_code=self.trust_remote_code)
 
             self.vl_model = model
             if not self.with_llm:
diff --git a/lmdeploy/vl/model/phi3_vision.py b/lmdeploy/vl/model/phi3_vision.py
index b48302371f..3cff6ca79c 100644
--- a/lmdeploy/vl/model/phi3_vision.py
+++ b/lmdeploy/vl/model/phi3_vision.py
@@ -13,7 +13,7 @@ class Phi3VisionModel(LlavaHfVisionModel):
     _arch = 'Phi3VForCausalLM'
 
     def build_preprocessor(self):
-        processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True)
+        processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=self.trust_remote_code)
         if hasattr(processor, 'tokenizer'):
             del processor.tokenizer
             processor.tokenizer = None
@@ -24,7 +24,7 @@ def build_model(self):
             from transformers import AutoModelForCausalLM
             self.vl_model = AutoModelForCausalLM.from_pretrained(self.model_path,
                                                                  device_map='cpu',
-                                                                 trust_remote_code=True)
+                                                                 trust_remote_code=self.trust_remote_code)
         else:
             raise NotImplementedError('turbomind has not supported phi3v yet')
 
diff --git a/lmdeploy/vl/model/qwen.py b/lmdeploy/vl/model/qwen.py
index e7bee48bfc..5bb28cbe3d 100644
--- a/lmdeploy/vl/model/qwen.py
+++ b/lmdeploy/vl/model/qwen.py
@@ -36,7 +36,7 @@ def build_model(self):
         with init_empty_weights():
             config = self.hf_config
             config.quantization_config = {}  # disable vision part quantization
-            model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
+            model = AutoModelForCausalLM.from_config(config, trust_remote_code=self.trust_remote_code)
             self.vl_model = model
             if not self.with_llm:
                 del model.lm_head
diff --git a/lmdeploy/vl/model/qwen3.py b/lmdeploy/vl/model/qwen3.py
index e43dad838c..5986258eb7 100644
--- a/lmdeploy/vl/model/qwen3.py
+++ b/lmdeploy/vl/model/qwen3.py
@@ -27,7 +27,7 @@ class Qwen3VLModel(VisionModel):
 
     def build_preprocessor(self):
         check_transformers()
-        self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=True)
+        self.processor = AutoProcessor.from_pretrained(self.model_path, trust_remote_code=self.trust_remote_code)
 
         # image tokens
         self.image_token = self.processor.image_token
diff --git a/lmdeploy/vl/model/xcomposer2.py b/lmdeploy/vl/model/xcomposer2.py
index 89eaa7659a..cc26e3e3b5 100644
--- a/lmdeploy/vl/model/xcomposer2.py
+++ b/lmdeploy/vl/model/xcomposer2.py
@@ -92,9 +92,10 @@ def __init__(self,
                  with_llm: bool = False,
                  max_memory: dict[int, int] = None,
                  hf_config: AutoConfig = None,
-                 backend: str = ''):
+                 backend: str = '',
+                 trust_remote_code: bool = False):
         model_path = model_path.rstrip(os.sep)
-        super().__init__(model_path, with_llm, max_memory, hf_config, backend)
+        super().__init__(model_path, with_llm, max_memory, hf_config, backend, trust_remote_code=trust_remote_code)
         check_xcomposer_install()
         self.model_type, self.module = get_xcomposer_type(self.model_path)
         logger.info(f'matching type of {self.model_type}')
@@ -141,7 +142,7 @@ def build_model(self):
                 init_empty_vit(self.model_path):
             warnings.simplefilter('ignore')
             config = self.hf_config
-            model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
+            model = AutoModelForCausalLM.from_config(config, trust_remote_code=self.trust_remote_code)
             model.vit.load_model()
             model.vit.resize_pos()
             if hasattr(self.hf_config, 'img_size'):