diff --git a/pyproject.toml b/pyproject.toml index 54ed36099..70e609e06 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,7 @@ dependencies = [ "uvloop>=0.18", "torch", "more-itertools>=10.8.0", + "lazy-loader @ git+https://github.com/sjmonson/lazy-loader.git@feat/lazy_submodules", ] [project.optional-dependencies] diff --git a/src/guidellm/backends/__init__.py b/src/guidellm/backends/__init__.py index 52ba6ecb3..f01b2c959 100644 --- a/src/guidellm/backends/__init__.py +++ b/src/guidellm/backends/__init__.py @@ -9,8 +9,6 @@ handlers for processing streaming and non-streaming API responses. """ -from guidellm.extras.vllm import HAS_VLLM - from .backend import Backend, BackendArgs, BackendType from .openai import ( AudioRequestHandler, @@ -20,13 +18,7 @@ OpenAIRequestHandlerFactory, TextCompletionsRequestHandler, ) - -# Conditionally import VLLM backend if available -if HAS_VLLM: - from .vllm_python import VLLMPythonBackend, VLLMResponseHandler -else: - VLLMPythonBackend = None # type: ignore[assignment, misc] - VLLMResponseHandler = None # type: ignore[assignment, misc] +from .vllm_python import VLLMPythonBackend, VLLMResponseHandler __all__ = [ "AudioRequestHandler", @@ -38,8 +30,6 @@ "OpenAIRequestHandler", "OpenAIRequestHandlerFactory", "TextCompletionsRequestHandler", + "VLLMPythonBackend", + "VLLMResponseHandler", ] - -# Conditionally add VLLM backend and handler to exports -if HAS_VLLM: - __all__.extend(["VLLMPythonBackend", "VLLMResponseHandler"]) diff --git a/src/guidellm/backends/vllm_python/vllm.py b/src/guidellm/backends/vllm_python/vllm.py index 11f3bd983..8e3702f4d 100644 --- a/src/guidellm/backends/vllm_python/vllm.py +++ b/src/guidellm/backends/vllm_python/vllm.py @@ -22,13 +22,7 @@ from guidellm.backends.backend import Backend, BackendArgs from guidellm.backends.vllm_python.vllm_response import VLLMResponseHandler -from guidellm.extras.vllm import ( - HAS_VLLM, - AsyncEngineArgs, - AsyncLLMEngine, - RequestOutput, - SamplingParams, -) 
+from guidellm.extras import audio, vision, vllm from guidellm.logger import logger from guidellm.schemas import ( GenerationRequest, @@ -37,22 +31,6 @@ StandardBaseModel, ) -try: - from guidellm.extras.audio import _decode_audio - - HAS_AUDIO = True -except ImportError: - _decode_audio = None # type: ignore[assignment] - HAS_AUDIO = False - -try: - from guidellm.extras.vision import image_dict_to_pil - - HAS_VISION = True -except ImportError: - image_dict_to_pil = None # type: ignore[assignment] - HAS_VISION = False - # Sentinel for "chat template not yet resolved" cache. _CHAT_TEMPLATE_UNSET: object = object() @@ -124,14 +102,6 @@ class _ResolvedRequest(StandardBaseModel): ) -def _check_vllm_available() -> None: - """Check if vllm is available and raise helpful error if not.""" - if not HAS_VLLM: - raise ImportError( - "vllm is not installed. Install vllm to use the vllm python backend." - ) - - def _has_jinja2_markers(s: str) -> bool: """Return True if the string contains Jinja2 template syntax ({{, {%, or {#).""" return "{{" in s or "{%" in s or "{#" in s @@ -197,7 +167,6 @@ def __init__( :param audio_placeholder: Optional string to use as the audio placeholder when using audio_column; if unset, falls back to "<|audio|>". 
""" - _check_vllm_available() super().__init__(type_="vllm_python") self.model = model @@ -209,7 +178,7 @@ def __init__( # Runtime state self._in_process = False - self._engine: AsyncLLMEngine | None = None + self._engine: vllm.AsyncLLMEngine | None = None self._resolved_chat_template: str | None | object = _CHAT_TEMPLATE_UNSET @property @@ -270,8 +239,8 @@ async def process_startup(self): if self._in_process: raise RuntimeError("Backend already started up for process.") - engine_args = AsyncEngineArgs(**self.vllm_config) # type: ignore[misc] - self._engine = AsyncLLMEngine.from_engine_args(engine_args) # type: ignore[misc] + engine_args = vllm.AsyncEngineArgs(**self.vllm_config) + self._engine = vllm.AsyncLLMEngine.from_engine_args(engine_args) self._in_process = True async def process_shutdown(self): @@ -320,7 +289,7 @@ async def default_model(self) -> str: """ return self.model - def _validate_backend_initialized(self) -> AsyncLLMEngine: + def _validate_backend_initialized(self) -> vllm.AsyncLLMEngine: """ Validate that the backend is initialized and return the engine. @@ -360,14 +329,9 @@ def _build_multi_modal_data_from_columns( # noqa: C901, PLR0912 for item in image_items: if not item or not isinstance(item, dict): continue - if not HAS_VISION or image_dict_to_pil is None: - raise ImportError( - "Image column support requires guidellm[vision]. " - "Install with: pip install 'guidellm[vision]'" - ) # Convert raw image dicts into PIL Images as required by vLLM's vision # processor - pil_image = image_dict_to_pil(item) + pil_image = vision.image_dict_to_pil(item) if "image" not in multi_modal_data: multi_modal_data["image"] = pil_image else: @@ -390,15 +354,10 @@ def _build_multi_modal_data_from_columns( # noqa: C901, PLR0912 else: audio_bytes = first.get("audio") if isinstance(audio_bytes, bytes) and len(audio_bytes) > 0: - if not HAS_AUDIO or _decode_audio is None: - raise ImportError( - "Audio column support requires guidellm[audio]. 
" - "Install with: pip install 'guidellm[audio]'" - ) try: # Decode raw audio bytes into an array since vLLM audio models # expect either raw numpy arrays or specific tensor formats - audio_samples = _decode_audio(audio_bytes) + audio_samples = audio._decode_audio(audio_bytes) # noqa: SLF001 # torchcodec decodes audio on CPU, so .data is always # a CPU torch.Tensor. .cpu() is a no-op on CPU tensors. audio_array = audio_samples.data.cpu().numpy() @@ -731,7 +690,7 @@ def _update_token_timing( request_info.timings.last_token_iteration = iter_time request_info.timings.token_iterations += iterations - def _text_from_output(self, output: RequestOutput | None) -> str: + def _text_from_output(self, output: vllm.RequestOutput | None) -> str: """ Extract generated text from VLLM RequestOutput. @@ -744,7 +703,7 @@ def _text_from_output(self, output: RequestOutput | None) -> str: def _stream_usage_tokens( self, - output: RequestOutput, + output: vllm.RequestOutput, request_info: RequestInfo, ) -> tuple[int, int]: """ @@ -770,7 +729,7 @@ def _stream_usage_tokens( def _usage_from_output( self, - output: RequestOutput | None, + output: vllm.RequestOutput | None, *, request_info: RequestInfo | None = None, ) -> dict[str, int] | None: @@ -805,7 +764,7 @@ def _build_final_response( self, request: GenerationRequest, request_info: RequestInfo, - final_output: RequestOutput | None, + final_output: vllm.RequestOutput | None, stream: bool, text: str = "", ) -> tuple[GenerationResponse, RequestInfo] | None: @@ -832,7 +791,7 @@ def _build_final_response( def _create_sampling_params( self, max_tokens_override: int | None = None, - ) -> SamplingParams: + ) -> vllm.SamplingParams: """ Create VLLM SamplingParams. 
@@ -850,7 +809,7 @@ def _create_sampling_params( params["max_tokens"] = max_tokens_override params["ignore_eos"] = True - return SamplingParams(**params) # type: ignore[misc] + return vllm.SamplingParams(**params) def _raise_generation_error(self, exc: BaseException) -> None: """Re-raise generation failure with context. @@ -895,7 +854,7 @@ async def _run_generation( request_info: RequestInfo, stream: bool, generate_input: str | dict[str, Any], - sampling_params: SamplingParams, + sampling_params: vllm.SamplingParams, request_id: str, state: dict[str, Any], ) -> AsyncIterator[tuple[GenerationResponse, RequestInfo]]: diff --git a/src/guidellm/data/preprocessors/encoders.py b/src/guidellm/data/preprocessors/encoders.py index 2e6f6a9b7..248c13ced 100644 --- a/src/guidellm/data/preprocessors/encoders.py +++ b/src/guidellm/data/preprocessors/encoders.py @@ -6,6 +6,8 @@ DatasetPreprocessor, PreprocessorRegistry, ) +from guidellm.extras import audio as guidellm_audio +from guidellm.extras import vision as guidellm_vision __all__ = ["MediaEncoder"] @@ -27,24 +29,6 @@ def __init__( encode_kwargs.get("video", {}) if encode_kwargs else {} ) - @staticmethod - def encode_audio(*args, **kwargs): - from guidellm.extras.audio import encode_audio - - return encode_audio(*args, **kwargs) - - @staticmethod - def encode_image(*args, **kwargs): - from guidellm.extras.vision import encode_image - - return encode_image(*args, **kwargs) - - @staticmethod - def encode_video(*args, **kwargs): - from guidellm.extras.vision import encode_video - - return encode_video(*args, **kwargs) - def __call__(self, items: list[dict[str, list[Any]]]) -> list[dict[str, list[Any]]]: return [self.encode_turn(item) for item in items] @@ -56,7 +40,7 @@ def encode_turn(self, columns: dict[str, list[Any]]) -> dict[str, list[Any]]: continue encoded_audio.append( - self.encode_audio(audio, **self.encode_audio_kwargs) + guidellm_audio.encode_audio(audio, **self.encode_audio_kwargs) ) columns["audio_column"] = 
encoded_audio @@ -67,7 +51,7 @@ def encode_turn(self, columns: dict[str, list[Any]]) -> dict[str, list[Any]]: continue encoded_images.append( - self.encode_image(image, **self.encode_image_kwargs) + guidellm_vision.encode_image(image, **self.encode_image_kwargs) ) columns["image_column"] = encoded_images @@ -78,7 +62,7 @@ def encode_turn(self, columns: dict[str, list[Any]]) -> dict[str, list[Any]]: continue encoded_videos.append( - self.encode_video(video, **self.encode_video_kwargs) + guidellm_vision.encode_video(video, **self.encode_video_kwargs) ) columns["video_column"] = encoded_videos diff --git a/src/guidellm/extras/__init__.py b/src/guidellm/extras/__init__.py index 80a9a3ea2..03197ebbc 100644 --- a/src/guidellm/extras/__init__.py +++ b/src/guidellm/extras/__init__.py @@ -1,4 +1,22 @@ """ Code that depends on optional dependencies. -Each submodule should be deferred imported. + +All dependent code should import in one of two ways: + +1. import guidellm.extras +2. from guidellm.extras import submodule + +As most of the codebase eagerly imports, importing specific functions or classes may cause +ImportErrors if the optional dependencies are missing. Importing from the module or +submodule level ensures errors are deferred to the calling point. 
""" + +import lazy_loader as lazy + +submodules = ["vllm", "vision", "audio"] + +__getattr__, __dir__, __all__ = lazy.attach( + __name__, + submodules=submodules, + lazy_submodules=True, # Only import submodules when accessed +) diff --git a/src/guidellm/extras/audio.py b/src/guidellm/extras/audio.py index fe05f2275..b5ce66450 100644 --- a/src/guidellm/extras/audio.py +++ b/src/guidellm/extras/audio.py @@ -12,7 +12,7 @@ from torchcodec.decoders import AudioDecoder from torchcodec.encoders import AudioEncoder except ImportError as e: - raise ImportError("Please install guidellm[audio] to use audio features") from e + raise AttributeError("Please install guidellm[audio] to use audio features") from e __all__ = [ "encode_audio", diff --git a/src/guidellm/extras/vision.py b/src/guidellm/extras/vision.py index d28cfa97e..b38c9541d 100644 --- a/src/guidellm/extras/vision.py +++ b/src/guidellm/extras/vision.py @@ -11,7 +11,7 @@ try: from PIL import Image as PILImage except ImportError as e: - raise ImportError( + raise AttributeError( "Please install guidellm[vision] to use image/video features" ) from e diff --git a/src/guidellm/extras/vllm.py b/src/guidellm/extras/vllm.py index a415e966f..773a8b7a8 100644 --- a/src/guidellm/extras/vllm.py +++ b/src/guidellm/extras/vllm.py @@ -1,13 +1,15 @@ +""" +vLLM wrapper with same interface as vLLM. 
+""" + try: - from vllm import SamplingParams - from vllm.engine.arg_utils import AsyncEngineArgs - from vllm.engine.async_llm_engine import AsyncLLMEngine - from vllm.outputs import RequestOutput - - HAS_VLLM = True -except ImportError: - AsyncLLMEngine = None # type: ignore[assignment, misc] - AsyncEngineArgs = None # type: ignore[assignment, misc] - SamplingParams = None # type: ignore[assignment, misc] - RequestOutput = None # type: ignore[assignment, misc] - HAS_VLLM = False + import vllm +except ImportError as e: + raise AttributeError("Please install vllm to use vLLM features") from e + + +def __getattr__(name: str): + return getattr(vllm, name) + + +__all__ = vllm.__all__ diff --git a/uv.lock b/uv.lock index 8ab00fef4..cfff060f6 100644 --- a/uv.lock +++ b/uv.lock @@ -809,6 +809,7 @@ dependencies = [ { name = "faker" }, { name = "ftfy" }, { name = "httpx", extra = ["http2"] }, + { name = "lazy-loader" }, { name = "loguru" }, { name = "more-itertools" }, { name = "msgpack" }, @@ -936,6 +937,7 @@ requires-dist = [ { name = "guidellm", extras = ["audio", "perf", "tokenizers", "vision"], marker = "extra == 'all'" }, { name = "guidellm", extras = ["perf", "tokenizers"], marker = "extra == 'recommended'" }, { name = "httpx", extras = ["http2"], specifier = "<1.0.0" }, + { name = "lazy-loader", git = "https://github.com/sjmonson/lazy-loader.git?rev=feat%2Flazy_submodules" }, { name = "loguru" }, { name = "lorem", marker = "extra == 'dev'", specifier = "~=0.1.1" }, { name = "mdformat", marker = "extra == 'dev'", specifier = "~=1.0.0" }, @@ -1255,6 +1257,14 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, ] +[[package]] +name = "lazy-loader" +version = "0.6rc0.dev0" +source = { git = 
"https://github.com/sjmonson/lazy-loader.git?rev=feat%2Flazy_submodules#a7f66864e86aee5bed7bf5ce1a5ba98b81e80598" } +dependencies = [ + { name = "packaging" }, +] + [[package]] name = "loguru" version = "0.7.3"