Skip to content

Commit 5e4ee86

Browse files
committed
Switch to lazy-loading for extras packages
Signed-off-by: Samuel Monson <smonson@redhat.com>
1 parent d622cf1 commit 5e4ee86

File tree

5 files changed

+55
-102
lines changed

5 files changed

+55
-102
lines changed

src/guidellm/backends/__init__.py

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@
99
handlers for processing streaming and non-streaming API responses.
1010
"""
1111

12-
from guidellm.extras.vllm import HAS_VLLM
13-
1412
from .backend import Backend, BackendArgs, BackendType
1513
from .openai import (
1614
AudioRequestHandler,
@@ -20,13 +18,7 @@
2018
OpenAIRequestHandlerFactory,
2119
TextCompletionsRequestHandler,
2220
)
23-
24-
# Conditionally import VLLM backend if available
25-
if HAS_VLLM:
26-
from .vllm_python import VLLMPythonBackend, VLLMResponseHandler
27-
else:
28-
VLLMPythonBackend = None # type: ignore[assignment, misc]
29-
VLLMResponseHandler = None # type: ignore[assignment, misc]
21+
from .vllm_python import VLLMPythonBackend, VLLMResponseHandler
3022

3123
__all__ = [
3224
"AudioRequestHandler",
@@ -38,8 +30,6 @@
3830
"OpenAIRequestHandler",
3931
"OpenAIRequestHandlerFactory",
4032
"TextCompletionsRequestHandler",
33+
"VLLMPythonBackend",
34+
"VLLMResponseHandler",
4135
]
42-
43-
# Conditionally add VLLM backend and handler to exports
44-
if HAS_VLLM:
45-
__all__.extend(["VLLMPythonBackend", "VLLMResponseHandler"])

src/guidellm/backends/vllm_python/vllm.py

Lines changed: 14 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -22,13 +22,7 @@
2222

2323
from guidellm.backends.backend import Backend, BackendArgs
2424
from guidellm.backends.vllm_python.vllm_response import VLLMResponseHandler
25-
from guidellm.extras.vllm import (
26-
HAS_VLLM,
27-
AsyncEngineArgs,
28-
AsyncLLMEngine,
29-
RequestOutput,
30-
SamplingParams,
31-
)
25+
from guidellm.extras import audio, vision, vllm
3226
from guidellm.logger import logger
3327
from guidellm.schemas import (
3428
GenerationRequest,
@@ -37,22 +31,6 @@
3731
StandardBaseModel,
3832
)
3933

40-
try:
41-
from guidellm.extras.audio import _decode_audio
42-
43-
HAS_AUDIO = True
44-
except ImportError:
45-
_decode_audio = None # type: ignore[assignment]
46-
HAS_AUDIO = False
47-
48-
try:
49-
from guidellm.extras.vision import image_dict_to_pil
50-
51-
HAS_VISION = True
52-
except ImportError:
53-
image_dict_to_pil = None # type: ignore[assignment]
54-
HAS_VISION = False
55-
5634
# Sentinel for "chat template not yet resolved" cache.
5735
_CHAT_TEMPLATE_UNSET: object = object()
5836

@@ -124,14 +102,6 @@ class _ResolvedRequest(StandardBaseModel):
124102
)
125103

126104

127-
def _check_vllm_available() -> None:
128-
"""Check if vllm is available and raise helpful error if not."""
129-
if not HAS_VLLM:
130-
raise ImportError(
131-
"vllm is not installed. Install vllm to use the vllm python backend."
132-
)
133-
134-
135105
def _has_jinja2_markers(s: str) -> bool:
136106
"""Return True if the string contains Jinja2 template syntax ({{, {%, or {#)."""
137107
return "{{" in s or "{%" in s or "{#" in s
@@ -197,7 +167,6 @@ def __init__(
197167
:param audio_placeholder: Optional string to use as the audio placeholder when
198168
using audio_column; if unset, falls back to "<|audio|>".
199169
"""
200-
_check_vllm_available()
201170
super().__init__(type_="vllm_python")
202171

203172
self.model = model
@@ -209,7 +178,7 @@ def __init__(
209178

210179
# Runtime state
211180
self._in_process = False
212-
self._engine: AsyncLLMEngine | None = None
181+
self._engine: vllm.AsyncLLMEngine | None = None
213182
self._resolved_chat_template: str | None | object = _CHAT_TEMPLATE_UNSET
214183

215184
@property
@@ -270,8 +239,8 @@ async def process_startup(self):
270239
if self._in_process:
271240
raise RuntimeError("Backend already started up for process.")
272241

273-
engine_args = AsyncEngineArgs(**self.vllm_config) # type: ignore[misc]
274-
self._engine = AsyncLLMEngine.from_engine_args(engine_args) # type: ignore[misc]
242+
engine_args = vllm.AsyncEngineArgs(**self.vllm_config)
243+
self._engine = vllm.AsyncLLMEngine.from_engine_args(engine_args)
275244
self._in_process = True
276245

277246
async def process_shutdown(self):
@@ -320,7 +289,7 @@ async def default_model(self) -> str:
320289
"""
321290
return self.model
322291

323-
def _validate_backend_initialized(self) -> AsyncLLMEngine:
292+
def _validate_backend_initialized(self) -> vllm.AsyncLLMEngine:
324293
"""
325294
Validate that the backend is initialized and return the engine.
326295
@@ -360,14 +329,9 @@ def _build_multi_modal_data_from_columns( # noqa: C901, PLR0912
360329
for item in image_items:
361330
if not item or not isinstance(item, dict):
362331
continue
363-
if not HAS_VISION or image_dict_to_pil is None:
364-
raise ImportError(
365-
"Image column support requires guidellm[vision]. "
366-
"Install with: pip install 'guidellm[vision]'"
367-
)
368332
# Convert raw image dicts into PIL Images as required by vLLM's vision
369333
# processor
370-
pil_image = image_dict_to_pil(item)
334+
pil_image = vision.image_dict_to_pil(item)
371335
if "image" not in multi_modal_data:
372336
multi_modal_data["image"] = pil_image
373337
else:
@@ -390,15 +354,10 @@ def _build_multi_modal_data_from_columns( # noqa: C901, PLR0912
390354
else:
391355
audio_bytes = first.get("audio")
392356
if isinstance(audio_bytes, bytes) and len(audio_bytes) > 0:
393-
if not HAS_AUDIO or _decode_audio is None:
394-
raise ImportError(
395-
"Audio column support requires guidellm[audio]. "
396-
"Install with: pip install 'guidellm[audio]'"
397-
)
398357
try:
399358
# Decode raw audio bytes into an array since vLLM audio models
400359
# expect either raw numpy arrays or specific tensor formats
401-
audio_samples = _decode_audio(audio_bytes)
360+
audio_samples = audio._decode_audio(audio_bytes) # noqa: SLF001
402361
# torchcodec decodes audio on CPU, so .data is always
403362
# a CPU torch.Tensor. .cpu() is a no-op on CPU tensors.
404363
audio_array = audio_samples.data.cpu().numpy()
@@ -731,7 +690,7 @@ def _update_token_timing(
731690
request_info.timings.last_token_iteration = iter_time
732691
request_info.timings.token_iterations += iterations
733692

734-
def _text_from_output(self, output: RequestOutput | None) -> str:
693+
def _text_from_output(self, output: vllm.RequestOutput | None) -> str:
735694
"""
736695
Extract generated text from VLLM RequestOutput.
737696
@@ -744,7 +703,7 @@ def _text_from_output(self, output: RequestOutput | None) -> str:
744703

745704
def _stream_usage_tokens(
746705
self,
747-
output: RequestOutput,
706+
output: vllm.RequestOutput,
748707
request_info: RequestInfo,
749708
) -> tuple[int, int]:
750709
"""
@@ -770,7 +729,7 @@ def _stream_usage_tokens(
770729

771730
def _usage_from_output(
772731
self,
773-
output: RequestOutput | None,
732+
output: vllm.RequestOutput | None,
774733
*,
775734
request_info: RequestInfo | None = None,
776735
) -> dict[str, int] | None:
@@ -805,7 +764,7 @@ def _build_final_response(
805764
self,
806765
request: GenerationRequest,
807766
request_info: RequestInfo,
808-
final_output: RequestOutput | None,
767+
final_output: vllm.RequestOutput | None,
809768
stream: bool,
810769
text: str = "",
811770
) -> tuple[GenerationResponse, RequestInfo] | None:
@@ -832,7 +791,7 @@ def _build_final_response(
832791
def _create_sampling_params(
833792
self,
834793
max_tokens_override: int | None = None,
835-
) -> SamplingParams:
794+
) -> vllm.SamplingParams:
836795
"""
837796
Create VLLM SamplingParams.
838797
@@ -850,7 +809,7 @@ def _create_sampling_params(
850809
params["max_tokens"] = max_tokens_override
851810
params["ignore_eos"] = True
852811

853-
return SamplingParams(**params) # type: ignore[misc]
812+
return vllm.SamplingParams(**params)
854813

855814
def _raise_generation_error(self, exc: BaseException) -> None:
856815
"""Re-raise generation failure with context.
@@ -895,7 +854,7 @@ async def _run_generation(
895854
request_info: RequestInfo,
896855
stream: bool,
897856
generate_input: str | dict[str, Any],
898-
sampling_params: SamplingParams,
857+
sampling_params: vllm.SamplingParams,
899858
request_id: str,
900859
state: dict[str, Any],
901860
) -> AsyncIterator[tuple[GenerationResponse, RequestInfo]]:

src/guidellm/data/preprocessors/encoders.py

Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
DatasetPreprocessor,
77
PreprocessorRegistry,
88
)
9+
from guidellm.extras import audio as guidellm_audio
10+
from guidellm.extras import vision as guidellm_vision
911

1012
__all__ = ["MediaEncoder"]
1113

@@ -27,24 +29,6 @@ def __init__(
2729
encode_kwargs.get("video", {}) if encode_kwargs else {}
2830
)
2931

30-
@staticmethod
31-
def encode_audio(*args, **kwargs):
32-
from guidellm.extras.audio import encode_audio
33-
34-
return encode_audio(*args, **kwargs)
35-
36-
@staticmethod
37-
def encode_image(*args, **kwargs):
38-
from guidellm.extras.vision import encode_image
39-
40-
return encode_image(*args, **kwargs)
41-
42-
@staticmethod
43-
def encode_video(*args, **kwargs):
44-
from guidellm.extras.vision import encode_video
45-
46-
return encode_video(*args, **kwargs)
47-
4832
def __call__(self, items: list[dict[str, list[Any]]]) -> list[dict[str, list[Any]]]:
4933
return [self.encode_turn(item) for item in items]
5034

@@ -56,7 +40,7 @@ def encode_turn(self, columns: dict[str, list[Any]]) -> dict[str, list[Any]]:
5640
continue
5741

5842
encoded_audio.append(
59-
self.encode_audio(audio, **self.encode_audio_kwargs)
43+
guidellm_audio.encode_audio(audio, **self.encode_audio_kwargs)
6044
)
6145
columns["audio_column"] = encoded_audio
6246

@@ -67,7 +51,7 @@ def encode_turn(self, columns: dict[str, list[Any]]) -> dict[str, list[Any]]:
6751
continue
6852

6953
encoded_images.append(
70-
self.encode_image(image, **self.encode_image_kwargs)
54+
guidellm_vision.encode_image(image, **self.encode_image_kwargs)
7155
)
7256
columns["image_column"] = encoded_images
7357

@@ -78,7 +62,7 @@ def encode_turn(self, columns: dict[str, list[Any]]) -> dict[str, list[Any]]:
7862
continue
7963

8064
encoded_videos.append(
81-
self.encode_video(video, **self.encode_video_kwargs)
65+
guidellm_vision.encode_video(video, **self.encode_video_kwargs)
8266
)
8367
columns["video_column"] = encoded_videos
8468

src/guidellm/extras/__init__.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,22 @@
11
"""
22
Code that depends on optional dependencies.
3-
Each submodule should be deferred imported.
3+
4+
All dependent code should import in one of two ways:
5+
6+
1. import guidellm.extras
7+
2. from guidellm.extras import submodule
8+
9+
As most of the codebase eagerly imports, importing specific functions or classes may cause
10+
ImportErrors if the optional dependencies are missing. Importing from the module or
11+
submodule level ensures errors are deferred to calling point.
412
"""
13+
14+
import lazy_loader as lazy
15+
16+
submodules = ["vllm", "vision", "audio"]
17+
18+
__getattr__, __dir__, __all__ = lazy.attach(
19+
__name__,
20+
submodules=submodules,
21+
lazy_submodules=True, # Only import submodules when accessed
22+
)

src/guidellm/extras/vllm.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
1+
"""
2+
vLLM wrapper with same interface as vLLM.
3+
"""
4+
15
try:
2-
from vllm import SamplingParams
3-
from vllm.engine.arg_utils import AsyncEngineArgs
4-
from vllm.engine.async_llm_engine import AsyncLLMEngine
5-
from vllm.outputs import RequestOutput
6-
7-
HAS_VLLM = True
8-
except ImportError:
9-
AsyncLLMEngine = None # type: ignore[assignment, misc]
10-
AsyncEngineArgs = None # type: ignore[assignment, misc]
11-
SamplingParams = None # type: ignore[assignment, misc]
12-
RequestOutput = None # type: ignore[assignment, misc]
13-
HAS_VLLM = False
6+
import vllm
7+
except ImportError as e:
8+
raise ImportError("Please install vllm to use vLLM features") from e
9+
10+
11+
def __getattr__(name: str):
12+
return getattr(vllm, name)
13+
14+
15+
__all__ = vllm.__all__

0 commit comments

Comments
 (0)