Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ dependencies = [
"uvloop>=0.18",
"torch",
"more-itertools>=10.8.0",
"lazy-loader @ git+https://github.com/sjmonson/lazy-loader.git@feat/lazy_submodules",
]

[project.optional-dependencies]
Expand Down
16 changes: 3 additions & 13 deletions src/guidellm/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
handlers for processing streaming and non-streaming API responses.
"""

from guidellm.extras.vllm import HAS_VLLM

from .backend import Backend, BackendArgs, BackendType
from .openai import (
AudioRequestHandler,
Expand All @@ -20,13 +18,7 @@
OpenAIRequestHandlerFactory,
TextCompletionsRequestHandler,
)

# Conditionally import VLLM backend if available
if HAS_VLLM:
from .vllm_python import VLLMPythonBackend, VLLMResponseHandler
else:
VLLMPythonBackend = None # type: ignore[assignment, misc]
VLLMResponseHandler = None # type: ignore[assignment, misc]
from .vllm_python import VLLMPythonBackend, VLLMResponseHandler

__all__ = [
"AudioRequestHandler",
Expand All @@ -38,8 +30,6 @@
"OpenAIRequestHandler",
"OpenAIRequestHandlerFactory",
"TextCompletionsRequestHandler",
"VLLMPythonBackend",
"VLLMResponseHandler",
]

# Conditionally add VLLM backend and handler to exports
if HAS_VLLM:
__all__.extend(["VLLMPythonBackend", "VLLMResponseHandler"])
69 changes: 14 additions & 55 deletions src/guidellm/backends/vllm_python/vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,7 @@

from guidellm.backends.backend import Backend, BackendArgs
from guidellm.backends.vllm_python.vllm_response import VLLMResponseHandler
from guidellm.extras.vllm import (
HAS_VLLM,
AsyncEngineArgs,
AsyncLLMEngine,
RequestOutput,
SamplingParams,
)
from guidellm.extras import audio, vision, vllm
from guidellm.logger import logger
from guidellm.schemas import (
GenerationRequest,
Expand All @@ -37,22 +31,6 @@
StandardBaseModel,
)

try:
from guidellm.extras.audio import _decode_audio

HAS_AUDIO = True
except ImportError:
_decode_audio = None # type: ignore[assignment]
HAS_AUDIO = False

try:
from guidellm.extras.vision import image_dict_to_pil

HAS_VISION = True
except ImportError:
image_dict_to_pil = None # type: ignore[assignment]
HAS_VISION = False

# Sentinel for "chat template not yet resolved" cache.
_CHAT_TEMPLATE_UNSET: object = object()

Expand Down Expand Up @@ -124,14 +102,6 @@ class _ResolvedRequest(StandardBaseModel):
)


def _check_vllm_available() -> None:
"""Check if vllm is available and raise helpful error if not."""
if not HAS_VLLM:
raise ImportError(
"vllm is not installed. Install vllm to use the vllm python backend."
)


def _has_jinja2_markers(s: str) -> bool:
"""Return True if the string contains Jinja2 template syntax ({{, {%, or {#)."""
return "{{" in s or "{%" in s or "{#" in s
Expand Down Expand Up @@ -197,7 +167,6 @@ def __init__(
:param audio_placeholder: Optional string to use as the audio placeholder when
using audio_column; if unset, falls back to "<|audio|>".
"""
_check_vllm_available()
super().__init__(type_="vllm_python")

self.model = model
Expand All @@ -209,7 +178,7 @@ def __init__(

# Runtime state
self._in_process = False
self._engine: AsyncLLMEngine | None = None
self._engine: vllm.AsyncLLMEngine | None = None
self._resolved_chat_template: str | None | object = _CHAT_TEMPLATE_UNSET

@property
Expand Down Expand Up @@ -270,8 +239,8 @@ async def process_startup(self):
if self._in_process:
raise RuntimeError("Backend already started up for process.")

engine_args = AsyncEngineArgs(**self.vllm_config) # type: ignore[misc]
self._engine = AsyncLLMEngine.from_engine_args(engine_args) # type: ignore[misc]
engine_args = vllm.AsyncEngineArgs(**self.vllm_config)
self._engine = vllm.AsyncLLMEngine.from_engine_args(engine_args)
self._in_process = True

async def process_shutdown(self):
Expand Down Expand Up @@ -320,7 +289,7 @@ async def default_model(self) -> str:
"""
return self.model

def _validate_backend_initialized(self) -> AsyncLLMEngine:
def _validate_backend_initialized(self) -> vllm.AsyncLLMEngine:
"""
Validate that the backend is initialized and return the engine.

Expand Down Expand Up @@ -360,14 +329,9 @@ def _build_multi_modal_data_from_columns( # noqa: C901, PLR0912
for item in image_items:
if not item or not isinstance(item, dict):
continue
if not HAS_VISION or image_dict_to_pil is None:
raise ImportError(
"Image column support requires guidellm[vision]. "
"Install with: pip install 'guidellm[vision]'"
)
# Convert raw image dicts into PIL Images as required by vLLM's vision
# processor
pil_image = image_dict_to_pil(item)
pil_image = vision.image_dict_to_pil(item)
if "image" not in multi_modal_data:
multi_modal_data["image"] = pil_image
else:
Expand All @@ -390,15 +354,10 @@ def _build_multi_modal_data_from_columns( # noqa: C901, PLR0912
else:
audio_bytes = first.get("audio")
if isinstance(audio_bytes, bytes) and len(audio_bytes) > 0:
if not HAS_AUDIO or _decode_audio is None:
raise ImportError(
"Audio column support requires guidellm[audio]. "
"Install with: pip install 'guidellm[audio]'"
)
try:
# Decode raw audio bytes into an array since vLLM audio models
# expect either raw numpy arrays or specific tensor formats
audio_samples = _decode_audio(audio_bytes)
audio_samples = audio._decode_audio(audio_bytes) # noqa: SLF001
# torchcodec decodes audio on CPU, so .data is always
# a CPU torch.Tensor. .cpu() is a no-op on CPU tensors.
audio_array = audio_samples.data.cpu().numpy()
Expand Down Expand Up @@ -731,7 +690,7 @@ def _update_token_timing(
request_info.timings.last_token_iteration = iter_time
request_info.timings.token_iterations += iterations

def _text_from_output(self, output: RequestOutput | None) -> str:
def _text_from_output(self, output: vllm.RequestOutput | None) -> str:
"""
Extract generated text from VLLM RequestOutput.

Expand All @@ -744,7 +703,7 @@ def _text_from_output(self, output: RequestOutput | None) -> str:

def _stream_usage_tokens(
self,
output: RequestOutput,
output: vllm.RequestOutput,
request_info: RequestInfo,
) -> tuple[int, int]:
"""
Expand All @@ -770,7 +729,7 @@ def _stream_usage_tokens(

def _usage_from_output(
self,
output: RequestOutput | None,
output: vllm.RequestOutput | None,
*,
request_info: RequestInfo | None = None,
) -> dict[str, int] | None:
Expand Down Expand Up @@ -805,7 +764,7 @@ def _build_final_response(
self,
request: GenerationRequest,
request_info: RequestInfo,
final_output: RequestOutput | None,
final_output: vllm.RequestOutput | None,
stream: bool,
text: str = "",
) -> tuple[GenerationResponse, RequestInfo] | None:
Expand All @@ -832,7 +791,7 @@ def _build_final_response(
def _create_sampling_params(
self,
max_tokens_override: int | None = None,
) -> SamplingParams:
) -> vllm.SamplingParams:
"""
Create VLLM SamplingParams.

Expand All @@ -850,7 +809,7 @@ def _create_sampling_params(
params["max_tokens"] = max_tokens_override
params["ignore_eos"] = True

return SamplingParams(**params) # type: ignore[misc]
return vllm.SamplingParams(**params)

def _raise_generation_error(self, exc: BaseException) -> None:
"""Re-raise generation failure with context.
Expand Down Expand Up @@ -895,7 +854,7 @@ async def _run_generation(
request_info: RequestInfo,
stream: bool,
generate_input: str | dict[str, Any],
sampling_params: SamplingParams,
sampling_params: vllm.SamplingParams,
request_id: str,
state: dict[str, Any],
) -> AsyncIterator[tuple[GenerationResponse, RequestInfo]]:
Expand Down
26 changes: 5 additions & 21 deletions src/guidellm/data/preprocessors/encoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
DatasetPreprocessor,
PreprocessorRegistry,
)
from guidellm.extras import audio as guidellm_audio
from guidellm.extras import vision as guidellm_vision

__all__ = ["MediaEncoder"]

Expand All @@ -27,24 +29,6 @@ def __init__(
encode_kwargs.get("video", {}) if encode_kwargs else {}
)

@staticmethod
def encode_audio(*args, **kwargs):
from guidellm.extras.audio import encode_audio

return encode_audio(*args, **kwargs)

@staticmethod
def encode_image(*args, **kwargs):
from guidellm.extras.vision import encode_image

return encode_image(*args, **kwargs)

@staticmethod
def encode_video(*args, **kwargs):
from guidellm.extras.vision import encode_video

return encode_video(*args, **kwargs)

def __call__(self, items: list[dict[str, list[Any]]]) -> list[dict[str, list[Any]]]:
return [self.encode_turn(item) for item in items]

Expand All @@ -56,7 +40,7 @@ def encode_turn(self, columns: dict[str, list[Any]]) -> dict[str, list[Any]]:
continue

encoded_audio.append(
self.encode_audio(audio, **self.encode_audio_kwargs)
guidellm_audio.encode_audio(audio, **self.encode_audio_kwargs)
)
columns["audio_column"] = encoded_audio

Expand All @@ -67,7 +51,7 @@ def encode_turn(self, columns: dict[str, list[Any]]) -> dict[str, list[Any]]:
continue

encoded_images.append(
self.encode_image(image, **self.encode_image_kwargs)
guidellm_vision.encode_image(image, **self.encode_image_kwargs)
)
columns["image_column"] = encoded_images

Expand All @@ -78,7 +62,7 @@ def encode_turn(self, columns: dict[str, list[Any]]) -> dict[str, list[Any]]:
continue

encoded_videos.append(
self.encode_video(video, **self.encode_video_kwargs)
guidellm_vision.encode_video(video, **self.encode_video_kwargs)
)
columns["video_column"] = encoded_videos

Expand Down
20 changes: 19 additions & 1 deletion src/guidellm/extras/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,22 @@
"""
Code that depends on optional dependencies.
Each submodule should be imported lazily (deferred until first access).

All dependent code should import in one of two ways:

1. import guidellm.extras
2. from guidellm.extras import submodule

As most of the codebase imports eagerly, importing specific functions or classes may
cause ImportErrors if the optional dependencies are missing. Importing at the module or
submodule level ensures such errors are deferred to the point of use.
"""

import lazy_loader as lazy

submodules = ["vllm", "vision", "audio"]

__getattr__, __dir__, __all__ = lazy.attach(
__name__,
submodules=submodules,
lazy_submodules=True, # Only import submodules when accessed
)
2 changes: 1 addition & 1 deletion src/guidellm/extras/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from torchcodec.decoders import AudioDecoder
from torchcodec.encoders import AudioEncoder
except ImportError as e:
raise ImportError("Please install guidellm[audio] to use audio features") from e
raise AttributeError("Please install guidellm[audio] to use audio features") from e

__all__ = [
"encode_audio",
Expand Down
2 changes: 1 addition & 1 deletion src/guidellm/extras/vision.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
try:
from PIL import Image as PILImage
except ImportError as e:
raise ImportError(
raise AttributeError(
"Please install guidellm[vision] to use image/video features"
) from e

Expand Down
26 changes: 14 additions & 12 deletions src/guidellm/extras/vllm.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
"""
Lazy wrapper module exposing the same public interface as vLLM.
"""

try:
from vllm import SamplingParams
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.engine.async_llm_engine import AsyncLLMEngine
from vllm.outputs import RequestOutput

HAS_VLLM = True
except ImportError:
AsyncLLMEngine = None # type: ignore[assignment, misc]
AsyncEngineArgs = None # type: ignore[assignment, misc]
SamplingParams = None # type: ignore[assignment, misc]
RequestOutput = None # type: ignore[assignment, misc]
HAS_VLLM = False
import vllm
except ImportError as e:
raise AttributeError("Please install vllm to use vLLM features") from e


def __getattr__(name: str):
return getattr(vllm, name)


__all__ = vllm.__all__
10 changes: 10 additions & 0 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading