2222
2323from guidellm .backends .backend import Backend , BackendArgs
2424from guidellm .backends .vllm_python .vllm_response import VLLMResponseHandler
25- from guidellm .extras .vllm import (
26- HAS_VLLM ,
27- AsyncEngineArgs ,
28- AsyncLLMEngine ,
29- RequestOutput ,
30- SamplingParams ,
31- )
25+ from guidellm .extras import audio , vision , vllm
3226from guidellm .logger import logger
3327from guidellm .schemas import (
3428 GenerationRequest ,
3731 StandardBaseModel ,
3832)
3933
40- try :
41- from guidellm .extras .audio import _decode_audio
42-
43- HAS_AUDIO = True
44- except ImportError :
45- _decode_audio = None # type: ignore[assignment]
46- HAS_AUDIO = False
47-
48- try :
49- from guidellm .extras .vision import image_dict_to_pil
50-
51- HAS_VISION = True
52- except ImportError :
53- image_dict_to_pil = None # type: ignore[assignment]
54- HAS_VISION = False
55-
# Sentinel for "chat template not yet resolved" cache. A unique object() is
# used (rather than None) so the cache can distinguish "never computed" from
# a legitimately resolved value of None; compare with `is`, never `==`.
_CHAT_TEMPLATE_UNSET : object = object ()
5836
@@ -124,14 +102,6 @@ class _ResolvedRequest(StandardBaseModel):
124102 )
125103
126104
127- def _check_vllm_available () -> None :
128- """Check if vllm is available and raise helpful error if not."""
129- if not HAS_VLLM :
130- raise ImportError (
131- "vllm is not installed. Install vllm to use the vllm python backend."
132- )
133-
134-
135105def _has_jinja2_markers (s : str ) -> bool :
136106 """Return True if the string contains Jinja2 template syntax ({{, {%, or {#)."""
137107 return "{{" in s or "{%" in s or "{#" in s
@@ -197,7 +167,6 @@ def __init__(
197167 :param audio_placeholder: Optional string to use as the audio placeholder when
198168 using audio_column; if unset, falls back to "<|audio|>".
199169 """
200- _check_vllm_available ()
201170 super ().__init__ (type_ = "vllm_python" )
202171
203172 self .model = model
@@ -209,7 +178,7 @@ def __init__(
209178
210179 # Runtime state
211180 self ._in_process = False
212- self ._engine : AsyncLLMEngine | None = None
181+ self ._engine : vllm . AsyncLLMEngine | None = None
213182 self ._resolved_chat_template : str | None | object = _CHAT_TEMPLATE_UNSET
214183
215184 @property
@@ -270,8 +239,8 @@ async def process_startup(self):
270239 if self ._in_process :
271240 raise RuntimeError ("Backend already started up for process." )
272241
273- engine_args = AsyncEngineArgs (** self .vllm_config ) # type: ignore[misc]
274- self ._engine = AsyncLLMEngine .from_engine_args (engine_args ) # type: ignore[misc]
242+ engine_args = vllm . AsyncEngineArgs (** self .vllm_config )
243+ self ._engine = vllm . AsyncLLMEngine .from_engine_args (engine_args )
275244 self ._in_process = True
276245
277246 async def process_shutdown (self ):
@@ -320,7 +289,7 @@ async def default_model(self) -> str:
320289 """
321290 return self .model
322291
323- def _validate_backend_initialized (self ) -> AsyncLLMEngine :
292+ def _validate_backend_initialized (self ) -> vllm . AsyncLLMEngine :
324293 """
325294 Validate that the backend is initialized and return the engine.
326295
@@ -360,14 +329,9 @@ def _build_multi_modal_data_from_columns( # noqa: C901, PLR0912
360329 for item in image_items :
361330 if not item or not isinstance (item , dict ):
362331 continue
363- if not HAS_VISION or image_dict_to_pil is None :
364- raise ImportError (
365- "Image column support requires guidellm[vision]. "
366- "Install with: pip install 'guidellm[vision]'"
367- )
368332 # Convert raw image dicts into PIL Images as required by vLLM's vision
369333 # processor
370- pil_image = image_dict_to_pil (item )
334+ pil_image = vision . image_dict_to_pil (item )
371335 if "image" not in multi_modal_data :
372336 multi_modal_data ["image" ] = pil_image
373337 else :
@@ -390,15 +354,10 @@ def _build_multi_modal_data_from_columns( # noqa: C901, PLR0912
390354 else :
391355 audio_bytes = first .get ("audio" )
392356 if isinstance (audio_bytes , bytes ) and len (audio_bytes ) > 0 :
393- if not HAS_AUDIO or _decode_audio is None :
394- raise ImportError (
395- "Audio column support requires guidellm[audio]. "
396- "Install with: pip install 'guidellm[audio]'"
397- )
398357 try :
399358 # Decode raw audio bytes into an array since vLLM audio models
400359 # expect either raw numpy arrays or specific tensor formats
401- audio_samples = _decode_audio (audio_bytes )
360+ audio_samples = audio . _decode_audio (audio_bytes ) # noqa: SLF001
402361 # torchcodec decodes audio on CPU, so .data is always
403362 # a CPU torch.Tensor. .cpu() is a no-op on CPU tensors.
404363 audio_array = audio_samples .data .cpu ().numpy ()
@@ -731,7 +690,7 @@ def _update_token_timing(
731690 request_info .timings .last_token_iteration = iter_time
732691 request_info .timings .token_iterations += iterations
733692
734- def _text_from_output (self , output : RequestOutput | None ) -> str :
693+ def _text_from_output (self , output : vllm . RequestOutput | None ) -> str :
735694 """
736695 Extract generated text from VLLM RequestOutput.
737696
@@ -744,7 +703,7 @@ def _text_from_output(self, output: RequestOutput | None) -> str:
744703
745704 def _stream_usage_tokens (
746705 self ,
747- output : RequestOutput ,
706+ output : vllm . RequestOutput ,
748707 request_info : RequestInfo ,
749708 ) -> tuple [int , int ]:
750709 """
@@ -770,7 +729,7 @@ def _stream_usage_tokens(
770729
771730 def _usage_from_output (
772731 self ,
773- output : RequestOutput | None ,
732+ output : vllm . RequestOutput | None ,
774733 * ,
775734 request_info : RequestInfo | None = None ,
776735 ) -> dict [str , int ] | None :
@@ -805,7 +764,7 @@ def _build_final_response(
805764 self ,
806765 request : GenerationRequest ,
807766 request_info : RequestInfo ,
808- final_output : RequestOutput | None ,
767+ final_output : vllm . RequestOutput | None ,
809768 stream : bool ,
810769 text : str = "" ,
811770 ) -> tuple [GenerationResponse , RequestInfo ] | None :
@@ -832,7 +791,7 @@ def _build_final_response(
832791 def _create_sampling_params (
833792 self ,
834793 max_tokens_override : int | None = None ,
835- ) -> SamplingParams :
794+ ) -> vllm . SamplingParams :
836795 """
837796 Create VLLM SamplingParams.
838797
@@ -850,7 +809,7 @@ def _create_sampling_params(
850809 params ["max_tokens" ] = max_tokens_override
851810 params ["ignore_eos" ] = True
852811
853- return SamplingParams (** params ) # type: ignore[misc]
812+ return vllm . SamplingParams (** params )
854813
855814 def _raise_generation_error (self , exc : BaseException ) -> None :
856815 """Re-raise generation failure with context.
@@ -895,7 +854,7 @@ async def _run_generation(
895854 request_info : RequestInfo ,
896855 stream : bool ,
897856 generate_input : str | dict [str , Any ],
898- sampling_params : SamplingParams ,
857+ sampling_params : vllm . SamplingParams ,
899858 request_id : str ,
900859 state : dict [str , Any ],
901860 ) -> AsyncIterator [tuple [GenerationResponse , RequestInfo ]]:
0 commit comments