From d4366e338bfb8e54e7e8ab3d83d3cc97abfebdf8 Mon Sep 17 00:00:00 2001
From: Zhongbo Tian
Date: Mon, 2 Mar 2026 14:11:58 +0800
Subject: [PATCH] fix: convert guided decoding schema into Harmony-native mode
 to avoid Harmony/JSON mode conflict for GPT-OSS

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
---
 lmdeploy/serve/openai/api_server.py | 38 ++++++++++++++++++++++++++++++++++----
 1 file changed, 34 insertions(+), 4 deletions(-)

diff --git a/lmdeploy/serve/openai/api_server.py b/lmdeploy/serve/openai/api_server.py
index f64586fff7..11a4e36ebb 100644
--- a/lmdeploy/serve/openai/api_server.py
+++ b/lmdeploy/serve/openai/api_server.py
@@ -424,10 +424,6 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
     adapter_name = model_name  # got a adapter name
     request_id = str(session.session_id)
     created_time = int(time.time())
-    gpt_oss_parser = None
-    if VariableInterface.async_engine.arch == 'GptOssForCausalLM':
-        gpt_oss_parser = GptOssChatParser()
-
     if isinstance(request.stop, str):
         request.stop = [request.stop]
 
@@ -438,6 +434,40 @@ async def chat_completions_v1(request: ChatCompletionRequest, raw_request: Reque
 
     if request.response_format and request.response_format.type != 'text':
         response_format = request.response_format.model_dump()
+    gpt_oss_parser = None
+    if VariableInterface.async_engine.arch == 'GptOssForCausalLM':
+        gpt_oss_parser = GptOssChatParser()
+        if response_format:
+            logger.info(f'[GPT-OSS:{request_id}] Structured output requested, converting to Harmony-native mode')
+            schema_json = json.dumps(response_format, ensure_ascii=False)
+            format_section = f'\n\n# Response Formats\n\n{schema_json}'
+            try:
+                if isinstance(request.messages, str):
+                    # For string prompts, append the format section directly to request.messages
+                    request.messages += format_section
+                else:
+                    messages = request.messages
+                    appended_to_system = False
+                    for msg in messages:
+                        if msg.get('role') == 'system':
+                            content = msg.get('content')
+                            if content is None:
+                                content = ''
+                            if isinstance(content, str):
+                                msg['content'] = content + format_section
+                                appended_to_system = True
+                                break
+                    if not appended_to_system:
+                        system_msg = {
+                            'role': 'system',
+                            'content': f'You must follow the specified response format.{format_section}'
+                        }
+                        messages.insert(0, system_msg)
+
+                response_format = None
+            except Exception as e:
+                logger.error(f'[GPT-OSS:{request_id}] Failed to convert response_format to Harmony mode: {str(e)}')
+
     if request.logit_bias is not None:
         try:
             logits_processors = [