mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-24 11:38:29 +00:00
feat(providers): GLM-5.2 native reasoning_effort controls
Port from Kilo-Org/kilocode#11555: GLM-5.2 exposes a native reasoning_effort knob with two enabled levels (high / max) on its OpenAI-compatible endpoints. Previously the zai profile (direct Z.AI /api/paas/v4) used the base ProviderProfile and emitted nothing, and the OpenCode Go profile only handled Kimi K2 / DeepSeek — so a user's effort preference for GLM-5.2 was silently dropped on both routes.
- zai: ZaiProfile maps effort onto high/max (xhigh/max -> max, lower -> high)
- opencode-go: same mapping for GLM-5.2, alongside existing Kimi/DeepSeek
- alias spellings recognized (glm-5.2 / glm-5-2 / glm-5p2, vendor-prefixed)
- disabled / no effort leaves the server default untouched
This commit is contained in:
@@ -31,6 +31,12 @@ def _is_deepseek_thinking_model(model: str | None) -> bool:
|
||||
return m == "deepseek-reasoner"
|
||||
|
||||
|
||||
def _is_glm_5_2_model(model: str | None) -> bool:
    """Detect GLM-5.2 across alias spellings (glm-5.2 / glm-5-2 / glm-5p2).

    The alias may appear anywhere in the name produced by
    ``_flat_model_name`` but must not be immediately followed by another
    digit, so longer version, size or date suffixes such as ``glm-5.25`` or
    ``glm-5-20b`` are not mistaken for GLM-5.2.

    Args:
        model: Model identifier as configured by the user; may be ``None``.

    Returns:
        ``True`` when the flattened name contains a GLM-5.2 alias.
    """
    import re  # local import keeps this helper self-contained

    m = _flat_model_name(model)
    # "." / "-" / "p" are the three separator spellings; (?!\d) rejects
    # "glm-5-20..." style names that merely start with the same characters.
    return re.search(r"glm-5[.p-]2(?!\d)", m) is not None
|
||||
|
||||
|
||||
class OpenCodeGoProfile(ProviderProfile):
|
||||
"""OpenCode Go - model-specific reasoning controls."""
|
||||
|
||||
@@ -55,6 +61,21 @@ class OpenCodeGoProfile(ProviderProfile):
|
||||
extra_body: dict[str, Any] = {}
|
||||
top_level: dict[str, Any] = {}
|
||||
|
||||
if _is_glm_5_2_model(model):
|
||||
# GLM-5.2 on OpenCode Go uses its native OpenAI-compatible
|
||||
# reasoning_effort knob, which has exactly two enabled levels:
|
||||
# high and max. Map Hermes' richer scale onto those; leave the
|
||||
# server default alone when reasoning is disabled or unset.
|
||||
if not isinstance(reasoning_config, dict):
|
||||
return extra_body, top_level
|
||||
if reasoning_config.get("enabled") is False:
|
||||
return extra_body, top_level
|
||||
effort = (reasoning_config.get("effort") or "").strip().lower()
|
||||
if not effort or effort == "none":
|
||||
return extra_body, top_level
|
||||
top_level["reasoning_effort"] = "max" if effort in {"xhigh", "max"} else "high"
|
||||
return extra_body, top_level
|
||||
|
||||
if _is_kimi_k2_model(model):
|
||||
# Kimi K2 on OpenCode Go uses Moonshot's native wire shape:
|
||||
# extra_body.thinking (binary toggle) + top-level reasoning_effort
|
||||
|
||||
@@ -1,9 +1,70 @@
|
||||
"""ZAI / GLM provider profile."""
|
||||
"""ZAI / GLM provider profile.
|
||||
|
||||
Z.AI's ``/api/paas/v4`` endpoint is OpenAI-compatible. GLM-5.2 exposes a
|
||||
native ``reasoning_effort`` knob with exactly two levels — ``high`` and
|
||||
``max`` — when thinking is enabled (per Z.AI / BigModel docs). Hermes' richer
|
||||
effort scale is collapsed onto those two so the user's effort preference
|
||||
actually reaches the model instead of being silently dropped.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any
|
||||
|
||||
from providers import register_provider
|
||||
from providers.base import ProviderProfile
|
||||
|
||||
zai = ProviderProfile(
|
||||
|
||||
def _is_glm_5_2(model: str | None) -> bool:
|
||||
"""Detect GLM-5.2 across the alias spellings providers use.
|
||||
|
||||
Covers the canonical ``glm-5.2`` plus the ``glm-5-2`` / ``glm-5p2``
|
||||
variants seen on relays (Fireworks ``glm-5p2``, etc.) and any
|
||||
vendor-prefixed form (``z-ai/glm-5.2``, ``zai-org-glm-5-2``).
|
||||
"""
|
||||
m = (model or "").strip().lower()
|
||||
if not m:
|
||||
return False
|
||||
return any(token in m for token in ("glm-5.2", "glm-5-2", "glm-5p2"))
|
||||
|
||||
|
||||
def _glm_5_2_reasoning_extras(
|
||||
reasoning_config: dict | None,
|
||||
) -> tuple[dict[str, Any], dict[str, Any]]:
|
||||
"""Map Hermes reasoning effort onto GLM-5.2's native ``high``/``max``.
|
||||
|
||||
GLM-5.2 only supports two effort levels. ``xhigh``/``max`` request the
|
||||
top tier; everything else that is enabled requests ``high`` (its
|
||||
minimum thinking level). When reasoning is explicitly disabled, or no
|
||||
config is supplied, the server default is left untouched.
|
||||
"""
|
||||
if not isinstance(reasoning_config, dict):
|
||||
return {}, {}
|
||||
if reasoning_config.get("enabled") is False:
|
||||
return {}, {}
|
||||
|
||||
effort = (reasoning_config.get("effort") or "").strip().lower()
|
||||
if not effort or effort == "none":
|
||||
return {}, {}
|
||||
|
||||
if effort in {"xhigh", "max"}:
|
||||
return {}, {"reasoning_effort": "max"}
|
||||
# low / medium / minimal / high all clamp to GLM-5.2's minimum: high.
|
||||
return {}, {"reasoning_effort": "high"}
|
||||
|
||||
|
||||
class ZaiProfile(ProviderProfile):
    """Z.AI / GLM — GLM-5.2 native reasoning_effort controls."""

    def build_api_kwargs_extras(
        self, *, reasoning_config: dict | None = None, model: str | None = None, **context
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Return the ``(extra_body, top_level)`` extras for a request.

        Only GLM-5.2 models get reasoning extras; for every other model
        both dicts are empty. Additional ``**context`` keywords are
        accepted and ignored.
        """
        if _is_glm_5_2(model):
            return _glm_5_2_reasoning_extras(reasoning_config)
        return {}, {}
|
||||
|
||||
|
||||
zai = ZaiProfile(
|
||||
name="zai",
|
||||
aliases=("glm", "z-ai", "z.ai", "zhipu"),
|
||||
env_vars=("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"),
|
||||
|
||||
@@ -122,13 +122,68 @@ class TestOpenCodeGoDeepSeekThinking:
|
||||
assert top_level == {"reasoning_effort": "max"}
|
||||
|
||||
|
||||
class TestOpenCodeGoGLM52Reasoning:
    """GLM-5.2 uses its native high/max reasoning_effort knob on OpenCode Go."""

    def test_high_maps_to_high(self, opencode_go_profile):
        extra_body, top_level = opencode_go_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": True, "effort": "high"},
            model="glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {"reasoning_effort": "high"}

    # Parametrized (rather than looped) so each effort level is reported
    # as its own test case and one failure does not mask the others.
    @pytest.mark.parametrize("effort", ["low", "medium", "minimal"])
    def test_low_and_medium_clamp_up_to_high(self, opencode_go_profile, effort):
        extra_body, top_level = opencode_go_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": True, "effort": effort},
            model="glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {"reasoning_effort": "high"}

    @pytest.mark.parametrize("effort", ["xhigh", "max"])
    def test_xhigh_and_max_map_to_max(self, opencode_go_profile, effort):
        extra_body, top_level = opencode_go_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": True, "effort": effort},
            model="z-ai/glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {"reasoning_effort": "max"}

    def test_disabled_leaves_server_default(self, opencode_go_profile):
        extra_body, top_level = opencode_go_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": False, "effort": "high"},
            model="glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {}

    def test_no_config_leaves_server_default(self, opencode_go_profile):
        extra_body, top_level = opencode_go_profile.build_api_kwargs_extras(
            reasoning_config=None,
            model="glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {}

    @pytest.mark.parametrize("model", ["glm-5-2", "glm-5p2"])
    def test_alias_spellings_recognized(self, opencode_go_profile, model):
        extra_body, top_level = opencode_go_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": True, "effort": "max"},
            model=model,
        )
        assert top_level == {"reasoning_effort": "max"}
|
||||
|
||||
|
||||
class TestOpenCodeGoModelGating:
|
||||
"""Other OpenCode Go models must not receive Kimi/DeepSeek controls."""
|
||||
"""Other OpenCode Go models must not receive Kimi/DeepSeek/GLM controls."""
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"model",
|
||||
[
|
||||
"glm-5.1",
|
||||
"glm-5",
|
||||
"qwen3.6-plus",
|
||||
"minimax-m2.7",
|
||||
"deepseek-v3.1",
|
||||
|
||||
113
tests/plugins/model_providers/test_zai_profile.py
Normal file
113
tests/plugins/model_providers/test_zai_profile.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""Unit tests for Z.AI / GLM reasoning-control wiring.
|
||||
|
||||
GLM-5.2 on Z.AI's OpenAI-compatible ``/api/paas/v4`` endpoint exposes a
|
||||
native ``reasoning_effort`` knob with two enabled levels (high / max).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def zai_profile():
    """Resolve the registered Z.AI provider profile.

    Looks up the profile named ``"zai"`` and fails the requesting test
    immediately if the lookup returns ``None``.
    """
    # Imported only for its side effects (hence the F401 suppression);
    # presumably this is what triggers provider registration — TODO confirm.
    import model_tools  # noqa: F401
    import providers

    profile = providers.get_provider_profile("zai")
    assert profile is not None, "zai provider profile must be registered"
    return profile
|
||||
|
||||
|
||||
class TestZaiGLM52Reasoning:
    """GLM-5.2 on the zai profile maps effort onto reasoning_effort high/max."""

    def test_high_maps_to_high(self, zai_profile):
        extra_body, top_level = zai_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": True, "effort": "high"},
            model="glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {"reasoning_effort": "high"}

    # Parametrized (rather than looped) so each effort level is reported
    # as its own test case and one failure does not mask the others.
    @pytest.mark.parametrize("effort", ["low", "medium", "minimal"])
    def test_low_and_medium_clamp_up_to_high(self, zai_profile, effort):
        extra_body, top_level = zai_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": True, "effort": effort},
            model="glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {"reasoning_effort": "high"}

    @pytest.mark.parametrize("effort", ["xhigh", "max"])
    def test_strong_efforts_map_to_max(self, zai_profile, effort):
        extra_body, top_level = zai_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": True, "effort": effort},
            model="z-ai/glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {"reasoning_effort": "max"}

    def test_disabled_leaves_server_default(self, zai_profile):
        extra_body, top_level = zai_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": False, "effort": "high"},
            model="glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {}

    def test_no_config_leaves_server_default(self, zai_profile):
        extra_body, top_level = zai_profile.build_api_kwargs_extras(
            reasoning_config=None,
            model="glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {}

    def test_no_effort_leaves_server_default(self, zai_profile):
        extra_body, top_level = zai_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": True},
            model="glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {}

    @pytest.mark.parametrize(
        "model",
        ["glm-5-2", "glm-5p2", "accounts/fireworks/models/glm-5p2", "zai-org-glm-5-2"],
    )
    def test_alias_spellings_recognized(self, zai_profile, model):
        extra_body, top_level = zai_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": True, "effort": "max"},
            model=model,
        )
        assert top_level == {"reasoning_effort": "max"}
|
||||
|
||||
|
||||
class TestZaiModelGating:
    """Models other than GLM-5.2 must not receive reasoning controls."""

    @pytest.mark.parametrize(
        "model",
        ["glm-5.1", "glm-5", "glm-4.7", "glm-4-9b", "", None],
    )
    def test_non_glm_5_2_models_emit_nothing(self, zai_profile, model):
        # Reasoning is requested, yet the extras must stay empty for these models.
        requested = {"enabled": True, "effort": "high"}
        body_extras, request_extras = zai_profile.build_api_kwargs_extras(
            reasoning_config=requested,
            model=model,
        )
        assert body_extras == {}
        assert request_extras == {}
|
||||
|
||||
|
||||
class TestZaiFullKwargsIntegration:
    """End-to-end check that the zai profile's extras reach the request kwargs."""

    def test_glm_5_2_reaches_top_level(self, zai_profile):
        from agent.transports.chat_completions import ChatCompletionsTransport

        transport = ChatCompletionsTransport()
        request_kwargs = transport.build_kwargs(
            model="glm-5.2",
            messages=[{"role": "user", "content": "ping"}],
            tools=None,
            provider_profile=zai_profile,
            reasoning_config={"enabled": True, "effort": "max"},
            base_url="https://api.z.ai/api/paas/v4",
        )

        # The effort is sent as a top-level field, with no extra_body at all.
        assert request_kwargs["reasoning_effort"] == "max"
        assert "extra_body" not in request_kwargs
|
||||
Reference in New Issue
Block a user