feat(providers): GLM-5.2 native reasoning_effort controls

Port from Kilo-Org/kilocode#11555: GLM-5.2 exposes a native
reasoning_effort knob with two enabled levels (high / max) on its
OpenAI-compatible endpoints. Previously the zai profile (direct Z.AI
/api/paas/v4) used the base ProviderProfile and emitted nothing, and the
OpenCode Go profile only handled Kimi K2 / DeepSeek — so a user's effort
preference for GLM-5.2 was silently dropped on both routes.

- zai: ZaiProfile maps effort onto high/max (xhigh/max -> max, lower -> high)
- opencode-go: same mapping for GLM-5.2, alongside existing Kimi/DeepSeek
- alias spellings recognized (glm-5.2 / glm-5-2 / glm-5p2, vendor-prefixed)
- disabled / no effort leaves the server default untouched
This commit is contained in:
Teknium
2026-06-22 17:04:17 -07:00
parent 672ea1f894
commit c3ae275571
4 changed files with 253 additions and 3 deletions

View File

@@ -31,6 +31,12 @@ def _is_deepseek_thinking_model(model: str | None) -> bool:
return m == "deepseek-reasoner"
def _is_glm_5_2_model(model: str | None) -> bool:
    """Return True when *model* names GLM-5.2 under any known spelling.

    The name is first passed through ``_flat_model_name`` (defined elsewhere
    in this module; presumably it normalizes case and vendor prefixes --
    confirm), then checked for the ``glm-5.2`` / ``glm-5-2`` / ``glm-5p2``
    substrings.
    """
    flat_name = _flat_model_name(model)
    for spelling in ("glm-5.2", "glm-5-2", "glm-5p2"):
        if spelling in flat_name:
            return True
    return False
class OpenCodeGoProfile(ProviderProfile):
"""OpenCode Go - model-specific reasoning controls."""
@@ -55,6 +61,21 @@ class OpenCodeGoProfile(ProviderProfile):
extra_body: dict[str, Any] = {}
top_level: dict[str, Any] = {}
if _is_glm_5_2_model(model):
# GLM-5.2 on OpenCode Go uses its native OpenAI-compatible
# reasoning_effort knob, which has exactly two enabled levels:
# high and max. Map Hermes' richer scale onto those; leave the
# server default alone when reasoning is disabled or unset.
if not isinstance(reasoning_config, dict):
return extra_body, top_level
if reasoning_config.get("enabled") is False:
return extra_body, top_level
effort = (reasoning_config.get("effort") or "").strip().lower()
if not effort or effort == "none":
return extra_body, top_level
top_level["reasoning_effort"] = "max" if effort in {"xhigh", "max"} else "high"
return extra_body, top_level
if _is_kimi_k2_model(model):
# Kimi K2 on OpenCode Go uses Moonshot's native wire shape:
# extra_body.thinking (binary toggle) + top-level reasoning_effort

View File

@@ -1,9 +1,70 @@
"""ZAI / GLM provider profile."""
"""ZAI / GLM provider profile.
Z.AI's ``/api/paas/v4`` endpoint is OpenAI-compatible. GLM-5.2 exposes a
native ``reasoning_effort`` knob with exactly two levels — ``high`` and
``max`` — when thinking is enabled (per Z.AI / BigModel docs). Hermes' richer
effort scale is collapsed onto those two so the user's effort preference
actually reaches the model instead of being silently dropped.
"""
from __future__ import annotations
from typing import Any
from providers import register_provider
from providers.base import ProviderProfile
zai = ProviderProfile(
def _is_glm_5_2(model: str | None) -> bool:
"""Detect GLM-5.2 across the alias spellings providers use.
Covers the canonical ``glm-5.2`` plus the ``glm-5-2`` / ``glm-5p2``
variants seen on relays (Fireworks ``glm-5p2``, etc.) and any
vendor-prefixed form (``z-ai/glm-5.2``, ``zai-org-glm-5-2``).
"""
m = (model or "").strip().lower()
if not m:
return False
return any(token in m for token in ("glm-5.2", "glm-5-2", "glm-5p2"))
def _glm_5_2_reasoning_extras(
reasoning_config: dict | None,
) -> tuple[dict[str, Any], dict[str, Any]]:
"""Map Hermes reasoning effort onto GLM-5.2's native ``high``/``max``.
GLM-5.2 only supports two effort levels. ``xhigh``/``max`` request the
top tier; everything else that is enabled requests ``high`` (its
minimum thinking level). When reasoning is explicitly disabled, or no
config is supplied, the server default is left untouched.
"""
if not isinstance(reasoning_config, dict):
return {}, {}
if reasoning_config.get("enabled") is False:
return {}, {}
effort = (reasoning_config.get("effort") or "").strip().lower()
if not effort or effort == "none":
return {}, {}
if effort in {"xhigh", "max"}:
return {}, {"reasoning_effort": "max"}
# low / medium / minimal / high all clamp to GLM-5.2's minimum: high.
return {}, {"reasoning_effort": "high"}
class ZaiProfile(ProviderProfile):
    """Z.AI / GLM provider profile that emits GLM-5.2 reasoning_effort."""

    def build_api_kwargs_extras(
        self, *, reasoning_config: dict | None = None, model: str | None = None, **context
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        """Return ``(extra_body, top_level)`` request extras for *model*.

        Only GLM-5.2 (as detected by ``_is_glm_5_2``) receives reasoning
        controls, computed by ``_glm_5_2_reasoning_extras``; every other
        model gets two empty dicts. Additional ``**context`` keywords are
        accepted and ignored.
        """
        if _is_glm_5_2(model):
            return _glm_5_2_reasoning_extras(reasoning_config)
        return {}, {}
zai = ZaiProfile(
name="zai",
aliases=("glm", "z-ai", "z.ai", "zhipu"),
env_vars=("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"),

View File

@@ -122,13 +122,68 @@ class TestOpenCodeGoDeepSeekThinking:
assert top_level == {"reasoning_effort": "max"}
class TestOpenCodeGoGLM52Reasoning:
    """GLM-5.2 uses its native high/max reasoning_effort knob on OpenCode Go."""

    def test_high_maps_to_high(self, opencode_go_profile):
        extra_body, top_level = opencode_go_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": True, "effort": "high"},
            model="glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {"reasoning_effort": "high"}

    # Parametrized instead of looping inside the test so every effort is
    # collected and reported on its own; a failure on one level no longer
    # hides the levels after it.
    @pytest.mark.parametrize("effort", ["low", "medium", "minimal"])
    def test_low_and_medium_clamp_up_to_high(self, opencode_go_profile, effort):
        extra_body, top_level = opencode_go_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": True, "effort": effort},
            model="glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {"reasoning_effort": "high"}

    @pytest.mark.parametrize("effort", ["xhigh", "max"])
    def test_xhigh_and_max_map_to_max(self, opencode_go_profile, effort):
        # Uses a vendor-prefixed model name to cover prefix handling too.
        extra_body, top_level = opencode_go_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": True, "effort": effort},
            model="z-ai/glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {"reasoning_effort": "max"}

    def test_disabled_leaves_server_default(self, opencode_go_profile):
        extra_body, top_level = opencode_go_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": False, "effort": "high"},
            model="glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {}

    def test_no_config_leaves_server_default(self, opencode_go_profile):
        extra_body, top_level = opencode_go_profile.build_api_kwargs_extras(
            reasoning_config=None,
            model="glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {}

    @pytest.mark.parametrize("model", ["glm-5-2", "glm-5p2"])
    def test_alias_spellings_recognized(self, opencode_go_profile, model):
        extra_body, top_level = opencode_go_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": True, "effort": "max"},
            model=model,
        )
        # The GLM-5.2 branch never populates extra_body; pin that here as
        # the other tests in this class do.
        assert extra_body == {}
        assert top_level == {"reasoning_effort": "max"}
class TestOpenCodeGoModelGating:
"""Other OpenCode Go models must not receive Kimi/DeepSeek controls."""
"""Other OpenCode Go models must not receive Kimi/DeepSeek/GLM controls."""
@pytest.mark.parametrize(
"model",
[
"glm-5.1",
"glm-5",
"qwen3.6-plus",
"minimax-m2.7",
"deepseek-v3.1",

View File

@@ -0,0 +1,113 @@
"""Unit tests for Z.AI / GLM reasoning-control wiring.
GLM-5.2 on Z.AI's OpenAI-compatible ``/api/paas/v4`` endpoint exposes a
native ``reasoning_effort`` knob with two enabled levels (high / max).
"""
from __future__ import annotations
import pytest
@pytest.fixture
def zai_profile():
    """Provide the provider profile registered under the name ``zai``.

    Fails the requesting test immediately if no such profile is registered.
    """
    # NOTE(review): model_tools is imported only for its side effects
    # (presumably it triggers provider registration -- confirm).
    import model_tools  # noqa: F401
    import providers

    resolved = providers.get_provider_profile("zai")
    assert resolved is not None, "zai provider profile must be registered"
    return resolved
class TestZaiGLM52Reasoning:
    """GLM-5.2 effort mapping on the Z.AI profile: high/max or nothing."""

    def test_high_maps_to_high(self, zai_profile):
        extra_body, top_level = zai_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": True, "effort": "high"},
            model="glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {"reasoning_effort": "high"}

    # Parametrized (like test_strong_efforts_map_to_max) rather than looped,
    # so each effort is reported separately and one failure does not mask
    # the remaining levels.
    @pytest.mark.parametrize("effort", ["low", "medium", "minimal"])
    def test_low_and_medium_clamp_up_to_high(self, zai_profile, effort):
        extra_body, top_level = zai_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": True, "effort": effort},
            model="glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {"reasoning_effort": "high"}

    @pytest.mark.parametrize("effort", ["xhigh", "max"])
    def test_strong_efforts_map_to_max(self, zai_profile, effort):
        # Uses a vendor-prefixed model name to cover prefix handling too.
        extra_body, top_level = zai_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": True, "effort": effort},
            model="z-ai/glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {"reasoning_effort": "max"}

    def test_disabled_leaves_server_default(self, zai_profile):
        extra_body, top_level = zai_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": False, "effort": "high"},
            model="glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {}

    def test_no_config_leaves_server_default(self, zai_profile):
        extra_body, top_level = zai_profile.build_api_kwargs_extras(
            reasoning_config=None,
            model="glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {}

    def test_no_effort_leaves_server_default(self, zai_profile):
        # Reasoning enabled but no effort key: nothing should be emitted.
        extra_body, top_level = zai_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": True},
            model="glm-5.2",
        )
        assert extra_body == {}
        assert top_level == {}

    @pytest.mark.parametrize(
        "model",
        ["glm-5-2", "glm-5p2", "accounts/fireworks/models/glm-5p2", "zai-org-glm-5-2"],
    )
    def test_alias_spellings_recognized(self, zai_profile, model):
        extra_body, top_level = zai_profile.build_api_kwargs_extras(
            reasoning_config={"enabled": True, "effort": "max"},
            model=model,
        )
        # Pin the empty extra_body here as well, matching the other tests.
        assert extra_body == {}
        assert top_level == {"reasoning_effort": "max"}
class TestZaiModelGating:
    """Models other than GLM-5.2 must not receive reasoning controls."""

    @pytest.mark.parametrize(
        "model",
        ["glm-5.1", "glm-5", "glm-4.7", "glm-4-9b", "", None],
    )
    def test_non_glm_5_2_models_emit_nothing(self, zai_profile, model):
        # A fully enabled config is supplied, so an empty result can only
        # come from the model gate.
        enabled_config = {"enabled": True, "effort": "high"}
        body_extras, request_extras = zai_profile.build_api_kwargs_extras(
            reasoning_config=enabled_config,
            model=model,
        )
        assert (body_extras, request_extras) == ({}, {})
class TestZaiFullKwargsIntegration:
    """Check the profile's output through ChatCompletionsTransport.build_kwargs."""

    def test_glm_5_2_reaches_top_level(self, zai_profile):
        from agent.transports.chat_completions import ChatCompletionsTransport

        transport = ChatCompletionsTransport()
        built = transport.build_kwargs(
            model="glm-5.2",
            messages=[{"role": "user", "content": "ping"}],
            tools=None,
            provider_profile=zai_profile,
            reasoning_config={"enabled": True, "effort": "max"},
            base_url="https://api.z.ai/api/paas/v4",
        )
        # The effort must appear as a top-level request kwarg, and no
        # extra_body key should be present at all.
        assert built["reasoning_effort"] == "max"
        assert "extra_body" not in built