diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256
index 3adb7c1a922d..394c7b19c411 100644
--- a/docs/.generated/plugin-sdk-api-baseline.sha256
+++ b/docs/.generated/plugin-sdk-api-baseline.sha256
@@ -1,2 +1,2 @@
-4768607253fdc720cb2bc280ac285ccfa7f7057a01659691f5be5b1f58422789 plugin-sdk-api-baseline.json
-7901bc511cf6f9628df4cd619035265f48c40939e4e8e51c5c10dc73a263f183 plugin-sdk-api-baseline.jsonl
+8a2769df428906990ee0d1bf8b0423f2a099b053c64c816d092ff84d61e11633 plugin-sdk-api-baseline.json
+28b798973f3fb2a5b33ccbb6e3c1ac0453fa234a3a1c6cdc27935c27639bd104 plugin-sdk-api-baseline.jsonl
diff --git a/docs/plugins/reference/anthropic-vertex.md b/docs/plugins/reference/anthropic-vertex.md
index 425219d3e401..71988de7dc4d 100644
--- a/docs/plugins/reference/anthropic-vertex.md
+++ b/docs/plugins/reference/anthropic-vertex.md
@@ -17,3 +17,9 @@ OpenClaw Anthropic Vertex provider plugin for Claude models on Google Vertex AI.
## Surface
providers: anthropic-vertex
+
+## Claude Fable 5
+
+Use `anthropic-vertex/claude-fable-5` where the model is available in your Google Cloud region.
+Fable 5 always uses adaptive thinking and defaults to `high` effort. `/think off` and
+`/think minimal` use `low` effort because the model does not support disabling thinking.
diff --git a/docs/plugins/reference/microsoft-foundry.md b/docs/plugins/reference/microsoft-foundry.md
index 7b3e4d2a710a..f27e005124d5 100644
--- a/docs/plugins/reference/microsoft-foundry.md
+++ b/docs/plugins/reference/microsoft-foundry.md
@@ -50,7 +50,10 @@ chat APIs:
Anthropic Claude deployments in Microsoft Foundry use the Anthropic Messages
API shape, not the OpenAI-compatible `/openai/v1` shape. Configure those as a
custom `anthropic-messages` provider until the Microsoft Foundry plugin grows a
-native Anthropic runtime.
+native Anthropic runtime. When the Foundry deployment name differs from the
+Claude model ID, set `params.canonicalModelId` (for example, `claude-fable-5`)
+on the model entry so OpenClaw can apply model-specific wire contracts, map
+`/think off` correctly, and preserve signed thinking safely.
## MAI image generation
diff --git a/docs/providers/anthropic.md b/docs/providers/anthropic.md
index ed095df2f12b..a799c8de1282 100644
--- a/docs/providers/anthropic.md
+++ b/docs/providers/anthropic.md
@@ -160,7 +160,12 @@ Anthropic's current public docs:
-## Thinking defaults (Claude 4.8 and 4.6)
+## Thinking defaults (Claude Fable 5, 4.8, and 4.6)
+
+`anthropic/claude-fable-5` always uses adaptive thinking and defaults to `high`
+effort. Because Anthropic does not allow thinking to be disabled for this model,
+`/think off` and `/think minimal` use `low` effort. OpenClaw also omits custom
+temperature values for Fable 5 requests.
Claude Opus 4.8 keeps thinking off by default in OpenClaw. When you explicitly enable adaptive thinking with `/think high|xhigh|max`, OpenClaw sends Anthropic's Opus 4.8 effort values; Claude 4.6 models default to `adaptive`.
diff --git a/docs/providers/bedrock.md b/docs/providers/bedrock.md
index d966d0985c77..23850a49ab52 100644
--- a/docs/providers/bedrock.md
+++ b/docs/providers/bedrock.md
@@ -310,6 +310,25 @@ openclaw models list
the request options object and the `inferenceConfig` payload field.
+
+ Use `amazon-bedrock/anthropic.claude-fable-5` in `us-east-1`, or a
+ regional inference profile ID such as `us.anthropic.claude-fable-5`.
+ OpenClaw applies Fable's 1M context window, 128K output limit, always-on
+ adaptive thinking, and supported effort mapping. `/think off` and
+ `/think minimal` map to `low`; unsupported temperature and forced tool
+ choice controls are omitted. Streaming output is held until Bedrock
+ returns a terminal status so mid-stream refusals do not expose partial text.
+ Fable supports only the standard (`default`) service tier; OpenClaw warns about
+ and ignores configured `flex`, `priority`, and `reserved` tiers for this model.
+
+ AWS requires an explicit `provider_data_share` data-retention opt-in before
+ Fable is available. Prompts and completions are shared with Anthropic and
+ retained for up to 30 days for trust and safety. Review and configure
+ [Bedrock data retention](https://docs.aws.amazon.com/bedrock/latest/userguide/data-retention.html)
+ before enabling the model.
+
+
+
You can apply [Amazon Bedrock Guardrails](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html)
to all Bedrock model invocations by adding a `guardrail` object to the
diff --git a/extensions/amazon-bedrock/bedrock-options.ts b/extensions/amazon-bedrock/bedrock-options.ts
index 2732d9d34475..fb0e19e5993e 100644
--- a/extensions/amazon-bedrock/bedrock-options.ts
+++ b/extensions/amazon-bedrock/bedrock-options.ts
@@ -41,6 +41,9 @@ export function supportsBedrockPromptCaching(modelId: string, modelName?: string
if (candidates.some((s) => s.includes("-4-"))) {
return true;
}
+ if (candidates.some((s) => s.includes("claude-fable-5"))) {
+ return true;
+ }
if (candidates.some((s) => s.includes("claude-3-7-sonnet"))) {
return true;
}
diff --git a/extensions/amazon-bedrock/discovery.test.ts b/extensions/amazon-bedrock/discovery.test.ts
index 4e46fe7acd94..54a0561736b1 100644
--- a/extensions/amazon-bedrock/discovery.test.ts
+++ b/extensions/amazon-bedrock/discovery.test.ts
@@ -248,6 +248,58 @@ describe("bedrock discovery", () => {
);
});
+ it("applies Fable limits and reasoning metadata to foundation and profile models", async () => {
+ sendMock
+ .mockResolvedValueOnce({
+ modelSummaries: [
+ {
+ modelId: "anthropic.claude-fable-5",
+ modelName: "Claude Fable 5",
+ providerName: "anthropic",
+ inputModalities: ["TEXT", "IMAGE"],
+ outputModalities: ["TEXT"],
+ responseStreamingSupported: true,
+ modelLifecycle: { status: "ACTIVE" },
+ },
+ ],
+ })
+ .mockResolvedValueOnce({
+ inferenceProfileSummaries: [
+ {
+ inferenceProfileId: "company-fable",
+ inferenceProfileName: "Company Fable",
+ status: "ACTIVE",
+ type: "APPLICATION",
+ models: [
+ {
+ modelArn: "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-fable-5",
+ },
+ ],
+ },
+ ],
+ });
+
+ const models = await discoverBedrockModels({ region: "us-east-1", clientFactory });
+ const expected = {
+ reasoning: true,
+ contextWindow: 1_000_000,
+ maxTokens: 128_000,
+ thinkingLevelMap: { off: "low", minimal: "low", xhigh: "xhigh", max: "max" },
+ };
+
+ expectModelFields(
+ models.find((model) => model.id === "anthropic.claude-fable-5"),
+ expected,
+ );
+ expectModelFields(
+ models.find((model) => model.id === "company-fable"),
+ {
+ ...expected,
+ params: { canonicalModelId: "claude-fable-5" },
+ },
+ );
+ });
+
it("caches results when refreshInterval is enabled", async () => {
mockSingleActiveSummary();
@@ -410,7 +462,9 @@ describe("bedrock discovery", () => {
input: ["text", "image"],
contextWindow: 1000000,
maxTokens: 4096,
+ params: { canonicalModelId: "claude-sonnet-4-6" },
});
+ expect(usProfile?.thinkingLevelMap).toBeUndefined();
expectModelFields(euProfile, { input: ["text", "image"] });
expectModelFields(globalProfile, { input: ["text", "image"] });
@@ -546,6 +600,8 @@ describe("bedrock discovery", () => {
contextWindow: 1_000_000,
maxTokens: 4096,
input: ["text"],
+ params: { canonicalModelId: "claude-opus-4-6-v1:0" },
+ thinkingLevelMap: { xhigh: null, max: "max" },
});
});
diff --git a/extensions/amazon-bedrock/discovery.ts b/extensions/amazon-bedrock/discovery.ts
index de07b3a5f280..a6fe04959ed4 100644
--- a/extensions/amazon-bedrock/discovery.ts
+++ b/extensions/amazon-bedrock/discovery.ts
@@ -18,12 +18,18 @@ import type {
ModelDefinitionConfig,
ModelProviderConfig,
} from "openclaw/plugin-sdk/provider-model-shared";
+import {
+ resolveClaudeFable5ModelIdentity,
+ resolveClaudeModelIdentity,
+ supportsClaudeAdaptiveThinking,
+} from "openclaw/plugin-sdk/provider-model-shared";
import {
normalizeLowercaseStringOrEmpty,
normalizeOptionalLowercaseString,
} from "openclaw/plugin-sdk/string-coerce-runtime";
import { refreshAwsSharedConfigCacheForBedrock } from "./aws-credential-refresh.js";
import { resolveBedrockConfigApiKey } from "./discovery-shared.js";
+import { resolveBedrockNativeThinkingLevelMap } from "./thinking-policy.js";
const log = createSubsystemLogger("bedrock-discovery");
@@ -53,6 +59,7 @@ const DEFAULT_MAX_TOKENS = 4096;
*/
const KNOWN_CONTEXT_WINDOWS: Record = {
// Anthropic Claude
+ "anthropic.claude-fable-5": 1_000_000,
"anthropic.claude-3-7-sonnet-20250219-v1:0": 200_000,
"anthropic.claude-opus-4-8": 1_000_000,
"anthropic.claude-opus-4-7": 1_000_000,
@@ -130,6 +137,9 @@ function resolveKnownContextWindow(modelId: string): number | undefined {
const stripped = modelId.replace(/^(?:us|eu|ap|apac|au|jp|global)\./, "");
const candidates = [modelId, stripped];
for (const candidate of candidates) {
+ if (resolveClaudeFable5ModelIdentity({ id: candidate })) {
+ return 1_000_000;
+ }
if (/(?:^|[/.:])anthropic\.claude-opus-4[.-]8(?:$|[-.:/])/i.test(candidate)) {
return 1_000_000;
}
@@ -147,20 +157,14 @@ function resolveKnownContextWindow(modelId: string): number | undefined {
return undefined;
}
-function isKnownClaudeOpus47OrNewerModelId(modelId: string): boolean {
- const stripped = modelId.replace(/^(?:us|eu|ap|apac|au|jp|global)\./, "");
- return [modelId, stripped].some((candidate) =>
- /(?:^|[/.:])anthropic\.claude-opus-4[.-][78](?:$|[-.:/])/i.test(candidate),
- );
-}
-
function resolveKnownThinkingLevelMap(
modelId: string,
): ModelDefinitionConfig["thinkingLevelMap"] | undefined {
- if (!isKnownClaudeOpus47OrNewerModelId(modelId)) {
- return undefined;
- }
- return { xhigh: "xhigh", max: "max" };
+ return resolveBedrockNativeThinkingLevelMap(modelId);
+}
+
+function resolveKnownMaxTokens(modelId: string): number | undefined {
+ return resolveClaudeFable5ModelIdentity({ id: modelId }) ? 128_000 : undefined;
}
const DEFAULT_COST = {
@@ -271,7 +275,7 @@ function mapInputModalities(summary: BedrockModelSummary): Array<"text" | "image
}
function inferReasoningSupport(summary: BedrockModelSummary): boolean {
- if (isKnownClaudeOpus47OrNewerModelId(summary.modelId ?? "")) {
+ if (supportsClaudeAdaptiveThinking({ id: summary.modelId })) {
return true;
}
const haystack = normalizeLowercaseStringOrEmpty(
@@ -340,7 +344,7 @@ function toModelDefinition(
input: mapInputModalities(summary),
cost: DEFAULT_COST,
contextWindow: resolveKnownContextWindow(id) ?? defaults.contextWindow,
- maxTokens: defaults.maxTokens,
+ maxTokens: resolveKnownMaxTokens(id) ?? defaults.maxTokens,
...(thinkingLevelMap ? { thinkingLevelMap } : {}),
};
}
@@ -456,23 +460,30 @@ function resolveInferenceProfiles(
const knownThinkingLevelMap = resolveKnownThinkingLevelMap(
baseModelId ?? profile.inferenceProfileId,
);
+ const canonicalClaudeId = resolveClaudeModelIdentity({ id: baseModelId });
discovered.push({
id: profile.inferenceProfileId,
name: profile.inferenceProfileName?.trim() || profile.inferenceProfileId,
reasoning:
baseModel?.reasoning ??
- isKnownClaudeOpus47OrNewerModelId(baseModelId ?? profile.inferenceProfileId),
+ supportsClaudeAdaptiveThinking({ id: baseModelId ?? profile.inferenceProfileId }),
input: baseModel?.input ?? ["text"],
cost: baseModel?.cost ?? DEFAULT_COST,
contextWindow:
baseModel?.contextWindow ??
resolveKnownContextWindow(baseModelId ?? profile.inferenceProfileId ?? "") ??
defaults.contextWindow,
- maxTokens: baseModel?.maxTokens ?? defaults.maxTokens,
+ maxTokens:
+ baseModel?.maxTokens ??
+ resolveKnownMaxTokens(baseModelId ?? profile.inferenceProfileId) ??
+ defaults.maxTokens,
...(baseModel?.thinkingLevelMap || knownThinkingLevelMap
? { thinkingLevelMap: baseModel?.thinkingLevelMap ?? knownThinkingLevelMap }
: {}),
+ ...(canonicalClaudeId.startsWith("claude-")
+ ? { params: { canonicalModelId: canonicalClaudeId } }
+ : {}),
});
}
return discovered;
diff --git a/extensions/amazon-bedrock/index.test.ts b/extensions/amazon-bedrock/index.test.ts
index 7d02f014a775..acf089c01efe 100644
--- a/extensions/amazon-bedrock/index.test.ts
+++ b/extensions/amazon-bedrock/index.test.ts
@@ -10,6 +10,7 @@ import {
import { withEnvAsync } from "openclaw/plugin-sdk/test-env";
import { afterAll, afterEach, beforeEach, describe, expect, it, vi } from "vitest";
import { setAwsSharedIniFileLoaderForTest } from "./aws-credential-refresh.js";
+import { supportsBedrockPromptCaching } from "./bedrock-options.js";
import { resetBedrockDiscoveryCacheForTest } from "./discovery.js";
import amazonBedrockPlugin from "./index.js";
import {
@@ -312,6 +313,40 @@ describe("amazon-bedrock provider plugin", () => {
);
});
+ it("normalizes explicit Claude 4.6 rows with native max metadata", async () => {
+ const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
+
+ const normalized = provider.normalizeResolvedModel?.({
+ provider: "amazon-bedrock",
+ modelId: "us.anthropic.claude-opus-4-6-v1",
+ model: {
+ id: "us.anthropic.claude-opus-4-6-v1",
+ name: "Claude Opus 4.6",
+ provider: "amazon-bedrock",
+ api: "bedrock-converse-stream",
+ baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
+ reasoning: true,
+ input: ["text"],
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+ contextWindow: 1_000_000,
+ maxTokens: 4096,
+ },
+ } as never);
+
+ expect(normalized?.thinkingLevelMap).toEqual({ xhigh: null, max: "max" });
+
+ const restricted = provider.normalizeResolvedModel?.({
+ provider: "amazon-bedrock",
+ modelId: "us.anthropic.claude-opus-4-6-v1",
+ model: {
+ ...(normalized as NonNullable),
+ thinkingLevelMap: { max: null },
+ },
+ } as never);
+
+ expect(restricted?.thinkingLevelMap).toEqual({ xhigh: null, max: null });
+ });
+
it("mirrors Claude Opus 4.7 thinking levels for Bedrock model refs", async () => {
const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
@@ -355,6 +390,47 @@ describe("amazon-bedrock provider plugin", () => {
}
});
+ it("keeps Claude Fable 5 always adaptive with high default effort", async () => {
+ const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
+
+ for (const modelId of [
+ "anthropic.claude-fable-5",
+ "us.anthropic.claude-fable-5",
+ "global.anthropic.claude-fable-5",
+ ]) {
+ expectThinkingProfile(
+ provider.resolveThinkingProfile?.({
+ provider: "amazon-bedrock",
+ modelId,
+ } as never),
+ {
+ levelIds: ["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"],
+ defaultLevel: "high",
+ },
+ );
+ }
+ });
+
+ it("keeps Fable thinking policy for opaque deployment aliases", async () => {
+ const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
+
+ expectThinkingProfile(
+ provider.resolveThinkingProfile?.({
+ provider: "amazon-bedrock",
+ modelId: "company-fable",
+ params: { canonicalModelId: "claude-fable-5" },
+ } as never),
+ {
+ levelIds: ["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"],
+ defaultLevel: "high",
+ },
+ );
+ });
+
+ it("recognizes direct Fable model refs as prompt-cache eligible", () => {
+ expect(supportsBedrockPromptCaching("us.anthropic.claude-fable-5")).toBe(true);
+ });
+
it("owns Anthropic-style replay policy for Claude Bedrock models", async () => {
const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
@@ -452,6 +528,7 @@ describe("amazon-bedrock provider plugin", () => {
expectWrappedResultFields(result, { maxTokens: 10 });
expect(result).not.toHaveProperty("temperature");
+ expect(result).not.toHaveProperty("cacheRetention", "none");
});
it("omits temperature for Bedrock Opus 4.8 model ids", async () => {
@@ -474,6 +551,62 @@ describe("amazon-bedrock provider plugin", () => {
expectWrappedResultFields(result, { maxTokens: 10 });
expect(result).not.toHaveProperty("temperature");
+ expect(result).not.toHaveProperty("cacheRetention", "none");
+ });
+
+ it("omits temperature for Bedrock Fable deployment aliases", async () => {
+ const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
+ const wrapped = provider.wrapStreamFn?.({
+ provider: "amazon-bedrock",
+ modelId: "production-fable",
+ model: {
+ api: "bedrock-converse-stream",
+ provider: "amazon-bedrock",
+ id: "production-fable",
+ params: { canonicalModelId: "claude-fable-5" },
+ },
+ streamFn: spyStreamFn,
+ } as never);
+
+ const result = wrapped?.(
+ {
+ api: "bedrock-converse-stream",
+ provider: "amazon-bedrock",
+ id: "production-fable",
+ params: { canonicalModelId: "claude-fable-5" },
+ } as never,
+ { messages: [] } as never,
+ { temperature: 0.2, maxTokens: 10 },
+ ) as Record | undefined;
+
+ expectWrappedResultFields(result, { maxTokens: 10 });
+ expect(result).not.toHaveProperty("temperature");
+ expect(result).not.toHaveProperty("cacheRetention", "none");
+ });
+
+ it("omits temperature for canonical Bedrock Opus aliases", async () => {
+ const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
+ const model = {
+ api: "bedrock-converse-stream",
+ provider: "amazon-bedrock",
+ id: "production-claude",
+ params: { canonicalModelId: "claude-opus-4-8" },
+ };
+ const wrapped = provider.wrapStreamFn?.({
+ provider: "amazon-bedrock",
+ modelId: model.id,
+ model,
+ streamFn: spyStreamFn,
+ } as never);
+
+ const result = wrapped?.(model as never, { messages: [] } as never, {
+ temperature: 0.2,
+ maxTokens: 10,
+ }) as Record | undefined;
+
+ expectWrappedResultFields(result, { maxTokens: 10 });
+ expect(result).not.toHaveProperty("temperature");
+ expect(result).not.toHaveProperty("cacheRetention", "none");
});
it("omits temperature for dotted Bedrock Opus 4.7 model ids", async () => {
@@ -604,6 +737,36 @@ describe("amazon-bedrock provider plugin", () => {
expect(payload.additionalModelRequestFields.output_config).toEqual({ effort: "max" });
});
+ it("preserves Bedrock Opus 4.6 max thinking in the final payload", async () => {
+ const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
+ const wrapped = provider.wrapStreamFn?.({
+ provider: "amazon-bedrock",
+ modelId: "us.anthropic.claude-opus-4-6-v1",
+ streamFn: spyStreamFn,
+ thinkingLevel: "max",
+ } as never);
+
+ const result = wrapped?.(
+ {
+ api: "bedrock-converse-stream",
+ provider: "amazon-bedrock",
+ id: "us.anthropic.claude-opus-4-6-v1",
+ } as never,
+ { messages: [] } as never,
+ { reasoning: "high" } as never,
+ ) as Record | undefined;
+ const payload = {
+ additionalModelRequestFields: {
+ thinking: { type: "adaptive" },
+ output_config: { effort: "high" },
+ },
+ };
+
+ await (result?.onPayload as ((p: Record) => unknown) | undefined)?.(payload);
+
+ expect(payload.additionalModelRequestFields.output_config).toEqual({ effort: "max" });
+ });
+
it("keeps Bedrock Opus 4.7 xhigh thinking distinct from max", async () => {
const provider = await registerSingleProviderPlugin(amazonBedrockPlugin);
const wrapped = provider.wrapStreamFn?.({
@@ -930,6 +1093,38 @@ describe("amazon-bedrock provider plugin", () => {
expect(result).not.toHaveProperty("capturedPayload");
});
+ it("omits unsupported service tiers for Fable", async () => {
+ const provider = await registerWithConfig(undefined);
+ const result = await callWrappedStream(
+ provider,
+ "us.anthropic.claude-fable-5",
+ {
+ api: "bedrock-converse-stream",
+ provider: "amazon-bedrock",
+ id: "us.anthropic.claude-fable-5",
+ } as never,
+ runtimePluginConfig(undefined),
+ { serviceTier: "flex" },
+ );
+ expect(result).not.toHaveProperty("capturedPayload");
+ });
+
+ it("keeps the standard service tier for Fable", async () => {
+ const provider = await registerWithConfig(undefined);
+ const result = await callWrappedStream(
+ provider,
+ "us.anthropic.claude-fable-5",
+ {
+ api: "bedrock-converse-stream",
+ provider: "amazon-bedrock",
+ id: "us.anthropic.claude-fable-5",
+ } as never,
+ runtimePluginConfig(undefined),
+ { serviceTier: "default" },
+ );
+ expectPayloadServiceTier(result, "default");
+ });
+
it("does not overwrite caller-provided serviceTier in payload", async () => {
const provider = await registerWithConfig(undefined);
const result = await callWrappedStream(
diff --git a/extensions/amazon-bedrock/provider-policy-api.test.ts b/extensions/amazon-bedrock/provider-policy-api.test.ts
index 2961b226cdee..11a984f63990 100644
--- a/extensions/amazon-bedrock/provider-policy-api.test.ts
+++ b/extensions/amazon-bedrock/provider-policy-api.test.ts
@@ -16,10 +16,27 @@ describe("amazon-bedrock provider-policy-api", () => {
"medium",
"high",
"adaptive",
+ "max",
]);
expect(profile?.defaultLevel).toBe("adaptive");
});
+ it("caps Bedrock Claude Sonnet 4.6 at high effort", () => {
+ const profile = resolveThinkingProfile({
+ provider: "amazon-bedrock",
+ modelId: "amazon-bedrock/global.anthropic.claude-sonnet-4-6",
+ });
+
+ expect(profile?.levels.map((level) => level.id)).toEqual([
+ "off",
+ "minimal",
+ "low",
+ "medium",
+ "high",
+ "adaptive",
+ ]);
+ });
+
it("leaves Bedrock Claude Opus 4.8 thinking off by default with max effort available", () => {
const profile = resolveThinkingProfile({
provider: "amazon-bedrock",
@@ -50,6 +67,32 @@ describe("amazon-bedrock provider-policy-api", () => {
).toEqual(["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"]);
});
+ it.each([
+ {
+ canonicalModelId: "claude-fable-5",
+ defaultLevel: "high",
+ preservesCatalogOptOut: true,
+ },
+ {
+ canonicalModelId: "claude-opus-4-8",
+ defaultLevel: "off",
+ preservesCatalogOptOut: false,
+ },
+ ])(
+ "resolves $canonicalModelId deployment aliases from canonical metadata",
+ ({ canonicalModelId, defaultLevel, preservesCatalogOptOut }) => {
+ const profile = resolveThinkingProfile({
+ provider: "amazon-bedrock",
+ modelId: "production-claude",
+ params: { canonicalModelId },
+ });
+
+ expect(profile?.defaultLevel).toBe(defaultLevel);
+ expect(profile?.levels.map((level) => level.id)).toContain("max");
+ expect(profile?.preserveWhenCatalogReasoningFalse === true).toBe(preservesCatalogOptOut);
+ },
+ );
+
it("ignores unrelated providers", () => {
expect(
resolveThinkingProfile({ provider: "anthropic", modelId: "claude-opus-4-6" }),
diff --git a/extensions/amazon-bedrock/provider-policy-api.ts b/extensions/amazon-bedrock/provider-policy-api.ts
index df09b3b5e7d1..d9f56ad1bd49 100644
--- a/extensions/amazon-bedrock/provider-policy-api.ts
+++ b/extensions/amazon-bedrock/provider-policy-api.ts
@@ -6,9 +6,13 @@ import { normalizeProviderId } from "openclaw/plugin-sdk/provider-model-shared";
import { resolveBedrockClaudeThinkingProfile } from "./thinking-policy.js";
/** Resolve the Bedrock thinking profile for a provider/model pair. */
-export function resolveThinkingProfile(params: { provider: string; modelId: string }) {
+export function resolveThinkingProfile(params: {
+ provider: string;
+ modelId: string;
+ params?: Record;
+}) {
if (normalizeProviderId(params.provider) !== "amazon-bedrock") {
return null;
}
- return resolveBedrockClaudeThinkingProfile(params.modelId);
+ return resolveBedrockClaudeThinkingProfile(params.modelId, params.params);
}
diff --git a/extensions/amazon-bedrock/register.sync.runtime.ts b/extensions/amazon-bedrock/register.sync.runtime.ts
index cfad4f9f7fdc..f6d6f5af8dd9 100644
--- a/extensions/amazon-bedrock/register.sync.runtime.ts
+++ b/extensions/amazon-bedrock/register.sync.runtime.ts
@@ -6,10 +6,15 @@ import type { StreamFn } from "openclaw/plugin-sdk/agent-core";
import type { OpenClawConfig } from "openclaw/plugin-sdk/config-contracts";
import { registerApiProvider, streamSimple } from "openclaw/plugin-sdk/llm";
import { resolvePluginConfigObject } from "openclaw/plugin-sdk/plugin-config-runtime";
-import type { OpenClawPluginApi } from "openclaw/plugin-sdk/plugin-entry";
+import type {
+ OpenClawPluginApi,
+ ProviderNormalizeResolvedModelContext,
+} from "openclaw/plugin-sdk/plugin-entry";
import {
ANTHROPIC_BY_MODEL_REPLAY_HOOKS,
normalizeProviderId,
+ resolveClaudeFable5ModelIdentity,
+ resolveClaudeModelIdentity,
} from "openclaw/plugin-sdk/provider-model-shared";
import { streamWithPayloadPatch } from "openclaw/plugin-sdk/provider-stream-shared";
import { refreshAwsSharedConfigCacheForBedrock } from "./aws-credential-refresh.js";
@@ -19,7 +24,9 @@ import { bedrockMemoryEmbeddingProviderAdapter } from "./memory-embedding-adapte
import { streamBedrock, streamSimpleBedrock } from "./stream.runtime.js";
import {
isOpus47OrNewerBedrockModelRef,
+ resolveBedrockNativeThinkingLevelMap,
resolveBedrockClaudeThinkingProfile,
+ supportsBedrockNativeMaxEffort,
} from "./thinking-policy.js";
type GuardrailConfig = {
@@ -41,6 +48,29 @@ type AmazonBedrockPluginConfig = {
guardrail?: GuardrailConfig;
};
+function normalizeBedrockResolvedModel({ modelId, model }: ProviderNormalizeResolvedModelContext) {
+ const thinkingLevelMap = resolveBedrockNativeThinkingLevelMap(modelId, model.params);
+ if (!thinkingLevelMap) {
+ return undefined;
+ }
+ const reasoning =
+ model.reasoning ||
+ resolveClaudeFable5ModelIdentity({ id: modelId, params: model.params }) !== undefined;
+ const current = model.thinkingLevelMap;
+ const currentEfforts = current as Record | undefined;
+ if (
+ reasoning === model.reasoning &&
+ Object.entries(thinkingLevelMap).every(([level, effort]) => currentEfforts?.[level] === effort)
+ ) {
+ return undefined;
+ }
+ return {
+ ...model,
+ reasoning,
+ thinkingLevelMap: { ...thinkingLevelMap, ...current },
+ };
+}
+
const BEDROCK_SERVICE_TIER_VALUES = ["flex", "priority", "default", "reserved"] as const;
type BedrockServiceTier = (typeof BEDROCK_SERVICE_TIER_VALUES)[number];
@@ -103,9 +133,17 @@ function createBedrockServiceTierWrapper(
}
function createGuardrailWrapStreamFn(
- innerWrapStreamFn: (ctx: { modelId: string; streamFn?: StreamFn }) => StreamFn | null | undefined,
+ innerWrapStreamFn: (ctx: {
+ modelId: string;
+ model?: { params?: Record };
+ streamFn?: StreamFn;
+ }) => StreamFn | null | undefined,
guardrailConfig: GuardrailConfig,
-): (ctx: { modelId: string; streamFn?: StreamFn }) => StreamFn | null | undefined {
+): (ctx: {
+ modelId: string;
+ model?: { params?: Record };
+ streamFn?: StreamFn;
+}) => StreamFn | null | undefined {
return (ctx) => {
const inner = innerWrapStreamFn(ctx);
if (!inner) {
@@ -327,7 +365,7 @@ function injectBedrockCachePoints(
}
}
-function patchOpus47MaxThinkingEffort(payload: Record): void {
+function patchMaxThinkingEffort(payload: Record): void {
const fieldsValue = payload.additionalModelRequestFields;
const fields =
fieldsValue && typeof fieldsValue === "object" && !Array.isArray(fieldsValue)
@@ -382,8 +420,20 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
api.registerMemoryEmbeddingProvider(bedrockMemoryEmbeddingProviderAdapter);
- const baseWrapStreamFn = ({ modelId, streamFn }: { modelId: string; streamFn?: StreamFn }) => {
- if (isAnthropicBedrockModel(modelId)) {
+ const baseWrapStreamFn = ({
+ modelId,
+ model,
+ streamFn,
+ }: {
+ modelId: string;
+ model?: { params?: Record };
+ streamFn?: StreamFn;
+ }) => {
+ const modelRef = { id: modelId, params: model?.params };
+ if (
+ isAnthropicBedrockModel(modelId) ||
+ resolveClaudeModelIdentity(modelRef).startsWith("claude-")
+ ) {
return streamFn;
}
// For app inference profiles with opaque IDs, don't force cacheRetention: "none"
@@ -394,11 +444,16 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
return createBedrockNoCacheWrapper(streamFn);
};
- function omitDeprecatedOpus47Temperature(
- modelId: string,
+ function omitUnsupportedClaudeTemperature(
+ modelRef: { id: string; params?: Record },
options: TOptions,
): TOptions {
- if (!isOpus47OrNewerBedrockModelRef(modelId) || !("temperature" in options)) {
+ const canonicalModelId = resolveClaudeModelIdentity(modelRef);
+ const omitsTemperature =
+ isOpus47OrNewerBedrockModelRef(modelRef.id) ||
+ isOpus47OrNewerBedrockModelRef(canonicalModelId) ||
+ resolveClaudeFable5ModelIdentity(modelRef) !== undefined;
+ if (!omitsTemperature || !("temperature" in options)) {
return options;
}
const next = { ...options } as typeof options & { temperature?: unknown };
@@ -406,7 +461,7 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
return next;
}
- function omitDeprecatedOpus47PayloadTemperature(payload: Record): void {
+ function omitUnsupportedClaudePayloadTemperature(payload: Record): void {
const inferenceConfig = payload.inferenceConfig;
if (!inferenceConfig || typeof inferenceConfig !== "object") {
return;
@@ -501,20 +556,38 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
},
},
resolveConfigApiKey: ({ env }) => resolveBedrockConfigApiKey(env),
+ normalizeResolvedModel: normalizeBedrockResolvedModel,
...anthropicByModelReplayHooks,
wrapStreamFn: ({ modelId, config, model, streamFn, thinkingLevel, extraParams }) => {
const currentPluginConfig = resolveCurrentPluginConfig(config);
const currentGuardrail = currentPluginConfig?.guardrail;
+ const modelRef = { id: modelId, params: model?.params };
+ const fable5 = resolveClaudeFable5ModelIdentity(modelRef) !== undefined;
+ const canonicalModelId = resolveClaudeModelIdentity(modelRef);
+ const opus47OrNewer =
+ isOpus47OrNewerBedrockModelRef(modelId) || isOpus47OrNewerBedrockModelRef(canonicalModelId);
+ const supportsNativeMax = supportsBedrockNativeMaxEffort(modelId, model?.params);
let wrapped =
(currentGuardrail?.guardrailIdentifier && currentGuardrail?.guardrailVersion
- ? createGuardrailWrapStreamFn(baseWrapStreamFn, currentGuardrail)({ modelId, streamFn })
- : baseWrapStreamFn({ modelId, streamFn })) ?? undefined;
+ ? createGuardrailWrapStreamFn(
+ baseWrapStreamFn,
+ currentGuardrail,
+ )({
+ modelId,
+ model,
+ streamFn,
+ })
+ : baseWrapStreamFn({ modelId, model, streamFn })) ?? undefined;
const serviceTier = resolveBedrockServiceTier(extraParams, (message) =>
api.logger.warn(message),
);
if (serviceTier && wrapped) {
- wrapped = createBedrockServiceTierWrapper(wrapped, serviceTier);
+ if (fable5 && serviceTier !== "default") {
+ api.logger.warn(`ignoring unsupported Fable 5 Bedrock service tier: ${serviceTier}`);
+ } else {
+ wrapped = createBedrockServiceTierWrapper(wrapped, serviceTier);
+ }
}
const region =
@@ -523,8 +596,8 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
currentPluginConfig?.discovery?.region;
const mayNeedCacheInjection =
isBedrockAppInferenceProfile(modelId) && !sharedRuntimeWouldInjectCachePoints(modelId);
- const shouldOmitTemperature = isOpus47OrNewerBedrockModelRef(modelId);
- const shouldPatchMaxThinking = shouldOmitTemperature && thinkingLevel === "max";
+ const shouldOmitTemperature = opus47OrNewer || fable5;
+ const shouldPatchMaxThinking = supportsNativeMax && thinkingLevel === "max";
// For known Anthropic models (heuristic match), enable injection immediately.
// For opaque profile IDs, we'll resolve via GetInferenceProfile on first call.
@@ -539,8 +612,8 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
return wrapped;
}
return (streamModel, context, options) => {
- const merged = omitDeprecatedOpus47Temperature(
- modelId,
+ const merged = omitUnsupportedClaudeTemperature(
+ modelRef,
Object.assign({}, options, region ? { region } : {}),
);
@@ -559,8 +632,8 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
onPayload: (payload: unknown, payloadModel: unknown) => {
if (payload && typeof payload === "object") {
const payloadRecord = payload as Record;
- patchOpus47MaxThinkingEffort(payloadRecord);
- omitDeprecatedOpus47PayloadTemperature(payloadRecord);
+ patchMaxThinkingEffort(payloadRecord);
+ omitUnsupportedClaudePayloadTemperature(payloadRecord);
}
return originalOnPayload?.(payload, payloadModel);
},
@@ -594,14 +667,14 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
const payloadRecord = payload as Record;
injectBedrockCachePoints(payloadRecord, cacheRetention);
if (shouldPatchMaxThinking) {
- patchOpus47MaxThinkingEffort(payloadRecord);
+ patchMaxThinkingEffort(payloadRecord);
}
if (shouldOmitTemperature) {
- omitDeprecatedOpus47PayloadTemperature(payloadRecord);
+ omitUnsupportedClaudePayloadTemperature(payloadRecord);
} else if (mayNeedTemperatureTrait) {
const traits = await resolveAppProfileTraits(modelId, region);
if (traits.omitTemperature) {
- omitDeprecatedOpus47PayloadTemperature(payloadRecord);
+ omitUnsupportedClaudePayloadTemperature(payloadRecord);
}
}
}
@@ -626,10 +699,10 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
injectBedrockCachePoints(payloadRecord, cacheRetention);
}
if (shouldPatchMaxThinking) {
- patchOpus47MaxThinkingEffort(payloadRecord);
+ patchMaxThinkingEffort(payloadRecord);
}
if (traits.omitTemperature) {
- omitDeprecatedOpus47PayloadTemperature(payloadRecord);
+ omitUnsupportedClaudePayloadTemperature(payloadRecord);
}
}
return originalOnPayload?.(payload, payloadModel);
@@ -652,6 +725,7 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void {
}
return undefined;
},
- resolveThinkingProfile: ({ modelId }) => resolveBedrockClaudeThinkingProfile(modelId),
+ resolveThinkingProfile: ({ modelId, params }) =>
+ resolveBedrockClaudeThinkingProfile(modelId, params),
});
}
diff --git a/extensions/amazon-bedrock/stream.runtime.test.ts b/extensions/amazon-bedrock/stream.runtime.test.ts
index 2e2229add84c..cf6d61a4d4d3 100644
--- a/extensions/amazon-bedrock/stream.runtime.test.ts
+++ b/extensions/amazon-bedrock/stream.runtime.test.ts
@@ -1,6 +1,8 @@
// Amazon Bedrock tests cover stream plugin behavior.
-import { describe, expect, it } from "vitest";
-import { testing } from "./stream.runtime.js";
+import { BedrockRuntimeClient, ConversationRole } from "@aws-sdk/client-bedrock-runtime";
+import { onLlmRequestActivity } from "openclaw/plugin-sdk/provider-stream-shared";
+import { afterEach, describe, expect, it, vi } from "vitest";
+import { streamBedrock, streamSimpleBedrock, testing } from "./stream.runtime.js";
function bedrockModel(overrides: Record) {
return {
@@ -39,6 +41,16 @@ function signedThinkingContext(modelId: string) {
} as never;
}
+async function* streamEvents(events: unknown[]) { // Test helper: replays `events` in order as an async iterable.
+ for (const event of events) {
+ yield event; // consumed as the `stream` field of the mocked BedrockRuntimeClient.send results in this file
+ }
+}
+
+afterEach(() => {
+ vi.restoreAllMocks();
+});
+
describe("Bedrock reasoning replay", () => {
it("preserves signed reasoning for Claude profile descriptors", () => {
const modelId =
@@ -74,6 +86,69 @@ describe("Bedrock reasoning replay", () => {
expect(messages[0]?.content).toEqual([{ text: "privatereasoning" }]);
});
+
+ it("preserves signature-only Fable reasoning blocks", () => {
+ const modelId = "anthropic.claude-fable-5";
+ const messages = testing.convertMessages(
+ {
+ messages: [
+ {
+ role: "assistant",
+ api: "bedrock-converse-stream",
+ provider: "amazon-bedrock",
+ model: modelId,
+ content: [
+ {
+ type: "thinking",
+ thinking: "",
+ thinkingSignature: " sig-fable ",
+ },
+ ],
+ },
+ ],
+ } as never,
+ bedrockModel({ id: modelId, name: "Claude Fable 5" }),
+ "none",
+ );
+
+ expect(messages[0]?.content).toEqual([
+ {
+ reasoningContent: {
+ reasoningText: {
+ text: "",
+ signature: " sig-fable ",
+ },
+ },
+ },
+ ]);
+ });
+
+ it("drops synthetic reasoning placeholders from Claude replay", () => {
+ const modelId = "anthropic.claude-fable-5";
+ const messages = testing.convertMessages(
+ {
+ messages: [
+ {
+ role: "assistant",
+ api: "bedrock-converse-stream",
+ provider: "amazon-bedrock",
+ model: modelId,
+ content: [
+ {
+ type: "thinking",
+ thinking: "hidden compatibility reasoning",
+ thinkingSignature: "reasoning_content",
+ },
+ ],
+ },
+ ],
+ } as never,
+ bedrockModel({ id: modelId, name: "Claude Fable 5" }),
+ "none",
+ );
+
+ expect(messages).toEqual([]);
+ });
});
describe("Bedrock profile endpoint resolution", () => {
@@ -92,7 +167,7 @@ describe("Bedrock profile endpoint resolution", () => {
});
describe("Bedrock thinking effort mapping", () => {
- it("clamps max effort for Claude models without native max support", () => {
+ it("caps max effort at high for Claude Sonnet 4.6", () => {
expect(
testing.mapThinkingLevelToEffort(
bedrockModel({
@@ -104,6 +179,18 @@ describe("Bedrock thinking effort mapping", () => {
).toBe("high");
});
+ it("caps unsupported xhigh effort at high for Claude Opus 4.6", () => {
+ expect(
+ testing.mapThinkingLevelToEffort(
+ bedrockModel({
+ id: "anthropic.claude-opus-4-6-v1:0",
+ name: "Claude Opus 4.6",
+ }),
+ "xhigh",
+ ),
+ ).toBe("high");
+ });
+
it("preserves max effort for Claude Opus 4.8", () => {
expect(
testing.mapThinkingLevelToEffort(
@@ -115,4 +202,275 @@ describe("Bedrock thinking effort mapping", () => {
),
).toBe("max");
});
+
+ it("uses canonical Claude policy for deployment aliases", () => {
+ expect(
+ testing.mapThinkingLevelToEffort(
+ bedrockModel({
+ id: "production-claude",
+ name: "Production Claude",
+ params: { canonicalModelId: "claude-opus-4-8" },
+ }),
+ "max",
+ ),
+ ).toBe("max");
+ });
+
+ it("preserves adaptive effort for opaque profiles with descriptive Claude names", () => {
+ expect(
+ testing.mapThinkingLevelToEffort(
+ bedrockModel({
+ id: "arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/profile-abc",
+ name: "Claude Production Opus 4.8",
+ }),
+ "xhigh",
+ ),
+ ).toBe("xhigh");
+ });
+});
+
+describe("Bedrock Fable contract", () => {
+ function fableModel() {
+ return bedrockModel({
+ id: "production-fable",
+ name: "Production deployment",
+ reasoning: false,
+ params: { canonicalModelId: "claude-fable-5" },
+ contextWindow: 1_000_000,
+ maxTokens: 128_000,
+ });
+ }
+
+ function context() {
+ return {
+ messages: [{ role: "user", content: "Reply briefly.", timestamp: 0 }],
+ tools: [
+ {
+ name: "lookup",
+ description: "Lookup",
+ parameters: { type: "object", properties: {} },
+ },
+ ],
+ } as never;
+ }
+
+ it("sends always-adaptive high effort without unsupported request controls", async () => {
+ const send = vi.spyOn(BedrockRuntimeClient.prototype, "send").mockResolvedValue({
+ $metadata: { httpStatusCode: 200 },
+ stream: streamEvents([
+ { messageStart: { role: ConversationRole.ASSISTANT } },
+ { messageStop: { stopReason: "end_turn" } },
+ ]),
+ } as never);
+
+ const stream = streamBedrock(fableModel(), context(), {
+ reasoning: "high",
+ temperature: 0.2,
+ toolChoice: "any",
+ });
+ await stream.result();
+
+ const command = send.mock.calls[0]?.[0] as { input?: Record };
+ expect(command.input).toMatchObject({
+ modelId: "production-fable",
+ inferenceConfig: {},
+ messages: [
+ {
+ role: "user",
+ content: [{ text: "Reply briefly." }, { cachePoint: { type: "default" } }],
+ },
+ ],
+ toolConfig: { toolChoice: { auto: {} } },
+ additionalModelRequestFields: {
+ thinking: { type: "adaptive", display: "summarized" },
+ output_config: { effort: "high" },
+ },
+ additionalModelResponseFieldPaths: ["/stop_details"],
+ });
+ });
+
+ it("preserves explicit tool disabling", async () => {
+ const send = vi.spyOn(BedrockRuntimeClient.prototype, "send").mockResolvedValue({
+ $metadata: { httpStatusCode: 200 },
+ stream: streamEvents([
+ { messageStart: { role: ConversationRole.ASSISTANT } },
+ { messageStop: { stopReason: "end_turn" } },
+ ]),
+ } as never);
+
+ const stream = streamBedrock(fableModel(), context(), {
+ reasoning: "high",
+ toolChoice: "none",
+ });
+ await stream.result();
+
+ const command = send.mock.calls[0]?.[0] as { input?: Record };
+ expect(command.input?.toolConfig).toBeUndefined();
+ });
+
+ it("quarantines partial output when Fable returns a terminal refusal", async () => {
+ vi.spyOn(BedrockRuntimeClient.prototype, "send").mockResolvedValue({
+ $metadata: { httpStatusCode: 200 },
+ stream: streamEvents([
+ {
+ contentBlockDelta: {
+ contentBlockIndex: 0,
+ delta: { text: "discard this partial output" },
+ },
+ },
+ {
+ messageStop: {
+ stopReason: "refusal",
+ additionalModelResponseFields: {
+ stop_details: {
+ category: "cyber",
+ explanation: "This request is not allowed.",
+ },
+ },
+ },
+ },
+ ]),
+ } as never);
+
+ const stream = streamSimpleBedrock(fableModel(), context());
+ const eventTypes: string[] = [];
+ for await (const event of stream) {
+ eventTypes.push(event.type);
+ }
+ const result = await stream.result();
+
+ expect(eventTypes).toEqual(["error"]);
+ expect(result.content).toEqual([]);
+ expect(result.errorMessage).toBe(
+ "Anthropic refusal (category: cyber): This request is not allowed.",
+ );
+ expect(result.diagnostics).toEqual([
+ expect.objectContaining({
+ type: "provider_refusal",
+ details: {
+ provider: "amazon-bedrock",
+ category: "cyber",
+ explanation: "This request is not allowed.",
+ },
+ }),
+ ]);
+ });
+
+ it("discards partial output when the Fable stream ends without messageStop", async () => {
+ vi.spyOn(BedrockRuntimeClient.prototype, "send").mockResolvedValue({
+ $metadata: { httpStatusCode: 200 },
+ stream: streamEvents([
+ { messageStart: { role: ConversationRole.ASSISTANT } },
+ {
+ contentBlockDelta: {
+ contentBlockIndex: 0,
+ delta: { text: "unsafe partial output" },
+ },
+ },
+ ]),
+ } as never);
+
+ const stream = streamSimpleBedrock(fableModel(), context());
+ const eventTypes: string[] = [];
+ for await (const event of stream) {
+ eventTypes.push(event.type);
+ }
+ const result = await stream.result();
+
+ expect(eventTypes).toEqual(["error"]);
+ expect(result.content).toEqual([]);
+ expect(result.errorMessage).toContain("ended before messageStop");
+ });
+
+ it("reports activity while Fable events are buffered", async () => {
+ vi.spyOn(BedrockRuntimeClient.prototype, "send").mockResolvedValue({
+ $metadata: { httpStatusCode: 200 },
+ stream: streamEvents([
+ { messageStart: { role: ConversationRole.ASSISTANT } },
+ {
+ contentBlockDelta: {
+ contentBlockIndex: 0,
+ delta: { text: "buffered output" },
+ },
+ },
+ { messageStop: { stopReason: "end_turn" } },
+ ]),
+ } as never);
+ const controller = new AbortController();
+ let activityCount = 0;
+ const unsubscribe = onLlmRequestActivity(controller.signal, () => {
+ activityCount += 1;
+ });
+
+ try {
+ const stream = streamSimpleBedrock(fableModel(), context(), {
+ signal: controller.signal,
+ });
+ await stream.result();
+ } finally {
+ unsubscribe();
+ }
+
+ expect(activityCount).toBeGreaterThan(0);
+ });
+});
+
+describe("Bedrock canonical Claude aliases", () => {
+ it.each([
+ {
+ canonicalModelId: "claude-opus-4-8",
+ reasoning: "xhigh" as const,
+ thinkingLevelMap: { xhigh: "xhigh" as const, max: "max" as const },
+ expectedEffort: "xhigh",
+ },
+ {
+ canonicalModelId: "claude-opus-4-6",
+ reasoning: "max" as const,
+ thinkingLevelMap: { xhigh: null, max: "max" as const },
+ expectedEffort: "max",
+ },
+ {
+ canonicalModelId: "claude-opus-4-6",
+ reasoning: "max" as const,
+ thinkingLevelMap: { xhigh: null, max: null },
+ expectedEffort: "high",
+ },
+ ])(
+ "uses adaptive thinking and omits temperature for $canonicalModelId aliases",
+ async ({ canonicalModelId, reasoning, thinkingLevelMap, expectedEffort }) => {
+ const send = vi.spyOn(BedrockRuntimeClient.prototype, "send").mockResolvedValue({
+ $metadata: { httpStatusCode: 200 },
+ stream: streamEvents([
+ { messageStart: { role: ConversationRole.ASSISTANT } },
+ { messageStop: { stopReason: "end_turn" } },
+ ]),
+ } as never);
+ const model = bedrockModel({
+ id: "production-claude",
+ name: "Production Claude",
+ reasoning: false,
+ params: { canonicalModelId },
+ thinkingLevelMap,
+ });
+
+ await streamSimpleBedrock(
+ model,
+ { messages: [{ role: "user", content: "Reply briefly.", timestamp: 0 }] } as never,
+ {
+ reasoning,
+ temperature: 0.2,
+ },
+ ).result();
+
+ const command = send.mock.calls[0]?.[0] as { input?: Record };
+ expect(command.input).toMatchObject({
+ modelId: "production-claude",
+ inferenceConfig: {},
+ additionalModelRequestFields: {
+ thinking: { type: "adaptive", display: "summarized" },
+ output_config: { effort: expectedEffort },
+ },
+ });
+ },
+ );
});
diff --git a/extensions/amazon-bedrock/stream.runtime.ts b/extensions/amazon-bedrock/stream.runtime.ts
index 5ff6230123a8..883eabc89353 100644
--- a/extensions/amazon-bedrock/stream.runtime.ts
+++ b/extensions/amazon-bedrock/stream.runtime.ts
@@ -38,6 +38,7 @@ import {
transformMessages,
type Api,
type AssistantMessage,
+ type AssistantMessageEvent,
type CacheRetention,
type Context,
type Model,
@@ -51,9 +52,43 @@ import {
type ToolCall,
type ToolResultMessage,
} from "openclaw/plugin-sdk/llm";
+import {
+ resolveClaudeFable5ModelIdentity,
+ resolveClaudeModelIdentity,
+ supportsClaudeAdaptiveThinking,
+ supportsClaudeNativeXhighEffort,
+} from "openclaw/plugin-sdk/provider-model-shared";
+import {
+ applyAnthropicRefusal,
+ createDeferredEventBuffer,
+ notifyLlmRequestActivity,
+} from "openclaw/plugin-sdk/provider-stream-shared";
import { supportsBedrockPromptCaching, type BedrockOptions } from "./bedrock-options.js";
+import { supportsBedrockNativeMaxEffort } from "./thinking-policy.js";
type Block = (TextContent | ThinkingContent | ToolCall) & { index?: number; partialJson?: string };
+type BedrockEventSink = { push(event: AssistantMessageEvent): void };
+
+function usesClaudeFable5BedrockContract(model: Model<"bedrock-converse-stream">): boolean { // Gate for the Fable 5 specific request and stream handling in this file.
+ return resolveClaudeFable5ModelIdentity(model) !== undefined; // any resolved identity counts; undefined means the model is not treated as Fable 5
+}
+
+function readBedrockStopDetails(fields: DocumentType | undefined): unknown { // Extracts the stop details object from messageStop.additionalModelResponseFields, if present.
+ if (!fields || typeof fields !== "object" || Array.isArray(fields)) { // only plain objects can carry the field; null, scalars and arrays are ignored
+ return undefined;
+ }
+ const record = fields as Record;
+ return record.stop_details ?? record.stopDetails; // snake_case key wins; the camelCase spelling is the fallback
+}
+
+function normalizeFableToolChoice( // Rewrites forced tool use to "auto" for Fable 5 requests; every other value passes through unchanged.
+ toolChoice: BedrockOptions["toolChoice"],
+): BedrockOptions["toolChoice"] {
+ if (toolChoice === "any" || (typeof toolChoice === "object" && toolChoice?.type === "tool")) { // "any" and a specific named tool are the two forced modes
+ return "auto"; // NOTE(review): presumably Fable 5 rejects forced tool choice; the contract test expects "any" to be sent as { auto: {} }
+ }
+ return toolChoice; // includes undefined, "auto" and "none"
+}
/** Stream a Bedrock Converse request using Bedrock-specific options. */
export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOptions> = (
@@ -83,6 +118,15 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt
};
const blocks = output.content as Block[];
+ const fable5 = usesClaudeFable5BedrockContract(model);
+ // Fable classifiers may refuse after partial output. Hold every event until
+ // messageStop proves the response is safe to expose.
+ const refusalBuffer = fable5
+ ? createDeferredEventBuffer(stream, () =>
+ notifyLlmRequestActivity(options.signal),
+ )
+ : undefined;
+ const eventSink = refusalBuffer ?? stream;
const config: BedrockRuntimeClientConfig = {
profile: options.profile,
@@ -155,16 +199,28 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt
try {
const client = new BedrockRuntimeClient(config);
const cacheRetention = resolveCacheRetention(options.cacheRetention);
+ const additionalModelRequestFields = buildAdditionalModelRequestFields(model, options);
+ const thinking = (additionalModelRequestFields as Record | undefined)
+ ?.thinking;
+ const sendsAdaptiveThinking =
+ thinking !== null &&
+ typeof thinking === "object" &&
+ (thinking as { type?: unknown }).type === "adaptive";
let commandInput = {
modelId: model.id,
messages: convertMessages(context, model, cacheRetention),
system: buildSystemPrompt(context.systemPrompt, model, cacheRetention),
inferenceConfig: {
...(options.maxTokens !== undefined && { maxTokens: options.maxTokens }),
- ...(options.temperature !== undefined && { temperature: options.temperature }),
+ ...(options.temperature !== undefined &&
+ !sendsAdaptiveThinking && { temperature: options.temperature }),
},
- toolConfig: convertToolConfig(context.tools, options.toolChoice),
- additionalModelRequestFields: buildAdditionalModelRequestFields(model, options),
+ toolConfig: convertToolConfig(
+ context.tools,
+ fable5 ? normalizeFableToolChoice(options.toolChoice) : options.toolChoice,
+ ),
+ additionalModelRequestFields,
+ ...(fable5 ? { additionalModelResponseFieldPaths: ["/stop_details"] } : {}),
...(options.requestMetadata !== undefined && { requestMetadata: options.requestMetadata }),
};
const nextCommandInput = await options?.onPayload?.(commandInput, model);
@@ -185,6 +241,7 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt
);
}
+ let sawMessageStop = false;
for await (const item of response.stream!) {
if (item.messageStart) {
if (item.messageStart.role !== ConversationRole.ASSISTANT) {
@@ -192,15 +249,24 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt
"Unexpected assistant message start but got user message start instead",
);
}
- stream.push({ type: "start", partial: output });
+ eventSink.push({ type: "start", partial: output });
} else if (item.contentBlockStart) {
- handleContentBlockStart(item.contentBlockStart, blocks, output, stream);
+ handleContentBlockStart(item.contentBlockStart, blocks, output, eventSink);
} else if (item.contentBlockDelta) {
- handleContentBlockDelta(item.contentBlockDelta, blocks, output, stream);
+ handleContentBlockDelta(item.contentBlockDelta, blocks, output, eventSink);
} else if (item.contentBlockStop) {
- handleContentBlockStop(item.contentBlockStop, blocks, output, stream);
+ handleContentBlockStop(item.contentBlockStop, blocks, output, eventSink);
} else if (item.messageStop) {
- output.stopReason = mapStopReason(item.messageStop.stopReason);
+ sawMessageStop = true;
+ if ((item.messageStop.stopReason as string | undefined) === "refusal") {
+ applyAnthropicRefusal(
+ output,
+ readBedrockStopDetails(item.messageStop.additionalModelResponseFields),
+ model.provider,
+ );
+ } else {
+ output.stopReason = mapStopReason(item.messageStop.stopReason);
+ }
} else if (item.metadata) {
handleMetadata(item.metadata, model, output);
} else if (item.internalServerException) {
@@ -216,14 +282,18 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt
}
}
+ if (refusalBuffer && !sawMessageStop) {
+ throw new Error("Bedrock stream ended before messageStop");
+ }
if (options.signal?.aborted) {
throw new Error("Request was aborted");
}
if (output.stopReason === "error" || output.stopReason === "aborted") {
- throw new Error("An unknown error occurred");
+ throw new Error(output.errorMessage ?? "An unknown error occurred");
}
+ refusalBuffer?.flush();
stream.push({ type: "done", reason: output.stopReason, message: output });
stream.end();
} catch (error) {
@@ -232,6 +302,10 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt
// partialJson is only a streaming scratch buffer; never persist it.
delete (block as Block).partialJson;
}
+ if (refusalBuffer) {
+ refusalBuffer.discard();
+ output.content = [];
+ }
output.stopReason = options.signal?.aborted ? "aborted" : "error";
output.errorMessage = formatBedrockError(error);
stream.push({ type: "error", reason: output.stopReason, error: output });
@@ -279,6 +353,13 @@ export const streamSimpleBedrock: StreamFunction<"bedrock-converse-stream", Simp
options?: SimpleStreamOptions,
) => {
const base = buildBaseOptions(model, options, undefined);
+ if (usesClaudeFable5BedrockContract(model)) {
+ return streamBedrock(model, context, {
+ ...base,
+ reasoning: options?.reasoning ?? "high",
+ thinkingBudgets: options?.thinkingBudgets,
+ } satisfies BedrockOptions);
+ }
if (!options?.reasoning) {
return streamBedrock(model, context, {
...base,
@@ -287,7 +368,7 @@ export const streamSimpleBedrock: StreamFunction<"bedrock-converse-stream", Simp
}
if (isAnthropicClaudeModel(model)) {
- if (supportsAdaptiveThinking(model.id, model.name)) {
+ if (supportsAdaptiveThinking(model)) {
return streamBedrock(model, context, {
...base,
reasoning: options.reasoning,
@@ -326,7 +407,7 @@ function handleContentBlockStart(
event: ContentBlockStartEvent,
blocks: Block[],
output: AssistantMessage,
- stream: AssistantMessageEventStream,
+ stream: BedrockEventSink,
): void {
const index = event.contentBlockIndex!;
const start = event.start;
@@ -349,7 +430,7 @@ function handleContentBlockDelta(
event: ContentBlockDeltaEvent,
blocks: Block[],
output: AssistantMessage,
- stream: AssistantMessageEventStream,
+ stream: BedrockEventSink,
): void {
const contentBlockIndex = event.contentBlockIndex!;
const delta = event.delta;
@@ -432,7 +513,7 @@ function handleContentBlockStop(
event: ContentBlockStopEvent,
blocks: Block[],
output: AssistantMessage,
- stream: AssistantMessageEventStream,
+ stream: BedrockEventSink,
): void {
const index = blocks.findIndex((b) => b.index === event.contentBlockIndex);
const block = blocks[index];
@@ -463,47 +544,54 @@ function handleContentBlockStop(
}
}
-/**
- * Check if the model supports adaptive thinking (Opus 4.6+, Sonnet 4.6).
- * Checks both model ID and model name to support application inference profiles
- * whose ARNs don't contain the model name.
- */
-function getModelMatchCandidates(modelId: string, modelName?: string): string[] {
- const values = modelName ? [modelId, modelName] : [modelId];
- return values.flatMap((value) => {
- const lower = value.toLowerCase();
- return [lower, lower.replace(/[\s_.:]+/g, "-")];
- });
+function resolveClaudeProfileNameModelId(modelName?: string): string | undefined { // Derives a "claude-<family>" id from a display/profile name, or undefined when none is recognizable.
+ const normalized =
+ modelName
+ ?.trim()
+ .toLowerCase()
+ .replace(/[\s_.:]+/g, "-") ?? ""; // runs of whitespace, "_", "." and ":" collapse to "-" (e.g. "Opus 4.8" -> "opus-4-8"); a missing name becomes ""
+ if (!normalized.includes("claude")) { // names that never mention "claude" are not considered
+ return undefined;
+ }
+ const family = /(?:fable-5|opus-4-(?:6|7|8)|sonnet-4-6)(?:$|-)/.exec(normalized)?.[0]; // known families only; the match must end the name or be followed by "-"
+ return family ? `claude-${family.replace(/-$/, "")}` : undefined; // strip the trailing "-" the boundary may have captured
}
-function supportsAdaptiveThinking(modelId: string, modelName?: string): boolean {
- const candidates = getModelMatchCandidates(modelId, modelName);
- return candidates.some(
- (s) =>
- s.includes("opus-4-6") ||
- s.includes("opus-4-7") ||
- s.includes("opus-4-8") ||
- s.includes("sonnet-4-6"),
+/** Adaptive thinking applies when the model itself, or the Claude id derived from its display name, supports it. */
+function supportsAdaptiveThinking(model: Model<"bedrock-converse-stream">): boolean {
+ const profileModelId = resolveClaudeProfileNameModelId(model.name); // undefined when the name does not identify a known Claude family
+ return (
+ supportsClaudeAdaptiveThinking(model) || supportsClaudeAdaptiveThinking({ id: profileModelId }) // name-derived id is only consulted when the model check fails
+ );
}
function supportsNativeXhighEffort(model: Model<"bedrock-converse-stream">): boolean {
- const candidates = getModelMatchCandidates(model.id, model.name);
- return candidates.some((s) => s.includes("opus-4-7") || s.includes("opus-4-8"));
+ const profileModelId = resolveClaudeProfileNameModelId(model.name);
+ return (
+ supportsClaudeNativeXhighEffort(model) ||
+ supportsClaudeNativeXhighEffort({ id: profileModelId })
+ );
+}
+
+function supportsNativeMaxEffort(model: Model<"bedrock-converse-stream">): boolean { // True when "max" effort is supported, judged by id/params or by the Claude id derived from the display name.
+ const profileModelId = resolveClaudeProfileNameModelId(model.name); // undefined when the name does not identify a known Claude family
+ return (
+ supportsBedrockNativeMaxEffort(model.id, model.params) ||
+ supportsBedrockNativeMaxEffort(profileModelId ?? "") // "" is passed when no id could be derived from the name
+ );
}
function mapThinkingLevelToEffort(
model: Model<"bedrock-converse-stream">,
level: SimpleStreamOptions["reasoning"],
): "low" | "medium" | "high" | "xhigh" | "max" {
- if (level === "xhigh" && supportsNativeXhighEffort(model)) {
- return "xhigh";
- }
-
const mapped = level ? model.thinkingLevelMap?.[level] : undefined;
if (typeof mapped === "string") {
return mapped as "low" | "medium" | "high" | "xhigh" | "max";
}
+ if ((level === "xhigh" || level === "max") && mapped === null) {
+ return "high";
+ }
switch (level) {
case "minimal":
@@ -513,8 +601,10 @@ function mapThinkingLevelToEffort(
return "medium";
case "high":
return "high";
+ case "xhigh":
+ return supportsNativeXhighEffort(model) ? "xhigh" : "high";
case "max":
- return supportsNativeXhighEffort(model) ? "max" : "high";
+ return supportsNativeMaxEffort(model) ? "max" : "high";
default:
return "high";
}
@@ -540,6 +630,12 @@ function resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention
* whose ARNs don't contain the model name.
*/
function isAnthropicClaudeModel(model: Model<"bedrock-converse-stream">): boolean {
+ if (usesClaudeFable5BedrockContract(model)) {
+ return true;
+ }
+ if (resolveClaudeModelIdentity(model).startsWith("claude-")) {
+ return true;
+ }
const id = model.id.toLowerCase();
const name = model.name?.toLowerCase() ?? "";
return (
@@ -552,7 +648,11 @@ function isAnthropicClaudeModel(model: Model<"bedrock-converse-stream">): boolea
}
function supportsPromptCaching(model: Model<"bedrock-converse-stream">): boolean {
- return supportsBedrockPromptCaching(model.id, model.name);
+ return (
+ usesClaudeFable5BedrockContract(model) ||
+ supportsBedrockPromptCaching(model.id, model.name) ||
+ supportsBedrockPromptCaching(resolveClaudeModelIdentity(model), model.name)
+ );
}
/**
@@ -656,26 +756,35 @@ function convertMessages(
toolUse: { toolUseId: c.id, name: c.name, input: c.arguments as DocumentType },
});
break;
- case "thinking":
- // Skip empty thinking blocks
- if (c.thinking.trim().length === 0) {
+ case "thinking": {
+ const thinkingSignature = c.thinkingSignature;
+ const normalizedThinkingSignature = thinkingSignature?.trim();
+ const supportsSignature = supportsThinkingSignature(model);
+ const hasNativeThinkingSignature =
+ supportsSignature &&
+ Boolean(normalizedThinkingSignature) &&
+ normalizedThinkingSignature !== "reasoning_content";
+ if (c.thinking.trim().length === 0 && !hasNativeThinkingSignature) {
continue;
}
// Only Anthropic models support the signature field in reasoningText.
// For other models, we omit the signature to avoid errors like:
// "This model doesn't support the reasoningContent.reasoningText.signature field"
- if (supportsThinkingSignature(model)) {
+ if (supportsSignature) {
+ if (normalizedThinkingSignature === "reasoning_content") {
+ continue;
+ }
// Signatures arrive after thinking deltas. If a partial or externally
// persisted message lacks a signature, Bedrock rejects the replayed
// reasoning block. Fall back to plain text, matching Anthropic.
- if (!c.thinkingSignature || c.thinkingSignature.trim().length === 0) {
+ if (!thinkingSignature || !normalizedThinkingSignature) {
contentBlocks.push({ text: sanitizeSurrogates(c.thinking) });
} else {
contentBlocks.push({
reasoningContent: {
reasoningText: {
text: c.thinking,
- signature: c.thinkingSignature,
+ signature: thinkingSignature,
},
},
});
@@ -684,6 +793,7 @@ function convertMessages(
contentBlocks.push({ text: sanitizeSurrogates(c.thinking) });
}
break;
+ }
default:
continue;
}
@@ -877,7 +987,12 @@ function buildAdditionalModelRequestFields(
model: Model<"bedrock-converse-stream">,
options: BedrockOptions,
): DocumentType | undefined {
- if (!options.reasoning || !model.reasoning) {
+ if (
+ !options.reasoning ||
+ (!model.reasoning &&
+ !usesClaudeFable5BedrockContract(model) &&
+ !supportsAdaptiveThinking(model))
+ ) {
return undefined;
}
@@ -887,7 +1002,7 @@ function buildAdditionalModelRequestFields(
const display = isGovCloudBedrockTarget(model, options)
? undefined
: (options.thinkingDisplay ?? "summarized");
- const result: Record = supportsAdaptiveThinking(model.id, model.name)
+ const result: Record = supportsAdaptiveThinking(model)
? {
thinking: { type: "adaptive", ...(display !== undefined ? { display } : {}) },
output_config: { effort: mapThinkingLevelToEffort(model, options.reasoning) },
@@ -915,7 +1030,7 @@ function buildAdditionalModelRequestFields(
};
})();
- if (!supportsAdaptiveThinking(model.id, model.name) && (options.interleavedThinking ?? true)) {
+ if (!supportsAdaptiveThinking(model) && (options.interleavedThinking ?? true)) {
result.anthropic_beta = ["interleaved-thinking-2025-05-14"];
}
diff --git a/extensions/amazon-bedrock/thinking-policy.ts b/extensions/amazon-bedrock/thinking-policy.ts
index a81ee6c486b8..1c67daccc6e1 100644
--- a/extensions/amazon-bedrock/thinking-policy.ts
+++ b/extensions/amazon-bedrock/thinking-policy.ts
@@ -2,7 +2,14 @@
* Thinking-level policy for Claude models on Amazon Bedrock. It maps Bedrock
* model ids to the provider SDK thinking levels that are actually supported.
*/
-import type { ProviderThinkingProfile } from "openclaw/plugin-sdk/plugin-entry";
+import type {
+ ProviderRuntimeModel,
+ ProviderThinkingProfile,
+} from "openclaw/plugin-sdk/plugin-entry";
+import {
+ resolveClaudeFable5ModelIdentity,
+ resolveClaudeModelIdentity,
+} from "openclaw/plugin-sdk/provider-model-shared";
const BASE_CLAUDE_THINKING_LEVELS = [
{ id: "off" },
@@ -13,14 +20,20 @@ const BASE_CLAUDE_THINKING_LEVELS = [
] as const satisfies ProviderThinkingProfile["levels"];
function isOpus48BedrockModelRef(modelRef: string): boolean {
- return /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?anthropic\.claude-opus-4[.-]8(?:$|[-.:/])/i.test(
+ return /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?(?:anthropic\.)?claude-opus-4[.-]8(?:$|[-.:/])/i.test(
+ modelRef,
+ );
+}
+
+function isOpus46BedrockModelRef(modelRef: string): boolean { // Matches Claude Opus 4.6 refs such as "anthropic.claude-opus-4-6-v1:0" or a bare "claude-opus-4-6".
+ return /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?(?:anthropic\.)?claude-opus-4[.-]6(?:$|[-.:/])/i.test( // optional us/eu/ap/apac/au/jp/global and "anthropic." prefixes; "4.6" or "4-6"; case-insensitive
modelRef,
);
}
/** Return whether a Bedrock model ref names Claude Opus 4.7. */
export function isOpus47BedrockModelRef(modelRef: string): boolean {
- return /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?anthropic\.claude-opus-4[.-]7(?:$|[-.:/])/i.test(
+ return /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?(?:anthropic\.)?claude-opus-4[.-]7(?:$|[-.:/])/i.test(
modelRef,
);
}
@@ -30,22 +43,73 @@ export function isOpus47OrNewerBedrockModelRef(modelRef: string): boolean {
return isOpus47BedrockModelRef(modelRef) || isOpus48BedrockModelRef(modelRef);
}
+/** Return whether a Bedrock Claude ref supports max effort: Fable 5, or Opus 4.6 / 4.7 / 4.8 by raw or canonical id. */
+export function supportsBedrockNativeMaxEffort(
+ modelId: string,
+ params?: Record,
+): boolean {
+ if (resolveClaudeFable5ModelIdentity({ id: modelId, params })) { // Fable 5 always qualifies
+ return true;
+ }
+ const canonicalModelId = resolveClaudeModelIdentity({ id: modelId, params }); // NOTE(review): presumably honors params.canonicalModelId for aliased deployments; confirm in provider-model-shared
+ return [modelId, canonicalModelId].some( // either the raw id or the canonical id may name a qualifying Opus model
+ (modelRef) => isOpus46BedrockModelRef(modelRef) || isOpus47OrNewerBedrockModelRef(modelRef),
+ );
+}
+
+/** Resolve route-specific native effort mappings for Bedrock Claude models; undefined when the model has no native max effort. */
+export function resolveBedrockNativeThinkingLevelMap(
+ modelId: string,
+ params?: Record,
+): ProviderRuntimeModel["thinkingLevelMap"] | undefined {
+ const modelRef = { id: modelId, params };
+ if (resolveClaudeFable5ModelIdentity(modelRef)) {
+ return { off: "low", minimal: "low", xhigh: "xhigh", max: "max" }; // Fable 5: "off" and "minimal" are sent as "low"; xhigh and max pass through
+ }
+ if (!supportsBedrockNativeMaxEffort(modelId, params)) { // models without native max get no map at all
+ return undefined;
+ }
+ const canonicalModelId = resolveClaudeModelIdentity(modelRef);
+ return {
+ xhigh: [modelId, canonicalModelId].some(isOpus47OrNewerBedrockModelRef) ? "xhigh" : null, // null marks xhigh as unsupported (the Opus 4.6 case); mapThinkingLevelToEffort turns a null mapping into "high"
+ max: "max",
+ };
+}
+
/** Resolve supported Claude thinking levels for a Bedrock model id. */
-export function resolveBedrockClaudeThinkingProfile(modelId: string): ProviderThinkingProfile {
+export function resolveBedrockClaudeThinkingProfile(
+ modelId: string,
+ params?: Record,
+): ProviderThinkingProfile {
const trimmed = modelId.trim();
- if (isOpus48BedrockModelRef(trimmed)) {
+ const canonicalModelId = resolveClaudeModelIdentity({ id: trimmed, params });
+ const modelRefs = [trimmed, canonicalModelId];
+ if (resolveClaudeFable5ModelIdentity({ id: trimmed, params })) {
+ return {
+ levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }],
+ defaultLevel: "high",
+ preserveWhenCatalogReasoningFalse: true,
+ };
+ }
+ if (modelRefs.some(isOpus48BedrockModelRef)) {
return {
levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }],
defaultLevel: "off",
};
}
- if (isOpus47BedrockModelRef(trimmed)) {
+ if (modelRefs.some(isOpus47BedrockModelRef)) {
return {
levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }],
defaultLevel: "off",
};
}
- if (/claude-(?:opus|sonnet)-4(?:\.|-)6(?:$|[-.])/i.test(trimmed)) {
+ if (modelRefs.some(isOpus46BedrockModelRef)) {
+ return {
+ levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "adaptive" }, { id: "max" }],
+ defaultLevel: "adaptive",
+ };
+ }
+ if (modelRefs.some((modelRef) => /claude-sonnet-4(?:\.|-)6(?:$|[-.])/i.test(modelRef))) {
return {
levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "adaptive" }],
defaultLevel: "adaptive",
diff --git a/extensions/anthropic-vertex/index.test.ts b/extensions/anthropic-vertex/index.test.ts
index 6fa583799ba2..f2b4bb9727a2 100644
--- a/extensions/anthropic-vertex/index.test.ts
+++ b/extensions/anthropic-vertex/index.test.ts
@@ -78,14 +78,19 @@ describe("anthropic-vertex provider plugin", () => {
expect(result.provider.baseUrl).toBe("https://europe-west4-aiplatform.googleapis.com");
expect(result.provider.headers).toEqual({ "x-test-header": "1" });
expect(result.provider.models.map((model) => model.id)).toEqual([
+ "claude-fable-5",
"claude-opus-4-8",
"claude-opus-4-6",
"claude-sonnet-4-6",
]);
expect(result.provider.models[0]?.thinkingLevelMap).toEqual({
+ off: "low",
+ minimal: "low",
xhigh: "xhigh",
max: "max",
});
+ expect(result.provider.models[2]?.thinkingLevelMap).toEqual({ xhigh: null, max: "max" });
+ expect(result.provider.models[3]?.thinkingLevelMap).toEqual({ xhigh: null, max: "max" });
});
it("owns Anthropic-style replay policy", async () => {
@@ -107,6 +112,13 @@ describe("anthropic-vertex provider plugin", () => {
validateAnthropicTurns: true,
allowSyntheticToolResults: true,
});
+ expect(
+ provider.buildReplayPolicy?.({
+ provider: "anthropic-vertex",
+ modelApi: "anthropic-messages",
+ modelId: "claude-fable-5",
+ } as never),
+ ).not.toHaveProperty("dropThinkingBlocks");
});
it("owns Anthropic-style thinking policy", async () => {
@@ -119,6 +131,81 @@ describe("anthropic-vertex provider plugin", () => {
expect(opus48Profile?.defaultLevel).toBe("off");
expect(opus48Profile?.levels.map((level) => level.id)).toContain("max");
+
+ const fableProfile = provider.resolveThinkingProfile?.({
+ provider: "anthropic-vertex",
+ modelId: "claude-fable-5",
+ } as never);
+ expect(fableProfile?.defaultLevel).toBe("high");
+ expect(fableProfile?.preserveWhenCatalogReasoningFalse).toBe(true);
+
+ const aliasProfile = provider.resolveThinkingProfile?.({
+ provider: "anthropic-vertex",
+ modelId: "production-claude",
+ params: { canonicalModelId: "claude-fable-5" },
+ } as never);
+ expect(aliasProfile?.defaultLevel).toBe("high");
+ });
+
+ it("restores Fable metadata for explicit Vertex catalog rows", async () => {
+ const provider = await registerSingleProviderPlugin(anthropicVertexPlugin);
+
+ const normalized = provider.normalizeResolvedModel?.({
+ provider: "anthropic-vertex",
+ modelId: "claude-fable-5",
+ model: {
+ id: "claude-fable-5",
+ name: "Claude Fable 5",
+ api: "anthropic-messages",
+ provider: "anthropic-vertex",
+ baseUrl: "https://aiplatform.googleapis.com",
+ reasoning: false,
+ input: ["text"],
+ cost: { input: 10, output: 50, cacheRead: 1, cacheWrite: 12.5 },
+ contextWindow: 200_000,
+ maxTokens: 8192,
+ },
+ } as never);
+
+ expect(normalized).toMatchObject({
+ reasoning: true,
+ input: ["text", "image"],
+ contextWindow: 1_000_000,
+ contextTokens: 1_000_000,
+ maxTokens: 128_000,
+ thinkingLevelMap: {
+ off: "low",
+ minimal: "low",
+ xhigh: "xhigh",
+ max: "max",
+ },
+ });
+
+ const aliasNormalized = provider.normalizeResolvedModel?.({
+ provider: "anthropic-vertex",
+ modelId: "production-claude",
+ model: {
+ id: "production-claude",
+ name: "Production Claude",
+ api: "anthropic-messages",
+ provider: "anthropic-vertex",
+ baseUrl: "https://aiplatform.googleapis.com",
+ reasoning: false,
+ input: ["text"],
+ cost: { input: 10, output: 50, cacheRead: 1, cacheWrite: 12.5 },
+ contextWindow: 200_000,
+ maxTokens: 8192,
+ params: { canonicalModelId: "claude-fable-5" },
+ thinkingLevelMap: { max: null },
+ },
+ } as never);
+ expect(aliasNormalized).toMatchObject({
+ reasoning: true,
+ input: ["text", "image"],
+ contextWindow: 1_000_000,
+ maxTokens: 128_000,
+ thinkingLevelMap: { off: "low", minimal: "low", xhigh: "xhigh", max: null },
+ });
});
it("resolves synthetic auth when ADC is available", async () => {
diff --git a/extensions/anthropic-vertex/index.ts b/extensions/anthropic-vertex/index.ts
index e8983615c954..a7b3a6a4762e 100644
--- a/extensions/anthropic-vertex/index.ts
+++ b/extensions/anthropic-vertex/index.ts
@@ -14,6 +14,7 @@ import {
resolveAnthropicVertexConfigApiKey,
resolveImplicitAnthropicVertexProvider,
} from "./api.js";
+import { normalizeAnthropicVertexResolvedModel } from "./provider-catalog.js";
const PROVIDER_ID = "anthropic-vertex";
const GCP_VERTEX_CREDENTIALS_MARKER = "gcp-vertex-credentials";
@@ -48,7 +49,10 @@ export default definePluginEntry({
},
resolveConfigApiKey: ({ env }) => resolveAnthropicVertexConfigApiKey(env),
...NATIVE_ANTHROPIC_REPLAY_HOOKS,
- resolveThinkingProfile: ({ modelId }) => resolveClaudeThinkingProfile(modelId),
+ normalizeResolvedModel: ({ modelId, model }) =>
+ normalizeAnthropicVertexResolvedModel(modelId, model),
+ resolveThinkingProfile: ({ modelId, params }) =>
+ resolveClaudeThinkingProfile(modelId, params, { includeNativeMax: true }),
resolveSyntheticAuth: () => {
if (!hasAnthropicVertexAvailableAuth()) {
return undefined;
diff --git a/extensions/anthropic-vertex/provider-catalog.ts b/extensions/anthropic-vertex/provider-catalog.ts
index 200a807568a9..9a67fc877b06 100644
--- a/extensions/anthropic-vertex/provider-catalog.ts
+++ b/extensions/anthropic-vertex/provider-catalog.ts
@@ -1,3 +1,4 @@
+import type { ProviderRuntimeModel } from "openclaw/plugin-sdk/plugin-entry";
/**
* Static Anthropic Vertex model catalog builder. It derives provider base URLs
* from region configuration and publishes Claude model metadata.
@@ -6,11 +7,13 @@ import type {
ModelDefinitionConfig,
ModelProviderConfig,
} from "openclaw/plugin-sdk/provider-model-shared";
+import { resolveClaudeFable5ModelIdentity } from "openclaw/plugin-sdk/provider-model-shared";
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/string-coerce-runtime";
import { resolveAnthropicVertexRegion } from "./region.js";
/** Default Anthropic Vertex model used for implicit provider catalogs. */
export const ANTHROPIC_VERTEX_DEFAULT_MODEL_ID = "claude-sonnet-4-6";
const ANTHROPIC_VERTEX_DEFAULT_CONTEXT_WINDOW = 1_000_000;
+const ANTHROPIC_VERTEX_FABLE_MAX_TOKENS = 128_000;
const GCP_VERTEX_CREDENTIALS_MARKER = "gcp-vertex-credentials";
function buildAnthropicVertexModel(params: {
@@ -36,6 +39,15 @@ function buildAnthropicVertexModel(params: {
function buildAnthropicVertexCatalog(): ModelDefinitionConfig[] {
return [
+ buildAnthropicVertexModel({
+ id: "claude-fable-5",
+ name: "Claude Fable 5",
+ reasoning: true,
+ input: ["text", "image"],
+ cost: { input: 10, output: 50, cacheRead: 1, cacheWrite: 12.5 },
+ maxTokens: ANTHROPIC_VERTEX_FABLE_MAX_TOKENS,
+ thinkingLevelMap: { off: "low", minimal: "low", xhigh: "xhigh", max: "max" },
+ }),
buildAnthropicVertexModel({
id: "claude-opus-4-8",
name: "Claude Opus 4.8",
@@ -52,6 +64,7 @@ function buildAnthropicVertexCatalog(): ModelDefinitionConfig[] {
input: ["text", "image"],
cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
maxTokens: 128000,
+ thinkingLevelMap: { xhigh: null, max: "max" },
}),
buildAnthropicVertexModel({
id: ANTHROPIC_VERTEX_DEFAULT_MODEL_ID,
@@ -60,10 +73,53 @@ function buildAnthropicVertexCatalog(): ModelDefinitionConfig[] {
input: ["text", "image"],
cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
maxTokens: 128000,
+ thinkingLevelMap: { xhigh: null, max: "max" },
}),
];
}
+/**
+ * Restore required Fable metadata after explicit catalog models replace the implicit row.
+ *
+ * Acts only when `resolveClaudeFable5ModelIdentity` accepts the model id together with
+ * `model.params`; every other model is left untouched.
+ *
+ * @param modelId - Model id to check; `model.params` is consulted alongside it.
+ * @param model - Resolved runtime model whose metadata may be incomplete.
+ * @returns A copy with reasoning enabled, `"image"` input, the default context window and
+ *   context tokens, a max-token value of at least the Fable floor, and the Fable thinking
+ *   level map merged with the model's own entries; `undefined` when the model is not
+ *   Fable 5 or already carries exactly this metadata.
+ */
+export function normalizeAnthropicVertexResolvedModel(
+ modelId: string,
+ model: ProviderRuntimeModel,
+): ProviderRuntimeModel | undefined {
+ if (!resolveClaudeFable5ModelIdentity({ id: modelId, params: model.params })) {
+ return undefined;
+ }
+ const input: ProviderRuntimeModel["input"] = model.input.includes("image")
+ ? model.input
+ : [...model.input, "image"];
+ const thinkingLevelMap = {
+ off: "low",
+ minimal: "low",
+ xhigh: "xhigh",
+ max: "max",
+ ...model.thinkingLevelMap, // spread last: entries already on the model override the defaults above
+ };
+ if (
+ model.reasoning &&
+ input === model.input && // same array reference means "image" was already listed
+ model.contextWindow === ANTHROPIC_VERTEX_DEFAULT_CONTEXT_WINDOW &&
+ model.contextTokens === ANTHROPIC_VERTEX_DEFAULT_CONTEXT_WINDOW &&
+ (model.maxTokens ?? 0) >= ANTHROPIC_VERTEX_FABLE_MAX_TOKENS &&
+ model.thinkingLevelMap?.off === "low" &&
+ model.thinkingLevelMap.minimal === "low" &&
+ model.thinkingLevelMap.xhigh === "xhigh" &&
+ model.thinkingLevelMap.max === "max"
+ ) {
+ return undefined; // metadata already matches the Fable defaults exactly
+ }
+ return {
+ ...model,
+ reasoning: true,
+ input,
+ contextWindow: ANTHROPIC_VERTEX_DEFAULT_CONTEXT_WINDOW,
+ contextTokens: ANTHROPIC_VERTEX_DEFAULT_CONTEXT_WINDOW,
+ maxTokens: Math.max(model.maxTokens ?? 0, ANTHROPIC_VERTEX_FABLE_MAX_TOKENS), // raises to the floor, never lowers a larger value
+ thinkingLevelMap,
+ };
+}
+
/** Build the implicit Anthropic Vertex provider config for the current env. */
export function buildAnthropicVertexProvider(params?: {
env?: NodeJS.ProcessEnv;
diff --git a/extensions/anthropic-vertex/provider-policy-api.test.ts b/extensions/anthropic-vertex/provider-policy-api.test.ts
index 9f6a78b4ec6c..7d2cdcb75f6e 100644
--- a/extensions/anthropic-vertex/provider-policy-api.test.ts
+++ b/extensions/anthropic-vertex/provider-policy-api.test.ts
@@ -22,6 +22,38 @@ describe("anthropic-vertex provider-policy-api", () => {
expect(profile?.defaultLevel).toBe("off");
});
+ it("exposes native max without xhigh for Claude Sonnet 4.6", () => {
+ const profile = resolveThinkingProfile({
+ provider: "anthropic-vertex",
+ modelId: "claude-sonnet-4-6",
+ });
+
+ expect(profile?.levels.map((level) => level.id)).toContain("max");
+ expect(profile?.levels.map((level) => level.id)).not.toContain("xhigh");
+ });
+
+ it("inherits Claude Fable 5's provider-agnostic thinking contract", () => {
+ const profile = resolveThinkingProfile({
+ provider: "anthropic-vertex",
+ modelId: "claude-fable-5",
+ });
+
+ expect(profile?.defaultLevel).toBe("high");
+ expect(profile?.preserveWhenCatalogReasoningFalse).toBe(true);
+ expect(profile?.levels.map((level) => level.id)).toContain("max");
+ });
+
+ it("resolves deployment aliases from canonical model metadata", () => {
+ const profile = resolveThinkingProfile({
+ provider: "anthropic-vertex",
+ modelId: "production-claude",
+ params: { canonicalModelId: "claude-fable-5" },
+ });
+
+ expect(profile?.defaultLevel).toBe("high");
+ expect(profile?.preserveWhenCatalogReasoningFalse).toBe(true);
+ });
+
it("ignores other providers", () => {
expect(resolveThinkingProfile({ provider: "anthropic", modelId: "claude-opus-4-8" })).toBe(
null,
diff --git a/extensions/anthropic-vertex/provider-policy-api.ts b/extensions/anthropic-vertex/provider-policy-api.ts
index 2dc2ddee554f..278297448dba 100644
--- a/extensions/anthropic-vertex/provider-policy-api.ts
+++ b/extensions/anthropic-vertex/provider-policy-api.ts
@@ -5,9 +5,15 @@
import { resolveClaudeThinkingProfile } from "openclaw/plugin-sdk/provider-model-shared";
/**
 * Resolve Anthropic Vertex thinking profile for a provider/model pair.
 *
 * The provider id is trimmed and lower-cased before comparison; any provider other than
 * `anthropic-vertex` yields `null`. Otherwise the model id and the optional model `params`
 * are forwarded to `resolveClaudeThinkingProfile` with `includeNativeMax: true`.
 */
-export function resolveThinkingProfile(params: { provider: string; modelId: string }) {
+export function resolveThinkingProfile(params: {
+ provider: string;
+ modelId: string;
+ params?: Record;
+}) {
if (params.provider.trim().toLowerCase() !== "anthropic-vertex") {
return null;
}
- return resolveClaudeThinkingProfile(params.modelId);
+ return resolveClaudeThinkingProfile(params.modelId, params.params, {
+ includeNativeMax: true,
+ });
}
diff --git a/extensions/anthropic-vertex/stream-runtime.test.ts b/extensions/anthropic-vertex/stream-runtime.test.ts
index ac36f68c8846..05d1abba5829 100644
--- a/extensions/anthropic-vertex/stream-runtime.test.ts
+++ b/extensions/anthropic-vertex/stream-runtime.test.ts
@@ -32,12 +32,21 @@ function createStreamDeps(): {
let createAnthropicVertexStreamFn: typeof import("./stream-runtime.js").createAnthropicVertexStreamFn;
let createAnthropicVertexStreamFnForModel: typeof import("./stream-runtime.js").createAnthropicVertexStreamFnForModel;
-function makeModel(params: { id: string; maxTokens?: number }): Model<"anthropic-messages"> {
+function makeModel(params: { // test fixture: builds an anthropic-vertex model from a few optional overrides
+ id: string;
+ maxTokens?: number;
+ params?: Record;
+ reasoning?: boolean;
+ thinkingLevelMap?: Model<"anthropic-messages">["thinkingLevelMap"];
+}): Model<"anthropic-messages"> {
return {
id: params.id,
api: "anthropic-messages",
provider: "anthropic-vertex",
+ reasoning: params.reasoning ?? true, // fixtures are reasoning-capable unless a test passes false
...(params.maxTokens !== undefined ? { maxTokens: params.maxTokens } : {}),
+ ...(params.params ? { params: params.params } : {}), // key is omitted entirely when not provided
+ ...(params.thinkingLevelMap ? { thinkingLevelMap: params.thinkingLevelMap } : {}), // key is omitted entirely when not provided
} as Model<"anthropic-messages">;
}
@@ -195,7 +204,66 @@ describe("createAnthropicVertexStreamFn", () => {
expect(streamTransportOptions(streamAnthropicMock).temperature).toBe(0.7);
});
- it("maps xhigh reasoning to max effort for adaptive Opus models", () => {
+ it("uses Fable 5's always-adaptive Vertex contract", () => {
+ const { deps, streamAnthropicMock } = createStreamDeps();
+ const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
+ const model = makeModel({ id: "claude-fable-5", maxTokens: 128000 });
+
+ void streamFn(model, { messages: [] }, { temperature: 0.7 });
+
+ expect(streamTransportOptions(streamAnthropicMock)).toMatchObject({
+ thinkingEnabled: true,
+ effort: "high",
+ maxTokens: 128000,
+ });
+ expect(streamTransportOptions(streamAnthropicMock)).not.toHaveProperty("temperature");
+ });
+
+ it("uses canonical Claude policy for Vertex deployment aliases", () => {
+ const { deps, streamAnthropicMock } = createStreamDeps();
+ const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
+ const model = makeModel({
+ id: "production-claude",
+ maxTokens: 128000,
+ params: { canonicalModelId: "claude-opus-4-8" },
+ });
+
+ void streamFn(model, { messages: [] }, { reasoning: "xhigh", temperature: 0.7 });
+
+ expect(streamTransportOptions(streamAnthropicMock)).toMatchObject({
+ thinkingEnabled: true,
+ effort: "xhigh",
+ });
+ expect(streamTransportOptions(streamAnthropicMock)).not.toHaveProperty("temperature");
+ });
+
+ it("preserves Fable 5 low effort on Vertex", () => {
+ const { deps, streamAnthropicMock } = createStreamDeps();
+ const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
+ const model = makeModel({ id: "claude-fable-5", maxTokens: 128000 });
+
+ void streamFn(model, { messages: [] }, { reasoning: "low" });
+
+ expect(streamTransportOptions(streamAnthropicMock)).toMatchObject({
+ thinkingEnabled: true,
+ effort: "low",
+ });
+ });
+
+ it("preserves Fable 5 xhigh effort on Vertex", () => {
+ const { deps, streamAnthropicMock } = createStreamDeps();
+ const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
+ const model = makeModel({ id: "claude-fable-5", maxTokens: 128000 });
+
+ void streamFn(model, { messages: [] }, { reasoning: "xhigh" });
+
+ expect(streamTransportOptions(streamAnthropicMock)).toMatchObject({
+ thinkingEnabled: true,
+ effort: "xhigh",
+ });
+ });
+
+ it("maps unsupported xhigh reasoning to high effort for Opus 4.6", () => {
const { deps, streamAnthropicMock } = createStreamDeps();
const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
const model = makeModel({ id: "claude-opus-4-6", maxTokens: 64000 });
@@ -204,7 +272,7 @@ describe("createAnthropicVertexStreamFn", () => {
const transportOptions = streamTransportOptions(streamAnthropicMock);
expect(transportOptions.thinkingEnabled).toBe(true);
- expect(transportOptions.effort).toBe("max");
+ expect(transportOptions.effort).toBe("high");
});
it("maps xhigh reasoning to xhigh effort for Opus 4.8", () => {
@@ -231,7 +299,7 @@ describe("createAnthropicVertexStreamFn", () => {
expect(transportOptions.effort).toBe("max");
});
- it("clamps max reasoning for adaptive models without native max support", () => {
+ it("preserves native max reasoning for Sonnet 4.6", () => {
const { deps, streamAnthropicMock } = createStreamDeps();
const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
const model = makeModel({ id: "claude-sonnet-4-6", maxTokens: 128000 });
@@ -240,7 +308,24 @@ describe("createAnthropicVertexStreamFn", () => {
const transportOptions = streamTransportOptions(streamAnthropicMock);
expect(transportOptions.thinkingEnabled).toBe(true);
+ expect(transportOptions.effort).toBe("max");
+ });
+
+ it("honors explicit max opt-outs for Vertex aliases", () => {
+ const { deps, streamAnthropicMock } = createStreamDeps();
+ const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps);
+ const model = makeModel({
+ id: "production-claude",
+ params: { canonicalModelId: "claude-sonnet-4-6" },
+ reasoning: false,
+ thinkingLevelMap: { xhigh: null, max: null },
+ });
+
+ void streamFn(model, { messages: [] }, { reasoning: "max", temperature: 0.2 });
+
+ const transportOptions = streamTransportOptions(streamAnthropicMock);
expect(transportOptions.effort).toBe("high");
+ expect(transportOptions).not.toHaveProperty("temperature");
});
it("applies Anthropic cache-boundary shaping before forwarding payload hooks", async () => {
diff --git a/extensions/anthropic-vertex/stream-runtime.ts b/extensions/anthropic-vertex/stream-runtime.ts
index 1fdc78fcfef2..94edb6241c9f 100644
--- a/extensions/anthropic-vertex/stream-runtime.ts
+++ b/extensions/anthropic-vertex/stream-runtime.ts
@@ -5,10 +5,19 @@
import { AnthropicVertex as AnthropicVertexSdk } from "@anthropic-ai/vertex-sdk";
import type { StreamFn } from "openclaw/plugin-sdk/agent-core";
import {
+ clampThinkingLevel,
stream as streamDefault,
type Model,
+ type ModelThinkingLevel,
type ProviderStreamOptions,
} from "openclaw/plugin-sdk/llm";
+import {
+ resolveClaudeFable5ModelIdentity,
+ resolveClaudeModelIdentity,
+ supportsClaudeAdaptiveThinking,
+ supportsClaudeNativeMaxEffort,
+ supportsClaudeNativeXhighEffort,
+} from "openclaw/plugin-sdk/provider-model-shared";
import {
applyAnthropicPayloadPolicyToParams,
resolveAnthropicPayloadPolicy,
@@ -42,44 +51,43 @@ const defaultAnthropicVertexStreamDeps: AnthropicVertexStreamDeps = {
};
function isClaudeOpus47OrNewerModel(modelId: string): boolean {
- return (
- modelId.includes("opus-4-8") ||
- modelId.includes("opus-4.8") ||
- modelId.includes("opus-4-7") ||
- modelId.includes("opus-4.7")
- );
+ return supportsClaudeNativeXhighEffort({ id: modelId }); // shared predicate replaces the local Opus 4.7/4.8 substring checks
}
-function isClaudeOpus46Model(modelId: string): boolean {
- return modelId.includes("opus-4-6") || modelId.includes("opus-4.6");
+function isClaudeFable5Model(modelId: string): boolean { // true when the shared resolver returns a Fable 5 identity for this id
+ return resolveClaudeFable5ModelIdentity({ id: modelId }) !== undefined;
}
function supportsAdaptiveThinking(modelId: string): boolean {
- return (
- isClaudeOpus47OrNewerModel(modelId) ||
- isClaudeOpus46Model(modelId) ||
- modelId.includes("sonnet-4-6") ||
- modelId.includes("sonnet-4.6")
- );
+ return supportsClaudeAdaptiveThinking({ id: modelId }); // shared predicate replaces the local Opus/Sonnet substring checks
}
function mapAnthropicAdaptiveEffort(
- reasoning: string,
+ reasoning: ModelThinkingLevel,
+ model: Model<"anthropic-messages">,
modelId: string,
): AnthropicVertexAdaptiveEffort {
+ const clampModel =
+ typeof model.params?.canonicalModelId === "string" ? { ...model, reasoning: true } : model; // models with a canonical id are clamped as reasoning-capable regardless of their own flag
+ const resolvedReasoning = clampThinkingLevel(clampModel, reasoning); // NOTE(review): presumably narrows the request to a level the model supports — confirm against clampThinkingLevel
+ const mapped = model.thinkingLevelMap?.[resolvedReasoning];
+ if (typeof mapped === "string") { // an explicit string mapping on the model wins; null or missing entries fall through to the table below
+ return mapped as AnthropicVertexAdaptiveEffort;
+ }
const effortMap: Record = {
+ off: "low",
minimal: "low",
low: "low",
medium: "medium",
high: "high",
- xhigh: isClaudeOpus47OrNewerModel(modelId)
+ xhigh: isClaudeFable5Model(modelId)
? "xhigh"
- : isClaudeOpus46Model(modelId)
- ? "max"
+ : isClaudeOpus47OrNewerModel(modelId)
+ ? "xhigh"
: "high",
- max: isClaudeOpus47OrNewerModel(modelId) ? "max" : "high",
+ max: supportsClaudeNativeMaxEffort({ id: modelId }) ? "max" : "high", // ids without native max effort fall back to "high"
};
- return effortMap[reasoning] ?? "high";
+ return effortMap[resolvedReasoning] ?? "high"; // levels missing from the table default to "high"
}
function resolveAnthropicVertexMaxTokens(params: {
@@ -163,7 +171,15 @@ export function createAnthropicVertexStreamFn(
modelMaxTokens: transportModel.maxTokens,
requestedMaxTokens: options?.maxTokens,
});
- const temperature = isClaudeOpus47OrNewerModel(model.id) ? undefined : options?.temperature;
+ const contractModelId = resolveClaudeModelIdentity(model);
+ const fable5 = isClaudeFable5Model(contractModelId);
+ const reasoning = options?.reasoning as ModelThinkingLevel | undefined;
+ const adaptiveThinking =
+ fable5 || Boolean(reasoning && supportsAdaptiveThinking(contractModelId));
+ const temperature =
+ adaptiveThinking || isClaudeOpus47OrNewerModel(contractModelId)
+ ? undefined
+ : options?.temperature;
const opts: AnthropicVertexTransportOptions = {
client,
...(temperature !== undefined ? { temperature } : {}),
@@ -181,21 +197,25 @@ export function createAnthropicVertexStreamFn(
metadata: options?.metadata,
};
- if (options?.reasoning) {
- if (supportsAdaptiveThinking(model.id)) {
+ if (reasoning) {
+ if (supportsAdaptiveThinking(contractModelId)) {
opts.thinkingEnabled = true;
opts.effort = mapAnthropicAdaptiveEffort(
- options.reasoning,
- model.id,
+ reasoning,
+ transportModel,
+ contractModelId,
) as AnthropicVertexEffort;
} else {
opts.thinkingEnabled = true;
- const budgets = options.thinkingBudgets;
+ const budgets = options?.thinkingBudgets;
opts.thinkingBudgetTokens =
- (budgets && options.reasoning in budgets
- ? budgets[options.reasoning as keyof typeof budgets]
+ (budgets && reasoning in budgets
+ ? budgets[reasoning as keyof typeof budgets]
: undefined) ?? 10000;
}
+ } else if (fable5) {
+ opts.thinkingEnabled = true;
+ opts.effort = "high";
} else {
opts.thinkingEnabled = false;
}
diff --git a/extensions/anthropic/cli-shared.ts b/extensions/anthropic/cli-shared.ts
index cf33a35532f4..f3fa566f3a8a 100644
--- a/extensions/anthropic/cli-shared.ts
+++ b/extensions/anthropic/cli-shared.ts
@@ -72,6 +72,12 @@ const CLAUDE_BYPASS_PERMISSION_MODE = "bypassPermissions";
type ClaudeCliEffort = "low" | "medium" | "high" | "xhigh" | "max";
+/**
+ * Explicit thinking opt-out for Claude CLI routes unsupported by Claude Code.
+ * Exposes `off` as the only selectable level and as the default; `as const` keeps the
+ * literal types narrow and marks the object readonly at the type level.
+ */
+export const CLAUDE_CLI_OFF_THINKING_PROFILE = {
+ levels: [{ id: "off" }],
+ defaultLevel: "off",
+} as const;
+
/** Return whether a provider id refers to the Claude CLI backend. */
export function isClaudeCliProvider(providerId: string): boolean {
return normalizeOptionalLowercaseString(providerId) === CLAUDE_CLI_BACKEND_ID;
diff --git a/extensions/anthropic/index.test.ts b/extensions/anthropic/index.test.ts
index 5727a738d3de..840e11d65267 100644
--- a/extensions/anthropic/index.test.ts
+++ b/extensions/anthropic/index.test.ts
@@ -124,6 +124,26 @@ describe("anthropic provider replay hooks", () => {
});
});
+ it("preserves Fable thinking in its same-model replay policy", async () => {
+ const provider = await registerSingleProviderPlugin(anthropicPlugin);
+ const fableContext = {
+ provider: "anthropic",
+ modelApi: "anthropic-messages",
+ modelId: "claude-fable-5",
+ };
+
+ expect(provider.buildReplayPolicy?.(fableContext)).toEqual({
+ sanitizeMode: "full",
+ sanitizeToolCallIds: true,
+ toolCallIdMode: "strict",
+ preserveNativeAnthropicToolUseIds: true,
+ preserveSignatures: true,
+ repairToolUseResultPairing: true,
+ validateAnthropicTurns: true,
+ allowSyntheticToolResults: true,
+ });
+ });
+
it("defaults provider api through plugin config normalization", async () => {
const provider = await registerSingleProviderPlugin(anthropicPlugin);
@@ -507,7 +527,102 @@ describe("anthropic provider replay hooks", () => {
provider: "anthropic",
modelId: "claude-opus-4-6",
} as never)
- ?.levels.some((level) => level.id === "xhigh" || level.id === "max"),
+ ?.levels.some((level) => level.id === "max"),
+ ).toBe(true);
+ expect(
+ provider
+ .resolveThinkingProfile?.({
+ provider: "anthropic",
+ modelId: "claude-opus-4-6",
+ } as never)
+ ?.levels.some((level) => level.id === "xhigh"),
+ ).toBe(false);
+ });
+
+ it("resolves Claude Fable 5 with its always-adaptive model contract", async () => {
+ const provider = await registerSingleProviderPlugin(anthropicPlugin);
+ const resolved = provider.resolveDynamicModel?.({
+ provider: "anthropic",
+ modelId: "claude-fable-5",
+ modelRegistry: createModelRegistry([]),
+ } as ProviderResolveDynamicModelContext);
+
+ expectFields(resolved, {
+ provider: "anthropic",
+ id: "claude-fable-5",
+ api: "anthropic-messages",
+ reasoning: true,
+ input: ["text", "image"],
+ cost: { input: 10, output: 50, cacheRead: 1, cacheWrite: 12.5 },
+ contextWindow: 1_000_000,
+ contextTokens: 1_000_000,
+ maxTokens: 128_000,
+ thinkingLevelMap: {
+ off: "low",
+ minimal: "low",
+ xhigh: "xhigh",
+ max: "max",
+ },
+ });
+ expect(requireRecord(resolved, "Fable model").mediaInput).toEqual({
+ image: { maxSidePx: 2576, preferredSidePx: 2576, tokenMode: "provider" },
+ });
+
+ const profile = provider.resolveThinkingProfile?.({
+ provider: "anthropic",
+ modelId: "claude-fable-5",
+ } as never);
+ expect(levelIds(profile)).toStrictEqual([
+ "off",
+ "minimal",
+ "low",
+ "medium",
+ "high",
+ "xhigh",
+ "adaptive",
+ "max",
+ ]);
+ expect(requireRecord(profile, "Fable thinking profile").defaultLevel).toBe("high");
+
+ const normalized = provider.normalizeResolvedModel?.({
+ provider: "anthropic",
+ modelId: "claude-fable-5",
+ model: {
+ ...(resolved as ProviderRuntimeModel),
+ reasoning: false,
+ },
+ } as never);
+ expect(normalized?.reasoning).toBe(true);
+
+ expect(
+ provider.resolveDynamicModel?.({
+ provider: "claude-cli",
+ modelId: "claude-fable-5",
+ modelRegistry: createModelRegistry([]),
+ } as ProviderResolveDynamicModelContext),
+ ).toBeUndefined();
+ expect(
+ provider.resolveThinkingProfile?.({
+ provider: "claude-cli",
+ modelId: "claude-fable-5",
+ } as never),
+ ).toEqual({
+ levels: [{ id: "off" }],
+ defaultLevel: "off",
+ });
+ expect(
+ provider
+ .resolveThinkingProfile?.({
+ provider: "claude-cli",
+ modelId: "claude-opus-4-6",
+ } as never)
+ ?.levels.map((level) => level.id),
+ ).toContain("max");
+ expect(
+ provider.isModernModelRef?.({
+ provider: "claude-cli",
+ modelId: "claude-fable-5",
+ }),
).toBe(false);
});
@@ -529,6 +644,48 @@ describe("anthropic provider replay hooks", () => {
});
});
+ it("uses canonical model identity instead of a Fable-looking deployment alias", async () => {
+ const provider = await registerSingleProviderPlugin(anthropicPlugin);
+ const model = {
+ id: "claude-fable-5-prod",
+ name: "Production Claude",
+ provider: "anthropic",
+ api: "anthropic-messages",
+ baseUrl: "https://api.anthropic.com",
+ reasoning: false,
+ input: ["text"],
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+ contextWindow: 200_000,
+ maxTokens: 64_000,
+ params: { canonicalModelId: "claude-opus-4-8" },
+ } as ProviderRuntimeModel;
+
+ expectFields(
+ provider.normalizeResolvedModel?.({
+ provider: "anthropic",
+ modelId: model.id,
+ model,
+ } as never),
+ {
+ reasoning: false,
+ contextWindow: 1_048_576,
+ contextTokens: 1_048_576,
+ maxTokens: 128_000,
+ thinkingLevelMap: {
+ xhigh: "xhigh",
+ max: "max",
+ },
+ },
+ );
+ expect(
+ provider.resolveThinkingProfile?.({
+ provider: "anthropic",
+ modelId: model.id,
+ params: model.params,
+ } as never)?.defaultLevel,
+ ).toBe("off");
+ });
+
it("does not forward-compat case-mismatched Anthropic model ids", async () => {
const provider = await registerSingleProviderPlugin(anthropicPlugin);
@@ -569,6 +726,7 @@ describe("anthropic provider replay hooks", () => {
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 200_000,
maxTokens: 64_000,
+ thinkingLevelMap: { max: null },
},
} as never);
@@ -576,6 +734,29 @@ describe("anthropic provider replay hooks", () => {
expect(normalized?.mediaInput).toEqual({
image: { maxSidePx: 1568, preferredSidePx: 1568, tokenMode: "provider" },
});
+ expect(normalized?.thinkingLevelMap).toEqual({ xhigh: null, max: null });
+ });
+
+ it("does not normalize numeric successors as known Claude contracts", async () => {
+ const provider = await registerSingleProviderPlugin(anthropicPlugin);
+
+ const normalized = provider.normalizeResolvedModel?.({
+ provider: "anthropic",
+ modelId: "claude-opus-4-60",
+ model: {
+ id: "claude-opus-4-60",
+ name: "Claude Opus 4.60",
+ provider: "anthropic",
+ api: "anthropic-messages",
+ reasoning: true,
+ input: ["text"],
+ cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+ contextWindow: 200_000,
+ maxTokens: 64_000,
+ },
+ } as never);
+
+ expect(normalized).toBeUndefined();
});
it("merges partial Claude image media metadata with provider limits", async () => {
diff --git a/extensions/anthropic/openclaw.plugin.json b/extensions/anthropic/openclaw.plugin.json
index ea7f182e1af4..e9d602a197d2 100644
--- a/extensions/anthropic/openclaw.plugin.json
+++ b/extensions/anthropic/openclaw.plugin.json
@@ -61,6 +61,24 @@
"baseUrl": "https://api.anthropic.com",
"api": "anthropic-messages",
"models": [
+ {
+ "id": "claude-fable-5",
+ "name": "Claude Fable 5",
+ "reasoning": true,
+ "input": ["text", "image"],
+ "mediaInput": {
+ "image": { "maxSidePx": 2576, "preferredSidePx": 2576, "tokenMode": "provider" }
+ },
+ "cost": { "input": 10, "output": 50, "cacheRead": 1, "cacheWrite": 12.5 },
+ "contextWindow": 1000000,
+ "maxTokens": 128000,
+ "thinkingLevelMap": {
+ "off": "low",
+ "minimal": "low",
+ "xhigh": "xhigh",
+ "max": "max"
+ }
+ },
{
"id": "claude-opus-4-8",
"name": "Claude Opus 4.8",
diff --git a/extensions/anthropic/provider-policy-api.test.ts b/extensions/anthropic/provider-policy-api.test.ts
index 6c19bf627364..9055bb770091 100644
--- a/extensions/anthropic/provider-policy-api.test.ts
+++ b/extensions/anthropic/provider-policy-api.test.ts
@@ -136,18 +136,52 @@ describe("anthropic provider policy public artifact", () => {
expect(profile?.defaultLevel).toBe("off");
});
- it("keeps adaptive-only Claude profiles aligned with the runtime provider", () => {
+ it("exposes the always-adaptive Claude Fable 5 thinking profile", () => {
const profile = resolveThinkingProfile({
provider: "anthropic",
- modelId: "claude-opus-4-6",
+ modelId: "claude-fable-5",
});
- if (!profile) {
- throw new Error("Expected Anthropic policy profile");
+ expect(profile).toEqual({
+ levels: [
+ { id: "off" },
+ { id: "minimal" },
+ { id: "low" },
+ { id: "medium" },
+ { id: "high" },
+ { id: "xhigh" },
+ { id: "adaptive" },
+ { id: "max" },
+ ],
+ defaultLevel: "high",
+ preserveWhenCatalogReasoningFalse: true,
+ });
+ expect(
+ resolveThinkingProfile({
+ provider: "claude-cli",
+ modelId: "claude-fable-5",
+ }),
+ ).toEqual({
+ levels: [{ id: "off" }],
+ defaultLevel: "off",
+ });
+ });
+
+ it("exposes native max without xhigh for direct Claude 4.6 routes", () => {
+ for (const provider of ["anthropic", "claude-cli"]) {
+ const profile = resolveThinkingProfile({
+ provider,
+ modelId: "claude-opus-4-6",
+ });
+
+ if (!profile) {
+ throw new Error(`Expected ${provider} policy profile`);
+ }
+ expect(levelIds(profile.levels)).toContain("adaptive");
+ expect(levelIds(profile.levels)).toContain("max");
+ expect(profile.defaultLevel).toBe("adaptive");
+ expect(collectLegacyExtendedLevelIds(profile.levels)).toStrictEqual(["max"]);
}
- expect(levelIds(profile.levels)).toContain("adaptive");
- expect(profile.defaultLevel).toBe("adaptive");
- expect(collectLegacyExtendedLevelIds(profile.levels)).toStrictEqual([]);
});
it("does not expose Anthropic thinking profiles for unrelated providers", () => {
diff --git a/extensions/anthropic/provider-policy-api.ts b/extensions/anthropic/provider-policy-api.ts
index 891dc7a9dc65..78954ca948de 100644
--- a/extensions/anthropic/provider-policy-api.ts
+++ b/extensions/anthropic/provider-policy-api.ts
@@ -2,8 +2,12 @@
* Provider-policy API for Anthropic and Claude CLI. Core calls this lightweight
* path for config defaults and thinking profiles.
*/
-import { resolveClaudeThinkingProfile } from "openclaw/plugin-sdk/provider-model-shared";
+import {
+ resolveClaudeModelIdentity,
+ resolveClaudeThinkingProfile,
+} from "openclaw/plugin-sdk/provider-model-shared";
import type { ModelProviderConfig } from "openclaw/plugin-sdk/provider-model-types";
+import { CLAUDE_CLI_OFF_THINKING_PROFILE } from "./cli-shared.js";
import {
applyAnthropicConfigDefaults,
normalizeAnthropicProviderConfigForProvider,
@@ -20,11 +24,27 @@ export function applyConfigDefaults(params: Parameters;
+}) {
+ const contractModelId = resolveClaudeModelIdentity({
+ id: params.modelId,
+ params: params.params,
+ });
switch (params.provider.trim().toLowerCase()) {
case "anthropic":
+ return resolveClaudeThinkingProfile(contractModelId, undefined, {
+ includeNativeMax: true,
+ });
case "claude-cli":
- return resolveClaudeThinkingProfile(params.modelId);
+ if (contractModelId.startsWith("claude-fable-5")) {
+ return CLAUDE_CLI_OFF_THINKING_PROFILE;
+ }
+ return resolveClaudeThinkingProfile(contractModelId, undefined, {
+ includeNativeMax: true,
+ });
default:
return null;
}
diff --git a/extensions/anthropic/register.runtime.ts b/extensions/anthropic/register.runtime.ts
index 0e1047f95a1e..121222a83ded 100644
--- a/extensions/anthropic/register.runtime.ts
+++ b/extensions/anthropic/register.runtime.ts
@@ -28,8 +28,13 @@ import {
} from "openclaw/plugin-sdk/provider-auth";
import {
cloneFirstTemplateModel,
+ NATIVE_ANTHROPIC_REPLAY_HOOKS,
type ProviderPlugin,
+ resolveClaudeFable5ModelIdentity,
+ resolveClaudeModelIdentity,
resolveClaudeThinkingProfile,
+ supportsClaudeAdaptiveThinking,
+ supportsClaudeNativeXhighEffort,
} from "openclaw/plugin-sdk/provider-model-shared";
import { fetchClaudeUsage } from "openclaw/plugin-sdk/provider-usage";
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/string-coerce-runtime";
@@ -41,13 +46,13 @@ import {
CLAUDE_CLI_BACKEND_ID,
CLAUDE_CLI_DEFAULT_ALLOWLIST_REFS,
CLAUDE_CLI_DEFAULT_MODEL_REF,
+ CLAUDE_CLI_OFF_THINKING_PROFILE,
} from "./cli-shared.js";
import {
applyAnthropicConfigDefaults,
normalizeAnthropicProviderConfigForProvider,
} from "./config-defaults.js";
import { anthropicMediaUnderstandingProvider } from "./media-understanding-provider.js";
-import { buildAnthropicReplayPolicy } from "./replay-policy.js";
import { wrapAnthropicProviderStream } from "./stream-wrappers.js";
const PROVIDER_ID = "anthropic";
@@ -58,7 +63,8 @@ const ANTHROPIC_OPUS_48_DOT_MODEL_ID = "claude-opus-4.8";
const ANTHROPIC_OPUS_47_MODEL_ID = "claude-opus-4-7";
const ANTHROPIC_OPUS_47_DOT_MODEL_ID = "claude-opus-4.7";
const ANTHROPIC_GA_1M_CONTEXT_TOKENS = 1_048_576;
-const ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS = 128_000;
+const ANTHROPIC_FABLE_CONTEXT_TOKENS = 1_000_000;
+const ANTHROPIC_MODERN_MAX_OUTPUT_TOKENS = 128_000;
const ANTHROPIC_OPUS_46_MODEL_ID = "claude-opus-4-6";
const ANTHROPIC_OPUS_46_DOT_MODEL_ID = "claude-opus-4.6";
const ANTHROPIC_OPUS_47_TEMPLATE_MODEL_IDS = [
@@ -67,26 +73,6 @@ const ANTHROPIC_OPUS_47_TEMPLATE_MODEL_IDS = [
] as const;
const ANTHROPIC_SONNET_46_MODEL_ID = "claude-sonnet-4-6";
const ANTHROPIC_SONNET_46_DOT_MODEL_ID = "claude-sonnet-4.6";
-const ANTHROPIC_GA_1M_MODEL_PREFIXES = [
- ANTHROPIC_OPUS_48_MODEL_ID,
- ANTHROPIC_OPUS_48_DOT_MODEL_ID,
- ANTHROPIC_OPUS_46_MODEL_ID,
- ANTHROPIC_OPUS_46_DOT_MODEL_ID,
- ANTHROPIC_OPUS_47_MODEL_ID,
- ANTHROPIC_OPUS_47_DOT_MODEL_ID,
- ANTHROPIC_SONNET_46_MODEL_ID,
- ANTHROPIC_SONNET_46_DOT_MODEL_ID,
-] as const;
-const ANTHROPIC_MODERN_MODEL_PREFIXES = [
- "claude-opus-4-8",
- "claude-opus-4.8",
- "claude-opus-4-7",
- "claude-opus-4.7",
- "claude-opus-4-6",
- "claude-opus-4.6",
- "claude-sonnet-4-6",
- "claude-sonnet-4.6",
-] as const;
const ANTHROPIC_SETUP_TOKEN_NOTE_LINES = [
"Anthropic setup-token auth is supported in OpenClaw.",
"OpenClaw prefers Claude CLI reuse when it is available on the host.",
@@ -282,13 +268,15 @@ function buildAnthropicForwardCompatModel(
): ProviderRuntimeModel | undefined {
const trimmedModelId = ctx.modelId.trim();
const lower = normalizeLowercaseStringOrEmpty(trimmedModelId);
+ const normalizedProvider = normalizeLowercaseStringOrEmpty(ctx.provider);
if (trimmedModelId !== lower || !matchesAnthropicModernModel(lower)) {
return undefined;
}
+ if (isAnthropicFable5Model(lower) && normalizedProvider !== PROVIDER_ID) {
+ return undefined;
+ }
const provider =
- normalizeLowercaseStringOrEmpty(ctx.provider) === CLAUDE_CLI_BACKEND_ID
- ? CLAUDE_CLI_BACKEND_ID
- : PROVIDER_ID;
+ normalizedProvider === CLAUDE_CLI_BACKEND_ID ? CLAUDE_CLI_BACKEND_ID : PROVIDER_ID;
return {
id: trimmedModelId,
name: trimmedModelId,
@@ -297,10 +285,12 @@ function buildAnthropicForwardCompatModel(
baseUrl: "https://api.anthropic.com",
reasoning: true,
input: ["text", "image"],
- cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
- contextWindow: isAnthropicGa1MModel(trimmedModelId) ? ANTHROPIC_GA_1M_CONTEXT_TOKENS : 200_000,
- maxTokens: isAnthropicOpus48Model(trimmedModelId)
- ? ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS
+ cost: isAnthropicFable5Model(trimmedModelId)
+ ? { input: 10, output: 50, cacheRead: 1, cacheWrite: 12.5 }
+ : { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+ contextWindow: resolveAnthropicFixedContextWindow(trimmedModelId) ?? 200_000,
+ maxTokens: isAnthropic128kOutputModel(trimmedModelId)
+ ? ANTHROPIC_MODERN_MAX_OUTPUT_TOKENS
: 64_000,
};
}
@@ -346,25 +336,29 @@ function resolveAnthropicForwardCompatModel(
}
function isAnthropicGa1MModel(modelId: string): boolean {
- const normalized = normalizeLowercaseStringOrEmpty(modelId);
- return ANTHROPIC_GA_1M_MODEL_PREFIXES.some((prefix) => normalized.startsWith(prefix));
+ return supportsClaudeAdaptiveThinking({ id: modelId });
}
-function isAnthropicOpus48Model(modelId: string): boolean {
- const normalized = normalizeLowercaseStringOrEmpty(modelId);
- return [ANTHROPIC_OPUS_48_MODEL_ID, ANTHROPIC_OPUS_48_DOT_MODEL_ID].some((prefix) =>
- normalized.startsWith(prefix),
- );
+function isAnthropicFable5Model(modelId: string): boolean {
+ return resolveClaudeFable5ModelIdentity({ id: modelId }) !== undefined;
+}
+
+function resolveAnthropicFixedContextWindow(modelId: string): number | undefined {
+ if (isAnthropicFable5Model(modelId)) {
+ return ANTHROPIC_FABLE_CONTEXT_TOKENS;
+ }
+ return isAnthropicGa1MModel(modelId) ? ANTHROPIC_GA_1M_CONTEXT_TOKENS : undefined;
+}
+
+function isAnthropic128kOutputModel(modelId: string): boolean {
+ if (isAnthropicFable5Model(modelId)) {
+ return true;
+ }
+ return /^claude-opus-4-8(?=$|[^a-z0-9])/.test(resolveClaudeModelIdentity({ id: modelId }));
}
function isAnthropicOpus47OrNewerModel(modelId: string): boolean {
- const normalized = normalizeLowercaseStringOrEmpty(modelId);
- return [
- ANTHROPIC_OPUS_48_MODEL_ID,
- ANTHROPIC_OPUS_48_DOT_MODEL_ID,
- ANTHROPIC_OPUS_47_MODEL_ID,
- ANTHROPIC_OPUS_47_DOT_MODEL_ID,
- ].some((prefix) => normalized.startsWith(prefix));
+ return supportsClaudeNativeXhighEffort({ id: modelId }) && !isAnthropicFable5Model(modelId);
}
function hasConfiguredModelContextOverride(
@@ -403,26 +397,29 @@ function hasConfiguredModelContextOverride(
return false;
}
-function applyAnthropicGa1MContextWindow(params: {
+function applyAnthropicFixedContextWindow(params: {
config?: ProviderNormalizeResolvedModelContext["config"];
provider: string;
modelId: string;
+ contractModelId: string;
model: ProviderRuntimeModel;
}): ProviderRuntimeModel | undefined {
- if (!isAnthropicGa1MModel(params.modelId)) {
+ const fixedContextWindow = resolveAnthropicFixedContextWindow(params.contractModelId);
+ if (fixedContextWindow === undefined) {
return undefined;
}
if (hasConfiguredModelContextOverride(params.config, params.provider, params.modelId)) {
return undefined;
}
- const nextContextWindow = Math.max(
- params.model.contextWindow ?? 0,
- ANTHROPIC_GA_1M_CONTEXT_TOKENS,
- );
- const nextContextTokens =
- typeof params.model.contextTokens === "number"
- ? Math.max(params.model.contextTokens, ANTHROPIC_GA_1M_CONTEXT_TOKENS)
- : ANTHROPIC_GA_1M_CONTEXT_TOKENS;
+ const exactContextWindow = isAnthropicFable5Model(params.contractModelId);
+ const nextContextWindow = exactContextWindow
+ ? fixedContextWindow
+ : Math.max(params.model.contextWindow ?? 0, fixedContextWindow);
+ const nextContextTokens = exactContextWindow
+ ? fixedContextWindow
+ : typeof params.model.contextTokens === "number"
+ ? Math.max(params.model.contextTokens, fixedContextWindow)
+ : fixedContextWindow;
if (
nextContextWindow === params.model.contextWindow &&
nextContextTokens === params.model.contextTokens
@@ -436,48 +433,52 @@ function applyAnthropicGa1MContextWindow(params: {
};
}
-function applyAnthropicOpus48MaxTokens(params: {
+function applyAnthropicModernMaxTokens(params: {
modelId: string;
model: ProviderRuntimeModel;
}): ProviderRuntimeModel | undefined {
- if (!isAnthropicOpus48Model(params.modelId)) {
+ if (!isAnthropic128kOutputModel(params.modelId)) {
return undefined;
}
- if ((params.model.maxTokens ?? 0) >= ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS) {
+ if ((params.model.maxTokens ?? 0) >= ANTHROPIC_MODERN_MAX_OUTPUT_TOKENS) {
return undefined;
}
return {
...params.model,
- maxTokens: ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS,
+ maxTokens: ANTHROPIC_MODERN_MAX_OUTPUT_TOKENS,
};
}
-function applyAnthropicOpusThinkingLevelMap(params: {
+function applyAnthropicThinkingLevelMap(params: {
modelId: string;
model: ProviderRuntimeModel;
}): ProviderRuntimeModel | undefined {
- if (!isAnthropicOpus47OrNewerModel(params.modelId)) {
+ const fable5 = isAnthropicFable5Model(params.modelId);
+ const nativeXhigh = fable5 || isAnthropicOpus47OrNewerModel(params.modelId);
+ if (!matchesAnthropicModernModel(params.modelId)) {
return undefined;
}
- if (
- params.model.thinkingLevelMap?.xhigh === "xhigh" &&
- params.model.thinkingLevelMap?.max === "max"
- ) {
+ const current = params.model.thinkingLevelMap;
+ const nativeDefaults = {
+ ...(fable5 ? { off: "low" as const, minimal: "low" as const } : {}),
+ xhigh: nativeXhigh ? ("xhigh" as const) : null,
+ max: "max" as const,
+ };
+ const currentEfforts = current as Record | undefined;
+ if (Object.keys(nativeDefaults).every((level) => currentEfforts?.[level] !== undefined)) {
return undefined;
}
return {
...params.model,
thinkingLevelMap: {
- ...params.model.thinkingLevelMap,
- xhigh: "xhigh",
- max: "max",
+ ...nativeDefaults,
+ ...current,
},
};
}
function matchesAnthropicModernModel(modelId: string): boolean {
- const lower = normalizeLowercaseStringOrEmpty(modelId);
- return ANTHROPIC_MODERN_MODEL_PREFIXES.some((prefix) => lower.startsWith(prefix));
+ return supportsClaudeAdaptiveThinking({ id: modelId });
}
function hasImageInput(input: unknown): boolean {
@@ -495,18 +496,13 @@ function resolveAnthropicImageMediaInput(modelId: string, modelName?: string) {
return undefined;
}
const refs = [modelId, modelName].filter((value): value is string => typeof value === "string");
- const largeImageOpus = refs.some((ref) =>
- [
- ANTHROPIC_OPUS_48_MODEL_ID,
- ANTHROPIC_OPUS_48_DOT_MODEL_ID,
- ANTHROPIC_OPUS_47_MODEL_ID,
- ANTHROPIC_OPUS_47_DOT_MODEL_ID,
- ].some((prefix) => normalizeLowercaseStringOrEmpty(ref).startsWith(prefix)),
+ const largeImageModel = refs.some(
+ (ref) => isAnthropicFable5Model(ref) || isAnthropicOpus47OrNewerModel(ref),
);
return {
image: {
- maxSidePx: largeImageOpus ? 2576 : 1568,
- preferredSidePx: largeImageOpus ? 2576 : 1568,
+ maxSidePx: largeImageModel ? 2576 : 1568,
+ preferredSidePx: largeImageModel ? 2576 : 1568,
tokenMode: "provider" as const,
},
};
@@ -531,8 +527,26 @@ function applyAnthropicImageInputCapability(params: {
function normalizeAnthropicResolvedModel(
ctx: ProviderNormalizeResolvedModelContext,
): ProviderRuntimeModel | undefined {
- const imageCapableModel = applyAnthropicImageInputCapability(ctx) ?? ctx.model;
- const mediaInput = resolveAnthropicImageMediaInput(ctx.modelId, imageCapableModel.name);
+ const contractModelId = resolveClaudeModelIdentity({
+ id: ctx.modelId,
+ params: ctx.model.params,
+ });
+ if (
+ isAnthropicFable5Model(contractModelId) &&
+ normalizeLowercaseStringOrEmpty(ctx.provider) !== PROVIDER_ID
+ ) {
+ return undefined;
+ }
+ const contractModel =
+ isAnthropicFable5Model(contractModelId) && !ctx.model.reasoning
+ ? { ...ctx.model, reasoning: true }
+ : ctx.model;
+ const imageCapableModel =
+ applyAnthropicImageInputCapability({
+ modelId: contractModelId,
+ model: contractModel,
+ }) ?? contractModel;
+ const mediaInput = resolveAnthropicImageMediaInput(contractModelId, imageCapableModel.name);
const mediaInputModel = mediaInput
? {
...imageCapableModel,
@@ -547,20 +561,21 @@ function normalizeAnthropicResolvedModel(
}
: imageCapableModel;
const outputModel =
- applyAnthropicOpus48MaxTokens({
- modelId: ctx.modelId,
+ applyAnthropicModernMaxTokens({
+ modelId: contractModelId,
model: mediaInputModel,
}) ?? mediaInputModel;
const thinkingLevelModel =
- applyAnthropicOpusThinkingLevelMap({
- modelId: ctx.modelId,
+ applyAnthropicThinkingLevelMap({
+ modelId: contractModelId,
model: outputModel,
}) ?? outputModel;
const contextWindowModel =
- applyAnthropicGa1MContextWindow({
+ applyAnthropicFixedContextWindow({
config: ctx.config,
provider: ctx.provider,
modelId: ctx.modelId,
+ contractModelId,
model: thinkingLevelModel,
}) ?? thinkingLevelModel;
return contextWindowModel === ctx.model ? undefined : contextWindowModel;
@@ -789,28 +804,13 @@ export function buildAnthropicProvider(): ProviderPlugin {
if (!model) {
return undefined;
}
- const imageCapableModel =
- applyAnthropicImageInputCapability({
- modelId: ctx.modelId,
- model,
- }) ?? model;
- const outputModel =
- applyAnthropicOpus48MaxTokens({
- modelId: ctx.modelId,
- model: imageCapableModel,
- }) ?? imageCapableModel;
- const thinkingLevelModel =
- applyAnthropicOpusThinkingLevelMap({
- modelId: ctx.modelId,
- model: outputModel,
- }) ?? outputModel;
return (
- applyAnthropicGa1MContextWindow({
+ normalizeAnthropicResolvedModel({
config: ctx.config,
provider: ctx.provider,
modelId: ctx.modelId,
- model: thinkingLevelModel,
- }) ?? thinkingLevelModel
+ model,
+ }) ?? model
);
},
normalizeResolvedModel: (ctx) => normalizeAnthropicResolvedModel(ctx),
@@ -820,10 +820,23 @@ export function buildAnthropicProvider(): ProviderPlugin {
: undefined,
// Publish Claude CLI rows through the provider catalog hook.
augmentModelCatalog: () => buildClaudeCliCatalogEntries(),
- buildReplayPolicy: buildAnthropicReplayPolicy,
- isModernModelRef: ({ modelId }) => matchesAnthropicModernModel(modelId),
+ ...NATIVE_ANTHROPIC_REPLAY_HOOKS,
+ isModernModelRef: ({ provider, modelId }) =>
+ matchesAnthropicModernModel(modelId) &&
+ (!isAnthropicFable5Model(modelId) ||
+ normalizeLowercaseStringOrEmpty(provider) === PROVIDER_ID),
resolveReasoningOutputMode: () => "native",
- resolveThinkingProfile: ({ modelId }) => resolveClaudeThinkingProfile(modelId),
+ resolveThinkingProfile: ({ provider, modelId, params }) => {
+ const contractModelId = resolveClaudeModelIdentity({ id: modelId, params });
+ return isAnthropicFable5Model(contractModelId) &&
+ normalizeLowercaseStringOrEmpty(provider) !== PROVIDER_ID
+ ? CLAUDE_CLI_OFF_THINKING_PROFILE
+ : resolveClaudeThinkingProfile(contractModelId, undefined, {
+ includeNativeMax: [PROVIDER_ID, CLAUDE_CLI_BACKEND_ID].includes(
+ normalizeLowercaseStringOrEmpty(provider),
+ ),
+ });
+ },
wrapStreamFn: wrapAnthropicProviderStream,
resolveUsageAuth: resolveAnthropicUsageAuth,
fetchUsageSnapshot: async (ctx) =>
diff --git a/extensions/anthropic/replay-policy.ts b/extensions/anthropic/replay-policy.ts
deleted file mode 100644
index 63382a3419c6..000000000000
--- a/extensions/anthropic/replay-policy.ts
+++ /dev/null
@@ -1,13 +0,0 @@
-/**
- * Anthropic replay-policy bridge. It re-exports the native Anthropic replay
- * policy from the shared provider-model hooks and fails fast if it disappears.
- */
-import { NATIVE_ANTHROPIC_REPLAY_HOOKS } from "openclaw/plugin-sdk/provider-model-shared";
-
-const { buildReplayPolicy } = NATIVE_ANTHROPIC_REPLAY_HOOKS;
-
-if (!buildReplayPolicy) {
- throw new Error("Expected native Anthropic replay hooks to expose buildReplayPolicy.");
-}
-
-export { buildReplayPolicy as buildAnthropicReplayPolicy };
diff --git a/extensions/github-copilot/index.test.ts b/extensions/github-copilot/index.test.ts
index abce8f4d3132..2a9a024902cf 100644
--- a/extensions/github-copilot/index.test.ts
+++ b/extensions/github-copilot/index.test.ts
@@ -214,6 +214,30 @@ describe("github-copilot plugin", () => {
expect(profile?.levels.map((level) => level.id)).toContain("xhigh");
});
+ it("exposes max thinking for catalog-supported Copilot reasoning efforts", () => {
+ const provider = registerProviderWithPluginConfig({});
+
+ const profile = provider.resolveThinkingProfile({
+ provider: "github-copilot",
+ modelId: "claude-fable-5",
+ compat: { supportedReasoningEfforts: ["low", "medium", "high", "max"] },
+ });
+
+ expect(profile?.levels.map((level) => level.id)).toContain("max");
+ });
+
+ it("does not expose max for non-adaptive Claude Copilot models", () => {
+ const provider = registerProviderWithPluginConfig({});
+
+ const profile = provider.resolveThinkingProfile({
+ provider: "github-copilot",
+ modelId: "claude-opus-4-5",
+ compat: { supportedReasoningEfforts: ["low", "medium", "high", "max"] },
+ });
+
+ expect(profile?.levels.map((level) => level.id)).not.toContain("max");
+ });
+
it("uses live plugin config to re-enable discovery after startup disable", async () => {
mocks.resolveCopilotApiToken.mockResolvedValueOnce({
token: "copilot_api_token",
diff --git a/extensions/github-copilot/index.ts b/extensions/github-copilot/index.ts
index a9a8a820160a..f9f0ac97d4da 100644
--- a/extensions/github-copilot/index.ts
+++ b/extensions/github-copilot/index.ts
@@ -21,9 +21,9 @@ import {
upsertAuthProfileWithLock,
} from "openclaw/plugin-sdk/provider-auth";
import { getCachedLiveCatalogValue } from "openclaw/plugin-sdk/provider-catalog-shared";
-import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/string-coerce-runtime";
import { resolveFirstGithubToken } from "./auth.js";
import { githubCopilotMemoryEmbeddingProviderAdapter } from "./embeddings.js";
+import { resolveCopilotExtendedThinkingLevels } from "./model-metadata.js";
import {
PROVIDER_ID,
fetchCopilotModelCatalog,
@@ -35,7 +35,6 @@ import { wrapCopilotProviderStream } from "./stream.js";
const COPILOT_ENV_VARS = ["COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"];
const DEFAULT_COPILOT_MODEL = "github-copilot/claude-opus-4.7";
const DEFAULT_COPILOT_PROFILE_ID = "github-copilot:github";
-const COPILOT_XHIGH_MODEL_IDS = ["gpt-5.4", "gpt-5.3-codex"] as const;
type GithubCopilotPluginConfig = {
discovery?: {
@@ -43,17 +42,6 @@ type GithubCopilotPluginConfig = {
};
};
-function compatSupportsXHigh(
- compat: { supportedReasoningEfforts?: readonly string[] | null } | null | undefined,
-) {
- return (
- Array.isArray(compat?.supportedReasoningEfforts) &&
- compat.supportedReasoningEfforts.some(
- (effort) => normalizeOptionalLowercaseString(effort) === "xhigh",
- )
- );
-}
-
async function loadGithubCopilotRuntime() {
return await import("./register.runtime.js");
}
@@ -463,10 +451,7 @@ export default definePluginEntry({
wrapStreamFn: wrapCopilotProviderStream,
buildReplayPolicy: ({ modelId }) => buildGithubCopilotReplayPolicy(modelId),
resolveThinkingProfile: ({ modelId, compat }) => {
- const modelSupportsXHigh =
- COPILOT_XHIGH_MODEL_IDS.includes(
- (normalizeOptionalLowercaseString(modelId) ?? "") as never,
- ) || compatSupportsXHigh(compat);
+ const extendedLevels = resolveCopilotExtendedThinkingLevels(modelId, compat);
return {
levels: [
{ id: "off" },
@@ -474,7 +459,7 @@ export default definePluginEntry({
{ id: "low" },
{ id: "medium" },
{ id: "high" },
- ...(modelSupportsXHigh ? [{ id: "xhigh" as const }] : []),
+ ...extendedLevels.map((id) => ({ id })),
],
};
},
diff --git a/extensions/github-copilot/model-metadata.ts b/extensions/github-copilot/model-metadata.ts
index fbc4b2761fea..0816c892eefc 100644
--- a/extensions/github-copilot/model-metadata.ts
+++ b/extensions/github-copilot/model-metadata.ts
@@ -1,8 +1,12 @@
// Github Copilot plugin module implements model metadata behavior.
import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-model-shared";
+import { supportsClaudeAdaptiveThinking } from "openclaw/plugin-sdk/provider-model-shared";
import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/string-coerce-runtime";
type CopilotRuntimeApi = "anthropic-messages" | "openai-completions" | "openai-responses";
+type CopilotReasoningCompat = {
+ supportedReasoningEfforts?: readonly string[] | null;
+};
const COPILOT_CHAT_COMPLETIONS_COMPAT: ModelDefinitionConfig["compat"] = {
supportsStore: false,
@@ -10,6 +14,7 @@ const COPILOT_CHAT_COMPLETIONS_COMPAT: ModelDefinitionConfig["compat"] = {
supportsUsageInStreaming: false,
maxTokensField: "max_tokens",
};
+const COPILOT_XHIGH_MODEL_IDS = new Set(["gpt-5.4", "gpt-5.3-codex"]);
const STATIC_MODEL_OVERRIDES = new Map>([
[
@@ -20,6 +25,7 @@ const STATIC_MODEL_OVERRIDES = new Map>([
reasoning: true,
contextWindow: 1_000_000,
maxTokens: 64_000,
+ thinkingLevelMap: { xhigh: null, max: null },
compat: { supportedReasoningEfforts: ["low", "medium", "high"] },
},
],
@@ -31,7 +37,7 @@ const STATIC_MODEL_OVERRIDES = new Map>([
reasoning: true,
contextWindow: 1_000_000,
maxTokens: 64_000,
- thinkingLevelMap: { xhigh: "xhigh" },
+ thinkingLevelMap: { xhigh: "xhigh", max: null },
compat: { supportedReasoningEfforts: ["low", "medium", "high", "xhigh"] },
},
],
@@ -68,6 +74,46 @@ export function resolveCopilotModelCompat(
return isCopilotGeminiModelId(normalized) ? { ...COPILOT_CHAT_COMPLETIONS_COMPAT } : undefined;
}
+function compatSupportsEffort(
+ compat: CopilotReasoningCompat | null | undefined,
+ effort: "xhigh" | "max",
+): boolean {
+ return (
+ Array.isArray(compat?.supportedReasoningEfforts) &&
+ compat.supportedReasoningEfforts.some(
+ (candidate) => normalizeOptionalLowercaseString(candidate) === effort,
+ )
+ );
+}
+
+export function resolveCopilotExtendedThinkingLevels(
+ modelId: string,
+ compat?: CopilotReasoningCompat | null,
+): Array<"xhigh" | "max"> {
+ const normalizedModelId = normalizeOptionalLowercaseString(modelId) ?? "";
+ const staticCompat = resolveStaticCopilotModelOverride(normalizedModelId)?.compat;
+ const isClaudeModel = normalizedModelId.includes("claude");
+ const supportsAdaptiveClaudeEffort =
+ !isClaudeModel || supportsClaudeAdaptiveThinking({ id: normalizedModelId });
+ const levels: Array<"xhigh" | "max"> = [];
+ if (
+ supportsAdaptiveClaudeEffort &&
+ (COPILOT_XHIGH_MODEL_IDS.has(normalizedModelId) ||
+ compatSupportsEffort(compat, "xhigh") ||
+ compatSupportsEffort(staticCompat, "xhigh"))
+ ) {
+ levels.push("xhigh");
+ }
+ if (
+ isClaudeModel &&
+ supportsAdaptiveClaudeEffort &&
+ (compatSupportsEffort(compat, "max") || compatSupportsEffort(staticCompat, "max"))
+ ) {
+ levels.push("max");
+ }
+ return levels;
+}
+
export function resolveStaticCopilotModelOverride(
modelId: string,
): Partial | undefined {
diff --git a/extensions/github-copilot/models.test.ts b/extensions/github-copilot/models.test.ts
index 9f8ad65c1996..5189967de935 100644
--- a/extensions/github-copilot/models.test.ts
+++ b/extensions/github-copilot/models.test.ts
@@ -5,7 +5,8 @@ import { buildCopilotModelDefinition, getDefaultCopilotModelIds } from "./models
import { deriveCopilotApiBaseUrlFromToken, resolveCopilotApiToken } from "./token.js";
import { fetchCopilotUsage } from "./usage.js";
-vi.mock("openclaw/plugin-sdk/provider-model-shared", () => ({
+vi.mock("openclaw/plugin-sdk/provider-model-shared", async (importOriginal) => ({
+ ...(await importOriginal()),
normalizeModelCompat: (model: Record) => model,
resolveProviderEndpoint: (baseUrl: string) => ({
baseUrl,
@@ -116,7 +117,7 @@ describe("github-copilot model defaults", () => {
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 1_000_000,
maxTokens: 64_000,
- thinkingLevelMap: { xhigh: "xhigh" },
+ thinkingLevelMap: { xhigh: "xhigh", max: null },
compat: { supportedReasoningEfforts: ["low", "medium", "high", "xhigh"] },
});
});
@@ -223,8 +224,11 @@ describe("resolveCopilotForwardCompatModel", () => {
});
it("preserves static Anthropic thinking maps for Claude Opus 1M fallback rows", () => {
+ const opus46 = requireResolvedModel(createMockCtx("claude-opus-4.6-1m"));
+ expect(opus46.thinkingLevelMap).toEqual({ xhigh: null, max: null });
+
const result = requireResolvedModel(createMockCtx("claude-opus-4.7-1m-internal"));
- expect(result.thinkingLevelMap).toEqual({ xhigh: "xhigh" });
+ expect(result.thinkingLevelMap).toEqual({ xhigh: "xhigh", max: null });
expect(result.compat).toEqual({
supportedReasoningEfforts: ["low", "medium", "high", "xhigh"],
});
@@ -508,6 +512,24 @@ describe("fetchCopilotModelCatalog", () => {
},
},
},
+ {
+ id: "claude-opus-4-5",
+ name: "Claude Opus 4.5",
+ object: "model",
+ vendor: "Anthropic",
+ capabilities: {
+ type: "chat",
+ limits: {
+ max_context_window_tokens: 200000,
+ max_output_tokens: 64000,
+ },
+ supports: {
+ vision: true,
+ tool_calls: true,
+ reasoning_effort: ["low", "medium", "high", "max"],
+ },
+ },
+ },
{
// Internal router — must be filtered out (id starts with "accounts/").
id: "accounts/msft/routers/abc123",
@@ -557,6 +579,7 @@ describe("fetchCopilotModelCatalog", () => {
"gpt-5.3-codex",
"gemini-3.1-pro-preview",
"claude-opus-4.7-1m-internal",
+ "claude-opus-4-5",
]);
const gpt55 = out.find((m) => m.id === "gpt-5.5");
@@ -589,10 +612,16 @@ describe("fetchCopilotModelCatalog", () => {
const opus1m = out.find((m) => m.id === "claude-opus-4.7-1m-internal");
expect(opus1m?.api).toBe("anthropic-messages");
expect(opus1m?.contextWindow).toBe(1_000_000);
- expect(opus1m?.thinkingLevelMap).toEqual({ xhigh: "xhigh" });
+ expect(opus1m?.thinkingLevelMap).toEqual({ xhigh: "xhigh", max: null });
expect(opus1m?.compat).toEqual({
supportedReasoningEfforts: ["low", "medium", "high", "xhigh"],
});
+
+ const opus45 = out.find((m) => m.id === "claude-opus-4-5");
+ expect(opus45?.thinkingLevelMap).toEqual({ xhigh: null, max: null });
+ expect(opus45?.compat).toEqual({
+ supportedReasoningEfforts: ["low", "medium", "high", "max"],
+ });
});
it("strips trailing slash from baseUrl when building the /models URL", async () => {
diff --git a/extensions/github-copilot/models.ts b/extensions/github-copilot/models.ts
index 98ba9a4a352c..28c708daa5ee 100644
--- a/extensions/github-copilot/models.ts
+++ b/extensions/github-copilot/models.ts
@@ -6,7 +6,10 @@ import type {
import { buildCopilotIdeHeaders, COPILOT_INTEGRATION_ID } from "openclaw/plugin-sdk/provider-auth";
import { readProviderJsonArrayFieldResponse } from "openclaw/plugin-sdk/provider-http";
import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-model-shared";
-import { normalizeModelCompat } from "openclaw/plugin-sdk/provider-model-shared";
+import {
+ normalizeModelCompat,
+ supportsClaudeAdaptiveThinking,
+} from "openclaw/plugin-sdk/provider-model-shared";
import {
asPositiveSafeInteger,
normalizeOptionalLowercaseString,
@@ -173,15 +176,18 @@ function mergeCopilotCompat(
function resolveCopilotThinkingLevelMap(
api: ModelDefinitionConfig["api"],
+ modelId: string,
compat: ModelDefinitionConfig["compat"] | undefined,
): ModelDefinitionConfig["thinkingLevelMap"] | undefined {
- if (
- api === "anthropic-messages" &&
- compat?.supportedReasoningEfforts?.some((effort) => effort === "xhigh")
- ) {
- return { xhigh: "xhigh" };
+ const efforts = compat?.supportedReasoningEfforts;
+ if (api !== "anthropic-messages" || !Array.isArray(efforts)) {
+ return undefined;
}
- return undefined;
+ const supportsAdaptiveEffort = supportsClaudeAdaptiveThinking({ id: modelId });
+ return {
+ xhigh: supportsAdaptiveEffort && efforts.includes("xhigh") ? "xhigh" : null,
+ max: supportsAdaptiveEffort && efforts.includes("max") ? "max" : null,
+ };
}
function mapCopilotApiModelToDefinition(
@@ -215,7 +221,7 @@ function mapCopilotApiModelToDefinition(
const maxTokens = asPositiveSafeInteger(limits?.max_output_tokens) ?? DEFAULT_MAX_TOKENS;
const compat = mergeCopilotCompat(resolveCopilotModelCompat(id), supports?.reasoning_effort);
const api = resolveCopilotApiForVendor(entry.vendor, id);
- const thinkingLevelMap = resolveCopilotThinkingLevelMap(api, compat);
+ const thinkingLevelMap = resolveCopilotThinkingLevelMap(api, id, compat);
const definition: ModelDefinitionConfig = {
id,
diff --git a/extensions/github-copilot/provider-policy-api.test.ts b/extensions/github-copilot/provider-policy-api.test.ts
index 931b26fb8d63..7471ca2bc6f6 100644
--- a/extensions/github-copilot/provider-policy-api.test.ts
+++ b/extensions/github-copilot/provider-policy-api.test.ts
@@ -34,6 +34,36 @@ describe("github-copilot provider-policy-api", () => {
).toContain("xhigh");
});
+ it("appends max when catalog compat advertises it", () => {
+ expect(
+ resolveThinkingProfile({
+ provider: "github-copilot",
+ modelId: "claude-fable-5",
+ compat: { supportedReasoningEfforts: ["low", "medium", "high", "max"] },
+ })?.levels.map((level) => level.id),
+ ).toContain("max");
+ });
+
+ it("does not expose max for non-Anthropic Copilot transports", () => {
+ expect(
+ resolveThinkingProfile({
+ provider: "github-copilot",
+ modelId: "future-copilot-model",
+ compat: { supportedReasoningEfforts: ["low", "medium", "high", "max"] },
+ })?.levels.map((level) => level.id),
+ ).not.toContain("max");
+ });
+
+ it("does not expose adaptive effort for older Claude models", () => {
+ expect(
+ resolveThinkingProfile({
+ provider: "github-copilot",
+ modelId: "claude-opus-4-5",
+ compat: { supportedReasoningEfforts: ["low", "medium", "high", "max"] },
+ })?.levels.map((level) => level.id),
+ ).not.toContain("max");
+ });
+
it("appends xhigh for static Copilot metadata overrides", () => {
expect(
resolveThinkingProfile({
diff --git a/extensions/github-copilot/provider-policy-api.ts b/extensions/github-copilot/provider-policy-api.ts
index e55483d9dabe..01f14352e0ad 100644
--- a/extensions/github-copilot/provider-policy-api.ts
+++ b/extensions/github-copilot/provider-policy-api.ts
@@ -1,31 +1,12 @@
// Github Copilot API module exposes the plugin public contract.
import type { ProviderDefaultThinkingPolicyContext } from "openclaw/plugin-sdk/core";
-import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/string-coerce-runtime";
-import { resolveStaticCopilotModelOverride } from "./model-metadata.js";
-
-const COPILOT_XHIGH_MODEL_IDS = ["gpt-5.4", "gpt-5.3-codex"] as const;
-
-function compatSupportsXHigh(
- compat: { supportedReasoningEfforts?: readonly string[] | null } | null | undefined,
-) {
- return (
- Array.isArray(compat?.supportedReasoningEfforts) &&
- compat.supportedReasoningEfforts.some(
- (effort) => normalizeOptionalLowercaseString(effort) === "xhigh",
- )
- );
-}
+import { resolveCopilotExtendedThinkingLevels } from "./model-metadata.js";
export function resolveThinkingProfile(context: ProviderDefaultThinkingPolicyContext) {
if (context.provider.trim().toLowerCase() !== "github-copilot") {
return null;
}
- const normalizedModelId = normalizeOptionalLowercaseString(context.modelId) ?? "";
- const staticCompat = resolveStaticCopilotModelOverride(normalizedModelId)?.compat;
- const modelSupportsXHigh =
- COPILOT_XHIGH_MODEL_IDS.includes(normalizedModelId as never) ||
- compatSupportsXHigh(context.compat) ||
- compatSupportsXHigh(staticCompat);
+ const extendedLevels = resolveCopilotExtendedThinkingLevels(context.modelId, context.compat);
return {
levels: [
@@ -34,7 +15,7 @@ export function resolveThinkingProfile(context: ProviderDefaultThinkingPolicyCon
{ id: "low" as const },
{ id: "medium" as const },
{ id: "high" as const },
- ...(modelSupportsXHigh ? [{ id: "xhigh" as const }] : []),
+ ...extendedLevels.map((id) => ({ id })),
],
};
}
diff --git a/packages/agent-core/src/agent-loop.test.ts b/packages/agent-core/src/agent-loop.test.ts
index 6f4d50ffe1f4..b179e008813e 100644
--- a/packages/agent-core/src/agent-loop.test.ts
+++ b/packages/agent-core/src/agent-loop.test.ts
@@ -142,3 +142,91 @@ describe("agentLoop streaming updates", () => {
}
});
});
+
+describe("agentLoop thinking state", () => {
+  /** Build a zero-usage assistant message attributed to the given model. */
+  function makeAssistantMessage(
+    activeModel: Model,
+    content: AssistantMessage["content"],
+  ): AssistantMessage {
+    return {
+      role: "assistant",
+      content,
+      api: activeModel.api,
+      provider: activeModel.provider,
+      model: activeModel.id,
+      usage: {
+        input: 0,
+        output: 0,
+        cacheRead: 0,
+        cacheWrite: 0,
+        totalTokens: 0,
+        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+      },
+      stopReason: "stop",
+      timestamp: 1,
+    };
+  }
+
+  it.each([
+    {
+      name: "disables reasoning after leaving Fable",
+      initialModel: { ...model, id: "claude-fable-5", thinkingLevelMap: { off: "low" } },
+      nextModel: model,
+      expected: ["low", undefined],
+    },
+    {
+      name: "uses Fable's low fallback after entering Fable",
+      initialModel: model,
+      nextModel: { ...model, id: "claude-fable-5", thinkingLevelMap: { off: "low" } },
+      expected: [undefined, "low"],
+    },
+  ])("$name", async ({ initialModel, nextModel, expected }) => {
+    const observedReasoning: Array<string | undefined> = [];
+    let callCount = 0;
+    const streamFn: StreamFn = (activeModel, _context, options) => {
+      observedReasoning.push(options?.reasoning);
+      callCount += 1;
+      const stream = createAssistantMessageEventStream();
+      queueMicrotask(() => {
+        // First reply is a tool call, second is final text; the test expects exactly two calls.
+        const content: AssistantMessage["content"] =
+          callCount === 1
+            ? [{ type: "toolCall", id: "tool-1", name: "missing_tool", arguments: {} }]
+            : [{ type: "text", text: "done" }];
+        stream.push({
+          type: "done",
+          reason: "stop",
+          message: makeAssistantMessage(activeModel, content),
+        });
+        stream.end();
+      });
+      return stream;
+    };
+    let prepared = false;
+    const stream = agentLoop(
+      [{ role: "user", content: "hello", timestamp: 1 }],
+      { systemPrompt: "", messages: [] },
+      {
+        ...config,
+        model: initialModel,
+        thinkingLevel: "off",
+        reasoning: initialModel.thinkingLevelMap?.off === "low" ? "low" : undefined,
+        prepareNextTurn: () => {
+          // Hand over the replacement model once; afterwards return no snapshot.
+          if (prepared) {
+            return undefined;
+          }
+          prepared = true;
+          return { model: nextModel };
+        },
+      },
+      undefined,
+      streamFn,
+    );
+
+    await collectEvents(stream);
+
+    expect(observedReasoning).toEqual(expected);
+  });
+});
diff --git a/packages/agent-core/src/agent-loop.ts b/packages/agent-core/src/agent-loop.ts
index 1bd912210c0a..e1e584b9ab66 100644
--- a/packages/agent-core/src/agent-loop.ts
+++ b/packages/agent-core/src/agent-loop.ts
@@ -9,6 +9,7 @@ import type {
ToolResultMessage,
} from "../../llm-core/src/index.js";
import type { EventStream as SourceEventStream } from "../../llm-core/src/index.js";
+import { resolveAgentReasoningOption } from "./reasoning.js";
import { type AgentCoreStreamRuntimeDeps, resolveAgentCoreStreamFn } from "./runtime-deps.js";
import type {
AgentContext,
@@ -341,14 +342,19 @@ async function runLoop(
const nextTurnSnapshot = await config.prepareNextTurn?.(nextTurnContext);
if (nextTurnSnapshot) {
currentContext = nextTurnSnapshot.context ?? currentContext;
+ const nextModel = nextTurnSnapshot.model ?? config.model;
+ const nextThinkingLevel = nextTurnSnapshot.thinkingLevel ?? config.thinkingLevel;
+ const shouldResolveReasoning =
+ nextTurnSnapshot.thinkingLevel !== undefined ||
+ (nextTurnSnapshot.model !== undefined && nextThinkingLevel !== undefined);
+ const nextReasoning =
+ shouldResolveReasoning && nextThinkingLevel !== undefined
+ ? resolveAgentReasoningOption(nextModel, nextThinkingLevel)
+ : config.reasoning;
config = Object.assign({}, config, {
- model: nextTurnSnapshot.model ?? config.model,
- reasoning:
- nextTurnSnapshot.thinkingLevel === undefined
- ? config.reasoning
- : nextTurnSnapshot.thinkingLevel === "off"
- ? undefined
- : nextTurnSnapshot.thinkingLevel,
+ model: nextModel,
+ thinkingLevel: nextThinkingLevel,
+ reasoning: nextReasoning,
});
}
diff --git a/packages/agent-core/src/agent.ts b/packages/agent-core/src/agent.ts
index 57fa069ee73e..b052a7256fee 100644
--- a/packages/agent-core/src/agent.ts
+++ b/packages/agent-core/src/agent.ts
@@ -9,6 +9,7 @@ import type {
Transport,
} from "../../llm-core/src/index.js";
import { runAgentLoop, runAgentLoopContinue } from "./agent-loop.js";
+import { resolveAgentReasoningOption } from "./reasoning.js";
import { type AgentCoreStreamRuntimeDeps, resolveAgentCoreStreamFn } from "./runtime-deps.js";
import type {
AfterToolCallContext,
@@ -470,8 +471,11 @@ export class Agent {
let skipInitialSteeringPoll = options.skipInitialSteeringPoll === true;
return {
model: this.mutableState.model,
- reasoning:
- this.mutableState.thinkingLevel === "off" ? undefined : this.mutableState.thinkingLevel,
+ thinkingLevel: this.mutableState.thinkingLevel,
+ reasoning: resolveAgentReasoningOption(
+ this.mutableState.model,
+ this.mutableState.thinkingLevel,
+ ),
sessionId: this.sessionId,
onPayload: this.onPayload,
onResponse: this.onResponse,
diff --git a/packages/agent-core/src/harness/agent-harness.ts b/packages/agent-core/src/harness/agent-harness.ts
index a42f035000eb..e9787d2eddf0 100644
--- a/packages/agent-core/src/harness/agent-harness.ts
+++ b/packages/agent-core/src/harness/agent-harness.ts
@@ -6,6 +6,7 @@ import type {
UserMessage,
} from "../../../llm-core/src/index.js";
import { runAgentLoop } from "../agent-loop.js";
+import { resolveAgentReasoningOption } from "../reasoning.js";
import { type AgentCoreRuntimeDeps, resolveAgentCoreStreamFn } from "../runtime-deps.js";
import type {
AgentContext,
@@ -489,7 +490,8 @@ export class CoreAgentHarness<
const turnState = getTurnState();
return {
model: turnState.model,
- reasoning: turnState.thinkingLevel === "off" ? undefined : turnState.thinkingLevel,
+ thinkingLevel: turnState.thinkingLevel,
+ reasoning: resolveAgentReasoningOption(turnState.model, turnState.thinkingLevel),
convertToLlm,
transformContext: async (messages) => {
const result = await this.emitHook({ type: "context", messages: [...messages] });
diff --git a/packages/agent-core/src/harness/compaction/compaction.test.ts b/packages/agent-core/src/harness/compaction/compaction.test.ts
new file mode 100644
index 000000000000..7245d3db05ba
--- /dev/null
+++ b/packages/agent-core/src/harness/compaction/compaction.test.ts
@@ -0,0 +1,65 @@
+import { describe, expect, it, vi } from "vitest";
+import { createAssistantMessageEventStream } from "../../llm.js";
+import type { AssistantMessage, Model, StreamFn } from "../../llm.js";
+import { generateSummary } from "./compaction.js";
+
+describe("generateSummary thinking options", () => {
+  it("maps explicit Fable off to low effort for compaction", async () => {
+    // Deployment alias: only params.canonicalModelId names Claude Fable 5 here,
+    // and the catalog reasoning flag is false.
+    const model: Model = {
+      id: "production-fable",
+      name: "Production Fable",
+      api: "anthropic-messages",
+      provider: "anthropic",
+      baseUrl: "https://api.anthropic.com",
+      reasoning: false,
+      input: ["text"],
+      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+      contextWindow: 1_000_000,
+      maxTokens: 128_000,
+      params: { canonicalModelId: "claude-fable-5" },
+    };
+    const summaryMessage: AssistantMessage = {
+      role: "assistant",
+      content: [{ type: "text", text: "summary" }],
+      api: model.api,
+      provider: model.provider,
+      model: model.id,
+      usage: {
+        input: 0,
+        output: 0,
+        cacheRead: 0,
+        cacheWrite: 0,
+        totalTokens: 0,
+        cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+      },
+      stopReason: "stop",
+      timestamp: 1,
+    };
+    const streamFn = vi.fn<StreamFn>((_model, _context, options) => {
+      // The "off" argument passed to generateSummary below must arrive here as "low".
+      expect(options?.reasoning).toBe("low");
+      const stream = createAssistantMessageEventStream();
+      stream.push({ type: "done", reason: "stop", message: summaryMessage });
+      stream.end();
+      return stream;
+    });
+
+    const result = await generateSummary(
+      [{ role: "user", content: "hello", timestamp: 1 }],
+      model,
+      1000,
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      undefined,
+      "off",
+      streamFn,
+    );
+
+    expect(result).toEqual({ ok: true, value: "summary" });
+    expect(streamFn).toHaveBeenCalledOnce();
+  });
+});
diff --git a/packages/agent-core/src/harness/compaction/compaction.ts b/packages/agent-core/src/harness/compaction/compaction.ts
index c497fe0eeb7f..bbde2b19edea 100644
--- a/packages/agent-core/src/harness/compaction/compaction.ts
+++ b/packages/agent-core/src/harness/compaction/compaction.ts
@@ -1,12 +1,14 @@
// Agent Core module implements compaction behavior.
-import type {
- AssistantMessage,
- Context,
- Model,
- SimpleStreamOptions,
- StreamFn,
- Usage,
+import {
+ resolveClaudeFable5ModelIdentity,
+ type AssistantMessage,
+ type Context,
+ type Model,
+ type SimpleStreamOptions,
+ type StreamFn,
+ type Usage,
} from "../../../../llm-core/src/index.js";
+import { resolveAgentReasoningOption } from "../../reasoning.js";
import {
type AgentCoreCompletionRuntimeDeps,
resolveAgentCoreCompleteFn,
@@ -517,8 +519,11 @@ function createSummarizationOptions(
thinkingLevel: ThinkingLevel | undefined,
): SimpleStreamOptions {
const options: SimpleStreamOptions = { maxTokens, signal, apiKey, headers };
- if (model.reasoning && thinkingLevel && thinkingLevel !== "off") {
- options.reasoning = thinkingLevel;
+ const fableReasoning =
+ (model.api === "anthropic-messages" || model.api === "bedrock-converse-stream") &&
+ resolveClaudeFable5ModelIdentity(model) !== undefined;
+ if ((model.reasoning || fableReasoning) && thinkingLevel) {
+ options.reasoning = resolveAgentReasoningOption(model, thinkingLevel);
}
return options;
}
diff --git a/packages/agent-core/src/reasoning.test.ts b/packages/agent-core/src/reasoning.test.ts
new file mode 100644
index 000000000000..ba3b0aac049a
--- /dev/null
+++ b/packages/agent-core/src/reasoning.test.ts
@@ -0,0 +1,54 @@
+import { describe, expect, it } from "vitest";
+import type { Model } from "../../llm-core/src/index.js";
+import { resolveAgentReasoningOption } from "./reasoning.js";
+
+/** Build a reasoning-capable Anthropic test model; `overrides` replace any default field. */
+function makeModel(
+  thinkingLevelMap?: Model["thinkingLevelMap"],
+  overrides: Partial<Model> = {},
+): Model {
+  return {
+    id: "test-model",
+    name: "Test Model",
+    api: "anthropic-messages",
+    provider: "anthropic",
+    baseUrl: "https://example.test",
+    reasoning: true,
+    input: ["text"],
+    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+    contextWindow: 1000,
+    maxTokens: 100,
+    thinkingLevelMap,
+    ...overrides,
+  };
+}
+
+describe("resolveAgentReasoningOption", () => {
+  it("uses a model's enabled fallback for explicit off", () => {
+    expect(resolveAgentReasoningOption(makeModel({ off: "low" }), "off")).toBe("low");
+  });
+
+  it.each([undefined, null, "none"])("disables reasoning when off maps to %s", (offFallback) => {
+    expect(resolveAgentReasoningOption(makeModel({ off: offFallback }), "off")).toBeUndefined();
+  });
+
+  it("preserves enabled thinking levels", () => {
+    expect(resolveAgentReasoningOption(makeModel({ off: "low" }), "high")).toBe("high");
+  });
+
+  it.each(["anthropic-messages", "bedrock-converse-stream"] as const)(
+    "maps explicit off to low for canonical Fable aliases on %s",
+    (api) => {
+      expect(
+        resolveAgentReasoningOption(
+          makeModel(undefined, {
+            id: "production-deployment",
+            api,
+            params: { canonicalModelId: "claude-fable-5" },
+          }),
+          "off",
+        ),
+      ).toBe("low");
+    },
+  );
+});
diff --git a/packages/agent-core/src/reasoning.ts b/packages/agent-core/src/reasoning.ts
new file mode 100644
index 000000000000..0c7896b14893
--- /dev/null
+++ b/packages/agent-core/src/reasoning.ts
@@ -0,0 +1,52 @@
+import {
+  resolveClaudeFable5ModelIdentity,
+  type Model,
+  type SimpleStreamOptions,
+} from "../../llm-core/src/index.js";
+import type { ThinkingLevel } from "./types.js";
+
+/** Non-nullish values accepted by the `reasoning` stream option. */
+type EnabledThinkingLevel = NonNullable<SimpleStreamOptions["reasoning"]>;
+
+/** Allow-list used to validate `thinkingLevelMap.off` fallbacks at runtime. */
+const ENABLED_THINKING_LEVELS = new Set<EnabledThinkingLevel>([
+  "minimal",
+  "low",
+  "medium",
+  "high",
+  "xhigh",
+  "max",
+]);
+
+/** Type guard: true only for members of `ENABLED_THINKING_LEVELS`. */
+function isEnabledThinkingLevel(value: unknown): value is EnabledThinkingLevel {
+  return ENABLED_THINKING_LEVELS.has(value as EnabledThinkingLevel);
+}
+
+/**
+ * Map the agent's logical thinking level to the `reasoning` stream option.
+ *
+ * Levels other than "off" pass through unchanged. For "off", a non-nullish
+ * `model.thinkingLevelMap.off` wins; otherwise Claude Fable 5 models on the
+ * `anthropic-messages` or `bedrock-converse-stream` APIs fall back to "low".
+ * A fallback outside the allow-list (for example "none") yields `undefined`.
+ *
+ * @param model - Model whose effort map, API, and identity drive the "off" fallback.
+ * @param thinkingLevel - Logical level selected for the turn.
+ * @returns The reasoning option to send, or `undefined` to leave reasoning unset.
+ */
+export function resolveAgentReasoningOption(
+  model: Model,
+  thinkingLevel: ThinkingLevel,
+): SimpleStreamOptions["reasoning"] {
+  if (thinkingLevel !== "off") {
+    return thinkingLevel;
+  }
+  const offFallback =
+    model.thinkingLevelMap?.off ??
+    ((model.api === "anthropic-messages" || model.api === "bedrock-converse-stream") &&
+    resolveClaudeFable5ModelIdentity(model)
+      ? "low"
+      : undefined);
+  return isEnabledThinkingLevel(offFallback) ? offFallback : undefined;
+}
diff --git a/packages/agent-core/src/types.ts b/packages/agent-core/src/types.ts
index ba6e8eace365..e1d8a0d0360e 100644
--- a/packages/agent-core/src/types.ts
+++ b/packages/agent-core/src/types.ts
@@ -133,6 +133,8 @@ export interface PrepareNextTurnContext extends ShouldStopAfterTurnContext {}
export interface AgentLoopConfig extends SimpleStreamOptions {
model: Model;
+ /** Logical thinking level retained across model changes before provider mapping. */
+ thinkingLevel?: ThinkingLevel;
/**
* Converts AgentMessage[] to LLM-compatible Message[] before each LLM call.
diff --git a/packages/llm-core/src/index.ts b/packages/llm-core/src/index.ts
index 191aa86d9ff0..9a1abdf79402 100644
--- a/packages/llm-core/src/index.ts
+++ b/packages/llm-core/src/index.ts
@@ -1,4 +1,5 @@
/** Public LLM core contracts shared by providers, plugin SDK wrappers, and tests. */
+export * from "./model-contracts/anthropic.js";
export * from "./types.js";
export * from "./utils/diagnostics.js";
export * from "./utils/event-stream.js";
diff --git a/packages/llm-core/src/model-contracts/anthropic.ts b/packages/llm-core/src/model-contracts/anthropic.ts
new file mode 100644
index 000000000000..ed2d4a677e90
--- /dev/null
+++ b/packages/llm-core/src/model-contracts/anthropic.ts
@@ -0,0 +1,104 @@
+/** Minimal model shape needed to identify a Claude model. */
+type ClaudeModelRef = {
+  id?: string;
+  params?: Record<string, unknown>;
+};
+
+/** Model ref that may also carry a provider-published effort mapping. */
+type ClaudeEffortModelRef = ClaudeModelRef & {
+  thinkingLevelMap?: Record<string, string | null | undefined>;
+};
+
+/**
+ * Lowercase and trim a model id, drop a leading "anthropic/" prefix, and replace
+ * each run of ".", "_", or whitespace with a single "-".
+ */
+function normalizeClaudeModelId(modelId?: string): string {
+  const normalized = modelId?.trim().toLowerCase() ?? "";
+  const unprefixed = normalized.startsWith("anthropic/")
+    ? normalized.slice("anthropic/".length)
+    : normalized;
+  return unprefixed.replace(/[._\s]+/g, "-");
+}
+
+/** Thinking levels offered for Claude Fable 5; "high" is the default level. */
+export const CLAUDE_FABLE_5_THINKING_PROFILE = {
+  levels: [
+    { id: "off" },
+    { id: "minimal" },
+    { id: "low" },
+    { id: "medium" },
+    { id: "high" },
+    { id: "xhigh" },
+    { id: "adaptive" },
+    { id: "max" },
+  ],
+  defaultLevel: "high",
+  preserveWhenCatalogReasoningFalse: true,
+} as const;
+
+/**
+ * Resolve the canonical normalized Claude model id for one runtime model ref.
+ *
+ * A non-blank `params.canonicalModelId` takes precedence over `id`; any text before
+ * the first "claude-" segment (for example a provider prefix) is dropped.
+ */
+export function resolveClaudeModelIdentity(ref: ClaudeModelRef): string {
+  const rawCanonicalModelId = ref.params?.canonicalModelId;
+  // A blank override carries no identity, so fall back to the model id.
+  const configuredCanonicalModelId =
+    typeof rawCanonicalModelId === "string" && rawCanonicalModelId.trim() !== ""
+      ? rawCanonicalModelId
+      : undefined;
+  const normalized = normalizeClaudeModelId(configuredCanonicalModelId ?? ref.id);
+  const match = /(?:^|[-/])claude-/.exec(normalized);
+  return match
+    ? normalized.slice((match.index ?? 0) + (match[0].startsWith("claude-") ? 0 : 1))
+    : normalized;
+}
+
+/** Resolve Claude Fable 5 through direct ids, cloud ids, or deployment metadata. */
+export function resolveClaudeFable5ModelIdentity(ref: ClaudeModelRef): string | undefined {
+  const normalized = resolveClaudeModelIdentity(ref);
+  const match = /(?:^|-)claude-fable-5(?=$|[^a-z0-9])/.exec(normalized);
+  if (!match) {
+    return undefined;
+  }
+  return normalized.slice((match.index ?? 0) + (match[0].startsWith("-") ? 1 : 0));
+}
+
+/** Return whether a Claude model supports adaptive thinking. */
+export function supportsClaudeAdaptiveThinking(ref: ClaudeModelRef): boolean {
+  const modelId = resolveClaudeModelIdentity(ref);
+  return /(?:^|-)claude-(?:fable-5|opus-4-(?:6|7|8)|sonnet-4-6)(?=$|[^a-z0-9])/.test(modelId);
+}
+
+/** Return whether a Claude model supports native max effort. */
+export function supportsClaudeNativeMaxEffort(ref: ClaudeModelRef): boolean {
+  return supportsClaudeAdaptiveThinking(ref);
+}
+
+/** Return whether a Claude model supports native xhigh effort. */
+export function supportsClaudeNativeXhighEffort(ref: ClaudeModelRef): boolean {
+  const modelId = resolveClaudeModelIdentity(ref);
+  return /(?:^|-)claude-(?:fable-5|opus-4-(?:7|8))(?=$|[^a-z0-9])/.test(modelId);
+}
+
+/**
+ * Fill native Claude effort mappings only when the provider did not publish a
+ * narrower route-specific contract.
+ */
+export function resolveClaudeNativeThinkingLevelMap(
+  ref: ClaudeEffortModelRef,
+): Record<string, string | null | undefined> | undefined {
+  if (ref.thinkingLevelMap !== undefined) {
+    return ref.thinkingLevelMap;
+  }
+  if (!supportsClaudeNativeMaxEffort(ref)) {
+    return undefined;
+  }
+  return {
+    xhigh: supportsClaudeNativeXhighEffort(ref) ? "xhigh" : null,
+    max: "max",
+  };
+}
diff --git a/src/agents/anthropic-transport-stream.test.ts b/src/agents/anthropic-transport-stream.test.ts
index 3469b6a76600..e807befc151b 100644
--- a/src/agents/anthropic-transport-stream.test.ts
+++ b/src/agents/anthropic-transport-stream.test.ts
@@ -139,6 +139,7 @@ function makeAnthropicTransportModel(
provider?: string;
baseUrl?: string;
reasoning?: boolean;
+ params?: Record;
maxTokens?: number;
thinkingLevelMap?: AnthropicMessagesModel["thinkingLevelMap"];
headers?: Record;
@@ -153,6 +154,7 @@ function makeAnthropicTransportModel(
provider: params.provider ?? "anthropic",
baseUrl: params.baseUrl ?? "https://api.anthropic.com",
reasoning: params.reasoning ?? true,
+ ...(params.params ? { params: params.params } : {}),
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 200000,
@@ -562,6 +564,118 @@ describe("anthropic transport stream", () => {
expect(result.errorMessage).toBe("OpenClaw transport error: malformed_streaming_fragment");
});
+ it.each(["anthropic", "anthropic-vertex"])(
+ "surfaces structured Anthropic streaming refusals for %s",
+ async (provider) => {
+ guardedFetchMock.mockResolvedValueOnce(
+ createSseResponse([
+ {
+ type: "message_start",
+ message: { id: "msg_refusal", usage: { input_tokens: 3, output_tokens: 0 } },
+ },
+ {
+ type: "content_block_start",
+ index: 0,
+ content_block: { type: "text", text: "" },
+ },
+ {
+ type: "content_block_delta",
+ index: 0,
+ delta: { type: "text_delta", text: "discard this partial output" },
+ },
+ { type: "content_block_stop", index: 0 },
+ {
+ type: "message_delta",
+ delta: {
+ stop_reason: "refusal",
+ stop_details: {
+ type: "refusal",
+ category: "bio",
+ explanation: "This request is not allowed.",
+ },
+ },
+ usage: { input_tokens: 3, output_tokens: 2 },
+ },
+ { type: "message_stop" },
+ ]),
+ );
+
+ const streamFn = createAnthropicMessagesTransportStreamFn();
+ const stream = await Promise.resolve(
+ streamFn(
+ makeAnthropicTransportModel({
+ id: "claude-fable-5",
+ name: "Claude Fable 5",
+ provider,
+ }),
+ { messages: [{ role: "user", content: "hello" }] } as AnthropicStreamContext,
+ { apiKey: "sk-ant-api" } as AnthropicStreamOptions,
+ ),
+ );
+ const eventTypes: string[] = [];
+ for await (const event of stream as AsyncIterable<{ type: string }>) {
+ eventTypes.push(event.type);
+ }
+ const result = await stream.result();
+
+ expect(eventTypes).toEqual(["error"]);
+ expect(result.stopReason).toBe("error");
+ expect(result.content).toEqual([]);
+ expect(result.errorMessage).toBe(
+ "Anthropic refusal (category: bio): This request is not allowed.",
+ );
+ expect(result.usage).toMatchObject({ input: 3, output: 2 });
+ expect(result.diagnostics).toEqual([
+ expect.objectContaining({
+ type: "provider_refusal",
+ details: {
+ provider,
+ category: "bio",
+ explanation: "This request is not allowed.",
+ },
+ }),
+ ]);
+ },
+ );
+
+ it("discards buffered Fable output when the transport ends before terminal status", async () => {
+ guardedFetchMock.mockResolvedValueOnce(
+ createSseResponse([
+ {
+ type: "content_block_start",
+ index: 0,
+ content_block: { type: "text", text: "" },
+ },
+ {
+ type: "content_block_delta",
+ index: 0,
+ delta: { type: "text_delta", text: "unsafe partial output" },
+ },
+ ]),
+ );
+ const streamFn = createAnthropicMessagesTransportStreamFn();
+ const stream = await Promise.resolve(
+ streamFn(
+ makeAnthropicTransportModel({
+ id: "claude-fable-5",
+ name: "Claude Fable 5",
+ }),
+ { messages: [{ role: "user", content: "hello" }] } as AnthropicStreamContext,
+ { apiKey: "sk-ant-api" } as AnthropicStreamOptions,
+ ),
+ );
+ const eventTypes: string[] = [];
+ for await (const event of stream as AsyncIterable<{ type: string }>) {
+ eventTypes.push(event.type);
+ }
+ const result = await stream.result();
+
+ expect(eventTypes).toEqual(["error"]);
+ expect(result.stopReason).toBe("error");
+ expect(result.content).toEqual([]);
+ expect(result.errorMessage).toBe("Anthropic stream ended before message_stop");
+ });
+
it("preserves unsafe integer Anthropic tool-use input deltas", async () => {
guardedFetchMock.mockResolvedValueOnce(
createSseResponse([
@@ -1385,7 +1499,10 @@ describe("anthropic transport stream", () => {
const highSurrogate = String.fromCharCode(0xd83d);
const signedThinking = `keep${highSurrogate}signed`;
await runTransportStream(
- makeAnthropicTransportModel(),
+ makeAnthropicTransportModel({
+ id: "claude-fable-5",
+ name: "Claude Fable 5",
+ }),
{
messages: [
{ role: "user", content: "hello" },
@@ -1393,7 +1510,7 @@ describe("anthropic transport stream", () => {
role: "assistant",
provider: "anthropic",
api: "anthropic-messages",
- model: "claude-sonnet-4-6",
+ model: "claude-fable-5",
stopReason: "stop",
timestamp: 0,
content: [
@@ -1402,6 +1519,11 @@ describe("anthropic transport stream", () => {
thinking: signedThinking,
thinkingSignature: "sig_1",
},
+ {
+ type: "thinking",
+ thinking: "",
+ thinkingSignature: "sig_omitted",
+ },
],
},
{ role: "user", content: "again" },
@@ -1423,6 +1545,11 @@ describe("anthropic transport stream", () => {
thinking: signedThinking,
signature: "sig_1",
},
+ {
+ type: "thinking",
+ thinking: "",
+ signature: "sig_omitted",
+ },
]);
});
@@ -1942,7 +2069,7 @@ describe("anthropic transport stream", () => {
expect(cancelCalled).toBe(true);
});
- it("maps adaptive thinking effort for Claude 4.6 transport runs", async () => {
+ it("maps unsupported xhigh to high effort for Claude 4.6 transport runs", async () => {
const model = makeAnthropicTransportModel({
id: "claude-opus-4-6",
name: "Claude Opus 4.6",
@@ -1962,7 +2089,239 @@ describe("anthropic transport stream", () => {
const payload = latestAnthropicRequest().payload;
expect(payload.thinking).toEqual({ type: "adaptive" });
- expect(payload.output_config).toEqual({ effort: "max" });
+ expect(payload.output_config).toEqual({ effort: "high" });
+ });
+
+ it("does not infer adaptive thinking from forward-compatible effort maps", async () => {
+ const model = makeAnthropicTransportModel({
+ id: "claude-future",
+ name: "Future Claude",
+ provider: "github-copilot",
+ reasoning: true,
+ thinkingLevelMap: { xhigh: null, max: "max" },
+ });
+
+ await runTransportStream(
+ model,
+ {
+ messages: [{ role: "user", content: "Think as much as supported." }],
+ } as AnthropicStreamContext,
+ {
+ apiKey: "copilot-token",
+ reasoning: "max",
+ } as AnthropicStreamOptions,
+ );
+
+ const payload = latestAnthropicRequest().payload;
+ expect(payload.thinking).toEqual({ type: "enabled", budget_tokens: 7168 });
+ expect(payload.output_config).toBeUndefined();
+ });
+
+ it("honors provider effort restrictions for transport runs", async () => {
+ const model = makeAnthropicTransportModel({
+ id: "claude-opus-4.7-1m-internal",
+ name: "Claude Opus 4.7",
+ provider: "github-copilot",
+ maxTokens: 64_000,
+ thinkingLevelMap: { xhigh: "xhigh" },
+ });
+
+ await runTransportStream(
+ model,
+ {
+ messages: [{ role: "user", content: "Think as much as supported." }],
+ } as AnthropicStreamContext,
+ {
+ apiKey: "copilot-token",
+ reasoning: "max",
+ } as AnthropicStreamOptions,
+ );
+
+ expect(latestAnthropicRequest().payload.output_config).toEqual({ effort: "xhigh" });
+ });
+
+ it("uses canonical Claude policy for transport deployment aliases", async () => {
+ const model = makeAnthropicTransportModel({
+ id: "production-claude",
+ name: "Production Claude",
+ params: { canonicalModelId: "claude-opus-4-8" },
+ reasoning: false,
+ thinkingLevelMap: { xhigh: "xhigh", max: "max" },
+ maxTokens: 8192,
+ });
+
+ await runTransportStream(
+ model,
+ {
+ messages: [{ role: "user", content: "Think extra hard." }],
+ } as AnthropicStreamContext,
+ {
+ apiKey: "sk-ant-api",
+ reasoning: "xhigh",
+ temperature: 0.2,
+ } as AnthropicStreamOptions,
+ );
+
+ const payload = latestAnthropicRequest().payload;
+ expect(payload.model).toBe("production-claude");
+ expect(payload.thinking).toEqual({ type: "adaptive" });
+ expect(payload.output_config).toEqual({ effort: "xhigh" });
+ expect(payload).not.toHaveProperty("temperature");
+ });
+
+ it.each([
+ { canonicalModelId: "claude-opus-4-8", expectedTemperature: undefined },
+ { canonicalModelId: "claude-opus-4-6", expectedTemperature: 0.2 },
+ ] as const)(
+ "normalizes temperature for canonical $canonicalModelId transport aliases when thinking is off",
+ async ({ canonicalModelId, expectedTemperature }) => {
+ const model = makeAnthropicTransportModel({
+ id: "production-claude",
+ name: "Production Claude",
+ params: { canonicalModelId },
+ reasoning: false,
+ thinkingLevelMap: { xhigh: "xhigh", max: "max" },
+ maxTokens: 8192,
+ });
+
+ await runTransportStream(
+ model,
+ { messages: [{ role: "user", content: "Reply briefly." }] } as AnthropicStreamContext,
+ { apiKey: "sk-ant-api", temperature: 0.2 } as AnthropicStreamOptions,
+ );
+
+ expect(latestAnthropicRequest().payload.temperature).toBe(expectedTemperature);
+ },
+ );
+
+ it("uses always-on adaptive thinking for Claude Fable 5 transport runs", async () => {
+ const model = makeAnthropicTransportModel({
+ id: "prod-primary",
+ name: "Production Claude",
+ provider: "microsoft-foundry",
+ params: { canonicalModelId: "claude-fable-5" },
+ reasoning: false,
+ baseUrl: "https://example.services.ai.azure.com/anthropic",
+ maxTokens: 128_000,
+ });
+
+ guardedFetchMock.mockResolvedValueOnce(
+ createSseResponse([
+ {
+ type: "message_start",
+ message: {
+ id: "msg_1",
+ model: "claude-fable-5",
+ usage: { input_tokens: 1, output_tokens: 0 },
+ },
+ },
+ {
+ type: "message_delta",
+ delta: { stop_reason: "end_turn" },
+ usage: { input_tokens: 1, output_tokens: 1 },
+ },
+ { type: "message_stop" },
+ ]),
+ );
+ const result = await runTransportStream(
+ model,
+ {
+ messages: [{ role: "user", content: "Think." }],
+ } as AnthropicStreamContext,
+ {
+ apiKey: "sk-ant-api",
+ temperature: 0.2,
+ toolChoice: { type: "tool", name: "read_file" },
+ } as AnthropicStreamOptions,
+ );
+
+ const payload = latestAnthropicRequest().payload;
+ expect(payload.thinking).toEqual({ type: "adaptive", display: "summarized" });
+ expect(payload.output_config).toEqual({ effort: "high" });
+ expect(payload.tool_choice).toEqual({ type: "auto" });
+ expect(payload).not.toHaveProperty("temperature");
+ expect(result.responseModel).toBe("claude-fable-5");
+ });
+
+ it("maps Claude Fable 5 transport thinking levels to adaptive effort", async () => {
+ const model = makeAnthropicTransportModel({
+ id: "claude-fable-5",
+ name: "Claude Fable 5",
+ maxTokens: 128_000,
+ });
+
+ guardedFetchMock.mockImplementation(async () => createSseResponse());
+ for (const testCase of [
+ { reasoning: "off", effort: "low" },
+ { reasoning: "minimal", effort: "low" },
+ { reasoning: "high", effort: "high" },
+ ] as const) {
+ await runTransportStream(
+ model,
+ {
+ messages: [{ role: "user", content: "Think carefully." }],
+ } as AnthropicStreamContext,
+ {
+ apiKey: "sk-ant-api",
+ reasoning: testCase.reasoning,
+ } as unknown as AnthropicStreamOptions,
+ );
+
+ const payload = latestAnthropicRequest().payload;
+ expect(payload.thinking).toEqual({ type: "adaptive", display: "summarized" });
+ expect(payload.output_config).toEqual({ effort: testCase.effort });
+ }
+ });
+
+ it("honors provider effort restrictions for Claude Fable 5 transport runs", async () => {
+ const model = makeAnthropicTransportModel({
+ id: "claude-fable-5",
+ name: "Claude Fable 5",
+ provider: "github-copilot",
+ reasoning: false,
+ thinkingLevelMap: { xhigh: null, max: null },
+ maxTokens: 128_000,
+ });
+
+ guardedFetchMock.mockImplementation(async () => createSseResponse());
+ await runTransportStream(
+ model,
+ {
+ messages: [{ role: "user", content: "Think carefully." }],
+ } as AnthropicStreamContext,
+ {
+ apiKey: "copilot-token",
+ reasoning: "xhigh",
+ } as unknown as AnthropicStreamOptions,
+ );
+
+ const payload = latestAnthropicRequest().payload;
+ expect(payload.thinking).toEqual({ type: "adaptive", display: "summarized" });
+ expect(payload.output_config).toEqual({ effort: "high" });
+ });
+
+ it("uses the Claude Fable 5 contract on Anthropic Vertex transport runs", async () => {
+ const model = makeAnthropicTransportModel({
+ id: "claude-fable-5",
+ name: "Claude Fable 5",
+ provider: "anthropic-vertex",
+ maxTokens: 128_000,
+ });
+
+ await runTransportStream(
+ model,
+ {
+ messages: [{ role: "user", content: "Think carefully." }],
+ } as AnthropicStreamContext,
+ {
+ apiKey: "vertex-token",
+ reasoning: "high",
+ } as AnthropicStreamOptions,
+ );
+
+ const payload = latestAnthropicRequest().payload;
+ expect(payload.thinking).toEqual({ type: "adaptive", display: "summarized" });
+ expect(payload.output_config).toEqual({ effort: "high" });
});
it("maps xhigh thinking effort for Claude Opus 4.8 transport runs", async () => {
@@ -1970,6 +2329,7 @@ describe("anthropic transport stream", () => {
id: "claude-opus-4-8",
name: "Claude Opus 4.8",
maxTokens: 8192,
+ thinkingLevelMap: { xhigh: "xhigh", max: "max" },
});
await runTransportStream(
@@ -2012,11 +2372,13 @@ describe("anthropic transport stream", () => {
expect(payload.output_config).toEqual({ effort: "max" });
});
- it("clamps max thinking effort for Claude models without native max support", async () => {
+ it("honors provider routes that exclude native max effort", async () => {
const model = makeAnthropicTransportModel({
id: "claude-sonnet-4-6",
name: "Claude Sonnet 4.6",
+ provider: "github-copilot",
maxTokens: 8192,
+ thinkingLevelMap: { xhigh: null, max: null },
});
await runTransportStream(
diff --git a/src/agents/anthropic-transport-stream.ts b/src/agents/anthropic-transport-stream.ts
index f44274d8ba41..5ca35536ba62 100644
--- a/src/agents/anthropic-transport-stream.ts
+++ b/src/agents/anthropic-transport-stream.ts
@@ -5,11 +5,27 @@
*/
import { normalizeLowercaseStringOrEmpty } from "@openclaw/normalization-core/string-coerce";
import { getEnvApiKey } from "../llm/env-api-keys.js";
-import { calculateCost } from "../llm/model-utils.js";
+import { calculateCost, clampThinkingLevel } from "../llm/model-utils.js";
import type { AnthropicOptions } from "../llm/providers/anthropic.js";
-import type { Context, Model, SimpleStreamOptions, ThinkingLevel } from "../llm/types.js";
+import type {
+ AssistantMessageDiagnostic,
+ Context,
+ Model,
+ SimpleStreamOptions,
+ ThinkingLevel,
+} from "../llm/types.js";
import { parseStreamingJson } from "../llm/utils/json-parse.js";
+import {
+ resolveClaudeNativeThinkingLevelMap,
+ supportsClaudeAdaptiveThinking,
+ supportsClaudeNativeMaxEffort,
+ supportsClaudeNativeXhighEffort,
+ usesClaudeFable5MessagesContract,
+} from "../shared/anthropic-model-contract.js";
+import { applyAnthropicRefusal } from "../shared/anthropic-refusal.js";
import { MALFORMED_STREAMING_FRAGMENT_ERROR_MESSAGE } from "../shared/assistant-error-format.js";
+import { createDeferredEventBuffer } from "../shared/deferred-event-buffer.js";
+import { notifyLlmRequestActivity } from "../shared/llm-request-activity.js";
import {
applyAnthropicPayloadPolicyToParams,
resolveAnthropicPayloadPolicy,
@@ -102,6 +118,7 @@ type MutableAssistantOutput = {
api: "anthropic-messages";
provider: string;
model: string;
+ responseModel?: string;
usage: {
input: number;
output: number;
@@ -114,46 +131,58 @@ type MutableAssistantOutput = {
timestamp: number;
responseId?: string;
errorMessage?: string;
+ diagnostics?: AssistantMessageDiagnostic[];
};
const EMPTY_ANTHROPIC_MESSAGES_FALLBACK_TEXT = ".";
-function isClaudeOpus47OrNewerModel(modelId: string): boolean {
- return (
- modelId.includes("opus-4-8") ||
- modelId.includes("opus-4.8") ||
- modelId.includes("opus-4-7") ||
- modelId.includes("opus-4.7")
- );
+function normalizeAnthropicToolChoice(
+ model: AnthropicTransportModel,
+ toolChoice: AnthropicTransportOptions["toolChoice"],
+) {
+ if (
+ usesClaudeFable5MessagesContract(model) &&
+ (toolChoice === "any" || (typeof toolChoice === "object" && toolChoice.type === "tool"))
+ ) {
+ return { type: "auto" as const };
+ }
+ return typeof toolChoice === "string" ? { type: toolChoice } : toolChoice;
}
-function isClaudeOpus46Model(modelId: string): boolean {
- return modelId.includes("opus-4-6") || modelId.includes("opus-4.6");
+function supportsNativeXhighEffort(model: AnthropicTransportModel): boolean {
+ return supportsClaudeNativeXhighEffort(model);
}
-function supportsAdaptiveThinking(modelId: string): boolean {
- return (
- isClaudeOpus47OrNewerModel(modelId) ||
- isClaudeOpus46Model(modelId) ||
- modelId.includes("sonnet-4-6") ||
- modelId.includes("sonnet-4.6")
- );
+function supportsAdaptiveThinking(model: AnthropicTransportModel): boolean {
+ return supportsClaudeAdaptiveThinking(model);
}
-function mapThinkingLevelToEffort(level: ThinkingLevel, modelId: string): AnthropicAdaptiveEffort {
- switch (level) {
+function mapThinkingLevelToEffort(
+ level: ThinkingLevel | "off",
+ model: AnthropicTransportModel,
+): AnthropicAdaptiveEffort {
+ const thinkingLevelMap = resolveClaudeNativeThinkingLevelMap(model);
+ const clampModel = {
+ ...model,
+ ...(typeof model.params?.canonicalModelId === "string" ? { reasoning: true } : {}),
+ ...(thinkingLevelMap ? { thinkingLevelMap } : {}),
+ };
+ const resolvedLevel = clampThinkingLevel(clampModel, level);
+ const mapped = thinkingLevelMap?.[resolvedLevel];
+ if (typeof mapped === "string") {
+ return mapped as AnthropicAdaptiveEffort;
+ }
+ switch (resolvedLevel) {
+ case "off":
case "minimal":
case "low":
return "low";
case "medium":
return "medium";
case "xhigh":
- if (isClaudeOpus47OrNewerModel(modelId)) {
- return "xhigh";
- }
- return isClaudeOpus46Model(modelId) ? "max" : "high";
+ return supportsNativeXhighEffort(model) ? "xhigh" : "high";
case "max":
- return isClaudeOpus47OrNewerModel(modelId) ? "max" : "high";
+ return supportsClaudeNativeMaxEffort(model) ? "max" : "high";
default:
return "high";
}
@@ -383,25 +412,28 @@ function convertAnthropicMessages(
});
continue;
}
- if (block.thinking.trim().length === 0) {
+ const thinkingSignature = block.thinkingSignature?.trim();
+ const hasNativeThinkingSignature =
+ Boolean(thinkingSignature) && thinkingSignature !== "reasoning_content";
+ if (block.thinking.trim().length === 0 && !hasNativeThinkingSignature) {
continue;
}
- if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
+ if (!thinkingSignature) {
blocks.push({
type: "text",
text: sanitizeTransportPayloadText(block.thinking),
});
} else {
const thinking =
- block.thinkingSignature === "reasoning_content"
+ thinkingSignature === "reasoning_content"
? sanitizeTransportPayloadText(block.thinking)
: block.thinking;
- if (block.thinkingSignature === "reasoning_content") {
+ if (thinkingSignature === "reasoning_content") {
if (allowReasoningContentReplay) {
blocks.push({
type: "thinking",
thinking,
- signature: block.thinkingSignature,
+ signature: thinkingSignature,
});
reasoningContent.push(thinking);
}
@@ -410,7 +442,7 @@ function convertAnthropicMessages(
blocks.push({
type: "thinking",
thinking,
- signature: block.thinkingSignature,
+ signature: thinkingSignature,
});
}
continue;
@@ -727,7 +759,7 @@ function createAnthropicTransportClient(params: {
}) {
const { model, context, apiKey, options } = params;
const needsInterleavedBeta =
- (options?.interleavedThinking ?? true) && !supportsAdaptiveThinking(model.id);
+ (options?.interleavedThinking ?? true) && !supportsAdaptiveThinking(model);
// Kimi's Anthropic thinking SSE is already well-formed for this parser, but
// the OpenAI SDK compatibility sanitizer can stall before the text block.
const fetch =
@@ -861,7 +893,11 @@ function buildAnthropicParams(
},
];
}
- if (options?.temperature !== undefined && !options.thinkingEnabled) {
+ if (
+ options?.temperature !== undefined &&
+ !options.thinkingEnabled &&
+ !supportsNativeXhighEffort(model)
+ ) {
params.temperature = options.temperature;
}
if (options?.stop !== undefined && options.stop.length > 0) {
@@ -870,17 +906,21 @@ function buildAnthropicParams(
if (context.tools) {
params.tools = convertAnthropicTools(context.tools, isOAuthToken);
}
- if (model.reasoning) {
- if (options?.thinkingEnabled) {
- if (supportsAdaptiveThinking(model.id)) {
- params.thinking = { type: "adaptive" };
- if (options.effort) {
- params.output_config = { effort: options.effort };
+ const fable5 = usesClaudeFable5MessagesContract(model);
+ if (fable5 || model.reasoning || supportsAdaptiveThinking(model)) {
+ if (fable5 || options?.thinkingEnabled) {
+ if (supportsAdaptiveThinking(model)) {
+ params.thinking = fable5
+ ? { type: "adaptive", display: "summarized" }
+ : { type: "adaptive" };
+ const effort = options?.effort ?? (fable5 ? "high" : undefined);
+ if (effort) {
+ params.output_config = { effort };
}
} else {
params.thinking = {
type: "enabled",
- budget_tokens: options.thinkingBudgetTokens || 1024,
+ budget_tokens: options?.thinkingBudgetTokens || 1024,
};
}
} else if (options?.thinkingEnabled === false) {
@@ -891,8 +931,7 @@ function buildAnthropicParams(
params.metadata = { user_id: options.metadata.user_id };
}
if (options?.toolChoice) {
- params.tool_choice =
- typeof options.toolChoice === "string" ? { type: options.toolChoice } : options.toolChoice;
+ params.tool_choice = normalizeAnthropicToolChoice(model, options.toolChoice);
}
applyAnthropicPayloadPolicyToParams(params, payloadPolicy);
return params;
@@ -932,12 +971,15 @@ function resolveAnthropicTransportOptions(
reasoning: options?.reasoning,
};
if (!options?.reasoning) {
- resolved.thinkingEnabled = false;
+ resolved.thinkingEnabled = usesClaudeFable5MessagesContract(model);
+ if (resolved.thinkingEnabled) {
+ resolved.effort = "high";
+ }
return resolved;
}
- if (supportsAdaptiveThinking(model.id)) {
+ if (supportsAdaptiveThinking(model)) {
resolved.thinkingEnabled = true;
- resolved.effort = mapThinkingLevelToEffort(options.reasoning, model.id) as NonNullable<
+ resolved.effort = mapThinkingLevelToEffort(options.reasoning, model) as NonNullable<
AnthropicOptions["effort"]
>;
return resolved;
@@ -971,6 +1013,14 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
stopReason: "stop",
timestamp: Date.now(),
};
+ // Fable classifiers can refuse after partial generation, so no event is
+ // safe to expose until the terminal stop reason is known.
+ const refusalBuffer = usesClaudeFable5MessagesContract(model)
+ ? createDeferredEventBuffer(stream, () =>
+ notifyLlmRequestActivity(options?.signal),
+ )
+ : undefined;
+ const eventSink = refusalBuffer ?? stream;
try {
const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? "";
if (!apiKey) {
@@ -997,6 +1047,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
const allowReasoningContentReplay = supportsReasoningContentReplay(model);
const reasoningContentThinkingBlocks = new Map();
const reasoningContentTextBlocks = new Map();
+ let sawMessageStop = false;
const eventIndexKey = (eventIndex: unknown) =>
typeof eventIndex === "number" ? eventIndex : -1;
const appendReasoningContentThinkingDelta = (
@@ -1021,7 +1072,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
output.content.push(block);
contentIndex = output.content.length - 1;
reasoningContentThinkingBlocks.set(key, contentIndex);
- stream.push({
+ eventSink.push({
type: "thinking_start",
contentIndex,
partial: output as never,
@@ -1029,7 +1080,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
}
block.thinking += text;
block.thinkingSignature = "reasoning_content";
- stream.push({
+ eventSink.push({
type: "thinking_delta",
contentIndex,
delta: text,
@@ -1059,14 +1110,14 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
output.content.push(block);
contentIndex = output.content.length - 1;
reasoningContentTextBlocks.set(key, contentIndex);
- stream.push({
+ eventSink.push({
type: "text_start",
contentIndex,
partial: output as never,
});
}
block.text += text;
- stream.push({
+ eventSink.push({
type: "text_delta",
contentIndex,
delta: text,
@@ -1081,7 +1132,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
reasoningContentThinkingBlocks.delete(key);
const block = output.content[thinkingContentIndex];
if (block?.type === "thinking") {
- stream.push({
+ eventSink.push({
type: "thinking_end",
contentIndex: thinkingContentIndex,
content: block.thinking,
@@ -1096,7 +1147,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
reasoningContentTextBlocks.delete(key);
const block = output.content[textContentIndex];
if (block?.type === "text") {
- stream.push({
+ eventSink.push({
type: "text_end",
contentIndex: textContentIndex,
content: block.text,
@@ -1111,10 +1162,11 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
}
if (event.type === "message_start") {
const message = event.message as
- | { id?: string; usage?: Record<string, unknown> }
+ | { id?: string; model?: string; usage?: Record<string, unknown> }
| undefined;
const usage = message?.usage ?? {};
output.responseId = typeof message?.id === "string" ? message.id : undefined;
+ output.responseModel = typeof message?.model === "string" ? message.model : undefined;
output.usage.input = typeof usage.input_tokens === "number" ? usage.input_tokens : 0;
output.usage.output = typeof usage.output_tokens === "number" ? usage.output_tokens : 0;
output.usage.cacheRead =
@@ -1133,7 +1185,11 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
// (e.g. invalid thinking signatures) arrive before any non-error event
// is yielded, keeping yieldedOutput=false in pumpStreamWithRecovery
// and allowing the thinking-block recovery retry to fire.
- stream.push({ type: "start", partial: output as never });
+ eventSink.push({ type: "start", partial: output as never });
+ continue;
+ }
+ if (event.type === "message_stop") {
+ sawMessageStop = true;
continue;
}
if (event.type === "content_block_start") {
@@ -1147,13 +1203,13 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
const block: TransportContentBlock = { type: "text", text, index };
output.content.push(block);
const contentIndex = output.content.length - 1;
- stream.push({
+ eventSink.push({
type: "text_start",
contentIndex,
partial: output as never,
});
if (text.length > 0) {
- stream.push({
+ eventSink.push({
type: "text_delta",
contentIndex,
delta: text,
@@ -1174,13 +1230,13 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
};
output.content.push(block);
const contentIndex = output.content.length - 1;
- stream.push({
+ eventSink.push({
type: "thinking_start",
contentIndex,
partial: output as never,
});
if (thinking.length > 0) {
- stream.push({
+ eventSink.push({
type: "thinking_delta",
contentIndex,
delta: thinking,
@@ -1198,7 +1254,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
index,
};
output.content.push(block);
- stream.push({
+ eventSink.push({
type: "thinking_start",
contentIndex: output.content.length - 1,
partial: output as never,
@@ -1223,7 +1279,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
index,
};
output.content.push(block);
- stream.push({
+ eventSink.push({
type: "toolcall_start",
contentIndex: output.content.length - 1,
partial: output as never,
@@ -1255,7 +1311,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
if (text.length > 0) {
if (block?.type === "text") {
block.text += text;
- stream.push({
+ eventSink.push({
type: "text_delta",
contentIndex: index,
delta: text,
@@ -1276,7 +1332,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
block = { type: "text", text: "", index: recoveredIndex };
output.content.push(block);
index = output.content.length - 1;
- stream.push({
+ eventSink.push({
type: "text_start",
contentIndex: index,
partial: output as never,
@@ -1288,7 +1344,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
typeof delta.text === "string"
) {
block.text += delta.text;
- stream.push({
+ eventSink.push({
type: "text_delta",
contentIndex: index,
delta: delta.text,
@@ -1302,7 +1358,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
typeof delta.thinking === "string"
) {
block.thinking += delta.thinking;
- stream.push({
+ eventSink.push({
type: "thinking_delta",
contentIndex: index,
delta: delta.thinking,
@@ -1318,7 +1374,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
const partialJson = `${block.partialJson ?? ""}${delta.partial_json}`;
block.partialJson = partialJson;
block.arguments = parseAnthropicToolCallArguments(partialJson);
- stream.push({
+ eventSink.push({
type: "toolcall_delta",
contentIndex: index,
delta: delta.partial_json,
@@ -1349,7 +1405,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
}
delete block.index;
if (block.type === "text") {
- stream.push({
+ eventSink.push({
type: "text_end",
contentIndex: index,
content: block.text,
@@ -1359,7 +1415,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
continue;
}
if (block.type === "thinking") {
- stream.push({
+ eventSink.push({
type: "thinking_end",
contentIndex: index,
content: block.thinking,
@@ -1373,7 +1429,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
block.arguments = parseAnthropicToolCallArguments(block.partialJson);
}
delete block.partialJson;
- stream.push({
+ eventSink.push({
type: "toolcall_end",
contentIndex: index,
toolCall: block as never,
@@ -1384,10 +1440,16 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
continue;
}
if (event.type === "message_delta") {
- const delta = event.delta as { stop_reason?: string } | undefined;
+ const delta = event.delta as
+ | { stop_reason?: string; stop_details?: unknown }
+ | undefined;
const usage = event.usage as Record<string, unknown> | undefined;
if (delta?.stop_reason) {
- output.stopReason = mapStopReason(delta.stop_reason);
+ if (delta.stop_reason === "refusal") {
+ applyAnthropicRefusal(output, delta.stop_details, model.provider);
+ } else {
+ output.stopReason = mapStopReason(delta.stop_reason);
+ }
}
if (typeof usage?.input_tokens === "number") {
output.usage.input = usage.input_tokens;
@@ -1409,8 +1471,22 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn {
calculateCost(model, output.usage);
}
}
- finalizeTransportStream({ stream, output, signal: transportOptions.signal });
+ if (refusalBuffer && !sawMessageStop) {
+ throw new Error("Anthropic stream ended before message_stop");
+ }
+ if (transportOptions.signal?.aborted) {
+ throw new Error("Request was aborted");
+ }
+ if (output.stopReason === "aborted" || output.stopReason === "error") {
+ throw new Error(output.errorMessage ?? "An unknown error occurred");
+ }
+ refusalBuffer?.flush();
+ finalizeTransportStream({ stream, output });
} catch (error) {
+ if (refusalBuffer) {
+ refusalBuffer.discard();
+ output.content = [];
+ }
failTransportStream({
stream,
output,
diff --git a/src/agents/context.test.ts b/src/agents/context.test.ts
index 6b7543a5ced7..2a240ba9a014 100644
--- a/src/agents/context.test.ts
+++ b/src/agents/context.test.ts
@@ -3,6 +3,7 @@ import { describe, expect, it, vi } from "vitest";
import { createSessionManagerRuntimeRegistry } from "./agent-hooks/session-manager-runtime-registry.js";
import {
ANTHROPIC_CONTEXT_1M_TOKENS,
+ ANTHROPIC_FABLE_CONTEXT_TOKENS,
applyConfiguredContextWindows,
applyDiscoveredContextWindows,
resolveContextTokensForModel,
@@ -387,26 +388,33 @@ describe("resolveContextTokensForModel", () => {
expect(result).toBe(ANTHROPIC_CONTEXT_1M_TOKENS);
});
- it("returns 1M context for Anthropic sonnet 4 even when config reports 200k", () => {
- const result = resolveContextTokensForModel({
- cfg: {
- models: {
- providers: {
- anthropic: {
- baseUrl: "https://api.anthropic.com",
- models: [testModelContextWindow("claude-sonnet-4-6", 200_000)],
+ it.each([
+ ["anthropic", "claude-fable-5", ANTHROPIC_FABLE_CONTEXT_TOKENS],
+ ["anthropic-vertex", "claude-fable-5", ANTHROPIC_FABLE_CONTEXT_TOKENS],
+ ["anthropic", "claude-sonnet-4-6", ANTHROPIC_CONTEXT_1M_TOKENS],
+ ])(
+ "returns the fixed context for %s model %s even when config reports 200k",
+ (provider, modelId, expectedContextTokens) => {
+ const result = resolveContextTokensForModel({
+ cfg: {
+ models: {
+ providers: {
+ [provider]: {
+ baseUrl: "https://api.anthropic.com",
+ models: [testModelContextWindow(modelId, 200_000)],
+ },
},
},
},
- },
- provider: "anthropic",
- model: "claude-sonnet-4-6",
- fallbackContextTokens: 200_000,
- allowAsyncLoad: false,
- });
+ provider,
+ model: modelId,
+ fallbackContextTokens: 200_000,
+ allowAsyncLoad: false,
+ });
- expect(result).toBe(ANTHROPIC_CONTEXT_1M_TOKENS);
- });
+ expect(result).toBe(expectedContextTokens);
+ },
+ );
it("keeps older Anthropic Sonnet 4.x models at the configured window when context1m is set", () => {
const result = resolveContextTokensForModel({
diff --git a/src/agents/context.ts b/src/agents/context.ts
index e3ad2ec5d94c..e3abc5b59602 100644
--- a/src/agents/context.ts
+++ b/src/agents/context.ts
@@ -46,6 +46,7 @@ const ANTHROPIC_GA_1M_MODEL_PREFIXES = [
"claude-sonnet-4.6",
] as const;
export const ANTHROPIC_CONTEXT_1M_TOKENS = 1_048_576;
+export const ANTHROPIC_FABLE_CONTEXT_TOKENS = 1_000_000;
const CONFIG_LOAD_RETRY_POLICY: BackoffPolicy = {
initialMs: 1_000,
maxMs: 60_000,
@@ -67,9 +68,8 @@ export function applyDiscoveredContextWindows(params: {
: typeof model.contextWindow === "number"
? Math.trunc(model.contextWindow)
: undefined;
- const contextTokens = shouldUseDiscoveredAnthropicGa1MContextWindow(model)
- ? ANTHROPIC_CONTEXT_1M_TOKENS
- : discoveredContextTokens;
+ const contextTokens =
+ resolveDiscoveredAnthropicFixedContextWindow(model) ?? discoveredContextTokens;
if (!contextTokens || contextTokens <= 0) {
continue;
}
@@ -323,37 +323,39 @@ function resolveConfiguredProviderContextTokens(
return findContextTokens((id) => normalizeProviderId(id) === normalizedProvider);
}
-function isAnthropic1MModel(provider: string, model: string): boolean {
- if (provider !== "anthropic" && provider !== "claude-cli") {
- return false;
- }
+function resolveAnthropicFixedContextWindow(provider: string, model: string): number | undefined {
const modelId = resolveModelFamilyId(model);
- return ANTHROPIC_GA_1M_MODEL_PREFIXES.some((prefix) => modelId.startsWith(prefix));
+ if (
+ (provider === "anthropic" || provider === "anthropic-vertex") &&
+ modelId.startsWith("claude-fable-5")
+ ) {
+ return ANTHROPIC_FABLE_CONTEXT_TOKENS;
+ }
+ if (provider !== "anthropic" && provider !== "claude-cli") {
+ return undefined;
+ }
+ return ANTHROPIC_GA_1M_MODEL_PREFIXES.some((prefix) => modelId.startsWith(prefix))
+ ? ANTHROPIC_CONTEXT_1M_TOKENS
+ : undefined;
}
-function shouldUseAnthropicGa1MContextWindow(params: {
- provider?: string;
- model: string;
-}): boolean {
- const provider = params.provider ? normalizeProviderId(params.provider) : "";
- return isAnthropic1MModel(provider, params.model);
-}
-
-function shouldUseDiscoveredAnthropicGa1MContextWindow(model: ModelEntry): boolean {
+function resolveDiscoveredAnthropicFixedContextWindow(model: ModelEntry): number | undefined {
const provider =
typeof model.provider === "string" ? normalizeProviderId(model.provider) : undefined;
const modelId = model.id;
if (provider) {
- return isAnthropic1MModel(provider, modelId);
+ return resolveAnthropicFixedContextWindow(provider, modelId);
}
const normalized = normalizeLowercaseStringOrEmpty(modelId);
const slash = normalized.indexOf("/");
if (slash < 0) {
- return false;
+ return undefined;
}
const inferredProvider = normalizeProviderId(normalized.slice(0, slash));
const inferredModel = normalized.slice(slash + 1);
- return inferredProvider === "claude-cli" && isAnthropic1MModel(inferredProvider, inferredModel);
+ return inferredProvider === "claude-cli"
+ ? resolveAnthropicFixedContextWindow(inferredProvider, inferredModel)
+ : undefined;
}
function resolveModelFamilyId(modelId: string): string {
@@ -379,8 +381,11 @@ export function resolveContextTokensForModel(params: {
});
const explicitProvider = params.provider?.trim();
if (ref) {
- if (explicitProvider && isAnthropic1MModel(ref.provider, ref.model)) {
- return ANTHROPIC_CONTEXT_1M_TOKENS;
+ if (explicitProvider) {
+ const fixedContextWindow = resolveAnthropicFixedContextWindow(ref.provider, ref.model);
+ if (fixedContextWindow !== undefined) {
+ return fixedContextWindow;
+ }
}
// Only do the config direct scan when the caller explicitly passed a
// provider. When provider is inferred from a slash in the model string
@@ -401,10 +406,6 @@ export function resolveContextTokensForModel(params: {
}
}
- if (explicitProvider && ref && shouldUseAnthropicGa1MContextWindow(ref)) {
- return ANTHROPIC_CONTEXT_1M_TOKENS;
- }
-
// When provider is explicitly given and the model ID is bare (no slash),
// try the provider-qualified cache key BEFORE the bare key. Discovery
// entries are stored under qualified IDs (e.g. "google-gemini-cli/
diff --git a/src/agents/embedded-agent-runner/run/llm-idle-timeout.test.ts b/src/agents/embedded-agent-runner/run/llm-idle-timeout.test.ts
index 29902e3d44ca..7dd4f7e26527 100644
--- a/src/agents/embedded-agent-runner/run/llm-idle-timeout.test.ts
+++ b/src/agents/embedded-agent-runner/run/llm-idle-timeout.test.ts
@@ -1,9 +1,14 @@
// LLM idle-timeout tests cover timeout selection and stream wrapping for
// embedded provider calls, including local-provider and cron exceptions.
import { MAX_TIMER_TIMEOUT_MS } from "@openclaw/normalization-core/number-coercion";
-import type { AssistantMessageEventStream } from "openclaw/plugin-sdk/llm";
+import {
+ createAssistantMessageEventStream,
+ type AssistantMessageEventStream,
+} from "openclaw/plugin-sdk/llm";
import { afterEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../../../config/config.js";
+import { notifyLlmRequestActivity } from "../../../shared/llm-request-activity.js";
+import type { StreamFn } from "../../runtime/index.js";
import {
DEFAULT_LLM_IDLE_TIMEOUT_MS,
resolveLlmIdleTimeoutMs,
@@ -339,12 +344,12 @@ describe("streamWithIdleTimeout", () => {
void wrapped(model, context, options);
- expect(baseFn).toHaveBeenCalledWith({ api: "openai", requestTimeoutMs: 1000 }, context, {
+ expect(baseFn).toHaveBeenCalledWith(model, context, {
signal: expect.any(AbortSignal),
});
});
- it("keeps model request timeouts that are shorter than the idle watchdog", () => {
+ it("preserves explicit model request timeouts", () => {
const mockStream = createMockAsyncIterable([]);
const baseFn = vi.fn().mockReturnValue(mockStream);
const wrapped = streamWithIdleTimeout(baseFn, 1000);
@@ -355,7 +360,7 @@ describe("streamWithIdleTimeout", () => {
void wrapped(model, context, options);
- expect(baseFn).toHaveBeenCalledWith({ requestTimeoutMs: 250 }, context, {
+ expect(baseFn).toHaveBeenCalledWith(model, context, {
signal: expect.any(AbortSignal),
});
});
@@ -508,6 +513,37 @@ describe("streamWithIdleTimeout", () => {
expect(results).toHaveLength(3);
});
+ it("treats quarantined provider events as stream activity", async () => {
+ vi.useFakeTimers();
+ let requestSignal: AbortSignal | undefined;
+ const baseFn: StreamFn = vi.fn((_model, _context, options) => {
+ requestSignal = options?.signal;
+ const stream = createAssistantMessageEventStream();
+ setTimeout(() => {
+ stream.push({ type: "text_delta", contentIndex: 0, delta: "done" });
+ }, 120);
+ return stream;
+ });
+ const wrapped = streamWithIdleTimeout(baseFn, 50);
+ const stream = wrapped(
+ {} as Parameters<StreamFn>[0],
+ {} as Parameters<StreamFn>[1],
+ {} as Parameters<StreamFn>[2],
+ ) as AssistantMessageEventStream;
+ const iterator = stream[Symbol.asyncIterator]();
+ const next = iterator.next();
+
+ setTimeout(() => notifyLlmRequestActivity(requestSignal), 40);
+ setTimeout(() => notifyLlmRequestActivity(requestSignal), 80);
+ await vi.advanceTimersByTimeAsync(120);
+
+ await expect(next).resolves.toEqual({
+ done: false,
+ value: { type: "text_delta", contentIndex: 0, delta: "done" },
+ });
+ await iterator.return?.();
+ });
+
it("calls timeout hook on idle timeout", async () => {
vi.useFakeTimers();
const slowStream = createNeverYieldingStream();
diff --git a/src/agents/embedded-agent-runner/run/llm-idle-timeout.ts b/src/agents/embedded-agent-runner/run/llm-idle-timeout.ts
index e5320ee0cde8..832fb22afc19 100644
--- a/src/agents/embedded-agent-runner/run/llm-idle-timeout.ts
+++ b/src/agents/embedded-agent-runner/run/llm-idle-timeout.ts
@@ -8,6 +8,7 @@ import {
} from "@openclaw/normalization-core/number-coercion";
import { DEFAULT_LLM_IDLE_TIMEOUT_SECONDS } from "../../../config/agent-timeout-defaults.js";
import type { OpenClawConfig } from "../../../config/types.openclaw.js";
+import { onLlmRequestActivity } from "../../../shared/llm-request-activity.js";
import type { StreamFn } from "../../runtime/index.js";
import type { MutableAssistantMessageEventStream } from "../../stream-compat.js";
import { createStreamIteratorWrapper } from "../../stream-iterator-wrapper.js";
@@ -239,20 +240,6 @@ export function streamWithIdleTimeout(
...options,
signal: streamAbortController.signal,
} as typeof options;
- const existingRequestTimeoutMs =
- typeof (model as { requestTimeoutMs?: unknown })?.requestTimeoutMs === "number" &&
- Number.isFinite((model as { requestTimeoutMs?: number }).requestTimeoutMs) &&
- (model as { requestTimeoutMs?: number }).requestTimeoutMs! > 0
- ? Math.floor((model as { requestTimeoutMs?: number }).requestTimeoutMs!)
- : timeoutMs;
- const wrappedModel =
- typeof model === "object" && model !== null
- ? ({
- ...model,
- requestTimeoutMs: Math.min(existingRequestTimeoutMs, timeoutMs),
- } as typeof model)
- : model;
-
const createTimeoutPromise = (setTimer: (timer: NodeJS.Timeout) => void): Promise<never> => {
return new Promise((_, reject) => {
const timer = setTimeout(() => {
@@ -268,7 +255,7 @@ export function streamWithIdleTimeout(
let maybeStream: ReturnType<typeof baseFn>;
try {
- maybeStream = baseFn(wrappedModel, context, wrappedOptions);
+ maybeStream = baseFn(model, context, wrappedOptions);
} catch (error) {
cleanupSourceSignal();
throw error;
@@ -280,6 +267,8 @@ export function streamWithIdleTimeout(
function () {
const iterator = originalAsyncIterator();
let idleTimer: NodeJS.Timeout | null = null;
+ let waitingForProvider = false;
+ let rejectIdleTimeout: ((error: Error) => void) | undefined;
const clearTimer = () => {
if (idleTimer) {
@@ -287,42 +276,61 @@ export function streamWithIdleTimeout(
idleTimer = null;
}
};
+ const armTimer = () => {
+ clearTimer();
+ if (!waitingForProvider) {
+ return;
+ }
+ idleTimer = setTimeout(() => {
+ idleTimer = null;
+ const error = createIdleTimeoutError();
+ abortStream(error);
+ onIdleTimeout?.(error);
+ rejectIdleTimeout?.(error);
+ }, timeoutMs);
+ idleTimer.unref?.();
+ };
+ const stopWaiting = () => {
+ waitingForProvider = false;
+ rejectIdleTimeout = undefined;
+ clearTimer();
+ };
+ const unsubscribeActivity = onLlmRequestActivity(streamAbortController.signal, armTimer);
+ const cleanupIterator = () => {
+ stopWaiting();
+ unsubscribeActivity();
+ cleanupSourceSignal();
+ };
return createStreamIteratorWrapper({
iterator,
next: async (streamIterator) => {
- clearTimer();
-
+ waitingForProvider = true;
try {
- // Arm the watchdog only while waiting for provider progress.
- const result = await Promise.race([
- streamIterator.next(),
- createTimeoutPromise((timer) => {
- idleTimer = timer;
- }),
- ]);
+ const timeoutPromise = new Promise<never>((_, reject) => {
+ rejectIdleTimeout = reject;
+ armTimer();
+ });
+ const result = await Promise.race([streamIterator.next(), timeoutPromise]);
if (result.done) {
- clearTimer();
- cleanupSourceSignal();
+ cleanupIterator();
return result;
}
- clearTimer();
+ stopWaiting();
return result;
} catch (error) {
- clearTimer();
+ cleanupIterator();
throw error;
}
},
onReturn(streamIterator) {
- clearTimer();
- cleanupSourceSignal();
+ cleanupIterator();
return streamIterator.return?.() ?? Promise.resolve({ done: true, value: undefined });
},
onThrow(streamIterator, error) {
- clearTimer();
- cleanupSourceSignal();
+ cleanupIterator();
return (
streamIterator.throw?.(error) ??
Promise.reject(toLintErrorObject(error, "Non-Error rejection"))
diff --git a/src/agents/model-catalog.test.ts b/src/agents/model-catalog.test.ts
index 002c28863f41..bbbd745ad982 100644
--- a/src/agents/model-catalog.test.ts
+++ b/src/agents/model-catalog.test.ts
@@ -496,6 +496,28 @@ describe("loadModelCatalog", () => {
});
});
+ it("preserves runtime model params in the internal catalog", async () => {
+ mockAgentDiscoveryModels([
+ {
+ id: "company-fable",
+ name: "Company Fable",
+ provider: "amazon-bedrock",
+ params: { canonicalModelId: "claude-fable-5" },
+ },
+ ]);
+
+ const result = await loadModelCatalog({ config: {} as OpenClawConfig });
+
+ expect(result).toEqual([
+ {
+ id: "company-fable",
+ name: "Company Fable",
+ provider: "amazon-bedrock",
+ params: { canonicalModelId: "claude-fable-5" },
+ },
+ ]);
+ });
+
it("writes runtime discovery results under the refreshed models.json fingerprint", async () => {
buildModelsJsonSourceFingerprintMock.mockResolvedValue({
agentDir: "/tmp/openclaw",
@@ -945,6 +967,31 @@ describe("loadModelCatalog", () => {
expect(augmentCatalogMock).not.toHaveBeenCalled();
});
+ it("inherits provider API and canonical Fable reasoning in persisted rows", async () => {
+ readFileMock.mockResolvedValueOnce(
+ JSON.stringify({
+ providers: {
+ "microsoft-foundry": {
+ api: "anthropic-messages",
+ models: [
+ {
+ id: "company-fable",
+ reasoning: false,
+ params: { canonicalModelId: "claude-fable-5" },
+ },
+ ],
+ },
+ },
+ }),
+ );
+
+ const result = await loadModelCatalog({ config: {} as OpenClawConfig, readOnly: true });
+ const entry = requireCatalogEntry(result, "microsoft-foundry", "company-fable");
+
+ expect(entry.api).toBe("anthropic-messages");
+ expect(entry.reasoning).toBe(true);
+ });
+
it("refreshes stale persisted read-only rows with manifest catalog metadata", async () => {
readFileMock.mockResolvedValueOnce(
JSON.stringify({
@@ -1752,6 +1799,7 @@ describe("loadModelCatalog", () => {
provider: "xai",
id: "grok-4.3",
name: "Grok 4.3",
+ api: "openai-completions",
reasoning: false,
input: ["text"],
contextWindow: 200_000,
@@ -1767,6 +1815,7 @@ describe("loadModelCatalog", () => {
modelCatalog: {
providers: {
xai: {
+ api: "openai-responses",
models: [
{
id: "grok-4.3",
@@ -1787,6 +1836,7 @@ describe("loadModelCatalog", () => {
const entry = requireCatalogEntry(result, "xai", "grok-4.3");
expect(result.filter((entryValue) => entryValue.provider === "xai")).toHaveLength(1);
+ expect(entry.api).toBe("openai-responses");
expect(entry.contextWindow).toBe(1_000_000);
expect(entry.input).toEqual(["text", "image"]);
expect(entry.reasoning).toBe(true);
diff --git a/src/agents/model-catalog.ts b/src/agents/model-catalog.ts
index 364b83eb3dd0..b34791f3e852 100644
--- a/src/agents/model-catalog.ts
+++ b/src/agents/model-catalog.ts
@@ -3,6 +3,7 @@
*/
import { readFile } from "node:fs/promises";
import { join } from "node:path";
+import { resolveClaudeFable5ModelIdentity } from "@openclaw/llm-core";
import { normalizeProviderId } from "@openclaw/model-catalog-core/provider-id";
import {
normalizeLowercaseStringOrEmpty,
@@ -64,10 +65,12 @@ type DiscoveredModel = {
id: string;
name?: string;
provider: string;
+ api?: ModelCatalogEntry["api"];
contextWindow?: number;
contextTokens?: number;
reasoning?: boolean;
input?: ModelInputType[];
+ params?: ModelCatalogEntry["params"];
compat?: ModelCatalogEntry["compat"];
};
@@ -159,20 +162,46 @@ function mergeCatalogCompat(
return { ...base, ...override };
}
+/**
+ * Combines two optional model `params` records.
+ *
+ * When only one side is present it is returned as-is (same reference); when
+ * both are present a new object is built in which `override` keys win.
+ */
+function mergeCatalogParams(
+  base: ModelCatalogEntry["params"] | undefined,
+  override: ModelCatalogEntry["params"] | undefined,
+): ModelCatalogEntry["params"] | undefined {
+  if (base && override) {
+    return { ...base, ...override };
+  }
+  // At most one side is usable here: a missing base yields override (which
+  // may itself be undefined), otherwise base is returned.
+  return base ? base : override;
+}
+
+/**
+ * Returns a copy of `base` with metadata from `overlay` layered on top.
+ *
+ * `api`, `contextWindow`, `contextTokens`, `reasoning` and `input` replace the
+ * base value only when the overlay value is not `undefined`. `params` is the
+ * result of `mergeCatalogParams` and is written only when that result is
+ * truthy. `compat` is always set to the result of `mergeCatalogCompat`.
+ */
function overlayCatalogMetadata(
base: ModelCatalogEntry,
overlay: ModelCatalogEntry,
): ModelCatalogEntry {
+ const params = mergeCatalogParams(base.params, overlay.params);
return {
...base,
+ // Each conditional spread below contributes nothing when the overlay field is undefined.
+ ...(overlay.api !== undefined ? { api: overlay.api } : {}),
...(overlay.contextWindow !== undefined ? { contextWindow: overlay.contextWindow } : {}),
...(overlay.contextTokens !== undefined ? { contextTokens: overlay.contextTokens } : {}),
...(overlay.reasoning !== undefined ? { reasoning: overlay.reasoning } : {}),
...(overlay.input !== undefined ? { input: overlay.input } : {}),
+ ...(params ? { params } : {}),
compat: mergeCatalogCompat(base.compat, overlay.compat),
};
}
+/**
+ * Forces `reasoning: true` on entries served over the `anthropic-messages`
+ * API that `resolveClaudeFable5ModelIdentity` recognizes from their id and
+ * params. Matching entries are copied; every other entry is returned as-is.
+ */
+function normalizeCatalogEntryContract(entry: ModelCatalogEntry): ModelCatalogEntry {
+  if (entry.api !== "anthropic-messages") {
+    return entry;
+  }
+  const fableIdentity = resolveClaudeFable5ModelIdentity({ id: entry.id, params: entry.params });
+  return fableIdentity ? { ...entry, reasoning: true } : entry;
+}
+
function mergeCatalogEntries(models: ModelCatalogEntry[], entries: ModelCatalogEntry[]): void {
const indexByKey = new Map(
models.map((entry, index) => [catalogEntryDedupeKey(entry.provider, entry.id), index]),
@@ -260,7 +289,7 @@ export function loadManifestModelCatalog(params: {
}
function sortModelCatalogEntries(entries: ModelCatalogEntry[]): ModelCatalogEntry[] {
- return entries.toSorted((a, b) => {
+ return entries.map(normalizeCatalogEntryContract).toSorted((a, b) => {
const p = a.provider.localeCompare(b.provider);
if (p !== 0) {
return p;
@@ -273,6 +302,7 @@ function normalizePersistedModelCatalogEntry(
providerRaw: string,
entry: Record,
defaults?: {
+ api?: ModelCatalogEntry["api"];
contextWindow?: number;
contextTokens?: number;
},
@@ -303,6 +333,8 @@ function normalizePersistedModelCatalogEntry(
? defaults.contextTokens
: undefined;
const reasoning = typeof entry?.reasoning === "boolean" ? entry.reasoning : false;
+ const api =
+ typeof entry?.api === "string" ? (entry.api as ModelCatalogEntry["api"]) : defaults?.api;
const parsedInput = Array.isArray(entry?.input)
? entry.input.filter((value): value is ModelInputType =>
["text", "image", "audio", "video", "document"].includes(String(value)),
@@ -313,14 +345,20 @@ function normalizePersistedModelCatalogEntry(
entry?.compat && typeof entry.compat === "object"
? (entry.compat as ModelCatalogEntry["compat"])
: undefined;
+ const modelParams =
+ entry?.params && typeof entry.params === "object"
+ ? (entry.params as ModelCatalogEntry["params"])
+ : undefined;
return {
id,
name,
provider,
+ ...(api ? { api } : {}),
contextWindow,
...(contextTokens !== undefined ? { contextTokens } : {}),
reasoning,
input,
+ ...(modelParams ? { params: modelParams } : {}),
compat,
};
}
@@ -402,11 +440,16 @@ async function loadReadOnlyPersistedModelCatalog(params?: {
typeof providerConfig?.contextTokens === "number" && providerConfig.contextTokens > 0
? providerConfig.contextTokens
: undefined;
+ const providerApi =
+ typeof providerConfig?.api === "string"
+ ? (providerConfig.api as ModelCatalogEntry["api"])
+ : undefined;
for (const entry of providerConfig.models as Record[]) {
const normalized = normalizePersistedModelCatalogEntry(
providerRaw,
entry,
{
+ api: providerApi,
contextWindow: providerContextWindow,
contextTokens: providerContextTokens,
},
@@ -644,16 +687,21 @@ export async function loadModelCatalog(params?: {
? entry.contextTokens
: undefined;
const reasoning = typeof entry?.reasoning === "boolean" ? entry.reasoning : undefined;
+ const api = typeof entry?.api === "string" ? entry.api : undefined;
const input = Array.isArray(entry?.input) ? entry.input : undefined;
+ const modelParams =
+ entry?.params && typeof entry.params === "object" ? entry.params : undefined;
const compat = entry?.compat && typeof entry.compat === "object" ? entry.compat : undefined;
models.push({
id,
name,
provider,
+ ...(api ? { api } : {}),
contextWindow,
...(contextTokens !== undefined ? { contextTokens } : {}),
reasoning,
input,
+ ...(modelParams ? { params: modelParams } : {}),
compat,
});
}
diff --git a/src/agents/model-catalog.types.ts b/src/agents/model-catalog.types.ts
index 0dc4954321b4..f92b3950943d 100644
--- a/src/agents/model-catalog.types.ts
+++ b/src/agents/model-catalog.types.ts
@@ -19,6 +19,7 @@ export type ModelCatalogEntry = {
contextTokens?: number;
reasoning?: boolean;
input?: ModelInputType[];
+ params?: Record;
compat?: ModelCompatConfig;
mediaInput?: ModelMediaInputConfig;
};
diff --git a/src/agents/model-selection-shared.ts b/src/agents/model-selection-shared.ts
index aad94ffabc67..f574f5e8aed1 100644
--- a/src/agents/model-selection-shared.ts
+++ b/src/agents/model-selection-shared.ts
@@ -659,6 +659,10 @@ function applyModelCatalogMetadata(params: {
const nextContextTokens = configuredEntry?.contextTokens ?? params.entry.contextTokens;
const nextReasoning = configuredEntry?.reasoning ?? params.entry.reasoning;
const nextInput = configuredEntry?.input ?? params.entry.input;
+ const nextParams =
+ params.entry.params || configuredEntry?.params
+ ? { ...params.entry.params, ...configuredEntry?.params }
+ : undefined;
const nextCompat =
params.entry.compat || configuredEntry?.compat
? { ...params.entry.compat, ...configuredEntry?.compat }
@@ -672,6 +676,7 @@ function applyModelCatalogMetadata(params: {
...(nextContextTokens !== undefined ? { contextTokens: nextContextTokens } : {}),
...(nextReasoning !== undefined ? { reasoning: nextReasoning } : {}),
...(nextInput ? { input: nextInput } : {}),
+ ...(nextParams ? { params: nextParams } : {}),
...(nextCompat ? { compat: nextCompat } : {}),
};
}
@@ -687,6 +692,7 @@ function buildSyntheticAllowedCatalogEntry(params: {
const nextContextTokens = configuredEntry?.contextTokens;
const nextReasoning = configuredEntry?.reasoning;
const nextInput = configuredEntry?.input;
+ const nextParams = configuredEntry?.params;
const nextCompat = configuredEntry?.compat;
return {
@@ -698,6 +704,7 @@ function buildSyntheticAllowedCatalogEntry(params: {
...(nextContextTokens !== undefined ? { contextTokens: nextContextTokens } : {}),
...(nextReasoning !== undefined ? { reasoning: nextReasoning } : {}),
...(nextInput ? { input: nextInput } : {}),
+ ...(nextParams ? { params: nextParams } : {}),
...(nextCompat ? { compat: nextCompat } : {}),
};
}
@@ -1302,6 +1309,8 @@ export function buildConfiguredModelCatalog(params: {
? model.contextTokens
: undefined;
const input = Array.isArray(model?.input) ? model.input : undefined;
+ const modelParams =
+ model?.params && typeof model.params === "object" ? model.params : undefined;
const compat = model?.compat && typeof model.compat === "object" ? model.compat : undefined;
const reasoning =
typeof model?.reasoning === "boolean"
@@ -1318,6 +1327,7 @@ export function buildConfiguredModelCatalog(params: {
contextTokens,
reasoning,
input,
+ ...(modelParams ? { params: modelParams } : {}),
compat,
});
}
diff --git a/src/agents/model-selection.test.ts b/src/agents/model-selection.test.ts
index a1a67d25a651..cd2ac63b2154 100644
--- a/src/agents/model-selection.test.ts
+++ b/src/agents/model-selection.test.ts
@@ -889,6 +889,31 @@ describe("model-selection", () => {
expect(model?.reasoning).toBe(true);
});
+ it("carries configured model params into catalog entries for provider policy", () => {
+ const cfg = {
+ models: {
+ providers: {
+ "amazon-bedrock": {
+ models: [
+ {
+ id: "company-fable",
+ name: "Company Fable",
+ params: {
+ canonicalModelId: "claude-fable-5",
+ },
+ },
+ ],
+ },
+ },
+ },
+ } as unknown as OpenClawConfig;
+
+ const model = buildConfiguredModelCatalog({ cfg }).find(
+ (entry) => entry.provider === "amazon-bedrock" && entry.id === "company-fable",
+ );
+ expect(model?.params).toEqual({ canonicalModelId: "claude-fable-5" });
+ });
+
it("does not infer reasoning from non-vLLM thinking compat", () => {
const cfg = {
models: {
diff --git a/src/agents/sessions/model-registry.test.ts b/src/agents/sessions/model-registry.test.ts
index 18a10e9a954f..e7b7c3e05421 100644
--- a/src/agents/sessions/model-registry.test.ts
+++ b/src/agents/sessions/model-registry.test.ts
@@ -145,6 +145,39 @@ describe("ModelRegistry models.json auth", () => {
expect(registry.find("zai", "glm-5.1")?.name).toBe("GLM 5.1");
});
+ it("preserves model params from generated plugin catalog shards", () => {
+ const modelsPath = writeModelsJsonWithPluginCatalog({
+ root: { providers: {} },
+ pluginRelativePath: join("plugins", "amazon-bedrock", PLUGIN_MODEL_CATALOG_FILE),
+ pluginCatalog: {
+ generatedBy: PLUGIN_MODEL_CATALOG_GENERATED_BY,
+ providers: {
+ "amazon-bedrock": {
+ baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com",
+ api: "bedrock-converse-stream",
+ auth: "aws-sdk",
+ models: [
+ {
+ id: "company-fable",
+ name: "Company Fable",
+ params: { canonicalModelId: "claude-fable-5" },
+ },
+ ],
+ },
+ },
+ },
+ });
+
+ const registry = ModelRegistry.create(AuthStorage.inMemory(), modelsPath, {
+ pluginMetadataSnapshot: pluginOwnerSnapshot("amazon-bedrock", "amazon-bedrock"),
+ });
+
+ expect(registry.getError()).toBeUndefined();
+ expect(registry.find("amazon-bedrock", "company-fable")?.params).toEqual({
+ canonicalModelId: "claude-fable-5",
+ });
+ });
+
it("ignores non-generated plugin catalog files", () => {
// Plugin catalog shards are codegen artifacts; hand-written lookalikes must
// not extend the provider registry.
diff --git a/src/agents/sessions/model-registry.ts b/src/agents/sessions/model-registry.ts
index ae657114a727..a6d8438c7229 100644
--- a/src/agents/sessions/model-registry.ts
+++ b/src/agents/sessions/model-registry.ts
@@ -170,6 +170,7 @@ const ModelDefinitionSchema = Type.Object({
),
contextWindow: Type.Optional(Type.Number()),
maxTokens: Type.Optional(Type.Number()),
+ params: Type.Optional(Type.Record(Type.String(), Type.Unknown())),
headers: Type.Optional(Type.Record(Type.String(), Type.String())),
compat: Type.Optional(ProviderCompatSchema),
});
@@ -553,6 +554,7 @@ export class ModelRegistry {
cost: modelDef.cost ?? defaultCost,
contextWindow: modelDef.contextWindow ?? 128000,
maxTokens: modelDef.maxTokens ?? 16384,
+ params: modelDef.params,
headers: undefined,
compat,
} as Model);
@@ -878,6 +880,7 @@ export class ModelRegistry {
cost: modelDef.cost,
contextWindow: modelDef.contextWindow,
maxTokens: modelDef.maxTokens,
+ params: modelDef.params,
headers: undefined,
compat: modelDef.compat,
} as Model);
@@ -923,6 +926,7 @@ export interface ProviderConfigInput {
cost: { input: number; output: number; cacheRead: number; cacheWrite: number };
contextWindow: number;
maxTokens: number;
+ params?: Record;
headers?: Record;
compat?: Model["compat"];
}>;
diff --git a/src/agents/transport-message-transform.test.ts b/src/agents/transport-message-transform.test.ts
index a3f8ce8b5315..4e276100f256 100644
--- a/src/agents/transport-message-transform.test.ts
+++ b/src/agents/transport-message-transform.test.ts
@@ -4,8 +4,16 @@ import type { Api, Context, Model } from "openclaw/plugin-sdk/llm";
import { describe, expect, it } from "vitest";
import { transformTransportMessages } from "./transport-message-transform.js";
-function makeModel(api: Api, provider: string, id: string): Model {
- return { api, provider, id, input: [], output: [] } as unknown as Model;
+/**
+ * Builds a minimal test model named after its id. A truthy `canonicalModelId`
+ * is exposed as `params.canonicalModelId`; otherwise no `params` key is set.
+ */
+function makeModel(api: Api, provider: string, id: string, canonicalModelId?: string): Model {
+  const canonicalParams = canonicalModelId ? { params: { canonicalModelId } } : {};
+  const fixture = { api, provider, id, name: id, ...canonicalParams, input: [], output: [] };
+  // Cast through unknown, as before: the literal is not checked against Model.
+  return fixture as unknown as Model;
}
type ToolResultMessage = Extract;
@@ -47,6 +55,211 @@ function assistantToolCall(
}
describe("transformTransportMessages synthetic tool-result policy", () => {
+ it.each([
+ {
+ source: { provider: "anthropic", model: "claude-fable-5" },
+ target: { provider: "anthropic-vertex", model: "claude-opus-4-8" },
+ },
+ {
+ source: { provider: "anthropic", model: "claude-sonnet-4-6" },
+ target: { provider: "anthropic", model: "claude-fable-5" },
+ },
+ {
+ source: {
+ provider: "microsoft-foundry",
+ model: "prod-primary",
+ responseModel: "claude-fable-5",
+ },
+ target: { provider: "anthropic", model: "claude-opus-4-8" },
+ },
+ {
+ source: { provider: "legacy-provider", model: "prod-primary" },
+ target: {
+ provider: "microsoft-foundry",
+ model: "prod-primary",
+ canonicalModelId: "claude-fable-5",
+ },
+ },
+ {
+ source: {
+ provider: "anthropic",
+ model: "claude-fable-5",
+ responseModel: "claude-opus-4-8",
+ },
+ target: { provider: "anthropic", model: "claude-fable-5" },
+ },
+ {
+ source: {
+ provider: "microsoft-foundry",
+ model: "prod-primary",
+ responseModel: "claude-opus-4-8",
+ },
+ target: {
+ provider: "microsoft-foundry",
+ model: "prod-primary",
+ canonicalModelId: "claude-fable-5",
+ },
+ },
+ ])("drops model-bound thinking for Fable switches", ({ source, target }) => {
+ const result = transformTransportMessages(
+ [
+ {
+ role: "assistant",
+ provider: source.provider,
+ api: "anthropic-messages",
+ model: source.model,
+ responseModel: source.responseModel,
+ stopReason: "stop",
+ timestamp: Date.now(),
+ content: [
+ {
+ type: "thinking",
+ thinking: "model-bound thought",
+ thinkingSignature: "sig_model_bound",
+ },
+ { type: "text", text: "visible answer" },
+ ],
+ },
+ ] as Context["messages"],
+ makeModel("anthropic-messages", target.provider, target.model, target.canonicalModelId),
+ );
+
+ expect(result[0]).toMatchObject({
+ role: "assistant",
+ content: [{ type: "text", text: "visible answer" }],
+ });
+ });
+
+ it.each([
+ {
+ sourceProvider: "anthropic",
+ sourceModel: "claude-fable-5",
+ sourceResponseModel: undefined,
+ targetProvider: "anthropic",
+ targetApi: "openclaw-anthropic-messages-transport" as const,
+ targetModel: "claude-fable-5",
+ targetCanonicalModelId: undefined,
+ },
+ {
+ sourceProvider: "microsoft-foundry",
+ sourceModel: "prod-primary",
+ sourceResponseModel: undefined,
+ targetProvider: "microsoft-foundry",
+ targetApi: "anthropic-messages" as const,
+ targetModel: "prod-primary",
+ targetCanonicalModelId: "claude-fable-5",
+ },
+ {
+ sourceProvider: "microsoft-foundry",
+ sourceModel: "prod-primary",
+ sourceResponseModel: "prod-primary",
+ targetProvider: "microsoft-foundry",
+ targetApi: "anthropic-messages" as const,
+ targetModel: "prod-primary",
+ targetCanonicalModelId: "claude-fable-5",
+ },
+ {
+ sourceProvider: "anthropic",
+ sourceModel: "claude-fable-5",
+ sourceResponseModel: undefined,
+ targetProvider: "anthropic-vertex",
+ targetApi: "anthropic-messages" as const,
+ targetModel: "claude-fable-5",
+ targetCanonicalModelId: undefined,
+ },
+ {
+ sourceProvider: "microsoft-foundry",
+ sourceModel: "prod-primary",
+ sourceResponseModel: "claude-fable-5",
+ targetProvider: "anthropic",
+ targetApi: "anthropic-messages" as const,
+ targetModel: "claude-fable-5",
+ targetCanonicalModelId: "claude-fable-5",
+ },
+ {
+ sourceProvider: "anthropic",
+ sourceModel: "claude-fable-5",
+ sourceResponseModel: undefined,
+ targetProvider: "microsoft-foundry",
+ targetApi: "anthropic-messages" as const,
+ targetModel: "prod-primary",
+ targetCanonicalModelId: "claude-fable-5",
+ },
+ ])(
+ "preserves Fable thinking across compatible Anthropic transports",
+ ({
+ sourceProvider,
+ sourceModel,
+ sourceResponseModel,
+ targetProvider,
+ targetApi,
+ targetModel,
+ targetCanonicalModelId,
+ }) => {
+ const result = transformTransportMessages(
+ [
+ {
+ role: "assistant",
+ provider: sourceProvider,
+ api: "anthropic-messages",
+ model: sourceModel,
+ responseModel: sourceResponseModel,
+ stopReason: "stop",
+ timestamp: Date.now(),
+ content: [
+ {
+ type: "thinking",
+ thinking: "",
+ thinkingSignature: "sig_omitted",
+ },
+ ],
+ },
+ ] as Context["messages"],
+ makeModel(targetApi, targetProvider, targetModel, targetCanonicalModelId),
+ );
+
+ expect(result[0]).toMatchObject({
+ role: "assistant",
+ content: [
+ {
+ type: "thinking",
+ thinking: "",
+ thinkingSignature: "sig_omitted",
+ },
+ ],
+ });
+ },
+ );
+
+ it("drops Fable thinking across unrelated API overrides", () => {
+ const result = transformTransportMessages(
+ [
+ {
+ role: "assistant",
+ provider: "anthropic",
+ api: "openai-completions",
+ model: "claude-fable-5",
+ stopReason: "stop",
+ timestamp: Date.now(),
+ content: [
+ {
+ type: "thinking",
+ thinking: "adapter reasoning",
+ thinkingSignature: "reasoning_content",
+ },
+ { type: "text", text: "visible answer" },
+ ],
+ },
+ ] as Context["messages"],
+ makeModel("anthropic-messages", "anthropic", "claude-fable-5"),
+ );
+
+ expect(result[0]).toMatchObject({
+ role: "assistant",
+ content: [{ type: "text", text: "visible answer" }],
+ });
+ });
+
it("normalizes malformed assistant content before transport conversion", () => {
const objectContentMessages = [
{
diff --git a/src/agents/transport-message-transform.ts b/src/agents/transport-message-transform.ts
index 7ee5a4866cd1..b466575b7d56 100644
--- a/src/agents/transport-message-transform.ts
+++ b/src/agents/transport-message-transform.ts
@@ -4,6 +4,7 @@
* strict provider tool-result gaps when supported.
*/
import type { Api, Context, Model } from "../llm/types.js";
+import { resolveModelBoundThinkingReplayMode } from "../shared/anthropic-model-contract.js";
import { repairToolUseResultPairing } from "./session-transcript-repair.js";
const SYNTHETIC_TOOL_RESULT_APIS = new Set([
@@ -74,8 +75,23 @@ export function transformTransportMessages(
if (msg.role !== "assistant") {
return msg;
}
+ const modelBoundThinkingReplayMode = resolveModelBoundThinkingReplayMode({
+ source: {
+ provider: msg.provider,
+ api: msg.api,
+ modelId: msg.model,
+ responseModelId: msg.responseModel,
+ },
+ target: {
+ provider: model.provider,
+ api: model.api,
+ modelId: model.id,
+ modelParams: model.params,
+ },
+ });
const isSameModel =
- msg.provider === model.provider && msg.api === model.api && msg.model === model.id;
+ modelBoundThinkingReplayMode === "preserve" ||
+ (msg.provider === model.provider && msg.api === model.api && msg.model === model.id);
const sourceContent = Array.isArray(msg.content)
? msg.content
: msg.content != null && typeof msg.content === "object"
@@ -84,6 +100,9 @@ export function transformTransportMessages(
const content: typeof msg.content = [];
for (const block of sourceContent) {
if (block.type === "thinking") {
+ if (modelBoundThinkingReplayMode === "drop") {
+ continue;
+ }
if (block.redacted) {
if (isSameModel) {
content.push(block);
diff --git a/src/agents/transport-stream-shared.ts b/src/agents/transport-stream-shared.ts
index 2a19ab50ce1d..d00ca3daba3d 100644
--- a/src/agents/transport-stream-shared.ts
+++ b/src/agents/transport-stream-shared.ts
@@ -127,7 +127,7 @@ export function finalizeTransportStream(params: {
throw new Error("Request was aborted");
}
if (output.stopReason === "aborted" || output.stopReason === "error") {
- throw new Error("An unknown error occurred");
+ throw new Error(output.errorMessage ?? "An unknown error occurred");
}
stream.push({ type: "done", reason: output.stopReason as never, message: output as never });
stream.end();
diff --git a/src/auto-reply/thinking.shared.ts b/src/auto-reply/thinking.shared.ts
index 487f9d938fef..f1eeea896910 100644
--- a/src/auto-reply/thinking.shared.ts
+++ b/src/auto-reply/thinking.shared.ts
@@ -28,7 +28,9 @@ export type UsageDisplayLevel = "off" | "tokens" | "full";
export type ThinkingCatalogEntry = {
provider: string;
id: string;
+ api?: string;
reasoning?: boolean;
+ params?: Record;
compat?: {
thinkingFormat?: string;
supportedReasoningEfforts?: readonly string[] | null;
diff --git a/src/auto-reply/thinking.test.ts b/src/auto-reply/thinking.test.ts
index 0f3a5248fff6..62507d4966f1 100644
--- a/src/auto-reply/thinking.test.ts
+++ b/src/auto-reply/thinking.test.ts
@@ -93,24 +93,21 @@ describe("listThinkingLevels", () => {
it("includes xhigh for provider-advertised models", () => {
providerRuntimeMocks.resolveProviderXHighThinking.mockImplementation(({ provider, context }) =>
- (provider === "openai" && ["gpt-5.4", "gpt-5.4", "gpt-5.4-pro"].includes(context.modelId)) ||
(provider === "openai" &&
["gpt-5.4", "gpt-5.4-pro", "gpt-5.3-codex-spark"].includes(context.modelId)) ||
- (provider === "github-copilot" && ["gpt-5.4", "gpt-5.4"].includes(context.modelId))
+ (provider === "github-copilot" && context.modelId === "gpt-5.4")
? true
: undefined,
);
- expect(listThinkingLevels("openai", "gpt-5.4")).toContain("xhigh");
- expect(listThinkingLevels("openai", "gpt-5.4")).toContain("xhigh");
- expect(listThinkingLevels("openai", "gpt-5.3-codex-spark")).toContain("xhigh");
- expect(listThinkingLevels("openai", "gpt-5.4-pro")).toContain("xhigh");
- expect(listThinkingLevels("openai", "gpt-5.4")).toContain("xhigh");
- expect(listThinkingLevels("openai", "gpt-5.4")).toContain("xhigh");
- expect(listThinkingLevels("openai", "gpt-5.4-pro")).toContain("xhigh");
- expect(listThinkingLevels("openai", "gpt-5.4")).toContain("xhigh");
- expect(listThinkingLevels("github-copilot", "gpt-5.4")).toContain("xhigh");
- expect(listThinkingLevels("github-copilot", "gpt-5.4")).toContain("xhigh");
+ for (const [provider, model] of [
+ ["openai", "gpt-5.4"],
+ ["openai", "gpt-5.4-pro"],
+ ["openai", "gpt-5.3-codex-spark"],
+ ["github-copilot", "gpt-5.4"],
+ ] as const) {
+ expect(listThinkingLevels(provider, model)).toContain("xhigh");
+ }
});
it("excludes xhigh for non-codex models", () => {
@@ -285,6 +282,114 @@ describe("listThinkingLevels", () => {
).toBe("low");
});
+ it("uses canonical Fable params when no provider thinking profile exists", () => {
+ const catalog = [
+ {
+ provider: "microsoft-foundry",
+ id: "company-fable",
+ api: "anthropic-messages",
+ reasoning: false,
+ params: { canonicalModelId: "claude-fable-5" },
+ },
+ ];
+
+ expect(listThinkingLevels("microsoft-foundry", "company-fable", catalog)).toEqual([
+ "off",
+ "minimal",
+ "low",
+ "medium",
+ "adaptive",
+ "high",
+ "xhigh",
+ "max",
+ ]);
+ expect(
+ resolveThinkingDefaultForModel({
+ provider: "microsoft-foundry",
+ model: "company-fable",
+ catalog,
+ }),
+ ).toBe("high");
+ });
+
+ it("preserves provider-specific profiles for Fable Messages routes", () => {
+ providerRuntimeMocks.resolveProviderThinkingProfile.mockReturnValue({
+ levels: [{ id: "off" }, { id: "low" }],
+ defaultLevel: "off",
+ });
+
+ expect(
+ listThinkingLevels("proxy", "company-fable", [
+ {
+ provider: "proxy",
+ id: "company-fable",
+ api: "anthropic-messages",
+ reasoning: true,
+ params: { canonicalModelId: "claude-fable-5" },
+ },
+ ]),
+ ).toEqual(["off", "low"]);
+ });
+
+ it("does not infer the Fable contract without an Anthropic Messages catalog row", () => {
+ providerRuntimeMocks.resolveProviderThinkingProfile.mockReturnValue({
+ levels: [{ id: "off" }, { id: "low" }],
+ defaultLevel: "off",
+ });
+
+ expect(listThinkingLevels("openrouter", "anthropic/claude-fable-5")).toEqual(["off", "low"]);
+ });
+
+ it("does not apply the Fable profile to OpenAI-compatible catalog rows", () => {
+ providerRuntimeMocks.resolveProviderThinkingProfile.mockReturnValue({
+ levels: [{ id: "off" }, { id: "low" }, { id: "high" }],
+ defaultLevel: "off",
+ });
+
+ expect(
+ listThinkingLevels("openrouter", "anthropic/claude-fable-5", [
+ {
+ provider: "openrouter",
+ id: "anthropic/claude-fable-5",
+ api: "openai-completions",
+ reasoning: true,
+ },
+ ]),
+ ).toEqual(["off", "low", "high"]);
+ });
+
+ it("preserves explicit provider opt-outs for canonical Fable aliases", () => {
+ providerRuntimeMocks.resolveProviderThinkingProfile.mockReturnValue({
+ levels: [{ id: "off" }],
+ defaultLevel: "off",
+ });
+ const catalog = [
+ {
+ provider: "claude-cli",
+ id: "company-fable",
+ api: "anthropic-messages",
+ reasoning: true,
+ params: { canonicalModelId: "claude-fable-5" },
+ },
+ ];
+
+ expect(listThinkingLevels("claude-cli", "company-fable", catalog)).toEqual(["off"]);
+ });
+
+ it("uses generic thinking levels when a provider has no custom profile", () => {
+ providerRuntimeMocks.resolveProviderThinkingProfile.mockReturnValue(null);
+
+ expect(
+ listThinkingLevels("vllm", "reasoning-model", [
+ {
+ provider: "vllm",
+ id: "reasoning-model",
+ reasoning: true,
+ },
+ ]),
+ ).toEqual(["off", "minimal", "low", "medium", "high"]);
+ });
+
it("matches provider-qualified catalog ids for provider thinking profiles", () => {
providerRuntimeMocks.resolveProviderThinkingProfile.mockImplementation(({ context }) =>
context.reasoning === true && context.compat?.thinkingFormat === "qwen-chat-template"
@@ -368,6 +473,32 @@ describe("listThinkingLevels", () => {
).toBe("high");
});
+ it("maps xhigh to high for provider profiles with max but no xhigh", () => {
+ providerRuntimeMocks.resolveProviderThinkingProfile.mockImplementation(({ provider }) =>
+ provider === "anthropic"
+ ? {
+ levels: [
+ { id: "off" },
+ { id: "minimal" },
+ { id: "low" },
+ { id: "medium" },
+ { id: "high" },
+ { id: "adaptive" },
+ { id: "max" },
+ ],
+ }
+ : undefined,
+ );
+
+ expect(
+ resolveSupportedThinkingLevel({
+ provider: "anthropic",
+ model: "claude-sonnet-4-6",
+ level: "xhigh",
+ }),
+ ).toBe("high");
+ });
+
it("maps unsupported adaptive to medium and unsupported xhigh to high", () => {
providerRuntimeMocks.resolveProviderThinkingProfile.mockReturnValue({
levels: [{ id: "off" }, { id: "minimal" }, { id: "low" }, { id: "medium" }, { id: "high" }],
diff --git a/src/auto-reply/thinking.ts b/src/auto-reply/thinking.ts
index aa95434fe567..2e60444e0b44 100644
--- a/src/auto-reply/thinking.ts
+++ b/src/auto-reply/thinking.ts
@@ -1,3 +1,7 @@
+import {
+ CLAUDE_FABLE_5_THINKING_PROFILE,
+ resolveClaudeFable5ModelIdentity,
+} from "@openclaw/llm-core";
// Thinking/reasoning level catalog helpers for auto-reply model controls.
import { normalizeProviderId } from "@openclaw/model-catalog-core/provider-id";
import {
@@ -95,7 +99,9 @@ function resolveThinkingPolicyContext(params: {
normalizedProvider,
modelId,
modelKey,
+ api: candidate?.api,
reasoning: candidate?.reasoning,
+ ...(candidate?.params ? { params: candidate.params } : {}),
compat: candidate?.compat,
};
}
@@ -188,12 +194,22 @@ export function resolveThinkingProfile(params: {
provider: context.normalizedProvider,
modelId: context.modelId,
reasoning: context.reasoning,
+ ...(context.params ? { params: context.params } : {}),
compat: context.compat,
};
- const pluginProfile = resolveProviderThinkingProfile({
+ const providerProfile = resolveProviderThinkingProfile({
provider: context.normalizedProvider,
context: providerContext,
});
+ const fableProfile =
+ context.api === "anthropic-messages" &&
+ resolveClaudeFable5ModelIdentity({
+ id: context.modelId,
+ params: context.params,
+ })
+ ? CLAUDE_FABLE_5_THINKING_PROFILE
+ : undefined;
+ const pluginProfile = providerProfile ?? fableProfile;
if (pluginProfile) {
const normalized = normalizeThinkingProfile(pluginProfile);
if (
diff --git a/src/llm/model-utils.test.ts b/src/llm/model-utils.test.ts
new file mode 100644
index 000000000000..800ec2af0165
--- /dev/null
+++ b/src/llm/model-utils.test.ts
@@ -0,0 +1,46 @@
+import { describe, expect, it } from "vitest";
+import { clampThinkingLevel, getSupportedThinkingLevels } from "./model-utils.js";
+import type { Model } from "./types.js";
+
+/**
+ * Creates a reasoning-enabled `openai-responses` test model that carries the
+ * given thinking-level map. Fields in `overrides` replace the defaults.
+ */
+function makeModel(
+  thinkingLevelMap: Model["thinkingLevelMap"],
+  overrides: Partial = {},
+): Model {
+  const zeroCost = { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 };
+  const defaults: Model = {
+    id: "test-model",
+    name: "Test Model",
+    api: "openai-responses",
+    provider: "openai",
+    baseUrl: "https://example.com",
+    reasoning: true,
+    thinkingLevelMap,
+    input: ["text"],
+    cost: zeroCost,
+    contextWindow: 128_000,
+    maxTokens: 4096,
+  };
+  // Spread last so callers can replace any default field.
+  return { ...defaults, ...overrides };
+}
+
+describe("clampThinkingLevel", () => {
+  it("downgrades explicit extended-level opt-outs", () => {
+    // xhigh is explicitly nulled out while max stays mapped.
+    const xhighOptOut = makeModel({ xhigh: null, max: "max" });
+
+    expect(clampThinkingLevel(xhighOptOut, "xhigh")).toBe("high");
+  });
+
+  it("keeps upward clamping for lower-level map holes", () => {
+    const minimalOptOut = makeModel({ minimal: null });
+
+    expect(clampThinkingLevel(minimalOptOut, "minimal")).toBe("low");
+  });
+
+  it("honors canonical Fable capabilities when catalog reasoning is stale", () => {
+    // The row claims reasoning: false, but its params name the canonical Fable id.
+    const staleFableRow = makeModel(undefined, {
+      id: "company-fable",
+      api: "anthropic-messages",
+      provider: "microsoft-foundry",
+      reasoning: false,
+      params: { canonicalModelId: "claude-fable-5" },
+    });
+    const supportedLevels = getSupportedThinkingLevels(staleFableRow);
+
+    expect(supportedLevels).toContain("max");
+    expect(clampThinkingLevel(staleFableRow, "max")).toBe("max");
+  });
+});
diff --git a/src/llm/model-utils.ts b/src/llm/model-utils.ts
index aef13c53a076..c8a4598f051a 100644
--- a/src/llm/model-utils.ts
+++ b/src/llm/model-utils.ts
@@ -1,4 +1,8 @@
// Provides model selection, usage, and thinking-level utility helpers.
+import {
+ resolveClaudeFable5ModelIdentity,
+ resolveClaudeNativeThinkingLevelMap,
+} from "@openclaw/llm-core";
import type { Api, Model, ModelThinkingLevel, Usage } from "./types.js";
/** Calculates and stores model cost fields from token usage and per-million pricing. */
@@ -22,16 +26,25 @@ const EXTENDED_THINKING_LEVELS: ModelThinkingLevel[] = [
"max",
];
+/**
+ * Picks the thinking-level map for a model. For `anthropic-messages` models
+ * the result of `resolveClaudeNativeThinkingLevelMap` is preferred, with the
+ * model's own `thinkingLevelMap` as the nullish fallback. All other APIs use
+ * the model's own map directly.
+ */
+function resolveThinkingLevelMap(model: Model) {
+  if (model.api !== "anthropic-messages") {
+    return model.thinkingLevelMap;
+  }
+  return resolveClaudeNativeThinkingLevelMap(model) ?? model.thinkingLevelMap;
+}
+
/** Returns thinking levels exposed by a reasoning-capable model. */
export function getSupportedThinkingLevels(
model: Model,
): ModelThinkingLevel[] {
- if (!model.reasoning) {
+ const fableContract =
+ model.api === "anthropic-messages" && resolveClaudeFable5ModelIdentity(model) !== undefined;
+ if (!model.reasoning && !fableContract) {
return ["off"];
}
+ const thinkingLevelMap = resolveThinkingLevelMap(model);
return EXTENDED_THINKING_LEVELS.filter((level) => {
- const mapped = model.thinkingLevelMap?.[level];
+ const mapped = thinkingLevelMap?.[level];
if (mapped === null) {
return false;
}
@@ -57,6 +70,18 @@ export function clampThinkingLevel(
return availableLevels[0] ?? "off";
}
+ // Explicit provider opt-outs are hard caps. Downgrade them before considering
+ // stronger levels so unsupported xhigh/max requests cannot increase cost.
+ const thinkingLevelMap = resolveThinkingLevelMap(model);
+ if ((level === "xhigh" || level === "max") && thinkingLevelMap?.[level] === null) {
+ for (let i = requestedIndex - 1; i >= 0; i--) {
+ const candidate = EXTENDED_THINKING_LEVELS[i];
+ if (availableLevels.includes(candidate)) {
+ return candidate;
+ }
+ }
+ }
+
// Prefer the next stronger available level, then walk down if the request was above the model cap.
for (let i = requestedIndex; i < EXTENDED_THINKING_LEVELS.length; i++) {
const candidate = EXTENDED_THINKING_LEVELS[i];
diff --git a/src/llm/providers/anthropic.test.ts b/src/llm/providers/anthropic.test.ts
index 2618d3b1fe91..5205df6e7d86 100644
--- a/src/llm/providers/anthropic.test.ts
+++ b/src/llm/providers/anthropic.test.ts
@@ -24,7 +24,9 @@ vi.mock("@anthropic-ai/sdk", () => ({
import { streamAnthropic, streamSimpleAnthropic } from "./anthropic.js";
function createSseResponse(events: Record[] = []): Response {
- const body = events.map((event) => `data: ${JSON.stringify(event)}\n\n`).join("");
+ const body = events
+ .map((event) => `event: ${String(event.type)}\ndata: ${JSON.stringify(event)}\n\n`)
+ .join("");
return new Response(body, {
status: 200,
headers: { "content-type": "text/event-stream" },
@@ -93,7 +95,11 @@ describe("Anthropic provider", () => {
createSseResponse([
{
type: "message_start",
- message: { id: "msg_1", usage: { input_tokens: 1, output_tokens: 0 } },
+ message: {
+ id: "msg_1",
+ model: "claude-fable-5",
+ usage: { input_tokens: 1, output_tokens: 0 },
+ },
},
{
type: "message_delta",
@@ -108,7 +114,10 @@ describe("Anthropic provider", () => {
};
const stream = streamAnthropic(
- makeAnthropicModel(),
+ makeAnthropicModel({
+ id: "claude-fable-5",
+ name: "Claude Fable 5",
+ }),
{
messages: [
{ role: "user", content: "hello", timestamp: 0 },
@@ -116,7 +125,7 @@ describe("Anthropic provider", () => {
role: "assistant",
provider: "anthropic",
api: "anthropic-messages",
- model: "claude-sonnet-4-6",
+ model: "claude-fable-5",
stopReason: "stop",
timestamp: 0,
usage: {
@@ -133,6 +142,11 @@ describe("Anthropic provider", () => {
thinking: signedThinking,
thinkingSignature: "sig_1",
},
+ {
+ type: "thinking",
+ thinking: "",
+ thinkingSignature: "sig_omitted",
+ },
{
type: "thinking",
thinking: `sanitize${highSurrogate}synthetic`,
@@ -152,7 +166,7 @@ describe("Anthropic provider", () => {
},
);
- await stream.result();
+ const result = await stream.result();
const payload = capturedPayload as { messages: Array<{ role: string; content: unknown[] }> };
const assistantMessage = payload.messages.find((message) => message.role === "assistant");
@@ -163,10 +177,182 @@ describe("Anthropic provider", () => {
thinking: signedThinking,
signature: "sig_1",
},
+ {
+ type: "thinking",
+ thinking: "",
+ signature: "sig_omitted",
+ },
+ ]);
+ expect(result.responseModel).toBe("claude-fable-5");
+ });
+
+ it.each([
+ ["anthropic", "sk-ant-provider"],
+ ["anthropic-vertex", "vertex-token"],
+ ])("surfaces structured Anthropic streaming refusals for %s", async (provider, apiKey) => {
+ const client = {
+ messages: {
+ create: vi.fn(() => ({
+ asResponse: () =>
+ Promise.resolve(
+ createSseResponse([
+ {
+ type: "message_start",
+ message: { id: "msg_refusal", usage: { input_tokens: 3, output_tokens: 0 } },
+ },
+ {
+ type: "content_block_start",
+ index: 0,
+ content_block: { type: "text", text: "" },
+ },
+ {
+ type: "content_block_delta",
+ index: 0,
+ delta: { type: "text_delta", text: "discard this partial output" },
+ },
+ { type: "content_block_stop", index: 0 },
+ {
+ type: "message_delta",
+ delta: {
+ stop_reason: "refusal",
+ stop_details: {
+ type: "refusal",
+ category: "cyber",
+ explanation: "This request is not allowed.",
+ },
+ },
+ usage: { input_tokens: 3, output_tokens: 2 },
+ },
+ { type: "message_stop" },
+ ]),
+ ),
+ })),
+ },
+ };
+
+ const stream = streamAnthropic(
+ makeAnthropicModel({
+ id: "claude-fable-5",
+ name: "Claude Fable 5",
+ provider,
+ }),
+ { messages: [{ role: "user", content: "hello", timestamp: 0 }] },
+ { apiKey, client: client as never },
+ );
+ const eventTypes: string[] = [];
+ for await (const event of stream) {
+ eventTypes.push(event.type);
+ }
+ const result = await stream.result();
+
+ expect(eventTypes).toEqual(["error"]);
+ expect(result.stopReason).toBe("error");
+ expect(result.content).toEqual([]);
+ expect(result.errorMessage).toBe(
+ "Anthropic refusal (category: cyber): This request is not allowed.",
+ );
+ expect(result.usage).toMatchObject({ input: 3, output: 2 });
+ expect(result.diagnostics).toEqual([
+ expect.objectContaining({
+ type: "provider_refusal",
+ details: {
+ provider,
+ category: "cyber",
+ explanation: "This request is not allowed.",
+ },
+ }),
]);
});
- it("clamps max adaptive effort when the Claude model does not advertise it", async () => {
+ it("discards buffered Fable output when the stream fails before terminal status", async () => {
+ const client = {
+ messages: {
+ create: vi.fn(() => ({
+ asResponse: () =>
+ Promise.resolve(
+ createSseResponse([
+ {
+ type: "content_block_start",
+ index: 0,
+ content_block: { type: "text", text: "" },
+ },
+ {
+ type: "content_block_delta",
+ index: 0,
+ delta: { type: "text_delta", text: "unsafe partial output" },
+ },
+ ]),
+ ),
+ })),
+ },
+ };
+ const stream = streamAnthropic(
+ makeAnthropicModel({ id: "claude-fable-5", name: "Claude Fable 5" }),
+ { messages: [{ role: "user", content: "hello", timestamp: 0 }] },
+ { apiKey: "sk-ant-provider", client: client as never },
+ );
+ const eventTypes: string[] = [];
+ for await (const event of stream) {
+ eventTypes.push(event.type);
+ }
+ const result = await stream.result();
+
+ expect(eventTypes).toEqual(["error"]);
+ expect(result.stopReason).toBe("error");
+ expect(result.content).toEqual([]);
+ expect(result.errorMessage).toContain("ended before message_stop");
+ });
+
+ it("strips Fable thinking when replay targets Anthropic Vertex", async () => {
+ let capturedPayload: unknown;
+ const stream = streamAnthropic(
+ makeAnthropicModel({
+ provider: "anthropic-vertex",
+ id: "claude-opus-4-8",
+ name: "Claude Opus 4.8",
+ }),
+ {
+ messages: [
+ { role: "user", content: "hello", timestamp: 0 },
+ {
+ role: "assistant",
+ provider: "anthropic",
+ api: "anthropic-messages",
+ model: "claude-fable-5",
+ stopReason: "stop",
+ timestamp: 0,
+ content: [
+ {
+ type: "thinking",
+ thinking: "model-bound thought",
+ thinkingSignature: "sig_model_bound",
+ },
+ { type: "text", text: "visible answer" },
+ ],
+ },
+ { role: "user", content: "continue", timestamp: 0 },
+ ],
+ } as Context,
+ {
+ apiKey: "vertex-token",
+ onPayload: (payload) => {
+ capturedPayload = payload;
+ },
+ },
+ );
+
+ await stream.result();
+
+ const payload = capturedPayload as { messages: Array<{ role: string; content: unknown[] }> };
+ const assistantMessage = payload.messages.find((message) => message.role === "assistant");
+ expect(assistantMessage?.content).toEqual([{ type: "text", text: "visible answer" }]);
+ expect(JSON.stringify(assistantMessage)).not.toContain("sig_model_bound");
+ });
+
+ it.each([
+ { reasoning: "xhigh", expectedEffort: "high" },
+ { reasoning: "max", expectedEffort: "max" },
+ ] as const)("maps Claude 4.6 $reasoning effort", async ({ reasoning, expectedEffort }) => {
let capturedPayload: unknown;
const stream = streamSimpleAnthropic(
makeAnthropicModel({
@@ -178,7 +364,7 @@ describe("Anthropic provider", () => {
},
{
apiKey: "sk-ant-provider",
- reasoning: "max",
+ reasoning,
onPayload: (payload) => {
capturedPayload = payload;
},
@@ -188,7 +374,290 @@ describe("Anthropic provider", () => {
await stream.result();
expect((capturedPayload as { output_config?: unknown }).output_config).toEqual({
- effort: "high",
+ effort: expectedEffort,
+ });
+ });
+
+ it.each([
+ {
+ id: "claude-opus-4.6-1m",
+ reasoning: "xhigh",
+ thinkingLevelMap: { xhigh: null, max: null },
+ expectedEffort: "high",
+ },
+ {
+ id: "claude-opus-4.7-1m-internal",
+ reasoning: "max",
+ thinkingLevelMap: { xhigh: "xhigh" },
+ expectedEffort: "xhigh",
+ },
+ ] as const)(
+ "honors proxy effort restrictions for $id",
+ async ({ id, reasoning, thinkingLevelMap, expectedEffort }) => {
+ let capturedPayload: unknown;
+ const stream = streamSimpleAnthropic(
+ makeAnthropicModel({
+ id,
+ provider: "github-copilot",
+ thinkingLevelMap,
+ }),
+ { messages: [{ role: "user", content: "hello", timestamp: 0 }] },
+ {
+ apiKey: "copilot-token",
+ reasoning,
+ onPayload: (payload) => {
+ capturedPayload = payload;
+ },
+ },
+ );
+
+ await stream.result();
+
+ expect((capturedPayload as { output_config?: unknown }).output_config).toEqual({
+ effort: expectedEffort,
+ });
+ },
+ );
+
+ it("uses always-on adaptive thinking for Claude Fable 5", async () => {
+ let capturedPayload: unknown;
+ const stream = streamSimpleAnthropic(
+ makeAnthropicModel({
+ id: "prod-primary",
+ name: "Production Claude",
+ provider: "microsoft-foundry",
+ params: { canonicalModelId: "claude-fable-5" },
+ reasoning: false,
+ baseUrl: "https://example.services.ai.azure.com/anthropic",
+ }),
+ {
+ messages: [{ role: "user", content: "hello", timestamp: 0 }],
+ },
+ {
+ apiKey: "sk-ant-provider",
+ temperature: 0.2,
+ onPayload: (payload) => {
+ capturedPayload = payload;
+ },
+ },
+ );
+
+ await stream.result();
+
+ expect(capturedPayload).toMatchObject({
+ thinking: { type: "adaptive", display: "summarized" },
+ output_config: { effort: "high" },
+ });
+ expect(capturedPayload).not.toHaveProperty("temperature");
+ });
+
+ it.each([
+ {
+ id: "prod-primary",
+ name: "Claude Fable 5",
+ params: undefined,
+ },
+ ])("does not infer the Fable contract from noncanonical metadata", async (overrides) => {
+ let capturedPayload: unknown;
+ const stream = streamSimpleAnthropic(
+ makeAnthropicModel({
+ ...overrides,
+ reasoning: false,
+ }),
+ { messages: [{ role: "user", content: "hello", timestamp: 0 }] },
+ {
+ apiKey: "sk-ant-provider",
+ temperature: 0.2,
+ onPayload: (payload) => {
+ capturedPayload = payload;
+ },
+ },
+ );
+
+ await stream.result();
+
+ expect(capturedPayload).toMatchObject({ temperature: 0.2 });
+ expect(capturedPayload).not.toHaveProperty("thinking");
+ });
+
+ it("uses canonical Claude policy for deployment aliases", async () => {
+ let capturedPayload: unknown;
+ const stream = streamSimpleAnthropic(
+ makeAnthropicModel({
+ id: "production-claude",
+ name: "Production Claude",
+ params: { canonicalModelId: "claude-opus-4-8" },
+ reasoning: false,
+ thinkingLevelMap: { xhigh: "xhigh", max: "max" },
+ }),
+ { messages: [{ role: "user", content: "hello", timestamp: 0 }] },
+ {
+ apiKey: "sk-ant-provider",
+ reasoning: "xhigh",
+ temperature: 0.2,
+ onPayload: (payload) => {
+ capturedPayload = payload;
+ },
+ },
+ );
+
+ await stream.result();
+
+ expect(capturedPayload).toMatchObject({
+ model: "production-claude",
+ thinking: { type: "adaptive" },
+ output_config: { effort: "xhigh" },
+ });
+ expect(capturedPayload).not.toHaveProperty("temperature");
+ });
+
+ it.each([
+ { canonicalModelId: "claude-opus-4-8", expectedTemperature: undefined },
+ { canonicalModelId: "claude-opus-4-6", expectedTemperature: 0.2 },
+ ] as const)(
+ "normalizes temperature for canonical $canonicalModelId aliases when thinking is off",
+ async ({ canonicalModelId, expectedTemperature }) => {
+ let capturedPayload: unknown;
+ const stream = streamSimpleAnthropic(
+ makeAnthropicModel({
+ id: "production-claude",
+ params: { canonicalModelId },
+ reasoning: false,
+ thinkingLevelMap: { xhigh: "xhigh", max: "max" },
+ }),
+ { messages: [{ role: "user", content: "hello", timestamp: 0 }] },
+ {
+ apiKey: "sk-ant-provider",
+ temperature: 0.2,
+ onPayload: (payload) => {
+ capturedPayload = payload;
+ },
+ },
+ );
+
+ await stream.result();
+
+ expect((capturedPayload as { temperature?: number }).temperature).toBe(expectedTemperature);
+ },
+ );
+
+ it("normalizes forced Fable tool choice to auto", async () => {
+ let capturedPayload: unknown;
+ const stream = streamAnthropic(
+ makeAnthropicModel({
+ id: "claude-fable-5",
+ name: "Claude Fable 5",
+ }),
+ {
+ messages: [{ role: "user", content: "Use a tool.", timestamp: 0 }],
+ },
+ {
+ apiKey: "sk-ant-provider",
+ thinkingEnabled: true,
+ effort: "high",
+ toolChoice: "any",
+ onPayload: (payload) => {
+ capturedPayload = payload;
+ },
+ },
+ );
+
+ await stream.result();
+
+ expect(capturedPayload).toMatchObject({
+ thinking: { type: "adaptive", display: "summarized" },
+ tool_choice: { type: "auto" },
+ });
+ });
+
+ it("preserves Claude Fable 5 high effort when catalog reasoning is false", async () => {
+   const model = makeAnthropicModel({
+     id: "claude-fable-5",
+     name: "Claude Fable 5",
+     reasoning: false, // catalog flag is off; the payload below must still carry adaptive thinking
+   });
+   for (const testCase of [
+     { reasoning: "off", effort: "low" }, // "off" is expected to surface as the lowest effort
+     { reasoning: "high", effort: "high" },
+     { reasoning: "xhigh", effort: "xhigh" },
+   ] as const) {
+     let capturedPayload: unknown;
+     const stream = streamSimpleAnthropic(
+       model,
+       {
+         messages: [{ role: "user", content: "hello", timestamp: 0 }],
+       },
+       {
+         apiKey: "sk-ant-provider",
+         reasoning: testCase.reasoning,
+         onPayload: (payload: unknown) => {
+           capturedPayload = payload;
+         },
+       } as unknown as Parameters<typeof streamSimpleAnthropic>[2], // NOTE(review): cast presumably needed because "off" is outside the declared option type - confirm
+     );
+
+     await stream.result();
+
+     expect(capturedPayload).toMatchObject({
+       thinking: { type: "adaptive", display: "summarized" },
+       output_config: { effort: testCase.effort },
+     });
+   }
+ });
+
+ it("honors provider effort restrictions for Claude Fable 5", async () => {
+ let capturedPayload: unknown;
+ const stream = streamSimpleAnthropic(
+ makeAnthropicModel({
+ id: "claude-fable-5",
+ name: "Claude Fable 5",
+ provider: "github-copilot",
+ reasoning: false,
+ thinkingLevelMap: { xhigh: null, max: null },
+ }),
+ { messages: [{ role: "user", content: "hello", timestamp: 0 }] },
+ {
+ apiKey: "copilot-token",
+ reasoning: "xhigh",
+ onPayload: (payload) => {
+ capturedPayload = payload;
+ },
+ },
+ );
+
+ await stream.result();
+
+ expect(capturedPayload).toMatchObject({
+ thinking: { type: "adaptive", display: "summarized" },
+ output_config: { effort: "high" },
+ });
+ });
+
+ it("uses the Claude Fable 5 contract on Anthropic Vertex", async () => {
+ let capturedPayload: unknown;
+ const stream = streamSimpleAnthropic(
+ makeAnthropicModel({
+ id: "claude-fable-5",
+ name: "Claude Fable 5",
+ provider: "anthropic-vertex",
+ }),
+ {
+ messages: [{ role: "user", content: "hello", timestamp: 0 }],
+ },
+ {
+ apiKey: "vertex-token",
+ reasoning: "high",
+ onPayload: (payload) => {
+ capturedPayload = payload;
+ },
+ },
+ );
+
+ await stream.result();
+
+ expect(capturedPayload).toMatchObject({
+ thinking: { type: "adaptive", display: "summarized" },
+ output_config: { effort: "high" },
});
});
diff --git a/src/llm/providers/anthropic.ts b/src/llm/providers/anthropic.ts
index c2247db4f9e8..4ac247a63a8f 100644
--- a/src/llm/providers/anthropic.ts
+++ b/src/llm/providers/anthropic.ts
@@ -12,17 +12,29 @@ import {
splitSystemPromptCacheBoundary,
stripSystemPromptCacheBoundary,
} from "../../agents/system-prompt-cache-boundary.js";
+import {
+ resolveClaudeNativeThinkingLevelMap,
+ supportsClaudeAdaptiveThinking,
+ supportsClaudeNativeMaxEffort,
+ supportsClaudeNativeXhighEffort,
+ usesClaudeFable5MessagesContract,
+} from "../../shared/anthropic-model-contract.js";
+import { applyAnthropicRefusal } from "../../shared/anthropic-refusal.js";
+import { createDeferredEventBuffer } from "../../shared/deferred-event-buffer.js";
+import { notifyLlmRequestActivity } from "../../shared/llm-request-activity.js";
import { getEnvApiKey } from "../env-api-keys.js";
import { calculateCost, clampThinkingLevel } from "../model-utils.js";
import type {
AnthropicMessagesCompat,
Api,
AssistantMessage,
+ AssistantMessageEvent,
CacheRetention,
Context,
ImageContent,
Message,
Model,
+ ModelThinkingLevel,
SimpleStreamOptions,
StopReason,
StreamFunction,
@@ -399,6 +411,7 @@ async function* iterateSseMessages(
async function* iterateAnthropicEvents(
response: Response,
signal?: AbortSignal,
+ requireMessageStop = false,
): AsyncGenerator {
if (!response.body) {
throw new Error("Attempted to iterate over an Anthropic response with no body");
@@ -433,7 +446,7 @@ async function* iterateAnthropicEvents(
}
}
- if (sawMessageStart && !sawMessageEnd) {
+ if ((sawMessageStart || requireMessageStop) && !sawMessageEnd) {
throw new Error("Anthropic stream ended before message_stop");
}
}
@@ -463,6 +476,14 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
stopReason: "stop",
timestamp: Date.now(),
};
+ // Fable classifiers can refuse after partial generation, so no event is
+ // safe to expose until the terminal stop reason is known.
+ const refusalBuffer = usesClaudeFable5MessagesContract(model)
+ ? createDeferredEventBuffer(stream, () =>
+ notifyLlmRequestActivity(options?.signal),
+ )
+ : undefined;
+ const eventSink = refusalBuffer ?? stream;
try {
let client: Anthropic;
@@ -521,9 +542,14 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
};
const blocks = output.content as Block[];
- for await (const event of iterateAnthropicEvents(response, options?.signal)) {
+ for await (const event of iterateAnthropicEvents(
+ response,
+ options?.signal,
+ refusalBuffer !== undefined,
+ )) {
if (event.type === "message_start") {
output.responseId = event.message.id;
+ output.responseModel = event.message.model;
output.usage.input = event.message.usage.input_tokens || 0;
output.usage.output = event.message.usage.output_tokens || 0;
output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
@@ -538,7 +564,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
// (e.g. invalid thinking signatures) arrive before any non-error event
// is yielded, keeping yieldedOutput=false in pumpStreamWithRecovery
// and allowing the thinking-block recovery retry to fire.
- stream.push({ type: "start", partial: output });
+ eventSink.push({ type: "start", partial: output });
} else if (event.type === "content_block_start") {
if (event.content_block.type === "text") {
const block: Block = {
@@ -547,7 +573,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
index: event.index,
};
output.content.push(block);
- stream.push({
+ eventSink.push({
type: "text_start",
contentIndex: output.content.length - 1,
partial: output,
@@ -560,7 +586,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
index: event.index,
};
output.content.push(block);
- stream.push({
+ eventSink.push({
type: "thinking_start",
contentIndex: output.content.length - 1,
partial: output,
@@ -574,7 +600,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
index: event.index,
};
output.content.push(block);
- stream.push({
+ eventSink.push({
type: "thinking_start",
contentIndex: output.content.length - 1,
partial: output,
@@ -591,7 +617,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
index: event.index,
};
output.content.push(block);
- stream.push({
+ eventSink.push({
type: "toolcall_start",
contentIndex: output.content.length - 1,
partial: output,
@@ -603,7 +629,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
const block = blocks[index];
if (block && block.type === "text") {
block.text += event.delta.text;
- stream.push({
+ eventSink.push({
type: "text_delta",
contentIndex: index,
delta: event.delta.text,
@@ -615,7 +641,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
const block = blocks[index];
if (block && block.type === "thinking") {
block.thinking += event.delta.thinking;
- stream.push({
+ eventSink.push({
type: "thinking_delta",
contentIndex: index,
delta: event.delta.thinking,
@@ -628,7 +654,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
if (block && block.type === "toolCall") {
block.partialJson += event.delta.partial_json;
block.arguments = parseStreamingJson(block.partialJson);
- stream.push({
+ eventSink.push({
type: "toolcall_delta",
contentIndex: index,
delta: event.delta.partial_json,
@@ -649,14 +675,14 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
if (block) {
delete (block as Partial).index;
if (block.type === "text") {
- stream.push({
+ eventSink.push({
type: "text_end",
contentIndex: index,
content: block.text,
partial: output,
});
} else if (block.type === "thinking") {
- stream.push({
+ eventSink.push({
type: "thinking_end",
contentIndex: index,
content: block.thinking,
@@ -667,7 +693,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
// Finalize in-place and strip the scratch buffer so replay only
// carries parsed arguments.
delete (block as { partialJson?: string }).partialJson;
- stream.push({
+ eventSink.push({
type: "toolcall_end",
contentIndex: index,
toolCall: block,
@@ -677,7 +703,11 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
}
} else if (event.type === "message_delta") {
if (event.delta.stop_reason) {
- output.stopReason = mapStopReason(event.delta.stop_reason);
+ if (event.delta.stop_reason === "refusal") {
+ applyAnthropicRefusal(output, event.delta.stop_details, model.provider);
+ } else {
+ output.stopReason = mapStopReason(event.delta.stop_reason);
+ }
}
// Only update usage fields if present (not null).
// Preserves input_tokens from message_start when proxies omit it in message_delta.
@@ -708,9 +738,10 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
}
if (output.stopReason === "aborted" || output.stopReason === "error") {
- throw new Error("An unknown error occurred");
+ throw new Error(output.errorMessage ?? "An unknown error occurred");
}
+ refusalBuffer?.flush();
stream.push({ type: "done", reason: output.stopReason, message: output });
stream.end();
} catch (error) {
@@ -719,6 +750,10 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
// partialJson is only a streaming scratch buffer; never persist it.
delete (block as { partialJson?: string }).partialJson;
}
+ if (refusalBuffer) {
+ refusalBuffer.discard();
+ output.content = [];
+ }
output.stopReason = options?.signal?.aborted ? "aborted" : "error";
output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error);
stream.push({ type: "error", reason: output.stopReason, error: output });
@@ -729,21 +764,28 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti
return stream;
};
+function normalizeAnthropicToolChoice(
+ model: Model<"anthropic-messages">,
+ toolChoice: AnthropicOptions["toolChoice"],
+) {
+ if (
+ usesClaudeFable5MessagesContract(model) &&
+ (toolChoice === "any" || (typeof toolChoice === "object" && toolChoice.type === "tool"))
+ ) {
+ return { type: "auto" as const };
+ }
+ return typeof toolChoice === "string" ? { type: toolChoice } : toolChoice;
+}
+
/**
- * Check if a model supports adaptive thinking (Opus 4.6+, Sonnet 4.6)
+ * Check if a model supports adaptive thinking (Fable 5, Opus 4.6+, Sonnet 4.6).
*/
-function supportsAdaptiveThinking(modelId: string): boolean {
- // Adaptive-thinking model IDs (with or without date suffix)
- return (
- modelId.includes("opus-4-6") ||
- modelId.includes("opus-4.6") ||
- modelId.includes("opus-4-8") ||
- modelId.includes("opus-4.8") ||
- modelId.includes("opus-4-7") ||
- modelId.includes("opus-4.7") ||
- modelId.includes("sonnet-4-6") ||
- modelId.includes("sonnet-4.6")
- );
+function supportsAdaptiveThinking(model: Model<"anthropic-messages">): boolean {
+ return supportsClaudeAdaptiveThinking(model);
+}
+
+function supportsNativeXhighEffort(model: Model<"anthropic-messages">): boolean {
+ return supportsClaudeNativeXhighEffort(model);
}
/**
@@ -754,13 +796,24 @@ function mapThinkingLevelToEffort(
model: Model<"anthropic-messages">,
level: SimpleStreamOptions["reasoning"],
): AnthropicEffort {
- const clampedLevel = level ? clampThinkingLevel(model, level) : undefined;
- const mapped = clampedLevel ? model.thinkingLevelMap?.[clampedLevel] : undefined;
+ const requestedLevel = level as ModelThinkingLevel | undefined;
+ const hasCanonicalAlias = typeof model.params?.canonicalModelId === "string";
+ const thinkingLevelMap = resolveClaudeNativeThinkingLevelMap(model);
+ const clampModel = {
+ ...model,
+ ...(hasCanonicalAlias ? { reasoning: true } : {}),
+ ...(thinkingLevelMap ? { thinkingLevelMap } : {}),
+ };
+ const clampedLevel = requestedLevel
+ ? clampThinkingLevel(clampModel, requestedLevel)
+ : requestedLevel;
+ const mapped = clampedLevel ? thinkingLevelMap?.[clampedLevel] : undefined;
if (typeof mapped === "string") {
return mapped as AnthropicEffort;
}
switch (clampedLevel) {
+ case "off":
case "minimal":
case "low":
return "low";
@@ -768,8 +821,10 @@ function mapThinkingLevelToEffort(
return "medium";
case "high":
return "high";
+ case "xhigh":
+ return supportsNativeXhighEffort(model) ? "xhigh" : "high";
case "max":
- return "max";
+ return supportsClaudeNativeMaxEffort(model) ? "max" : "high";
default:
return "high";
}
@@ -787,15 +842,17 @@ export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleS
const base = buildBaseOptions(model, options, apiKey);
if (!options?.reasoning) {
+ const fable5 = usesClaudeFable5MessagesContract(model);
return streamAnthropic(model, context, {
...base,
- thinkingEnabled: false,
+ thinkingEnabled: fable5,
+ ...(fable5 ? { effort: "high" as const } : {}),
} satisfies AnthropicOptions);
}
// For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level
// For older models: use budget-based thinking
- if (supportsAdaptiveThinking(model.id)) {
+ if (supportsAdaptiveThinking(model)) {
const effort = mapThinkingLevelToEffort(model, options.reasoning);
return streamAnthropic(model, context, {
...base,
@@ -836,7 +893,7 @@ function createClient(
): { client: Anthropic; isOAuthToken: boolean } {
// Adaptive thinking models (Opus 4.6, Sonnet 4.6) have interleaved thinking built-in.
// The beta header is deprecated on Opus 4.6 and redundant on Sonnet 4.6, so skip it.
- const needsInterleavedBeta = interleavedThinking && !supportsAdaptiveThinking(model.id);
+ const needsInterleavedBeta = interleavedThinking && !supportsAdaptiveThinking(model);
const betaFeatures: string[] = [];
if (useFineGrainedToolStreamingBeta) {
betaFeatures.push(FINE_GRAINED_TOOL_STREAMING_BETA);
@@ -977,8 +1034,12 @@ function buildParams(
params.system = system;
}
- // Temperature is incompatible with extended thinking (adaptive or budget-based).
- if (options?.temperature !== undefined && !options?.thinkingEnabled) {
+ // Thinking-enabled requests and post-4.6 Claude models reject custom temperature values.
+ if (
+ options?.temperature !== undefined &&
+ !options?.thinkingEnabled &&
+ !supportsNativeXhighEffort(model)
+ ) {
params.temperature = options.temperature;
}
@@ -990,30 +1051,33 @@ function buildParams(
params.tools = tools;
}
- // Configure thinking mode: adaptive (Opus 4.6+ and Sonnet 4.6),
+ // Configure thinking mode: always-on adaptive (Fable 5), adaptive
+ // (Opus 4.6+ and Sonnet 4.6),
// budget-based (older models), or explicitly disabled.
- if (model.reasoning) {
- if (options?.thinkingEnabled) {
+ const fable5 = usesClaudeFable5MessagesContract(model);
+ if (fable5 || model.reasoning || supportsAdaptiveThinking(model)) {
+ if (fable5 || options?.thinkingEnabled) {
// Default to "summarized" so Opus 4.7+ and Mythos Preview behave like
// older Claude 4 models (whose API default is also "summarized").
- const display: AnthropicThinkingDisplay = options.thinkingDisplay ?? "summarized";
- if (supportsAdaptiveThinking(model.id)) {
+ const display: AnthropicThinkingDisplay = options?.thinkingDisplay ?? "summarized";
+ if (supportsAdaptiveThinking(model)) {
// Adaptive thinking: Claude decides when and how much to think.
params.thinking = { type: "adaptive", display };
- if (options.effort) {
+ const effort = options?.effort ?? (fable5 ? "high" : undefined);
+ if (effort) {
// The Anthropic SDK types can lag newly supported effort values such as "xhigh".
params.output_config =
- options.effort === "xhigh"
- ? ({ effort: options.effort } as unknown as NonNullable<
+ effort === "xhigh"
+ ? ({ effort } as unknown as NonNullable<
MessageCreateParamsStreaming["output_config"]
>)
- : { effort: options.effort };
+ : { effort };
}
} else {
// Budget-based thinking for older models
params.thinking = {
type: "enabled",
- budget_tokens: options.thinkingBudgetTokens || 1024,
+ budget_tokens: options?.thinkingBudgetTokens || 1024,
display,
};
}
@@ -1030,11 +1094,7 @@ function buildParams(
}
if (options?.toolChoice) {
- if (typeof options.toolChoice === "string") {
- params.tool_choice = { type: options.toolChoice };
- } else {
- params.tool_choice = options.toolChoice;
- }
+ params.tool_choice = normalizeAnthropicToolChoice(model, options.toolChoice);
}
return params;
@@ -1120,13 +1180,16 @@ function convertMessages(
});
continue;
}
- if (block.thinking.trim().length === 0) {
+ const thinkingSignature = block.thinkingSignature?.trim();
+ const hasNativeThinkingSignature =
+ Boolean(thinkingSignature) && thinkingSignature !== "reasoning_content";
+ if (block.thinking.trim().length === 0 && !hasNativeThinkingSignature) {
continue;
}
// If thinking signature is missing/empty (e.g., from aborted stream),
// convert to plain text block without tags to avoid API rejection
// and prevent Claude from mimicking the tags in responses
- if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) {
+ if (!thinkingSignature) {
blocks.push({
type: "text",
text: sanitizeSurrogates(block.thinking),
@@ -1134,13 +1197,13 @@ function convertMessages(
} else {
// OpenAI-compatible reasoning markers are field names, not native
// Anthropic replay signatures; sending them bricks persisted replays.
- if (block.thinkingSignature === "reasoning_content") {
+ if (thinkingSignature === "reasoning_content") {
continue;
}
blocks.push({
type: "thinking",
thinking: block.thinking,
- signature: block.thinkingSignature,
+ signature: thinkingSignature,
});
}
} else if (block.type === "toolCall") {
diff --git a/src/llm/providers/transform-messages.ts b/src/llm/providers/transform-messages.ts
index 63bdeb274d48..68ff48c86d55 100644
--- a/src/llm/providers/transform-messages.ts
+++ b/src/llm/providers/transform-messages.ts
@@ -1,3 +1,4 @@
+import { resolveModelBoundThinkingReplayMode } from "../../shared/anthropic-model-contract.js";
// Provider message transform helpers convert runtime messages to provider payloads.
import type {
Api,
@@ -96,13 +97,31 @@ export function transformMessages(
// Assistant messages need transformation check
if (msg.role === "assistant") {
const assistantMsg = msg;
+ const modelBoundThinkingReplayMode = resolveModelBoundThinkingReplayMode({
+ source: {
+ provider: assistantMsg.provider,
+ api: assistantMsg.api,
+ modelId: assistantMsg.model,
+ responseModelId: assistantMsg.responseModel,
+ },
+ target: {
+ provider: model.provider,
+ api: model.api,
+ modelId: model.id,
+ modelParams: model.params,
+ },
+ });
const isSameModel =
- assistantMsg.provider === model.provider &&
- assistantMsg.api === model.api &&
- assistantMsg.model === model.id;
+ modelBoundThinkingReplayMode === "preserve" ||
+ (assistantMsg.provider === model.provider &&
+ assistantMsg.api === model.api &&
+ assistantMsg.model === model.id);
const transformedContent = assistantMsg.content.flatMap((block) => {
if (block.type === "thinking") {
+ if (modelBoundThinkingReplayMode === "drop") {
+ return [];
+ }
// Redacted thinking is opaque encrypted content, only valid for the same model.
// Drop it for cross-model to avoid API errors.
if (block.redacted) {
diff --git a/src/plugin-sdk/provider-model-shared.test.ts b/src/plugin-sdk/provider-model-shared.test.ts
index c28ff606f8e2..ae0627a09e2c 100644
--- a/src/plugin-sdk/provider-model-shared.test.ts
+++ b/src/plugin-sdk/provider-model-shared.test.ts
@@ -8,7 +8,10 @@ import {
NATIVE_ANTHROPIC_REPLAY_HOOKS,
OPENAI_COMPATIBLE_REPLAY_HOOKS,
PASSTHROUGH_GEMINI_REPLAY_HOOKS,
+ resolveClaudeFable5ModelIdentity,
resolveClaudeThinkingProfile,
+ supportsClaudeAdaptiveThinking,
+ supportsClaudeNativeXhighEffort,
} from "./provider-model-shared.js";
function expectFields(value: unknown, expected: Record): void {
@@ -34,6 +37,28 @@ function expectLevelIdsInclude(profile: unknown, expectedIds: readonly string[])
}
}
+describe("Claude model contracts", () => {
+ it("recognizes Vertex date suffixes", () => {
+ expect(resolveClaudeFable5ModelIdentity({ id: "claude-fable-5@20260601" })).toBe(
+ "claude-fable-5@20260601",
+ );
+ expect(supportsClaudeAdaptiveThinking({ id: "claude-sonnet-4-6@20260301" })).toBe(true);
+ expect(supportsClaudeNativeXhighEffort({ id: "claude-opus-4-8@20260401" })).toBe(true);
+ });
+
+ it("does not classify later numeric model versions as supported aliases", () => {
+ expect(supportsClaudeAdaptiveThinking({ id: "claude-sonnet-4-60" })).toBe(false);
+ expect(supportsClaudeNativeXhighEffort({ id: "claude-opus-4-80" })).toBe(false);
+ expect(readLevelIds(resolveClaudeThinkingProfile("claude-opus-4-80"))).toEqual([
+ "off",
+ "minimal",
+ "low",
+ "medium",
+ "high",
+ ]);
+ });
+});
+
describe("buildProviderReplayFamilyHooks", () => {
it("covers the replay family matrix", () => {
const cases = [
@@ -282,6 +307,15 @@ describe("buildProviderReplayFamilyHooks", () => {
});
describe("resolveClaudeThinkingProfile", () => {
+ it("exposes Fable 5's always-adaptive profile to Claude providers", () => {
+ const profile = resolveClaudeThinkingProfile("claude-fable-5");
+ expectFields(profile, {
+ defaultLevel: "high",
+ preserveWhenCatalogReasoningFalse: true,
+ });
+ expectLevelIdsInclude(profile, ["xhigh", "adaptive", "max"]);
+ });
+
it("leaves Opus 4.8 thinking off by default with xhigh/adaptive/max options", () => {
const profile = resolveClaudeThinkingProfile("claude-opus-4-8");
expectFields(profile, {
diff --git a/src/plugin-sdk/provider-model-shared.ts b/src/plugin-sdk/provider-model-shared.ts
index f70dbcc27bab..bb26fae2ba18 100644
--- a/src/plugin-sdk/provider-model-shared.ts
+++ b/src/plugin-sdk/provider-model-shared.ts
@@ -1,3 +1,10 @@
+import {
+ CLAUDE_FABLE_5_THINKING_PROFILE,
+ resolveClaudeFable5ModelIdentity,
+ resolveClaudeModelIdentity,
+ supportsClaudeAdaptiveThinking,
+ supportsClaudeNativeXhighEffort,
+} from "@openclaw/llm-core";
// Provider model helpers normalize model catalog entries shared by provider plugins.
import { normalizeProviderId as normalizeProviderIdCore } from "@openclaw/model-catalog-core/provider-id";
import {
@@ -27,6 +34,14 @@ export type {
ModelApi,
ModelProviderDeclarationConfig as ModelProviderConfig,
} from "../config/types.models.js";
+export {
+ resolveClaudeFable5ModelIdentity,
+ resolveClaudeModelIdentity,
+ resolveClaudeNativeThinkingLevelMap,
+ supportsClaudeAdaptiveThinking,
+ supportsClaudeNativeMaxEffort,
+ supportsClaudeNativeXhighEffort,
+} from "@openclaw/llm-core";
export type {
UnifiedModelCatalogEntry,
UnifiedModelCatalogKind,
@@ -100,14 +115,6 @@ export {
} from "../plugins/provider-model-helpers.js";
import { normalizeOptionalLowercaseString } from "../../packages/normalization-core/src/string-coerce.js";
-const CLAUDE_OPUS_48_MODEL_PREFIXES = ["claude-opus-4-8", "claude-opus-4.8"] as const;
-const CLAUDE_OPUS_47_MODEL_PREFIXES = ["claude-opus-4-7", "claude-opus-4.7"] as const;
-const CLAUDE_ADAPTIVE_THINKING_DEFAULT_MODEL_PREFIXES = [
- "claude-opus-4-6",
- "claude-opus-4.6",
- "claude-sonnet-4-6",
- "claude-sonnet-4.6",
-] as const;
const BASE_CLAUDE_THINKING_LEVELS = [
{ id: "off" },
{ id: "minimal" },
@@ -136,47 +143,40 @@ export function isProxyReasoningUnsupportedModelHint(
return getModelProviderHint(modelId) === "x-ai";
}
-function matchesClaudeModelPrefix(modelId: string, prefixes: readonly string[]): boolean {
- const lower = normalizeOptionalLowercaseString(modelId);
- return Boolean(lower && prefixes.some((prefix) => lower.startsWith(prefix)));
-}
-
-function isClaudeOpus47ModelId(modelId: string): boolean {
- return matchesClaudeModelPrefix(modelId, CLAUDE_OPUS_47_MODEL_PREFIXES);
-}
-
-function isClaudeOpus48ModelId(modelId: string): boolean {
- return matchesClaudeModelPrefix(modelId, CLAUDE_OPUS_48_MODEL_PREFIXES);
-}
-
/** @deprecated Anthropic provider-owned model helper; do not use from third-party plugins. */
export function isClaudeAdaptiveThinkingDefaultModelId(
/** Claude model id to check against adaptive-thinking default families. */
modelId: string,
): boolean {
- return matchesClaudeModelPrefix(modelId, CLAUDE_ADAPTIVE_THINKING_DEFAULT_MODEL_PREFIXES);
+ const ref = { id: modelId };
+ return supportsClaudeAdaptiveThinking(ref) && !supportsClaudeNativeXhighEffort(ref);
}
/** @deprecated Anthropic provider-owned model helper; do not use from third-party plugins. */
export function resolveClaudeThinkingProfile(
/** Claude model id used to choose available thinking levels and defaults. */
modelId: string,
+ params?: Record<string, unknown>,
+ options?: { includeNativeMax?: boolean },
): ProviderThinkingProfile {
- if (isClaudeOpus48ModelId(modelId)) {
+ const ref = { id: modelId, params };
+ const canonicalModelId = resolveClaudeModelIdentity(ref);
+ if (resolveClaudeFable5ModelIdentity(ref)) {
+ return CLAUDE_FABLE_5_THINKING_PROFILE;
+ }
+ if (supportsClaudeNativeXhighEffort(ref)) {
return {
levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }],
defaultLevel: "off",
};
}
- if (isClaudeOpus47ModelId(modelId)) {
+ if (isClaudeAdaptiveThinkingDefaultModelId(canonicalModelId)) {
return {
- levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }],
- defaultLevel: "off",
- };
- }
- if (isClaudeAdaptiveThinkingDefaultModelId(modelId)) {
- return {
- levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "adaptive" }],
+ levels: [
+ ...BASE_CLAUDE_THINKING_LEVELS,
+ { id: "adaptive" },
+ ...(options?.includeNativeMax ? [{ id: "max" as const }] : []),
+ ],
defaultLevel: "adaptive",
};
}
diff --git a/src/plugin-sdk/provider-stream-shared.ts b/src/plugin-sdk/provider-stream-shared.ts
index 9a63642aca55..f96360bd81db 100644
--- a/src/plugin-sdk/provider-stream-shared.ts
+++ b/src/plugin-sdk/provider-stream-shared.ts
@@ -13,6 +13,9 @@ import type { StreamFn } from "../agents/runtime/index.js";
import { streamWithPayloadPatch } from "../llm/providers/stream-wrappers/stream-payload-utils.js";
import { streamSimple } from "../llm/stream.js";
import { createAssistantMessageEventStream } from "../llm/utils/event-stream.js";
+export { applyAnthropicRefusal } from "../shared/anthropic-refusal.js";
+export { createDeferredEventBuffer } from "../shared/deferred-event-buffer.js";
+export { notifyLlmRequestActivity, onLlmRequestActivity } from "../shared/llm-request-activity.js";
type ProviderWrapStreamFnContext = import("../plugins/types.js").ProviderWrapStreamFnContext;
diff --git a/src/plugins/provider-replay-helpers.test.ts b/src/plugins/provider-replay-helpers.test.ts
index 2978c8c38452..4d03d6cb3b99 100644
--- a/src/plugins/provider-replay-helpers.test.ts
+++ b/src/plugins/provider-replay-helpers.test.ts
@@ -118,6 +118,7 @@ describe("provider replay helpers", () => {
it("preserves thinking blocks for Claude Opus 4.5+ and Sonnet 4.5+ models", () => {
// These models should NOT drop thinking blocks
for (const modelId of [
+ "claude-fable-5",
"claude-opus-4-5-20251101",
"claude-opus-4-6",
"claude-sonnet-4-5-20250929",
diff --git a/src/plugins/provider-replay-helpers.ts b/src/plugins/provider-replay-helpers.ts
index 7d83ddb0b96b..fa7100453633 100644
--- a/src/plugins/provider-replay-helpers.ts
+++ b/src/plugins/provider-replay-helpers.ts
@@ -89,8 +89,8 @@ export function buildStrictAnthropicReplayPolicy(
/**
* Returns true for Claude models that preserve thinking blocks in context
- * natively (Opus 4.5+, Sonnet 4.5+, Haiku 4.5+). For these models, dropping
- * thinking blocks from prior turns breaks prompt cache prefix matching.
+ * natively (Fable 5, Opus 4.5+, Sonnet 4.5+, Haiku 4.5+). For these models,
+ * dropping thinking blocks from prior turns breaks replay and prompt caching.
*
* See: https://platform.claude.com/docs/en/build-with-claude/extended-thinking#differences-in-thinking-across-model-versions
*
@@ -103,13 +103,19 @@ export function shouldPreserveThinkingBlocks(modelId?: string): boolean {
}
// Models that preserve thinking blocks natively (Claude 4.5+):
+ // - claude-fable-5
// - claude-opus-4-x (opus-4-5, opus-4-6, ...)
// - claude-sonnet-4-x (sonnet-4-5, sonnet-4-6, ...)
// Note: "sonnet-4" is safe — legacy "claude-3-5-sonnet" does not contain "sonnet-4"
// - claude-haiku-4-x (haiku-4-5, ...)
// Models that require dropping thinking blocks:
// - claude-3-7-sonnet, claude-3-5-sonnet, and earlier
- if (id.includes("opus-4") || id.includes("sonnet-4") || id.includes("haiku-4")) {
+ if (
+ id.includes("fable-5") ||
+ id.includes("opus-4") ||
+ id.includes("sonnet-4") ||
+ id.includes("haiku-4")
+ ) {
return true;
}
diff --git a/src/plugins/provider-thinking.ts b/src/plugins/provider-thinking.ts
index 65ed27b9311b..67598c2cbfb5 100644
--- a/src/plugins/provider-thinking.ts
+++ b/src/plugins/provider-thinking.ts
@@ -83,7 +83,7 @@ export function resolveProviderThinkingProfile(
const activeProfile = resolveActiveThinkingProvider(params.provider)?.resolveThinkingProfile?.(
params.context,
);
- if (activeProfile) {
+ if (activeProfile !== undefined) {
return activeProfile;
}
return resolveBundledProviderPolicySurface(params.provider)?.resolveThinkingProfile?.(
diff --git a/src/plugins/provider-thinking.types.ts b/src/plugins/provider-thinking.types.ts
index f9bc40ee61ff..231812930722 100644
--- a/src/plugins/provider-thinking.types.ts
+++ b/src/plugins/provider-thinking.types.ts
@@ -28,6 +28,7 @@ export type ProviderThinkingModelCompat = {
*/
export type ProviderDefaultThinkingPolicyContext = ProviderThinkingPolicyContext & {
reasoning?: boolean;
+ params?: Record<string, unknown>;
compat?: ProviderThinkingModelCompat | null;
};
diff --git a/src/shared/anthropic-model-contract.ts b/src/shared/anthropic-model-contract.ts
new file mode 100644
index 000000000000..700c8832a101
--- /dev/null
+++ b/src/shared/anthropic-model-contract.ts
@@ -0,0 +1,83 @@
+// Model-bound thinking cannot be exposed or replayed after a model switch.
+import { resolveClaudeFable5ModelIdentity } from "@openclaw/llm-core";
+import { normalizeLowercaseStringOrEmpty } from "@openclaw/normalization-core/string-coerce";
+export {
+ resolveClaudeFable5ModelIdentity,
+ resolveClaudeModelIdentity,
+ resolveClaudeNativeThinkingLevelMap,
+ supportsClaudeAdaptiveThinking,
+ supportsClaudeNativeMaxEffort,
+ supportsClaudeNativeXhighEffort,
+} from "@openclaw/llm-core";
+
+type ReplayModelRef = {
+ provider?: string;
+ api?: string;
+ modelId?: string;
+ responseModelId?: string;
+ modelParams?: Record<string, unknown>;
+};
+
+function normalizeModelId(modelId?: string): string {
+ const normalized = normalizeLowercaseStringOrEmpty(modelId);
+ const unprefixed = normalized.startsWith("anthropic/")
+ ? normalized.slice("anthropic/".length)
+ : normalized;
+ return unprefixed.replace(/[._\s]+/g, "-");
+}
+
+function normalizeApi(api?: string): string {
+ const normalized = normalizeLowercaseStringOrEmpty(api);
+ return normalized === "openclaw-anthropic-messages-transport" ? "anthropic-messages" : normalized;
+}
+
+function hasConcreteResponseModel(ref: ReplayModelRef): boolean {
+ const responseModelId = normalizeModelId(ref.responseModelId);
+ // Deployment APIs may echo the requested alias. Only a different response
+ // model proves the backing identity and overrides configured metadata.
+ return responseModelId.length > 0 && responseModelId !== normalizeModelId(ref.modelId);
+}
+
+export function usesClaudeFable5MessagesContract(model: {
+ id?: string;
+ params?: Record<string, unknown>;
+ api?: string;
+}): boolean {
+ return (
+ normalizeApi(model.api) === "anthropic-messages" &&
+ resolveClaudeFable5ModelIdentity(model) !== undefined
+ );
+}
+
+function resolveReplayFableIdentity(ref: ReplayModelRef): string | undefined {
+ if (normalizeApi(ref.api) !== "anthropic-messages") {
+ return undefined;
+ }
+ if (hasConcreteResponseModel(ref)) {
+ return resolveClaudeFable5ModelIdentity({ id: ref.responseModelId });
+ }
+ return resolveClaudeFable5ModelIdentity({ id: ref.modelId, params: ref.modelParams });
+}
+
+export function resolveModelBoundThinkingReplayMode(params: {
+ source: ReplayModelRef;
+ target: ReplayModelRef;
+}): "default" | "preserve" | "drop" {
+ const sourceApi = normalizeApi(params.source.api);
+ const targetApi = normalizeApi(params.target.api);
+ const sourceIdentity = resolveReplayFableIdentity(params.source);
+ const targetIdentity = resolveReplayFableIdentity(params.target);
+ const sameRoute =
+ normalizeLowercaseStringOrEmpty(params.source.provider) ===
+ normalizeLowercaseStringOrEmpty(params.target.provider) &&
+ sourceApi === targetApi &&
+ normalizeModelId(params.source.modelId) === normalizeModelId(params.target.modelId);
+ if (!sourceIdentity && !targetIdentity) {
+ return "default";
+ }
+ if (!sourceIdentity && !hasConcreteResponseModel(params.source) && targetIdentity && sameRoute) {
+ return "preserve";
+ }
+ const sameModel = sourceApi === targetApi && sourceIdentity === targetIdentity;
+ return sameModel ? "preserve" : "drop";
+}
diff --git a/src/shared/anthropic-refusal.ts b/src/shared/anthropic-refusal.ts
new file mode 100644
index 000000000000..b59dc377057d
--- /dev/null
+++ b/src/shared/anthropic-refusal.ts
@@ -0,0 +1,55 @@
+import type { AssistantMessageDiagnostic } from "../llm/types.js";
+
+type AnthropicRefusalOutput = {
+ stopReason: string;
+ errorMessage?: string;
+ diagnostics?: AssistantMessageDiagnostic[];
+};
+
+type AnthropicRefusalDetails = {
+ category: string | null;
+ explanation: string | null;
+};
+
+function readNullableString(value: unknown): string | null {
+ return typeof value === "string" && value.trim() ? value.trim() : null;
+}
+
+function readAnthropicRefusalDetails(value: unknown): AnthropicRefusalDetails {
+ if (!value || typeof value !== "object") {
+ return { category: null, explanation: null };
+ }
+ const details = value as Record<string, unknown>;
+ return {
+ category: readNullableString(details.category),
+ explanation: readNullableString(details.explanation),
+ };
+}
+
+function formatAnthropicRefusalMessage(details: AnthropicRefusalDetails): string {
+ const category = details.category ? ` (category: ${details.category})` : "";
+ const explanation = details.explanation ? `: ${details.explanation}` : ".";
+ return `Anthropic refusal${category}${explanation}`;
+}
+
+export function applyAnthropicRefusal(
+ output: AnthropicRefusalOutput,
+ stopDetails: unknown,
+ provider: string,
+): void {
+ const details = readAnthropicRefusalDetails(stopDetails);
+ output.stopReason = "error";
+ output.errorMessage = formatAnthropicRefusalMessage(details);
+ output.diagnostics = [
+ ...(output.diagnostics ?? []),
+ {
+ type: "provider_refusal",
+ timestamp: Date.now(),
+ details: {
+ provider,
+ category: details.category,
+ explanation: details.explanation,
+ },
+ },
+ ];
+}
diff --git a/src/shared/deferred-event-buffer.ts b/src/shared/deferred-event-buffer.ts
new file mode 100644
index 000000000000..16c9f1deecaa
--- /dev/null
+++ b/src/shared/deferred-event-buffer.ts
@@ -0,0 +1,22 @@
+type EventSink<T> = {
+ push(event: T): void;
+};
+
+export function createDeferredEventBuffer<T>(sink: EventSink<T>, onBufferedEvent?: () => void) {
+ let events: T[] = [];
+ return {
+ push(event: T): void {
+ events.push(event);
+ onBufferedEvent?.();
+ },
+ flush(): void {
+ for (const event of events) {
+ sink.push(event);
+ }
+ events = [];
+ },
+ discard(): void {
+ events = [];
+ },
+ };
+}
diff --git a/src/shared/llm-request-activity.ts b/src/shared/llm-request-activity.ts
new file mode 100644
index 000000000000..bb1837a3bb92
--- /dev/null
+++ b/src/shared/llm-request-activity.ts
@@ -0,0 +1,23 @@
+const requestActivityListeners = new WeakMap<AbortSignal, Set<() => void>>();
+
+export function notifyLlmRequestActivity(signal: AbortSignal | undefined): void {
+ if (!signal) {
+ return;
+ }
+ for (const listener of requestActivityListeners.get(signal) ?? []) {
+ listener();
+ }
+}
+
+export function onLlmRequestActivity(signal: AbortSignal, listener: () => void): () => void {
+ const listeners = requestActivityListeners.get(signal) ?? new Set<() => void>();
+ listeners.add(listener);
+ requestActivityListeners.set(signal, listeners);
+
+ return () => {
+ listeners.delete(listener);
+ if (listeners.size === 0) {
+ requestActivityListeners.delete(signal);
+ }
+ };
+}