diff --git a/docs/.generated/plugin-sdk-api-baseline.sha256 b/docs/.generated/plugin-sdk-api-baseline.sha256 index 3adb7c1a922d..394c7b19c411 100644 --- a/docs/.generated/plugin-sdk-api-baseline.sha256 +++ b/docs/.generated/plugin-sdk-api-baseline.sha256 @@ -1,2 +1,2 @@ -4768607253fdc720cb2bc280ac285ccfa7f7057a01659691f5be5b1f58422789 plugin-sdk-api-baseline.json -7901bc511cf6f9628df4cd619035265f48c40939e4e8e51c5c10dc73a263f183 plugin-sdk-api-baseline.jsonl +8a2769df428906990ee0d1bf8b0423f2a099b053c64c816d092ff84d61e11633 plugin-sdk-api-baseline.json +28b798973f3fb2a5b33ccbb6e3c1ac0453fa234a3a1c6cdc27935c27639bd104 plugin-sdk-api-baseline.jsonl diff --git a/docs/plugins/reference/anthropic-vertex.md b/docs/plugins/reference/anthropic-vertex.md index 425219d3e401..71988de7dc4d 100644 --- a/docs/plugins/reference/anthropic-vertex.md +++ b/docs/plugins/reference/anthropic-vertex.md @@ -17,3 +17,9 @@ OpenClaw Anthropic Vertex provider plugin for Claude models on Google Vertex AI. ## Surface providers: anthropic-vertex + +## Claude Fable 5 + +Use `anthropic-vertex/claude-fable-5` where the model is available in your Google Cloud region. +Fable 5 always uses adaptive thinking and defaults to `high` effort. `/think off` and +`/think minimal` use `low` effort because the model does not support disabling thinking. diff --git a/docs/plugins/reference/microsoft-foundry.md b/docs/plugins/reference/microsoft-foundry.md index 7b3e4d2a710a..f27e005124d5 100644 --- a/docs/plugins/reference/microsoft-foundry.md +++ b/docs/plugins/reference/microsoft-foundry.md @@ -50,7 +50,10 @@ chat APIs: Anthropic Claude deployments in Microsoft Foundry use the Anthropic Messages API shape, not the OpenAI-compatible `/openai/v1` shape. Configure those as a custom `anthropic-messages` provider until the Microsoft Foundry plugin grows a -native Anthropic runtime. +native Anthropic runtime. 
When the Foundry deployment name differs from the +Claude model ID, set `params.canonicalModelId` on the model entry so OpenClaw +can apply model-specific wire contracts, map `/think off` correctly, and +preserve signed thinking safely. ## MAI image generation diff --git a/docs/providers/anthropic.md b/docs/providers/anthropic.md index ed095df2f12b..a799c8de1282 100644 --- a/docs/providers/anthropic.md +++ b/docs/providers/anthropic.md @@ -160,7 +160,12 @@ Anthropic's current public docs: -## Thinking defaults (Claude 4.8 and 4.6) +## Thinking defaults (Claude Fable 5, 4.8, and 4.6) + +`anthropic/claude-fable-5` always uses adaptive thinking and defaults to `high` +effort. Because Anthropic does not allow thinking to be disabled for this model, +`/think off` and `/think minimal` use `low` effort. OpenClaw also omits custom +temperature values for Fable 5 requests. Claude Opus 4.8 keeps thinking off by default in OpenClaw. When you explicitly enable adaptive thinking with `/think high|xhigh|max`, OpenClaw sends Anthropic's Opus 4.8 effort values; Claude 4.6 models default to `adaptive`. diff --git a/docs/providers/bedrock.md b/docs/providers/bedrock.md index d966d0985c77..23850a49ab52 100644 --- a/docs/providers/bedrock.md +++ b/docs/providers/bedrock.md @@ -310,6 +310,25 @@ openclaw models list the request options object and the `inferenceConfig` payload field. + + Use `amazon-bedrock/anthropic.claude-fable-5` in `us-east-1`, or the + regional inference ids such as `us.anthropic.claude-fable-5`. + OpenClaw applies Fable's 1M context window, 128K output limit, always-on + adaptive thinking, and supported effort mapping. `/think off` and + `/think minimal` map to `low`; unsupported temperature and forced tool + choice controls are omitted. Streaming output is held until Bedrock + returns a terminal status so mid-stream refusals do not expose partial text. 
+ Fable supports only the standard service tier; OpenClaw ignores configured + `flex`, `priority`, and `reserved` tiers for this model. + + AWS requires an explicit `provider_data_share` data-retention opt-in before + Fable is available. Prompts and completions are shared with Anthropic and + retained for up to 30 days for trust and safety. Review and configure + [Bedrock data retention](https://docs.aws.amazon.com/bedrock/latest/userguide/data-retention.html) + before enabling the model. + + + You can apply [Amazon Bedrock Guardrails](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails.html) to all Bedrock model invocations by adding a `guardrail` object to the diff --git a/extensions/amazon-bedrock/bedrock-options.ts b/extensions/amazon-bedrock/bedrock-options.ts index 2732d9d34475..fb0e19e5993e 100644 --- a/extensions/amazon-bedrock/bedrock-options.ts +++ b/extensions/amazon-bedrock/bedrock-options.ts @@ -41,6 +41,9 @@ export function supportsBedrockPromptCaching(modelId: string, modelName?: string if (candidates.some((s) => s.includes("-4-"))) { return true; } + if (candidates.some((s) => s.includes("claude-fable-5"))) { + return true; + } if (candidates.some((s) => s.includes("claude-3-7-sonnet"))) { return true; } diff --git a/extensions/amazon-bedrock/discovery.test.ts b/extensions/amazon-bedrock/discovery.test.ts index 4e46fe7acd94..54a0561736b1 100644 --- a/extensions/amazon-bedrock/discovery.test.ts +++ b/extensions/amazon-bedrock/discovery.test.ts @@ -248,6 +248,58 @@ describe("bedrock discovery", () => { ); }); + it("applies Fable limits and reasoning metadata to foundation and profile models", async () => { + sendMock + .mockResolvedValueOnce({ + modelSummaries: [ + { + modelId: "anthropic.claude-fable-5", + modelName: "Claude Fable 5", + providerName: "anthropic", + inputModalities: ["TEXT", "IMAGE"], + outputModalities: ["TEXT"], + responseStreamingSupported: true, + modelLifecycle: { status: "ACTIVE" }, + }, + ], + }) + 
.mockResolvedValueOnce({ + inferenceProfileSummaries: [ + { + inferenceProfileId: "company-fable", + inferenceProfileName: "Company Fable", + status: "ACTIVE", + type: "APPLICATION", + models: [ + { + modelArn: "arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-fable-5", + }, + ], + }, + ], + }); + + const models = await discoverBedrockModels({ region: "us-east-1", clientFactory }); + const expected = { + reasoning: true, + contextWindow: 1_000_000, + maxTokens: 128_000, + thinkingLevelMap: { off: "low", minimal: "low", xhigh: "xhigh", max: "max" }, + }; + + expectModelFields( + models.find((model) => model.id === "anthropic.claude-fable-5"), + expected, + ); + expectModelFields( + models.find((model) => model.id === "company-fable"), + { + ...expected, + params: { canonicalModelId: "claude-fable-5" }, + }, + ); + }); + it("caches results when refreshInterval is enabled", async () => { mockSingleActiveSummary(); @@ -410,7 +462,9 @@ describe("bedrock discovery", () => { input: ["text", "image"], contextWindow: 1000000, maxTokens: 4096, + params: { canonicalModelId: "claude-sonnet-4-6" }, }); + expect(usProfile?.thinkingLevelMap).toBeUndefined(); expectModelFields(euProfile, { input: ["text", "image"] }); expectModelFields(globalProfile, { input: ["text", "image"] }); @@ -546,6 +600,8 @@ describe("bedrock discovery", () => { contextWindow: 1_000_000, maxTokens: 4096, input: ["text"], + params: { canonicalModelId: "claude-opus-4-6-v1:0" }, + thinkingLevelMap: { xhigh: null, max: "max" }, }); }); diff --git a/extensions/amazon-bedrock/discovery.ts b/extensions/amazon-bedrock/discovery.ts index de07b3a5f280..a6fe04959ed4 100644 --- a/extensions/amazon-bedrock/discovery.ts +++ b/extensions/amazon-bedrock/discovery.ts @@ -18,12 +18,18 @@ import type { ModelDefinitionConfig, ModelProviderConfig, } from "openclaw/plugin-sdk/provider-model-shared"; +import { + resolveClaudeFable5ModelIdentity, + resolveClaudeModelIdentity, + supportsClaudeAdaptiveThinking, +} from 
"openclaw/plugin-sdk/provider-model-shared"; import { normalizeLowercaseStringOrEmpty, normalizeOptionalLowercaseString, } from "openclaw/plugin-sdk/string-coerce-runtime"; import { refreshAwsSharedConfigCacheForBedrock } from "./aws-credential-refresh.js"; import { resolveBedrockConfigApiKey } from "./discovery-shared.js"; +import { resolveBedrockNativeThinkingLevelMap } from "./thinking-policy.js"; const log = createSubsystemLogger("bedrock-discovery"); @@ -53,6 +59,7 @@ const DEFAULT_MAX_TOKENS = 4096; */ const KNOWN_CONTEXT_WINDOWS: Record = { // Anthropic Claude + "anthropic.claude-fable-5": 1_000_000, "anthropic.claude-3-7-sonnet-20250219-v1:0": 200_000, "anthropic.claude-opus-4-8": 1_000_000, "anthropic.claude-opus-4-7": 1_000_000, @@ -130,6 +137,9 @@ function resolveKnownContextWindow(modelId: string): number | undefined { const stripped = modelId.replace(/^(?:us|eu|ap|apac|au|jp|global)\./, ""); const candidates = [modelId, stripped]; for (const candidate of candidates) { + if (resolveClaudeFable5ModelIdentity({ id: candidate })) { + return 1_000_000; + } if (/(?:^|[/.:])anthropic\.claude-opus-4[.-]8(?:$|[-.:/])/i.test(candidate)) { return 1_000_000; } @@ -147,20 +157,14 @@ function resolveKnownContextWindow(modelId: string): number | undefined { return undefined; } -function isKnownClaudeOpus47OrNewerModelId(modelId: string): boolean { - const stripped = modelId.replace(/^(?:us|eu|ap|apac|au|jp|global)\./, ""); - return [modelId, stripped].some((candidate) => - /(?:^|[/.:])anthropic\.claude-opus-4[.-][78](?:$|[-.:/])/i.test(candidate), - ); -} - function resolveKnownThinkingLevelMap( modelId: string, ): ModelDefinitionConfig["thinkingLevelMap"] | undefined { - if (!isKnownClaudeOpus47OrNewerModelId(modelId)) { - return undefined; - } - return { xhigh: "xhigh", max: "max" }; + return resolveBedrockNativeThinkingLevelMap(modelId); +} + +function resolveKnownMaxTokens(modelId: string): number | undefined { + return resolveClaudeFable5ModelIdentity({ id: 
modelId }) ? 128_000 : undefined; } const DEFAULT_COST = { @@ -271,7 +275,7 @@ function mapInputModalities(summary: BedrockModelSummary): Array<"text" | "image } function inferReasoningSupport(summary: BedrockModelSummary): boolean { - if (isKnownClaudeOpus47OrNewerModelId(summary.modelId ?? "")) { + if (supportsClaudeAdaptiveThinking({ id: summary.modelId })) { return true; } const haystack = normalizeLowercaseStringOrEmpty( @@ -340,7 +344,7 @@ function toModelDefinition( input: mapInputModalities(summary), cost: DEFAULT_COST, contextWindow: resolveKnownContextWindow(id) ?? defaults.contextWindow, - maxTokens: defaults.maxTokens, + maxTokens: resolveKnownMaxTokens(id) ?? defaults.maxTokens, ...(thinkingLevelMap ? { thinkingLevelMap } : {}), }; } @@ -456,23 +460,30 @@ function resolveInferenceProfiles( const knownThinkingLevelMap = resolveKnownThinkingLevelMap( baseModelId ?? profile.inferenceProfileId, ); + const canonicalClaudeId = resolveClaudeModelIdentity({ id: baseModelId }); discovered.push({ id: profile.inferenceProfileId, name: profile.inferenceProfileName?.trim() || profile.inferenceProfileId, reasoning: baseModel?.reasoning ?? - isKnownClaudeOpus47OrNewerModelId(baseModelId ?? profile.inferenceProfileId), + supportsClaudeAdaptiveThinking({ id: baseModelId ?? profile.inferenceProfileId }), input: baseModel?.input ?? ["text"], cost: baseModel?.cost ?? DEFAULT_COST, contextWindow: baseModel?.contextWindow ?? resolveKnownContextWindow(baseModelId ?? profile.inferenceProfileId ?? "") ?? defaults.contextWindow, - maxTokens: baseModel?.maxTokens ?? defaults.maxTokens, + maxTokens: + baseModel?.maxTokens ?? + resolveKnownMaxTokens(baseModelId ?? profile.inferenceProfileId) ?? + defaults.maxTokens, ...(baseModel?.thinkingLevelMap || knownThinkingLevelMap ? { thinkingLevelMap: baseModel?.thinkingLevelMap ?? knownThinkingLevelMap } : {}), + ...(canonicalClaudeId.startsWith("claude-") + ? 
{ params: { canonicalModelId: canonicalClaudeId } } + : {}), }); } return discovered; diff --git a/extensions/amazon-bedrock/index.test.ts b/extensions/amazon-bedrock/index.test.ts index 7d02f014a775..acf089c01efe 100644 --- a/extensions/amazon-bedrock/index.test.ts +++ b/extensions/amazon-bedrock/index.test.ts @@ -10,6 +10,7 @@ import { import { withEnvAsync } from "openclaw/plugin-sdk/test-env"; import { afterAll, afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import { setAwsSharedIniFileLoaderForTest } from "./aws-credential-refresh.js"; +import { supportsBedrockPromptCaching } from "./bedrock-options.js"; import { resetBedrockDiscoveryCacheForTest } from "./discovery.js"; import amazonBedrockPlugin from "./index.js"; import { @@ -312,6 +313,40 @@ describe("amazon-bedrock provider plugin", () => { ); }); + it("normalizes explicit Claude 4.6 rows with native max metadata", async () => { + const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); + + const normalized = provider.normalizeResolvedModel?.({ + provider: "amazon-bedrock", + modelId: "us.anthropic.claude-opus-4-6-v1", + model: { + id: "us.anthropic.claude-opus-4-6-v1", + name: "Claude Opus 4.6", + provider: "amazon-bedrock", + api: "bedrock-converse-stream", + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 1_000_000, + maxTokens: 4096, + }, + } as never); + + expect(normalized?.thinkingLevelMap).toEqual({ xhigh: null, max: "max" }); + + const restricted = provider.normalizeResolvedModel?.({ + provider: "amazon-bedrock", + modelId: "us.anthropic.claude-opus-4-6-v1", + model: { + ...(normalized as NonNullable), + thinkingLevelMap: { max: null }, + }, + } as never); + + expect(restricted?.thinkingLevelMap).toEqual({ xhigh: null, max: null }); + }); + it("mirrors Claude Opus 4.7 thinking levels for Bedrock model refs", async () => { const 
provider = await registerSingleProviderPlugin(amazonBedrockPlugin); @@ -355,6 +390,47 @@ describe("amazon-bedrock provider plugin", () => { } }); + it("keeps Claude Fable 5 always adaptive with high default effort", async () => { + const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); + + for (const modelId of [ + "anthropic.claude-fable-5", + "us.anthropic.claude-fable-5", + "global.anthropic.claude-fable-5", + ]) { + expectThinkingProfile( + provider.resolveThinkingProfile?.({ + provider: "amazon-bedrock", + modelId, + } as never), + { + levelIds: ["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"], + defaultLevel: "high", + }, + ); + } + }); + + it("keeps Fable thinking policy for opaque deployment aliases", async () => { + const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); + + expectThinkingProfile( + provider.resolveThinkingProfile?.({ + provider: "amazon-bedrock", + modelId: "company-fable", + params: { canonicalModelId: "claude-fable-5" }, + } as never), + { + levelIds: ["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"], + defaultLevel: "high", + }, + ); + }); + + it("recognizes direct Fable model refs as prompt-cache eligible", () => { + expect(supportsBedrockPromptCaching("us.anthropic.claude-fable-5")).toBe(true); + }); + it("owns Anthropic-style replay policy for Claude Bedrock models", async () => { const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); @@ -452,6 +528,7 @@ describe("amazon-bedrock provider plugin", () => { expectWrappedResultFields(result, { maxTokens: 10 }); expect(result).not.toHaveProperty("temperature"); + expect(result).not.toHaveProperty("cacheRetention", "none"); }); it("omits temperature for Bedrock Opus 4.8 model ids", async () => { @@ -474,6 +551,62 @@ describe("amazon-bedrock provider plugin", () => { expectWrappedResultFields(result, { maxTokens: 10 }); expect(result).not.toHaveProperty("temperature"); + 
expect(result).not.toHaveProperty("cacheRetention", "none"); + }); + + it("omits temperature for Bedrock Fable deployment aliases", async () => { + const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); + const wrapped = provider.wrapStreamFn?.({ + provider: "amazon-bedrock", + modelId: "production-fable", + model: { + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + id: "production-fable", + params: { canonicalModelId: "claude-fable-5" }, + }, + streamFn: spyStreamFn, + } as never); + + const result = wrapped?.( + { + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + id: "production-fable", + params: { canonicalModelId: "claude-fable-5" }, + } as never, + { messages: [] } as never, + { temperature: 0.2, maxTokens: 10 }, + ) as Record | undefined; + + expectWrappedResultFields(result, { maxTokens: 10 }); + expect(result).not.toHaveProperty("temperature"); + expect(result).not.toHaveProperty("cacheRetention", "none"); + }); + + it("omits temperature for canonical Bedrock Opus aliases", async () => { + const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); + const model = { + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + id: "production-claude", + params: { canonicalModelId: "claude-opus-4-8" }, + }; + const wrapped = provider.wrapStreamFn?.({ + provider: "amazon-bedrock", + modelId: model.id, + model, + streamFn: spyStreamFn, + } as never); + + const result = wrapped?.(model as never, { messages: [] } as never, { + temperature: 0.2, + maxTokens: 10, + }) as Record | undefined; + + expectWrappedResultFields(result, { maxTokens: 10 }); + expect(result).not.toHaveProperty("temperature"); + expect(result).not.toHaveProperty("cacheRetention", "none"); }); it("omits temperature for dotted Bedrock Opus 4.7 model ids", async () => { @@ -604,6 +737,36 @@ describe("amazon-bedrock provider plugin", () => { expect(payload.additionalModelRequestFields.output_config).toEqual({ effort: "max" }); 
}); + it("preserves Bedrock Opus 4.6 max thinking in the final payload", async () => { + const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); + const wrapped = provider.wrapStreamFn?.({ + provider: "amazon-bedrock", + modelId: "us.anthropic.claude-opus-4-6-v1", + streamFn: spyStreamFn, + thinkingLevel: "max", + } as never); + + const result = wrapped?.( + { + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + id: "us.anthropic.claude-opus-4-6-v1", + } as never, + { messages: [] } as never, + { reasoning: "high" } as never, + ) as Record | undefined; + const payload = { + additionalModelRequestFields: { + thinking: { type: "adaptive" }, + output_config: { effort: "high" }, + }, + }; + + await (result?.onPayload as ((p: Record) => unknown) | undefined)?.(payload); + + expect(payload.additionalModelRequestFields.output_config).toEqual({ effort: "max" }); + }); + it("keeps Bedrock Opus 4.7 xhigh thinking distinct from max", async () => { const provider = await registerSingleProviderPlugin(amazonBedrockPlugin); const wrapped = provider.wrapStreamFn?.({ @@ -930,6 +1093,38 @@ describe("amazon-bedrock provider plugin", () => { expect(result).not.toHaveProperty("capturedPayload"); }); + it("omits unsupported service tiers for Fable", async () => { + const provider = await registerWithConfig(undefined); + const result = await callWrappedStream( + provider, + "us.anthropic.claude-fable-5", + { + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + id: "us.anthropic.claude-fable-5", + } as never, + runtimePluginConfig(undefined), + { serviceTier: "flex" }, + ); + expect(result).not.toHaveProperty("capturedPayload"); + }); + + it("keeps the standard service tier for Fable", async () => { + const provider = await registerWithConfig(undefined); + const result = await callWrappedStream( + provider, + "us.anthropic.claude-fable-5", + { + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + id: "us.anthropic.claude-fable-5", + 
} as never, + runtimePluginConfig(undefined), + { serviceTier: "default" }, + ); + expectPayloadServiceTier(result, "default"); + }); + it("does not overwrite caller-provided serviceTier in payload", async () => { const provider = await registerWithConfig(undefined); const result = await callWrappedStream( diff --git a/extensions/amazon-bedrock/provider-policy-api.test.ts b/extensions/amazon-bedrock/provider-policy-api.test.ts index 2961b226cdee..11a984f63990 100644 --- a/extensions/amazon-bedrock/provider-policy-api.test.ts +++ b/extensions/amazon-bedrock/provider-policy-api.test.ts @@ -16,10 +16,27 @@ describe("amazon-bedrock provider-policy-api", () => { "medium", "high", "adaptive", + "max", ]); expect(profile?.defaultLevel).toBe("adaptive"); }); + it("caps Bedrock Claude Sonnet 4.6 at high effort", () => { + const profile = resolveThinkingProfile({ + provider: "amazon-bedrock", + modelId: "amazon-bedrock/global.anthropic.claude-sonnet-4-6", + }); + + expect(profile?.levels.map((level) => level.id)).toEqual([ + "off", + "minimal", + "low", + "medium", + "high", + "adaptive", + ]); + }); + it("leaves Bedrock Claude Opus 4.8 thinking off by default with max effort available", () => { const profile = resolveThinkingProfile({ provider: "amazon-bedrock", @@ -50,6 +67,32 @@ describe("amazon-bedrock provider-policy-api", () => { ).toEqual(["off", "minimal", "low", "medium", "high", "xhigh", "adaptive", "max"]); }); + it.each([ + { + canonicalModelId: "claude-fable-5", + defaultLevel: "high", + preservesCatalogOptOut: true, + }, + { + canonicalModelId: "claude-opus-4-8", + defaultLevel: "off", + preservesCatalogOptOut: false, + }, + ])( + "resolves $canonicalModelId deployment aliases from canonical metadata", + ({ canonicalModelId, defaultLevel, preservesCatalogOptOut }) => { + const profile = resolveThinkingProfile({ + provider: "amazon-bedrock", + modelId: "production-claude", + params: { canonicalModelId }, + }); + + 
expect(profile?.defaultLevel).toBe(defaultLevel); + expect(profile?.levels.map((level) => level.id)).toContain("max"); + expect(profile?.preserveWhenCatalogReasoningFalse === true).toBe(preservesCatalogOptOut); + }, + ); + it("ignores unrelated providers", () => { expect( resolveThinkingProfile({ provider: "anthropic", modelId: "claude-opus-4-6" }), diff --git a/extensions/amazon-bedrock/provider-policy-api.ts b/extensions/amazon-bedrock/provider-policy-api.ts index df09b3b5e7d1..d9f56ad1bd49 100644 --- a/extensions/amazon-bedrock/provider-policy-api.ts +++ b/extensions/amazon-bedrock/provider-policy-api.ts @@ -6,9 +6,13 @@ import { normalizeProviderId } from "openclaw/plugin-sdk/provider-model-shared"; import { resolveBedrockClaudeThinkingProfile } from "./thinking-policy.js"; /** Resolve the Bedrock thinking profile for a provider/model pair. */ -export function resolveThinkingProfile(params: { provider: string; modelId: string }) { +export function resolveThinkingProfile(params: { + provider: string; + modelId: string; + params?: Record; +}) { if (normalizeProviderId(params.provider) !== "amazon-bedrock") { return null; } - return resolveBedrockClaudeThinkingProfile(params.modelId); + return resolveBedrockClaudeThinkingProfile(params.modelId, params.params); } diff --git a/extensions/amazon-bedrock/register.sync.runtime.ts b/extensions/amazon-bedrock/register.sync.runtime.ts index cfad4f9f7fdc..f6d6f5af8dd9 100644 --- a/extensions/amazon-bedrock/register.sync.runtime.ts +++ b/extensions/amazon-bedrock/register.sync.runtime.ts @@ -6,10 +6,15 @@ import type { StreamFn } from "openclaw/plugin-sdk/agent-core"; import type { OpenClawConfig } from "openclaw/plugin-sdk/config-contracts"; import { registerApiProvider, streamSimple } from "openclaw/plugin-sdk/llm"; import { resolvePluginConfigObject } from "openclaw/plugin-sdk/plugin-config-runtime"; -import type { OpenClawPluginApi } from "openclaw/plugin-sdk/plugin-entry"; +import type { + OpenClawPluginApi, + 
ProviderNormalizeResolvedModelContext, +} from "openclaw/plugin-sdk/plugin-entry"; import { ANTHROPIC_BY_MODEL_REPLAY_HOOKS, normalizeProviderId, + resolveClaudeFable5ModelIdentity, + resolveClaudeModelIdentity, } from "openclaw/plugin-sdk/provider-model-shared"; import { streamWithPayloadPatch } from "openclaw/plugin-sdk/provider-stream-shared"; import { refreshAwsSharedConfigCacheForBedrock } from "./aws-credential-refresh.js"; @@ -19,7 +24,9 @@ import { bedrockMemoryEmbeddingProviderAdapter } from "./memory-embedding-adapte import { streamBedrock, streamSimpleBedrock } from "./stream.runtime.js"; import { isOpus47OrNewerBedrockModelRef, + resolveBedrockNativeThinkingLevelMap, resolveBedrockClaudeThinkingProfile, + supportsBedrockNativeMaxEffort, } from "./thinking-policy.js"; type GuardrailConfig = { @@ -41,6 +48,29 @@ type AmazonBedrockPluginConfig = { guardrail?: GuardrailConfig; }; +function normalizeBedrockResolvedModel({ modelId, model }: ProviderNormalizeResolvedModelContext) { + const thinkingLevelMap = resolveBedrockNativeThinkingLevelMap(modelId, model.params); + if (!thinkingLevelMap) { + return undefined; + } + const reasoning = + model.reasoning || + resolveClaudeFable5ModelIdentity({ id: modelId, params: model.params }) !== undefined; + const current = model.thinkingLevelMap; + const currentEfforts = current as Record | undefined; + if ( + reasoning === model.reasoning && + Object.entries(thinkingLevelMap).every(([level, effort]) => currentEfforts?.[level] === effort) + ) { + return undefined; + } + return { + ...model, + reasoning, + thinkingLevelMap: { ...thinkingLevelMap, ...current }, + }; +} + const BEDROCK_SERVICE_TIER_VALUES = ["flex", "priority", "default", "reserved"] as const; type BedrockServiceTier = (typeof BEDROCK_SERVICE_TIER_VALUES)[number]; @@ -103,9 +133,17 @@ function createBedrockServiceTierWrapper( } function createGuardrailWrapStreamFn( - innerWrapStreamFn: (ctx: { modelId: string; streamFn?: StreamFn }) => StreamFn | null | 
undefined, + innerWrapStreamFn: (ctx: { + modelId: string; + model?: { params?: Record }; + streamFn?: StreamFn; + }) => StreamFn | null | undefined, guardrailConfig: GuardrailConfig, -): (ctx: { modelId: string; streamFn?: StreamFn }) => StreamFn | null | undefined { +): (ctx: { + modelId: string; + model?: { params?: Record }; + streamFn?: StreamFn; +}) => StreamFn | null | undefined { return (ctx) => { const inner = innerWrapStreamFn(ctx); if (!inner) { @@ -327,7 +365,7 @@ function injectBedrockCachePoints( } } -function patchOpus47MaxThinkingEffort(payload: Record): void { +function patchMaxThinkingEffort(payload: Record): void { const fieldsValue = payload.additionalModelRequestFields; const fields = fieldsValue && typeof fieldsValue === "object" && !Array.isArray(fieldsValue) @@ -382,8 +420,20 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { api.registerMemoryEmbeddingProvider(bedrockMemoryEmbeddingProviderAdapter); - const baseWrapStreamFn = ({ modelId, streamFn }: { modelId: string; streamFn?: StreamFn }) => { - if (isAnthropicBedrockModel(modelId)) { + const baseWrapStreamFn = ({ + modelId, + model, + streamFn, + }: { + modelId: string; + model?: { params?: Record }; + streamFn?: StreamFn; + }) => { + const modelRef = { id: modelId, params: model?.params }; + if ( + isAnthropicBedrockModel(modelId) || + resolveClaudeModelIdentity(modelRef).startsWith("claude-") + ) { return streamFn; } // For app inference profiles with opaque IDs, don't force cacheRetention: "none" @@ -394,11 +444,16 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { return createBedrockNoCacheWrapper(streamFn); }; - function omitDeprecatedOpus47Temperature( - modelId: string, + function omitUnsupportedClaudeTemperature( + modelRef: { id: string; params?: Record }, options: TOptions, ): TOptions { - if (!isOpus47OrNewerBedrockModelRef(modelId) || !("temperature" in options)) { + const canonicalModelId = 
resolveClaudeModelIdentity(modelRef); + const omitsTemperature = + isOpus47OrNewerBedrockModelRef(modelRef.id) || + isOpus47OrNewerBedrockModelRef(canonicalModelId) || + resolveClaudeFable5ModelIdentity(modelRef) !== undefined; + if (!omitsTemperature || !("temperature" in options)) { return options; } const next = { ...options } as typeof options & { temperature?: unknown }; @@ -406,7 +461,7 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { return next; } - function omitDeprecatedOpus47PayloadTemperature(payload: Record): void { + function omitUnsupportedClaudePayloadTemperature(payload: Record): void { const inferenceConfig = payload.inferenceConfig; if (!inferenceConfig || typeof inferenceConfig !== "object") { return; @@ -501,20 +556,38 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { }, }, resolveConfigApiKey: ({ env }) => resolveBedrockConfigApiKey(env), + normalizeResolvedModel: normalizeBedrockResolvedModel, ...anthropicByModelReplayHooks, wrapStreamFn: ({ modelId, config, model, streamFn, thinkingLevel, extraParams }) => { const currentPluginConfig = resolveCurrentPluginConfig(config); const currentGuardrail = currentPluginConfig?.guardrail; + const modelRef = { id: modelId, params: model?.params }; + const fable5 = resolveClaudeFable5ModelIdentity(modelRef) !== undefined; + const canonicalModelId = resolveClaudeModelIdentity(modelRef); + const opus47OrNewer = + isOpus47OrNewerBedrockModelRef(modelId) || isOpus47OrNewerBedrockModelRef(canonicalModelId); + const supportsNativeMax = supportsBedrockNativeMaxEffort(modelId, model?.params); let wrapped = (currentGuardrail?.guardrailIdentifier && currentGuardrail?.guardrailVersion - ? createGuardrailWrapStreamFn(baseWrapStreamFn, currentGuardrail)({ modelId, streamFn }) - : baseWrapStreamFn({ modelId, streamFn })) ?? undefined; + ? 
createGuardrailWrapStreamFn( + baseWrapStreamFn, + currentGuardrail, + )({ + modelId, + model, + streamFn, + }) + : baseWrapStreamFn({ modelId, model, streamFn })) ?? undefined; const serviceTier = resolveBedrockServiceTier(extraParams, (message) => api.logger.warn(message), ); if (serviceTier && wrapped) { - wrapped = createBedrockServiceTierWrapper(wrapped, serviceTier); + if (fable5 && serviceTier !== "default") { + api.logger.warn(`ignoring unsupported Fable 5 Bedrock service tier: ${serviceTier}`); + } else { + wrapped = createBedrockServiceTierWrapper(wrapped, serviceTier); + } } const region = @@ -523,8 +596,8 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { currentPluginConfig?.discovery?.region; const mayNeedCacheInjection = isBedrockAppInferenceProfile(modelId) && !sharedRuntimeWouldInjectCachePoints(modelId); - const shouldOmitTemperature = isOpus47OrNewerBedrockModelRef(modelId); - const shouldPatchMaxThinking = shouldOmitTemperature && thinkingLevel === "max"; + const shouldOmitTemperature = opus47OrNewer || fable5; + const shouldPatchMaxThinking = supportsNativeMax && thinkingLevel === "max"; // For known Anthropic models (heuristic match), enable injection immediately. // For opaque profile IDs, we'll resolve via GetInferenceProfile on first call. @@ -539,8 +612,8 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { return wrapped; } return (streamModel, context, options) => { - const merged = omitDeprecatedOpus47Temperature( - modelId, + const merged = omitUnsupportedClaudeTemperature( + modelRef, Object.assign({}, options, region ? 
{ region } : {}), ); @@ -559,8 +632,8 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { onPayload: (payload: unknown, payloadModel: unknown) => { if (payload && typeof payload === "object") { const payloadRecord = payload as Record; - patchOpus47MaxThinkingEffort(payloadRecord); - omitDeprecatedOpus47PayloadTemperature(payloadRecord); + patchMaxThinkingEffort(payloadRecord); + omitUnsupportedClaudePayloadTemperature(payloadRecord); } return originalOnPayload?.(payload, payloadModel); }, @@ -594,14 +667,14 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { const payloadRecord = payload as Record; injectBedrockCachePoints(payloadRecord, cacheRetention); if (shouldPatchMaxThinking) { - patchOpus47MaxThinkingEffort(payloadRecord); + patchMaxThinkingEffort(payloadRecord); } if (shouldOmitTemperature) { - omitDeprecatedOpus47PayloadTemperature(payloadRecord); + omitUnsupportedClaudePayloadTemperature(payloadRecord); } else if (mayNeedTemperatureTrait) { const traits = await resolveAppProfileTraits(modelId, region); if (traits.omitTemperature) { - omitDeprecatedOpus47PayloadTemperature(payloadRecord); + omitUnsupportedClaudePayloadTemperature(payloadRecord); } } } @@ -626,10 +699,10 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { injectBedrockCachePoints(payloadRecord, cacheRetention); } if (shouldPatchMaxThinking) { - patchOpus47MaxThinkingEffort(payloadRecord); + patchMaxThinkingEffort(payloadRecord); } if (traits.omitTemperature) { - omitDeprecatedOpus47PayloadTemperature(payloadRecord); + omitUnsupportedClaudePayloadTemperature(payloadRecord); } } return originalOnPayload?.(payload, payloadModel); @@ -652,6 +725,7 @@ export function registerAmazonBedrockPlugin(api: OpenClawPluginApi): void { } return undefined; }, - resolveThinkingProfile: ({ modelId }) => resolveBedrockClaudeThinkingProfile(modelId), + resolveThinkingProfile: ({ modelId, params }) => + 
resolveBedrockClaudeThinkingProfile(modelId, params), }); } diff --git a/extensions/amazon-bedrock/stream.runtime.test.ts b/extensions/amazon-bedrock/stream.runtime.test.ts index 2e2229add84c..cf6d61a4d4d3 100644 --- a/extensions/amazon-bedrock/stream.runtime.test.ts +++ b/extensions/amazon-bedrock/stream.runtime.test.ts @@ -1,6 +1,8 @@ // Amazon Bedrock tests cover stream plugin behavior. -import { describe, expect, it } from "vitest"; -import { testing } from "./stream.runtime.js"; +import { BedrockRuntimeClient, ConversationRole } from "@aws-sdk/client-bedrock-runtime"; +import { onLlmRequestActivity } from "openclaw/plugin-sdk/provider-stream-shared"; +import { afterEach, describe, expect, it, vi } from "vitest"; +import { streamBedrock, streamSimpleBedrock, testing } from "./stream.runtime.js"; function bedrockModel(overrides: Record) { return { @@ -39,6 +41,16 @@ function signedThinkingContext(modelId: string) { } as never; } +async function* streamEvents(events: unknown[]) { + for (const event of events) { + yield event; + } +} + +afterEach(() => { + vi.restoreAllMocks(); +}); + describe("Bedrock reasoning replay", () => { it("preserves signed reasoning for Claude profile descriptors", () => { const modelId = @@ -74,6 +86,69 @@ describe("Bedrock reasoning replay", () => { expect(messages[0]?.content).toEqual([{ text: "privatereasoning" }]); }); + + it("preserves signature-only Fable reasoning blocks", () => { + const modelId = "anthropic.claude-fable-5"; + const messages = testing.convertMessages( + { + messages: [ + { + role: "assistant", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + model: modelId, + content: [ + { + type: "thinking", + thinking: "", + thinkingSignature: " sig-fable ", + }, + ], + }, + ], + } as never, + bedrockModel({ id: modelId, name: "Claude Fable 5" }), + "none", + ); + + expect(messages[0]?.content).toEqual([ + { + reasoningContent: { + reasoningText: { + text: "", + signature: " sig-fable ", + }, + }, + }, + ]); 
+ }); + + it("drops synthetic reasoning placeholders from Claude replay", () => { + const modelId = "anthropic.claude-fable-5"; + const messages = testing.convertMessages( + { + messages: [ + { + role: "assistant", + api: "bedrock-converse-stream", + provider: "amazon-bedrock", + model: modelId, + content: [ + { + type: "thinking", + thinking: "hidden compatibility reasoning", + thinkingSignature: "reasoning_content", + }, + ], + }, + ], + } as never, + bedrockModel({ id: modelId, name: "Claude Fable 5" }), + "none", + ); + + expect(messages).toEqual([]); + }); }); describe("Bedrock profile endpoint resolution", () => { @@ -92,7 +167,7 @@ describe("Bedrock profile endpoint resolution", () => { }); describe("Bedrock thinking effort mapping", () => { - it("clamps max effort for Claude models without native max support", () => { + it("caps max effort at high for Claude Sonnet 4.6", () => { expect( testing.mapThinkingLevelToEffort( bedrockModel({ @@ -104,6 +179,18 @@ describe("Bedrock thinking effort mapping", () => { ).toBe("high"); }); + it("caps unsupported xhigh effort at high for Claude Opus 4.6", () => { + expect( + testing.mapThinkingLevelToEffort( + bedrockModel({ + id: "anthropic.claude-opus-4-6-v1:0", + name: "Claude Opus 4.6", + }), + "xhigh", + ), + ).toBe("high"); + }); + it("preserves max effort for Claude Opus 4.8", () => { expect( testing.mapThinkingLevelToEffort( @@ -115,4 +202,275 @@ describe("Bedrock thinking effort mapping", () => { ), ).toBe("max"); }); + + it("uses canonical Claude policy for deployment aliases", () => { + expect( + testing.mapThinkingLevelToEffort( + bedrockModel({ + id: "production-claude", + name: "Production Claude", + params: { canonicalModelId: "claude-opus-4-8" }, + }), + "max", + ), + ).toBe("max"); + }); + + it("preserves adaptive effort for opaque profiles with descriptive Claude names", () => { + expect( + testing.mapThinkingLevelToEffort( + bedrockModel({ + id: 
"arn:aws:bedrock:us-east-1:123456789012:application-inference-profile/profile-abc", + name: "Claude Production Opus 4.8", + }), + "xhigh", + ), + ).toBe("xhigh"); + }); +}); + +describe("Bedrock Fable contract", () => { + function fableModel() { + return bedrockModel({ + id: "production-fable", + name: "Production deployment", + reasoning: false, + params: { canonicalModelId: "claude-fable-5" }, + contextWindow: 1_000_000, + maxTokens: 128_000, + }); + } + + function context() { + return { + messages: [{ role: "user", content: "Reply briefly.", timestamp: 0 }], + tools: [ + { + name: "lookup", + description: "Lookup", + parameters: { type: "object", properties: {} }, + }, + ], + } as never; + } + + it("sends always-adaptive high effort without unsupported request controls", async () => { + const send = vi.spyOn(BedrockRuntimeClient.prototype, "send").mockResolvedValue({ + $metadata: { httpStatusCode: 200 }, + stream: streamEvents([ + { messageStart: { role: ConversationRole.ASSISTANT } }, + { messageStop: { stopReason: "end_turn" } }, + ]), + } as never); + + const stream = streamBedrock(fableModel(), context(), { + reasoning: "high", + temperature: 0.2, + toolChoice: "any", + }); + await stream.result(); + + const command = send.mock.calls[0]?.[0] as { input?: Record }; + expect(command.input).toMatchObject({ + modelId: "production-fable", + inferenceConfig: {}, + messages: [ + { + role: "user", + content: [{ text: "Reply briefly." 
}, { cachePoint: { type: "default" } }], + }, + ], + toolConfig: { toolChoice: { auto: {} } }, + additionalModelRequestFields: { + thinking: { type: "adaptive", display: "summarized" }, + output_config: { effort: "high" }, + }, + additionalModelResponseFieldPaths: ["/stop_details"], + }); + }); + + it("preserves explicit tool disabling", async () => { + const send = vi.spyOn(BedrockRuntimeClient.prototype, "send").mockResolvedValue({ + $metadata: { httpStatusCode: 200 }, + stream: streamEvents([ + { messageStart: { role: ConversationRole.ASSISTANT } }, + { messageStop: { stopReason: "end_turn" } }, + ]), + } as never); + + const stream = streamBedrock(fableModel(), context(), { + reasoning: "high", + toolChoice: "none", + }); + await stream.result(); + + const command = send.mock.calls[0]?.[0] as { input?: Record }; + expect(command.input?.toolConfig).toBeUndefined(); + }); + + it("quarantines partial output when Fable returns a terminal refusal", async () => { + vi.spyOn(BedrockRuntimeClient.prototype, "send").mockResolvedValue({ + $metadata: { httpStatusCode: 200 }, + stream: streamEvents([ + { + contentBlockDelta: { + contentBlockIndex: 0, + delta: { text: "discard this partial output" }, + }, + }, + { + messageStop: { + stopReason: "refusal", + additionalModelResponseFields: { + stop_details: { + category: "cyber", + explanation: "This request is not allowed.", + }, + }, + }, + }, + ]), + } as never); + + const stream = streamSimpleBedrock(fableModel(), context()); + const eventTypes: string[] = []; + for await (const event of stream) { + eventTypes.push(event.type); + } + const result = await stream.result(); + + expect(eventTypes).toEqual(["error"]); + expect(result.content).toEqual([]); + expect(result.errorMessage).toBe( + "Anthropic refusal (category: cyber): This request is not allowed.", + ); + expect(result.diagnostics).toEqual([ + expect.objectContaining({ + type: "provider_refusal", + details: { + provider: "amazon-bedrock", + category: "cyber", + 
explanation: "This request is not allowed.", + }, + }), + ]); + }); + + it("discards partial output when the Fable stream ends without messageStop", async () => { + vi.spyOn(BedrockRuntimeClient.prototype, "send").mockResolvedValue({ + $metadata: { httpStatusCode: 200 }, + stream: streamEvents([ + { messageStart: { role: ConversationRole.ASSISTANT } }, + { + contentBlockDelta: { + contentBlockIndex: 0, + delta: { text: "unsafe partial output" }, + }, + }, + ]), + } as never); + + const stream = streamSimpleBedrock(fableModel(), context()); + const eventTypes: string[] = []; + for await (const event of stream) { + eventTypes.push(event.type); + } + const result = await stream.result(); + + expect(eventTypes).toEqual(["error"]); + expect(result.content).toEqual([]); + expect(result.errorMessage).toContain("ended before messageStop"); + }); + + it("reports activity while Fable events are buffered", async () => { + vi.spyOn(BedrockRuntimeClient.prototype, "send").mockResolvedValue({ + $metadata: { httpStatusCode: 200 }, + stream: streamEvents([ + { messageStart: { role: ConversationRole.ASSISTANT } }, + { + contentBlockDelta: { + contentBlockIndex: 0, + delta: { text: "buffered output" }, + }, + }, + { messageStop: { stopReason: "end_turn" } }, + ]), + } as never); + const controller = new AbortController(); + let activityCount = 0; + const unsubscribe = onLlmRequestActivity(controller.signal, () => { + activityCount += 1; + }); + + try { + const stream = streamSimpleBedrock(fableModel(), context(), { + signal: controller.signal, + }); + await stream.result(); + } finally { + unsubscribe(); + } + + expect(activityCount).toBeGreaterThan(0); + }); +}); + +describe("Bedrock canonical Claude aliases", () => { + it.each([ + { + canonicalModelId: "claude-opus-4-8", + reasoning: "xhigh" as const, + thinkingLevelMap: { xhigh: "xhigh" as const, max: "max" as const }, + expectedEffort: "xhigh", + }, + { + canonicalModelId: "claude-opus-4-6", + reasoning: "max" as const, + 
thinkingLevelMap: { xhigh: null, max: "max" as const }, + expectedEffort: "max", + }, + { + canonicalModelId: "claude-opus-4-6", + reasoning: "max" as const, + thinkingLevelMap: { xhigh: null, max: null }, + expectedEffort: "high", + }, + ])( + "uses adaptive thinking and omits temperature for $canonicalModelId aliases", + async ({ canonicalModelId, reasoning, thinkingLevelMap, expectedEffort }) => { + const send = vi.spyOn(BedrockRuntimeClient.prototype, "send").mockResolvedValue({ + $metadata: { httpStatusCode: 200 }, + stream: streamEvents([ + { messageStart: { role: ConversationRole.ASSISTANT } }, + { messageStop: { stopReason: "end_turn" } }, + ]), + } as never); + const model = bedrockModel({ + id: "production-claude", + name: "Production Claude", + reasoning: false, + params: { canonicalModelId }, + thinkingLevelMap, + }); + + await streamSimpleBedrock( + model, + { messages: [{ role: "user", content: "Reply briefly.", timestamp: 0 }] } as never, + { + reasoning, + temperature: 0.2, + }, + ).result(); + + const command = send.mock.calls[0]?.[0] as { input?: Record }; + expect(command.input).toMatchObject({ + modelId: "production-claude", + inferenceConfig: {}, + additionalModelRequestFields: { + thinking: { type: "adaptive", display: "summarized" }, + output_config: { effort: expectedEffort }, + }, + }); + }, + ); }); diff --git a/extensions/amazon-bedrock/stream.runtime.ts b/extensions/amazon-bedrock/stream.runtime.ts index 5ff6230123a8..883eabc89353 100644 --- a/extensions/amazon-bedrock/stream.runtime.ts +++ b/extensions/amazon-bedrock/stream.runtime.ts @@ -38,6 +38,7 @@ import { transformMessages, type Api, type AssistantMessage, + type AssistantMessageEvent, type CacheRetention, type Context, type Model, @@ -51,9 +52,43 @@ import { type ToolCall, type ToolResultMessage, } from "openclaw/plugin-sdk/llm"; +import { + resolveClaudeFable5ModelIdentity, + resolveClaudeModelIdentity, + supportsClaudeAdaptiveThinking, + supportsClaudeNativeXhighEffort, +} from 
"openclaw/plugin-sdk/provider-model-shared"; +import { + applyAnthropicRefusal, + createDeferredEventBuffer, + notifyLlmRequestActivity, +} from "openclaw/plugin-sdk/provider-stream-shared"; import { supportsBedrockPromptCaching, type BedrockOptions } from "./bedrock-options.js"; +import { supportsBedrockNativeMaxEffort } from "./thinking-policy.js"; type Block = (TextContent | ThinkingContent | ToolCall) & { index?: number; partialJson?: string }; +type BedrockEventSink = { push(event: AssistantMessageEvent): void }; + +function usesClaudeFable5BedrockContract(model: Model<"bedrock-converse-stream">): boolean { + return resolveClaudeFable5ModelIdentity(model) !== undefined; +} + +function readBedrockStopDetails(fields: DocumentType | undefined): unknown { + if (!fields || typeof fields !== "object" || Array.isArray(fields)) { + return undefined; + } + const record = fields as Record; + return record.stop_details ?? record.stopDetails; +} + +function normalizeFableToolChoice( + toolChoice: BedrockOptions["toolChoice"], +): BedrockOptions["toolChoice"] { + if (toolChoice === "any" || (typeof toolChoice === "object" && toolChoice?.type === "tool")) { + return "auto"; + } + return toolChoice; +} /** Stream a Bedrock Converse request using Bedrock-specific options. */ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOptions> = ( @@ -83,6 +118,15 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt }; const blocks = output.content as Block[]; + const fable5 = usesClaudeFable5BedrockContract(model); + // Fable classifiers may refuse after partial output. Hold every event until + // messageStop proves the response is safe to expose. + const refusalBuffer = fable5 + ? createDeferredEventBuffer(stream, () => + notifyLlmRequestActivity(options.signal), + ) + : undefined; + const eventSink = refusalBuffer ?? 
stream; const config: BedrockRuntimeClientConfig = { profile: options.profile, @@ -155,16 +199,28 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt try { const client = new BedrockRuntimeClient(config); const cacheRetention = resolveCacheRetention(options.cacheRetention); + const additionalModelRequestFields = buildAdditionalModelRequestFields(model, options); + const thinking = (additionalModelRequestFields as Record | undefined) + ?.thinking; + const sendsAdaptiveThinking = + thinking !== null && + typeof thinking === "object" && + (thinking as { type?: unknown }).type === "adaptive"; let commandInput = { modelId: model.id, messages: convertMessages(context, model, cacheRetention), system: buildSystemPrompt(context.systemPrompt, model, cacheRetention), inferenceConfig: { ...(options.maxTokens !== undefined && { maxTokens: options.maxTokens }), - ...(options.temperature !== undefined && { temperature: options.temperature }), + ...(options.temperature !== undefined && + !sendsAdaptiveThinking && { temperature: options.temperature }), }, - toolConfig: convertToolConfig(context.tools, options.toolChoice), - additionalModelRequestFields: buildAdditionalModelRequestFields(model, options), + toolConfig: convertToolConfig( + context.tools, + fable5 ? normalizeFableToolChoice(options.toolChoice) : options.toolChoice, + ), + additionalModelRequestFields, + ...(fable5 ? { additionalModelResponseFieldPaths: ["/stop_details"] } : {}), ...(options.requestMetadata !== undefined && { requestMetadata: options.requestMetadata }), }; const nextCommandInput = await options?.onPayload?.(commandInput, model); @@ -185,6 +241,7 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt ); } + let sawMessageStop = false; for await (const item of response.stream!) 
{ if (item.messageStart) { if (item.messageStart.role !== ConversationRole.ASSISTANT) { @@ -192,15 +249,24 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt "Unexpected assistant message start but got user message start instead", ); } - stream.push({ type: "start", partial: output }); + eventSink.push({ type: "start", partial: output }); } else if (item.contentBlockStart) { - handleContentBlockStart(item.contentBlockStart, blocks, output, stream); + handleContentBlockStart(item.contentBlockStart, blocks, output, eventSink); } else if (item.contentBlockDelta) { - handleContentBlockDelta(item.contentBlockDelta, blocks, output, stream); + handleContentBlockDelta(item.contentBlockDelta, blocks, output, eventSink); } else if (item.contentBlockStop) { - handleContentBlockStop(item.contentBlockStop, blocks, output, stream); + handleContentBlockStop(item.contentBlockStop, blocks, output, eventSink); } else if (item.messageStop) { - output.stopReason = mapStopReason(item.messageStop.stopReason); + sawMessageStop = true; + if ((item.messageStop.stopReason as string | undefined) === "refusal") { + applyAnthropicRefusal( + output, + readBedrockStopDetails(item.messageStop.additionalModelResponseFields), + model.provider, + ); + } else { + output.stopReason = mapStopReason(item.messageStop.stopReason); + } } else if (item.metadata) { handleMetadata(item.metadata, model, output); } else if (item.internalServerException) { @@ -216,14 +282,18 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt } } + if (refusalBuffer && !sawMessageStop) { + throw new Error("Bedrock stream ended before messageStop"); + } if (options.signal?.aborted) { throw new Error("Request was aborted"); } if (output.stopReason === "error" || output.stopReason === "aborted") { - throw new Error("An unknown error occurred"); + throw new Error(output.errorMessage ?? 
"An unknown error occurred"); } + refusalBuffer?.flush(); stream.push({ type: "done", reason: output.stopReason, message: output }); stream.end(); } catch (error) { @@ -232,6 +302,10 @@ export const streamBedrock: StreamFunction<"bedrock-converse-stream", BedrockOpt // partialJson is only a streaming scratch buffer; never persist it. delete (block as Block).partialJson; } + if (refusalBuffer) { + refusalBuffer.discard(); + output.content = []; + } output.stopReason = options.signal?.aborted ? "aborted" : "error"; output.errorMessage = formatBedrockError(error); stream.push({ type: "error", reason: output.stopReason, error: output }); @@ -279,6 +353,13 @@ export const streamSimpleBedrock: StreamFunction<"bedrock-converse-stream", Simp options?: SimpleStreamOptions, ) => { const base = buildBaseOptions(model, options, undefined); + if (usesClaudeFable5BedrockContract(model)) { + return streamBedrock(model, context, { + ...base, + reasoning: options?.reasoning ?? "high", + thinkingBudgets: options?.thinkingBudgets, + } satisfies BedrockOptions); + } if (!options?.reasoning) { return streamBedrock(model, context, { ...base, @@ -287,7 +368,7 @@ export const streamSimpleBedrock: StreamFunction<"bedrock-converse-stream", Simp } if (isAnthropicClaudeModel(model)) { - if (supportsAdaptiveThinking(model.id, model.name)) { + if (supportsAdaptiveThinking(model)) { return streamBedrock(model, context, { ...base, reasoning: options.reasoning, @@ -326,7 +407,7 @@ function handleContentBlockStart( event: ContentBlockStartEvent, blocks: Block[], output: AssistantMessage, - stream: AssistantMessageEventStream, + stream: BedrockEventSink, ): void { const index = event.contentBlockIndex!; const start = event.start; @@ -349,7 +430,7 @@ function handleContentBlockDelta( event: ContentBlockDeltaEvent, blocks: Block[], output: AssistantMessage, - stream: AssistantMessageEventStream, + stream: BedrockEventSink, ): void { const contentBlockIndex = event.contentBlockIndex!; const delta = 
event.delta; @@ -432,7 +513,7 @@ function handleContentBlockStop( event: ContentBlockStopEvent, blocks: Block[], output: AssistantMessage, - stream: AssistantMessageEventStream, + stream: BedrockEventSink, ): void { const index = blocks.findIndex((b) => b.index === event.contentBlockIndex); const block = blocks[index]; @@ -463,47 +544,54 @@ function handleContentBlockStop( } } -/** - * Check if the model supports adaptive thinking (Opus 4.6+, Sonnet 4.6). - * Checks both model ID and model name to support application inference profiles - * whose ARNs don't contain the model name. - */ -function getModelMatchCandidates(modelId: string, modelName?: string): string[] { - const values = modelName ? [modelId, modelName] : [modelId]; - return values.flatMap((value) => { - const lower = value.toLowerCase(); - return [lower, lower.replace(/[\s_.:]+/g, "-")]; - }); +function resolveClaudeProfileNameModelId(modelName?: string): string | undefined { + const normalized = + modelName + ?.trim() + .toLowerCase() + .replace(/[\s_.:]+/g, "-") ?? ""; + if (!normalized.includes("claude")) { + return undefined; + } + const family = /(?:fable-5|opus-4-(?:6|7|8)|sonnet-4-6)(?:$|-)/.exec(normalized)?.[0]; + return family ? `claude-${family.replace(/-$/, "")}` : undefined; } -function supportsAdaptiveThinking(modelId: string, modelName?: string): boolean { - const candidates = getModelMatchCandidates(modelId, modelName); - return candidates.some( - (s) => - s.includes("opus-4-6") || - s.includes("opus-4-7") || - s.includes("opus-4-8") || - s.includes("sonnet-4-6"), +/** Check canonical metadata and profile names for adaptive Claude support. 
*/ +function supportsAdaptiveThinking(model: Model<"bedrock-converse-stream">): boolean { + const profileModelId = resolveClaudeProfileNameModelId(model.name); + return ( + supportsClaudeAdaptiveThinking(model) || supportsClaudeAdaptiveThinking({ id: profileModelId }) ); } function supportsNativeXhighEffort(model: Model<"bedrock-converse-stream">): boolean { - const candidates = getModelMatchCandidates(model.id, model.name); - return candidates.some((s) => s.includes("opus-4-7") || s.includes("opus-4-8")); + const profileModelId = resolveClaudeProfileNameModelId(model.name); + return ( + supportsClaudeNativeXhighEffort(model) || + supportsClaudeNativeXhighEffort({ id: profileModelId }) + ); +} + +function supportsNativeMaxEffort(model: Model<"bedrock-converse-stream">): boolean { + const profileModelId = resolveClaudeProfileNameModelId(model.name); + return ( + supportsBedrockNativeMaxEffort(model.id, model.params) || + supportsBedrockNativeMaxEffort(profileModelId ?? "") + ); } function mapThinkingLevelToEffort( model: Model<"bedrock-converse-stream">, level: SimpleStreamOptions["reasoning"], ): "low" | "medium" | "high" | "xhigh" | "max" { - if (level === "xhigh" && supportsNativeXhighEffort(model)) { - return "xhigh"; - } - const mapped = level ? model.thinkingLevelMap?.[level] : undefined; if (typeof mapped === "string") { return mapped as "low" | "medium" | "high" | "xhigh" | "max"; } + if ((level === "xhigh" || level === "max") && mapped === null) { + return "high"; + } switch (level) { case "minimal": @@ -513,8 +601,10 @@ function mapThinkingLevelToEffort( return "medium"; case "high": return "high"; + case "xhigh": + return supportsNativeXhighEffort(model) ? "xhigh" : "high"; case "max": - return supportsNativeXhighEffort(model) ? "max" : "high"; + return supportsNativeMaxEffort(model) ? 
"max" : "high"; default: return "high"; } @@ -540,6 +630,12 @@ function resolveCacheRetention(cacheRetention?: CacheRetention): CacheRetention * whose ARNs don't contain the model name. */ function isAnthropicClaudeModel(model: Model<"bedrock-converse-stream">): boolean { + if (usesClaudeFable5BedrockContract(model)) { + return true; + } + if (resolveClaudeModelIdentity(model).startsWith("claude-")) { + return true; + } const id = model.id.toLowerCase(); const name = model.name?.toLowerCase() ?? ""; return ( @@ -552,7 +648,11 @@ function isAnthropicClaudeModel(model: Model<"bedrock-converse-stream">): boolea } function supportsPromptCaching(model: Model<"bedrock-converse-stream">): boolean { - return supportsBedrockPromptCaching(model.id, model.name); + return ( + usesClaudeFable5BedrockContract(model) || + supportsBedrockPromptCaching(model.id, model.name) || + supportsBedrockPromptCaching(resolveClaudeModelIdentity(model), model.name) + ); } /** @@ -656,26 +756,35 @@ function convertMessages( toolUse: { toolUseId: c.id, name: c.name, input: c.arguments as DocumentType }, }); break; - case "thinking": - // Skip empty thinking blocks - if (c.thinking.trim().length === 0) { + case "thinking": { + const thinkingSignature = c.thinkingSignature; + const normalizedThinkingSignature = thinkingSignature?.trim(); + const supportsSignature = supportsThinkingSignature(model); + const hasNativeThinkingSignature = + supportsSignature && + Boolean(normalizedThinkingSignature) && + normalizedThinkingSignature !== "reasoning_content"; + if (c.thinking.trim().length === 0 && !hasNativeThinkingSignature) { continue; } // Only Anthropic models support the signature field in reasoningText. 
// For other models, we omit the signature to avoid errors like: // "This model doesn't support the reasoningContent.reasoningText.signature field" - if (supportsThinkingSignature(model)) { + if (supportsSignature) { + if (normalizedThinkingSignature === "reasoning_content") { + continue; + } // Signatures arrive after thinking deltas. If a partial or externally // persisted message lacks a signature, Bedrock rejects the replayed // reasoning block. Fall back to plain text, matching Anthropic. - if (!c.thinkingSignature || c.thinkingSignature.trim().length === 0) { + if (!thinkingSignature || !normalizedThinkingSignature) { contentBlocks.push({ text: sanitizeSurrogates(c.thinking) }); } else { contentBlocks.push({ reasoningContent: { reasoningText: { text: c.thinking, - signature: c.thinkingSignature, + signature: thinkingSignature, }, }, }); @@ -684,6 +793,7 @@ function convertMessages( contentBlocks.push({ text: sanitizeSurrogates(c.thinking) }); } break; + } default: continue; } @@ -877,7 +987,12 @@ function buildAdditionalModelRequestFields( model: Model<"bedrock-converse-stream">, options: BedrockOptions, ): DocumentType | undefined { - if (!options.reasoning || !model.reasoning) { + if ( + !options.reasoning || + (!model.reasoning && + !usesClaudeFable5BedrockContract(model) && + !supportsAdaptiveThinking(model)) + ) { return undefined; } @@ -887,7 +1002,7 @@ function buildAdditionalModelRequestFields( const display = isGovCloudBedrockTarget(model, options) ? undefined : (options.thinkingDisplay ?? "summarized"); - const result: Record = supportsAdaptiveThinking(model.id, model.name) + const result: Record = supportsAdaptiveThinking(model) ? { thinking: { type: "adaptive", ...(display !== undefined ? { display } : {}) }, output_config: { effort: mapThinkingLevelToEffort(model, options.reasoning) }, @@ -915,7 +1030,7 @@ function buildAdditionalModelRequestFields( }; })(); - if (!supportsAdaptiveThinking(model.id, model.name) && (options.interleavedThinking ?? 
true)) { + if (!supportsAdaptiveThinking(model) && (options.interleavedThinking ?? true)) { result.anthropic_beta = ["interleaved-thinking-2025-05-14"]; } diff --git a/extensions/amazon-bedrock/thinking-policy.ts b/extensions/amazon-bedrock/thinking-policy.ts index a81ee6c486b8..1c67daccc6e1 100644 --- a/extensions/amazon-bedrock/thinking-policy.ts +++ b/extensions/amazon-bedrock/thinking-policy.ts @@ -2,7 +2,14 @@ * Thinking-level policy for Claude models on Amazon Bedrock. It maps Bedrock * model ids to the provider SDK thinking levels that are actually supported. */ -import type { ProviderThinkingProfile } from "openclaw/plugin-sdk/plugin-entry"; +import type { + ProviderRuntimeModel, + ProviderThinkingProfile, +} from "openclaw/plugin-sdk/plugin-entry"; +import { + resolveClaudeFable5ModelIdentity, + resolveClaudeModelIdentity, +} from "openclaw/plugin-sdk/provider-model-shared"; const BASE_CLAUDE_THINKING_LEVELS = [ { id: "off" }, @@ -13,14 +20,20 @@ const BASE_CLAUDE_THINKING_LEVELS = [ ] as const satisfies ProviderThinkingProfile["levels"]; function isOpus48BedrockModelRef(modelRef: string): boolean { - return /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?anthropic\.claude-opus-4[.-]8(?:$|[-.:/])/i.test( + return /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?(?:anthropic\.)?claude-opus-4[.-]8(?:$|[-.:/])/i.test( + modelRef, + ); +} + +function isOpus46BedrockModelRef(modelRef: string): boolean { + return /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?(?:anthropic\.)?claude-opus-4[.-]6(?:$|[-.:/])/i.test( modelRef, ); } /** Return whether a Bedrock model ref names Claude Opus 4.7. 
*/ export function isOpus47BedrockModelRef(modelRef: string): boolean { - return /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?anthropic\.claude-opus-4[.-]7(?:$|[-.:/])/i.test( + return /(?:^|[/.:])(?:(?:us|eu|ap|apac|au|jp|global)\.)?(?:anthropic\.)?claude-opus-4[.-]7(?:$|[-.:/])/i.test( modelRef, ); } @@ -30,22 +43,73 @@ export function isOpus47OrNewerBedrockModelRef(modelRef: string): boolean { return isOpus47BedrockModelRef(modelRef) || isOpus48BedrockModelRef(modelRef); } +/** Return whether a Bedrock Claude ref supports max effort. */ +export function supportsBedrockNativeMaxEffort( + modelId: string, + params?: Record, +): boolean { + if (resolveClaudeFable5ModelIdentity({ id: modelId, params })) { + return true; + } + const canonicalModelId = resolveClaudeModelIdentity({ id: modelId, params }); + return [modelId, canonicalModelId].some( + (modelRef) => isOpus46BedrockModelRef(modelRef) || isOpus47OrNewerBedrockModelRef(modelRef), + ); +} + +/** Resolve route-specific native effort mappings for Bedrock Claude models. */ +export function resolveBedrockNativeThinkingLevelMap( + modelId: string, + params?: Record, +): ProviderRuntimeModel["thinkingLevelMap"] | undefined { + const modelRef = { id: modelId, params }; + if (resolveClaudeFable5ModelIdentity(modelRef)) { + return { off: "low", minimal: "low", xhigh: "xhigh", max: "max" }; + } + if (!supportsBedrockNativeMaxEffort(modelId, params)) { + return undefined; + } + const canonicalModelId = resolveClaudeModelIdentity(modelRef); + return { + xhigh: [modelId, canonicalModelId].some(isOpus47OrNewerBedrockModelRef) ? "xhigh" : null, + max: "max", + }; +} + /** Resolve supported Claude thinking levels for a Bedrock model id. 
*/ -export function resolveBedrockClaudeThinkingProfile(modelId: string): ProviderThinkingProfile { +export function resolveBedrockClaudeThinkingProfile( + modelId: string, + params?: Record, +): ProviderThinkingProfile { const trimmed = modelId.trim(); - if (isOpus48BedrockModelRef(trimmed)) { + const canonicalModelId = resolveClaudeModelIdentity({ id: trimmed, params }); + const modelRefs = [trimmed, canonicalModelId]; + if (resolveClaudeFable5ModelIdentity({ id: trimmed, params })) { + return { + levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }], + defaultLevel: "high", + preserveWhenCatalogReasoningFalse: true, + }; + } + if (modelRefs.some(isOpus48BedrockModelRef)) { return { levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }], defaultLevel: "off", }; } - if (isOpus47BedrockModelRef(trimmed)) { + if (modelRefs.some(isOpus47BedrockModelRef)) { return { levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }], defaultLevel: "off", }; } - if (/claude-(?:opus|sonnet)-4(?:\.|-)6(?:$|[-.])/i.test(trimmed)) { + if (modelRefs.some(isOpus46BedrockModelRef)) { + return { + levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "adaptive" }, { id: "max" }], + defaultLevel: "adaptive", + }; + } + if (modelRefs.some((modelRef) => /claude-sonnet-4(?:\.|-)6(?:$|[-.])/i.test(modelRef))) { return { levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "adaptive" }], defaultLevel: "adaptive", diff --git a/extensions/anthropic-vertex/index.test.ts b/extensions/anthropic-vertex/index.test.ts index 6fa583799ba2..f2b4bb9727a2 100644 --- a/extensions/anthropic-vertex/index.test.ts +++ b/extensions/anthropic-vertex/index.test.ts @@ -78,14 +78,19 @@ describe("anthropic-vertex provider plugin", () => { expect(result.provider.baseUrl).toBe("https://europe-west4-aiplatform.googleapis.com"); expect(result.provider.headers).toEqual({ "x-test-header": "1" }); 
expect(result.provider.models.map((model) => model.id)).toEqual([ + "claude-fable-5", "claude-opus-4-8", "claude-opus-4-6", "claude-sonnet-4-6", ]); expect(result.provider.models[0]?.thinkingLevelMap).toEqual({ + off: "low", + minimal: "low", xhigh: "xhigh", max: "max", }); + expect(result.provider.models[2]?.thinkingLevelMap).toEqual({ xhigh: null, max: "max" }); + expect(result.provider.models[3]?.thinkingLevelMap).toEqual({ xhigh: null, max: "max" }); }); it("owns Anthropic-style replay policy", async () => { @@ -107,6 +112,13 @@ describe("anthropic-vertex provider plugin", () => { validateAnthropicTurns: true, allowSyntheticToolResults: true, }); + expect( + provider.buildReplayPolicy?.({ + provider: "anthropic-vertex", + modelApi: "anthropic-messages", + modelId: "claude-fable-5", + } as never), + ).not.toHaveProperty("dropThinkingBlocks"); }); it("owns Anthropic-style thinking policy", async () => { @@ -119,6 +131,81 @@ describe("anthropic-vertex provider plugin", () => { expect(opus48Profile?.defaultLevel).toBe("off"); expect(opus48Profile?.levels.map((level) => level.id)).toContain("max"); + + const fableProfile = provider.resolveThinkingProfile?.({ + provider: "anthropic-vertex", + modelId: "claude-fable-5", + } as never); + expect(fableProfile?.defaultLevel).toBe("high"); + expect(fableProfile?.preserveWhenCatalogReasoningFalse).toBe(true); + + const aliasProfile = provider.resolveThinkingProfile?.({ + provider: "anthropic-vertex", + modelId: "production-claude", + params: { canonicalModelId: "claude-fable-5" }, + } as never); + expect(aliasProfile?.defaultLevel).toBe("high"); + }); + + it("restores Fable metadata for explicit Vertex catalog rows", async () => { + const provider = await registerSingleProviderPlugin(anthropicVertexPlugin); + + const normalized = provider.normalizeResolvedModel?.({ + provider: "anthropic-vertex", + modelId: "claude-fable-5", + model: { + id: "claude-fable-5", + name: "Claude Fable 5", + api: "anthropic-messages", + 
provider: "anthropic-vertex", + baseUrl: "https://aiplatform.googleapis.com", + reasoning: false, + input: ["text"], + cost: { input: 10, output: 50, cacheRead: 1, cacheWrite: 12.5 }, + contextWindow: 200_000, + maxTokens: 8192, + }, + } as never); + + expect(normalized).toMatchObject({ + reasoning: true, + input: ["text", "image"], + contextWindow: 1_000_000, + contextTokens: 1_000_000, + maxTokens: 128_000, + thinkingLevelMap: { + off: "low", + minimal: "low", + xhigh: "xhigh", + max: "max", + }, + }); + + const aliasNormalized = provider.normalizeResolvedModel?.({ + provider: "anthropic-vertex", + modelId: "production-claude", + model: { + id: "production-claude", + name: "Production Claude", + api: "anthropic-messages", + provider: "anthropic-vertex", + baseUrl: "https://aiplatform.googleapis.com", + reasoning: false, + input: ["text"], + cost: { input: 10, output: 50, cacheRead: 1, cacheWrite: 12.5 }, + contextWindow: 200_000, + maxTokens: 8192, + params: { canonicalModelId: "claude-fable-5" }, + thinkingLevelMap: { max: null }, + }, + } as never); + expect(aliasNormalized).toMatchObject({ + reasoning: true, + input: ["text", "image"], + contextWindow: 1_000_000, + maxTokens: 128_000, + thinkingLevelMap: { off: "low", minimal: "low", xhigh: "xhigh", max: null }, + }); }); it("resolves synthetic auth when ADC is available", async () => { diff --git a/extensions/anthropic-vertex/index.ts b/extensions/anthropic-vertex/index.ts index e8983615c954..a7b3a6a4762e 100644 --- a/extensions/anthropic-vertex/index.ts +++ b/extensions/anthropic-vertex/index.ts @@ -14,6 +14,7 @@ import { resolveAnthropicVertexConfigApiKey, resolveImplicitAnthropicVertexProvider, } from "./api.js"; +import { normalizeAnthropicVertexResolvedModel } from "./provider-catalog.js"; const PROVIDER_ID = "anthropic-vertex"; const GCP_VERTEX_CREDENTIALS_MARKER = "gcp-vertex-credentials"; @@ -48,7 +49,10 @@ export default definePluginEntry({ }, resolveConfigApiKey: ({ env }) => 
resolveAnthropicVertexConfigApiKey(env), ...NATIVE_ANTHROPIC_REPLAY_HOOKS, - resolveThinkingProfile: ({ modelId }) => resolveClaudeThinkingProfile(modelId), + normalizeResolvedModel: ({ modelId, model }) => + normalizeAnthropicVertexResolvedModel(modelId, model), + resolveThinkingProfile: ({ modelId, params }) => + resolveClaudeThinkingProfile(modelId, params, { includeNativeMax: true }), resolveSyntheticAuth: () => { if (!hasAnthropicVertexAvailableAuth()) { return undefined; diff --git a/extensions/anthropic-vertex/provider-catalog.ts b/extensions/anthropic-vertex/provider-catalog.ts index 200a807568a9..9a67fc877b06 100644 --- a/extensions/anthropic-vertex/provider-catalog.ts +++ b/extensions/anthropic-vertex/provider-catalog.ts @@ -1,3 +1,4 @@ +import type { ProviderRuntimeModel } from "openclaw/plugin-sdk/plugin-entry"; /** * Static Anthropic Vertex model catalog builder. It derives provider base URLs * from region configuration and publishes Claude model metadata. @@ -6,11 +7,13 @@ import type { ModelDefinitionConfig, ModelProviderConfig, } from "openclaw/plugin-sdk/provider-model-shared"; +import { resolveClaudeFable5ModelIdentity } from "openclaw/plugin-sdk/provider-model-shared"; import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/string-coerce-runtime"; import { resolveAnthropicVertexRegion } from "./region.js"; /** Default Anthropic Vertex model used for implicit provider catalogs. 
*/ export const ANTHROPIC_VERTEX_DEFAULT_MODEL_ID = "claude-sonnet-4-6"; const ANTHROPIC_VERTEX_DEFAULT_CONTEXT_WINDOW = 1_000_000; +const ANTHROPIC_VERTEX_FABLE_MAX_TOKENS = 128_000; const GCP_VERTEX_CREDENTIALS_MARKER = "gcp-vertex-credentials"; function buildAnthropicVertexModel(params: { @@ -36,6 +39,15 @@ function buildAnthropicVertexModel(params: { function buildAnthropicVertexCatalog(): ModelDefinitionConfig[] { return [ + buildAnthropicVertexModel({ + id: "claude-fable-5", + name: "Claude Fable 5", + reasoning: true, + input: ["text", "image"], + cost: { input: 10, output: 50, cacheRead: 1, cacheWrite: 12.5 }, + maxTokens: ANTHROPIC_VERTEX_FABLE_MAX_TOKENS, + thinkingLevelMap: { off: "low", minimal: "low", xhigh: "xhigh", max: "max" }, + }), buildAnthropicVertexModel({ id: "claude-opus-4-8", name: "Claude Opus 4.8", @@ -52,6 +64,7 @@ function buildAnthropicVertexCatalog(): ModelDefinitionConfig[] { input: ["text", "image"], cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 }, maxTokens: 128000, + thinkingLevelMap: { xhigh: null, max: "max" }, }), buildAnthropicVertexModel({ id: ANTHROPIC_VERTEX_DEFAULT_MODEL_ID, @@ -60,10 +73,53 @@ function buildAnthropicVertexCatalog(): ModelDefinitionConfig[] { input: ["text", "image"], cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, maxTokens: 128000, + thinkingLevelMap: { xhigh: null, max: "max" }, }), ]; } +/** Restore required Fable metadata after explicit catalog models replace the implicit row. */ +export function normalizeAnthropicVertexResolvedModel( + modelId: string, + model: ProviderRuntimeModel, +): ProviderRuntimeModel | undefined { + if (!resolveClaudeFable5ModelIdentity({ id: modelId, params: model.params })) { + return undefined; + } + const input: ProviderRuntimeModel["input"] = model.input.includes("image") + ? 
model.input + : [...model.input, "image"]; + const thinkingLevelMap = { + off: "low", + minimal: "low", + xhigh: "xhigh", + max: "max", + ...model.thinkingLevelMap, + }; + if ( + model.reasoning && + input === model.input && + model.contextWindow === ANTHROPIC_VERTEX_DEFAULT_CONTEXT_WINDOW && + model.contextTokens === ANTHROPIC_VERTEX_DEFAULT_CONTEXT_WINDOW && + (model.maxTokens ?? 0) >= ANTHROPIC_VERTEX_FABLE_MAX_TOKENS && + model.thinkingLevelMap?.off === "low" && + model.thinkingLevelMap.minimal === "low" && + model.thinkingLevelMap.xhigh === "xhigh" && + model.thinkingLevelMap.max === "max" + ) { + return undefined; + } + return { + ...model, + reasoning: true, + input, + contextWindow: ANTHROPIC_VERTEX_DEFAULT_CONTEXT_WINDOW, + contextTokens: ANTHROPIC_VERTEX_DEFAULT_CONTEXT_WINDOW, + maxTokens: Math.max(model.maxTokens ?? 0, ANTHROPIC_VERTEX_FABLE_MAX_TOKENS), + thinkingLevelMap, + }; +} + /** Build the implicit Anthropic Vertex provider config for the current env. */ export function buildAnthropicVertexProvider(params?: { env?: NodeJS.ProcessEnv; diff --git a/extensions/anthropic-vertex/provider-policy-api.test.ts b/extensions/anthropic-vertex/provider-policy-api.test.ts index 9f6a78b4ec6c..7d2cdcb75f6e 100644 --- a/extensions/anthropic-vertex/provider-policy-api.test.ts +++ b/extensions/anthropic-vertex/provider-policy-api.test.ts @@ -22,6 +22,38 @@ describe("anthropic-vertex provider-policy-api", () => { expect(profile?.defaultLevel).toBe("off"); }); + it("exposes native max without xhigh for Claude Sonnet 4.6", () => { + const profile = resolveThinkingProfile({ + provider: "anthropic-vertex", + modelId: "claude-sonnet-4-6", + }); + + expect(profile?.levels.map((level) => level.id)).toContain("max"); + expect(profile?.levels.map((level) => level.id)).not.toContain("xhigh"); + }); + + it("inherits Claude Fable 5's provider-agnostic thinking contract", () => { + const profile = resolveThinkingProfile({ + provider: "anthropic-vertex", + modelId: 
"claude-fable-5", + }); + + expect(profile?.defaultLevel).toBe("high"); + expect(profile?.preserveWhenCatalogReasoningFalse).toBe(true); + expect(profile?.levels.map((level) => level.id)).toContain("max"); + }); + + it("resolves deployment aliases from canonical model metadata", () => { + const profile = resolveThinkingProfile({ + provider: "anthropic-vertex", + modelId: "production-claude", + params: { canonicalModelId: "claude-fable-5" }, + }); + + expect(profile?.defaultLevel).toBe("high"); + expect(profile?.preserveWhenCatalogReasoningFalse).toBe(true); + }); + it("ignores other providers", () => { expect(resolveThinkingProfile({ provider: "anthropic", modelId: "claude-opus-4-8" })).toBe( null, diff --git a/extensions/anthropic-vertex/provider-policy-api.ts b/extensions/anthropic-vertex/provider-policy-api.ts index 2dc2ddee554f..278297448dba 100644 --- a/extensions/anthropic-vertex/provider-policy-api.ts +++ b/extensions/anthropic-vertex/provider-policy-api.ts @@ -5,9 +5,15 @@ import { resolveClaudeThinkingProfile } from "openclaw/plugin-sdk/provider-model-shared"; /** Resolve Anthropic Vertex thinking profile for a provider/model pair. 
*/ -export function resolveThinkingProfile(params: { provider: string; modelId: string }) { +export function resolveThinkingProfile(params: { + provider: string; + modelId: string; + params?: Record; +}) { if (params.provider.trim().toLowerCase() !== "anthropic-vertex") { return null; } - return resolveClaudeThinkingProfile(params.modelId); + return resolveClaudeThinkingProfile(params.modelId, params.params, { + includeNativeMax: true, + }); } diff --git a/extensions/anthropic-vertex/stream-runtime.test.ts b/extensions/anthropic-vertex/stream-runtime.test.ts index ac36f68c8846..05d1abba5829 100644 --- a/extensions/anthropic-vertex/stream-runtime.test.ts +++ b/extensions/anthropic-vertex/stream-runtime.test.ts @@ -32,12 +32,21 @@ function createStreamDeps(): { let createAnthropicVertexStreamFn: typeof import("./stream-runtime.js").createAnthropicVertexStreamFn; let createAnthropicVertexStreamFnForModel: typeof import("./stream-runtime.js").createAnthropicVertexStreamFnForModel; -function makeModel(params: { id: string; maxTokens?: number }): Model<"anthropic-messages"> { +function makeModel(params: { + id: string; + maxTokens?: number; + params?: Record; + reasoning?: boolean; + thinkingLevelMap?: Model<"anthropic-messages">["thinkingLevelMap"]; +}): Model<"anthropic-messages"> { return { id: params.id, api: "anthropic-messages", provider: "anthropic-vertex", + reasoning: params.reasoning ?? true, ...(params.maxTokens !== undefined ? { maxTokens: params.maxTokens } : {}), + ...(params.params ? { params: params.params } : {}), + ...(params.thinkingLevelMap ? 
{ thinkingLevelMap: params.thinkingLevelMap } : {}), } as Model<"anthropic-messages">; } @@ -195,7 +204,66 @@ describe("createAnthropicVertexStreamFn", () => { expect(streamTransportOptions(streamAnthropicMock).temperature).toBe(0.7); }); - it("maps xhigh reasoning to max effort for adaptive Opus models", () => { + it("uses Fable 5's always-adaptive Vertex contract", () => { + const { deps, streamAnthropicMock } = createStreamDeps(); + const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps); + const model = makeModel({ id: "claude-fable-5", maxTokens: 128000 }); + + void streamFn(model, { messages: [] }, { temperature: 0.7 }); + + expect(streamTransportOptions(streamAnthropicMock)).toMatchObject({ + thinkingEnabled: true, + effort: "high", + maxTokens: 128000, + }); + expect(streamTransportOptions(streamAnthropicMock)).not.toHaveProperty("temperature"); + }); + + it("uses canonical Claude policy for Vertex deployment aliases", () => { + const { deps, streamAnthropicMock } = createStreamDeps(); + const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps); + const model = makeModel({ + id: "production-claude", + maxTokens: 128000, + params: { canonicalModelId: "claude-opus-4-8" }, + }); + + void streamFn(model, { messages: [] }, { reasoning: "xhigh", temperature: 0.7 }); + + expect(streamTransportOptions(streamAnthropicMock)).toMatchObject({ + thinkingEnabled: true, + effort: "xhigh", + }); + expect(streamTransportOptions(streamAnthropicMock)).not.toHaveProperty("temperature"); + }); + + it("preserves Fable 5 low effort on Vertex", () => { + const { deps, streamAnthropicMock } = createStreamDeps(); + const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps); + const model = makeModel({ id: "claude-fable-5", maxTokens: 128000 }); + + void streamFn(model, { messages: [] }, { reasoning: "low" }); + + 
expect(streamTransportOptions(streamAnthropicMock)).toMatchObject({ + thinkingEnabled: true, + effort: "low", + }); + }); + + it("preserves Fable 5 xhigh effort on Vertex", () => { + const { deps, streamAnthropicMock } = createStreamDeps(); + const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps); + const model = makeModel({ id: "claude-fable-5", maxTokens: 128000 }); + + void streamFn(model, { messages: [] }, { reasoning: "xhigh" }); + + expect(streamTransportOptions(streamAnthropicMock)).toMatchObject({ + thinkingEnabled: true, + effort: "xhigh", + }); + }); + + it("maps unsupported xhigh reasoning to high effort for Opus 4.6", () => { const { deps, streamAnthropicMock } = createStreamDeps(); const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps); const model = makeModel({ id: "claude-opus-4-6", maxTokens: 64000 }); @@ -204,7 +272,7 @@ describe("createAnthropicVertexStreamFn", () => { const transportOptions = streamTransportOptions(streamAnthropicMock); expect(transportOptions.thinkingEnabled).toBe(true); - expect(transportOptions.effort).toBe("max"); + expect(transportOptions.effort).toBe("high"); }); it("maps xhigh reasoning to xhigh effort for Opus 4.8", () => { @@ -231,7 +299,7 @@ describe("createAnthropicVertexStreamFn", () => { expect(transportOptions.effort).toBe("max"); }); - it("clamps max reasoning for adaptive models without native max support", () => { + it("preserves native max reasoning for Sonnet 4.6", () => { const { deps, streamAnthropicMock } = createStreamDeps(); const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps); const model = makeModel({ id: "claude-sonnet-4-6", maxTokens: 128000 }); @@ -240,7 +308,24 @@ describe("createAnthropicVertexStreamFn", () => { const transportOptions = streamTransportOptions(streamAnthropicMock); expect(transportOptions.thinkingEnabled).toBe(true); + expect(transportOptions.effort).toBe("max"); + 
}); + + it("honors explicit max opt-outs for Vertex aliases", () => { + const { deps, streamAnthropicMock } = createStreamDeps(); + const streamFn = createAnthropicVertexStreamFn("vertex-project", "us-east5", undefined, deps); + const model = makeModel({ + id: "production-claude", + params: { canonicalModelId: "claude-sonnet-4-6" }, + reasoning: false, + thinkingLevelMap: { xhigh: null, max: null }, + }); + + void streamFn(model, { messages: [] }, { reasoning: "max", temperature: 0.2 }); + + const transportOptions = streamTransportOptions(streamAnthropicMock); expect(transportOptions.effort).toBe("high"); + expect(transportOptions).not.toHaveProperty("temperature"); }); it("applies Anthropic cache-boundary shaping before forwarding payload hooks", async () => { diff --git a/extensions/anthropic-vertex/stream-runtime.ts b/extensions/anthropic-vertex/stream-runtime.ts index 1fdc78fcfef2..94edb6241c9f 100644 --- a/extensions/anthropic-vertex/stream-runtime.ts +++ b/extensions/anthropic-vertex/stream-runtime.ts @@ -5,10 +5,19 @@ import { AnthropicVertex as AnthropicVertexSdk } from "@anthropic-ai/vertex-sdk"; import type { StreamFn } from "openclaw/plugin-sdk/agent-core"; import { + clampThinkingLevel, stream as streamDefault, type Model, + type ModelThinkingLevel, type ProviderStreamOptions, } from "openclaw/plugin-sdk/llm"; +import { + resolveClaudeFable5ModelIdentity, + resolveClaudeModelIdentity, + supportsClaudeAdaptiveThinking, + supportsClaudeNativeMaxEffort, + supportsClaudeNativeXhighEffort, +} from "openclaw/plugin-sdk/provider-model-shared"; import { applyAnthropicPayloadPolicyToParams, resolveAnthropicPayloadPolicy, @@ -42,44 +51,43 @@ const defaultAnthropicVertexStreamDeps: AnthropicVertexStreamDeps = { }; function isClaudeOpus47OrNewerModel(modelId: string): boolean { - return ( - modelId.includes("opus-4-8") || - modelId.includes("opus-4.8") || - modelId.includes("opus-4-7") || - modelId.includes("opus-4.7") - ); + return 
supportsClaudeNativeXhighEffort({ id: modelId }); } -function isClaudeOpus46Model(modelId: string): boolean { - return modelId.includes("opus-4-6") || modelId.includes("opus-4.6"); +function isClaudeFable5Model(modelId: string): boolean { + return resolveClaudeFable5ModelIdentity({ id: modelId }) !== undefined; } function supportsAdaptiveThinking(modelId: string): boolean { - return ( - isClaudeOpus47OrNewerModel(modelId) || - isClaudeOpus46Model(modelId) || - modelId.includes("sonnet-4-6") || - modelId.includes("sonnet-4.6") - ); + return supportsClaudeAdaptiveThinking({ id: modelId }); } function mapAnthropicAdaptiveEffort( - reasoning: string, + reasoning: ModelThinkingLevel, + model: Model<"anthropic-messages">, modelId: string, ): AnthropicVertexAdaptiveEffort { + const clampModel = + typeof model.params?.canonicalModelId === "string" ? { ...model, reasoning: true } : model; + const resolvedReasoning = clampThinkingLevel(clampModel, reasoning); + const mapped = model.thinkingLevelMap?.[resolvedReasoning]; + if (typeof mapped === "string") { + return mapped as AnthropicVertexAdaptiveEffort; + } const effortMap: Record = { + off: "low", minimal: "low", low: "low", medium: "medium", high: "high", - xhigh: isClaudeOpus47OrNewerModel(modelId) + xhigh: isClaudeFable5Model(modelId) ? "xhigh" - : isClaudeOpus46Model(modelId) - ? "max" + : isClaudeOpus47OrNewerModel(modelId) + ? "xhigh" : "high", - max: isClaudeOpus47OrNewerModel(modelId) ? "max" : "high", + max: supportsClaudeNativeMaxEffort({ id: modelId }) ? "max" : "high", }; - return effortMap[reasoning] ?? "high"; + return effortMap[resolvedReasoning] ?? "high"; } function resolveAnthropicVertexMaxTokens(params: { @@ -163,7 +171,15 @@ export function createAnthropicVertexStreamFn( modelMaxTokens: transportModel.maxTokens, requestedMaxTokens: options?.maxTokens, }); - const temperature = isClaudeOpus47OrNewerModel(model.id) ? 
undefined : options?.temperature; + const contractModelId = resolveClaudeModelIdentity(model); + const fable5 = isClaudeFable5Model(contractModelId); + const reasoning = options?.reasoning as ModelThinkingLevel | undefined; + const adaptiveThinking = + fable5 || Boolean(reasoning && supportsAdaptiveThinking(contractModelId)); + const temperature = + adaptiveThinking || isClaudeOpus47OrNewerModel(contractModelId) + ? undefined + : options?.temperature; const opts: AnthropicVertexTransportOptions = { client, ...(temperature !== undefined ? { temperature } : {}), @@ -181,21 +197,25 @@ export function createAnthropicVertexStreamFn( metadata: options?.metadata, }; - if (options?.reasoning) { - if (supportsAdaptiveThinking(model.id)) { + if (reasoning) { + if (supportsAdaptiveThinking(contractModelId)) { opts.thinkingEnabled = true; opts.effort = mapAnthropicAdaptiveEffort( - options.reasoning, - model.id, + reasoning, + transportModel, + contractModelId, ) as AnthropicVertexEffort; } else { opts.thinkingEnabled = true; - const budgets = options.thinkingBudgets; + const budgets = options?.thinkingBudgets; opts.thinkingBudgetTokens = - (budgets && options.reasoning in budgets - ? budgets[options.reasoning as keyof typeof budgets] + (budgets && reasoning in budgets + ? budgets[reasoning as keyof typeof budgets] : undefined) ?? 10000; } + } else if (fable5) { + opts.thinkingEnabled = true; + opts.effort = "high"; } else { opts.thinkingEnabled = false; } diff --git a/extensions/anthropic/cli-shared.ts b/extensions/anthropic/cli-shared.ts index cf33a35532f4..f3fa566f3a8a 100644 --- a/extensions/anthropic/cli-shared.ts +++ b/extensions/anthropic/cli-shared.ts @@ -72,6 +72,12 @@ const CLAUDE_BYPASS_PERMISSION_MODE = "bypassPermissions"; type ClaudeCliEffort = "low" | "medium" | "high" | "xhigh" | "max"; +/** Explicit thinking opt-out for Claude CLI routes unsupported by Claude Code. 
*/ +export const CLAUDE_CLI_OFF_THINKING_PROFILE = { + levels: [{ id: "off" }], + defaultLevel: "off", +} as const; + /** Return whether a provider id refers to the Claude CLI backend. */ export function isClaudeCliProvider(providerId: string): boolean { return normalizeOptionalLowercaseString(providerId) === CLAUDE_CLI_BACKEND_ID; diff --git a/extensions/anthropic/index.test.ts b/extensions/anthropic/index.test.ts index 5727a738d3de..840e11d65267 100644 --- a/extensions/anthropic/index.test.ts +++ b/extensions/anthropic/index.test.ts @@ -124,6 +124,26 @@ describe("anthropic provider replay hooks", () => { }); }); + it("preserves Fable thinking in its same-model replay policy", async () => { + const provider = await registerSingleProviderPlugin(anthropicPlugin); + const fableContext = { + provider: "anthropic", + modelApi: "anthropic-messages", + modelId: "claude-fable-5", + }; + + expect(provider.buildReplayPolicy?.(fableContext)).toEqual({ + sanitizeMode: "full", + sanitizeToolCallIds: true, + toolCallIdMode: "strict", + preserveNativeAnthropicToolUseIds: true, + preserveSignatures: true, + repairToolUseResultPairing: true, + validateAnthropicTurns: true, + allowSyntheticToolResults: true, + }); + }); + it("defaults provider api through plugin config normalization", async () => { const provider = await registerSingleProviderPlugin(anthropicPlugin); @@ -507,7 +527,102 @@ describe("anthropic provider replay hooks", () => { provider: "anthropic", modelId: "claude-opus-4-6", } as never) - ?.levels.some((level) => level.id === "xhigh" || level.id === "max"), + ?.levels.some((level) => level.id === "max"), + ).toBe(true); + expect( + provider + .resolveThinkingProfile?.({ + provider: "anthropic", + modelId: "claude-opus-4-6", + } as never) + ?.levels.some((level) => level.id === "xhigh"), + ).toBe(false); + }); + + it("resolves Claude Fable 5 with its always-adaptive model contract", async () => { + const provider = await registerSingleProviderPlugin(anthropicPlugin); 
+ const resolved = provider.resolveDynamicModel?.({ + provider: "anthropic", + modelId: "claude-fable-5", + modelRegistry: createModelRegistry([]), + } as ProviderResolveDynamicModelContext); + + expectFields(resolved, { + provider: "anthropic", + id: "claude-fable-5", + api: "anthropic-messages", + reasoning: true, + input: ["text", "image"], + cost: { input: 10, output: 50, cacheRead: 1, cacheWrite: 12.5 }, + contextWindow: 1_000_000, + contextTokens: 1_000_000, + maxTokens: 128_000, + thinkingLevelMap: { + off: "low", + minimal: "low", + xhigh: "xhigh", + max: "max", + }, + }); + expect(requireRecord(resolved, "Fable model").mediaInput).toEqual({ + image: { maxSidePx: 2576, preferredSidePx: 2576, tokenMode: "provider" }, + }); + + const profile = provider.resolveThinkingProfile?.({ + provider: "anthropic", + modelId: "claude-fable-5", + } as never); + expect(levelIds(profile)).toStrictEqual([ + "off", + "minimal", + "low", + "medium", + "high", + "xhigh", + "adaptive", + "max", + ]); + expect(requireRecord(profile, "Fable thinking profile").defaultLevel).toBe("high"); + + const normalized = provider.normalizeResolvedModel?.({ + provider: "anthropic", + modelId: "claude-fable-5", + model: { + ...(resolved as ProviderRuntimeModel), + reasoning: false, + }, + } as never); + expect(normalized?.reasoning).toBe(true); + + expect( + provider.resolveDynamicModel?.({ + provider: "claude-cli", + modelId: "claude-fable-5", + modelRegistry: createModelRegistry([]), + } as ProviderResolveDynamicModelContext), + ).toBeUndefined(); + expect( + provider.resolveThinkingProfile?.({ + provider: "claude-cli", + modelId: "claude-fable-5", + } as never), + ).toEqual({ + levels: [{ id: "off" }], + defaultLevel: "off", + }); + expect( + provider + .resolveThinkingProfile?.({ + provider: "claude-cli", + modelId: "claude-opus-4-6", + } as never) + ?.levels.map((level) => level.id), + ).toContain("max"); + expect( + provider.isModernModelRef?.({ + provider: "claude-cli", + modelId: 
"claude-fable-5", + }), ).toBe(false); }); @@ -529,6 +644,48 @@ describe("anthropic provider replay hooks", () => { }); }); + it("uses canonical model identity instead of a Fable-looking deployment alias", async () => { + const provider = await registerSingleProviderPlugin(anthropicPlugin); + const model = { + id: "claude-fable-5-prod", + name: "Production Claude", + provider: "anthropic", + api: "anthropic-messages", + baseUrl: "https://api.anthropic.com", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200_000, + maxTokens: 64_000, + params: { canonicalModelId: "claude-opus-4-8" }, + } as ProviderRuntimeModel; + + expectFields( + provider.normalizeResolvedModel?.({ + provider: "anthropic", + modelId: model.id, + model, + } as never), + { + reasoning: false, + contextWindow: 1_048_576, + contextTokens: 1_048_576, + maxTokens: 128_000, + thinkingLevelMap: { + xhigh: "xhigh", + max: "max", + }, + }, + ); + expect( + provider.resolveThinkingProfile?.({ + provider: "anthropic", + modelId: model.id, + params: model.params, + } as never)?.defaultLevel, + ).toBe("off"); + }); + it("does not forward-compat case-mismatched Anthropic model ids", async () => { const provider = await registerSingleProviderPlugin(anthropicPlugin); @@ -569,6 +726,7 @@ describe("anthropic provider replay hooks", () => { cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, contextWindow: 200_000, maxTokens: 64_000, + thinkingLevelMap: { max: null }, }, } as never); @@ -576,6 +734,29 @@ describe("anthropic provider replay hooks", () => { expect(normalized?.mediaInput).toEqual({ image: { maxSidePx: 1568, preferredSidePx: 1568, tokenMode: "provider" }, }); + expect(normalized?.thinkingLevelMap).toEqual({ xhigh: null, max: null }); + }); + + it("does not normalize numeric successors as known Claude contracts", async () => { + const provider = await registerSingleProviderPlugin(anthropicPlugin); + + const normalized = 
provider.normalizeResolvedModel?.({ + provider: "anthropic", + modelId: "claude-opus-4-60", + model: { + id: "claude-opus-4-60", + name: "Claude Opus 4.60", + provider: "anthropic", + api: "anthropic-messages", + reasoning: true, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200_000, + maxTokens: 64_000, + }, + } as never); + + expect(normalized).toBeUndefined(); }); it("merges partial Claude image media metadata with provider limits", async () => { diff --git a/extensions/anthropic/openclaw.plugin.json b/extensions/anthropic/openclaw.plugin.json index ea7f182e1af4..e9d602a197d2 100644 --- a/extensions/anthropic/openclaw.plugin.json +++ b/extensions/anthropic/openclaw.plugin.json @@ -61,6 +61,24 @@ "baseUrl": "https://api.anthropic.com", "api": "anthropic-messages", "models": [ + { + "id": "claude-fable-5", + "name": "Claude Fable 5", + "reasoning": true, + "input": ["text", "image"], + "mediaInput": { + "image": { "maxSidePx": 2576, "preferredSidePx": 2576, "tokenMode": "provider" } + }, + "cost": { "input": 10, "output": 50, "cacheRead": 1, "cacheWrite": 12.5 }, + "contextWindow": 1000000, + "maxTokens": 128000, + "thinkingLevelMap": { + "off": "low", + "minimal": "low", + "xhigh": "xhigh", + "max": "max" + } + }, { "id": "claude-opus-4-8", "name": "Claude Opus 4.8", diff --git a/extensions/anthropic/provider-policy-api.test.ts b/extensions/anthropic/provider-policy-api.test.ts index 6c19bf627364..9055bb770091 100644 --- a/extensions/anthropic/provider-policy-api.test.ts +++ b/extensions/anthropic/provider-policy-api.test.ts @@ -136,18 +136,52 @@ describe("anthropic provider policy public artifact", () => { expect(profile?.defaultLevel).toBe("off"); }); - it("keeps adaptive-only Claude profiles aligned with the runtime provider", () => { + it("exposes the always-adaptive Claude Fable 5 thinking profile", () => { const profile = resolveThinkingProfile({ provider: "anthropic", - modelId: "claude-opus-4-6", + 
modelId: "claude-fable-5", }); - if (!profile) { - throw new Error("Expected Anthropic policy profile"); + expect(profile).toEqual({ + levels: [ + { id: "off" }, + { id: "minimal" }, + { id: "low" }, + { id: "medium" }, + { id: "high" }, + { id: "xhigh" }, + { id: "adaptive" }, + { id: "max" }, + ], + defaultLevel: "high", + preserveWhenCatalogReasoningFalse: true, + }); + expect( + resolveThinkingProfile({ + provider: "claude-cli", + modelId: "claude-fable-5", + }), + ).toEqual({ + levels: [{ id: "off" }], + defaultLevel: "off", + }); + }); + + it("exposes native max without xhigh for direct Claude 4.6 routes", () => { + for (const provider of ["anthropic", "claude-cli"]) { + const profile = resolveThinkingProfile({ + provider, + modelId: "claude-opus-4-6", + }); + + if (!profile) { + throw new Error(`Expected ${provider} policy profile`); + } + expect(levelIds(profile.levels)).toContain("adaptive"); + expect(levelIds(profile.levels)).toContain("max"); + expect(profile.defaultLevel).toBe("adaptive"); + expect(collectLegacyExtendedLevelIds(profile.levels)).toStrictEqual(["max"]); } - expect(levelIds(profile.levels)).toContain("adaptive"); - expect(profile.defaultLevel).toBe("adaptive"); - expect(collectLegacyExtendedLevelIds(profile.levels)).toStrictEqual([]); }); it("does not expose Anthropic thinking profiles for unrelated providers", () => { diff --git a/extensions/anthropic/provider-policy-api.ts b/extensions/anthropic/provider-policy-api.ts index 891dc7a9dc65..78954ca948de 100644 --- a/extensions/anthropic/provider-policy-api.ts +++ b/extensions/anthropic/provider-policy-api.ts @@ -2,8 +2,12 @@ * Provider-policy API for Anthropic and Claude CLI. Core calls this lightweight * path for config defaults and thinking profiles. 
*/ -import { resolveClaudeThinkingProfile } from "openclaw/plugin-sdk/provider-model-shared"; +import { + resolveClaudeModelIdentity, + resolveClaudeThinkingProfile, +} from "openclaw/plugin-sdk/provider-model-shared"; import type { ModelProviderConfig } from "openclaw/plugin-sdk/provider-model-types"; +import { CLAUDE_CLI_OFF_THINKING_PROFILE } from "./cli-shared.js"; import { applyAnthropicConfigDefaults, normalizeAnthropicProviderConfigForProvider, @@ -20,11 +24,27 @@ export function applyConfigDefaults(params: Parameters; +}) { + const contractModelId = resolveClaudeModelIdentity({ + id: params.modelId, + params: params.params, + }); switch (params.provider.trim().toLowerCase()) { case "anthropic": + return resolveClaudeThinkingProfile(contractModelId, undefined, { + includeNativeMax: true, + }); case "claude-cli": - return resolveClaudeThinkingProfile(params.modelId); + if (contractModelId.startsWith("claude-fable-5")) { + return CLAUDE_CLI_OFF_THINKING_PROFILE; + } + return resolveClaudeThinkingProfile(contractModelId, undefined, { + includeNativeMax: true, + }); default: return null; } diff --git a/extensions/anthropic/register.runtime.ts b/extensions/anthropic/register.runtime.ts index 0e1047f95a1e..121222a83ded 100644 --- a/extensions/anthropic/register.runtime.ts +++ b/extensions/anthropic/register.runtime.ts @@ -28,8 +28,13 @@ import { } from "openclaw/plugin-sdk/provider-auth"; import { cloneFirstTemplateModel, + NATIVE_ANTHROPIC_REPLAY_HOOKS, type ProviderPlugin, + resolveClaudeFable5ModelIdentity, + resolveClaudeModelIdentity, resolveClaudeThinkingProfile, + supportsClaudeAdaptiveThinking, + supportsClaudeNativeXhighEffort, } from "openclaw/plugin-sdk/provider-model-shared"; import { fetchClaudeUsage } from "openclaw/plugin-sdk/provider-usage"; import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/string-coerce-runtime"; @@ -41,13 +46,13 @@ import { CLAUDE_CLI_BACKEND_ID, CLAUDE_CLI_DEFAULT_ALLOWLIST_REFS, CLAUDE_CLI_DEFAULT_MODEL_REF, 
+ CLAUDE_CLI_OFF_THINKING_PROFILE, } from "./cli-shared.js"; import { applyAnthropicConfigDefaults, normalizeAnthropicProviderConfigForProvider, } from "./config-defaults.js"; import { anthropicMediaUnderstandingProvider } from "./media-understanding-provider.js"; -import { buildAnthropicReplayPolicy } from "./replay-policy.js"; import { wrapAnthropicProviderStream } from "./stream-wrappers.js"; const PROVIDER_ID = "anthropic"; @@ -58,7 +63,8 @@ const ANTHROPIC_OPUS_48_DOT_MODEL_ID = "claude-opus-4.8"; const ANTHROPIC_OPUS_47_MODEL_ID = "claude-opus-4-7"; const ANTHROPIC_OPUS_47_DOT_MODEL_ID = "claude-opus-4.7"; const ANTHROPIC_GA_1M_CONTEXT_TOKENS = 1_048_576; -const ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS = 128_000; +const ANTHROPIC_FABLE_CONTEXT_TOKENS = 1_000_000; +const ANTHROPIC_MODERN_MAX_OUTPUT_TOKENS = 128_000; const ANTHROPIC_OPUS_46_MODEL_ID = "claude-opus-4-6"; const ANTHROPIC_OPUS_46_DOT_MODEL_ID = "claude-opus-4.6"; const ANTHROPIC_OPUS_47_TEMPLATE_MODEL_IDS = [ @@ -67,26 +73,6 @@ const ANTHROPIC_OPUS_47_TEMPLATE_MODEL_IDS = [ ] as const; const ANTHROPIC_SONNET_46_MODEL_ID = "claude-sonnet-4-6"; const ANTHROPIC_SONNET_46_DOT_MODEL_ID = "claude-sonnet-4.6"; -const ANTHROPIC_GA_1M_MODEL_PREFIXES = [ - ANTHROPIC_OPUS_48_MODEL_ID, - ANTHROPIC_OPUS_48_DOT_MODEL_ID, - ANTHROPIC_OPUS_46_MODEL_ID, - ANTHROPIC_OPUS_46_DOT_MODEL_ID, - ANTHROPIC_OPUS_47_MODEL_ID, - ANTHROPIC_OPUS_47_DOT_MODEL_ID, - ANTHROPIC_SONNET_46_MODEL_ID, - ANTHROPIC_SONNET_46_DOT_MODEL_ID, -] as const; -const ANTHROPIC_MODERN_MODEL_PREFIXES = [ - "claude-opus-4-8", - "claude-opus-4.8", - "claude-opus-4-7", - "claude-opus-4.7", - "claude-opus-4-6", - "claude-opus-4.6", - "claude-sonnet-4-6", - "claude-sonnet-4.6", -] as const; const ANTHROPIC_SETUP_TOKEN_NOTE_LINES = [ "Anthropic setup-token auth is supported in OpenClaw.", "OpenClaw prefers Claude CLI reuse when it is available on the host.", @@ -282,13 +268,15 @@ function buildAnthropicForwardCompatModel( ): ProviderRuntimeModel | undefined 
{ const trimmedModelId = ctx.modelId.trim(); const lower = normalizeLowercaseStringOrEmpty(trimmedModelId); + const normalizedProvider = normalizeLowercaseStringOrEmpty(ctx.provider); if (trimmedModelId !== lower || !matchesAnthropicModernModel(lower)) { return undefined; } + if (isAnthropicFable5Model(lower) && normalizedProvider !== PROVIDER_ID) { + return undefined; + } const provider = - normalizeLowercaseStringOrEmpty(ctx.provider) === CLAUDE_CLI_BACKEND_ID - ? CLAUDE_CLI_BACKEND_ID - : PROVIDER_ID; + normalizedProvider === CLAUDE_CLI_BACKEND_ID ? CLAUDE_CLI_BACKEND_ID : PROVIDER_ID; return { id: trimmedModelId, name: trimmedModelId, @@ -297,10 +285,12 @@ function buildAnthropicForwardCompatModel( baseUrl: "https://api.anthropic.com", reasoning: true, input: ["text", "image"], - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, - contextWindow: isAnthropicGa1MModel(trimmedModelId) ? ANTHROPIC_GA_1M_CONTEXT_TOKENS : 200_000, - maxTokens: isAnthropicOpus48Model(trimmedModelId) - ? ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS + cost: isAnthropicFable5Model(trimmedModelId) + ? { input: 10, output: 50, cacheRead: 1, cacheWrite: 12.5 } + : { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: resolveAnthropicFixedContextWindow(trimmedModelId) ?? 200_000, + maxTokens: isAnthropic128kOutputModel(trimmedModelId) + ? 
ANTHROPIC_MODERN_MAX_OUTPUT_TOKENS : 64_000, }; } @@ -346,25 +336,29 @@ function resolveAnthropicForwardCompatModel( } function isAnthropicGa1MModel(modelId: string): boolean { - const normalized = normalizeLowercaseStringOrEmpty(modelId); - return ANTHROPIC_GA_1M_MODEL_PREFIXES.some((prefix) => normalized.startsWith(prefix)); + return supportsClaudeAdaptiveThinking({ id: modelId }); } -function isAnthropicOpus48Model(modelId: string): boolean { - const normalized = normalizeLowercaseStringOrEmpty(modelId); - return [ANTHROPIC_OPUS_48_MODEL_ID, ANTHROPIC_OPUS_48_DOT_MODEL_ID].some((prefix) => - normalized.startsWith(prefix), - ); +function isAnthropicFable5Model(modelId: string): boolean { + return resolveClaudeFable5ModelIdentity({ id: modelId }) !== undefined; +} + +function resolveAnthropicFixedContextWindow(modelId: string): number | undefined { + if (isAnthropicFable5Model(modelId)) { + return ANTHROPIC_FABLE_CONTEXT_TOKENS; + } + return isAnthropicGa1MModel(modelId) ? ANTHROPIC_GA_1M_CONTEXT_TOKENS : undefined; +} + +function isAnthropic128kOutputModel(modelId: string): boolean { + if (isAnthropicFable5Model(modelId)) { + return true; + } + return /^claude-opus-4-8(?=$|[^a-z0-9])/.test(resolveClaudeModelIdentity({ id: modelId })); } function isAnthropicOpus47OrNewerModel(modelId: string): boolean { - const normalized = normalizeLowercaseStringOrEmpty(modelId); - return [ - ANTHROPIC_OPUS_48_MODEL_ID, - ANTHROPIC_OPUS_48_DOT_MODEL_ID, - ANTHROPIC_OPUS_47_MODEL_ID, - ANTHROPIC_OPUS_47_DOT_MODEL_ID, - ].some((prefix) => normalized.startsWith(prefix)); + return supportsClaudeNativeXhighEffort({ id: modelId }) && !isAnthropicFable5Model(modelId); } function hasConfiguredModelContextOverride( @@ -403,26 +397,29 @@ function hasConfiguredModelContextOverride( return false; } -function applyAnthropicGa1MContextWindow(params: { +function applyAnthropicFixedContextWindow(params: { config?: ProviderNormalizeResolvedModelContext["config"]; provider: string; modelId: 
string; + contractModelId: string; model: ProviderRuntimeModel; }): ProviderRuntimeModel | undefined { - if (!isAnthropicGa1MModel(params.modelId)) { + const fixedContextWindow = resolveAnthropicFixedContextWindow(params.contractModelId); + if (fixedContextWindow === undefined) { return undefined; } if (hasConfiguredModelContextOverride(params.config, params.provider, params.modelId)) { return undefined; } - const nextContextWindow = Math.max( - params.model.contextWindow ?? 0, - ANTHROPIC_GA_1M_CONTEXT_TOKENS, - ); - const nextContextTokens = - typeof params.model.contextTokens === "number" - ? Math.max(params.model.contextTokens, ANTHROPIC_GA_1M_CONTEXT_TOKENS) - : ANTHROPIC_GA_1M_CONTEXT_TOKENS; + const exactContextWindow = isAnthropicFable5Model(params.contractModelId); + const nextContextWindow = exactContextWindow + ? fixedContextWindow + : Math.max(params.model.contextWindow ?? 0, fixedContextWindow); + const nextContextTokens = exactContextWindow + ? fixedContextWindow + : typeof params.model.contextTokens === "number" + ? Math.max(params.model.contextTokens, fixedContextWindow) + : fixedContextWindow; if ( nextContextWindow === params.model.contextWindow && nextContextTokens === params.model.contextTokens @@ -436,48 +433,52 @@ function applyAnthropicGa1MContextWindow(params: { }; } -function applyAnthropicOpus48MaxTokens(params: { +function applyAnthropicModernMaxTokens(params: { modelId: string; model: ProviderRuntimeModel; }): ProviderRuntimeModel | undefined { - if (!isAnthropicOpus48Model(params.modelId)) { + if (!isAnthropic128kOutputModel(params.modelId)) { return undefined; } - if ((params.model.maxTokens ?? 0) >= ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS) { + if ((params.model.maxTokens ?? 
0) >= ANTHROPIC_MODERN_MAX_OUTPUT_TOKENS) { return undefined; } return { ...params.model, - maxTokens: ANTHROPIC_OPUS_48_MAX_OUTPUT_TOKENS, + maxTokens: ANTHROPIC_MODERN_MAX_OUTPUT_TOKENS, }; } -function applyAnthropicOpusThinkingLevelMap(params: { +function applyAnthropicThinkingLevelMap(params: { modelId: string; model: ProviderRuntimeModel; }): ProviderRuntimeModel | undefined { - if (!isAnthropicOpus47OrNewerModel(params.modelId)) { + const fable5 = isAnthropicFable5Model(params.modelId); + const nativeXhigh = fable5 || isAnthropicOpus47OrNewerModel(params.modelId); + if (!matchesAnthropicModernModel(params.modelId)) { return undefined; } - if ( - params.model.thinkingLevelMap?.xhigh === "xhigh" && - params.model.thinkingLevelMap?.max === "max" - ) { + const current = params.model.thinkingLevelMap; + const nativeDefaults = { + ...(fable5 ? { off: "low" as const, minimal: "low" as const } : {}), + xhigh: nativeXhigh ? ("xhigh" as const) : null, + max: "max" as const, + }; + const currentEfforts = current as Record | undefined; + if (Object.keys(nativeDefaults).every((level) => currentEfforts?.[level] !== undefined)) { return undefined; } return { ...params.model, thinkingLevelMap: { - ...params.model.thinkingLevelMap, - xhigh: "xhigh", - max: "max", + ...nativeDefaults, + ...current, }, }; } function matchesAnthropicModernModel(modelId: string): boolean { - const lower = normalizeLowercaseStringOrEmpty(modelId); - return ANTHROPIC_MODERN_MODEL_PREFIXES.some((prefix) => lower.startsWith(prefix)); + return supportsClaudeAdaptiveThinking({ id: modelId }); } function hasImageInput(input: unknown): boolean { @@ -495,18 +496,13 @@ function resolveAnthropicImageMediaInput(modelId: string, modelName?: string) { return undefined; } const refs = [modelId, modelName].filter((value): value is string => typeof value === "string"); - const largeImageOpus = refs.some((ref) => - [ - ANTHROPIC_OPUS_48_MODEL_ID, - ANTHROPIC_OPUS_48_DOT_MODEL_ID, - ANTHROPIC_OPUS_47_MODEL_ID, - 
ANTHROPIC_OPUS_47_DOT_MODEL_ID, - ].some((prefix) => normalizeLowercaseStringOrEmpty(ref).startsWith(prefix)), + const largeImageModel = refs.some( + (ref) => isAnthropicFable5Model(ref) || isAnthropicOpus47OrNewerModel(ref), ); return { image: { - maxSidePx: largeImageOpus ? 2576 : 1568, - preferredSidePx: largeImageOpus ? 2576 : 1568, + maxSidePx: largeImageModel ? 2576 : 1568, + preferredSidePx: largeImageModel ? 2576 : 1568, tokenMode: "provider" as const, }, }; @@ -531,8 +527,26 @@ function applyAnthropicImageInputCapability(params: { function normalizeAnthropicResolvedModel( ctx: ProviderNormalizeResolvedModelContext, ): ProviderRuntimeModel | undefined { - const imageCapableModel = applyAnthropicImageInputCapability(ctx) ?? ctx.model; - const mediaInput = resolveAnthropicImageMediaInput(ctx.modelId, imageCapableModel.name); + const contractModelId = resolveClaudeModelIdentity({ + id: ctx.modelId, + params: ctx.model.params, + }); + if ( + isAnthropicFable5Model(contractModelId) && + normalizeLowercaseStringOrEmpty(ctx.provider) !== PROVIDER_ID + ) { + return undefined; + } + const contractModel = + isAnthropicFable5Model(contractModelId) && !ctx.model.reasoning + ? { ...ctx.model, reasoning: true } + : ctx.model; + const imageCapableModel = + applyAnthropicImageInputCapability({ + modelId: contractModelId, + model: contractModel, + }) ?? contractModel; + const mediaInput = resolveAnthropicImageMediaInput(contractModelId, imageCapableModel.name); const mediaInputModel = mediaInput ? { ...imageCapableModel, @@ -547,20 +561,21 @@ function normalizeAnthropicResolvedModel( } : imageCapableModel; const outputModel = - applyAnthropicOpus48MaxTokens({ - modelId: ctx.modelId, + applyAnthropicModernMaxTokens({ + modelId: contractModelId, model: mediaInputModel, }) ?? mediaInputModel; const thinkingLevelModel = - applyAnthropicOpusThinkingLevelMap({ - modelId: ctx.modelId, + applyAnthropicThinkingLevelMap({ + modelId: contractModelId, model: outputModel, }) ?? 
outputModel; const contextWindowModel = - applyAnthropicGa1MContextWindow({ + applyAnthropicFixedContextWindow({ config: ctx.config, provider: ctx.provider, modelId: ctx.modelId, + contractModelId, model: thinkingLevelModel, }) ?? thinkingLevelModel; return contextWindowModel === ctx.model ? undefined : contextWindowModel; @@ -789,28 +804,13 @@ export function buildAnthropicProvider(): ProviderPlugin { if (!model) { return undefined; } - const imageCapableModel = - applyAnthropicImageInputCapability({ - modelId: ctx.modelId, - model, - }) ?? model; - const outputModel = - applyAnthropicOpus48MaxTokens({ - modelId: ctx.modelId, - model: imageCapableModel, - }) ?? imageCapableModel; - const thinkingLevelModel = - applyAnthropicOpusThinkingLevelMap({ - modelId: ctx.modelId, - model: outputModel, - }) ?? outputModel; return ( - applyAnthropicGa1MContextWindow({ + normalizeAnthropicResolvedModel({ config: ctx.config, provider: ctx.provider, modelId: ctx.modelId, - model: thinkingLevelModel, - }) ?? thinkingLevelModel + model, + }) ?? model ); }, normalizeResolvedModel: (ctx) => normalizeAnthropicResolvedModel(ctx), @@ -820,10 +820,23 @@ export function buildAnthropicProvider(): ProviderPlugin { : undefined, // Publish Claude CLI rows through the provider catalog hook. 
augmentModelCatalog: () => buildClaudeCliCatalogEntries(), - buildReplayPolicy: buildAnthropicReplayPolicy, - isModernModelRef: ({ modelId }) => matchesAnthropicModernModel(modelId), + ...NATIVE_ANTHROPIC_REPLAY_HOOKS, + isModernModelRef: ({ provider, modelId }) => + matchesAnthropicModernModel(modelId) && + (!isAnthropicFable5Model(modelId) || + normalizeLowercaseStringOrEmpty(provider) === PROVIDER_ID), resolveReasoningOutputMode: () => "native", - resolveThinkingProfile: ({ modelId }) => resolveClaudeThinkingProfile(modelId), + resolveThinkingProfile: ({ provider, modelId, params }) => { + const contractModelId = resolveClaudeModelIdentity({ id: modelId, params }); + return isAnthropicFable5Model(contractModelId) && + normalizeLowercaseStringOrEmpty(provider) !== PROVIDER_ID + ? CLAUDE_CLI_OFF_THINKING_PROFILE + : resolveClaudeThinkingProfile(contractModelId, undefined, { + includeNativeMax: [PROVIDER_ID, CLAUDE_CLI_BACKEND_ID].includes( + normalizeLowercaseStringOrEmpty(provider), + ), + }); + }, wrapStreamFn: wrapAnthropicProviderStream, resolveUsageAuth: resolveAnthropicUsageAuth, fetchUsageSnapshot: async (ctx) => diff --git a/extensions/anthropic/replay-policy.ts b/extensions/anthropic/replay-policy.ts deleted file mode 100644 index 63382a3419c6..000000000000 --- a/extensions/anthropic/replay-policy.ts +++ /dev/null @@ -1,13 +0,0 @@ -/** - * Anthropic replay-policy bridge. It re-exports the native Anthropic replay - * policy from the shared provider-model hooks and fails fast if it disappears. 
- */ -import { NATIVE_ANTHROPIC_REPLAY_HOOKS } from "openclaw/plugin-sdk/provider-model-shared"; - -const { buildReplayPolicy } = NATIVE_ANTHROPIC_REPLAY_HOOKS; - -if (!buildReplayPolicy) { - throw new Error("Expected native Anthropic replay hooks to expose buildReplayPolicy."); -} - -export { buildReplayPolicy as buildAnthropicReplayPolicy }; diff --git a/extensions/github-copilot/index.test.ts b/extensions/github-copilot/index.test.ts index abce8f4d3132..2a9a024902cf 100644 --- a/extensions/github-copilot/index.test.ts +++ b/extensions/github-copilot/index.test.ts @@ -214,6 +214,30 @@ describe("github-copilot plugin", () => { expect(profile?.levels.map((level) => level.id)).toContain("xhigh"); }); + it("exposes max thinking for catalog-supported Copilot reasoning efforts", () => { + const provider = registerProviderWithPluginConfig({}); + + const profile = provider.resolveThinkingProfile({ + provider: "github-copilot", + modelId: "claude-fable-5", + compat: { supportedReasoningEfforts: ["low", "medium", "high", "max"] }, + }); + + expect(profile?.levels.map((level) => level.id)).toContain("max"); + }); + + it("does not expose max for non-adaptive Claude Copilot models", () => { + const provider = registerProviderWithPluginConfig({}); + + const profile = provider.resolveThinkingProfile({ + provider: "github-copilot", + modelId: "claude-opus-4-5", + compat: { supportedReasoningEfforts: ["low", "medium", "high", "max"] }, + }); + + expect(profile?.levels.map((level) => level.id)).not.toContain("max"); + }); + it("uses live plugin config to re-enable discovery after startup disable", async () => { mocks.resolveCopilotApiToken.mockResolvedValueOnce({ token: "copilot_api_token", diff --git a/extensions/github-copilot/index.ts b/extensions/github-copilot/index.ts index a9a8a820160a..f9f0ac97d4da 100644 --- a/extensions/github-copilot/index.ts +++ b/extensions/github-copilot/index.ts @@ -21,9 +21,9 @@ import { upsertAuthProfileWithLock, } from 
"openclaw/plugin-sdk/provider-auth"; import { getCachedLiveCatalogValue } from "openclaw/plugin-sdk/provider-catalog-shared"; -import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/string-coerce-runtime"; import { resolveFirstGithubToken } from "./auth.js"; import { githubCopilotMemoryEmbeddingProviderAdapter } from "./embeddings.js"; +import { resolveCopilotExtendedThinkingLevels } from "./model-metadata.js"; import { PROVIDER_ID, fetchCopilotModelCatalog, @@ -35,7 +35,6 @@ import { wrapCopilotProviderStream } from "./stream.js"; const COPILOT_ENV_VARS = ["COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"]; const DEFAULT_COPILOT_MODEL = "github-copilot/claude-opus-4.7"; const DEFAULT_COPILOT_PROFILE_ID = "github-copilot:github"; -const COPILOT_XHIGH_MODEL_IDS = ["gpt-5.4", "gpt-5.3-codex"] as const; type GithubCopilotPluginConfig = { discovery?: { @@ -43,17 +42,6 @@ type GithubCopilotPluginConfig = { }; }; -function compatSupportsXHigh( - compat: { supportedReasoningEfforts?: readonly string[] | null } | null | undefined, -) { - return ( - Array.isArray(compat?.supportedReasoningEfforts) && - compat.supportedReasoningEfforts.some( - (effort) => normalizeOptionalLowercaseString(effort) === "xhigh", - ) - ); -} - async function loadGithubCopilotRuntime() { return await import("./register.runtime.js"); } @@ -463,10 +451,7 @@ export default definePluginEntry({ wrapStreamFn: wrapCopilotProviderStream, buildReplayPolicy: ({ modelId }) => buildGithubCopilotReplayPolicy(modelId), resolveThinkingProfile: ({ modelId, compat }) => { - const modelSupportsXHigh = - COPILOT_XHIGH_MODEL_IDS.includes( - (normalizeOptionalLowercaseString(modelId) ?? "") as never, - ) || compatSupportsXHigh(compat); + const extendedLevels = resolveCopilotExtendedThinkingLevels(modelId, compat); return { levels: [ { id: "off" }, @@ -474,7 +459,7 @@ export default definePluginEntry({ { id: "low" }, { id: "medium" }, { id: "high" }, - ...(modelSupportsXHigh ? 
[{ id: "xhigh" as const }] : []), + ...extendedLevels.map((id) => ({ id })), ], }; }, diff --git a/extensions/github-copilot/model-metadata.ts b/extensions/github-copilot/model-metadata.ts index fbc4b2761fea..0816c892eefc 100644 --- a/extensions/github-copilot/model-metadata.ts +++ b/extensions/github-copilot/model-metadata.ts @@ -1,8 +1,12 @@ // Github Copilot plugin module implements model metadata behavior. import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-model-shared"; +import { supportsClaudeAdaptiveThinking } from "openclaw/plugin-sdk/provider-model-shared"; import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/string-coerce-runtime"; type CopilotRuntimeApi = "anthropic-messages" | "openai-completions" | "openai-responses"; +type CopilotReasoningCompat = { + supportedReasoningEfforts?: readonly string[] | null; +}; const COPILOT_CHAT_COMPLETIONS_COMPAT: ModelDefinitionConfig["compat"] = { supportsStore: false, @@ -10,6 +14,7 @@ const COPILOT_CHAT_COMPLETIONS_COMPAT: ModelDefinitionConfig["compat"] = { supportsUsageInStreaming: false, maxTokensField: "max_tokens", }; +const COPILOT_XHIGH_MODEL_IDS = new Set(["gpt-5.4", "gpt-5.3-codex"]); const STATIC_MODEL_OVERRIDES = new Map>([ [ @@ -20,6 +25,7 @@ const STATIC_MODEL_OVERRIDES = new Map>([ reasoning: true, contextWindow: 1_000_000, maxTokens: 64_000, + thinkingLevelMap: { xhigh: null, max: null }, compat: { supportedReasoningEfforts: ["low", "medium", "high"] }, }, ], @@ -31,7 +37,7 @@ const STATIC_MODEL_OVERRIDES = new Map>([ reasoning: true, contextWindow: 1_000_000, maxTokens: 64_000, - thinkingLevelMap: { xhigh: "xhigh" }, + thinkingLevelMap: { xhigh: "xhigh", max: null }, compat: { supportedReasoningEfforts: ["low", "medium", "high", "xhigh"] }, }, ], @@ -68,6 +74,46 @@ export function resolveCopilotModelCompat( return isCopilotGeminiModelId(normalized) ? 
{ ...COPILOT_CHAT_COMPLETIONS_COMPAT } : undefined; } +function compatSupportsEffort( + compat: CopilotReasoningCompat | null | undefined, + effort: "xhigh" | "max", +): boolean { + return ( + Array.isArray(compat?.supportedReasoningEfforts) && + compat.supportedReasoningEfforts.some( + (candidate) => normalizeOptionalLowercaseString(candidate) === effort, + ) + ); +} + +export function resolveCopilotExtendedThinkingLevels( + modelId: string, + compat?: CopilotReasoningCompat | null, +): Array<"xhigh" | "max"> { + const normalizedModelId = normalizeOptionalLowercaseString(modelId) ?? ""; + const staticCompat = resolveStaticCopilotModelOverride(normalizedModelId)?.compat; + const isClaudeModel = normalizedModelId.includes("claude"); + const supportsAdaptiveClaudeEffort = + !isClaudeModel || supportsClaudeAdaptiveThinking({ id: normalizedModelId }); + const levels: Array<"xhigh" | "max"> = []; + if ( + supportsAdaptiveClaudeEffort && + (COPILOT_XHIGH_MODEL_IDS.has(normalizedModelId) || + compatSupportsEffort(compat, "xhigh") || + compatSupportsEffort(staticCompat, "xhigh")) + ) { + levels.push("xhigh"); + } + if ( + isClaudeModel && + supportsAdaptiveClaudeEffort && + (compatSupportsEffort(compat, "max") || compatSupportsEffort(staticCompat, "max")) + ) { + levels.push("max"); + } + return levels; +} + export function resolveStaticCopilotModelOverride( modelId: string, ): Partial | undefined { diff --git a/extensions/github-copilot/models.test.ts b/extensions/github-copilot/models.test.ts index 9f8ad65c1996..5189967de935 100644 --- a/extensions/github-copilot/models.test.ts +++ b/extensions/github-copilot/models.test.ts @@ -5,7 +5,8 @@ import { buildCopilotModelDefinition, getDefaultCopilotModelIds } from "./models import { deriveCopilotApiBaseUrlFromToken, resolveCopilotApiToken } from "./token.js"; import { fetchCopilotUsage } from "./usage.js"; -vi.mock("openclaw/plugin-sdk/provider-model-shared", () => ({ +vi.mock("openclaw/plugin-sdk/provider-model-shared", 
async (importOriginal) => ({ + ...(await importOriginal()), normalizeModelCompat: (model: Record) => model, resolveProviderEndpoint: (baseUrl: string) => ({ baseUrl, @@ -116,7 +117,7 @@ describe("github-copilot model defaults", () => { cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, contextWindow: 1_000_000, maxTokens: 64_000, - thinkingLevelMap: { xhigh: "xhigh" }, + thinkingLevelMap: { xhigh: "xhigh", max: null }, compat: { supportedReasoningEfforts: ["low", "medium", "high", "xhigh"] }, }); }); @@ -223,8 +224,11 @@ describe("resolveCopilotForwardCompatModel", () => { }); it("preserves static Anthropic thinking maps for Claude Opus 1M fallback rows", () => { + const opus46 = requireResolvedModel(createMockCtx("claude-opus-4.6-1m")); + expect(opus46.thinkingLevelMap).toEqual({ xhigh: null, max: null }); + const result = requireResolvedModel(createMockCtx("claude-opus-4.7-1m-internal")); - expect(result.thinkingLevelMap).toEqual({ xhigh: "xhigh" }); + expect(result.thinkingLevelMap).toEqual({ xhigh: "xhigh", max: null }); expect(result.compat).toEqual({ supportedReasoningEfforts: ["low", "medium", "high", "xhigh"], }); @@ -508,6 +512,24 @@ describe("fetchCopilotModelCatalog", () => { }, }, }, + { + id: "claude-opus-4-5", + name: "Claude Opus 4.5", + object: "model", + vendor: "Anthropic", + capabilities: { + type: "chat", + limits: { + max_context_window_tokens: 200000, + max_output_tokens: 64000, + }, + supports: { + vision: true, + tool_calls: true, + reasoning_effort: ["low", "medium", "high", "max"], + }, + }, + }, { // Internal router — must be filtered out (id starts with "accounts/"). 
id: "accounts/msft/routers/abc123", @@ -557,6 +579,7 @@ describe("fetchCopilotModelCatalog", () => { "gpt-5.3-codex", "gemini-3.1-pro-preview", "claude-opus-4.7-1m-internal", + "claude-opus-4-5", ]); const gpt55 = out.find((m) => m.id === "gpt-5.5"); @@ -589,10 +612,16 @@ describe("fetchCopilotModelCatalog", () => { const opus1m = out.find((m) => m.id === "claude-opus-4.7-1m-internal"); expect(opus1m?.api).toBe("anthropic-messages"); expect(opus1m?.contextWindow).toBe(1_000_000); - expect(opus1m?.thinkingLevelMap).toEqual({ xhigh: "xhigh" }); + expect(opus1m?.thinkingLevelMap).toEqual({ xhigh: "xhigh", max: null }); expect(opus1m?.compat).toEqual({ supportedReasoningEfforts: ["low", "medium", "high", "xhigh"], }); + + const opus45 = out.find((m) => m.id === "claude-opus-4-5"); + expect(opus45?.thinkingLevelMap).toEqual({ xhigh: null, max: null }); + expect(opus45?.compat).toEqual({ + supportedReasoningEfforts: ["low", "medium", "high", "max"], + }); }); it("strips trailing slash from baseUrl when building the /models URL", async () => { diff --git a/extensions/github-copilot/models.ts b/extensions/github-copilot/models.ts index 98ba9a4a352c..28c708daa5ee 100644 --- a/extensions/github-copilot/models.ts +++ b/extensions/github-copilot/models.ts @@ -6,7 +6,10 @@ import type { import { buildCopilotIdeHeaders, COPILOT_INTEGRATION_ID } from "openclaw/plugin-sdk/provider-auth"; import { readProviderJsonArrayFieldResponse } from "openclaw/plugin-sdk/provider-http"; import type { ModelDefinitionConfig } from "openclaw/plugin-sdk/provider-model-shared"; -import { normalizeModelCompat } from "openclaw/plugin-sdk/provider-model-shared"; +import { + normalizeModelCompat, + supportsClaudeAdaptiveThinking, +} from "openclaw/plugin-sdk/provider-model-shared"; import { asPositiveSafeInteger, normalizeOptionalLowercaseString, @@ -173,15 +176,18 @@ function mergeCopilotCompat( function resolveCopilotThinkingLevelMap( api: ModelDefinitionConfig["api"], + modelId: string, compat: 
ModelDefinitionConfig["compat"] | undefined, ): ModelDefinitionConfig["thinkingLevelMap"] | undefined { - if ( - api === "anthropic-messages" && - compat?.supportedReasoningEfforts?.some((effort) => effort === "xhigh") - ) { - return { xhigh: "xhigh" }; + const efforts = compat?.supportedReasoningEfforts; + if (api !== "anthropic-messages" || !Array.isArray(efforts)) { + return undefined; } - return undefined; + const supportsAdaptiveEffort = supportsClaudeAdaptiveThinking({ id: modelId }); + return { + xhigh: supportsAdaptiveEffort && efforts.includes("xhigh") ? "xhigh" : null, + max: supportsAdaptiveEffort && efforts.includes("max") ? "max" : null, + }; } function mapCopilotApiModelToDefinition( @@ -215,7 +221,7 @@ function mapCopilotApiModelToDefinition( const maxTokens = asPositiveSafeInteger(limits?.max_output_tokens) ?? DEFAULT_MAX_TOKENS; const compat = mergeCopilotCompat(resolveCopilotModelCompat(id), supports?.reasoning_effort); const api = resolveCopilotApiForVendor(entry.vendor, id); - const thinkingLevelMap = resolveCopilotThinkingLevelMap(api, compat); + const thinkingLevelMap = resolveCopilotThinkingLevelMap(api, id, compat); const definition: ModelDefinitionConfig = { id, diff --git a/extensions/github-copilot/provider-policy-api.test.ts b/extensions/github-copilot/provider-policy-api.test.ts index 931b26fb8d63..7471ca2bc6f6 100644 --- a/extensions/github-copilot/provider-policy-api.test.ts +++ b/extensions/github-copilot/provider-policy-api.test.ts @@ -34,6 +34,36 @@ describe("github-copilot provider-policy-api", () => { ).toContain("xhigh"); }); + it("appends max when catalog compat advertises it", () => { + expect( + resolveThinkingProfile({ + provider: "github-copilot", + modelId: "claude-fable-5", + compat: { supportedReasoningEfforts: ["low", "medium", "high", "max"] }, + })?.levels.map((level) => level.id), + ).toContain("max"); + }); + + it("does not expose max for non-Anthropic Copilot transports", () => { + expect( + 
resolveThinkingProfile({ + provider: "github-copilot", + modelId: "future-copilot-model", + compat: { supportedReasoningEfforts: ["low", "medium", "high", "max"] }, + })?.levels.map((level) => level.id), + ).not.toContain("max"); + }); + + it("does not expose adaptive effort for older Claude models", () => { + expect( + resolveThinkingProfile({ + provider: "github-copilot", + modelId: "claude-opus-4-5", + compat: { supportedReasoningEfforts: ["low", "medium", "high", "max"] }, + })?.levels.map((level) => level.id), + ).not.toContain("max"); + }); + it("appends xhigh for static Copilot metadata overrides", () => { expect( resolveThinkingProfile({ diff --git a/extensions/github-copilot/provider-policy-api.ts b/extensions/github-copilot/provider-policy-api.ts index e55483d9dabe..01f14352e0ad 100644 --- a/extensions/github-copilot/provider-policy-api.ts +++ b/extensions/github-copilot/provider-policy-api.ts @@ -1,31 +1,12 @@ // Github Copilot API module exposes the plugin public contract. import type { ProviderDefaultThinkingPolicyContext } from "openclaw/plugin-sdk/core"; -import { normalizeOptionalLowercaseString } from "openclaw/plugin-sdk/string-coerce-runtime"; -import { resolveStaticCopilotModelOverride } from "./model-metadata.js"; - -const COPILOT_XHIGH_MODEL_IDS = ["gpt-5.4", "gpt-5.3-codex"] as const; - -function compatSupportsXHigh( - compat: { supportedReasoningEfforts?: readonly string[] | null } | null | undefined, -) { - return ( - Array.isArray(compat?.supportedReasoningEfforts) && - compat.supportedReasoningEfforts.some( - (effort) => normalizeOptionalLowercaseString(effort) === "xhigh", - ) - ); -} +import { resolveCopilotExtendedThinkingLevels } from "./model-metadata.js"; export function resolveThinkingProfile(context: ProviderDefaultThinkingPolicyContext) { if (context.provider.trim().toLowerCase() !== "github-copilot") { return null; } - const normalizedModelId = normalizeOptionalLowercaseString(context.modelId) ?? 
""; - const staticCompat = resolveStaticCopilotModelOverride(normalizedModelId)?.compat; - const modelSupportsXHigh = - COPILOT_XHIGH_MODEL_IDS.includes(normalizedModelId as never) || - compatSupportsXHigh(context.compat) || - compatSupportsXHigh(staticCompat); + const extendedLevels = resolveCopilotExtendedThinkingLevels(context.modelId, context.compat); return { levels: [ @@ -34,7 +15,7 @@ export function resolveThinkingProfile(context: ProviderDefaultThinkingPolicyCon { id: "low" as const }, { id: "medium" as const }, { id: "high" as const }, - ...(modelSupportsXHigh ? [{ id: "xhigh" as const }] : []), + ...extendedLevels.map((id) => ({ id })), ], }; } diff --git a/packages/agent-core/src/agent-loop.test.ts b/packages/agent-core/src/agent-loop.test.ts index 6f4d50ffe1f4..b179e008813e 100644 --- a/packages/agent-core/src/agent-loop.test.ts +++ b/packages/agent-core/src/agent-loop.test.ts @@ -142,3 +142,88 @@ describe("agentLoop streaming updates", () => { } }); }); + +describe("agentLoop thinking state", () => { + function makeAssistantMessage( + activeModel: Model, + content: AssistantMessage["content"], + ): AssistantMessage { + return { + role: "assistant", + content, + api: activeModel.api, + provider: activeModel.provider, + model: activeModel.id, + usage: { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, + }, + stopReason: "stop", + timestamp: 1, + }; + } + + it.each([ + { + name: "disables reasoning after leaving Fable", + initialModel: { ...model, id: "claude-fable-5", thinkingLevelMap: { off: "low" } }, + nextModel: model, + expected: ["low", undefined], + }, + { + name: "uses Fable's low fallback after entering Fable", + initialModel: model, + nextModel: { ...model, id: "claude-fable-5", thinkingLevelMap: { off: "low" } }, + expected: [undefined, "low"], + }, + ])("$name", async ({ initialModel, nextModel, expected }) => { + const observedReasoning: Array 
= []; + let callCount = 0; + const streamFn: StreamFn = (activeModel, _context, options) => { + observedReasoning.push(options?.reasoning); + callCount += 1; + const stream = createAssistantMessageEventStream(); + queueMicrotask(() => { + const content: AssistantMessage["content"] = + callCount === 1 + ? [{ type: "toolCall", id: "tool-1", name: "missing_tool", arguments: {} }] + : [{ type: "text", text: "done" }]; + stream.push({ + type: "done", + reason: "stop", + message: makeAssistantMessage(activeModel, content), + }); + stream.end(); + }); + return stream; + }; + let prepared = false; + const stream = agentLoop( + [{ role: "user", content: "hello", timestamp: 1 }], + { systemPrompt: "", messages: [] }, + { + ...config, + model: initialModel, + thinkingLevel: "off", + reasoning: initialModel.thinkingLevelMap?.off === "low" ? "low" : undefined, + prepareNextTurn: () => { + if (prepared) { + return undefined; + } + prepared = true; + return { model: nextModel }; + }, + }, + undefined, + streamFn, + ); + + await collectEvents(stream); + + expect(observedReasoning).toEqual(expected); + }); +}); diff --git a/packages/agent-core/src/agent-loop.ts b/packages/agent-core/src/agent-loop.ts index 1bd912210c0a..e1e584b9ab66 100644 --- a/packages/agent-core/src/agent-loop.ts +++ b/packages/agent-core/src/agent-loop.ts @@ -9,6 +9,7 @@ import type { ToolResultMessage, } from "../../llm-core/src/index.js"; import type { EventStream as SourceEventStream } from "../../llm-core/src/index.js"; +import { resolveAgentReasoningOption } from "./reasoning.js"; import { type AgentCoreStreamRuntimeDeps, resolveAgentCoreStreamFn } from "./runtime-deps.js"; import type { AgentContext, @@ -341,14 +342,19 @@ async function runLoop( const nextTurnSnapshot = await config.prepareNextTurn?.(nextTurnContext); if (nextTurnSnapshot) { currentContext = nextTurnSnapshot.context ?? currentContext; + const nextModel = nextTurnSnapshot.model ?? 
config.model; + const nextThinkingLevel = nextTurnSnapshot.thinkingLevel ?? config.thinkingLevel; + const shouldResolveReasoning = + nextTurnSnapshot.thinkingLevel !== undefined || + (nextTurnSnapshot.model !== undefined && nextThinkingLevel !== undefined); + const nextReasoning = + shouldResolveReasoning && nextThinkingLevel !== undefined + ? resolveAgentReasoningOption(nextModel, nextThinkingLevel) + : config.reasoning; config = Object.assign({}, config, { - model: nextTurnSnapshot.model ?? config.model, - reasoning: - nextTurnSnapshot.thinkingLevel === undefined - ? config.reasoning - : nextTurnSnapshot.thinkingLevel === "off" - ? undefined - : nextTurnSnapshot.thinkingLevel, + model: nextModel, + thinkingLevel: nextThinkingLevel, + reasoning: nextReasoning, }); } diff --git a/packages/agent-core/src/agent.ts b/packages/agent-core/src/agent.ts index 57fa069ee73e..b052a7256fee 100644 --- a/packages/agent-core/src/agent.ts +++ b/packages/agent-core/src/agent.ts @@ -9,6 +9,7 @@ import type { Transport, } from "../../llm-core/src/index.js"; import { runAgentLoop, runAgentLoopContinue } from "./agent-loop.js"; +import { resolveAgentReasoningOption } from "./reasoning.js"; import { type AgentCoreStreamRuntimeDeps, resolveAgentCoreStreamFn } from "./runtime-deps.js"; import type { AfterToolCallContext, @@ -470,8 +471,11 @@ export class Agent { let skipInitialSteeringPoll = options.skipInitialSteeringPoll === true; return { model: this.mutableState.model, - reasoning: - this.mutableState.thinkingLevel === "off" ? 
undefined : this.mutableState.thinkingLevel, + thinkingLevel: this.mutableState.thinkingLevel, + reasoning: resolveAgentReasoningOption( + this.mutableState.model, + this.mutableState.thinkingLevel, + ), sessionId: this.sessionId, onPayload: this.onPayload, onResponse: this.onResponse, diff --git a/packages/agent-core/src/harness/agent-harness.ts b/packages/agent-core/src/harness/agent-harness.ts index a42f035000eb..e9787d2eddf0 100644 --- a/packages/agent-core/src/harness/agent-harness.ts +++ b/packages/agent-core/src/harness/agent-harness.ts @@ -6,6 +6,7 @@ import type { UserMessage, } from "../../../llm-core/src/index.js"; import { runAgentLoop } from "../agent-loop.js"; +import { resolveAgentReasoningOption } from "../reasoning.js"; import { type AgentCoreRuntimeDeps, resolveAgentCoreStreamFn } from "../runtime-deps.js"; import type { AgentContext, @@ -489,7 +490,8 @@ export class CoreAgentHarness< const turnState = getTurnState(); return { model: turnState.model, - reasoning: turnState.thinkingLevel === "off" ? 
describe("generateSummary thinking options", () => {
  it("maps explicit Fable off to low effort for compaction", async () => {
    // The id is a deployment alias: only params.canonicalModelId names Fable 5,
    // and the catalog `reasoning` flag is false for this model entry.
    const model: Model = {
      id: "production-fable",
      name: "Production Fable",
      api: "anthropic-messages",
      provider: "anthropic",
      baseUrl: "https://api.anthropic.com",
      reasoning: false,
      input: ["text"],
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
      contextWindow: 1_000_000,
      maxTokens: 128_000,
      params: { canonicalModelId: "claude-fable-5" },
    };

    // Canned assistant reply that the stubbed stream finishes with.
    const emptyUsage: AssistantMessage["usage"] = {
      input: 0,
      output: 0,
      cacheRead: 0,
      cacheWrite: 0,
      totalTokens: 0,
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
    };
    const summaryMessage: AssistantMessage = {
      role: "assistant",
      content: [{ type: "text", text: "summary" }],
      api: model.api,
      provider: model.provider,
      model: model.id,
      usage: emptyUsage,
      stopReason: "stop",
      timestamp: 1,
    };

    // The stub checks the reasoning option it receives, then completes immediately.
    const streamFn = vi.fn((_model, _context, options) => {
      expect(options?.reasoning).toBe("low");
      const summaryStream = createAssistantMessageEventStream();
      summaryStream.push({ type: "done", reason: "stop", message: summaryMessage });
      summaryStream.end();
      return summaryStream;
    });

    const history = [{ role: "user", content: "hello", timestamp: 1 }];
    const outcome = await generateSummary(
      history,
      model,
      1000,
      undefined,
      undefined,
      undefined,
      undefined,
      undefined,
      "off",
      streamFn,
    );

    expect(outcome).toEqual({ ok: true, value: "summary" });
    expect(streamFn).toHaveBeenCalledOnce();
  });
});
// NOTE(review): SOURCE shows this alias as `NonNullable;` with its type argument
// lost; `SimpleStreamOptions["reasoning"]` is the presumed operand — confirm
// against the repository.
type EnabledThinkingLevel = NonNullable<SimpleStreamOptions["reasoning"]>;

/** Reasoning values that switch thinking on; any other mapped value counts as disabled. */
const ENABLED_THINKING_LEVELS: ReadonlySet<string> = new Set([
  "minimal",
  "low",
  "medium",
  "high",
  "xhigh",
  "max",
]);

/** Narrow an arbitrary mapped value to a reasoning level that enables thinking. */
function isEnabledThinkingLevel(value: unknown): value is EnabledThinkingLevel {
  return typeof value === "string" && ENABLED_THINKING_LEVELS.has(value);
}

/**
 * Translate the agent's logical thinking level into the `reasoning` stream option.
 *
 * Levels other than "off" pass through unchanged. For "off", the model's
 * `thinkingLevelMap.off` entry is used when it names an enabled level.
 * Otherwise Claude Fable 5 on the Anthropic Messages or Bedrock Converse APIs
 * resolves to "low", and every other model resolves to `undefined`.
 *
 * @param model - Model whose `thinkingLevelMap`, `api`, and identity select the "off" fallback.
 * @param thinkingLevel - Logical thinking level requested for the turn.
 * @returns The reasoning option to send, or `undefined` to leave reasoning unset.
 */
export function resolveAgentReasoningOption(
  model: Model,
  thinkingLevel: ThinkingLevel,
): SimpleStreamOptions["reasoning"] {
  if (thinkingLevel !== "off") {
    return thinkingLevel;
  }
  const mappedOff = model.thinkingLevelMap?.off;
  if (isEnabledThinkingLevel(mappedOff)) {
    return mappedOff;
  }
  // A missing, null, or non-enabled mapping (for example "none") all mean that
  // no enabled fallback was published, so they share the identity-based
  // fallback instead of only the nullish cases reaching it.
  const isFable5Route =
    (model.api === "anthropic-messages" || model.api === "bedrock-converse-stream") &&
    resolveClaudeFable5ModelIdentity(model) !== undefined;
  return isFable5Route ? "low" : undefined;
}
// NOTE(review): SOURCE shows both `Record` usages below without type arguments
// (lost in extraction). The arguments here are the narrowest ones the visible
// code requires — confirm against the repository.

/** Minimal model reference needed to identify a Claude model. */
type ClaudeModelRef = {
  id?: string;
  params?: Record<string, unknown>;
};

/** Model reference that may also carry a provider-published effort map. */
type ClaudeEffortModelRef = ClaudeModelRef & {
  thinkingLevelMap?: Record<string, string | null | undefined>;
};

/** Finds where the Claude family name starts: at the beginning or after "-" or "/". */
const CLAUDE_FAMILY_START = /(?:^|[-/])claude-/;
/** Matches Claude Fable 5; the lookahead rejects longer version numbers such as "50". */
const CLAUDE_FABLE_5 = /(?:^|-)claude-fable-5(?=$|[^a-z0-9])/;
/** Matches the Claude models treated as supporting adaptive thinking. */
const CLAUDE_ADAPTIVE_THINKING =
  /(?:^|-)claude-(?:fable-5|opus-4-(?:6|7|8)|sonnet-4-6)(?=$|[^a-z0-9])/;
/** Matches the Claude models treated as supporting native xhigh effort. */
const CLAUDE_NATIVE_XHIGH = /(?:^|-)claude-(?:fable-5|opus-4-(?:7|8))(?=$|[^a-z0-9])/;

/**
 * Lowercase and trim a model id, drop a leading "anthropic/" prefix, and
 * collapse runs of ".", "_", or whitespace into single hyphens.
 */
function normalizeClaudeModelId(modelId?: string): string {
  const normalized = modelId?.trim().toLowerCase() ?? "";
  const unprefixed = normalized.startsWith("anthropic/")
    ? normalized.slice("anthropic/".length)
    : normalized;
  return unprefixed.replace(/[._\s]+/g, "-");
}

export const CLAUDE_FABLE_5_THINKING_PROFILE = {
  levels: [
    { id: "off" },
    { id: "minimal" },
    { id: "low" },
    { id: "medium" },
    { id: "high" },
    { id: "xhigh" },
    { id: "adaptive" },
    { id: "max" },
  ],
  defaultLevel: "high",
  preserveWhenCatalogReasoningFalse: true,
} as const;

/**
 * Resolve the canonical normalized Claude model id for one runtime model ref.
 *
 * A non-blank string `params.canonicalModelId` takes precedence over `id`.
 * The result starts at the first "claude-" segment when one is present;
 * otherwise the normalized id is returned as is ("" when no id is available).
 */
export function resolveClaudeModelIdentity(ref: ClaudeModelRef): string {
  const canonical = ref.params?.canonicalModelId;
  // A blank override carries no identity, so fall back to the runtime id
  // instead of resolving the whole model to "".
  const configuredCanonicalModelId =
    typeof canonical === "string" && canonical.trim().length > 0 ? canonical : undefined;
  const normalized = normalizeClaudeModelId(configuredCanonicalModelId ?? ref.id);
  const match = CLAUDE_FAMILY_START.exec(normalized);
  if (!match) {
    return normalized;
  }
  // Skip the separator when the match began with "-" or "/" rather than "claude-".
  return normalized.slice(match.index + (match[0].startsWith("claude-") ? 0 : 1));
}

/**
 * Resolve Claude Fable 5 through direct ids, cloud ids, or deployment metadata.
 *
 * @returns The identity from "claude-fable-5" onward, or `undefined` for any other model.
 */
export function resolveClaudeFable5ModelIdentity(ref: ClaudeModelRef): string | undefined {
  const normalized = resolveClaudeModelIdentity(ref);
  const match = CLAUDE_FABLE_5.exec(normalized);
  if (!match) {
    return undefined;
  }
  return normalized.slice(match.index + (match[0].startsWith("-") ? 1 : 0));
}

/** Return whether a Claude model supports adaptive thinking. */
export function supportsClaudeAdaptiveThinking(ref: ClaudeModelRef): boolean {
  return CLAUDE_ADAPTIVE_THINKING.test(resolveClaudeModelIdentity(ref));
}

/** Return whether a Claude model supports native max effort. */
export function supportsClaudeNativeMaxEffort(ref: ClaudeModelRef): boolean {
  return supportsClaudeAdaptiveThinking(ref);
}

/** Return whether a Claude model supports native xhigh effort. */
export function supportsClaudeNativeXhighEffort(ref: ClaudeModelRef): boolean {
  return CLAUDE_NATIVE_XHIGH.test(resolveClaudeModelIdentity(ref));
}

/**
 * Fill native Claude effort mappings only when the provider did not publish a
 * narrower route-specific contract.
 *
 * @returns The provider's own map when present (same object), a native
 *   `{ xhigh, max }` map for models with native max effort, or `undefined`.
 */
export function resolveClaudeNativeThinkingLevelMap(
  ref: ClaudeEffortModelRef,
): Record<string, string | null | undefined> | undefined {
  if (ref.thinkingLevelMap !== undefined) {
    return ref.thinkingLevelMap;
  }
  if (!supportsClaudeNativeMaxEffort(ref)) {
    return undefined;
  }
  return {
    xhigh: supportsClaudeNativeXhighEffort(ref) ? "xhigh" : null,
    max: "max",
  };
}
makeAnthropicTransportModel({ + id: "claude-fable-5", + name: "Claude Fable 5", + provider, + }), + { messages: [{ role: "user", content: "hello" }] } as AnthropicStreamContext, + { apiKey: "sk-ant-api" } as AnthropicStreamOptions, + ), + ); + const eventTypes: string[] = []; + for await (const event of stream as AsyncIterable<{ type: string }>) { + eventTypes.push(event.type); + } + const result = await stream.result(); + + expect(eventTypes).toEqual(["error"]); + expect(result.stopReason).toBe("error"); + expect(result.content).toEqual([]); + expect(result.errorMessage).toBe( + "Anthropic refusal (category: bio): This request is not allowed.", + ); + expect(result.usage).toMatchObject({ input: 3, output: 2 }); + expect(result.diagnostics).toEqual([ + expect.objectContaining({ + type: "provider_refusal", + details: { + provider, + category: "bio", + explanation: "This request is not allowed.", + }, + }), + ]); + }, + ); + + it("discards buffered Fable output when the transport ends before terminal status", async () => { + guardedFetchMock.mockResolvedValueOnce( + createSseResponse([ + { + type: "content_block_start", + index: 0, + content_block: { type: "text", text: "" }, + }, + { + type: "content_block_delta", + index: 0, + delta: { type: "text_delta", text: "unsafe partial output" }, + }, + ]), + ); + const streamFn = createAnthropicMessagesTransportStreamFn(); + const stream = await Promise.resolve( + streamFn( + makeAnthropicTransportModel({ + id: "claude-fable-5", + name: "Claude Fable 5", + }), + { messages: [{ role: "user", content: "hello" }] } as AnthropicStreamContext, + { apiKey: "sk-ant-api" } as AnthropicStreamOptions, + ), + ); + const eventTypes: string[] = []; + for await (const event of stream as AsyncIterable<{ type: string }>) { + eventTypes.push(event.type); + } + const result = await stream.result(); + + expect(eventTypes).toEqual(["error"]); + expect(result.stopReason).toBe("error"); + expect(result.content).toEqual([]); + 
expect(result.errorMessage).toBe("Anthropic stream ended before message_stop"); + }); + it("preserves unsafe integer Anthropic tool-use input deltas", async () => { guardedFetchMock.mockResolvedValueOnce( createSseResponse([ @@ -1385,7 +1499,10 @@ describe("anthropic transport stream", () => { const highSurrogate = String.fromCharCode(0xd83d); const signedThinking = `keep${highSurrogate}signed`; await runTransportStream( - makeAnthropicTransportModel(), + makeAnthropicTransportModel({ + id: "claude-fable-5", + name: "Claude Fable 5", + }), { messages: [ { role: "user", content: "hello" }, @@ -1393,7 +1510,7 @@ describe("anthropic transport stream", () => { role: "assistant", provider: "anthropic", api: "anthropic-messages", - model: "claude-sonnet-4-6", + model: "claude-fable-5", stopReason: "stop", timestamp: 0, content: [ @@ -1402,6 +1519,11 @@ describe("anthropic transport stream", () => { thinking: signedThinking, thinkingSignature: "sig_1", }, + { + type: "thinking", + thinking: "", + thinkingSignature: "sig_omitted", + }, ], }, { role: "user", content: "again" }, @@ -1423,6 +1545,11 @@ describe("anthropic transport stream", () => { thinking: signedThinking, signature: "sig_1", }, + { + type: "thinking", + thinking: "", + signature: "sig_omitted", + }, ]); }); @@ -1942,7 +2069,7 @@ describe("anthropic transport stream", () => { expect(cancelCalled).toBe(true); }); - it("maps adaptive thinking effort for Claude 4.6 transport runs", async () => { + it("maps unsupported xhigh to high effort for Claude 4.6 transport runs", async () => { const model = makeAnthropicTransportModel({ id: "claude-opus-4-6", name: "Claude Opus 4.6", @@ -1962,7 +2089,239 @@ describe("anthropic transport stream", () => { const payload = latestAnthropicRequest().payload; expect(payload.thinking).toEqual({ type: "adaptive" }); - expect(payload.output_config).toEqual({ effort: "max" }); + expect(payload.output_config).toEqual({ effort: "high" }); + }); + + it("does not infer adaptive 
thinking from forward-compatible effort maps", async () => { + const model = makeAnthropicTransportModel({ + id: "claude-future", + name: "Future Claude", + provider: "github-copilot", + reasoning: true, + thinkingLevelMap: { xhigh: null, max: "max" }, + }); + + await runTransportStream( + model, + { + messages: [{ role: "user", content: "Think as much as supported." }], + } as AnthropicStreamContext, + { + apiKey: "copilot-token", + reasoning: "max", + } as AnthropicStreamOptions, + ); + + const payload = latestAnthropicRequest().payload; + expect(payload.thinking).toEqual({ type: "enabled", budget_tokens: 7168 }); + expect(payload.output_config).toBeUndefined(); + }); + + it("honors provider effort restrictions for transport runs", async () => { + const model = makeAnthropicTransportModel({ + id: "claude-opus-4.7-1m-internal", + name: "Claude Opus 4.7", + provider: "github-copilot", + maxTokens: 64_000, + thinkingLevelMap: { xhigh: "xhigh" }, + }); + + await runTransportStream( + model, + { + messages: [{ role: "user", content: "Think as much as supported." }], + } as AnthropicStreamContext, + { + apiKey: "copilot-token", + reasoning: "max", + } as AnthropicStreamOptions, + ); + + expect(latestAnthropicRequest().payload.output_config).toEqual({ effort: "xhigh" }); + }); + + it("uses canonical Claude policy for transport deployment aliases", async () => { + const model = makeAnthropicTransportModel({ + id: "production-claude", + name: "Production Claude", + params: { canonicalModelId: "claude-opus-4-8" }, + reasoning: false, + thinkingLevelMap: { xhigh: "xhigh", max: "max" }, + maxTokens: 8192, + }); + + await runTransportStream( + model, + { + messages: [{ role: "user", content: "Think extra hard." 
}], + } as AnthropicStreamContext, + { + apiKey: "sk-ant-api", + reasoning: "xhigh", + temperature: 0.2, + } as AnthropicStreamOptions, + ); + + const payload = latestAnthropicRequest().payload; + expect(payload.model).toBe("production-claude"); + expect(payload.thinking).toEqual({ type: "adaptive" }); + expect(payload.output_config).toEqual({ effort: "xhigh" }); + expect(payload).not.toHaveProperty("temperature"); + }); + + it.each([ + { canonicalModelId: "claude-opus-4-8", expectedTemperature: undefined }, + { canonicalModelId: "claude-opus-4-6", expectedTemperature: 0.2 }, + ] as const)( + "normalizes temperature for canonical $canonicalModelId transport aliases when thinking is off", + async ({ canonicalModelId, expectedTemperature }) => { + const model = makeAnthropicTransportModel({ + id: "production-claude", + name: "Production Claude", + params: { canonicalModelId }, + reasoning: false, + thinkingLevelMap: { xhigh: "xhigh", max: "max" }, + maxTokens: 8192, + }); + + await runTransportStream( + model, + { messages: [{ role: "user", content: "Reply briefly." 
}] } as AnthropicStreamContext, + { apiKey: "sk-ant-api", temperature: 0.2 } as AnthropicStreamOptions, + ); + + expect(latestAnthropicRequest().payload.temperature).toBe(expectedTemperature); + }, + ); + + it("uses always-on adaptive thinking for Claude Fable 5 transport runs", async () => { + const model = makeAnthropicTransportModel({ + id: "prod-primary", + name: "Production Claude", + provider: "microsoft-foundry", + params: { canonicalModelId: "claude-fable-5" }, + reasoning: false, + baseUrl: "https://example.services.ai.azure.com/anthropic", + maxTokens: 128_000, + }); + + guardedFetchMock.mockResolvedValueOnce( + createSseResponse([ + { + type: "message_start", + message: { + id: "msg_1", + model: "claude-fable-5", + usage: { input_tokens: 1, output_tokens: 0 }, + }, + }, + { + type: "message_delta", + delta: { stop_reason: "end_turn" }, + usage: { input_tokens: 1, output_tokens: 1 }, + }, + { type: "message_stop" }, + ]), + ); + const result = await runTransportStream( + model, + { + messages: [{ role: "user", content: "Think." 
}], + } as AnthropicStreamContext, + { + apiKey: "sk-ant-api", + temperature: 0.2, + toolChoice: { type: "tool", name: "read_file" }, + } as AnthropicStreamOptions, + ); + + const payload = latestAnthropicRequest().payload; + expect(payload.thinking).toEqual({ type: "adaptive", display: "summarized" }); + expect(payload.output_config).toEqual({ effort: "high" }); + expect(payload.tool_choice).toEqual({ type: "auto" }); + expect(payload).not.toHaveProperty("temperature"); + expect(result.responseModel).toBe("claude-fable-5"); + }); + + it("maps Claude Fable 5 transport thinking levels to adaptive effort", async () => { + const model = makeAnthropicTransportModel({ + id: "claude-fable-5", + name: "Claude Fable 5", + maxTokens: 128_000, + }); + + guardedFetchMock.mockImplementation(async () => createSseResponse()); + for (const testCase of [ + { reasoning: "off", effort: "low" }, + { reasoning: "minimal", effort: "low" }, + { reasoning: "high", effort: "high" }, + ] as const) { + await runTransportStream( + model, + { + messages: [{ role: "user", content: "Think carefully." }], + } as AnthropicStreamContext, + { + apiKey: "sk-ant-api", + reasoning: testCase.reasoning, + } as unknown as AnthropicStreamOptions, + ); + + const payload = latestAnthropicRequest().payload; + expect(payload.thinking).toEqual({ type: "adaptive", display: "summarized" }); + expect(payload.output_config).toEqual({ effort: testCase.effort }); + } + }); + + it("honors provider effort restrictions for Claude Fable 5 transport runs", async () => { + const model = makeAnthropicTransportModel({ + id: "claude-fable-5", + name: "Claude Fable 5", + provider: "github-copilot", + reasoning: false, + thinkingLevelMap: { xhigh: null, max: null }, + maxTokens: 128_000, + }); + + guardedFetchMock.mockImplementation(async () => createSseResponse()); + await runTransportStream( + model, + { + messages: [{ role: "user", content: "Think carefully." 
}], + } as AnthropicStreamContext, + { + apiKey: "copilot-token", + reasoning: "xhigh", + } as unknown as AnthropicStreamOptions, + ); + + const payload = latestAnthropicRequest().payload; + expect(payload.thinking).toEqual({ type: "adaptive", display: "summarized" }); + expect(payload.output_config).toEqual({ effort: "high" }); + }); + + it("uses the Claude Fable 5 contract on Anthropic Vertex transport runs", async () => { + const model = makeAnthropicTransportModel({ + id: "claude-fable-5", + name: "Claude Fable 5", + provider: "anthropic-vertex", + maxTokens: 128_000, + }); + + await runTransportStream( + model, + { + messages: [{ role: "user", content: "Think carefully." }], + } as AnthropicStreamContext, + { + apiKey: "vertex-token", + reasoning: "high", + } as AnthropicStreamOptions, + ); + + const payload = latestAnthropicRequest().payload; + expect(payload.thinking).toEqual({ type: "adaptive", display: "summarized" }); + expect(payload.output_config).toEqual({ effort: "high" }); }); it("maps xhigh thinking effort for Claude Opus 4.8 transport runs", async () => { @@ -1970,6 +2329,7 @@ describe("anthropic transport stream", () => { id: "claude-opus-4-8", name: "Claude Opus 4.8", maxTokens: 8192, + thinkingLevelMap: { xhigh: "xhigh", max: "max" }, }); await runTransportStream( @@ -2012,11 +2372,13 @@ describe("anthropic transport stream", () => { expect(payload.output_config).toEqual({ effort: "max" }); }); - it("clamps max thinking effort for Claude models without native max support", async () => { + it("honors provider routes that exclude native max effort", async () => { const model = makeAnthropicTransportModel({ id: "claude-sonnet-4-6", name: "Claude Sonnet 4.6", + provider: "github-copilot", maxTokens: 8192, + thinkingLevelMap: { xhigh: null, max: null }, }); await runTransportStream( diff --git a/src/agents/anthropic-transport-stream.ts b/src/agents/anthropic-transport-stream.ts index f44274d8ba41..5ca35536ba62 100644 --- 
a/src/agents/anthropic-transport-stream.ts +++ b/src/agents/anthropic-transport-stream.ts @@ -5,11 +5,27 @@ */ import { normalizeLowercaseStringOrEmpty } from "@openclaw/normalization-core/string-coerce"; import { getEnvApiKey } from "../llm/env-api-keys.js"; -import { calculateCost } from "../llm/model-utils.js"; +import { calculateCost, clampThinkingLevel } from "../llm/model-utils.js"; import type { AnthropicOptions } from "../llm/providers/anthropic.js"; -import type { Context, Model, SimpleStreamOptions, ThinkingLevel } from "../llm/types.js"; +import type { + AssistantMessageDiagnostic, + Context, + Model, + SimpleStreamOptions, + ThinkingLevel, +} from "../llm/types.js"; import { parseStreamingJson } from "../llm/utils/json-parse.js"; +import { + resolveClaudeNativeThinkingLevelMap, + supportsClaudeAdaptiveThinking, + supportsClaudeNativeMaxEffort, + supportsClaudeNativeXhighEffort, + usesClaudeFable5MessagesContract, +} from "../shared/anthropic-model-contract.js"; +import { applyAnthropicRefusal } from "../shared/anthropic-refusal.js"; import { MALFORMED_STREAMING_FRAGMENT_ERROR_MESSAGE } from "../shared/assistant-error-format.js"; +import { createDeferredEventBuffer } from "../shared/deferred-event-buffer.js"; +import { notifyLlmRequestActivity } from "../shared/llm-request-activity.js"; import { applyAnthropicPayloadPolicyToParams, resolveAnthropicPayloadPolicy, @@ -102,6 +118,7 @@ type MutableAssistantOutput = { api: "anthropic-messages"; provider: string; model: string; + responseModel?: string; usage: { input: number; output: number; @@ -114,46 +131,58 @@ type MutableAssistantOutput = { timestamp: number; responseId?: string; errorMessage?: string; + diagnostics?: AssistantMessageDiagnostic[]; }; const EMPTY_ANTHROPIC_MESSAGES_FALLBACK_TEXT = "."; -function isClaudeOpus47OrNewerModel(modelId: string): boolean { - return ( - modelId.includes("opus-4-8") || - modelId.includes("opus-4.8") || - modelId.includes("opus-4-7") || - 
/**
 * Convert the caller's tool choice into the object form placed on the request.
 *
 * On the Claude Fable 5 Messages contract, a forced choice ("any" or a
 * specific `{ type: "tool" }`) is replaced with `{ type: "auto" }`. String
 * choices are wrapped as `{ type }`, and everything else is passed through.
 */
function normalizeAnthropicToolChoice(
  model: AnthropicTransportModel,
  toolChoice: AnthropicTransportOptions["toolChoice"],
) {
  const forcesToolUse =
    toolChoice === "any" || (typeof toolChoice === "object" && toolChoice.type === "tool");
  if (forcesToolUse && usesClaudeFable5MessagesContract(model)) {
    return { type: "auto" as const };
  }
  if (typeof toolChoice === "string") {
    return { type: toolChoice };
  }
  return toolChoice;
}

/** Transport-local alias for the shared native-xhigh capability check. */
function supportsNativeXhighEffort(model: AnthropicTransportModel): boolean {
  return supportsClaudeNativeXhighEffort(model);
}

/** Transport-local alias for the shared adaptive-thinking capability check. */
function supportsAdaptiveThinking(model: AnthropicTransportModel): boolean {
  return supportsClaudeAdaptiveThinking(model);
}

/**
 * Pick the adaptive effort value for a requested thinking level.
 *
 * The level is first clamped with `clampThinkingLevel` against the model's
 * effort map (the provider-published one, or the native Claude map). A string
 * entry in that map for the clamped level is returned directly; otherwise the
 * built-in table below applies.
 */
function mapThinkingLevelToEffort(
  level: ThinkingLevel | "off",
  model: AnthropicTransportModel,
): AnthropicAdaptiveEffort {
  const effortMap = resolveClaudeNativeThinkingLevelMap(model);
  // Models with a string canonicalModelId are clamped with `reasoning: true`
  // set, whatever the entry's own `reasoning` flag says.
  const reasoningOverride =
    typeof model.params?.canonicalModelId === "string" ? { reasoning: true } : {};
  const effortMapOverride = effortMap ? { thinkingLevelMap: effortMap } : {};
  const clampModel = { ...model, ...reasoningOverride, ...effortMapOverride };
  const resolvedLevel = clampThinkingLevel(clampModel, level);

  const mapped = effortMap?.[resolvedLevel];
  if (typeof mapped === "string") {
    return mapped as AnthropicAdaptiveEffort;
  }

  if (resolvedLevel === "off" || resolvedLevel === "minimal" || resolvedLevel === "low") {
    return "low";
  }
  if (resolvedLevel === "medium") {
    return "medium";
  }
  if (resolvedLevel === "xhigh") {
    return supportsNativeXhighEffort(model) ? "xhigh" : "high";
  }
  if (resolvedLevel === "max") {
    return supportsClaudeNativeMaxEffort(model) ? "max" : "high";
  }
  // "high" and any level not listed above.
  return "high";
}
const fetch = @@ -861,7 +893,11 @@ function buildAnthropicParams( }, ]; } - if (options?.temperature !== undefined && !options.thinkingEnabled) { + if ( + options?.temperature !== undefined && + !options.thinkingEnabled && + !supportsNativeXhighEffort(model) + ) { params.temperature = options.temperature; } if (options?.stop !== undefined && options.stop.length > 0) { @@ -870,17 +906,21 @@ function buildAnthropicParams( if (context.tools) { params.tools = convertAnthropicTools(context.tools, isOAuthToken); } - if (model.reasoning) { - if (options?.thinkingEnabled) { - if (supportsAdaptiveThinking(model.id)) { - params.thinking = { type: "adaptive" }; - if (options.effort) { - params.output_config = { effort: options.effort }; + const fable5 = usesClaudeFable5MessagesContract(model); + if (fable5 || model.reasoning || supportsAdaptiveThinking(model)) { + if (fable5 || options?.thinkingEnabled) { + if (supportsAdaptiveThinking(model)) { + params.thinking = fable5 + ? { type: "adaptive", display: "summarized" } + : { type: "adaptive" }; + const effort = options?.effort ?? (fable5 ? "high" : undefined); + if (effort) { + params.output_config = { effort }; } } else { params.thinking = { type: "enabled", - budget_tokens: options.thinkingBudgetTokens || 1024, + budget_tokens: options?.thinkingBudgetTokens || 1024, }; } } else if (options?.thinkingEnabled === false) { @@ -891,8 +931,7 @@ function buildAnthropicParams( params.metadata = { user_id: options.metadata.user_id }; } if (options?.toolChoice) { - params.tool_choice = - typeof options.toolChoice === "string" ? 
{ type: options.toolChoice } : options.toolChoice; + params.tool_choice = normalizeAnthropicToolChoice(model, options.toolChoice); } applyAnthropicPayloadPolicyToParams(params, payloadPolicy); return params; @@ -932,12 +971,15 @@ function resolveAnthropicTransportOptions( reasoning: options?.reasoning, }; if (!options?.reasoning) { - resolved.thinkingEnabled = false; + resolved.thinkingEnabled = usesClaudeFable5MessagesContract(model); + if (resolved.thinkingEnabled) { + resolved.effort = "high"; + } return resolved; } - if (supportsAdaptiveThinking(model.id)) { + if (supportsAdaptiveThinking(model)) { resolved.thinkingEnabled = true; - resolved.effort = mapThinkingLevelToEffort(options.reasoning, model.id) as NonNullable< + resolved.effort = mapThinkingLevelToEffort(options.reasoning, model) as NonNullable< AnthropicOptions["effort"] >; return resolved; @@ -971,6 +1013,14 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { stopReason: "stop", timestamp: Date.now(), }; + // Fable classifiers can refuse after partial generation, so no event is + // safe to expose until the terminal stop reason is known. + const refusalBuffer = usesClaudeFable5MessagesContract(model) + ? createDeferredEventBuffer(stream, () => + notifyLlmRequestActivity(options?.signal), + ) + : undefined; + const eventSink = refusalBuffer ?? stream; try { const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? ""; if (!apiKey) { @@ -997,6 +1047,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { const allowReasoningContentReplay = supportsReasoningContentReplay(model); const reasoningContentThinkingBlocks = new Map(); const reasoningContentTextBlocks = new Map(); + let sawMessageStop = false; const eventIndexKey = (eventIndex: unknown) => typeof eventIndex === "number" ? 
eventIndex : -1; const appendReasoningContentThinkingDelta = ( @@ -1021,7 +1072,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { output.content.push(block); contentIndex = output.content.length - 1; reasoningContentThinkingBlocks.set(key, contentIndex); - stream.push({ + eventSink.push({ type: "thinking_start", contentIndex, partial: output as never, @@ -1029,7 +1080,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { } block.thinking += text; block.thinkingSignature = "reasoning_content"; - stream.push({ + eventSink.push({ type: "thinking_delta", contentIndex, delta: text, @@ -1059,14 +1110,14 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { output.content.push(block); contentIndex = output.content.length - 1; reasoningContentTextBlocks.set(key, contentIndex); - stream.push({ + eventSink.push({ type: "text_start", contentIndex, partial: output as never, }); } block.text += text; - stream.push({ + eventSink.push({ type: "text_delta", contentIndex, delta: text, @@ -1081,7 +1132,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { reasoningContentThinkingBlocks.delete(key); const block = output.content[thinkingContentIndex]; if (block?.type === "thinking") { - stream.push({ + eventSink.push({ type: "thinking_end", contentIndex: thinkingContentIndex, content: block.thinking, @@ -1096,7 +1147,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { reasoningContentTextBlocks.delete(key); const block = output.content[textContentIndex]; if (block?.type === "text") { - stream.push({ + eventSink.push({ type: "text_end", contentIndex: textContentIndex, content: block.text, @@ -1111,10 +1162,11 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { } if (event.type === "message_start") { const message = event.message as - | { id?: string; usage?: Record } + | { id?: string; model?: string; usage?: Record } | undefined; const usage = 
message?.usage ?? {}; output.responseId = typeof message?.id === "string" ? message.id : undefined; + output.responseModel = typeof message?.model === "string" ? message.model : undefined; output.usage.input = typeof usage.input_tokens === "number" ? usage.input_tokens : 0; output.usage.output = typeof usage.output_tokens === "number" ? usage.output_tokens : 0; output.usage.cacheRead = @@ -1133,7 +1185,11 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { // (e.g. invalid thinking signatures) arrive before any non-error event // is yielded, keeping yieldedOutput=false in pumpStreamWithRecovery // and allowing the thinking-block recovery retry to fire. - stream.push({ type: "start", partial: output as never }); + eventSink.push({ type: "start", partial: output as never }); + continue; + } + if (event.type === "message_stop") { + sawMessageStop = true; continue; } if (event.type === "content_block_start") { @@ -1147,13 +1203,13 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { const block: TransportContentBlock = { type: "text", text, index }; output.content.push(block); const contentIndex = output.content.length - 1; - stream.push({ + eventSink.push({ type: "text_start", contentIndex, partial: output as never, }); if (text.length > 0) { - stream.push({ + eventSink.push({ type: "text_delta", contentIndex, delta: text, @@ -1174,13 +1230,13 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { }; output.content.push(block); const contentIndex = output.content.length - 1; - stream.push({ + eventSink.push({ type: "thinking_start", contentIndex, partial: output as never, }); if (thinking.length > 0) { - stream.push({ + eventSink.push({ type: "thinking_delta", contentIndex, delta: thinking, @@ -1198,7 +1254,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { index, }; output.content.push(block); - stream.push({ + eventSink.push({ type: "thinking_start", contentIndex: 
output.content.length - 1, partial: output as never, @@ -1223,7 +1279,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { index, }; output.content.push(block); - stream.push({ + eventSink.push({ type: "toolcall_start", contentIndex: output.content.length - 1, partial: output as never, @@ -1255,7 +1311,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { if (text.length > 0) { if (block?.type === "text") { block.text += text; - stream.push({ + eventSink.push({ type: "text_delta", contentIndex: index, delta: text, @@ -1276,7 +1332,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { block = { type: "text", text: "", index: recoveredIndex }; output.content.push(block); index = output.content.length - 1; - stream.push({ + eventSink.push({ type: "text_start", contentIndex: index, partial: output as never, @@ -1288,7 +1344,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { typeof delta.text === "string" ) { block.text += delta.text; - stream.push({ + eventSink.push({ type: "text_delta", contentIndex: index, delta: delta.text, @@ -1302,7 +1358,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { typeof delta.thinking === "string" ) { block.thinking += delta.thinking; - stream.push({ + eventSink.push({ type: "thinking_delta", contentIndex: index, delta: delta.thinking, @@ -1318,7 +1374,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { const partialJson = `${block.partialJson ?? 
""}${delta.partial_json}`; block.partialJson = partialJson; block.arguments = parseAnthropicToolCallArguments(partialJson); - stream.push({ + eventSink.push({ type: "toolcall_delta", contentIndex: index, delta: delta.partial_json, @@ -1349,7 +1405,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { } delete block.index; if (block.type === "text") { - stream.push({ + eventSink.push({ type: "text_end", contentIndex: index, content: block.text, @@ -1359,7 +1415,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { continue; } if (block.type === "thinking") { - stream.push({ + eventSink.push({ type: "thinking_end", contentIndex: index, content: block.thinking, @@ -1373,7 +1429,7 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { block.arguments = parseAnthropicToolCallArguments(block.partialJson); } delete block.partialJson; - stream.push({ + eventSink.push({ type: "toolcall_end", contentIndex: index, toolCall: block as never, @@ -1384,10 +1440,16 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { continue; } if (event.type === "message_delta") { - const delta = event.delta as { stop_reason?: string } | undefined; + const delta = event.delta as + | { stop_reason?: string; stop_details?: unknown } + | undefined; const usage = event.usage as Record | undefined; if (delta?.stop_reason) { - output.stopReason = mapStopReason(delta.stop_reason); + if (delta.stop_reason === "refusal") { + applyAnthropicRefusal(output, delta.stop_details, model.provider); + } else { + output.stopReason = mapStopReason(delta.stop_reason); + } } if (typeof usage?.input_tokens === "number") { output.usage.input = usage.input_tokens; @@ -1409,8 +1471,22 @@ export function createAnthropicMessagesTransportStreamFn(): StreamFn { calculateCost(model, output.usage); } } - finalizeTransportStream({ stream, output, signal: transportOptions.signal }); + if (refusalBuffer && !sawMessageStop) { + throw new 
Error("Anthropic stream ended before message_stop"); + } + if (transportOptions.signal?.aborted) { + throw new Error("Request was aborted"); + } + if (output.stopReason === "aborted" || output.stopReason === "error") { + throw new Error(output.errorMessage ?? "An unknown error occurred"); + } + refusalBuffer?.flush(); + finalizeTransportStream({ stream, output }); } catch (error) { + if (refusalBuffer) { + refusalBuffer.discard(); + output.content = []; + } failTransportStream({ stream, output, diff --git a/src/agents/context.test.ts b/src/agents/context.test.ts index 6b7543a5ced7..2a240ba9a014 100644 --- a/src/agents/context.test.ts +++ b/src/agents/context.test.ts @@ -3,6 +3,7 @@ import { describe, expect, it, vi } from "vitest"; import { createSessionManagerRuntimeRegistry } from "./agent-hooks/session-manager-runtime-registry.js"; import { ANTHROPIC_CONTEXT_1M_TOKENS, + ANTHROPIC_FABLE_CONTEXT_TOKENS, applyConfiguredContextWindows, applyDiscoveredContextWindows, resolveContextTokensForModel, @@ -387,26 +388,33 @@ describe("resolveContextTokensForModel", () => { expect(result).toBe(ANTHROPIC_CONTEXT_1M_TOKENS); }); - it("returns 1M context for Anthropic sonnet 4 even when config reports 200k", () => { - const result = resolveContextTokensForModel({ - cfg: { - models: { - providers: { - anthropic: { - baseUrl: "https://api.anthropic.com", - models: [testModelContextWindow("claude-sonnet-4-6", 200_000)], + it.each([ + ["anthropic", "claude-fable-5", ANTHROPIC_FABLE_CONTEXT_TOKENS], + ["anthropic-vertex", "claude-fable-5", ANTHROPIC_FABLE_CONTEXT_TOKENS], + ["anthropic", "claude-sonnet-4-6", ANTHROPIC_CONTEXT_1M_TOKENS], + ])( + "returns the fixed context for %s model %s even when config reports 200k", + (provider, modelId, expectedContextTokens) => { + const result = resolveContextTokensForModel({ + cfg: { + models: { + providers: { + [provider]: { + baseUrl: "https://api.anthropic.com", + models: [testModelContextWindow(modelId, 200_000)], + }, }, }, }, - }, - 
provider: "anthropic", - model: "claude-sonnet-4-6", - fallbackContextTokens: 200_000, - allowAsyncLoad: false, - }); + provider, + model: modelId, + fallbackContextTokens: 200_000, + allowAsyncLoad: false, + }); - expect(result).toBe(ANTHROPIC_CONTEXT_1M_TOKENS); - }); + expect(result).toBe(expectedContextTokens); + }, + ); it("keeps older Anthropic Sonnet 4.x models at the configured window when context1m is set", () => { const result = resolveContextTokensForModel({ diff --git a/src/agents/context.ts b/src/agents/context.ts index e3ad2ec5d94c..e3abc5b59602 100644 --- a/src/agents/context.ts +++ b/src/agents/context.ts @@ -46,6 +46,7 @@ const ANTHROPIC_GA_1M_MODEL_PREFIXES = [ "claude-sonnet-4.6", ] as const; export const ANTHROPIC_CONTEXT_1M_TOKENS = 1_048_576; +export const ANTHROPIC_FABLE_CONTEXT_TOKENS = 1_000_000; const CONFIG_LOAD_RETRY_POLICY: BackoffPolicy = { initialMs: 1_000, maxMs: 60_000, @@ -67,9 +68,8 @@ export function applyDiscoveredContextWindows(params: { : typeof model.contextWindow === "number" ? Math.trunc(model.contextWindow) : undefined; - const contextTokens = shouldUseDiscoveredAnthropicGa1MContextWindow(model) - ? ANTHROPIC_CONTEXT_1M_TOKENS - : discoveredContextTokens; + const contextTokens = + resolveDiscoveredAnthropicFixedContextWindow(model) ?? 
discoveredContextTokens; if (!contextTokens || contextTokens <= 0) { continue; } @@ -323,37 +323,39 @@ function resolveConfiguredProviderContextTokens( return findContextTokens((id) => normalizeProviderId(id) === normalizedProvider); } -function isAnthropic1MModel(provider: string, model: string): boolean { - if (provider !== "anthropic" && provider !== "claude-cli") { - return false; - } +function resolveAnthropicFixedContextWindow(provider: string, model: string): number | undefined { const modelId = resolveModelFamilyId(model); - return ANTHROPIC_GA_1M_MODEL_PREFIXES.some((prefix) => modelId.startsWith(prefix)); + if ( + (provider === "anthropic" || provider === "anthropic-vertex") && + modelId.startsWith("claude-fable-5") + ) { + return ANTHROPIC_FABLE_CONTEXT_TOKENS; + } + if (provider !== "anthropic" && provider !== "claude-cli") { + return undefined; + } + return ANTHROPIC_GA_1M_MODEL_PREFIXES.some((prefix) => modelId.startsWith(prefix)) + ? ANTHROPIC_CONTEXT_1M_TOKENS + : undefined; } -function shouldUseAnthropicGa1MContextWindow(params: { - provider?: string; - model: string; -}): boolean { - const provider = params.provider ? normalizeProviderId(params.provider) : ""; - return isAnthropic1MModel(provider, params.model); -} - -function shouldUseDiscoveredAnthropicGa1MContextWindow(model: ModelEntry): boolean { +function resolveDiscoveredAnthropicFixedContextWindow(model: ModelEntry): number | undefined { const provider = typeof model.provider === "string" ? 
normalizeProviderId(model.provider) : undefined; const modelId = model.id; if (provider) { - return isAnthropic1MModel(provider, modelId); + return resolveAnthropicFixedContextWindow(provider, modelId); } const normalized = normalizeLowercaseStringOrEmpty(modelId); const slash = normalized.indexOf("/"); if (slash < 0) { - return false; + return undefined; } const inferredProvider = normalizeProviderId(normalized.slice(0, slash)); const inferredModel = normalized.slice(slash + 1); - return inferredProvider === "claude-cli" && isAnthropic1MModel(inferredProvider, inferredModel); + return inferredProvider === "claude-cli" + ? resolveAnthropicFixedContextWindow(inferredProvider, inferredModel) + : undefined; } function resolveModelFamilyId(modelId: string): string { @@ -379,8 +381,11 @@ export function resolveContextTokensForModel(params: { }); const explicitProvider = params.provider?.trim(); if (ref) { - if (explicitProvider && isAnthropic1MModel(ref.provider, ref.model)) { - return ANTHROPIC_CONTEXT_1M_TOKENS; + if (explicitProvider) { + const fixedContextWindow = resolveAnthropicFixedContextWindow(ref.provider, ref.model); + if (fixedContextWindow !== undefined) { + return fixedContextWindow; + } } // Only do the config direct scan when the caller explicitly passed a // provider. When provider is inferred from a slash in the model string @@ -401,10 +406,6 @@ export function resolveContextTokensForModel(params: { } } - if (explicitProvider && ref && shouldUseAnthropicGa1MContextWindow(ref)) { - return ANTHROPIC_CONTEXT_1M_TOKENS; - } - // When provider is explicitly given and the model ID is bare (no slash), // try the provider-qualified cache key BEFORE the bare key. Discovery // entries are stored under qualified IDs (e.g. 
"google-gemini-cli/ diff --git a/src/agents/embedded-agent-runner/run/llm-idle-timeout.test.ts b/src/agents/embedded-agent-runner/run/llm-idle-timeout.test.ts index 29902e3d44ca..7dd4f7e26527 100644 --- a/src/agents/embedded-agent-runner/run/llm-idle-timeout.test.ts +++ b/src/agents/embedded-agent-runner/run/llm-idle-timeout.test.ts @@ -1,9 +1,14 @@ // LLM idle-timeout tests cover timeout selection and stream wrapping for // embedded provider calls, including local-provider and cron exceptions. import { MAX_TIMER_TIMEOUT_MS } from "@openclaw/normalization-core/number-coercion"; -import type { AssistantMessageEventStream } from "openclaw/plugin-sdk/llm"; +import { + createAssistantMessageEventStream, + type AssistantMessageEventStream, +} from "openclaw/plugin-sdk/llm"; import { afterEach, describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../../../config/config.js"; +import { notifyLlmRequestActivity } from "../../../shared/llm-request-activity.js"; +import type { StreamFn } from "../../runtime/index.js"; import { DEFAULT_LLM_IDLE_TIMEOUT_MS, resolveLlmIdleTimeoutMs, @@ -339,12 +344,12 @@ describe("streamWithIdleTimeout", () => { void wrapped(model, context, options); - expect(baseFn).toHaveBeenCalledWith({ api: "openai", requestTimeoutMs: 1000 }, context, { + expect(baseFn).toHaveBeenCalledWith(model, context, { signal: expect.any(AbortSignal), }); }); - it("keeps model request timeouts that are shorter than the idle watchdog", () => { + it("preserves explicit model request timeouts", () => { const mockStream = createMockAsyncIterable([]); const baseFn = vi.fn().mockReturnValue(mockStream); const wrapped = streamWithIdleTimeout(baseFn, 1000); @@ -355,7 +360,7 @@ describe("streamWithIdleTimeout", () => { void wrapped(model, context, options); - expect(baseFn).toHaveBeenCalledWith({ requestTimeoutMs: 250 }, context, { + expect(baseFn).toHaveBeenCalledWith(model, context, { signal: expect.any(AbortSignal), }); }); @@ -508,6 +513,37 @@ 
describe("streamWithIdleTimeout", () => { expect(results).toHaveLength(3); }); + it("treats quarantined provider events as stream activity", async () => { + vi.useFakeTimers(); + let requestSignal: AbortSignal | undefined; + const baseFn: StreamFn = vi.fn((_model, _context, options) => { + requestSignal = options?.signal; + const stream = createAssistantMessageEventStream(); + setTimeout(() => { + stream.push({ type: "text_delta", contentIndex: 0, delta: "done" }); + }, 120); + return stream; + }); + const wrapped = streamWithIdleTimeout(baseFn, 50); + const stream = wrapped( + {} as Parameters[0], + {} as Parameters[1], + {} as Parameters[2], + ) as AssistantMessageEventStream; + const iterator = stream[Symbol.asyncIterator](); + const next = iterator.next(); + + setTimeout(() => notifyLlmRequestActivity(requestSignal), 40); + setTimeout(() => notifyLlmRequestActivity(requestSignal), 80); + await vi.advanceTimersByTimeAsync(120); + + await expect(next).resolves.toEqual({ + done: false, + value: { type: "text_delta", contentIndex: 0, delta: "done" }, + }); + await iterator.return?.(); + }); + it("calls timeout hook on idle timeout", async () => { vi.useFakeTimers(); const slowStream = createNeverYieldingStream(); diff --git a/src/agents/embedded-agent-runner/run/llm-idle-timeout.ts b/src/agents/embedded-agent-runner/run/llm-idle-timeout.ts index e5320ee0cde8..832fb22afc19 100644 --- a/src/agents/embedded-agent-runner/run/llm-idle-timeout.ts +++ b/src/agents/embedded-agent-runner/run/llm-idle-timeout.ts @@ -8,6 +8,7 @@ import { } from "@openclaw/normalization-core/number-coercion"; import { DEFAULT_LLM_IDLE_TIMEOUT_SECONDS } from "../../../config/agent-timeout-defaults.js"; import type { OpenClawConfig } from "../../../config/types.openclaw.js"; +import { onLlmRequestActivity } from "../../../shared/llm-request-activity.js"; import type { StreamFn } from "../../runtime/index.js"; import type { MutableAssistantMessageEventStream } from "../../stream-compat.js"; 
import { createStreamIteratorWrapper } from "../../stream-iterator-wrapper.js"; @@ -239,20 +240,6 @@ export function streamWithIdleTimeout( ...options, signal: streamAbortController.signal, } as typeof options; - const existingRequestTimeoutMs = - typeof (model as { requestTimeoutMs?: unknown })?.requestTimeoutMs === "number" && - Number.isFinite((model as { requestTimeoutMs?: number }).requestTimeoutMs) && - (model as { requestTimeoutMs?: number }).requestTimeoutMs! > 0 - ? Math.floor((model as { requestTimeoutMs?: number }).requestTimeoutMs!) - : timeoutMs; - const wrappedModel = - typeof model === "object" && model !== null - ? ({ - ...model, - requestTimeoutMs: Math.min(existingRequestTimeoutMs, timeoutMs), - } as typeof model) - : model; - const createTimeoutPromise = (setTimer: (timer: NodeJS.Timeout) => void): Promise => { return new Promise((_, reject) => { const timer = setTimeout(() => { @@ -268,7 +255,7 @@ export function streamWithIdleTimeout( let maybeStream: ReturnType; try { - maybeStream = baseFn(wrappedModel, context, wrappedOptions); + maybeStream = baseFn(model, context, wrappedOptions); } catch (error) { cleanupSourceSignal(); throw error; @@ -280,6 +267,8 @@ export function streamWithIdleTimeout( function () { const iterator = originalAsyncIterator(); let idleTimer: NodeJS.Timeout | null = null; + let waitingForProvider = false; + let rejectIdleTimeout: ((error: Error) => void) | undefined; const clearTimer = () => { if (idleTimer) { @@ -287,42 +276,61 @@ export function streamWithIdleTimeout( idleTimer = null; } }; + const armTimer = () => { + clearTimer(); + if (!waitingForProvider) { + return; + } + idleTimer = setTimeout(() => { + idleTimer = null; + const error = createIdleTimeoutError(); + abortStream(error); + onIdleTimeout?.(error); + rejectIdleTimeout?.(error); + }, timeoutMs); + idleTimer.unref?.(); + }; + const stopWaiting = () => { + waitingForProvider = false; + rejectIdleTimeout = undefined; + clearTimer(); + }; + const 
unsubscribeActivity = onLlmRequestActivity(streamAbortController.signal, armTimer); + const cleanupIterator = () => { + stopWaiting(); + unsubscribeActivity(); + cleanupSourceSignal(); + }; return createStreamIteratorWrapper({ iterator, next: async (streamIterator) => { - clearTimer(); - + waitingForProvider = true; try { - // Arm the watchdog only while waiting for provider progress. - const result = await Promise.race([ - streamIterator.next(), - createTimeoutPromise((timer) => { - idleTimer = timer; - }), - ]); + const timeoutPromise = new Promise((_, reject) => { + rejectIdleTimeout = reject; + armTimer(); + }); + const result = await Promise.race([streamIterator.next(), timeoutPromise]); if (result.done) { - clearTimer(); - cleanupSourceSignal(); + cleanupIterator(); return result; } - clearTimer(); + stopWaiting(); return result; } catch (error) { - clearTimer(); + cleanupIterator(); throw error; } }, onReturn(streamIterator) { - clearTimer(); - cleanupSourceSignal(); + cleanupIterator(); return streamIterator.return?.() ?? Promise.resolve({ done: true, value: undefined }); }, onThrow(streamIterator, error) { - clearTimer(); - cleanupSourceSignal(); + cleanupIterator(); return ( streamIterator.throw?.(error) ?? 
Promise.reject(toLintErrorObject(error, "Non-Error rejection")) diff --git a/src/agents/model-catalog.test.ts b/src/agents/model-catalog.test.ts index 002c28863f41..bbbd745ad982 100644 --- a/src/agents/model-catalog.test.ts +++ b/src/agents/model-catalog.test.ts @@ -496,6 +496,28 @@ describe("loadModelCatalog", () => { }); }); + it("preserves runtime model params in the internal catalog", async () => { + mockAgentDiscoveryModels([ + { + id: "company-fable", + name: "Company Fable", + provider: "amazon-bedrock", + params: { canonicalModelId: "claude-fable-5" }, + }, + ]); + + const result = await loadModelCatalog({ config: {} as OpenClawConfig }); + + expect(result).toEqual([ + { + id: "company-fable", + name: "Company Fable", + provider: "amazon-bedrock", + params: { canonicalModelId: "claude-fable-5" }, + }, + ]); + }); + it("writes runtime discovery results under the refreshed models.json fingerprint", async () => { buildModelsJsonSourceFingerprintMock.mockResolvedValue({ agentDir: "/tmp/openclaw", @@ -945,6 +967,31 @@ describe("loadModelCatalog", () => { expect(augmentCatalogMock).not.toHaveBeenCalled(); }); + it("inherits provider API and canonical Fable reasoning in persisted rows", async () => { + readFileMock.mockResolvedValueOnce( + JSON.stringify({ + providers: { + "microsoft-foundry": { + api: "anthropic-messages", + models: [ + { + id: "company-fable", + reasoning: false, + params: { canonicalModelId: "claude-fable-5" }, + }, + ], + }, + }, + }), + ); + + const result = await loadModelCatalog({ config: {} as OpenClawConfig, readOnly: true }); + const entry = requireCatalogEntry(result, "microsoft-foundry", "company-fable"); + + expect(entry.api).toBe("anthropic-messages"); + expect(entry.reasoning).toBe(true); + }); + it("refreshes stale persisted read-only rows with manifest catalog metadata", async () => { readFileMock.mockResolvedValueOnce( JSON.stringify({ @@ -1752,6 +1799,7 @@ describe("loadModelCatalog", () => { provider: "xai", id: "grok-4.3", 
name: "Grok 4.3", + api: "openai-completions", reasoning: false, input: ["text"], contextWindow: 200_000, @@ -1767,6 +1815,7 @@ describe("loadModelCatalog", () => { modelCatalog: { providers: { xai: { + api: "openai-responses", models: [ { id: "grok-4.3", @@ -1787,6 +1836,7 @@ describe("loadModelCatalog", () => { const entry = requireCatalogEntry(result, "xai", "grok-4.3"); expect(result.filter((entryValue) => entryValue.provider === "xai")).toHaveLength(1); + expect(entry.api).toBe("openai-responses"); expect(entry.contextWindow).toBe(1_000_000); expect(entry.input).toEqual(["text", "image"]); expect(entry.reasoning).toBe(true); diff --git a/src/agents/model-catalog.ts b/src/agents/model-catalog.ts index 364b83eb3dd0..b34791f3e852 100644 --- a/src/agents/model-catalog.ts +++ b/src/agents/model-catalog.ts @@ -3,6 +3,7 @@ */ import { readFile } from "node:fs/promises"; import { join } from "node:path"; +import { resolveClaudeFable5ModelIdentity } from "@openclaw/llm-core"; import { normalizeProviderId } from "@openclaw/model-catalog-core/provider-id"; import { normalizeLowercaseStringOrEmpty, @@ -64,10 +65,12 @@ type DiscoveredModel = { id: string; name?: string; provider: string; + api?: ModelCatalogEntry["api"]; contextWindow?: number; contextTokens?: number; reasoning?: boolean; input?: ModelInputType[]; + params?: ModelCatalogEntry["params"]; compat?: ModelCatalogEntry["compat"]; }; @@ -159,20 +162,46 @@ function mergeCatalogCompat( return { ...base, ...override }; } +function mergeCatalogParams( + base: ModelCatalogEntry["params"] | undefined, + override: ModelCatalogEntry["params"] | undefined, +): ModelCatalogEntry["params"] | undefined { + if (!base) { + return override; + } + if (!override) { + return base; + } + return { ...base, ...override }; +} + function overlayCatalogMetadata( base: ModelCatalogEntry, overlay: ModelCatalogEntry, ): ModelCatalogEntry { + const params = mergeCatalogParams(base.params, overlay.params); return { ...base, + ...(overlay.api 
!== undefined ? { api: overlay.api } : {}), ...(overlay.contextWindow !== undefined ? { contextWindow: overlay.contextWindow } : {}), ...(overlay.contextTokens !== undefined ? { contextTokens: overlay.contextTokens } : {}), ...(overlay.reasoning !== undefined ? { reasoning: overlay.reasoning } : {}), ...(overlay.input !== undefined ? { input: overlay.input } : {}), + ...(params ? { params } : {}), compat: mergeCatalogCompat(base.compat, overlay.compat), }; } +function normalizeCatalogEntryContract(entry: ModelCatalogEntry): ModelCatalogEntry { + if ( + entry.api === "anthropic-messages" && + resolveClaudeFable5ModelIdentity({ id: entry.id, params: entry.params }) + ) { + return { ...entry, reasoning: true }; + } + return entry; +} + function mergeCatalogEntries(models: ModelCatalogEntry[], entries: ModelCatalogEntry[]): void { const indexByKey = new Map( models.map((entry, index) => [catalogEntryDedupeKey(entry.provider, entry.id), index]), @@ -260,7 +289,7 @@ export function loadManifestModelCatalog(params: { } function sortModelCatalogEntries(entries: ModelCatalogEntry[]): ModelCatalogEntry[] { - return entries.toSorted((a, b) => { + return entries.map(normalizeCatalogEntryContract).toSorted((a, b) => { const p = a.provider.localeCompare(b.provider); if (p !== 0) { return p; @@ -273,6 +302,7 @@ function normalizePersistedModelCatalogEntry( providerRaw: string, entry: Record, defaults?: { + api?: ModelCatalogEntry["api"]; contextWindow?: number; contextTokens?: number; }, @@ -303,6 +333,8 @@ function normalizePersistedModelCatalogEntry( ? defaults.contextTokens : undefined; const reasoning = typeof entry?.reasoning === "boolean" ? entry.reasoning : false; + const api = + typeof entry?.api === "string" ? (entry.api as ModelCatalogEntry["api"]) : defaults?.api; const parsedInput = Array.isArray(entry?.input) ? 
entry.input.filter((value): value is ModelInputType => ["text", "image", "audio", "video", "document"].includes(String(value)), @@ -313,14 +345,20 @@ function normalizePersistedModelCatalogEntry( entry?.compat && typeof entry.compat === "object" ? (entry.compat as ModelCatalogEntry["compat"]) : undefined; + const modelParams = + entry?.params && typeof entry.params === "object" + ? (entry.params as ModelCatalogEntry["params"]) + : undefined; return { id, name, provider, + ...(api ? { api } : {}), contextWindow, ...(contextTokens !== undefined ? { contextTokens } : {}), reasoning, input, + ...(modelParams ? { params: modelParams } : {}), compat, }; } @@ -402,11 +440,16 @@ async function loadReadOnlyPersistedModelCatalog(params?: { typeof providerConfig?.contextTokens === "number" && providerConfig.contextTokens > 0 ? providerConfig.contextTokens : undefined; + const providerApi = + typeof providerConfig?.api === "string" + ? (providerConfig.api as ModelCatalogEntry["api"]) + : undefined; for (const entry of providerConfig.models as Record[]) { const normalized = normalizePersistedModelCatalogEntry( providerRaw, entry, { + api: providerApi, contextWindow: providerContextWindow, contextTokens: providerContextTokens, }, @@ -644,16 +687,21 @@ export async function loadModelCatalog(params?: { ? entry.contextTokens : undefined; const reasoning = typeof entry?.reasoning === "boolean" ? entry.reasoning : undefined; + const api = typeof entry?.api === "string" ? entry.api : undefined; const input = Array.isArray(entry?.input) ? entry.input : undefined; + const modelParams = + entry?.params && typeof entry.params === "object" ? entry.params : undefined; const compat = entry?.compat && typeof entry.compat === "object" ? entry.compat : undefined; models.push({ id, name, provider, + ...(api ? { api } : {}), contextWindow, ...(contextTokens !== undefined ? { contextTokens } : {}), reasoning, input, + ...(modelParams ? 
{ params: modelParams } : {}), compat, }); } diff --git a/src/agents/model-catalog.types.ts b/src/agents/model-catalog.types.ts index 0dc4954321b4..f92b3950943d 100644 --- a/src/agents/model-catalog.types.ts +++ b/src/agents/model-catalog.types.ts @@ -19,6 +19,7 @@ export type ModelCatalogEntry = { contextTokens?: number; reasoning?: boolean; input?: ModelInputType[]; + params?: Record; compat?: ModelCompatConfig; mediaInput?: ModelMediaInputConfig; }; diff --git a/src/agents/model-selection-shared.ts b/src/agents/model-selection-shared.ts index aad94ffabc67..f574f5e8aed1 100644 --- a/src/agents/model-selection-shared.ts +++ b/src/agents/model-selection-shared.ts @@ -659,6 +659,10 @@ function applyModelCatalogMetadata(params: { const nextContextTokens = configuredEntry?.contextTokens ?? params.entry.contextTokens; const nextReasoning = configuredEntry?.reasoning ?? params.entry.reasoning; const nextInput = configuredEntry?.input ?? params.entry.input; + const nextParams = + params.entry.params || configuredEntry?.params + ? { ...params.entry.params, ...configuredEntry?.params } + : undefined; const nextCompat = params.entry.compat || configuredEntry?.compat ? { ...params.entry.compat, ...configuredEntry?.compat } @@ -672,6 +676,7 @@ function applyModelCatalogMetadata(params: { ...(nextContextTokens !== undefined ? { contextTokens: nextContextTokens } : {}), ...(nextReasoning !== undefined ? { reasoning: nextReasoning } : {}), ...(nextInput ? { input: nextInput } : {}), + ...(nextParams ? { params: nextParams } : {}), ...(nextCompat ? 
{ compat: nextCompat } : {}), }; } @@ -687,6 +692,7 @@ function buildSyntheticAllowedCatalogEntry(params: { const nextContextTokens = configuredEntry?.contextTokens; const nextReasoning = configuredEntry?.reasoning; const nextInput = configuredEntry?.input; + const nextParams = configuredEntry?.params; const nextCompat = configuredEntry?.compat; return { @@ -698,6 +704,7 @@ function buildSyntheticAllowedCatalogEntry(params: { ...(nextContextTokens !== undefined ? { contextTokens: nextContextTokens } : {}), ...(nextReasoning !== undefined ? { reasoning: nextReasoning } : {}), ...(nextInput ? { input: nextInput } : {}), + ...(nextParams ? { params: nextParams } : {}), ...(nextCompat ? { compat: nextCompat } : {}), }; } @@ -1302,6 +1309,8 @@ export function buildConfiguredModelCatalog(params: { ? model.contextTokens : undefined; const input = Array.isArray(model?.input) ? model.input : undefined; + const modelParams = + model?.params && typeof model.params === "object" ? model.params : undefined; const compat = model?.compat && typeof model.compat === "object" ? model.compat : undefined; const reasoning = typeof model?.reasoning === "boolean" @@ -1318,6 +1327,7 @@ export function buildConfiguredModelCatalog(params: { contextTokens, reasoning, input, + ...(modelParams ? 
{ params: modelParams } : {}), compat, }); } diff --git a/src/agents/model-selection.test.ts b/src/agents/model-selection.test.ts index a1a67d25a651..cd2ac63b2154 100644 --- a/src/agents/model-selection.test.ts +++ b/src/agents/model-selection.test.ts @@ -889,6 +889,31 @@ describe("model-selection", () => { expect(model?.reasoning).toBe(true); }); + it("carries configured model params into catalog entries for provider policy", () => { + const cfg = { + models: { + providers: { + "amazon-bedrock": { + models: [ + { + id: "company-fable", + name: "Company Fable", + params: { + canonicalModelId: "claude-fable-5", + }, + }, + ], + }, + }, + }, + } as unknown as OpenClawConfig; + + const model = buildConfiguredModelCatalog({ cfg }).find( + (entry) => entry.provider === "amazon-bedrock" && entry.id === "company-fable", + ); + expect(model?.params).toEqual({ canonicalModelId: "claude-fable-5" }); + }); + it("does not infer reasoning from non-vLLM thinking compat", () => { const cfg = { models: { diff --git a/src/agents/sessions/model-registry.test.ts b/src/agents/sessions/model-registry.test.ts index 18a10e9a954f..e7b7c3e05421 100644 --- a/src/agents/sessions/model-registry.test.ts +++ b/src/agents/sessions/model-registry.test.ts @@ -145,6 +145,39 @@ describe("ModelRegistry models.json auth", () => { expect(registry.find("zai", "glm-5.1")?.name).toBe("GLM 5.1"); }); + it("preserves model params from generated plugin catalog shards", () => { + const modelsPath = writeModelsJsonWithPluginCatalog({ + root: { providers: {} }, + pluginRelativePath: join("plugins", "amazon-bedrock", PLUGIN_MODEL_CATALOG_FILE), + pluginCatalog: { + generatedBy: PLUGIN_MODEL_CATALOG_GENERATED_BY, + providers: { + "amazon-bedrock": { + baseUrl: "https://bedrock-runtime.us-east-1.amazonaws.com", + api: "bedrock-converse-stream", + auth: "aws-sdk", + models: [ + { + id: "company-fable", + name: "Company Fable", + params: { canonicalModelId: "claude-fable-5" }, + }, + ], + }, + }, + }, + }); + + 
const registry = ModelRegistry.create(AuthStorage.inMemory(), modelsPath, { + pluginMetadataSnapshot: pluginOwnerSnapshot("amazon-bedrock", "amazon-bedrock"), + }); + + expect(registry.getError()).toBeUndefined(); + expect(registry.find("amazon-bedrock", "company-fable")?.params).toEqual({ + canonicalModelId: "claude-fable-5", + }); + }); + it("ignores non-generated plugin catalog files", () => { // Plugin catalog shards are codegen artifacts; hand-written lookalikes must // not extend the provider registry. diff --git a/src/agents/sessions/model-registry.ts b/src/agents/sessions/model-registry.ts index ae657114a727..a6d8438c7229 100644 --- a/src/agents/sessions/model-registry.ts +++ b/src/agents/sessions/model-registry.ts @@ -170,6 +170,7 @@ const ModelDefinitionSchema = Type.Object({ ), contextWindow: Type.Optional(Type.Number()), maxTokens: Type.Optional(Type.Number()), + params: Type.Optional(Type.Record(Type.String(), Type.Unknown())), headers: Type.Optional(Type.Record(Type.String(), Type.String())), compat: Type.Optional(ProviderCompatSchema), }); @@ -553,6 +554,7 @@ export class ModelRegistry { cost: modelDef.cost ?? defaultCost, contextWindow: modelDef.contextWindow ?? 128000, maxTokens: modelDef.maxTokens ?? 
16384, + params: modelDef.params, headers: undefined, compat, } as Model); @@ -878,6 +880,7 @@ export class ModelRegistry { cost: modelDef.cost, contextWindow: modelDef.contextWindow, maxTokens: modelDef.maxTokens, + params: modelDef.params, headers: undefined, compat: modelDef.compat, } as Model); @@ -923,6 +926,7 @@ export interface ProviderConfigInput { cost: { input: number; output: number; cacheRead: number; cacheWrite: number }; contextWindow: number; maxTokens: number; + params?: Record; headers?: Record; compat?: Model["compat"]; }>; diff --git a/src/agents/transport-message-transform.test.ts b/src/agents/transport-message-transform.test.ts index a3f8ce8b5315..4e276100f256 100644 --- a/src/agents/transport-message-transform.test.ts +++ b/src/agents/transport-message-transform.test.ts @@ -4,8 +4,16 @@ import type { Api, Context, Model } from "openclaw/plugin-sdk/llm"; import { describe, expect, it } from "vitest"; import { transformTransportMessages } from "./transport-message-transform.js"; -function makeModel(api: Api, provider: string, id: string): Model { - return { api, provider, id, input: [], output: [] } as unknown as Model; +function makeModel(api: Api, provider: string, id: string, canonicalModelId?: string): Model { + return { + api, + provider, + id, + name: id, + ...(canonicalModelId ? 
{ params: { canonicalModelId } } : {}), + input: [], + output: [], + } as unknown as Model; } type ToolResultMessage = Extract; @@ -47,6 +55,211 @@ function assistantToolCall( } describe("transformTransportMessages synthetic tool-result policy", () => { + it.each([ + { + source: { provider: "anthropic", model: "claude-fable-5" }, + target: { provider: "anthropic-vertex", model: "claude-opus-4-8" }, + }, + { + source: { provider: "anthropic", model: "claude-sonnet-4-6" }, + target: { provider: "anthropic", model: "claude-fable-5" }, + }, + { + source: { + provider: "microsoft-foundry", + model: "prod-primary", + responseModel: "claude-fable-5", + }, + target: { provider: "anthropic", model: "claude-opus-4-8" }, + }, + { + source: { provider: "legacy-provider", model: "prod-primary" }, + target: { + provider: "microsoft-foundry", + model: "prod-primary", + canonicalModelId: "claude-fable-5", + }, + }, + { + source: { + provider: "anthropic", + model: "claude-fable-5", + responseModel: "claude-opus-4-8", + }, + target: { provider: "anthropic", model: "claude-fable-5" }, + }, + { + source: { + provider: "microsoft-foundry", + model: "prod-primary", + responseModel: "claude-opus-4-8", + }, + target: { + provider: "microsoft-foundry", + model: "prod-primary", + canonicalModelId: "claude-fable-5", + }, + }, + ])("drops model-bound thinking for Fable switches", ({ source, target }) => { + const result = transformTransportMessages( + [ + { + role: "assistant", + provider: source.provider, + api: "anthropic-messages", + model: source.model, + responseModel: source.responseModel, + stopReason: "stop", + timestamp: Date.now(), + content: [ + { + type: "thinking", + thinking: "model-bound thought", + thinkingSignature: "sig_model_bound", + }, + { type: "text", text: "visible answer" }, + ], + }, + ] as Context["messages"], + makeModel("anthropic-messages", target.provider, target.model, target.canonicalModelId), + ); + + expect(result[0]).toMatchObject({ + role: "assistant", + 
content: [{ type: "text", text: "visible answer" }], + }); + }); + + it.each([ + { + sourceProvider: "anthropic", + sourceModel: "claude-fable-5", + sourceResponseModel: undefined, + targetProvider: "anthropic", + targetApi: "openclaw-anthropic-messages-transport" as const, + targetModel: "claude-fable-5", + targetCanonicalModelId: undefined, + }, + { + sourceProvider: "microsoft-foundry", + sourceModel: "prod-primary", + sourceResponseModel: undefined, + targetProvider: "microsoft-foundry", + targetApi: "anthropic-messages" as const, + targetModel: "prod-primary", + targetCanonicalModelId: "claude-fable-5", + }, + { + sourceProvider: "microsoft-foundry", + sourceModel: "prod-primary", + sourceResponseModel: "prod-primary", + targetProvider: "microsoft-foundry", + targetApi: "anthropic-messages" as const, + targetModel: "prod-primary", + targetCanonicalModelId: "claude-fable-5", + }, + { + sourceProvider: "anthropic", + sourceModel: "claude-fable-5", + sourceResponseModel: undefined, + targetProvider: "anthropic-vertex", + targetApi: "anthropic-messages" as const, + targetModel: "claude-fable-5", + targetCanonicalModelId: undefined, + }, + { + sourceProvider: "microsoft-foundry", + sourceModel: "prod-primary", + sourceResponseModel: "claude-fable-5", + targetProvider: "anthropic", + targetApi: "anthropic-messages" as const, + targetModel: "claude-fable-5", + targetCanonicalModelId: "claude-fable-5", + }, + { + sourceProvider: "anthropic", + sourceModel: "claude-fable-5", + sourceResponseModel: undefined, + targetProvider: "microsoft-foundry", + targetApi: "anthropic-messages" as const, + targetModel: "prod-primary", + targetCanonicalModelId: "claude-fable-5", + }, + ])( + "preserves Fable thinking across compatible Anthropic transports", + ({ + sourceProvider, + sourceModel, + sourceResponseModel, + targetProvider, + targetApi, + targetModel, + targetCanonicalModelId, + }) => { + const result = transformTransportMessages( + [ + { + role: "assistant", + provider: 
sourceProvider, + api: "anthropic-messages", + model: sourceModel, + responseModel: sourceResponseModel, + stopReason: "stop", + timestamp: Date.now(), + content: [ + { + type: "thinking", + thinking: "", + thinkingSignature: "sig_omitted", + }, + ], + }, + ] as Context["messages"], + makeModel(targetApi, targetProvider, targetModel, targetCanonicalModelId), + ); + + expect(result[0]).toMatchObject({ + role: "assistant", + content: [ + { + type: "thinking", + thinking: "", + thinkingSignature: "sig_omitted", + }, + ], + }); + }, + ); + + it("drops Fable thinking across unrelated API overrides", () => { + const result = transformTransportMessages( + [ + { + role: "assistant", + provider: "anthropic", + api: "openai-completions", + model: "claude-fable-5", + stopReason: "stop", + timestamp: Date.now(), + content: [ + { + type: "thinking", + thinking: "adapter reasoning", + thinkingSignature: "reasoning_content", + }, + { type: "text", text: "visible answer" }, + ], + }, + ] as Context["messages"], + makeModel("anthropic-messages", "anthropic", "claude-fable-5"), + ); + + expect(result[0]).toMatchObject({ + role: "assistant", + content: [{ type: "text", text: "visible answer" }], + }); + }); + it("normalizes malformed assistant content before transport conversion", () => { const objectContentMessages = [ { diff --git a/src/agents/transport-message-transform.ts b/src/agents/transport-message-transform.ts index 7ee5a4866cd1..b466575b7d56 100644 --- a/src/agents/transport-message-transform.ts +++ b/src/agents/transport-message-transform.ts @@ -4,6 +4,7 @@ * strict provider tool-result gaps when supported. 
*/ import type { Api, Context, Model } from "../llm/types.js"; +import { resolveModelBoundThinkingReplayMode } from "../shared/anthropic-model-contract.js"; import { repairToolUseResultPairing } from "./session-transcript-repair.js"; const SYNTHETIC_TOOL_RESULT_APIS = new Set([ @@ -74,8 +75,23 @@ export function transformTransportMessages( if (msg.role !== "assistant") { return msg; } + const modelBoundThinkingReplayMode = resolveModelBoundThinkingReplayMode({ + source: { + provider: msg.provider, + api: msg.api, + modelId: msg.model, + responseModelId: msg.responseModel, + }, + target: { + provider: model.provider, + api: model.api, + modelId: model.id, + modelParams: model.params, + }, + }); const isSameModel = - msg.provider === model.provider && msg.api === model.api && msg.model === model.id; + modelBoundThinkingReplayMode === "preserve" || + (msg.provider === model.provider && msg.api === model.api && msg.model === model.id); const sourceContent = Array.isArray(msg.content) ? msg.content : msg.content != null && typeof msg.content === "object" @@ -84,6 +100,9 @@ export function transformTransportMessages( const content: typeof msg.content = []; for (const block of sourceContent) { if (block.type === "thinking") { + if (modelBoundThinkingReplayMode === "drop") { + continue; + } if (block.redacted) { if (isSameModel) { content.push(block); diff --git a/src/agents/transport-stream-shared.ts b/src/agents/transport-stream-shared.ts index 2a19ab50ce1d..d00ca3daba3d 100644 --- a/src/agents/transport-stream-shared.ts +++ b/src/agents/transport-stream-shared.ts @@ -127,7 +127,7 @@ export function finalizeTransportStream(params: { throw new Error("Request was aborted"); } if (output.stopReason === "aborted" || output.stopReason === "error") { - throw new Error("An unknown error occurred"); + throw new Error(output.errorMessage ?? 
"An unknown error occurred"); } stream.push({ type: "done", reason: output.stopReason as never, message: output as never }); stream.end(); diff --git a/src/auto-reply/thinking.shared.ts b/src/auto-reply/thinking.shared.ts index 487f9d938fef..f1eeea896910 100644 --- a/src/auto-reply/thinking.shared.ts +++ b/src/auto-reply/thinking.shared.ts @@ -28,7 +28,9 @@ export type UsageDisplayLevel = "off" | "tokens" | "full"; export type ThinkingCatalogEntry = { provider: string; id: string; + api?: string; reasoning?: boolean; + params?: Record; compat?: { thinkingFormat?: string; supportedReasoningEfforts?: readonly string[] | null; diff --git a/src/auto-reply/thinking.test.ts b/src/auto-reply/thinking.test.ts index 0f3a5248fff6..62507d4966f1 100644 --- a/src/auto-reply/thinking.test.ts +++ b/src/auto-reply/thinking.test.ts @@ -93,24 +93,21 @@ describe("listThinkingLevels", () => { it("includes xhigh for provider-advertised models", () => { providerRuntimeMocks.resolveProviderXHighThinking.mockImplementation(({ provider, context }) => - (provider === "openai" && ["gpt-5.4", "gpt-5.4", "gpt-5.4-pro"].includes(context.modelId)) || (provider === "openai" && ["gpt-5.4", "gpt-5.4-pro", "gpt-5.3-codex-spark"].includes(context.modelId)) || - (provider === "github-copilot" && ["gpt-5.4", "gpt-5.4"].includes(context.modelId)) + (provider === "github-copilot" && context.modelId === "gpt-5.4") ? 
true : undefined, ); - expect(listThinkingLevels("openai", "gpt-5.4")).toContain("xhigh"); - expect(listThinkingLevels("openai", "gpt-5.4")).toContain("xhigh"); - expect(listThinkingLevels("openai", "gpt-5.3-codex-spark")).toContain("xhigh"); - expect(listThinkingLevels("openai", "gpt-5.4-pro")).toContain("xhigh"); - expect(listThinkingLevels("openai", "gpt-5.4")).toContain("xhigh"); - expect(listThinkingLevels("openai", "gpt-5.4")).toContain("xhigh"); - expect(listThinkingLevels("openai", "gpt-5.4-pro")).toContain("xhigh"); - expect(listThinkingLevels("openai", "gpt-5.4")).toContain("xhigh"); - expect(listThinkingLevels("github-copilot", "gpt-5.4")).toContain("xhigh"); - expect(listThinkingLevels("github-copilot", "gpt-5.4")).toContain("xhigh"); + for (const [provider, model] of [ + ["openai", "gpt-5.4"], + ["openai", "gpt-5.4-pro"], + ["openai", "gpt-5.3-codex-spark"], + ["github-copilot", "gpt-5.4"], + ] as const) { + expect(listThinkingLevels(provider, model)).toContain("xhigh"); + } }); it("excludes xhigh for non-codex models", () => { @@ -285,6 +282,114 @@ describe("listThinkingLevels", () => { ).toBe("low"); }); + it("uses canonical Fable params when no provider thinking profile exists", () => { + const catalog = [ + { + provider: "microsoft-foundry", + id: "company-fable", + api: "anthropic-messages", + reasoning: false, + params: { canonicalModelId: "claude-fable-5" }, + }, + ]; + + expect(listThinkingLevels("microsoft-foundry", "company-fable", catalog)).toEqual([ + "off", + "minimal", + "low", + "medium", + "adaptive", + "high", + "xhigh", + "max", + ]); + expect( + resolveThinkingDefaultForModel({ + provider: "microsoft-foundry", + model: "company-fable", + catalog, + }), + ).toBe("high"); + }); + + it("preserves provider-specific profiles for Fable Messages routes", () => { + providerRuntimeMocks.resolveProviderThinkingProfile.mockReturnValue({ + levels: [{ id: "off" }, { id: "low" }], + defaultLevel: "off", + }); + + expect( + 
listThinkingLevels("proxy", "company-fable", [ + { + provider: "proxy", + id: "company-fable", + api: "anthropic-messages", + reasoning: true, + params: { canonicalModelId: "claude-fable-5" }, + }, + ]), + ).toEqual(["off", "low"]); + }); + + it("does not infer the Fable contract without an Anthropic Messages catalog row", () => { + providerRuntimeMocks.resolveProviderThinkingProfile.mockReturnValue({ + levels: [{ id: "off" }, { id: "low" }], + defaultLevel: "off", + }); + + expect(listThinkingLevels("openrouter", "anthropic/claude-fable-5")).toEqual(["off", "low"]); + }); + + it("does not apply the Fable profile to OpenAI-compatible catalog rows", () => { + providerRuntimeMocks.resolveProviderThinkingProfile.mockReturnValue({ + levels: [{ id: "off" }, { id: "low" }, { id: "high" }], + defaultLevel: "off", + }); + + expect( + listThinkingLevels("openrouter", "anthropic/claude-fable-5", [ + { + provider: "openrouter", + id: "anthropic/claude-fable-5", + api: "openai-completions", + reasoning: true, + }, + ]), + ).toEqual(["off", "low", "high"]); + }); + + it("preserves explicit provider opt-outs for canonical Fable aliases", () => { + providerRuntimeMocks.resolveProviderThinkingProfile.mockReturnValue({ + levels: [{ id: "off" }], + defaultLevel: "off", + }); + const catalog = [ + { + provider: "claude-cli", + id: "company-fable", + api: "anthropic-messages", + reasoning: true, + params: { canonicalModelId: "claude-fable-5" }, + }, + ]; + + expect(listThinkingLevels("claude-cli", "company-fable", catalog)).toEqual(["off"]); + }); + + it("uses generic thinking levels when a provider has no custom profile", () => { + providerRuntimeMocks.resolveProviderThinkingProfile.mockReturnValue(null); + + expect( + listThinkingLevels("vllm", "reasoning-model", [ + { + provider: "vllm", + id: "reasoning-model", + reasoning: true, + }, + ]), + ).toEqual(["off", "minimal", "low", "medium", "high"]); + }); + it("matches provider-qualified catalog ids for provider thinking profiles", 
() => { providerRuntimeMocks.resolveProviderThinkingProfile.mockImplementation(({ context }) => context.reasoning === true && context.compat?.thinkingFormat === "qwen-chat-template" @@ -368,6 +473,32 @@ describe("listThinkingLevels", () => { ).toBe("high"); }); + it("maps xhigh to high for provider profiles with max but no xhigh", () => { + providerRuntimeMocks.resolveProviderThinkingProfile.mockImplementation(({ provider }) => + provider === "anthropic" + ? { + levels: [ + { id: "off" }, + { id: "minimal" }, + { id: "low" }, + { id: "medium" }, + { id: "high" }, + { id: "adaptive" }, + { id: "max" }, + ], + } + : undefined, + ); + + expect( + resolveSupportedThinkingLevel({ + provider: "anthropic", + model: "claude-sonnet-4-6", + level: "xhigh", + }), + ).toBe("high"); + }); + it("maps unsupported adaptive to medium and unsupported xhigh to high", () => { providerRuntimeMocks.resolveProviderThinkingProfile.mockReturnValue({ levels: [{ id: "off" }, { id: "minimal" }, { id: "low" }, { id: "medium" }, { id: "high" }], diff --git a/src/auto-reply/thinking.ts b/src/auto-reply/thinking.ts index aa95434fe567..2e60444e0b44 100644 --- a/src/auto-reply/thinking.ts +++ b/src/auto-reply/thinking.ts @@ -1,3 +1,7 @@ +import { + CLAUDE_FABLE_5_THINKING_PROFILE, + resolveClaudeFable5ModelIdentity, +} from "@openclaw/llm-core"; // Thinking/reasoning level catalog helpers for auto-reply model controls. import { normalizeProviderId } from "@openclaw/model-catalog-core/provider-id"; import { @@ -95,7 +99,9 @@ function resolveThinkingPolicyContext(params: { normalizedProvider, modelId, modelKey, + api: candidate?.api, reasoning: candidate?.reasoning, + ...(candidate?.params ? { params: candidate.params } : {}), compat: candidate?.compat, }; } @@ -188,12 +194,22 @@ export function resolveThinkingProfile(params: { provider: context.normalizedProvider, modelId: context.modelId, reasoning: context.reasoning, + ...(context.params ? 
{ params: context.params } : {}), compat: context.compat, }; - const pluginProfile = resolveProviderThinkingProfile({ + const providerProfile = resolveProviderThinkingProfile({ provider: context.normalizedProvider, context: providerContext, }); + const fableProfile = + context.api === "anthropic-messages" && + resolveClaudeFable5ModelIdentity({ + id: context.modelId, + params: context.params, + }) + ? CLAUDE_FABLE_5_THINKING_PROFILE + : undefined; + const pluginProfile = providerProfile ?? fableProfile; if (pluginProfile) { const normalized = normalizeThinkingProfile(pluginProfile); if ( diff --git a/src/llm/model-utils.test.ts b/src/llm/model-utils.test.ts new file mode 100644 index 000000000000..800ec2af0165 --- /dev/null +++ b/src/llm/model-utils.test.ts @@ -0,0 +1,46 @@ +import { describe, expect, it } from "vitest"; +import { clampThinkingLevel, getSupportedThinkingLevels } from "./model-utils.js"; +import type { Model } from "./types.js"; + +function makeModel( + thinkingLevelMap: Model["thinkingLevelMap"], + overrides: Partial = {}, +): Model { + return { + id: "test-model", + name: "Test Model", + api: "openai-responses", + provider: "openai", + baseUrl: "https://example.com", + reasoning: true, + thinkingLevelMap, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128_000, + maxTokens: 4096, + ...overrides, + }; +} + +describe("clampThinkingLevel", () => { + it("downgrades explicit extended-level opt-outs", () => { + expect(clampThinkingLevel(makeModel({ xhigh: null, max: "max" }), "xhigh")).toBe("high"); + }); + + it("keeps upward clamping for lower-level map holes", () => { + expect(clampThinkingLevel(makeModel({ minimal: null }), "minimal")).toBe("low"); + }); + + it("honors canonical Fable capabilities when catalog reasoning is stale", () => { + const model = makeModel(undefined, { + id: "company-fable", + api: "anthropic-messages", + provider: "microsoft-foundry", + reasoning: false, + params: { 
canonicalModelId: "claude-fable-5" }, + }); + + expect(getSupportedThinkingLevels(model)).toContain("max"); + expect(clampThinkingLevel(model, "max")).toBe("max"); + }); +}); diff --git a/src/llm/model-utils.ts b/src/llm/model-utils.ts index aef13c53a076..c8a4598f051a 100644 --- a/src/llm/model-utils.ts +++ b/src/llm/model-utils.ts @@ -1,4 +1,8 @@ // Provides model selection, usage, and thinking-level utility helpers. +import { + resolveClaudeFable5ModelIdentity, + resolveClaudeNativeThinkingLevelMap, +} from "@openclaw/llm-core"; import type { Api, Model, ModelThinkingLevel, Usage } from "./types.js"; /** Calculates and stores model cost fields from token usage and per-million pricing. */ @@ -22,16 +26,25 @@ const EXTENDED_THINKING_LEVELS: ModelThinkingLevel[] = [ "max", ]; +function resolveThinkingLevelMap(model: Model) { + return model.api === "anthropic-messages" + ? (resolveClaudeNativeThinkingLevelMap(model) ?? model.thinkingLevelMap) + : model.thinkingLevelMap; +} + /** Returns thinking levels exposed by a reasoning-capable model. */ export function getSupportedThinkingLevels( model: Model, ): ModelThinkingLevel[] { - if (!model.reasoning) { + const fableContract = + model.api === "anthropic-messages" && resolveClaudeFable5ModelIdentity(model) !== undefined; + if (!model.reasoning && !fableContract) { return ["off"]; } + const thinkingLevelMap = resolveThinkingLevelMap(model); return EXTENDED_THINKING_LEVELS.filter((level) => { - const mapped = model.thinkingLevelMap?.[level]; + const mapped = thinkingLevelMap?.[level]; if (mapped === null) { return false; } @@ -57,6 +70,18 @@ export function clampThinkingLevel( return availableLevels[0] ?? "off"; } + // Explicit provider opt-outs are hard caps. Downgrade them before considering + // stronger levels so unsupported xhigh/max requests cannot increase cost. 
+ const thinkingLevelMap = resolveThinkingLevelMap(model); + if ((level === "xhigh" || level === "max") && thinkingLevelMap?.[level] === null) { + for (let i = requestedIndex - 1; i >= 0; i--) { + const candidate = EXTENDED_THINKING_LEVELS[i]; + if (availableLevels.includes(candidate)) { + return candidate; + } + } + } + // Prefer the next stronger available level, then walk down if the request was above the model cap. for (let i = requestedIndex; i < EXTENDED_THINKING_LEVELS.length; i++) { const candidate = EXTENDED_THINKING_LEVELS[i]; diff --git a/src/llm/providers/anthropic.test.ts b/src/llm/providers/anthropic.test.ts index 2618d3b1fe91..5205df6e7d86 100644 --- a/src/llm/providers/anthropic.test.ts +++ b/src/llm/providers/anthropic.test.ts @@ -24,7 +24,9 @@ vi.mock("@anthropic-ai/sdk", () => ({ import { streamAnthropic, streamSimpleAnthropic } from "./anthropic.js"; function createSseResponse(events: Record[] = []): Response { - const body = events.map((event) => `data: ${JSON.stringify(event)}\n\n`).join(""); + const body = events + .map((event) => `event: ${String(event.type)}\ndata: ${JSON.stringify(event)}\n\n`) + .join(""); return new Response(body, { status: 200, headers: { "content-type": "text/event-stream" }, @@ -93,7 +95,11 @@ describe("Anthropic provider", () => { createSseResponse([ { type: "message_start", - message: { id: "msg_1", usage: { input_tokens: 1, output_tokens: 0 } }, + message: { + id: "msg_1", + model: "claude-fable-5", + usage: { input_tokens: 1, output_tokens: 0 }, + }, }, { type: "message_delta", @@ -108,7 +114,10 @@ describe("Anthropic provider", () => { }; const stream = streamAnthropic( - makeAnthropicModel(), + makeAnthropicModel({ + id: "claude-fable-5", + name: "Claude Fable 5", + }), { messages: [ { role: "user", content: "hello", timestamp: 0 }, @@ -116,7 +125,7 @@ describe("Anthropic provider", () => { role: "assistant", provider: "anthropic", api: "anthropic-messages", - model: "claude-sonnet-4-6", + model: 
"claude-fable-5", stopReason: "stop", timestamp: 0, usage: { @@ -133,6 +142,11 @@ describe("Anthropic provider", () => { thinking: signedThinking, thinkingSignature: "sig_1", }, + { + type: "thinking", + thinking: "", + thinkingSignature: "sig_omitted", + }, { type: "thinking", thinking: `sanitize${highSurrogate}synthetic`, @@ -152,7 +166,7 @@ describe("Anthropic provider", () => { }, ); - await stream.result(); + const result = await stream.result(); const payload = capturedPayload as { messages: Array<{ role: string; content: unknown[] }> }; const assistantMessage = payload.messages.find((message) => message.role === "assistant"); @@ -163,10 +177,182 @@ describe("Anthropic provider", () => { thinking: signedThinking, signature: "sig_1", }, + { + type: "thinking", + thinking: "", + signature: "sig_omitted", + }, + ]); + expect(result.responseModel).toBe("claude-fable-5"); + }); + + it.each([ + ["anthropic", "sk-ant-provider"], + ["anthropic-vertex", "vertex-token"], + ])("surfaces structured Anthropic streaming refusals for %s", async (provider, apiKey) => { + const client = { + messages: { + create: vi.fn(() => ({ + asResponse: () => + Promise.resolve( + createSseResponse([ + { + type: "message_start", + message: { id: "msg_refusal", usage: { input_tokens: 3, output_tokens: 0 } }, + }, + { + type: "content_block_start", + index: 0, + content_block: { type: "text", text: "" }, + }, + { + type: "content_block_delta", + index: 0, + delta: { type: "text_delta", text: "discard this partial output" }, + }, + { type: "content_block_stop", index: 0 }, + { + type: "message_delta", + delta: { + stop_reason: "refusal", + stop_details: { + type: "refusal", + category: "cyber", + explanation: "This request is not allowed.", + }, + }, + usage: { input_tokens: 3, output_tokens: 2 }, + }, + { type: "message_stop" }, + ]), + ), + })), + }, + }; + + const stream = streamAnthropic( + makeAnthropicModel({ + id: "claude-fable-5", + name: "Claude Fable 5", + provider, + }), + { 
messages: [{ role: "user", content: "hello", timestamp: 0 }] }, + { apiKey, client: client as never }, + ); + const eventTypes: string[] = []; + for await (const event of stream) { + eventTypes.push(event.type); + } + const result = await stream.result(); + + expect(eventTypes).toEqual(["error"]); + expect(result.stopReason).toBe("error"); + expect(result.content).toEqual([]); + expect(result.errorMessage).toBe( + "Anthropic refusal (category: cyber): This request is not allowed.", + ); + expect(result.usage).toMatchObject({ input: 3, output: 2 }); + expect(result.diagnostics).toEqual([ + expect.objectContaining({ + type: "provider_refusal", + details: { + provider, + category: "cyber", + explanation: "This request is not allowed.", + }, + }), ]); }); - it("clamps max adaptive effort when the Claude model does not advertise it", async () => { + it("discards buffered Fable output when the stream fails before terminal status", async () => { + const client = { + messages: { + create: vi.fn(() => ({ + asResponse: () => + Promise.resolve( + createSseResponse([ + { + type: "content_block_start", + index: 0, + content_block: { type: "text", text: "" }, + }, + { + type: "content_block_delta", + index: 0, + delta: { type: "text_delta", text: "unsafe partial output" }, + }, + ]), + ), + })), + }, + }; + const stream = streamAnthropic( + makeAnthropicModel({ id: "claude-fable-5", name: "Claude Fable 5" }), + { messages: [{ role: "user", content: "hello", timestamp: 0 }] }, + { apiKey: "sk-ant-provider", client: client as never }, + ); + const eventTypes: string[] = []; + for await (const event of stream) { + eventTypes.push(event.type); + } + const result = await stream.result(); + + expect(eventTypes).toEqual(["error"]); + expect(result.stopReason).toBe("error"); + expect(result.content).toEqual([]); + expect(result.errorMessage).toContain("ended before message_stop"); + }); + + it("strips Fable thinking when replay targets Anthropic Vertex", async () => { + let 
capturedPayload: unknown; + const stream = streamAnthropic( + makeAnthropicModel({ + provider: "anthropic-vertex", + id: "claude-opus-4-8", + name: "Claude Opus 4.8", + }), + { + messages: [ + { role: "user", content: "hello", timestamp: 0 }, + { + role: "assistant", + provider: "anthropic", + api: "anthropic-messages", + model: "claude-fable-5", + stopReason: "stop", + timestamp: 0, + content: [ + { + type: "thinking", + thinking: "model-bound thought", + thinkingSignature: "sig_model_bound", + }, + { type: "text", text: "visible answer" }, + ], + }, + { role: "user", content: "continue", timestamp: 0 }, + ], + } as Context, + { + apiKey: "vertex-token", + onPayload: (payload) => { + capturedPayload = payload; + }, + }, + ); + + await stream.result(); + + const payload = capturedPayload as { messages: Array<{ role: string; content: unknown[] }> }; + const assistantMessage = payload.messages.find((message) => message.role === "assistant"); + expect(assistantMessage?.content).toEqual([{ type: "text", text: "visible answer" }]); + expect(JSON.stringify(assistantMessage)).not.toContain("sig_model_bound"); + }); + + it.each([ + { reasoning: "xhigh", expectedEffort: "high" }, + { reasoning: "max", expectedEffort: "max" }, + ] as const)("maps Claude 4.6 $reasoning effort", async ({ reasoning, expectedEffort }) => { let capturedPayload: unknown; const stream = streamSimpleAnthropic( makeAnthropicModel({ @@ -178,7 +364,7 @@ describe("Anthropic provider", () => { }, { apiKey: "sk-ant-provider", - reasoning: "max", + reasoning, onPayload: (payload) => { capturedPayload = payload; }, @@ -188,7 +374,290 @@ describe("Anthropic provider", () => { await stream.result(); expect((capturedPayload as { output_config?: unknown }).output_config).toEqual({ - effort: "high", + effort: expectedEffort, + }); + }); + + it.each([ + { + id: "claude-opus-4.6-1m", + reasoning: "xhigh", + thinkingLevelMap: { xhigh: null, max: null }, + expectedEffort: "high", + }, + { + id: 
"claude-opus-4.7-1m-internal", + reasoning: "max", + thinkingLevelMap: { xhigh: "xhigh" }, + expectedEffort: "xhigh", + }, + ] as const)( + "honors proxy effort restrictions for $id", + async ({ id, reasoning, thinkingLevelMap, expectedEffort }) => { + let capturedPayload: unknown; + const stream = streamSimpleAnthropic( + makeAnthropicModel({ + id, + provider: "github-copilot", + thinkingLevelMap, + }), + { messages: [{ role: "user", content: "hello", timestamp: 0 }] }, + { + apiKey: "copilot-token", + reasoning, + onPayload: (payload) => { + capturedPayload = payload; + }, + }, + ); + + await stream.result(); + + expect((capturedPayload as { output_config?: unknown }).output_config).toEqual({ + effort: expectedEffort, + }); + }, + ); + + it("uses always-on adaptive thinking for Claude Fable 5", async () => { + let capturedPayload: unknown; + const stream = streamSimpleAnthropic( + makeAnthropicModel({ + id: "prod-primary", + name: "Production Claude", + provider: "microsoft-foundry", + params: { canonicalModelId: "claude-fable-5" }, + reasoning: false, + baseUrl: "https://example.services.ai.azure.com/anthropic", + }), + { + messages: [{ role: "user", content: "hello", timestamp: 0 }], + }, + { + apiKey: "sk-ant-provider", + temperature: 0.2, + onPayload: (payload) => { + capturedPayload = payload; + }, + }, + ); + + await stream.result(); + + expect(capturedPayload).toMatchObject({ + thinking: { type: "adaptive", display: "summarized" }, + output_config: { effort: "high" }, + }); + expect(capturedPayload).not.toHaveProperty("temperature"); + }); + + it.each([ + { + id: "prod-primary", + name: "Claude Fable 5", + params: undefined, + }, + ])("does not infer the Fable contract from noncanonical metadata", async (overrides) => { + let capturedPayload: unknown; + const stream = streamSimpleAnthropic( + makeAnthropicModel({ + ...overrides, + reasoning: false, + }), + { messages: [{ role: "user", content: "hello", timestamp: 0 }] }, + { + apiKey: "sk-ant-provider", + 
temperature: 0.2, + onPayload: (payload) => { + capturedPayload = payload; + }, + }, + ); + + await stream.result(); + + expect(capturedPayload).toMatchObject({ temperature: 0.2 }); + expect(capturedPayload).not.toHaveProperty("thinking"); + }); + + it("uses canonical Claude policy for deployment aliases", async () => { + let capturedPayload: unknown; + const stream = streamSimpleAnthropic( + makeAnthropicModel({ + id: "production-claude", + name: "Production Claude", + params: { canonicalModelId: "claude-opus-4-8" }, + reasoning: false, + thinkingLevelMap: { xhigh: "xhigh", max: "max" }, + }), + { messages: [{ role: "user", content: "hello", timestamp: 0 }] }, + { + apiKey: "sk-ant-provider", + reasoning: "xhigh", + temperature: 0.2, + onPayload: (payload) => { + capturedPayload = payload; + }, + }, + ); + + await stream.result(); + + expect(capturedPayload).toMatchObject({ + model: "production-claude", + thinking: { type: "adaptive" }, + output_config: { effort: "xhigh" }, + }); + expect(capturedPayload).not.toHaveProperty("temperature"); + }); + + it.each([ + { canonicalModelId: "claude-opus-4-8", expectedTemperature: undefined }, + { canonicalModelId: "claude-opus-4-6", expectedTemperature: 0.2 }, + ] as const)( + "normalizes temperature for canonical $canonicalModelId aliases when thinking is off", + async ({ canonicalModelId, expectedTemperature }) => { + let capturedPayload: unknown; + const stream = streamSimpleAnthropic( + makeAnthropicModel({ + id: "production-claude", + params: { canonicalModelId }, + reasoning: false, + thinkingLevelMap: { xhigh: "xhigh", max: "max" }, + }), + { messages: [{ role: "user", content: "hello", timestamp: 0 }] }, + { + apiKey: "sk-ant-provider", + temperature: 0.2, + onPayload: (payload) => { + capturedPayload = payload; + }, + }, + ); + + await stream.result(); + + expect((capturedPayload as { temperature?: number }).temperature).toBe(expectedTemperature); + }, + ); + + it("normalizes forced Fable tool choice to auto", 
async () => { + let capturedPayload: unknown; + const stream = streamAnthropic( + makeAnthropicModel({ + id: "claude-fable-5", + name: "Claude Fable 5", + }), + { + messages: [{ role: "user", content: "Use a tool.", timestamp: 0 }], + }, + { + apiKey: "sk-ant-provider", + thinkingEnabled: true, + effort: "high", + toolChoice: "any", + onPayload: (payload) => { + capturedPayload = payload; + }, + }, + ); + + await stream.result(); + + expect(capturedPayload).toMatchObject({ + thinking: { type: "adaptive", display: "summarized" }, + tool_choice: { type: "auto" }, + }); + }); + + it("preserves Claude Fable 5 high effort when catalog reasoning is false", async () => { + const model = makeAnthropicModel({ + id: "claude-fable-5", + name: "Claude Fable 5", + reasoning: false, + }); + for (const testCase of [ + { reasoning: "off", effort: "low" }, + { reasoning: "high", effort: "high" }, + { reasoning: "xhigh", effort: "xhigh" }, + ] as const) { + let capturedPayload: unknown; + const stream = streamSimpleAnthropic( + model, + { + messages: [{ role: "user", content: "hello", timestamp: 0 }], + }, + { + apiKey: "sk-ant-provider", + reasoning: testCase.reasoning, + onPayload: (payload: unknown) => { + capturedPayload = payload; + }, + } as unknown as Parameters[2], + ); + + await stream.result(); + + expect(capturedPayload).toMatchObject({ + thinking: { type: "adaptive", display: "summarized" }, + output_config: { effort: testCase.effort }, + }); + } + }); + + it("honors provider effort restrictions for Claude Fable 5", async () => { + let capturedPayload: unknown; + const stream = streamSimpleAnthropic( + makeAnthropicModel({ + id: "claude-fable-5", + name: "Claude Fable 5", + provider: "github-copilot", + reasoning: false, + thinkingLevelMap: { xhigh: null, max: null }, + }), + { messages: [{ role: "user", content: "hello", timestamp: 0 }] }, + { + apiKey: "copilot-token", + reasoning: "xhigh", + onPayload: (payload) => { + capturedPayload = payload; + }, + }, + ); + + 
await stream.result(); + + expect(capturedPayload).toMatchObject({ + thinking: { type: "adaptive", display: "summarized" }, + output_config: { effort: "high" }, + }); + }); + + it("uses the Claude Fable 5 contract on Anthropic Vertex", async () => { + let capturedPayload: unknown; + const stream = streamSimpleAnthropic( + makeAnthropicModel({ + id: "claude-fable-5", + name: "Claude Fable 5", + provider: "anthropic-vertex", + }), + { + messages: [{ role: "user", content: "hello", timestamp: 0 }], + }, + { + apiKey: "vertex-token", + reasoning: "high", + onPayload: (payload) => { + capturedPayload = payload; + }, + }, + ); + + await stream.result(); + + expect(capturedPayload).toMatchObject({ + thinking: { type: "adaptive", display: "summarized" }, + output_config: { effort: "high" }, }); }); diff --git a/src/llm/providers/anthropic.ts b/src/llm/providers/anthropic.ts index c2247db4f9e8..4ac247a63a8f 100644 --- a/src/llm/providers/anthropic.ts +++ b/src/llm/providers/anthropic.ts @@ -12,17 +12,29 @@ import { splitSystemPromptCacheBoundary, stripSystemPromptCacheBoundary, } from "../../agents/system-prompt-cache-boundary.js"; +import { + resolveClaudeNativeThinkingLevelMap, + supportsClaudeAdaptiveThinking, + supportsClaudeNativeMaxEffort, + supportsClaudeNativeXhighEffort, + usesClaudeFable5MessagesContract, +} from "../../shared/anthropic-model-contract.js"; +import { applyAnthropicRefusal } from "../../shared/anthropic-refusal.js"; +import { createDeferredEventBuffer } from "../../shared/deferred-event-buffer.js"; +import { notifyLlmRequestActivity } from "../../shared/llm-request-activity.js"; import { getEnvApiKey } from "../env-api-keys.js"; import { calculateCost, clampThinkingLevel } from "../model-utils.js"; import type { AnthropicMessagesCompat, Api, AssistantMessage, + AssistantMessageEvent, CacheRetention, Context, ImageContent, Message, Model, + ModelThinkingLevel, SimpleStreamOptions, StopReason, StreamFunction, @@ -399,6 +411,7 @@ async function* 
iterateSseMessages( async function* iterateAnthropicEvents( response: Response, signal?: AbortSignal, + requireMessageStop = false, ): AsyncGenerator { if (!response.body) { throw new Error("Attempted to iterate over an Anthropic response with no body"); @@ -433,7 +446,7 @@ async function* iterateAnthropicEvents( } } - if (sawMessageStart && !sawMessageEnd) { + if ((sawMessageStart || requireMessageStop) && !sawMessageEnd) { throw new Error("Anthropic stream ended before message_stop"); } } @@ -463,6 +476,14 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti stopReason: "stop", timestamp: Date.now(), }; + // Fable classifiers can refuse after partial generation, so no event is + // safe to expose until the terminal stop reason is known. + const refusalBuffer = usesClaudeFable5MessagesContract(model) + ? createDeferredEventBuffer(stream, () => + notifyLlmRequestActivity(options?.signal), + ) + : undefined; + const eventSink = refusalBuffer ?? stream; try { let client: Anthropic; @@ -521,9 +542,14 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti }; const blocks = output.content as Block[]; - for await (const event of iterateAnthropicEvents(response, options?.signal)) { + for await (const event of iterateAnthropicEvents( + response, + options?.signal, + refusalBuffer !== undefined, + )) { if (event.type === "message_start") { output.responseId = event.message.id; + output.responseModel = event.message.model; output.usage.input = event.message.usage.input_tokens || 0; output.usage.output = event.message.usage.output_tokens || 0; output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0; @@ -538,7 +564,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti // (e.g. invalid thinking signatures) arrive before any non-error event // is yielded, keeping yieldedOutput=false in pumpStreamWithRecovery // and allowing the thinking-block recovery retry to fire. 
- stream.push({ type: "start", partial: output }); + eventSink.push({ type: "start", partial: output }); } else if (event.type === "content_block_start") { if (event.content_block.type === "text") { const block: Block = { @@ -547,7 +573,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti index: event.index, }; output.content.push(block); - stream.push({ + eventSink.push({ type: "text_start", contentIndex: output.content.length - 1, partial: output, @@ -560,7 +586,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti index: event.index, }; output.content.push(block); - stream.push({ + eventSink.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output, @@ -574,7 +600,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti index: event.index, }; output.content.push(block); - stream.push({ + eventSink.push({ type: "thinking_start", contentIndex: output.content.length - 1, partial: output, @@ -591,7 +617,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti index: event.index, }; output.content.push(block); - stream.push({ + eventSink.push({ type: "toolcall_start", contentIndex: output.content.length - 1, partial: output, @@ -603,7 +629,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti const block = blocks[index]; if (block && block.type === "text") { block.text += event.delta.text; - stream.push({ + eventSink.push({ type: "text_delta", contentIndex: index, delta: event.delta.text, @@ -615,7 +641,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti const block = blocks[index]; if (block && block.type === "thinking") { block.thinking += event.delta.thinking; - stream.push({ + eventSink.push({ type: "thinking_delta", contentIndex: index, delta: event.delta.thinking, @@ -628,7 +654,7 @@ export const streamAnthropic: 
StreamFunction<"anthropic-messages", AnthropicOpti if (block && block.type === "toolCall") { block.partialJson += event.delta.partial_json; block.arguments = parseStreamingJson(block.partialJson); - stream.push({ + eventSink.push({ type: "toolcall_delta", contentIndex: index, delta: event.delta.partial_json, @@ -649,14 +675,14 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti if (block) { delete (block as Partial).index; if (block.type === "text") { - stream.push({ + eventSink.push({ type: "text_end", contentIndex: index, content: block.text, partial: output, }); } else if (block.type === "thinking") { - stream.push({ + eventSink.push({ type: "thinking_end", contentIndex: index, content: block.thinking, @@ -667,7 +693,7 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti // Finalize in-place and strip the scratch buffer so replay only // carries parsed arguments. delete (block as { partialJson?: string }).partialJson; - stream.push({ + eventSink.push({ type: "toolcall_end", contentIndex: index, toolCall: block, @@ -677,7 +703,11 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti } } else if (event.type === "message_delta") { if (event.delta.stop_reason) { - output.stopReason = mapStopReason(event.delta.stop_reason); + if (event.delta.stop_reason === "refusal") { + applyAnthropicRefusal(output, event.delta.stop_details, model.provider); + } else { + output.stopReason = mapStopReason(event.delta.stop_reason); + } } // Only update usage fields if present (not null). // Preserves input_tokens from message_start when proxies omit it in message_delta. @@ -708,9 +738,10 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti } if (output.stopReason === "aborted" || output.stopReason === "error") { - throw new Error("An unknown error occurred"); + throw new Error(output.errorMessage ?? 
"An unknown error occurred"); } + refusalBuffer?.flush(); stream.push({ type: "done", reason: output.stopReason, message: output }); stream.end(); } catch (error) { @@ -719,6 +750,10 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti // partialJson is only a streaming scratch buffer; never persist it. delete (block as { partialJson?: string }).partialJson; } + if (refusalBuffer) { + refusalBuffer.discard(); + output.content = []; + } output.stopReason = options?.signal?.aborted ? "aborted" : "error"; output.errorMessage = error instanceof Error ? error.message : JSON.stringify(error); stream.push({ type: "error", reason: output.stopReason, error: output }); @@ -729,21 +764,28 @@ export const streamAnthropic: StreamFunction<"anthropic-messages", AnthropicOpti return stream; }; +function normalizeAnthropicToolChoice( + model: Model<"anthropic-messages">, + toolChoice: AnthropicOptions["toolChoice"], +) { + if ( + usesClaudeFable5MessagesContract(model) && + (toolChoice === "any" || (typeof toolChoice === "object" && toolChoice.type === "tool")) + ) { + return { type: "auto" as const }; + } + return typeof toolChoice === "string" ? { type: toolChoice } : toolChoice; +} + /** - * Check if a model supports adaptive thinking (Opus 4.6+, Sonnet 4.6) + * Check if a model supports adaptive thinking (Fable 5, Opus 4.6+, Sonnet 4.6). 
*/ -function supportsAdaptiveThinking(modelId: string): boolean { - // Adaptive-thinking model IDs (with or without date suffix) - return ( - modelId.includes("opus-4-6") || - modelId.includes("opus-4.6") || - modelId.includes("opus-4-8") || - modelId.includes("opus-4.8") || - modelId.includes("opus-4-7") || - modelId.includes("opus-4.7") || - modelId.includes("sonnet-4-6") || - modelId.includes("sonnet-4.6") - ); +function supportsAdaptiveThinking(model: Model<"anthropic-messages">): boolean { + return supportsClaudeAdaptiveThinking(model); +} + +function supportsNativeXhighEffort(model: Model<"anthropic-messages">): boolean { + return supportsClaudeNativeXhighEffort(model); } /** @@ -754,13 +796,24 @@ function mapThinkingLevelToEffort( model: Model<"anthropic-messages">, level: SimpleStreamOptions["reasoning"], ): AnthropicEffort { - const clampedLevel = level ? clampThinkingLevel(model, level) : undefined; - const mapped = clampedLevel ? model.thinkingLevelMap?.[clampedLevel] : undefined; + const requestedLevel = level as ModelThinkingLevel | undefined; + const hasCanonicalAlias = typeof model.params?.canonicalModelId === "string"; + const thinkingLevelMap = resolveClaudeNativeThinkingLevelMap(model); + const clampModel = { + ...model, + ...(hasCanonicalAlias ? { reasoning: true } : {}), + ...(thinkingLevelMap ? { thinkingLevelMap } : {}), + }; + const clampedLevel = requestedLevel + ? clampThinkingLevel(clampModel, requestedLevel) + : requestedLevel; + const mapped = clampedLevel ? thinkingLevelMap?.[clampedLevel] : undefined; if (typeof mapped === "string") { return mapped as AnthropicEffort; } switch (clampedLevel) { + case "off": case "minimal": case "low": return "low"; @@ -768,8 +821,10 @@ function mapThinkingLevelToEffort( return "medium"; case "high": return "high"; + case "xhigh": + return supportsNativeXhighEffort(model) ? "xhigh" : "high"; case "max": - return "max"; + return supportsClaudeNativeMaxEffort(model) ? 
"max" : "high"; default: return "high"; } @@ -787,15 +842,17 @@ export const streamSimpleAnthropic: StreamFunction<"anthropic-messages", SimpleS const base = buildBaseOptions(model, options, apiKey); if (!options?.reasoning) { + const fable5 = usesClaudeFable5MessagesContract(model); return streamAnthropic(model, context, { ...base, - thinkingEnabled: false, + thinkingEnabled: fable5, + ...(fable5 ? { effort: "high" as const } : {}), } satisfies AnthropicOptions); } // For Opus 4.6 and Sonnet 4.6: use adaptive thinking with effort level // For older models: use budget-based thinking - if (supportsAdaptiveThinking(model.id)) { + if (supportsAdaptiveThinking(model)) { const effort = mapThinkingLevelToEffort(model, options.reasoning); return streamAnthropic(model, context, { ...base, @@ -836,7 +893,7 @@ function createClient( ): { client: Anthropic; isOAuthToken: boolean } { // Adaptive thinking models (Opus 4.6, Sonnet 4.6) have interleaved thinking built-in. // The beta header is deprecated on Opus 4.6 and redundant on Sonnet 4.6, so skip it. - const needsInterleavedBeta = interleavedThinking && !supportsAdaptiveThinking(model.id); + const needsInterleavedBeta = interleavedThinking && !supportsAdaptiveThinking(model); const betaFeatures: string[] = []; if (useFineGrainedToolStreamingBeta) { betaFeatures.push(FINE_GRAINED_TOOL_STREAMING_BETA); @@ -977,8 +1034,12 @@ function buildParams( params.system = system; } - // Temperature is incompatible with extended thinking (adaptive or budget-based). - if (options?.temperature !== undefined && !options?.thinkingEnabled) { + // Thinking and post-4.6 Claude models reject custom temperature values. 
+ if ( + options?.temperature !== undefined && + !options?.thinkingEnabled && + !supportsNativeXhighEffort(model) + ) { params.temperature = options.temperature; } @@ -990,30 +1051,33 @@ function buildParams( params.tools = tools; } - // Configure thinking mode: adaptive (Opus 4.6+ and Sonnet 4.6), + // Configure thinking mode: always-on adaptive (Fable 5), adaptive (Opus + // 4.6+ and Sonnet 4.6), // budget-based (older models), or explicitly disabled. - if (model.reasoning) { - if (options?.thinkingEnabled) { + const fable5 = usesClaudeFable5MessagesContract(model); + if (fable5 || model.reasoning || supportsAdaptiveThinking(model)) { + if (fable5 || options?.thinkingEnabled) { // Default to "summarized" so Opus 4.7+ and Mythos Preview behave like // older Claude 4 models (whose API default is also "summarized"). - const display: AnthropicThinkingDisplay = options.thinkingDisplay ?? "summarized"; - if (supportsAdaptiveThinking(model.id)) { + const display: AnthropicThinkingDisplay = options?.thinkingDisplay ?? "summarized"; + if (supportsAdaptiveThinking(model)) { // Adaptive thinking: Claude decides when and how much to think. params.thinking = { type: "adaptive", display }; - if (options.effort) { + const effort = options?.effort ?? (fable5 ? "high" : undefined); + if (effort) { // The Anthropic SDK types can lag newly supported effort values such as "xhigh". params.output_config = - options.effort === "xhigh" - ? ({ effort: options.effort } as unknown as NonNullable< + effort === "xhigh" + ? 
({ effort } as unknown as NonNullable< MessageCreateParamsStreaming["output_config"] >) - : { effort: options.effort }; + : { effort }; } } else { // Budget-based thinking for older models params.thinking = { type: "enabled", - budget_tokens: options.thinkingBudgetTokens || 1024, + budget_tokens: options?.thinkingBudgetTokens || 1024, display, }; } @@ -1030,11 +1094,7 @@ function buildParams( } if (options?.toolChoice) { - if (typeof options.toolChoice === "string") { - params.tool_choice = { type: options.toolChoice }; - } else { - params.tool_choice = options.toolChoice; - } + params.tool_choice = normalizeAnthropicToolChoice(model, options.toolChoice); } return params; @@ -1120,13 +1180,16 @@ function convertMessages( }); continue; } - if (block.thinking.trim().length === 0) { + const thinkingSignature = block.thinkingSignature?.trim(); + const hasNativeThinkingSignature = + Boolean(thinkingSignature) && thinkingSignature !== "reasoning_content"; + if (block.thinking.trim().length === 0 && !hasNativeThinkingSignature) { continue; } // If thinking signature is missing/empty (e.g., from aborted stream), // convert to plain text block without tags to avoid API rejection // and prevent Claude from mimicking the tags in responses - if (!block.thinkingSignature || block.thinkingSignature.trim().length === 0) { + if (!thinkingSignature) { blocks.push({ type: "text", text: sanitizeSurrogates(block.thinking), @@ -1134,13 +1197,13 @@ function convertMessages( } else { // OpenAI-compatible reasoning markers are field names, not native // Anthropic replay signatures; sending them bricks persisted replays. 
- if (block.thinkingSignature === "reasoning_content") { + if (thinkingSignature === "reasoning_content") { continue; } blocks.push({ type: "thinking", thinking: block.thinking, - signature: block.thinkingSignature, + signature: thinkingSignature, }); } } else if (block.type === "toolCall") { diff --git a/src/llm/providers/transform-messages.ts b/src/llm/providers/transform-messages.ts index 63bdeb274d48..68ff48c86d55 100644 --- a/src/llm/providers/transform-messages.ts +++ b/src/llm/providers/transform-messages.ts @@ -1,3 +1,4 @@ +import { resolveModelBoundThinkingReplayMode } from "../../shared/anthropic-model-contract.js"; // Provider message transform helpers convert runtime messages to provider payloads. import type { Api, @@ -96,13 +97,31 @@ export function transformMessages( // Assistant messages need transformation check if (msg.role === "assistant") { const assistantMsg = msg; + const modelBoundThinkingReplayMode = resolveModelBoundThinkingReplayMode({ + source: { + provider: assistantMsg.provider, + api: assistantMsg.api, + modelId: assistantMsg.model, + responseModelId: assistantMsg.responseModel, + }, + target: { + provider: model.provider, + api: model.api, + modelId: model.id, + modelParams: model.params, + }, + }); const isSameModel = - assistantMsg.provider === model.provider && - assistantMsg.api === model.api && - assistantMsg.model === model.id; + modelBoundThinkingReplayMode === "preserve" || + (assistantMsg.provider === model.provider && + assistantMsg.api === model.api && + assistantMsg.model === model.id); const transformedContent = assistantMsg.content.flatMap((block) => { if (block.type === "thinking") { + if (modelBoundThinkingReplayMode === "drop") { + return []; + } // Redacted thinking is opaque encrypted content, only valid for the same model. // Drop it for cross-model to avoid API errors. 
if (block.redacted) { diff --git a/src/plugin-sdk/provider-model-shared.test.ts b/src/plugin-sdk/provider-model-shared.test.ts index c28ff606f8e2..ae0627a09e2c 100644 --- a/src/plugin-sdk/provider-model-shared.test.ts +++ b/src/plugin-sdk/provider-model-shared.test.ts @@ -8,7 +8,10 @@ import { NATIVE_ANTHROPIC_REPLAY_HOOKS, OPENAI_COMPATIBLE_REPLAY_HOOKS, PASSTHROUGH_GEMINI_REPLAY_HOOKS, + resolveClaudeFable5ModelIdentity, resolveClaudeThinkingProfile, + supportsClaudeAdaptiveThinking, + supportsClaudeNativeXhighEffort, } from "./provider-model-shared.js"; function expectFields(value: unknown, expected: Record): void { @@ -34,6 +37,28 @@ function expectLevelIdsInclude(profile: unknown, expectedIds: readonly string[]) } } +describe("Claude model contracts", () => { + it("recognizes Vertex date suffixes", () => { + expect(resolveClaudeFable5ModelIdentity({ id: "claude-fable-5@20260601" })).toBe( + "claude-fable-5@20260601", + ); + expect(supportsClaudeAdaptiveThinking({ id: "claude-sonnet-4-6@20260301" })).toBe(true); + expect(supportsClaudeNativeXhighEffort({ id: "claude-opus-4-8@20260401" })).toBe(true); + }); + + it("does not classify later numeric model versions as supported aliases", () => { + expect(supportsClaudeAdaptiveThinking({ id: "claude-sonnet-4-60" })).toBe(false); + expect(supportsClaudeNativeXhighEffort({ id: "claude-opus-4-80" })).toBe(false); + expect(readLevelIds(resolveClaudeThinkingProfile("claude-opus-4-80"))).toEqual([ + "off", + "minimal", + "low", + "medium", + "high", + ]); + }); +}); + describe("buildProviderReplayFamilyHooks", () => { it("covers the replay family matrix", () => { const cases = [ @@ -282,6 +307,15 @@ describe("buildProviderReplayFamilyHooks", () => { }); describe("resolveClaudeThinkingProfile", () => { + it("exposes Fable 5's always-adaptive profile to Claude providers", () => { + const profile = resolveClaudeThinkingProfile("claude-fable-5"); + expectFields(profile, { + defaultLevel: "high", + 
preserveWhenCatalogReasoningFalse: true, + }); + expectLevelIdsInclude(profile, ["xhigh", "adaptive", "max"]); + }); + it("leaves Opus 4.8 thinking off by default with xhigh/adaptive/max options", () => { const profile = resolveClaudeThinkingProfile("claude-opus-4-8"); expectFields(profile, { diff --git a/src/plugin-sdk/provider-model-shared.ts b/src/plugin-sdk/provider-model-shared.ts index f70dbcc27bab..bb26fae2ba18 100644 --- a/src/plugin-sdk/provider-model-shared.ts +++ b/src/plugin-sdk/provider-model-shared.ts @@ -1,3 +1,10 @@ +import { + CLAUDE_FABLE_5_THINKING_PROFILE, + resolveClaudeFable5ModelIdentity, + resolveClaudeModelIdentity, + supportsClaudeAdaptiveThinking, + supportsClaudeNativeXhighEffort, +} from "@openclaw/llm-core"; // Provider model helpers normalize model catalog entries shared by provider plugins. import { normalizeProviderId as normalizeProviderIdCore } from "@openclaw/model-catalog-core/provider-id"; import { @@ -27,6 +34,14 @@ export type { ModelApi, ModelProviderDeclarationConfig as ModelProviderConfig, } from "../config/types.models.js"; +export { + resolveClaudeFable5ModelIdentity, + resolveClaudeModelIdentity, + resolveClaudeNativeThinkingLevelMap, + supportsClaudeAdaptiveThinking, + supportsClaudeNativeMaxEffort, + supportsClaudeNativeXhighEffort, +} from "@openclaw/llm-core"; export type { UnifiedModelCatalogEntry, UnifiedModelCatalogKind, @@ -100,14 +115,6 @@ export { } from "../plugins/provider-model-helpers.js"; import { normalizeOptionalLowercaseString } from "../../packages/normalization-core/src/string-coerce.js"; -const CLAUDE_OPUS_48_MODEL_PREFIXES = ["claude-opus-4-8", "claude-opus-4.8"] as const; -const CLAUDE_OPUS_47_MODEL_PREFIXES = ["claude-opus-4-7", "claude-opus-4.7"] as const; -const CLAUDE_ADAPTIVE_THINKING_DEFAULT_MODEL_PREFIXES = [ - "claude-opus-4-6", - "claude-opus-4.6", - "claude-sonnet-4-6", - "claude-sonnet-4.6", -] as const; const BASE_CLAUDE_THINKING_LEVELS = [ { id: "off" }, { id: "minimal" }, @@ -136,47 
+143,40 @@ export function isProxyReasoningUnsupportedModelHint( return getModelProviderHint(modelId) === "x-ai"; } -function matchesClaudeModelPrefix(modelId: string, prefixes: readonly string[]): boolean { - const lower = normalizeOptionalLowercaseString(modelId); - return Boolean(lower && prefixes.some((prefix) => lower.startsWith(prefix))); -} - -function isClaudeOpus47ModelId(modelId: string): boolean { - return matchesClaudeModelPrefix(modelId, CLAUDE_OPUS_47_MODEL_PREFIXES); -} - -function isClaudeOpus48ModelId(modelId: string): boolean { - return matchesClaudeModelPrefix(modelId, CLAUDE_OPUS_48_MODEL_PREFIXES); -} - /** @deprecated Anthropic provider-owned model helper; do not use from third-party plugins. */ export function isClaudeAdaptiveThinkingDefaultModelId( /** Claude model id to check against adaptive-thinking default families. */ modelId: string, ): boolean { - return matchesClaudeModelPrefix(modelId, CLAUDE_ADAPTIVE_THINKING_DEFAULT_MODEL_PREFIXES); + const ref = { id: modelId }; + return supportsClaudeAdaptiveThinking(ref) && !supportsClaudeNativeXhighEffort(ref); } /** @deprecated Anthropic provider-owned model helper; do not use from third-party plugins. */ export function resolveClaudeThinkingProfile( /** Claude model id used to choose available thinking levels and defaults. 
*/ modelId: string, + params?: Record, + options?: { includeNativeMax?: boolean }, ): ProviderThinkingProfile { - if (isClaudeOpus48ModelId(modelId)) { + const ref = { id: modelId, params }; + const canonicalModelId = resolveClaudeModelIdentity(ref); + if (resolveClaudeFable5ModelIdentity(ref)) { + return CLAUDE_FABLE_5_THINKING_PROFILE; + } + if (supportsClaudeNativeXhighEffort(ref)) { return { levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }], defaultLevel: "off", }; } - if (isClaudeOpus47ModelId(modelId)) { + if (isClaudeAdaptiveThinkingDefaultModelId(canonicalModelId)) { return { - levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "xhigh" }, { id: "adaptive" }, { id: "max" }], - defaultLevel: "off", - }; - } - if (isClaudeAdaptiveThinkingDefaultModelId(modelId)) { - return { - levels: [...BASE_CLAUDE_THINKING_LEVELS, { id: "adaptive" }], + levels: [ + ...BASE_CLAUDE_THINKING_LEVELS, + { id: "adaptive" }, + ...(options?.includeNativeMax ? [{ id: "max" as const }] : []), + ], defaultLevel: "adaptive", }; } diff --git a/src/plugin-sdk/provider-stream-shared.ts b/src/plugin-sdk/provider-stream-shared.ts index 9a63642aca55..f96360bd81db 100644 --- a/src/plugin-sdk/provider-stream-shared.ts +++ b/src/plugin-sdk/provider-stream-shared.ts @@ -13,6 +13,9 @@ import type { StreamFn } from "../agents/runtime/index.js"; import { streamWithPayloadPatch } from "../llm/providers/stream-wrappers/stream-payload-utils.js"; import { streamSimple } from "../llm/stream.js"; import { createAssistantMessageEventStream } from "../llm/utils/event-stream.js"; +export { applyAnthropicRefusal } from "../shared/anthropic-refusal.js"; +export { createDeferredEventBuffer } from "../shared/deferred-event-buffer.js"; +export { notifyLlmRequestActivity, onLlmRequestActivity } from "../shared/llm-request-activity.js"; type ProviderWrapStreamFnContext = import("../plugins/types.js").ProviderWrapStreamFnContext; diff --git 
a/src/plugins/provider-replay-helpers.test.ts b/src/plugins/provider-replay-helpers.test.ts index 2978c8c38452..4d03d6cb3b99 100644 --- a/src/plugins/provider-replay-helpers.test.ts +++ b/src/plugins/provider-replay-helpers.test.ts @@ -118,6 +118,7 @@ describe("provider replay helpers", () => { it("preserves thinking blocks for Claude Opus 4.5+ and Sonnet 4.5+ models", () => { // These models should NOT drop thinking blocks for (const modelId of [ + "claude-fable-5", "claude-opus-4-5-20251101", "claude-opus-4-6", "claude-sonnet-4-5-20250929", diff --git a/src/plugins/provider-replay-helpers.ts b/src/plugins/provider-replay-helpers.ts index 7d83ddb0b96b..fa7100453633 100644 --- a/src/plugins/provider-replay-helpers.ts +++ b/src/plugins/provider-replay-helpers.ts @@ -89,8 +89,8 @@ export function buildStrictAnthropicReplayPolicy( /** * Returns true for Claude models that preserve thinking blocks in context - * natively (Opus 4.5+, Sonnet 4.5+, Haiku 4.5+). For these models, dropping - * thinking blocks from prior turns breaks prompt cache prefix matching. + * natively (Fable 5, Opus 4.5+, Sonnet 4.5+, Haiku 4.5+). For these models, + * dropping thinking blocks from prior turns breaks replay and prompt caching. * * See: https://platform.claude.com/docs/en/build-with-claude/extended-thinking#differences-in-thinking-across-model-versions * @@ -103,13 +103,19 @@ export function shouldPreserveThinkingBlocks(modelId?: string): boolean { } // Models that preserve thinking blocks natively (Claude 4.5+): + // - claude-fable-5 // - claude-opus-4-x (opus-4-5, opus-4-6, ...) // - claude-sonnet-4-x (sonnet-4-5, sonnet-4-6, ...) // Note: "sonnet-4" is safe — legacy "claude-3-5-sonnet" does not contain "sonnet-4" // - claude-haiku-4-x (haiku-4-5, ...) 
// Models that require dropping thinking blocks: // - claude-3-7-sonnet, claude-3-5-sonnet, and earlier - if (id.includes("opus-4") || id.includes("sonnet-4") || id.includes("haiku-4")) { + if ( + id.includes("fable-5") || + id.includes("opus-4") || + id.includes("sonnet-4") || + id.includes("haiku-4") + ) { return true; } diff --git a/src/plugins/provider-thinking.ts b/src/plugins/provider-thinking.ts index 65ed27b9311b..67598c2cbfb5 100644 --- a/src/plugins/provider-thinking.ts +++ b/src/plugins/provider-thinking.ts @@ -83,7 +83,7 @@ export function resolveProviderThinkingProfile( const activeProfile = resolveActiveThinkingProvider(params.provider)?.resolveThinkingProfile?.( params.context, ); - if (activeProfile) { + if (activeProfile !== undefined) { return activeProfile; } return resolveBundledProviderPolicySurface(params.provider)?.resolveThinkingProfile?.( diff --git a/src/plugins/provider-thinking.types.ts b/src/plugins/provider-thinking.types.ts index f9bc40ee61ff..231812930722 100644 --- a/src/plugins/provider-thinking.types.ts +++ b/src/plugins/provider-thinking.types.ts @@ -28,6 +28,7 @@ export type ProviderThinkingModelCompat = { */ export type ProviderDefaultThinkingPolicyContext = ProviderThinkingPolicyContext & { reasoning?: boolean; + params?: Record; compat?: ProviderThinkingModelCompat | null; }; diff --git a/src/shared/anthropic-model-contract.ts b/src/shared/anthropic-model-contract.ts new file mode 100644 index 000000000000..700c8832a101 --- /dev/null +++ b/src/shared/anthropic-model-contract.ts @@ -0,0 +1,83 @@ +// Model-bound thinking cannot be exposed or replayed after a model switch. 
+import { resolveClaudeFable5ModelIdentity } from "@openclaw/llm-core";
+import { normalizeLowercaseStringOrEmpty } from "@openclaw/normalization-core/string-coerce";
+export {
+  resolveClaudeFable5ModelIdentity,
+  resolveClaudeModelIdentity,
+  resolveClaudeNativeThinkingLevelMap,
+  supportsClaudeAdaptiveThinking,
+  supportsClaudeNativeMaxEffort,
+  supportsClaudeNativeXhighEffort,
+} from "@openclaw/llm-core";
+
+type ReplayModelRef = {
+  provider?: string;
+  api?: string;
+  modelId?: string;
+  responseModelId?: string;
+  modelParams?: Record<string, unknown>;
+};
+
+function normalizeModelId(modelId?: string): string {
+  const normalized = normalizeLowercaseStringOrEmpty(modelId);
+  const unprefixed = normalized.startsWith("anthropic/")
+    ? normalized.slice("anthropic/".length)
+    : normalized;
+  return unprefixed.replace(/[._\s]+/g, "-");
+}
+
+function normalizeApi(api?: string): string {
+  const normalized = normalizeLowercaseStringOrEmpty(api);
+  return normalized === "openclaw-anthropic-messages-transport" ? "anthropic-messages" : normalized;
+}
+
+function hasConcreteResponseModel(ref: ReplayModelRef): boolean {
+  const responseModelId = normalizeModelId(ref.responseModelId);
+  // Deployment APIs may echo the requested alias. Only a different response
+  // model proves the backing identity and overrides configured metadata.
+  return responseModelId.length > 0 && responseModelId !== normalizeModelId(ref.modelId);
+}
+
+export function usesClaudeFable5MessagesContract(model: {
+  id?: string;
+  params?: Record<string, unknown>;
+  api?: string;
+}): boolean {
+  return (
+    normalizeApi(model.api) === "anthropic-messages" &&
+    resolveClaudeFable5ModelIdentity(model) !== undefined
+  );
+}
+
+function resolveReplayFableIdentity(ref: ReplayModelRef): string | undefined {
+  if (normalizeApi(ref.api) !== "anthropic-messages") {
+    return undefined;
+  }
+  if (hasConcreteResponseModel(ref)) {
+    return resolveClaudeFable5ModelIdentity({ id: ref.responseModelId });
+  }
+  return resolveClaudeFable5ModelIdentity({ id: ref.modelId, params: ref.modelParams });
+}
+
+export function resolveModelBoundThinkingReplayMode(params: {
+  source: ReplayModelRef;
+  target: ReplayModelRef;
+}): "default" | "preserve" | "drop" {
+  const sourceApi = normalizeApi(params.source.api);
+  const targetApi = normalizeApi(params.target.api);
+  const sourceIdentity = resolveReplayFableIdentity(params.source);
+  const targetIdentity = resolveReplayFableIdentity(params.target);
+  const sameRoute =
+    normalizeLowercaseStringOrEmpty(params.source.provider) ===
+      normalizeLowercaseStringOrEmpty(params.target.provider) &&
+    sourceApi === targetApi &&
+    normalizeModelId(params.source.modelId) === normalizeModelId(params.target.modelId);
+  if (!sourceIdentity && !targetIdentity) {
+    return "default";
+  }
+  if (!sourceIdentity && !hasConcreteResponseModel(params.source) && targetIdentity && sameRoute) {
+    return "preserve";
+  }
+  const sameModel = sourceApi === targetApi && sourceIdentity === targetIdentity;
+  return sameModel ? "preserve" : "drop";
+}
diff --git a/src/shared/anthropic-refusal.ts b/src/shared/anthropic-refusal.ts
new file mode 100644
index 000000000000..b59dc377057d
--- /dev/null
+++ b/src/shared/anthropic-refusal.ts
@@ -0,0 +1,78 @@
+import type { AssistantMessageDiagnostic } from "../llm/types.js";
+
+/** Mutable slice of an assistant message that a refusal is recorded onto. */
+type AnthropicRefusalOutput = {
+  stopReason: string;
+  errorMessage?: string;
+  diagnostics?: AssistantMessageDiagnostic[];
+};
+
+/** Refusal fields after normalization; `null` marks a missing or blank value. */
+type AnthropicRefusalDetails = {
+  category: string | null;
+  explanation: string | null;
+};
+
+/** Returns the trimmed string, or `null` when `value` is not a non-blank string. */
+function readNullableString(value: unknown): string | null {
+  return typeof value === "string" && value.trim() ? value.trim() : null;
+}
+
+/**
+ * Reads `category` and `explanation` from an untyped stop-details payload.
+ * Anything that is not a non-null object yields both fields as `null`.
+ */
+function readAnthropicRefusalDetails(value: unknown): AnthropicRefusalDetails {
+  if (!value || typeof value !== "object") {
+    return { category: null, explanation: null };
+  }
+  const details = value as Record<string, unknown>;
+  return {
+    category: readNullableString(details.category),
+    explanation: readNullableString(details.explanation),
+  };
+}
+
+/**
+ * Builds the error text, e.g. `Anthropic refusal (category: x): why`, or
+ * `Anthropic refusal.` when neither detail is available.
+ */
+function formatAnthropicRefusalMessage(details: AnthropicRefusalDetails): string {
+  const category = details.category ? ` (category: ${details.category})` : "";
+  const explanation = details.explanation ? `: ${details.explanation}` : ".";
+  return `Anthropic refusal${category}${explanation}`;
+}
+
+/**
+ * Marks `output` as a failed turn caused by a provider refusal.
+ *
+ * Mutates `output` in place: sets `stopReason` to `"error"`, replaces
+ * `errorMessage`, and appends a `provider_refusal` diagnostic after any
+ * diagnostics already present.
+ *
+ * @param output - Message fields to update.
+ * @param stopDetails - Untyped refusal payload; only string `category` and
+ *   `explanation` properties are read.
+ * @param provider - Provider id recorded on the diagnostic.
+ */
+export function applyAnthropicRefusal(
+  output: AnthropicRefusalOutput,
+  stopDetails: unknown,
+  provider: string,
+): void {
+  const details = readAnthropicRefusalDetails(stopDetails);
+  output.stopReason = "error";
+  output.errorMessage = formatAnthropicRefusalMessage(details);
+  output.diagnostics = [
+    ...(output.diagnostics ?? []),
+    {
+      type: "provider_refusal",
+      timestamp: Date.now(),
+      details: {
+        provider,
+        category: details.category,
+        explanation: details.explanation,
+      },
+    },
+  ];
+}
diff --git a/src/shared/deferred-event-buffer.ts b/src/shared/deferred-event-buffer.ts
new file mode 100644
index 000000000000..16c9f1deecaa
--- /dev/null
+++ b/src/shared/deferred-event-buffer.ts
@@ -0,0 +1,33 @@
+/** Destination that receives events once the buffer is flushed. */
+type EventSink<T> = {
+  push(event: T): void;
+};
+
+/**
+ * Creates a buffer that holds events back from `sink` until the caller decides
+ * whether to deliver them (`flush`) or drop them (`discard`).
+ *
+ * @param sink - Receives the buffered events, in insertion order, on `flush`.
+ * @param onBufferedEvent - Optional callback invoked after each buffered `push`.
+ * @returns An object exposing `push`, `flush`, and `discard`.
+ */
+export function createDeferredEventBuffer<T>(sink: EventSink<T>, onBufferedEvent?: () => void) {
+  let events: T[] = [];
+  return {
+    push(event: T): void {
+      events.push(event);
+      onBufferedEvent?.();
+    },
+    flush(): void {
+      for (const event of events) {
+        sink.push(event);
+      }
+      // Cleared only after every event was delivered; a throwing sink leaves
+      // the buffer untouched.
+      events = [];
+    },
+    discard(): void {
+      events = [];
+    },
+  };
+}
diff --git a/src/shared/llm-request-activity.ts b/src/shared/llm-request-activity.ts
new file mode 100644
index 000000000000..bb1837a3bb92
--- /dev/null
+++ b/src/shared/llm-request-activity.ts
@@ -0,0 +1,37 @@
+// Keyed by AbortSignal in a WeakMap, so an entry never keeps its signal alive.
+const requestActivityListeners = new WeakMap<AbortSignal, Set<() => void>>();
+
+/**
+ * Invokes every listener registered for `signal`.
+ * Does nothing when `signal` is undefined or has no registered listeners.
+ */
+export function notifyLlmRequestActivity(signal: AbortSignal | undefined): void {
+  if (!signal) {
+    return;
+  }
+  for (const listener of requestActivityListeners.get(signal) ?? []) {
+    listener();
+  }
+}
+
+/**
+ * Registers `listener` to run each time activity is reported for `signal`.
+ *
+ * @returns An unsubscribe function; calling it more than once is harmless.
+ */
+export function onLlmRequestActivity(signal: AbortSignal, listener: () => void): () => void {
+  const listeners = requestActivityListeners.get(signal) ?? new Set<() => void>();
+  listeners.add(listener);
+  requestActivityListeners.set(signal, listeners);
+
+  return () => {
+    listeners.delete(listener);
+    // Drop the map entry only while it still points at this set. A repeated
+    // unsubscribe can run after the entry was removed and re-created by a
+    // later registration; deleting unconditionally would discard that newer
+    // set and silently unsubscribe its listeners.
+    if (listeners.size === 0 && requestActivityListeners.get(signal) === listeners) {
+      requestActivityListeners.delete(signal);
+    }
+  };
+}