diff --git a/.github/workflows/qa-profile-evidence.yml b/.github/workflows/qa-profile-evidence.yml index f22bb100c947..22065b189f6c 100644 --- a/.github/workflows/qa-profile-evidence.yml +++ b/.github/workflows/qa-profile-evidence.yml @@ -16,7 +16,7 @@ on: default: "" type: string qa_profile: - description: Taxonomy QA profile id to run + description: Taxonomy QA profile id to run (for example release or all) required: true default: release type: string @@ -47,7 +47,7 @@ on: type: boolean secrets: OPENAI_API_KEY: - description: OpenAI API key used by release QA profile scenarios + description: OpenAI API key used by live QA profile scenarios required: true outputs: artifact_name: diff --git a/docs/concepts/qa-e2e-automation.md b/docs/concepts/qa-e2e-automation.md index d3f7791a9ab9..9bc5cb555757 100644 --- a/docs/concepts/qa-e2e-automation.md +++ b/docs/concepts/qa-e2e-automation.md @@ -31,7 +31,7 @@ script aliases; both forms are supported. | Command | Purpose | | --------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `qa run` | Bundled QA self-check without `--qa-profile`; taxonomy-backed maturity profile runner with `--qa-profile smoke-ci` or `--qa-profile release`. | +| `qa run` | Bundled QA self-check without `--qa-profile`; taxonomy-backed maturity profile runner with `--qa-profile smoke-ci`, `--qa-profile release`, or `--qa-profile all`. | | `qa suite` | Run repo-backed scenarios against the QA gateway lane. Aliases: `pnpm openclaw qa suite --runner multipass` for a disposable Linux VM. | | `qa coverage` | Print the YAML scenario-coverage inventory (`--json` for machine output). | | `qa parity-report` | Compare two `qa-suite-summary.json` files and write the agentic parity report, or use `--runtime-axis --token-efficiency` to write Codex-vs-OpenClaw runtime parity and token-efficiency reports from one runtime-pair summary. | @@ -75,8 +75,10 @@ pnpm openclaw qa run \ Use `smoke-ci` for deterministic profile proof with mock model providers and Crabline fake provider servers. Use `release` for Stable/LTS proof against live -channels. When a command also needs an OpenClaw root profile, put the root -profile before the QA command: +channels. Use `all` only for explicit full-taxonomy evidence runs; it selects +every active maturity category and can be dispatched through the `QA Profile +Evidence` workflow with `qa_profile=all`. When a command also needs an OpenClaw +root profile, put the root profile before the QA command: ```bash pnpm openclaw --profile work qa run --qa-profile smoke-ci diff --git a/docs/help/testing.md b/docs/help/testing.md index 718e987257de..af162a5c315f 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -190,7 +190,10 @@ inside every shard. - When dispatched by `pnpm openclaw qa run --qa-profile `, embeds the selected taxonomy profile scorecard in the same `qa-evidence.json`. `smoke-ci` writes slim evidence, which sets `evidenceMode: "slim"` and omits - per-entry `execution`. + per-entry `execution`. `release` covers the curated release-readiness slice; + `all` selects every active maturity category and is intended for explicit QA + Profile Evidence workflow dispatches when a full scorecard artifact is + needed. - Runs multiple selected scenarios in parallel by default with isolated gateway workers. `qa-channel` defaults to concurrency 4 (bounded by the selected scenario count). Use `--concurrency ` to tune the worker diff --git a/extensions/qa-lab/src/cli.runtime.test.ts b/extensions/qa-lab/src/cli.runtime.test.ts index 3b038925f074..a25276c0dbeb 100644 --- a/extensions/qa-lab/src/cli.runtime.test.ts +++ b/extensions/qa-lab/src/cli.runtime.test.ts @@ -514,6 +514,24 @@ describe("qa cli runtime", () => { expect(suiteArgs.channelDriverSelection).toBeUndefined(); }); + it("runs the all profile through the live taxonomy profile path", async () => { + await runQaProfileCommand({ + repoRoot: "/tmp/openclaw-repo", + profile: "all", + surface: "agent-runtime-and-provider-execution", + category: "agent-runtime-and-provider-execution.agent-turn-execution", + providerMode: "mock-openai", + }); + + const suiteArgs = mockFirstObjectArg(runQaSuite); + expectFields(suiteArgs, { + providerMode: "mock-openai", + channelDriver: "live", + }); + expect(suiteArgs.channelDriverSelection).toBeUndefined(); + expectWriteContains(stdoutWrite, "QA run profile: all; categories: 1; scenarios:"); + }); + it("filters QA-channel-pinned scenarios from the Crabline smoke profile", async () => { runQaSuite.mockImplementationOnce(async () => { await fs.writeFile(suiteEvidencePath, JSON.stringify(makeQaEvidence()), "utf8"); @@ -592,7 +610,7 @@ describe("qa cli runtime", () => { repoRoot: "/tmp/openclaw-repo", profile: "nightly", }), - ).rejects.toThrow('--qa-profile must be one of smoke-ci, release, got "nightly".'); + ).rejects.toThrow('--qa-profile must be one of smoke-ci, release, all, got "nightly".'); expect(runQaSuite).not.toHaveBeenCalled(); }); diff --git a/extensions/qa-lab/src/coverage-report.test.ts b/extensions/qa-lab/src/coverage-report.test.ts index 15e63b78746b..9fb6b8a1c4e3 100644 --- a/extensions/qa-lab/src/coverage-report.test.ts +++ b/extensions/qa-lab/src/coverage-report.test.ts @@ -135,7 +135,7 @@ describe("qa coverage report", () => { "telegram", "whatsapp", ]); - expect(inventory.scorecardTaxonomy.profileCount).toBe(2); + expect(inventory.scorecardTaxonomy.profileCount).toBe(3); expect( inventory.scorecardTaxonomy.profiles.find((profile) => profile.id === "smoke-ci"), ).toMatchObject({ @@ -147,6 +147,14 @@ describe("qa coverage report", () => { ).toMatchObject({ channelDriver: "live", }); + expect( + inventory.scorecardTaxonomy.profiles.find((profile) => profile.id === "all"), + ).toMatchObject({ + channelDriver: "live", + categoryIds: expect.arrayContaining([ + "browser-automation-and-exec-sandbox-tools.tool-invocation-and-execution", + ]), + }); expect(inventory.scorecardTaxonomy.categoryCount).toBeGreaterThan(200); expect(inventory.scorecardTaxonomy.requiredCategoryCount).toBeGreaterThan(0); expect(inventory.scorecardTaxonomy.requiredCategoryCount).toBeLessThanOrEqual( @@ -233,7 +241,7 @@ describe("qa coverage report", () => { expect(report).toContain("- Evidence refs:"); expect(report).toContain("- Scenario coverage IDs:"); expect(report).toContain( - "- browser-automation-and-exec-sandbox-tools.tool-invocation-and-execution (browser-automation-and-exec-sandbox-tools / Tool Invocation and Execution; partial): profiles: release, smoke-ci; coverage IDs:", + "- browser-automation-and-exec-sandbox-tools.tool-invocation-and-execution (browser-automation-and-exec-sandbox-tools / Tool Invocation and Execution; partial): profiles: all, release, smoke-ci; coverage IDs:", ); expect(report).toContain("primary:playwright:ui/src/ui/e2e/chat-flow.e2e.test.ts (ui.control)"); expect(report).not.toContain("### Unknown Scenario Coverage IDs"); diff --git a/taxonomy.yaml b/taxonomy.yaml index e762bea7e093..3b860bee3399 100644 --- a/taxonomy.yaml +++ b/taxonomy.yaml @@ -219,6 +219,12 @@ profiles: - browser-automation-and-exec-sandbox-tools.browser-automation - browser-automation-and-exec-sandbox-tools.tool-invocation-and-execution - browser-automation-and-exec-sandbox-tools.sandbox-and-tool-policy + - id: all + description: Full taxonomy scorecard selector for generated maturity coverage evidence across + every active category. Intended for explicit QA evidence workflow dispatches rather than the + default release gate. + channelDriver: live + includeAllCategories: true levels: - id: planned code: M0 diff --git a/test/scripts/ci-workflow-guards.test.ts b/test/scripts/ci-workflow-guards.test.ts index 4029b173d911..f30a3561d406 100644 --- a/test/scripts/ci-workflow-guards.test.ts +++ b/test/scripts/ci-workflow-guards.test.ts @@ -588,6 +588,15 @@ describe("ci workflow guards", () => { default: false, type: "boolean", }); + expect(qaEvidenceWorkflow.on.workflow_dispatch.inputs.qa_profile).not.toHaveProperty("options"); + expect(qaEvidenceWorkflow.on.workflow_call.inputs.qa_profile.type).toBe("string"); + const validateProfileStep = qaRunJob.steps.find( + (step) => step.name === "Validate QA profile input", + ); + expect(validateProfileStep.run).toContain( + "taxonomy.profiles.find((entry) => entry.id === requested)", + ); + expect(validateProfileStep.run).toContain("profile=${profile.id}"); expect(generateJob.if).toBe("${{ inputs.qa_evidence_run_id == '' }}"); expect(generateJob.uses).toBe("./.github/workflows/qa-profile-evidence.yml"); expect(generateJob.with).toMatchObject({