feat(qa): add "all" taxonomy profile (#95947)

* qa: add all maturity profile

* test: update qa coverage profile expectations
This commit is contained in:
Dallin Romney
2026-06-22 20:49:08 -07:00
committed by GitHub
parent ca0eb62c87
commit 4460fa78c3
7 changed files with 55 additions and 9 deletions

View File

@@ -16,7 +16,7 @@ on:
default: ""
type: string
qa_profile:
description: Taxonomy QA profile id to run
description: Taxonomy QA profile id to run (for example release or all)
required: true
default: release
type: string
@@ -47,7 +47,7 @@ on:
type: boolean
secrets:
OPENAI_API_KEY:
description: OpenAI API key used by release QA profile scenarios
description: OpenAI API key used by live QA profile scenarios
required: true
outputs:
artifact_name:

View File

@@ -31,7 +31,7 @@ script aliases; both forms are supported.
| Command | Purpose |
| --------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `qa run` | Bundled QA self-check without `--qa-profile`; taxonomy-backed maturity profile runner with `--qa-profile smoke-ci` or `--qa-profile release`. |
| `qa run` | Bundled QA self-check without `--qa-profile`; taxonomy-backed maturity profile runner with `--qa-profile smoke-ci`, `--qa-profile release`, or `--qa-profile all`. |
| `qa suite` | Run repo-backed scenarios against the QA gateway lane. Aliases: `pnpm openclaw qa suite --runner multipass` for a disposable Linux VM. |
| `qa coverage` | Print the YAML scenario-coverage inventory (`--json` for machine output). |
| `qa parity-report` | Compare two `qa-suite-summary.json` files and write the agentic parity report, or use `--runtime-axis --token-efficiency` to write Codex-vs-OpenClaw runtime parity and token-efficiency reports from one runtime-pair summary. |
@@ -75,8 +75,10 @@ pnpm openclaw qa run \
Use `smoke-ci` for deterministic profile proof with mock model providers and
Crabline fake provider servers. Use `release` for Stable/LTS proof against live
channels. When a command also needs an OpenClaw root profile, put the root
profile before the QA command:
channels. Use `all` only for explicit full-taxonomy evidence runs; it selects
every active maturity category and can be dispatched through the `QA Profile
Evidence` workflow with `qa_profile=all`. When a command also needs an OpenClaw
root profile, put the root profile before the QA command:
```bash
pnpm openclaw --profile work qa run --qa-profile smoke-ci

View File

@@ -190,7 +190,10 @@ inside every shard.
- When dispatched by `pnpm openclaw qa run --qa-profile <profile>`, embeds the
selected taxonomy profile scorecard in the same `qa-evidence.json`.
`smoke-ci` writes slim evidence, which sets `evidenceMode: "slim"` and omits
per-entry `execution`.
per-entry `execution`. `release` covers the curated release-readiness slice;
`all` selects every active maturity category and is intended for explicit QA
Profile Evidence workflow dispatches when a full scorecard artifact is
needed.
- Runs multiple selected scenarios in parallel by default with isolated
gateway workers. `qa-channel` defaults to concurrency 4 (bounded by the
selected scenario count). Use `--concurrency <count>` to tune the worker

View File

@@ -514,6 +514,24 @@ describe("qa cli runtime", () => {
expect(suiteArgs.channelDriverSelection).toBeUndefined();
});
// Checks that running the "all" QA profile hands the suite runner a live
// channel driver while the requested provider mode is still the mock provider.
it("runs the all profile through the live taxonomy profile path", async () => {
// The run is narrowed with a surface and a category filter; the expected
// summary line asserted at the end reports "categories: 1".
await runQaProfileCommand({
repoRoot: "/tmp/openclaw-repo",
profile: "all",
surface: "agent-runtime-and-provider-execution",
category: "agent-runtime-and-provider-execution.agent-turn-execution",
providerMode: "mock-openai",
});
// NOTE(review): mockFirstObjectArg presumably returns the first object
// argument recorded on the runQaSuite mock — confirm against the helper.
const suiteArgs = mockFirstObjectArg(runQaSuite);
expectFields(suiteArgs, {
providerMode: "mock-openai",
channelDriver: "live",
});
// No explicit channel driver selection is expected on the suite arguments.
expect(suiteArgs.channelDriverSelection).toBeUndefined();
// The expected text stops at "scenarios:", so no scenario count is pinned here.
expectWriteContains(stdoutWrite, "QA run profile: all; categories: 1; scenarios:");
});
it("filters QA-channel-pinned scenarios from the Crabline smoke profile", async () => {
runQaSuite.mockImplementationOnce(async () => {
await fs.writeFile(suiteEvidencePath, JSON.stringify(makeQaEvidence()), "utf8");
@@ -592,7 +610,7 @@ describe("qa cli runtime", () => {
repoRoot: "/tmp/openclaw-repo",
profile: "nightly",
}),
).rejects.toThrow('--qa-profile must be one of smoke-ci, release, got "nightly".');
).rejects.toThrow('--qa-profile must be one of smoke-ci, release, all, got "nightly".');
expect(runQaSuite).not.toHaveBeenCalled();
});

View File

@@ -135,7 +135,7 @@ describe("qa coverage report", () => {
"telegram",
"whatsapp",
]);
expect(inventory.scorecardTaxonomy.profileCount).toBe(2);
expect(inventory.scorecardTaxonomy.profileCount).toBe(3);
expect(
inventory.scorecardTaxonomy.profiles.find((profile) => profile.id === "smoke-ci"),
).toMatchObject({
@@ -147,6 +147,14 @@ describe("qa coverage report", () => {
).toMatchObject({
channelDriver: "live",
});
expect(
inventory.scorecardTaxonomy.profiles.find((profile) => profile.id === "all"),
).toMatchObject({
channelDriver: "live",
categoryIds: expect.arrayContaining([
"browser-automation-and-exec-sandbox-tools.tool-invocation-and-execution",
]),
});
expect(inventory.scorecardTaxonomy.categoryCount).toBeGreaterThan(200);
expect(inventory.scorecardTaxonomy.requiredCategoryCount).toBeGreaterThan(0);
expect(inventory.scorecardTaxonomy.requiredCategoryCount).toBeLessThanOrEqual(
@@ -233,7 +241,7 @@ describe("qa coverage report", () => {
expect(report).toContain("- Evidence refs:");
expect(report).toContain("- Scenario coverage IDs:");
expect(report).toContain(
"- browser-automation-and-exec-sandbox-tools.tool-invocation-and-execution (browser-automation-and-exec-sandbox-tools / Tool Invocation and Execution; partial): profiles: release, smoke-ci; coverage IDs:",
"- browser-automation-and-exec-sandbox-tools.tool-invocation-and-execution (browser-automation-and-exec-sandbox-tools / Tool Invocation and Execution; partial): profiles: all, release, smoke-ci; coverage IDs:",
);
expect(report).toContain("primary:playwright:ui/src/ui/e2e/chat-flow.e2e.test.ts (ui.control)");
expect(report).not.toContain("### Unknown Scenario Coverage IDs");

View File

@@ -219,6 +219,12 @@ profiles:
- browser-automation-and-exec-sandbox-tools.browser-automation
- browser-automation-and-exec-sandbox-tools.tool-invocation-and-execution
- browser-automation-and-exec-sandbox-tools.sandbox-and-tool-policy
- id: all
description: Full taxonomy scorecard selector for generated maturity coverage evidence across
every active category. Intended for explicit QA evidence workflow dispatches rather than the
default release gate.
channelDriver: live
includeAllCategories: true
levels:
- id: planned
code: M0

View File

@@ -588,6 +588,15 @@ describe("ci workflow guards", () => {
default: false,
type: "boolean",
});
expect(qaEvidenceWorkflow.on.workflow_dispatch.inputs.qa_profile).not.toHaveProperty("options");
expect(qaEvidenceWorkflow.on.workflow_call.inputs.qa_profile.type).toBe("string");
const validateProfileStep = qaRunJob.steps.find(
(step) => step.name === "Validate QA profile input",
);
expect(validateProfileStep.run).toContain(
"taxonomy.profiles.find((entry) => entry.id === requested)",
);
expect(validateProfileStep.run).toContain("profile=${profile.id}");
expect(generateJob.if).toBe("${{ inputs.qa_evidence_run_id == '' }}");
expect(generateJob.uses).toBe("./.github/workflows/qa-profile-evidence.yml");
expect(generateJob.with).toMatchObject({