Mirror of https://github.com/openclaw/openclaw.git (synced 2026-06-30 19:59:35 +00:00)
feat(qa): add "all" taxonomy profile (#95947)
* qa: add all maturity profile
* test: update qa coverage profile expectations
This commit is contained in:
4
.github/workflows/qa-profile-evidence.yml
vendored
4
.github/workflows/qa-profile-evidence.yml
vendored
@@ -16,7 +16,7 @@ on:
|
||||
default: ""
|
||||
type: string
|
||||
qa_profile:
|
||||
description: Taxonomy QA profile id to run
|
||||
description: Taxonomy QA profile id to run (for example release or all)
|
||||
required: true
|
||||
default: release
|
||||
type: string
|
||||
@@ -47,7 +47,7 @@ on:
|
||||
type: boolean
|
||||
secrets:
|
||||
OPENAI_API_KEY:
|
||||
description: OpenAI API key used by release QA profile scenarios
|
||||
description: OpenAI API key used by live QA profile scenarios
|
||||
required: true
|
||||
outputs:
|
||||
artifact_name:
|
||||
|
||||
@@ -31,7 +31,7 @@ script aliases; both forms are supported.
|
||||
|
||||
| Command | Purpose |
|
||||
| --------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `qa run` | Bundled QA self-check without `--qa-profile`; taxonomy-backed maturity profile runner with `--qa-profile smoke-ci` or `--qa-profile release`. |
|
||||
| `qa run` | Bundled QA self-check without `--qa-profile`; taxonomy-backed maturity profile runner with `--qa-profile smoke-ci`, `--qa-profile release`, or `--qa-profile all`. |
|
||||
| `qa suite` | Run repo-backed scenarios against the QA gateway lane. Aliases: `pnpm openclaw qa suite --runner multipass` for a disposable Linux VM. |
|
||||
| `qa coverage` | Print the YAML scenario-coverage inventory (`--json` for machine output). |
|
||||
| `qa parity-report` | Compare two `qa-suite-summary.json` files and write the agentic parity report, or use `--runtime-axis --token-efficiency` to write Codex-vs-OpenClaw runtime parity and token-efficiency reports from one runtime-pair summary. |
|
||||
@@ -75,8 +75,10 @@ pnpm openclaw qa run \
|
||||
|
||||
Use `smoke-ci` for deterministic profile proof with mock model providers and
|
||||
Crabline fake provider servers. Use `release` for Stable/LTS proof against live
|
||||
channels. When a command also needs an OpenClaw root profile, put the root
|
||||
profile before the QA command:
|
||||
channels. Use `all` only for explicit full-taxonomy evidence runs; it selects
|
||||
every active maturity category and can be dispatched through the `QA Profile
|
||||
Evidence` workflow with `qa_profile=all`. When a command also needs an OpenClaw
|
||||
root profile, put the root profile before the QA command:
|
||||
|
||||
```bash
|
||||
pnpm openclaw --profile work qa run --qa-profile smoke-ci
|
||||
|
||||
@@ -190,7 +190,10 @@ inside every shard.
|
||||
- When dispatched by `pnpm openclaw qa run --qa-profile <profile>`, embeds the
|
||||
selected taxonomy profile scorecard in the same `qa-evidence.json`.
|
||||
`smoke-ci` writes slim evidence, which sets `evidenceMode: "slim"` and omits
|
||||
per-entry `execution`.
|
||||
per-entry `execution`. `release` covers the curated release-readiness slice;
|
||||
`all` selects every active maturity category and is intended for explicit QA
|
||||
Profile Evidence workflow dispatches when a full scorecard artifact is
|
||||
needed.
|
||||
- Runs multiple selected scenarios in parallel by default with isolated
|
||||
gateway workers. `qa-channel` defaults to concurrency 4 (bounded by the
|
||||
selected scenario count). Use `--concurrency <count>` to tune the worker
|
||||
|
||||
@@ -514,6 +514,24 @@ describe("qa cli runtime", () => {
|
||||
expect(suiteArgs.channelDriverSelection).toBeUndefined();
|
||||
});
|
||||
|
||||
it("runs the all profile through the live taxonomy profile path", async () => {
|
||||
await runQaProfileCommand({
|
||||
repoRoot: "/tmp/openclaw-repo",
|
||||
profile: "all",
|
||||
surface: "agent-runtime-and-provider-execution",
|
||||
category: "agent-runtime-and-provider-execution.agent-turn-execution",
|
||||
providerMode: "mock-openai",
|
||||
});
|
||||
|
||||
const suiteArgs = mockFirstObjectArg(runQaSuite);
|
||||
expectFields(suiteArgs, {
|
||||
providerMode: "mock-openai",
|
||||
channelDriver: "live",
|
||||
});
|
||||
expect(suiteArgs.channelDriverSelection).toBeUndefined();
|
||||
expectWriteContains(stdoutWrite, "QA run profile: all; categories: 1; scenarios:");
|
||||
});
|
||||
|
||||
it("filters QA-channel-pinned scenarios from the Crabline smoke profile", async () => {
|
||||
runQaSuite.mockImplementationOnce(async () => {
|
||||
await fs.writeFile(suiteEvidencePath, JSON.stringify(makeQaEvidence()), "utf8");
|
||||
@@ -592,7 +610,7 @@ describe("qa cli runtime", () => {
|
||||
repoRoot: "/tmp/openclaw-repo",
|
||||
profile: "nightly",
|
||||
}),
|
||||
).rejects.toThrow('--qa-profile must be one of smoke-ci, release, got "nightly".');
|
||||
).rejects.toThrow('--qa-profile must be one of smoke-ci, release, all, got "nightly".');
|
||||
expect(runQaSuite).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
|
||||
@@ -135,7 +135,7 @@ describe("qa coverage report", () => {
|
||||
"telegram",
|
||||
"whatsapp",
|
||||
]);
|
||||
expect(inventory.scorecardTaxonomy.profileCount).toBe(2);
|
||||
expect(inventory.scorecardTaxonomy.profileCount).toBe(3);
|
||||
expect(
|
||||
inventory.scorecardTaxonomy.profiles.find((profile) => profile.id === "smoke-ci"),
|
||||
).toMatchObject({
|
||||
@@ -147,6 +147,14 @@ describe("qa coverage report", () => {
|
||||
).toMatchObject({
|
||||
channelDriver: "live",
|
||||
});
|
||||
expect(
|
||||
inventory.scorecardTaxonomy.profiles.find((profile) => profile.id === "all"),
|
||||
).toMatchObject({
|
||||
channelDriver: "live",
|
||||
categoryIds: expect.arrayContaining([
|
||||
"browser-automation-and-exec-sandbox-tools.tool-invocation-and-execution",
|
||||
]),
|
||||
});
|
||||
expect(inventory.scorecardTaxonomy.categoryCount).toBeGreaterThan(200);
|
||||
expect(inventory.scorecardTaxonomy.requiredCategoryCount).toBeGreaterThan(0);
|
||||
expect(inventory.scorecardTaxonomy.requiredCategoryCount).toBeLessThanOrEqual(
|
||||
@@ -233,7 +241,7 @@ describe("qa coverage report", () => {
|
||||
expect(report).toContain("- Evidence refs:");
|
||||
expect(report).toContain("- Scenario coverage IDs:");
|
||||
expect(report).toContain(
|
||||
"- browser-automation-and-exec-sandbox-tools.tool-invocation-and-execution (browser-automation-and-exec-sandbox-tools / Tool Invocation and Execution; partial): profiles: release, smoke-ci; coverage IDs:",
|
||||
"- browser-automation-and-exec-sandbox-tools.tool-invocation-and-execution (browser-automation-and-exec-sandbox-tools / Tool Invocation and Execution; partial): profiles: all, release, smoke-ci; coverage IDs:",
|
||||
);
|
||||
expect(report).toContain("primary:playwright:ui/src/ui/e2e/chat-flow.e2e.test.ts (ui.control)");
|
||||
expect(report).not.toContain("### Unknown Scenario Coverage IDs");
|
||||
|
||||
@@ -219,6 +219,12 @@ profiles:
|
||||
- browser-automation-and-exec-sandbox-tools.browser-automation
|
||||
- browser-automation-and-exec-sandbox-tools.tool-invocation-and-execution
|
||||
- browser-automation-and-exec-sandbox-tools.sandbox-and-tool-policy
|
||||
- id: all
|
||||
description: Full taxonomy scorecard selector for generated maturity coverage evidence across
|
||||
every active category. Intended for explicit QA evidence workflow dispatches rather than the
|
||||
default release gate.
|
||||
channelDriver: live
|
||||
includeAllCategories: true
|
||||
levels:
|
||||
- id: planned
|
||||
code: M0
|
||||
|
||||
@@ -588,6 +588,15 @@ describe("ci workflow guards", () => {
|
||||
default: false,
|
||||
type: "boolean",
|
||||
});
|
||||
expect(qaEvidenceWorkflow.on.workflow_dispatch.inputs.qa_profile).not.toHaveProperty("options");
|
||||
expect(qaEvidenceWorkflow.on.workflow_call.inputs.qa_profile.type).toBe("string");
|
||||
const validateProfileStep = qaRunJob.steps.find(
|
||||
(step) => step.name === "Validate QA profile input",
|
||||
);
|
||||
expect(validateProfileStep.run).toContain(
|
||||
"taxonomy.profiles.find((entry) => entry.id === requested)",
|
||||
);
|
||||
expect(validateProfileStep.run).toContain("profile=${profile.id}");
|
||||
expect(generateJob.if).toBe("${{ inputs.qa_evidence_run_id == '' }}");
|
||||
expect(generateJob.uses).toBe("./.github/workflows/qa-profile-evidence.yml");
|
||||
expect(generateJob.with).toMatchObject({
|
||||
|
||||
Reference in New Issue
Block a user