Standardize QA coverage IDs on dotted names (#94702)

* fix: standardize qa coverage ids

* test: avoid qa coverage id assertion spread
This commit is contained in:
Dallin Romney
2026-06-18 17:25:26 -07:00
committed by GitHub
parent e9e44bf83c
commit e12cf72b17
24 changed files with 1646 additions and 1613 deletions

View File

@@ -30,6 +30,9 @@ out of this repo. If a score needs private evidence, use the redacted
completeness-instruction paths.
- Feature `coverageIds` are ANDed proof targets, not aliases. A feature may
list multiple IDs when each ID proves part of one capability.
- Coverage IDs use dotted `namespace.behavior` form, with lowercase
alphanumeric/dash segments. Profile, surface, and category IDs may remain
dashed or dotted.
- Keep categories and feature names unique, product-shaped, and broader than raw
coverage IDs. Do not promote generic IDs into standalone feature names.
- Avoid duplicate coverage-ID bundles under different feature names in one

View File

@@ -59,6 +59,9 @@ selected-category counts and missing coverage IDs; the individual evidence
entries remain the source of truth for the tests, coverage roles, and results.
Taxonomy feature coverage IDs are exact proof targets, not aliases. Primary
scenario coverage fulfills matching IDs; secondary coverage stays advisory.
Coverage IDs use dotted `namespace.behavior` form with lowercase
alphanumeric/dash segments; profile, surface, and category IDs may still use
the existing dashed or dotted taxonomy IDs.
Slim evidence omits per-entry `execution` and sets `evidenceMode: "slim"`;
`smoke-ci` defaults to slim, and `--evidence-mode full` restores full entries:

View File

@@ -14,6 +14,7 @@ const TEST_EXECUTABLE_COVERAGE_ID = "channels.dm";
const TEST_BROWSER_CATEGORY_ID = "browser-control-ui-and-webchat.browser-ui";
const TEST_BROWSER_COVERAGE_ID = "ui.control";
const TEST_WEBCHAT_COVERAGE_ID = "ui.webchat";
/**
 * Matches a coverage ID in dotted `namespace.behavior` form: two or more
 * dot-separated segments, each made of lowercase alphanumeric runs joined by
 * single dashes. Leading, trailing, or doubled dashes inside a segment are
 * rejected so IDs such as `cli-.entrypoint` or `a--b.c` cannot slip through.
 */
const DOTTED_COVERAGE_ID_PATTERN = /^[a-z0-9]+(?:-[a-z0-9]+)*(?:\.[a-z0-9]+(?:-[a-z0-9]+)*)+$/;
function testMaturityTaxonomy(params?: {
categoryId?: string;
@@ -138,6 +139,11 @@ describe("qa coverage report", () => {
expect(inventory.scorecardTaxonomy.evidenceRefCount).toBeGreaterThan(0);
expect(inventory.scorecardTaxonomy.scenarioCoverageIdCount).toBeGreaterThan(0);
expect(inventory.scorecardTaxonomy.unknownCoverageIdCount).toBe(0);
// Every taxonomy feature coverage ID must be dotted. Collect the offenders
// rather than a bare boolean so a failure names the non-conforming IDs.
expect(
  inventory.scorecardTaxonomy.categories
    .flatMap((category) => category.coverageIds)
    .filter((coverageId) => !DOTTED_COVERAGE_ID_PATTERN.test(coverageId)),
).toStrictEqual([]);
expect(inventory.scorecardTaxonomy.validationIssues.length).toBeGreaterThan(0);
expect(
inventory.scorecardTaxonomy.validationIssues.some((issue) =>

View File

@@ -198,7 +198,7 @@ describe("evidence gallery", () => {
surface: "web-ui",
},
{
coverageIds: ["cli-entrypoint"],
coverageIds: ["cli.entrypoint"],
runner: {
availability: "local",
command: "pnpm openclaw qa suite --scenario ux-matrix-evidence-dashboard",
@@ -274,7 +274,7 @@ describe("evidence gallery", () => {
title: "UX Matrix: cli / error-state",
source: { path: "scripts/ux-matrix/dashboard.ts" },
},
coverage: [{ id: "status-snapshots", role: "primary" }],
coverage: [{ id: "cli.status-snapshots", role: "primary" }],
execution: {
runner: "ux-matrix-dashboard",
environment: {
@@ -359,7 +359,7 @@ describe("evidence gallery", () => {
{
artifactKinds: [],
artifactPaths: [],
coverageIds: ["cli-entrypoint"],
coverageIds: ["cli.entrypoint"],
runner: {
availability: "local",
command: "pnpm openclaw qa suite --scenario ux-matrix-evidence-dashboard",

View File

@@ -346,7 +346,7 @@ describe("evidence summary", () => {
id: "control-ui.browser-run",
title: "Control UI browser workflow",
sourcePath: "ui/control-ui.e2e.test.ts",
primaryCoverageIds: ["control-ui.browser"],
primaryCoverageIds: ["ui.control"],
docsRefs: ["docs/concepts/qa-e2e-automation.md"],
codeRefs: ["ui/"],
},
@@ -374,7 +374,7 @@ describe("evidence summary", () => {
},
coverage: [
{
id: "control-ui.browser",
id: "ui.control",
role: "primary",
},
],

View File

@@ -25,6 +25,8 @@ function listScenarioMarkdownPaths(dir = "qa/scenarios"): string[] {
}
describe("qa scenario catalog", () => {
// Dotted `namespace.behavior` coverage ID: two or more dot-separated segments,
// each built from lowercase alphanumeric runs joined by single dashes (no
// leading, trailing, or doubled dashes).
const dottedCoverageIdPattern = /^[a-z0-9]+(?:-[a-z0-9]+)*(?:\.[a-z0-9]+(?:-[a-z0-9]+)*)+$/;
it("keeps repo-backed scenarios YAML-only", () => {
expect(listScenarioMarkdownPaths()).toStrictEqual([]);
});
@@ -74,6 +76,17 @@ describe("qa scenario catalog", () => {
.filter((scenario) => !(scenario.coverage?.primary.length ?? 0))
.map((scenario) => scenario.id),
).toStrictEqual([]);
// Primary and secondary coverage IDs of every scenario must be dotted. Like
// the missing-primary check above, assert on the list of offenders so a
// failure reports exactly which IDs need renaming.
expect(
  pack.scenarios
    .flatMap((scenario) =>
      (scenario.coverage?.primary ?? []).concat(scenario.coverage?.secondary ?? []),
    )
    .filter((coverageId) => !dottedCoverageIdPattern.test(coverageId)),
).toStrictEqual([]);
expect(readQaScenarioById("memory-recall").coverage?.primary).toContain("memory.recall");
});

View File

@@ -87,8 +87,8 @@ const qaScenarioExecutionSchema = z.union([
const qaCoverageIdSchema = z
.string()
.trim()
.regex(/^[a-z0-9]+(?:[.-][a-z0-9]+)*$/, {
message: "coverage ids must use lowercase dotted or dashed tokens",
.regex(/^[a-z0-9][a-z0-9-]*(?:\.[a-z0-9][a-z0-9-]*)+$/, {
message: "coverage ids must use lowercase dotted tokens",
});
const qaCoverageIdListSchema = z.array(qaCoverageIdSchema).min(1);

View File

@@ -11,7 +11,14 @@ const qaScorecardIdSchema = z
.string()
.trim()
.regex(/^[a-z0-9]+(?:[.-][a-z0-9]+)*$/, {
message: "scorecard and coverage ids must use lowercase dotted or dashed tokens",
message: "scorecard ids must use lowercase dotted or dashed tokens",
});
/**
 * Schema for a feature coverage ID. After trimming, the value must be in
 * dotted `namespace.behavior` form: two or more dot-separated segments, each
 * made of lowercase alphanumeric runs joined by single dashes. Unlike the
 * scorecard ID schema, a dash-only ID without a dot is rejected, and so are
 * leading, trailing, or doubled dashes within a segment.
 */
const qaCoverageIdSchema = z
  .string()
  .trim()
  .regex(/^[a-z0-9]+(?:-[a-z0-9]+)*(?:\.[a-z0-9]+(?:-[a-z0-9]+)*)+$/, {
    message: "coverage ids must use lowercase dotted tokens",
  });
function isRepoRootRelativeRef(value: string) {
@@ -31,7 +38,7 @@ const qaScorecardProfileSchema = z.object({
const qaMaturityFeatureSchema = z.object({
name: z.string().trim().min(1),
coverageIds: z.array(qaScorecardIdSchema).default([]),
coverageIds: z.array(qaCoverageIdSchema).default([]),
description: z.string().trim().min(1).optional(),
});

View File

@@ -726,9 +726,9 @@ describe("qa test file scenario runner", () => {
"tools.evidence",
"workspace.artifacts",
"ui.control",
"control-ui",
"cli-entrypoint",
"status-snapshots",
"gateway.control-ui-hosting",
"cli.entrypoint",
"cli.status-snapshots",
]),
);
const artifactKinds = evidence.entries.flatMap(

View File

@@ -213,7 +213,7 @@ describe("QA Lab UI evidence render", () => {
{
artifactKinds: [],
artifactPaths: [],
coverageIds: ["cli-entrypoint"],
coverageIds: ["cli.entrypoint"],
runner: {
availability: "local",
command: "pnpm openclaw qa suite --scenario ux-matrix-evidence-dashboard",
@@ -247,7 +247,7 @@ describe("QA Lab UI evidence render", () => {
expect(html).toContain('data-evidence-entry-id="ux-matrix.web-ui.first-run"');
expect(html).toContain("evidence-matrix-cell-proof-gap");
expect(html).toContain("not executed in this run");
expect(html).toContain("Coverage: cli-entrypoint");
expect(html).toContain("Coverage: cli.entrypoint");
expect(html).toContain("Runner: cli-status");
expect(html).toContain("Open media artifact");
expect(html).toContain("Open video artifact");

View File

@@ -6,7 +6,7 @@ scenario:
coverage:
primary:
- channels.threads
- thread-parent-child-placement
- channels.thread-parent-child-placement
secondary:
- channels.qa-channel
objective: Verify the agent can keep follow-up work inside a thread and not leak context into the root channel.

View File

@@ -17,7 +17,8 @@ title: OpenClaw QA Scenario Pack
#
# - add `coverage.primary` IDs to each scenario's `scenario` block
# - add `coverage.secondary` only when a scenario intentionally protects another behavior
# - keep IDs behavior-shaped, broad enough to reuse, lowercase, and dotted or dashed
# - keep IDs behavior-shaped, broad enough to reuse, lowercase, and dotted
# as `namespace.behavior`, with dashes allowed inside each segment
# - use the exact values listed under feature `coverageIds` in `taxonomy.yaml`
# - taxonomy feature coverage IDs are exact proof targets, not aliases
# - scenario primary can list multiple IDs only when this scenario is primary

View File

@@ -6,7 +6,7 @@ scenario:
coverage:
primary:
- media.image-generation
- generated-image-persistence-and-delivery
- media.image-generation-delivery
secondary:
- channels.qa-channel
objective: Verify a generated image is saved as media, reattached on the next turn, and described correctly through the vision path.

View File

@@ -7,8 +7,8 @@ scenario:
primary:
- plugins.lifecycle
secondary:
- plugin-validation-and-repair
- plugin-setup
- cli.plugin-validation-repair
- plugins.setup-flows
objective: Exercise strict plugin load/uninstall proof parsing through QA Lab evidence.
successCriteria:
- Enabled loaded plugin inspect JSON is accepted as proof.

View File

@@ -5,7 +5,7 @@ scenario:
surface: runtime
coverage:
primary:
- signed-redacted-thinking-replay
- anthropic.signed-redacted-thinking-replay
secondary:
- runtime.retry-policy
gatewayConfigPatch:

View File

@@ -6,7 +6,7 @@ scenario:
coverage:
primary:
- telemetry.prometheus
- gateway-authenticated-get-api-diagnostics-prometheus
- telemetry.prometheus-authenticated-gateway-export
secondary:
- harness.qa-lab
- docker.e2e

View File

@@ -5,10 +5,10 @@ scenario:
surface: runtime
coverage:
primary:
- websocket-transport
- gateway.websocket-transport
secondary:
- health-apis
- hello-ok-snapshot
- gateway.health-apis
- gateway.hello-ok-snapshot
objective: Exercise gateway health and WebSocket smoke assertions through QA Lab evidence.
successCriteria:
- Gateway health probe succeeds against a reachable local endpoint.

View File

@@ -5,9 +5,9 @@ scenario:
surface: docker-podman-hosting
coverage:
primary:
- docker-e2e-package-artifact-generation
- docker.package-artifact-generation
secondary:
- package-manager-installs
- cli.package-manager-installs
- runtime.package-update
objective: Exercise bounded OpenClaw package artifact generation through QA Lab evidence.
successCriteria:

View File

@@ -8,7 +8,7 @@ scenario:
- telemetry.otel
secondary:
- harness.qa-lab
- plugin-sdk-diagnostic-runtime-exports
- telemetry.plugin-sdk-runtime-exports
objective: Exercise bounded local OTLP capture and OpenTelemetry smoke assertions through QA Lab evidence.
successCriteria:
- Package-manager forwarded QA OTEL smoke arguments parse correctly.

View File

@@ -6,8 +6,8 @@ scenario:
coverage:
primary:
- scheduling.cron
- cron-rpcs
- chat-announce-delivery
- scheduling.cron-rpcs
- scheduling.chat-announce-delivery
secondary:
- channels.qa-channel
objective: Verify the agent can schedule a cron reminder one minute in the future and receive the follow-up in the QA channel.

View File

@@ -6,7 +6,7 @@ scenario:
coverage:
primary:
- ui.control
- dashboard-open-auth-bootstrap
- ui.dashboard-auth-bootstrap
secondary:
- media.image-understanding
- channels.qa-channel

View File

@@ -9,9 +9,9 @@ scenario:
- qa.artifact-safety
secondary:
- ui.control
- control-ui
- cli-entrypoint
- status-snapshots
- gateway.control-ui-hosting
- cli.entrypoint
- cli.status-snapshots
- tools.evidence
- workspace.artifacts
objective: Produce UX Matrix evidence artifacts through the QA Lab script producer contract.

View File

@@ -516,7 +516,7 @@ export async function runUxMatrixEvidenceProducer(options: ProducerOptions) {
]
: []),
],
coverageIds: ["ui.control", "control-ui"],
coverageIds: ["ui.control", "gateway.control-ui-hosting"],
failureReason: matrixScreenshotResult.failureReason,
stage: "screenshot-artifact",
status: matrixScreenshotResult.status,
@@ -526,7 +526,7 @@ export async function runUxMatrixEvidenceProducer(options: ProducerOptions) {
},
{
artifacts: [{ kind: "log", path: relativeToArtifactBase(options.artifactBase, cliLogPath) }],
coverageIds: ["cli-entrypoint", "status-snapshots"],
coverageIds: ["cli.entrypoint", "cli.status-snapshots"],
failureReason: cliResult.failureReason,
stage: "entrypoint-help",
status: cliResult.status,

File diff suppressed because it is too large Load Diff