mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-30 19:59:35 +00:00
Standardize QA coverage IDs on dotted names (#94702)
* fix: standardize qa coverage ids
* test: avoid qa coverage id assertion spread
This commit is contained in:
@@ -30,6 +30,9 @@ out of this repo. If a score needs private evidence, use the redacted
|
||||
completeness-instruction paths.
|
||||
- Feature `coverageIds` are ANDed proof targets, not aliases. A feature may
|
||||
list multiple IDs when each ID proves part of one capability.
|
||||
- Coverage IDs use dotted `namespace.behavior` form, with lowercase
|
||||
alphanumeric/dash segments. Profile, surface, and category IDs may remain
|
||||
dashed or dotted.
|
||||
- Keep categories and feature names unique, product-shaped, and broader than raw
|
||||
coverage IDs. Do not promote generic IDs into standalone feature names.
|
||||
- Avoid duplicate coverage-ID bundles under different feature names in one
|
||||
|
||||
@@ -59,6 +59,9 @@ selected-category counts and missing coverage IDs; the individual evidence
|
||||
entries remain the source of truth for the tests, coverage roles, and results.
|
||||
Taxonomy feature coverage IDs are exact proof targets, not aliases. Primary
|
||||
scenario coverage fulfills matching IDs; secondary coverage stays advisory.
|
||||
Coverage IDs use dotted `namespace.behavior` form with lowercase
|
||||
alphanumeric/dash segments; profile, surface, and category IDs may still use
|
||||
the existing dashed or dotted taxonomy IDs.
|
||||
Slim evidence omits per-entry `execution` and sets `evidenceMode: "slim"`;
|
||||
`smoke-ci` defaults to slim, and `--evidence-mode full` restores full entries:
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ const TEST_EXECUTABLE_COVERAGE_ID = "channels.dm";
|
||||
const TEST_BROWSER_CATEGORY_ID = "browser-control-ui-and-webchat.browser-ui";
|
||||
const TEST_BROWSER_COVERAGE_ID = "ui.control";
|
||||
const TEST_WEBCHAT_COVERAGE_ID = "ui.webchat";
|
||||
const DOTTED_COVERAGE_ID_PATTERN = /^[a-z0-9][a-z0-9-]*(?:\.[a-z0-9][a-z0-9-]*)+$/;
|
||||
|
||||
function testMaturityTaxonomy(params?: {
|
||||
categoryId?: string;
|
||||
@@ -138,6 +139,11 @@ describe("qa coverage report", () => {
|
||||
expect(inventory.scorecardTaxonomy.evidenceRefCount).toBeGreaterThan(0);
|
||||
expect(inventory.scorecardTaxonomy.scenarioCoverageIdCount).toBeGreaterThan(0);
|
||||
expect(inventory.scorecardTaxonomy.unknownCoverageIdCount).toBe(0);
|
||||
expect(
|
||||
inventory.scorecardTaxonomy.categories
|
||||
.flatMap((category) => category.coverageIds)
|
||||
.every((coverageId) => DOTTED_COVERAGE_ID_PATTERN.test(coverageId)),
|
||||
).toBe(true);
|
||||
expect(inventory.scorecardTaxonomy.validationIssues.length).toBeGreaterThan(0);
|
||||
expect(
|
||||
inventory.scorecardTaxonomy.validationIssues.some((issue) =>
|
||||
|
||||
@@ -198,7 +198,7 @@ describe("evidence gallery", () => {
|
||||
surface: "web-ui",
|
||||
},
|
||||
{
|
||||
coverageIds: ["cli-entrypoint"],
|
||||
coverageIds: ["cli.entrypoint"],
|
||||
runner: {
|
||||
availability: "local",
|
||||
command: "pnpm openclaw qa suite --scenario ux-matrix-evidence-dashboard",
|
||||
@@ -274,7 +274,7 @@ describe("evidence gallery", () => {
|
||||
title: "UX Matrix: cli / error-state",
|
||||
source: { path: "scripts/ux-matrix/dashboard.ts" },
|
||||
},
|
||||
coverage: [{ id: "status-snapshots", role: "primary" }],
|
||||
coverage: [{ id: "cli.status-snapshots", role: "primary" }],
|
||||
execution: {
|
||||
runner: "ux-matrix-dashboard",
|
||||
environment: {
|
||||
@@ -359,7 +359,7 @@ describe("evidence gallery", () => {
|
||||
{
|
||||
artifactKinds: [],
|
||||
artifactPaths: [],
|
||||
coverageIds: ["cli-entrypoint"],
|
||||
coverageIds: ["cli.entrypoint"],
|
||||
runner: {
|
||||
availability: "local",
|
||||
command: "pnpm openclaw qa suite --scenario ux-matrix-evidence-dashboard",
|
||||
|
||||
@@ -346,7 +346,7 @@ describe("evidence summary", () => {
|
||||
id: "control-ui.browser-run",
|
||||
title: "Control UI browser workflow",
|
||||
sourcePath: "ui/control-ui.e2e.test.ts",
|
||||
primaryCoverageIds: ["control-ui.browser"],
|
||||
primaryCoverageIds: ["ui.control"],
|
||||
docsRefs: ["docs/concepts/qa-e2e-automation.md"],
|
||||
codeRefs: ["ui/"],
|
||||
},
|
||||
@@ -374,7 +374,7 @@ describe("evidence summary", () => {
|
||||
},
|
||||
coverage: [
|
||||
{
|
||||
id: "control-ui.browser",
|
||||
id: "ui.control",
|
||||
role: "primary",
|
||||
},
|
||||
],
|
||||
|
||||
@@ -25,6 +25,8 @@ function listScenarioMarkdownPaths(dir = "qa/scenarios"): string[] {
|
||||
}
|
||||
|
||||
describe("qa scenario catalog", () => {
|
||||
const dottedCoverageIdPattern = /^[a-z0-9][a-z0-9-]*(?:\.[a-z0-9][a-z0-9-]*)+$/;
|
||||
|
||||
it("keeps repo-backed scenarios YAML-only", () => {
|
||||
expect(listScenarioMarkdownPaths()).toStrictEqual([]);
|
||||
});
|
||||
@@ -74,6 +76,17 @@ describe("qa scenario catalog", () => {
|
||||
.filter((scenario) => !(scenario.coverage?.primary.length ?? 0))
|
||||
.map((scenario) => scenario.id),
|
||||
).toStrictEqual([]);
|
||||
expect(
|
||||
pack.scenarios.every(
|
||||
(scenario) =>
|
||||
(scenario.coverage?.primary ?? []).every((coverageId) =>
|
||||
dottedCoverageIdPattern.test(coverageId),
|
||||
) &&
|
||||
(scenario.coverage?.secondary ?? []).every((coverageId) =>
|
||||
dottedCoverageIdPattern.test(coverageId),
|
||||
),
|
||||
),
|
||||
).toBe(true);
|
||||
expect(readQaScenarioById("memory-recall").coverage?.primary).toContain("memory.recall");
|
||||
});
|
||||
|
||||
|
||||
@@ -87,8 +87,8 @@ const qaScenarioExecutionSchema = z.union([
|
||||
const qaCoverageIdSchema = z
|
||||
.string()
|
||||
.trim()
|
||||
.regex(/^[a-z0-9]+(?:[.-][a-z0-9]+)*$/, {
|
||||
message: "coverage ids must use lowercase dotted or dashed tokens",
|
||||
.regex(/^[a-z0-9][a-z0-9-]*(?:\.[a-z0-9][a-z0-9-]*)+$/, {
|
||||
message: "coverage ids must use lowercase dotted tokens",
|
||||
});
|
||||
|
||||
const qaCoverageIdListSchema = z.array(qaCoverageIdSchema).min(1);
|
||||
|
||||
@@ -11,7 +11,14 @@ const qaScorecardIdSchema = z
|
||||
.string()
|
||||
.trim()
|
||||
.regex(/^[a-z0-9]+(?:[.-][a-z0-9]+)*$/, {
|
||||
message: "scorecard and coverage ids must use lowercase dotted or dashed tokens",
|
||||
message: "scorecard ids must use lowercase dotted or dashed tokens",
|
||||
});
|
||||
|
||||
const qaCoverageIdSchema = z
|
||||
.string()
|
||||
.trim()
|
||||
.regex(/^[a-z0-9][a-z0-9-]*(?:\.[a-z0-9][a-z0-9-]*)+$/, {
|
||||
message: "coverage ids must use lowercase dotted tokens",
|
||||
});
|
||||
|
||||
function isRepoRootRelativeRef(value: string) {
|
||||
@@ -31,7 +38,7 @@ const qaScorecardProfileSchema = z.object({
|
||||
|
||||
const qaMaturityFeatureSchema = z.object({
|
||||
name: z.string().trim().min(1),
|
||||
coverageIds: z.array(qaScorecardIdSchema).default([]),
|
||||
coverageIds: z.array(qaCoverageIdSchema).default([]),
|
||||
description: z.string().trim().min(1).optional(),
|
||||
});
|
||||
|
||||
|
||||
@@ -726,9 +726,9 @@ describe("qa test file scenario runner", () => {
|
||||
"tools.evidence",
|
||||
"workspace.artifacts",
|
||||
"ui.control",
|
||||
"control-ui",
|
||||
"cli-entrypoint",
|
||||
"status-snapshots",
|
||||
"gateway.control-ui-hosting",
|
||||
"cli.entrypoint",
|
||||
"cli.status-snapshots",
|
||||
]),
|
||||
);
|
||||
const artifactKinds = evidence.entries.flatMap(
|
||||
|
||||
@@ -213,7 +213,7 @@ describe("QA Lab UI evidence render", () => {
|
||||
{
|
||||
artifactKinds: [],
|
||||
artifactPaths: [],
|
||||
coverageIds: ["cli-entrypoint"],
|
||||
coverageIds: ["cli.entrypoint"],
|
||||
runner: {
|
||||
availability: "local",
|
||||
command: "pnpm openclaw qa suite --scenario ux-matrix-evidence-dashboard",
|
||||
@@ -247,7 +247,7 @@ describe("QA Lab UI evidence render", () => {
|
||||
expect(html).toContain('data-evidence-entry-id="ux-matrix.web-ui.first-run"');
|
||||
expect(html).toContain("evidence-matrix-cell-proof-gap");
|
||||
expect(html).toContain("not executed in this run");
|
||||
expect(html).toContain("Coverage: cli-entrypoint");
|
||||
expect(html).toContain("Coverage: cli.entrypoint");
|
||||
expect(html).toContain("Runner: cli-status");
|
||||
expect(html).toContain("Open media artifact");
|
||||
expect(html).toContain("Open video artifact");
|
||||
|
||||
@@ -6,7 +6,7 @@ scenario:
|
||||
coverage:
|
||||
primary:
|
||||
- channels.threads
|
||||
- thread-parent-child-placement
|
||||
- channels.thread-parent-child-placement
|
||||
secondary:
|
||||
- channels.qa-channel
|
||||
objective: Verify the agent can keep follow-up work inside a thread and not leak context into the root channel.
|
||||
|
||||
@@ -17,7 +17,8 @@ title: OpenClaw QA Scenario Pack
|
||||
#
|
||||
# - add `coverage.primary` IDs to each scenario's `scenario` block
|
||||
# - add `coverage.secondary` only when a scenario intentionally protects another behavior
|
||||
# - keep IDs behavior-shaped, broad enough to reuse, lowercase, and dotted or dashed
|
||||
# - keep IDs behavior-shaped, broad enough to reuse, lowercase, and dotted
|
||||
# as `namespace.behavior`, with dashes allowed inside each segment
|
||||
# - use the exact values listed under feature `coverageIds` in `taxonomy.yaml`
|
||||
# - taxonomy feature coverage IDs are exact proof targets, not aliases
|
||||
# - scenario primary can list multiple IDs only when this scenario is primary
|
||||
|
||||
@@ -6,7 +6,7 @@ scenario:
|
||||
coverage:
|
||||
primary:
|
||||
- media.image-generation
|
||||
- generated-image-persistence-and-delivery
|
||||
- media.image-generation-delivery
|
||||
secondary:
|
||||
- channels.qa-channel
|
||||
objective: Verify a generated image is saved as media, reattached on the next turn, and described correctly through the vision path.
|
||||
|
||||
@@ -7,8 +7,8 @@ scenario:
|
||||
primary:
|
||||
- plugins.lifecycle
|
||||
secondary:
|
||||
- plugin-validation-and-repair
|
||||
- plugin-setup
|
||||
- cli.plugin-validation-repair
|
||||
- plugins.setup-flows
|
||||
objective: Exercise strict plugin load/uninstall proof parsing through QA Lab evidence.
|
||||
successCriteria:
|
||||
- Enabled loaded plugin inspect JSON is accepted as proof.
|
||||
|
||||
@@ -5,7 +5,7 @@ scenario:
|
||||
surface: runtime
|
||||
coverage:
|
||||
primary:
|
||||
- signed-redacted-thinking-replay
|
||||
- anthropic.signed-redacted-thinking-replay
|
||||
secondary:
|
||||
- runtime.retry-policy
|
||||
gatewayConfigPatch:
|
||||
|
||||
@@ -6,7 +6,7 @@ scenario:
|
||||
coverage:
|
||||
primary:
|
||||
- telemetry.prometheus
|
||||
- gateway-authenticated-get-api-diagnostics-prometheus
|
||||
- telemetry.prometheus-authenticated-gateway-export
|
||||
secondary:
|
||||
- harness.qa-lab
|
||||
- docker.e2e
|
||||
|
||||
@@ -5,10 +5,10 @@ scenario:
|
||||
surface: runtime
|
||||
coverage:
|
||||
primary:
|
||||
- websocket-transport
|
||||
- gateway.websocket-transport
|
||||
secondary:
|
||||
- health-apis
|
||||
- hello-ok-snapshot
|
||||
- gateway.health-apis
|
||||
- gateway.hello-ok-snapshot
|
||||
objective: Exercise gateway health and WebSocket smoke assertions through QA Lab evidence.
|
||||
successCriteria:
|
||||
- Gateway health probe succeeds against a reachable local endpoint.
|
||||
|
||||
@@ -5,9 +5,9 @@ scenario:
|
||||
surface: docker-podman-hosting
|
||||
coverage:
|
||||
primary:
|
||||
- docker-e2e-package-artifact-generation
|
||||
- docker.package-artifact-generation
|
||||
secondary:
|
||||
- package-manager-installs
|
||||
- cli.package-manager-installs
|
||||
- runtime.package-update
|
||||
objective: Exercise bounded OpenClaw package artifact generation through QA Lab evidence.
|
||||
successCriteria:
|
||||
|
||||
@@ -8,7 +8,7 @@ scenario:
|
||||
- telemetry.otel
|
||||
secondary:
|
||||
- harness.qa-lab
|
||||
- plugin-sdk-diagnostic-runtime-exports
|
||||
- telemetry.plugin-sdk-runtime-exports
|
||||
objective: Exercise bounded local OTLP capture and OpenTelemetry smoke assertions through QA Lab evidence.
|
||||
successCriteria:
|
||||
- Package-manager forwarded QA OTEL smoke arguments parse correctly.
|
||||
|
||||
@@ -6,8 +6,8 @@ scenario:
|
||||
coverage:
|
||||
primary:
|
||||
- scheduling.cron
|
||||
- cron-rpcs
|
||||
- chat-announce-delivery
|
||||
- scheduling.cron-rpcs
|
||||
- scheduling.chat-announce-delivery
|
||||
secondary:
|
||||
- channels.qa-channel
|
||||
objective: Verify the agent can schedule a cron reminder one minute in the future and receive the follow-up in the QA channel.
|
||||
|
||||
@@ -6,7 +6,7 @@ scenario:
|
||||
coverage:
|
||||
primary:
|
||||
- ui.control
|
||||
- dashboard-open-auth-bootstrap
|
||||
- ui.dashboard-auth-bootstrap
|
||||
secondary:
|
||||
- media.image-understanding
|
||||
- channels.qa-channel
|
||||
|
||||
@@ -9,9 +9,9 @@ scenario:
|
||||
- qa.artifact-safety
|
||||
secondary:
|
||||
- ui.control
|
||||
- control-ui
|
||||
- cli-entrypoint
|
||||
- status-snapshots
|
||||
- gateway.control-ui-hosting
|
||||
- cli.entrypoint
|
||||
- cli.status-snapshots
|
||||
- tools.evidence
|
||||
- workspace.artifacts
|
||||
objective: Produce UX Matrix evidence artifacts through the QA Lab script producer contract.
|
||||
|
||||
@@ -516,7 +516,7 @@ export async function runUxMatrixEvidenceProducer(options: ProducerOptions) {
|
||||
]
|
||||
: []),
|
||||
],
|
||||
coverageIds: ["ui.control", "control-ui"],
|
||||
coverageIds: ["ui.control", "gateway.control-ui-hosting"],
|
||||
failureReason: matrixScreenshotResult.failureReason,
|
||||
stage: "screenshot-artifact",
|
||||
status: matrixScreenshotResult.status,
|
||||
@@ -526,7 +526,7 @@ export async function runUxMatrixEvidenceProducer(options: ProducerOptions) {
|
||||
},
|
||||
{
|
||||
artifacts: [{ kind: "log", path: relativeToArtifactBase(options.artifactBase, cliLogPath) }],
|
||||
coverageIds: ["cli-entrypoint", "status-snapshots"],
|
||||
coverageIds: ["cli.entrypoint", "cli.status-snapshots"],
|
||||
failureReason: cliResult.failureReason,
|
||||
stage: "entrypoint-help",
|
||||
status: cliResult.status,
|
||||
|
||||
3156
taxonomy.yaml
3156
taxonomy.yaml
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user