fix(curator): stop restore from matching unrelated skills by name prefix

restore_skill() falls back to p.name.startswith(f"{skill_name}-") when no
archive directory matches the requested name exactly. That fallback is meant
to catch the timestamped duplicate archive_skill() writes on a name collision
(<skill>-YYYYMMDDHHMMSS), but the bare prefix also matches any unrelated
archived skill named <name>-something. So restoring "git" can pull an archived
"git-helpers" out of .archive/, rename it to "git", and report success: the
requested skill is not restored and the sibling is gone from the archive.

Constrain the fallback to the exact suffix archive_skill() produces, a 14 digit
timestamp. The exact-name match and the recursive nested-archive walk are
unchanged, so nested and timestamped restores still work; unrelated siblings no
longer match.

Fixes #47647
This commit is contained in:
Max Freedom Pollard
2026-06-16 21:59:16 -07:00
committed by Teknium
parent cbfa018aef
commit 992b922389
2 changed files with 86 additions and 3 deletions

View File

@@ -453,6 +453,76 @@ def test_archive_collision_gets_suffix(skills_home):
assert any(n.startswith("dup-") and n != "dup" for n in archived)
def test_restore_does_not_pull_unrelated_sibling_out_of_archive(skills_home):
"""Restoring a name with no exact archive entry must NOT grab a different
archived skill that merely shares a ``<name>-`` prefix.
The timestamped-duplicate fallback recognises only the suffix
``archive_skill`` writes on a collision (``-YYYYMMDDHHMMSS``). A bare
``startswith(f"{name}-")`` also matches sibling skills, so restoring
``git`` would rip an archived ``git-helpers`` out of the archive, rename
it to ``git``, and report success — destroying the sibling's only copy."""
from tools.skill_usage import (
archive_skill, restore_skill, list_archived_skill_names, mark_agent_created,
)
skills_dir = skills_home / "skills"
_write_skill(skills_dir, "git-helpers")
mark_agent_created("git-helpers")
ok, msg = archive_skill("git-helpers")
assert ok, msg
# "git" was never archived; only its prefix-sharing sibling was.
ok, msg = restore_skill("git")
assert not ok, f"restore('git') should not match 'git-helpers': {msg}"
assert "not found" in msg.lower()
# The sibling must be untouched: still in the archive, never moved to skills/git.
assert (skills_dir / ".archive" / "git-helpers" / "SKILL.md").exists()
assert "git-helpers" in list_archived_skill_names()
assert not (skills_dir / "git").exists()
def test_restore_still_matches_timestamped_duplicate(skills_home):
"""The fix must not over-narrow: a real collision dupe written by
``archive_skill`` (``<name>-YYYYMMDDHHMMSS``) is still restorable by name
when no bare ``<name>`` entry exists."""
from tools.skill_usage import restore_skill
skills_dir = skills_home / "skills"
dupe = skills_dir / ".archive" / "report-tool-20260101000000"
dupe.mkdir(parents=True)
(dupe / "SKILL.md").write_text(
"---\nname: report-tool\ndescription: x\n---\n", encoding="utf-8",
)
ok, msg = restore_skill("report-tool")
assert ok, msg
assert (skills_dir / "report-tool" / "SKILL.md").exists()
def test_restore_prefers_timestamped_dupe_over_unrelated_sibling(skills_home):
"""With both a real timestamped duplicate and an unrelated sibling present,
restoring the bare name picks the duplicate and leaves the sibling alone."""
from tools.skill_usage import restore_skill
archive = skills_home / "skills" / ".archive"
dupe = archive / "report-20260101000000" # real collision dupe of "report"
sibling = archive / "report-card" # unrelated sibling skill
for d, frontname in ((dupe, "report"), (sibling, "report-card")):
d.mkdir(parents=True)
(d / "SKILL.md").write_text(
f"---\nname: {frontname}\ndescription: x\n---\n", encoding="utf-8",
)
ok, msg = restore_skill("report")
assert ok, msg
# The duplicate (name: report) was restored, not the sibling (name: report-card).
restored = (skills_home / "skills" / "report" / "SKILL.md").read_text()
assert "name: report\n" in restored
assert "name: report-card" not in restored
assert not dupe.exists() # the dupe moved out of the archive
assert sibling.exists() # the unrelated sibling stayed put
# ---------------------------------------------------------------------------
# Reporting
# ---------------------------------------------------------------------------

View File

@@ -752,14 +752,27 @@ def restore_skill(skill_name: str) -> Tuple[bool, str]:
if not archive_root.exists():
return False, "no archive directory"
# Try exact name match first, then any prefix match (for timestamped dupes).
# Try exact name match first, then the timestamped-duplicate fallback.
# Recursive walk handles nested archive layouts (e.g. .archive/<category>/<skill>/)
# left behind by older archive paths or external imports.
candidates = [p for p in archive_root.rglob("*") if p.is_dir() and p.name == skill_name]
if not candidates:
# A name collision makes archive_skill() disambiguate by appending its
# UTC timestamp ("<skill>-YYYYMMDDHHMMSS", a 14-digit suffix), so only
# that exact shape is another copy of THIS skill. A bare
# startswith(f"{skill_name}-") also swallows unrelated sibling skills —
# restoring "git" would otherwise pull an archived "git-helpers" out of
# the archive and rename it to "git", destroying the sibling's only
# copy. Require the suffix to be the timestamp archive_skill writes.
prefix = f"{skill_name}-"
candidates = sorted(
[p for p in archive_root.rglob("*")
if p.is_dir() and p.name.startswith(f"{skill_name}-")],
[
p for p in archive_root.rglob("*")
if p.is_dir()
and p.name.startswith(prefix)
and len(p.name) - len(prefix) == 14
and p.name[len(prefix):].isdigit()
],
reverse=True,
)
if not candidates: