fix(auth): make load_pool() non-destructive for env-seeded credentials

load_pool() is meant to be a read, but it persistently pruned env-seeded
pool entries whenever the calling process's os.environ lacked the seeding
var. A process without MINIMAX_API_KEY would delete the persisted
env:MINIMAX_API_KEY entry from auth.json for every other process, causing
auth.json to oscillate and auxiliary auto-detect to fall through to the
wrong provider.

env:* entries are persisted references re-hydrated from the environment on
each load — a missing var means "cannot re-seed right now", not "source is
gone forever". _prune_stale_seeded_entries now gates env-source removal
behind prune_env_sources (default True for explicit cleanup paths);
load_pool() passes prune_env_sources=False. File-backed singletons
(device-code OAuth, hermes_pkce) still prune when their backing file is
gone, and explicit removal via `hermes auth remove` (source suppression)
is unaffected.

Fixes #9331.

Co-authored-by: houko <suzukaze.haduki@gmail.com>
This commit is contained in:
yeyitech
2026-06-21 07:34:21 -07:00
committed by Teknium
parent a966932392
commit 8a506ed3ac
2 changed files with 82 additions and 12 deletions

View File

@@ -2062,19 +2062,34 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
return changed, active_sources
def _prune_stale_seeded_entries(entries: List[PooledCredential], active_sources: Set[str]) -> bool:
def _prune_stale_seeded_entries(
entries: List[PooledCredential],
active_sources: Set[str],
*,
prune_env_sources: bool = True,
) -> bool:
def _is_prunable(entry: PooledCredential) -> bool:
# ``env:*`` entries are persisted references that get re-hydrated from
# the environment on every load. A process that merely lacks the env
# var this call must NOT delete the on-disk entry for every other
# process — that destructive read is the bug behind #9331. Only prune
# an env source when ``prune_env_sources`` is explicitly requested
# (e.g. an `hermes auth` command that confirmed the source is gone).
if entry.source.startswith("env:"):
return prune_env_sources
# File-backed singletons (device-code OAuth, claude_code) and Hermes
# PKCE should disappear from the pool when their backing file is gone.
return (
is_borrowed_credential_source(entry.source, entry.provider)
or entry.source == "hermes_pkce"
)
retained = [
entry
for entry in entries
if _is_manual_source(entry.source)
or entry.source in active_sources
or not (
is_borrowed_credential_source(entry.source, entry.provider)
# Hermes PKCE is Hermes-owned/persistable while present, but it is
# still a file-backed singleton and should disappear from the pool
# when the backing OAuth file is gone.
or entry.source == "hermes_pkce"
)
or not _is_prunable(entry)
]
if len(retained) == len(entries):
return False
@@ -2174,7 +2189,15 @@ def load_pool(provider: str) -> CredentialPool:
singleton_changed, singleton_sources = _seed_from_singletons(provider, entries)
env_changed, env_sources = _seed_from_env(provider, entries)
changed = raw_needs_sanitization or singleton_changed or env_changed
changed |= _prune_stale_seeded_entries(entries, singleton_sources | env_sources)
# ``load_pool()`` is a non-destructive read for env-seeded entries: a
# process missing a provider env var must not delete the persisted
# pool entry for every other process (#9331). File-backed singletons
# still prune when their backing file is gone.
changed |= _prune_stale_seeded_entries(
entries,
singleton_sources | env_sources,
prune_env_sources=False,
)
changed |= _normalize_pool_priorities(provider, entries)
if changed:

View File

@@ -1179,7 +1179,10 @@ def test_load_pool_falls_back_to_os_environ_when_dotenv_empty(tmp_path, monkeypa
assert entry.access_token == "sk-or-from-runtime-env"
def test_load_pool_removes_stale_seeded_env_entry(tmp_path, monkeypatch):
def test_load_pool_preserves_env_seeded_entry_when_env_is_missing(tmp_path, monkeypatch):
# Regression for #9331: load_pool() is a non-destructive read. A process
# that lacks the seeding env var must NOT delete the persisted pool entry
# that another process correctly seeded.
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
_write_auth_store(
@@ -1206,10 +1209,54 @@ def test_load_pool_removes_stale_seeded_env_entry(tmp_path, monkeypatch):
pool = load_pool("openrouter")
assert pool.entries() == []
entries = pool.entries()
assert len(entries) == 1
assert entries[0].source == "env:OPENROUTER_API_KEY"
auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
assert auth_payload["credential_pool"]["openrouter"] == []
persisted = auth_payload["credential_pool"]["openrouter"]
assert len(persisted) == 1
assert persisted[0]["source"] == "env:OPENROUTER_API_KEY"
def test_load_pool_missing_env_does_not_overwrite_other_process_seed(tmp_path, monkeypatch):
# The exact cross-process oscillation described in #9331: a process without
# MINIMAX_API_KEY must leave the on-disk entry intact for processes that
# do have it.
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes"))
monkeypatch.delenv("MINIMAX_API_KEY", raising=False)
_write_auth_store(
tmp_path,
{
"version": 1,
"credential_pool": {
"minimax": [
{
"id": "minimax-env",
"label": "MINIMAX_API_KEY",
"auth_type": "api_key",
"priority": 0,
"source": "env:MINIMAX_API_KEY",
"access_token": "seeded-by-other-process",
"base_url": "https://api.minimaxi.chat/v1",
}
]
},
},
)
from agent.credential_pool import load_pool
pool = load_pool("minimax")
assert pool.has_credentials()
assert len(pool.entries()) == 1
assert pool.entries()[0].source == "env:MINIMAX_API_KEY"
auth_payload = json.loads((tmp_path / "hermes" / "auth.json").read_text())
persisted = auth_payload["credential_pool"]["minimax"]
assert len(persisted) == 1
assert persisted[0]["source"] == "env:MINIMAX_API_KEY"
def test_load_pool_migrates_nous_provider_state(tmp_path, monkeypatch):