mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-24 11:38:29 +00:00
fix(agent): shrink anthropic-native image history
Retry image-size rejections by rewriting Anthropic base64 image source blocks, not just OpenAI-style image_url parts.
This commit is contained in:
@@ -805,10 +805,11 @@ def try_shrink_image_parts_in_messages(
|
||||
Pillow couldn't help (caller should surface the original error).
|
||||
|
||||
Strategy: look for ``image_url`` / ``input_image`` parts carrying a
|
||||
``data:image/...;base64,...`` payload. For each one whose encoded
|
||||
size exceeds 4 MB (a safe target that slides under Anthropic's 5 MB
|
||||
ceiling with header overhead) or whose longest side exceeds
|
||||
``max_dimension``, write the base64 to a tempfile, call
|
||||
``data:image/...;base64,...`` payload, plus Anthropic-native
|
||||
``{"type": "image", "source": {"type": "base64", ...}}`` blocks.
|
||||
For each one whose encoded size exceeds 4 MB (a safe target that slides
|
||||
under Anthropic's 5 MB ceiling with header overhead) or whose longest side
|
||||
exceeds ``max_dimension``, write the base64 to a tempfile, call
|
||||
``vision_tools._resize_image_for_vision`` to produce a smaller data
|
||||
URL, and substitute it in place.
|
||||
|
||||
@@ -964,6 +965,28 @@ def try_shrink_image_parts_in_messages(
|
||||
logger.warning("image-shrink recovery: re-encode failed — %s", exc)
|
||||
return None, triggered_by is not None
|
||||
|
||||
def _source_to_data_url(source: Any) -> Optional[str]:
|
||||
if not isinstance(source, dict) or source.get("type") != "base64":
|
||||
return None
|
||||
data = source.get("data")
|
||||
if not isinstance(data, str) or not data:
|
||||
return None
|
||||
media_type = str(source.get("media_type") or "image/jpeg").strip()
|
||||
if not media_type.startswith("image/"):
|
||||
media_type = "image/jpeg"
|
||||
return f"data:{media_type};base64,{data}"
|
||||
|
||||
def _write_data_url_to_source(source: dict, data_url: str) -> None:
|
||||
header, _, data = data_url.partition(",")
|
||||
media_type = "image/jpeg"
|
||||
if header.startswith("data:"):
|
||||
candidate = header[len("data:"):].split(";", 1)[0].strip()
|
||||
if candidate.startswith("image/"):
|
||||
media_type = candidate
|
||||
source["type"] = "base64"
|
||||
source["media_type"] = media_type
|
||||
source["data"] = data
|
||||
|
||||
for msg in api_messages:
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
@@ -974,6 +997,16 @@ def try_shrink_image_parts_in_messages(
|
||||
if not isinstance(part, dict):
|
||||
continue
|
||||
ptype = part.get("type")
|
||||
if ptype == "image":
|
||||
source = part.get("source")
|
||||
url = _source_to_data_url(source)
|
||||
resized, unshrinkable = _shrink_data_url(url or "")
|
||||
if resized and isinstance(source, dict):
|
||||
_write_data_url_to_source(source, resized)
|
||||
changed_count += 1
|
||||
elif unshrinkable:
|
||||
unshrinkable_oversized += 1
|
||||
continue
|
||||
if ptype not in {"image_url", "input_image"}:
|
||||
continue
|
||||
image_value = part.get("image_url")
|
||||
|
||||
@@ -260,6 +260,52 @@ class TestShrinkImagePartsHelper:
|
||||
assert seen["max_dimension"] == 2000
|
||||
assert msgs[0]["content"][0]["image_url"]["url"] == shrunk
|
||||
|
||||
def test_anthropic_base64_image_source_rewritten(self, monkeypatch):
    """Anthropic-native base64 image sources are rewritten in place when shrunk."""
    agent = _make_agent()
    # NOTE(review): presumably fakes an image whose longest side (2501)
    # exceeds the max_dimension passed below — confirm against the helper.
    _install_fake_pillow(monkeypatch, (2501, 100), shrunk_size=(1500, 60))

    # Only the payload after the comma belongs in an Anthropic source block.
    original_data = _big_png_data_url(100).partition(",")[2]
    shrunk = "data:image/jpeg;base64," + "N" * 1000
    captured = {}

    def _stub_resize(path, mime_type=None, max_base64_bytes=None, max_dimension=None):
        # Record what the shrink helper forwards, then hand back the canned URL.
        captured["mime_type"] = mime_type
        captured["max_dimension"] = max_dimension
        return shrunk

    monkeypatch.setattr(
        "tools.vision_tools._resize_image_for_vision",
        _stub_resize,
        raising=False,
    )

    image_block = {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": "image/png",
            "data": original_data,
        },
    }
    msgs = [{"role": "user", "content": [image_block]}]

    changed = agent._try_shrink_image_parts_in_messages(
        msgs,
        max_dimension=2000,
    )

    # Re-read through the message list so a replaced dict would be noticed too.
    source = msgs[0]["content"][0]["source"]

    assert changed is True
    assert captured["mime_type"] == "image/png"
    assert captured["max_dimension"] == 2000
    assert source["type"] == "base64"
    assert source["media_type"] == "image/jpeg"
    assert source["data"] == "N" * 1000
|
||||
|
||||
def test_oversized_input_image_string_shape_rewritten(self, monkeypatch):
|
||||
"""OpenAI Responses shape: {type: input_image, image_url: "data:..."}."""
|
||||
agent = _make_agent()
|
||||
|
||||
Reference in New Issue
Block a user