fix(exam): dedupe all AI auto-map rows by id before insert
Some checks failed
api-ci-deploy / test-build-deploy (push) Has been cancelled

B1-4 live-route validation: continuation bands re-emit the same stable AI id for
response_areas/boundaries/layout (not just questions), causing duplicate-pkey insert
failures. Add _dedupe_rows_by_id applied to all four tables in _refresh_ai_rows.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
CC Worker 2026-06-08 18:02:51 +00:00
parent 150b915282
commit e83873e822
2 changed files with 30 additions and 1 deletions

View File

@ -543,12 +543,27 @@ def _map_first_pass_to_rows(template_id: str, first_pass: Dict[str, Any], pdf_by
return {"questions": questions, "response_areas": response_areas, "boundaries": boundaries, "layout": layout}
def _dedupe_rows_by_id(rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Preserve first occurrence of stable AI row ids emitted by noisy OCR detectors."""
out: List[Dict[str, Any]] = []
seen: set[str] = set()
for row in rows:
row_id = row.get("id")
if row_id:
key = str(row_id)
if key in seen:
continue
seen.add(key)
out.append(row)
return out
def _refresh_ai_rows(ctx: ExamContext, template_id: str, rows: Dict[str, List[Dict[str, Any]]]) -> None:
    """Replace a template's unconfirmed AI-sourced rows with the rows given.

    First deletes, from each of the four exam tables, every row matching
    ``template_id`` with ``source == "ai"`` and ``confirmed == False``. Then
    inserts the new payload for each table, after dropping rows that repeat
    an earlier row's ``id`` within the same payload.

    Args:
        ctx: Context object; only its ``supabase`` client is used here.
        template_id: Value matched against the ``template_id`` column when
            deleting the existing rows.
        rows: Mapping with optional keys ``"questions"``,
            ``"response_areas"``, ``"boundaries"`` and ``"layout"``, each a
            list of row dicts. Missing or empty entries insert nothing.
    """
    sb = ctx.supabase
    # NOTE(review): deletes run dependents-first and inserts run
    # questions-first, presumably to satisfy foreign keys -- confirm
    # against the schema before reordering.
    for table in ("exam_response_areas", "exam_boundaries", "exam_template_layout", "exam_questions"):
        sb.table(table).delete().eq("template_id", template_id).eq("source", "ai").eq("confirmed", False).execute()
    for table, key in (
        ("exam_questions", "questions"),
        ("exam_response_areas", "response_areas"),
        ("exam_boundaries", "boundaries"),
        ("exam_template_layout", "layout"),
    ):
        # Dedupe every table's payload by id before inserting.
        payload = _dedupe_rows_by_id(rows.get(key) or [])
        if payload:
            sb.table(table).insert(payload).execute()

View File

@ -642,6 +642,20 @@ def test_auto_map_fast_path_merges_ai_rows_and_returns_detail(monkeypatch):
assert store["exam_boundaries"] and store["exam_boundaries"][0]["derivation"] == "docling-main-band"
def test_auto_map_deduplicates_repeated_response_area_ids(monkeypatch):
    """Auto-map stores no two response areas with the same id when the detector repeats a region."""
    store = _template_with_source()
    client, store = make_client(store=store)
    _patch_auto_map(monkeypatch, store, fast=True)

    region = {
        "page_index": 0,
        "bbox": {"l": 50, "t": 700, "r": 100, "b": 680, "coord_origin": "BOTTOMLEFT"},
        "region_type": "answer_lines",
        "confidence": 0.9,
    }

    def _repeat_region(*_a, **_k):
        # Emit the same region twice: the original dict plus a shallow copy.
        return [region, dict(region)]

    monkeypatch.setattr(templates_mod, "detect_response_regions_from_pdf", _repeat_region)

    resp = client.post("/api/exam/templates/t1/auto-map")
    assert resp.status_code == 200

    stored_ids = [area["id"] for area in store["exam_response_areas"]]
    assert len(stored_ids) == len(set(stored_ids))
def test_auto_map_preserves_manual_and_confirmed_rows_on_rerun(monkeypatch):
store = _template_with_source()
store.update({