From e83873e822a93d44b228add3c7a9de333f6ca8d7 Mon Sep 17 00:00:00 2001 From: CC Worker Date: Mon, 8 Jun 2026 18:02:51 +0000 Subject: [PATCH] fix(exam): dedupe all AI auto-map rows by id before insert B1-4 live-route validation: continuation bands re-emit the same stable AI id for response_areas/boundaries/layout (not just questions), causing duplicate-pkey insert failures. Add _dedupe_rows_by_id applied to all four tables in _refresh_ai_rows. Co-Authored-By: Claude Opus 4.8 --- routers/exam/templates.py | 17 ++++++++++++++++- tests/test_exam_templates.py | 14 ++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/routers/exam/templates.py b/routers/exam/templates.py index 736b35f..1090751 100644 --- a/routers/exam/templates.py +++ b/routers/exam/templates.py @@ -543,12 +543,27 @@ def _map_first_pass_to_rows(template_id: str, first_pass: Dict[str, Any], pdf_by return {"questions": questions, "response_areas": response_areas, "boundaries": boundaries, "layout": layout} +def _dedupe_rows_by_id(rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Preserve first occurrence of stable AI row ids emitted by noisy OCR detectors.""" + out: List[Dict[str, Any]] = [] + seen: set[str] = set() + for row in rows: + row_id = row.get("id") + if row_id: + key = str(row_id) + if key in seen: + continue + seen.add(key) + out.append(row) + return out + + def _refresh_ai_rows(ctx: ExamContext, template_id: str, rows: Dict[str, List[Dict[str, Any]]]) -> None: sb = ctx.supabase for table in ("exam_response_areas", "exam_boundaries", "exam_template_layout", "exam_questions"): sb.table(table).delete().eq("template_id", template_id).eq("source", "ai").eq("confirmed", False).execute() for table, key in (("exam_questions", "questions"), ("exam_response_areas", "response_areas"), ("exam_boundaries", "boundaries"), ("exam_template_layout", "layout")): - payload = rows.get(key) or [] + payload = _dedupe_rows_by_id(rows.get(key) or []) if payload: 
def test_auto_map_deduplicates_repeated_response_area_ids(monkeypatch):
    """Auto-map must not leave two response-area rows sharing one id.

    The response-region detector is patched to report the same region twice;
    after the auto-map request the ids found in
    ``store["exam_response_areas"]`` have to be unique.
    """
    client, store = make_client(store=_template_with_source())
    _patch_auto_map(monkeypatch, store, fast=True)

    region = {
        "page_index": 0,
        "bbox": {"l": 50, "t": 700, "r": 100, "b": 680, "coord_origin": "BOTTOMLEFT"},
        "region_type": "answer_lines",
        "confidence": 0.9,
    }

    def _detect_same_region_twice(*_args, **_kwargs):
        # The original dict plus a shallow copy of it: two equal detections.
        return [region, dict(region)]

    monkeypatch.setattr(
        templates_mod,
        "detect_response_regions_from_pdf",
        _detect_same_region_twice,
    )

    response = client.post("/api/exam/templates/t1/auto-map")

    assert response.status_code == 200
    stored_ids = [area["id"] for area in store["exam_response_areas"]]
    assert len(set(stored_ids)) == len(stored_ids)