From 150b9152824c995cc732ed08213241ca2b4c1e8d Mon Sep 17 00:00:00 2001 From: kcar Date: Mon, 8 Jun 2026 18:44:30 +0100 Subject: [PATCH] [verified] fix exam auto-map duplicate continued parts (cherry picked from commit 31c51cb7aa33d7f2e1102cea4ffabfefee259faa) --- routers/exam/templates.py | 6 ++++++ tests/test_exam_templates.py | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/routers/exam/templates.py b/routers/exam/templates.py index 48ad039..736b35f 100644 --- a/routers/exam/templates.py +++ b/routers/exam/templates.py @@ -507,6 +507,12 @@ def _map_first_pass_to_rows(template_id: str, first_pass: Dict[str, Any], pdf_by questions.append({"id": parent_id, "template_id": template_id, "label": parent_label, "order": len(q_ids) - 1, "max_marks": 0, "is_container": True, "source": "ai", "confirmed": False, "confidence": 0.7, "derivation": "docling-inferred-main-question"}) pid = _ai_id(template_id, "part", label) first_part_by_page.setdefault(page_index, pid) + # B1 live-route papers can carry continuation bands for the same part label + # on later pages. The UUID is intentionally stable per template+part label, + # so only insert the first question row; later continuations still map + # response/context regions through first_part_by_page. + if any(q["id"] == pid for q in questions): + continue bounds = None y1, y2 = band.get("y_start"), band.get("y_end") if margins["left"] is not None and margins["right"] is not None and y1 is not None and y2 is not None: diff --git a/tests/test_exam_templates.py b/tests/test_exam_templates.py index 6ac1676..307e9ef 100644 --- a/tests/test_exam_templates.py +++ b/tests/test_exam_templates.py @@ -595,6 +595,27 @@ def test_box_to_canvas_uses_cropbox_as_page_origin(): assert templates_mod._box_to_canvas(box, 1, pages) == {"x": 0.0, "y": 25.0, "w": 80.0, "h": 40.0} +def test_auto_map_deduplicates_continued_part_labels(monkeypatch): + monkeypatch.setattr(templates_mod, "_pdf_page_geometry", lambda _pdf: [ + {"media_x0": 0.0, "crop_x0": 0.0, "crop_y0": 0.0, "page_pt_w": 600.0, "page_pt_h": 800.0, "rendered_w": 600.0, "rendered_h": 800.0, "page_top": 0.0}, + {"media_x0": 0.0, "crop_x0": 0.0, "crop_y0": 0.0, "page_pt_w": 600.0, "page_pt_h": 800.0, "rendered_w": 600.0, "rendered_h": 800.0, "page_top": 800.0}, + ]) + first_pass = _first_pass_template() + first_pass["meta"]["n_pages"] = 2 + first_pass["pages"]["2"] = { + "role": "question", "role_source": "auto", "margins_enabled": True, + "main_bands": [], + "part_bands": [{"label": "01.1", "question": "01", "y_start": 760, "y_end": 600, "label_box": {"l": 50, "t": 760, "r": 90, "b": 740, "coord_origin": "BOTTOMLEFT"}, "source": "auto", "confirmed": False}], + "furniture": [], "figures": [], "tables": [], + } + + rows = templates_mod._map_first_pass_to_rows("t1", first_pass, b"%PDF", []) + + question_ids = [q["id"] for q in rows["questions"]] + assert len(question_ids) == len(set(question_ids)) + assert [q["label"] for q in rows["questions"]].count("01.1") == 1 + + def test_response_region_types_are_mapped_to_response_form_enum(monkeypatch): monkeypatch.setattr(templates_mod, "_pdf_page_geometry", lambda _pdf: [{"media_x0": 0.0, "crop_x0": 0.0, "crop_y0": 0.0, "page_pt_w": 600.0, "page_pt_h": 800.0, "rendered_w": 600.0, "rendered_h": 800.0, "page_top": 0.0}]) first_pass = _first_pass_template()