From 5434a5bf21e7025e9649075f66d7a587d4d6a38f Mon Sep 17 00:00:00 2001 From: CC Worker Date: Mon, 8 Jun 2026 19:18:09 +0000 Subject: [PATCH] fix(exam): emit auto-map canvas coords in the frontend 780-wide page space _pdf_page_geometry left rendered_w/h in PDF points (~595x842), but the app renders each PDF page at PAGE_WIDTH=780 with proportional height and places shapes at the raw bounds. Result: every detected region rendered shrunk (~0.76x) and shifted up-left. Set rendered_w=780 + rendered_h=780*aspect (matches pdfLoader + pageGeometryFromImages), and scale px/point TOPLEFT boxes into that space (was a hardcoded 0.5). Path-2 point boxes auto-correct via rendered_w/page_pt_w. Co-Authored-By: Claude Opus 4.8 --- routers/exam/templates.py | 40 ++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/routers/exam/templates.py b/routers/exam/templates.py index ef5cf75..953797f 100644 --- a/routers/exam/templates.py +++ b/routers/exam/templates.py @@ -342,6 +342,13 @@ def _pdf_has_text_layer(pdf_bytes: bytes) -> bool: pass +# Canvas page width the frontend renders each PDF page at (app src/utils/exam-canvas/model.ts +# PAGE_WIDTH). All auto-map canvas coords are emitted in this 780-wide, proportional-height space. +CANVAS_PAGE_WIDTH = 780.0 +# Response/answer-region detector (api/services/docling/regions.py) renders at 144 DPI = 2 px / PDF point. +REGIONS_PX_PER_PT = 2.0 + + def _pdf_page_geometry(pdf_bytes: bytes) -> List[Dict[str, float]]: with tempfile.NamedTemporaryFile(prefix="cc-auto-map-geom-", suffix=".pdf", delete=False) as fh: fh.write(pdf_bytes) @@ -355,14 +362,20 @@ def _pdf_page_geometry(pdf_bytes: bytes) -> List[Dict[str, float]]: for page in doc: media = page.mediabox crop = page.cropbox - rendered_w = float(crop.width or page.rect.width or 595.0) - rendered_h = float(crop.height or page.rect.height or 842.0) + page_pt_w = float(crop.width or page.rect.width or 1.0) + page_pt_h = float(crop.height or page.rect.height or 1.0) + # Emit canvas coords in the FRONTEND render space: the app draws each page at + # CANVAS_PAGE_WIDTH (app model.ts PAGE_WIDTH=780) with proportional height and stacks + # pages by those heights. Previously rendered_w/h were left in PDF points (~595x842), + # so every shape landed shrunk (~0.76x) and shifted up-left on the 780-wide canvas. + rendered_w = CANVAS_PAGE_WIDTH + rendered_h = CANVAS_PAGE_WIDTH * page_pt_h / page_pt_w pages.append({ "media_x0": float(media.x0), "crop_x0": float(crop.x0), "crop_y0": float(crop.y0), - "page_pt_w": float(crop.width or page.rect.width or 1), - "page_pt_h": float(crop.height or page.rect.height or 1), + "page_pt_w": page_pt_w, + "page_pt_h": page_pt_h, "rendered_w": rendered_w, "rendered_h": rendered_h, "page_top": page_top, @@ -384,11 +397,12 @@ def _pdf_page_geometry(pdf_bytes: bytes) -> List[Dict[str, float]]: def _page_geom(pages: List[Dict[str, float]], page_number: int) -> Dict[str, float]: if 1 <= page_number <= len(pages): return pages[page_number - 1] + _fallback_h = CANVAS_PAGE_WIDTH * 842.0 / 595.0 return { "media_x0": 0.0, "crop_x0": 0.0, "crop_y0": 0.0, "page_pt_w": 595.0, "page_pt_h": 842.0, - "rendered_w": 595.0, "rendered_h": 842.0, - "page_top": (page_number - 1) * 842.0, + "rendered_w": CANVAS_PAGE_WIDTH, "rendered_h": _fallback_h, + "page_top": (page_number - 1) * _fallback_h, } @@ -397,12 +411,16 @@ def _box_to_canvas(box: Optional[Dict[str, Any]], page_number: int, pages: List[ return None g = _page_geom(pages, page_number) if box.get("coord_origin") == "TOPLEFT" and {"x", "y", "w", "h"}.issubset(box): - scale = 0.5 if box.get("unit") == "px" else 1.0 + # Scale the box into the 780-wide canvas space. px boxes (opencv/gemma regions) are in + # rendered-image px at REGIONS_PX_PER_PT px/point; TOPLEFT point boxes are 1 px/point. + px_per_pt = REGIONS_PX_PER_PT if box.get("unit") == "px" else 1.0 + sx = g["rendered_w"] / (g["page_pt_w"] * px_per_pt) + sy = g["rendered_h"] / (g["page_pt_h"] * px_per_pt) return { - "x": round(float(box["x"]) * scale, 2), - "y": round(g["page_top"] + float(box["y"]) * scale, 2), - "w": round(float(box["w"]) * scale, 2), - "h": round(float(box["h"]) * scale, 2), + "x": round(float(box["x"]) * sx, 2), + "y": round(g["page_top"] + float(box["y"]) * sy, 2), + "w": round(float(box["w"]) * sx, 2), + "h": round(float(box["h"]) * sy, 2), } if not {"l", "t", "r", "b"}.issubset(box): return None