fix(exam): emit auto-map canvas coords in the frontend 780-wide page space
Some checks failed
api-ci-deploy / test-build-deploy (push) Has been cancelled

_pdf_page_geometry left rendered_w/rendered_h in PDF points (~595x842), but the app renders each PDF
page at PAGE_WIDTH=780 with proportional height and places shapes at the raw bounds. Result:
every detected region was rendered shrunk (~0.76x, i.e. 595/780) and shifted up-left. Set rendered_w=780 and
rendered_h=780*page_pt_h/page_pt_w (matches pdfLoader + pageGeometryFromImages), and scale px and point TOPLEFT
boxes into that space (previously px boxes used a hardcoded 0.5 and point boxes were left at 1.0). Path-2 point boxes auto-correct via rendered_w/page_pt_w.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
CC Worker 2026-06-08 19:18:09 +00:00
parent 44ccba2151
commit 5434a5bf21

View File

@ -342,6 +342,13 @@ def _pdf_has_text_layer(pdf_bytes: bytes) -> bool:
pass
# Canvas page width the frontend renders each PDF page at (app src/utils/exam-canvas/model.ts
# PAGE_WIDTH). All auto-map canvas coords are emitted in this 780-wide, proportional-height space.
CANVAS_PAGE_WIDTH = 780.0
# Response/answer-region detector (api/services/docling/regions.py) renders at 144 DPI = 2 px / PDF point.
REGIONS_PX_PER_PT = 2.0
def _pdf_page_geometry(pdf_bytes: bytes) -> List[Dict[str, float]]:
with tempfile.NamedTemporaryFile(prefix="cc-auto-map-geom-", suffix=".pdf", delete=False) as fh:
fh.write(pdf_bytes)
@ -355,14 +362,20 @@ def _pdf_page_geometry(pdf_bytes: bytes) -> List[Dict[str, float]]:
for page in doc:
media = page.mediabox
crop = page.cropbox
rendered_w = float(crop.width or page.rect.width or 595.0)
rendered_h = float(crop.height or page.rect.height or 842.0)
page_pt_w = float(crop.width or page.rect.width or 1.0)
page_pt_h = float(crop.height or page.rect.height or 1.0)
# Emit canvas coords in the FRONTEND render space: the app draws each page at
# CANVAS_PAGE_WIDTH (app model.ts PAGE_WIDTH=780) with proportional height and stacks
# pages by those heights. Previously rendered_w/h were left in PDF points (~595x842),
# so every shape landed shrunk (~0.76x) and shifted up-left on the 780-wide canvas.
rendered_w = CANVAS_PAGE_WIDTH
rendered_h = CANVAS_PAGE_WIDTH * page_pt_h / page_pt_w
pages.append({
"media_x0": float(media.x0),
"crop_x0": float(crop.x0),
"crop_y0": float(crop.y0),
"page_pt_w": float(crop.width or page.rect.width or 1),
"page_pt_h": float(crop.height or page.rect.height or 1),
"page_pt_w": page_pt_w,
"page_pt_h": page_pt_h,
"rendered_w": rendered_w,
"rendered_h": rendered_h,
"page_top": page_top,
@ -384,11 +397,12 @@ def _pdf_page_geometry(pdf_bytes: bytes) -> List[Dict[str, float]]:
def _page_geom(pages: List[Dict[str, float]], page_number: int) -> Dict[str, float]:
if 1 <= page_number <= len(pages):
return pages[page_number - 1]
_fallback_h = CANVAS_PAGE_WIDTH * 842.0 / 595.0
return {
"media_x0": 0.0, "crop_x0": 0.0, "crop_y0": 0.0,
"page_pt_w": 595.0, "page_pt_h": 842.0,
"rendered_w": 595.0, "rendered_h": 842.0,
"page_top": (page_number - 1) * 842.0,
"rendered_w": CANVAS_PAGE_WIDTH, "rendered_h": _fallback_h,
"page_top": (page_number - 1) * _fallback_h,
}
@ -397,12 +411,16 @@ def _box_to_canvas(box: Optional[Dict[str, Any]], page_number: int, pages: List[
return None
g = _page_geom(pages, page_number)
if box.get("coord_origin") == "TOPLEFT" and {"x", "y", "w", "h"}.issubset(box):
scale = 0.5 if box.get("unit") == "px" else 1.0
# Scale the box into the 780-wide canvas space. px boxes (opencv/gemma regions) are in
# rendered-image px at REGIONS_PX_PER_PT px/point; TOPLEFT point boxes are 1 px/point.
px_per_pt = REGIONS_PX_PER_PT if box.get("unit") == "px" else 1.0
sx = g["rendered_w"] / (g["page_pt_w"] * px_per_pt)
sy = g["rendered_h"] / (g["page_pt_h"] * px_per_pt)
return {
"x": round(float(box["x"]) * scale, 2),
"y": round(g["page_top"] + float(box["y"]) * scale, 2),
"w": round(float(box["w"]) * scale, 2),
"h": round(float(box["h"]) * scale, 2),
"x": round(float(box["x"]) * sx, 2),
"y": round(g["page_top"] + float(box["y"]) * sy, 2),
"w": round(float(box["w"]) * sx, 2),
"h": round(float(box["h"]) * sy, 2),
}
if not {"l", "t", "r", "b"}.issubset(box):
return None