S5-5: centralized part-box synthesis (band-y x content-margins)
Add synthesize_part_box() as the single authoritative S5 part-box projection (T3 swap point): content-margin x-extent x part-band y-extent, BOTTOMLEFT coords; label_box retained as a separate anchor. build() attaches box per part. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
71ddceb19e
commit
621d283ceb
@ -44,6 +44,34 @@ def _furn_kind(it):
|
||||
return "chrome_text"
|
||||
|
||||
|
||||
def synthesize_part_box(part_band, content_x_band):
    """Return the one authoritative S5 part-box projection.

    Parts remain boxes in S5, but the box is a projection rather than intrinsic
    geometry: document content margins provide the x-extent and the part band
    provides y. The band end is already bounded by the next part in bands.py;
    the original label box remains a separate anchor for rendering/review.

    Coordinates stay in the first-pass PDF-point BOTTOMLEFT bbox shape.

    Args:
        part_band: Mapping with ``y_start`` and ``y_end`` for the part.
        content_x_band: Mapping with ``x_left`` and ``x_right`` for the
            document content margins.

    Returns:
        A dict with ``l``, ``t``, ``r``, ``b`` (each rounded to one decimal
        place) and ``coord_origin`` set to ``"BOTTOMLEFT"``, or ``None`` when
        either input is missing/empty, lacks a required key, or carries a
        value that cannot be rounded (e.g. ``None``).
    """
    if not content_x_band or not part_band:
        return None
    try:
        edges = {
            "l": content_x_band["x_left"],
            "t": part_band["y_start"],
            "r": content_x_band["x_right"],
            "b": part_band["y_end"],
        }
        # round() raises TypeError for None/non-numeric values; treat that the
        # same as a missing key so callers get "no box" instead of a crash.
        box = {side: round(value, 1) for side, value in edges.items()}
    except (KeyError, TypeError):
        return None
    box["coord_origin"] = "BOTTOMLEFT"
    return box
|
||||
|
||||
|
||||
def build(structured, bands, furniture, pdf=None, page_roles=None):
|
||||
page_roles = page_roles or {}
|
||||
part_bbox = {p["label"]: p.get("bbox")
|
||||
@ -124,10 +152,15 @@ def build(structured, bands, furniture, pdf=None, page_roles=None):
|
||||
main = [{"question": m["question"], "y_start": m["y_start"], "y_end": m["y_end"],
|
||||
"is_start": m.get("is_start", True),
|
||||
"source": "auto", "confirmed": False} for m in pb["main"]]
|
||||
part = [{"label": p["label"], "question": p["question"],
|
||||
"y_start": p["y_start"], "y_end": p["y_end"],
|
||||
"label_box": part_bbox.get(p["label"]), # app may render a box instead of lines
|
||||
"source": "auto", "confirmed": False} for p in pb["part"]]
|
||||
part = []
|
||||
for p in pb["part"]:
|
||||
part.append({
|
||||
"label": p["label"], "question": p["question"],
|
||||
"y_start": p["y_start"], "y_end": p["y_end"],
|
||||
"label_box": part_bbox.get(p["label"]), # anchor, not the part extent
|
||||
"box": synthesize_part_box(p, xband),
|
||||
"source": "auto", "confirmed": False,
|
||||
})
|
||||
pr = page_roles.get(pgs) or page_roles.get(pg) or {}
|
||||
pages[pgs] = {
|
||||
"role": pr.get("role", "question"),
|
||||
|
||||
@ -26,7 +26,9 @@ def test_auto_map_matches_spike_physics_template_shape():
|
||||
assert len(result["margins"]) == len(expected["margins"])
|
||||
assert set(result["pages"].keys()) == set(expected["pages"].keys())
|
||||
assert result["pages"]["2"]["role"] == expected["pages"]["2"]["role"]
|
||||
assert result["pages"]["2"]["part_bands"][0].keys() == expected["pages"]["2"]["part_bands"][0].keys()
|
||||
part_band = result["pages"]["2"]["part_bands"][0]
|
||||
assert set(expected["pages"]["2"]["part_bands"][0].keys()).issubset(part_band.keys())
|
||||
assert part_band["box"]
|
||||
|
||||
|
||||
@pytest.mark.skipif(not BORN_DIGITAL_PDF.exists(), reason="born-digital spike PDF not present")
|
||||
|
||||
55
tests/test_docling_template.py
Normal file
55
tests/test_docling_template.py
Normal file
@ -0,0 +1,55 @@
|
||||
from api.services.docling.template import build, synthesize_part_box
|
||||
|
||||
|
||||
def test_synthesize_part_box_uses_content_margins_and_band_y():
    """The box takes x from the content margins and y from the part band, rounded to 0.1."""
    band = {"label": "01.1", "y_start": 712.34, "y_end": 601.27}
    margins = {"x_left": 54.04, "x_right": 521.96}
    expected_box = {
        "l": 54.0,
        "t": 712.3,
        "r": 522.0,
        "b": 601.3,
        "coord_origin": "BOTTOMLEFT",
    }

    actual_box = synthesize_part_box(band, margins)

    assert actual_box == expected_box
|
||||
|
||||
|
||||
def test_synthesize_part_box_returns_none_without_margin_contract():
    """An empty margin band, or a part band lacking ``y_end``, yields no box."""
    incomplete_inputs = (
        ({"y_start": 700, "y_end": 650}, {}),
        ({"y_start": 700}, {"x_left": 50, "x_right": 520}),
    )
    for band, margins in incomplete_inputs:
        assert synthesize_part_box(band, margins) is None
|
||||
|
||||
|
||||
def test_build_carries_label_box_as_anchor_and_single_synthesized_part_box():
    """build() exposes the label bbox as ``label_box`` alongside the synthesized ``box``."""
    part_entry = {
        "label": "01.1",
        "page": 2,
        "bbox": {"l": 40, "t": 720, "r": 70, "b": 705},
    }
    structured = {
        "board": "aqa",
        "paper_code": "8463/1",
        "questions": [{"question": "01", "parts": [part_entry]}],
    }

    page_bands = {
        "main": [{"question": "01", "y_start": 730, "y_end": 0, "is_start": True}],
        "part": [{"label": "01.1", "question": "01", "y_start": 720, "y_end": 610}],
    }
    bands = {"pages": {"2": page_bands}}

    content_margins = {
        "content_x_band": {"x_left": 55, "x_right": 515},
        "per_page": {"2": {"top": 760, "bottom": 40, "left": 55, "right": 515}},
    }
    furniture = {"n_pages": 2, "content_margins": content_margins, "items": []}

    template = build(structured, bands, furniture)
    part = template["pages"]["2"]["part_bands"][0]

    # The label bbox is carried through untouched as the anchor.
    assert part["label_box"] == {"l": 40, "t": 720, "r": 70, "b": 705}
    # The part extent is margins (x) crossed with the band (y).
    assert part["box"] == {"l": 55, "t": 720, "r": 515, "b": 610, "coord_origin": "BOTTOMLEFT"}
|
||||
Loading…
x
Reference in New Issue
Block a user