api/run/initialization/init_exam_graph.py
CC Worker c690caa26d feat(exams): cc.public.exams Neo4j graph init + node schemas
- modules/database/schemas/nodes/exams/exam_nodes.py: neontology node classes for
  ExamBoard/Specification/SpecPoint/ExamPaper/Question/Part/Region (uuid_string joins
  to Supabase exam_questions.id / exam_response_areas.id / eb_exams.exam_code).
- run/initialization/init_exam_graph.py: idempotent init — creates the shared public
  cc.public.exams database, 10 uniqueness constraints, and seeds AQA + AQA-PHYS-8463
  (GCSE Physics) with its 8 top-level topic SpecPoints.

Applied + verified on dev Neo4j (192.168.0.209, enterprise): db online, 10 constraints,
AQA-[:PUBLISHES]->AQA-PHYS-8463-[:HAS_SPEC_POINT]->8 points. Full sub-point catalogue is
a later data task. spec_code AQA-PHYS-8463 must match the eb_exams seed (S4-3).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-06 16:14:56 +00:00

129 lines
5.6 KiB
Python

"""
init_exam_graph.py — Initialise the cc.public.exams Neo4j knowledge graph.
Creates the shared, public exam database, its uniqueness constraints, and seeds the AQA exam
board + AQA GCSE Physics (8463) specification with its 8 top-level topic SpecPoints. Idempotent
(CREATE DATABASE IF NOT EXISTS / CREATE CONSTRAINT IF NOT EXISTS / MERGE).
Run inside the ccapi container:
python3 -c "from run.initialization.init_exam_graph import init; import json; print(json.dumps(init()))"
NOTE: the 8 SpecPoints seeded here are the real AQA GCSE Physics *top-level* topics. The full
sub-point breakdown (e.g. 4.1.1.1 ...) is a later data-population task (sourceable from the AQA
spec PDF via Docling). spec_code AQA-PHYS-8463 is the standalone GCSE Physics code that matches
"AQA Physics Paper 1H"; the eb_exams/eb_specifications seed (card S4-3) must use the same code.
"""
import uuid
from typing import Dict, Any
from modules.database.tools.neo4j_driver_tools import get_driver
# Name of the shared, public Neo4j database that holds the exam graph.
EXAM_DB = "cc.public.exams"

# Fixed uuid5 namespace: every identifier derived from it is deterministic,
# so re-running the seed produces the same uuid_string values.
NS = uuid.UUID("00000000-0000-0000-0000-00000000e8a1")

# Seed data: the exam board node.
BOARD = {
    "code": "AQA",
    "name": "AQA",
}

# Seed data: the specification node published by BOARD.
SPEC = {
    "spec_code": "AQA-PHYS-8463",
    "exam_board_code": "AQA",
    "subject_code": "PHYS",
    "award_code": "GCSE",
    "title": "AQA GCSE Physics (8463)",
}

# Top-level topics of AQA GCSE Physics (8463) as (ref, description) pairs,
# where ref is the topic number.
SPEC_POINTS = [
    ("4.1", "Energy"),
    ("4.2", "Electricity"),
    ("4.3", "Particle model of matter"),
    ("4.4", "Atomic structure"),
    ("4.5", "Forces"),
    ("4.6", "Waves"),
    ("4.7", "Magnetism and electromagnetism"),
    ("4.8", "Space physics"),
]

# Uniqueness constraints applied to EXAM_DB, executed in list order.
CONSTRAINTS = [
    # One uuid_string constraint per node label.
    "CREATE CONSTRAINT exam_board_uid IF NOT EXISTS FOR (n:ExamBoard) REQUIRE n.uuid_string IS UNIQUE",
    "CREATE CONSTRAINT spec_uid IF NOT EXISTS FOR (n:Specification) REQUIRE n.uuid_string IS UNIQUE",
    "CREATE CONSTRAINT specpoint_uid IF NOT EXISTS FOR (n:SpecPoint) REQUIRE n.uuid_string IS UNIQUE",
    "CREATE CONSTRAINT exampaper_uid IF NOT EXISTS FOR (n:ExamPaper) REQUIRE n.uuid_string IS UNIQUE",
    "CREATE CONSTRAINT question_uid IF NOT EXISTS FOR (n:Question) REQUIRE n.uuid_string IS UNIQUE",
    "CREATE CONSTRAINT part_uid IF NOT EXISTS FOR (n:Part) REQUIRE n.uuid_string IS UNIQUE",
    "CREATE CONSTRAINT region_uid IF NOT EXISTS FOR (n:Region) REQUIRE n.uuid_string IS UNIQUE",
    # Human-readable codes that must also be unique.
    "CREATE CONSTRAINT spec_code_unique IF NOT EXISTS FOR (n:Specification) REQUIRE n.spec_code IS UNIQUE",
    "CREATE CONSTRAINT exam_code_unique IF NOT EXISTS FOR (n:ExamPaper) REQUIRE n.exam_code IS UNIQUE",
    "CREATE CONSTRAINT board_code_unique IF NOT EXISTS FOR (n:ExamBoard) REQUIRE n.code IS UNIQUE",
]
def _uid(*parts: str) -> str:
    """Return a deterministic uuid string for the given identifying parts.

    The parts are joined with ":" and hashed with uuid5 under the module
    namespace ``NS``, so the same parts always yield the same string.
    """
    key = ":".join(parts)
    derived = uuid.uuid5(NS, key)
    return str(derived)
def init() -> Dict[str, Any]:
    """Create, constrain and seed the exam knowledge-graph database.

    Steps (each uses IF NOT EXISTS or MERGE, so re-running does not duplicate
    anything):

    1. Create the ``EXAM_DB`` database and poll the ``system`` database until
       it reports ``online``.
    2. Execute every statement in ``CONSTRAINTS``.
    3. MERGE the ExamBoard and Specification nodes and link them with
       ``PUBLISHES``.
    4. MERGE one SpecPoint per entry in ``SPEC_POINTS`` and link each to the
       specification with ``HAS_SPEC_POINT``.
    5. Count ExamBoard / Specification / SpecPoint nodes as a sanity check.

    Returns:
        A dict with keys ``db`` (database name), ``constraints`` (number of
        constraint statements executed), ``spec_points`` (number of SpecPoint
        statements executed) and ``verify`` (the node counts from step 5, or
        an empty dict if the count query returned no row).

    Raises:
        TimeoutError: if the database has not reported ``online`` after 30
            polls one second apart.
    """
    import time  # function-scope import: only the availability poll needs it

    driver = get_driver()
    result: Dict[str, Any] = {"db": EXAM_DB, "constraints": 0, "spec_points": 0}

    # 1. database
    with driver.session(database="system") as s:
        s.run(f"CREATE DATABASE `{EXAM_DB}` IF NOT EXISTS").consume()

    # Wait for availability: the poll below gives the new database up to
    # 30 one-second attempts to report "online" before giving up.
    status = None
    for _ in range(30):
        with driver.session(database="system") as s:
            st = s.run("SHOW DATABASE $n YIELD currentStatus RETURN currentStatus", n=EXAM_DB).single()
        status = st["currentStatus"] if st else None
        if status == "online":
            break
        time.sleep(1)
    else:
        # Previously the loop fell through silently and the failure only
        # surfaced later as an unrelated driver error; fail here instead.
        raise TimeoutError(
            f"Neo4j database {EXAM_DB!r} did not come online after 30 attempts "
            f"(last status: {status!r})"
        )

    with driver.session(database=EXAM_DB) as s:
        # 2. constraints
        for c in CONSTRAINTS:
            s.run(c).consume()
            result["constraints"] += 1

        # 3. board + spec
        board_uid = _uid("ExamBoard", BOARD["code"])
        spec_uid = _uid("Specification", SPEC["spec_code"])
        s.run(
            "MERGE (b:ExamBoard {uuid_string:$uid}) "
            "SET b.code=$code, b.name=$name, b.node_storage_path=$nsp",
            uid=board_uid, code=BOARD["code"], name=BOARD["name"],
            nsp=f"{EXAM_DB}/ExamBoard/{BOARD['code']}",
        ).consume()
        # The specification is attached to its board by matching on the board code.
        s.run(
            "MERGE (sp:Specification {uuid_string:$uid}) "
            "SET sp.spec_code=$sc, sp.exam_board_code=$ebc, sp.subject_code=$subj, "
            " sp.award_code=$award, sp.title=$title, sp.node_storage_path=$nsp "
            "WITH sp MATCH (b:ExamBoard {code:$ebc}) MERGE (b)-[:PUBLISHES]->(sp)",
            uid=spec_uid, sc=SPEC["spec_code"], ebc=SPEC["exam_board_code"],
            subj=SPEC["subject_code"], award=SPEC["award_code"], title=SPEC["title"],
            nsp=f"{EXAM_DB}/Specification/{SPEC['spec_code']}",
        ).consume()

        # 4. spec points
        for ref, desc in SPEC_POINTS:
            sp_uid = _uid("SpecPoint", SPEC["spec_code"], ref)
            s.run(
                "MERGE (p:SpecPoint {uuid_string:$uid}) "
                "SET p.ref=$ref, p.description=$desc, p.spec_code=$sc, "
                " p.exam_board_code=$ebc, p.node_storage_path=$nsp "
                "WITH p MATCH (s:Specification {spec_code:$sc}) MERGE (s)-[:HAS_SPEC_POINT]->(p)",
                uid=sp_uid, ref=ref, desc=desc, sc=SPEC["spec_code"],
                ebc=SPEC["exam_board_code"], nsp=f"{EXAM_DB}/SpecPoint/{SPEC['spec_code']}/{ref}",
            ).consume()
            result["spec_points"] += 1

        # 5. verification counts (no row comes back if any label has no nodes)
        counts = s.run(
            "MATCH (b:ExamBoard) WITH count(b) AS boards "
            "MATCH (sp:Specification) WITH boards, count(sp) AS specs "
            "MATCH (p:SpecPoint) RETURN boards, specs, count(p) AS spec_points"
        ).single()
        result["verify"] = dict(counts) if counts else {}

    return result
if __name__ == "__main__":
    # Script entry point: run the initialisation and pretty-print its summary.
    import json

    summary = init()
    print(json.dumps(summary, indent=2, default=str))