Adds the real AQA GCSE Physics 8463 specification and the AQA-PHYS-8463-1H-22-JUN exam paper (Paper 1, Higher, June 2022, QP) to seed_curriculum.py, with storage_loc pointing at the uploaded PDF in the cc.examboards bucket. spec_code AQA-PHYS-8463 matches the cc.public.exams Specification node (S4-1). Applied + verified on dev .94: eb_specifications + eb_exams rows present; the real PDF (3,963,384 bytes) is uploaded to cc.examboards/aqa/physics/8463/AQA-PHYS-8463-1H-22-JUN.pdf and retrievable (HTTP 200, exact byte match). seed run populated the empty catalogue (7 specs / 16 exams / 42 Neo4j topics). NOTE: the PDF upload is a one-time ops step (curl from the host to the Storage API) — the container can't reach the host file. A reproducible fixture-upload step is a follow-up. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
385 lines
18 KiB
Python
385 lines
18 KiB
Python
"""
seed_curriculum.py — Create curriculum data: exam board specifications and exams.

Seeds eb_specifications and eb_exams tables with realistic UK exam board data
(AQA, Edexcel, OCR) for Physics, Maths, and Computer Science.

Also seeds curriculum topics in Neo4j for both school databases.

Tables: eb_specifications, eb_exams
Neo4j: curriculum topic nodes in school databases

Run inside ccapi container:
    python3 -c "from run.initialization.seed_curriculum import seed; seed()"
"""
|
|
import os
|
|
import time
|
|
import uuid
|
|
import requests
|
|
from typing import Dict, Any, List, Optional
|
|
|
|
# Supabase settings are mandatory: indexing os.environ raises KeyError at
# import time when either variable is missing.
SUPA_URL = os.environ["SUPABASE_URL"]
SERVICE_KEY = os.environ["SERVICE_ROLE_KEY"]
# Optional API base URL; falls back to a local instance when unset.
API_BASE = os.getenv("API_BASE_URL", "http://localhost:8000")

# ─── School constants ────────────────────────────────────────────────────────

# School UUIDs; seed() derives each school's Neo4j database name from them.
KEVLARAI_ID = "6585bf91-6ae8-4d72-ab54-cddf3ba4e648"
GREENFIELD_ID = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"
|
|
|
|
# ─── Exam board specifications ───────────────────────────────────────────────
|
|
# Realistic UK exam board data for the subjects we teach.
|
|
|
|
# Rows inserted into eb_specifications by seed(). spec_code is the conflict key
# and is also what each EXAMS row points at. exam_board_code uses one spelling
# per board ("AQA", "Edexcel", "OCR") so rows for the same board group together.
SPECIFICATIONS = [
    # AQA Physics
    {
        "spec_code": "AQA-PHYS-8201",
        "exam_board_code": "AQA",
        "award_code": "8201",
        "subject_code": "PHYSICS",
        "first_teach": "2016",
        "spec_ver": "1.3",
        "storage_loc": "cc.public.snapshots/curriculum/aqa/physics/8201_spec.pdf",
        "doc_type": "pdf",
    },
    {
        "spec_code": "AQA-PHYS-8203",
        "exam_board_code": "AQA",
        "award_code": "8203",
        "subject_code": "PHYSICS",
        "first_teach": "2016",
        "spec_ver": "1.3",
        "storage_loc": "cc.public.snapshots/curriculum/aqa/physics/8203_spec.pdf",
        "doc_type": "pdf",
    },
    # AQA GCSE Physics 8463 (standalone) — the real spec for the exam-marker test paper
    # (AQA Physics Paper 1H 2022). Spec graph: cc.public.exams Specification AQA-PHYS-8463.
    {
        "spec_code": "AQA-PHYS-8463",
        "exam_board_code": "AQA",
        "award_code": "8463",
        "subject_code": "PHYSICS",
        "first_teach": "2016",
        "spec_ver": "1.0",
        "storage_loc": "cc.examboards/aqa/physics/8463/8463_spec.pdf",  # placeholder (no file yet)
        "doc_type": "pdf",
    },
    # Edexcel Maths
    {
        "spec_code": "EDX-MATH-1MA1",
        # Was "EDexcel": a casing typo that split Edexcel specs across two board codes.
        "exam_board_code": "Edexcel",
        "award_code": "1MA1",
        "subject_code": "MATHEMATICS",
        "first_teach": "2015",
        "spec_ver": "2.0",
        "storage_loc": "cc.public.snapshots/curriculum/edexcel/maths/1MA1_spec.pdf",
        "doc_type": "pdf",
    },
    # OCR Maths
    {
        "spec_code": "OCR-MATH-FMH1",
        "exam_board_code": "OCR",
        "award_code": "FMH1",
        "subject_code": "MATHEMATICS",
        "first_teach": "2017",
        "spec_ver": "1.1",
        "storage_loc": "cc.public.snapshots/curriculum/ocr/maths/FMH1_spec.pdf",
        "doc_type": "pdf",
    },
    # AQA Computer Science
    {
        "spec_code": "AQA-COMP-7516",
        "exam_board_code": "AQA",
        "award_code": "7516",
        "subject_code": "COMPUTER SCIENCE",
        "first_teach": "2016",
        "spec_ver": "1.2",
        "storage_loc": "cc.public.snapshots/curriculum/aqa/cs/7516_spec.pdf",
        "doc_type": "pdf",
    },
    # Edexcel Computer Science
    {
        "spec_code": "EDX-COMP-X042",
        "exam_board_code": "Edexcel",
        "award_code": "X042",
        "subject_code": "COMPUTER SCIENCE",
        "first_teach": "2016",
        "spec_ver": "1.0",
        "storage_loc": "cc.public.snapshots/curriculum/edexcel/cs/X042_spec.pdf",
        "doc_type": "pdf",
    },
]
|
|
|
|
# ─── Exam papers ─────────────────────────────────────────────────────────────
|
|
# Realistic exam paper references linked to specifications.
|
|
|
|
# Rows inserted into eb_exams by seed(). exam_code is the conflict key and
# spec_code names a SPECIFICATIONS entry. Only the first row carries a
# storage_loc; the remaining rows share one shape and are expanded from tuples
# of (exam_code, spec_code, paper_code, tier, type_code).
EXAMS = [
    # AQA GCSE Physics 8463/1 Higher — the exam-marker test paper (real PDF uploaded to
    # cc.examboards). Join key for cc.public.exams ExamPaper.exam_code.
    {
        "exam_code": "AQA-PHYS-8463-1H-22-JUN",
        "spec_code": "AQA-PHYS-8463",
        "paper_code": "8463/1",
        "tier": "higher",
        "session": "June",
        "type_code": "QP",
        "storage_loc": "cc.examboards/aqa/physics/8463/AQA-PHYS-8463-1H-22-JUN.pdf",
    },
    *(
        {
            "exam_code": exam_code,
            "spec_code": spec_code,
            "paper_code": paper_code,
            "tier": tier,
            "session": "June",
            "type_code": type_code,
        }
        for exam_code, spec_code, paper_code, tier, type_code in (
            # AQA Physics 8201/1 (Foundation)
            ("AQA-PHYS-8201-1-23-JUN", "AQA-PHYS-8201", "8201/1", "foundation", "QP"),
            ("AQA-PHYS-8201-MS-23-JUN", "AQA-PHYS-8201", "8201/1", "foundation", "MS"),
            ("AQA-PHYS-8201-ER-23-JUN", "AQA-PHYS-8201", "8201/1", "foundation", "ER"),
            # AQA Physics 8201/2 (Higher)
            ("AQA-PHYS-8201-2-23-JUN", "AQA-PHYS-8201", "8201/2", "higher", "QP"),
            ("AQA-PHYS-8201-MS-23-JUN-H", "AQA-PHYS-8201", "8201/2", "higher", "MS"),
            # Edexcel Maths 1MA1/1 (Foundation)
            ("EDX-MATH-1MA1-1-24-JUN", "EDX-MATH-1MA1", "1MA1/1F", "foundation", "QP"),
            ("EDX-MATH-1MA1-MS-24-JUN", "EDX-MATH-1MA1", "1MA1/1F", "foundation", "MS"),
            # Edexcel Maths 1MA1/2 (Higher)
            ("EDX-MATH-1MA1-2-24-JUN", "EDX-MATH-1MA1", "1MA1/2H", "higher", "QP"),
            ("EDX-MATH-1MA1-MS-24-JUN-H", "EDX-MATH-1MA1", "1MA1/2H", "higher", "MS"),
            # OCR Maths FMH1/1
            ("OCR-MATH-FMH1-1-24-JUN", "OCR-MATH-FMH1", "FMH1/1", "higher", "QP"),
            ("OCR-MATH-FMH1-MS-24-JUN", "OCR-MATH-FMH1", "FMH1/1", "higher", "MS"),
            # AQA CS 7516/1
            ("AQA-COMP-7516-1-23-JUN", "AQA-COMP-7516", "7516/1", None, "QP"),
            ("AQA-COMP-7516-MS-23-JUN", "AQA-COMP-7516", "7516/1", None, "MS"),
            # AQA CS 7516/2
            ("AQA-COMP-7516-2-23-JUN", "AQA-COMP-7516", "7516/2", None, "QP"),
            ("AQA-COMP-7516-ER-23-JUN", "AQA-COMP-7516", "7516/2", None, "ER"),
        )
    ),
]
|
|
|
|
|
|
# ─── Neo4j curriculum topics ─────────────────────────────────────────────────
|
|
# Curriculum topics stored in Neo4j school databases (not Supabase).
|
|
|
|
# Subject name -> list of topic dicts merged into each school's Neo4j database
# by seed(). Each tuple row is
# (topic_code, title, year_group, key_stage, description) and is expanded into
# a dict with those keys, in that order.
CURRICULUM_TOPICS = {
    subject: [
        dict(zip(("topic_code", "title", "year_group", "key_stage", "description"), row))
        for row in rows
    ]
    for subject, rows in (
        (
            "Physics",
            (
                ("PHYS-KS3-01", "Forces", "9", "3",
                 "Contact and non-contact forces, resultant forces, moments"),
                ("PHYS-KS3-02", "Energy", "9", "3",
                 "Energy stores, transfers, conservation, dissipation"),
                ("PHYS-KS3-03", "Waves", "9", "3",
                 "Transverse and longitudinal waves, reflection, refraction, diffraction"),
                ("PHYS-KS4-01", "Electricity", "10", "4",
                 "Circuits, current, potential difference, resistance, power"),
                ("PHYS-KS4-02", "Magnetism and Electromagnetism", "10", "4",
                 "Magnetic fields, electromagnets, motors, generators"),
                ("PHYS-KS4-03", "Atomic Structure", "10", "4",
                 "Atoms, isotopes, radioactivity, half-life"),
                ("PHYS-KS4-04", "Particle Physics", "11", "4",
                 "Standard model, quarks, leptons, bosons"),
                ("PHYS-KS4-05", "Cosmology", "11", "4",
                 "Big Bang, stellar evolution, redshift"),
            ),
        ),
        (
            "Mathematics",
            (
                ("MATH-KS3-01", "Number", "9", "3",
                 "Integers, fractions, decimals, percentages, ratio, proportion"),
                ("MATH-KS3-02", "Algebra", "9", "3",
                 "Expressions, equations, inequalities, sequences"),
                ("MATH-KS3-03", "Geometry", "9", "3",
                 "Angles, polygons, circles, transformations, constructions"),
                ("MATH-KS4-01", "Number and Algebra", "10", "4",
                 "Surds, indices, standard form, expanding brackets, factorising"),
                ("MATH-KS4-02", "Graphs and Functions", "10", "4",
                 "Linear, quadratic, cubic graphs, gradients, intercepts"),
                ("MATH-KS4-03", "Statistics and Probability", "10", "4",
                 "Data types, charts, expected frequency, tree diagrams, two-way tables"),
                ("MATH-KS4-04", "Geometry and Measures", "10", "4",
                 "Area, volume, surface area, Pythagoras, trigonometry, bearings"),
                ("MATH-KS4-05", "Simultaneous Equations and Quadratics", "11", "4",
                 "Solving simultaneous equations, completing the square, quadratic formula"),
            ),
        ),
        (
            "Computer Science",
            (
                ("CS-KS4-01", "Data Representation", "10", "4",
                 "Binary, hexadecimal, bit operations, compression, encryption"),
                ("CS-KS4-02", "Computer Systems", "10", "4",
                 "CPU architecture, memory, storage, networks, topologies"),
                ("CS-KS4-03", "Algorithms and Programming", "10", "4",
                 "Algorithms, flowcharts, pseudocode, debugging, testing"),
                ("CS-KS4-04", "Data Types and Structures", "11", "4",
                 "Strings, arrays, lists, records, 2D arrays"),
                ("CS-KS4-05", "Boolean Logic and Search", "11", "4",
                 "Boolean operators, linear search, binary search, sorting"),
            ),
        ),
    )
}
|
|
|
|
|
|
# ─── Helpers ───────────────────────────────────────────────────────────────────
|
|
|
|
def _sb_headers() -> Dict:
    """Return the HTTP headers for Supabase REST calls made with the service key.

    The service key is sent both as ``apikey`` and as a Bearer token, and the
    body is declared as JSON.
    """
    headers = dict(apikey=SERVICE_KEY)
    headers["Authorization"] = "Bearer " + SERVICE_KEY
    headers["Content-Type"] = "application/json"
    return headers
|
|
|
|
|
|
def _sign_in(email: str, password: str) -> str:
    """Exchange an email/password pair for an access token.

    Posts the credentials to the ``/auth/v1/token?grant_type=password``
    endpoint under ``SUPA_URL``.

    Returns:
        The ``access_token`` field of the JSON response.

    Raises:
        requests.HTTPError: if the endpoint answers with an error status.
    """
    token_url = f"{SUPA_URL}/auth/v1/token?grant_type=password"
    request_headers = {"apikey": SERVICE_KEY, "Content-Type": "application/json"}
    credentials = {"email": email, "password": password}

    response = requests.post(token_url, headers=request_headers, json=credentials)
    response.raise_for_status()
    return response.json()["access_token"]
|
|
|
|
|
|
# ─── Main seed ─────────────────────────────────────────────────────────────────
|
|
|
|
# Seconds to wait on each Supabase REST call before giving up, so an
# unresponsive host cannot hang the seed run indefinitely.
_HTTP_TIMEOUT_SECONDS = 30


def _seed_table(
    table: str,
    rows: List[Dict[str, Any]],
    key_field: str,
    label: str,
    detail_fields: List[str],
    errors: List[str],
) -> Dict[str, int]:
    """Insert ``rows`` into a Supabase REST table one request at a time.

    Args:
        table: Table name under ``/rest/v1``.
        rows: Row dicts; each gets a fresh ``id`` plus empty ``doc_details``
            and ``docling_docs`` objects added before it is posted.
        key_field: Field that identifies a row; sent as ``on_conflict`` and
            used in progress output.
        label: Short noun ("spec", "exam") prefixed to error messages.
        detail_fields: Row fields shown in brackets on the success line,
            joined with "/".
        errors: Shared list that failure messages are appended to.

    Returns:
        ``{"created": n, "skipped": m}``, where a 409 answer counts as skipped.
    """
    created = 0
    skipped = 0
    for row in rows:
        code = row[key_field]
        try:
            r = requests.post(
                f"{SUPA_URL}/rest/v1/{table}",
                headers={**_sb_headers(), "Prefer": "return=representation"},
                json={
                    **row,
                    "id": str(uuid.uuid4()),
                    "doc_details": {},
                    "docling_docs": {},
                },
                params={"on_conflict": key_field},
                timeout=_HTTP_TIMEOUT_SECONDS,
            )
        except requests.RequestException as exc:
            # Record transport failures like any other error so the run
            # carries on and the summary reports them.
            err = f"{label} {code}: request failed: {exc}"
            print(f" ✗ {err}")
            errors.append(err)
            continue

        if r.status_code in (200, 201):
            created += 1
            detail = "/".join(str(row[field]) for field in detail_fields)
            print(f" ✓ {code} ({detail})")
        elif r.status_code == 409:
            # Conflict on the key: the row was seeded by an earlier run.
            skipped += 1
            print(f" ~ SKIP (exists): {code}")
        else:
            err = f"{label} {code}: {r.status_code} {r.text[:100]}"
            print(f" ✗ {err}")
            errors.append(err)
    return {"created": created, "skipped": skipped}


def _seed_neo4j_topics() -> int:
    """Merge Subject and CurriculumTopic nodes into each school's Neo4j database.

    Connection settings are read from NEO4J_URI, NEO4J_USER and NEO4J_PASSWORD
    when set (assumed names — confirm against the container config), falling
    back to the previously hard-coded values.

    Returns:
        The number of topic MERGE statements run across both schools. MERGE is
        idempotent, so on a re-run this counts topics processed, not nodes
        newly created.
    """
    from neo4j import GraphDatabase

    # NOTE(review): the fallback password is still committed in source; move
    # it to the environment / a secret store and drop the default.
    uri = os.environ.get("NEO4J_URI") or "bolt://192.168.0.209:7687"
    user = os.environ.get("NEO4J_USER") or "neo4j"
    password = os.environ.get("NEO4J_PASSWORD") or "&%N304j&%"

    # Subjects have different numbers of topics, so sum the real lengths.
    topics_per_school = sum(len(topics) for topics in CURRICULUM_TOPICS.values())
    merged = 0

    driver = GraphDatabase.driver(uri, auth=(user, password))
    try:
        for school_id, school_name in [(KEVLARAI_ID, "KevlarAI"), (GREENFIELD_ID, "Greenfield Academy")]:
            db_name = f"cc.institutes.{school_id.replace('-', '')}"
            print(f"\n [{school_name}] -> {db_name}")

            with driver.session(database=db_name) as s:
                for subject, topics in CURRICULUM_TOPICS.items():
                    # Create subject node
                    s.run(
                        "MERGE (s:Subject {code: $subject}) "
                        "SET s.title = $title, s.school_id = $school_id",
                        subject=subject, title=subject, school_id=school_id,
                    )

                    for topic in topics:
                        s.run(
                            "MERGE (t:CurriculumTopic {code: $code}) "
                            "SET t.title = $title, "
                            " t.year_group = $year_group, "
                            " t.key_stage = $key_stage, "
                            " t.description = $description, "
                            " t.subject_code = $subject, "
                            " t.school_id = $school_id "
                            "MERGE (s:Subject {code: $subject}) "
                            "MERGE (s)-[:CONTAINS_TOPIC]->(t)",
                            code=topic["topic_code"],
                            title=topic["title"],
                            year_group=topic["year_group"],
                            key_stage=topic["key_stage"],
                            description=topic["description"],
                            subject=subject,
                            school_id=school_id,
                        )
                        merged += 1

            print(f" ✓ {school_name}: {topics_per_school} topic nodes")
    finally:
        # Close the driver even when a query fails part-way through.
        driver.close()
    return merged


def seed() -> Dict[str, Any]:
    """Seed exam board specifications, exam papers and Neo4j curriculum topics.

    Runs three steps in order: SPECIFICATIONS into ``eb_specifications``,
    EXAMS into ``eb_exams``, then CURRICULUM_TOPICS into the Neo4j database of
    each school. Failures are collected rather than raised, so later steps
    still run.

    Returns:
        A dict with ``specifications`` and ``exams`` (each
        ``{"created", "skipped"}``), ``neo4j_topics`` (``{"created"}`` or
        ``{"error"}``), ``success`` (True when no errors were recorded) and
        ``errors`` (the list of error messages).
    """
    print("=" * 60)
    print("Curriculum seed — exam board specs and exams")
    print("=" * 60)
    results: Dict[str, Any] = {}
    errors: List[str] = []

    # ── [1] Seed eb_specifications ──────────────────────────────────────────
    print("\n[1] Seeding exam board specifications...")
    results["specifications"] = _seed_table(
        "eb_specifications",
        SPECIFICATIONS,
        "spec_code",
        "spec",
        ["exam_board_code", "subject_code"],
        errors,
    )

    # ── [2] Seed eb_exams ───────────────────────────────────────────────────
    print("\n[2] Seeding exam papers...")
    results["exams"] = _seed_table(
        "eb_exams",
        EXAMS,
        "exam_code",
        "exam",
        ["type_code"],
        errors,
    )

    # ── [3] Seed Neo4j curriculum topics ────────────────────────────────────
    print("\n[3] Seeding Neo4j curriculum topics...")
    try:
        results["neo4j_topics"] = {"created": _seed_neo4j_topics()}
    except Exception as e:
        # Top-level boundary: a missing driver package or an unreachable
        # database is reported in the results instead of aborting the run.
        err = f"neo4j_topics: {e}"
        print(f" ✗ {err}")
        errors.append(err)
        results["neo4j_topics"] = {"error": str(e)}

    # ── Summary ─────────────────────────────────────────────────────────────
    print("\n" + "=" * 60)
    results["success"] = len(errors) == 0
    results["errors"] = errors
    specs_created = results["specifications"]["created"]
    exams_created = results["exams"]["created"]
    print(f"COMPLETE — {specs_created} specs, {exams_created} exams, "
          f"{results.get('neo4j_topics', {}).get('created', '?')} topics")
    if errors:
        print(f"Errors ({len(errors)}):")
        for e in errors:
            print(f" ✗ {e}")
    print("=" * 60)
    return results
|
|
|
|
|
|
if __name__ == "__main__":
    import json

    # Run the seed and print its result dict as indented JSON; default=str
    # stringifies any value json cannot serialise natively.
    summary = seed()
    print(json.dumps(summary, indent=2, default=str))
|