"""
seed_curriculum.py — DEPRECATED hardcoded curriculum/exam seeder.

⚠️  SUPERSEDED (2026-06-07) by the manifest-driven corpus loader:
    run/initialization/seed_exam_corpus.py  (+ manifests/exam-corpus.yaml)

The exam-board parts of this file (eb_specifications / eb_exams) are now seeded
from a verified, provenance-bearing manifest with real uploaded PDFs — not the
hardcoded rows below. This module also had a storage_loc inconsistency the
overhaul standardises away: exam-board files belong in the `cc.examboards`
bucket at the canonical path
`cc.examboards/{board}/{subject}/{award}/{paper}/{session}/{role}.pdf`,
NOT under `cc.public.snapshots/curriculum/...` (the placeholder rows below
still show the old path).

KEEP ONLY for the Neo4j `curriculum_topics` seed (step [3]) which has no
replacement yet. Do NOT use the eb_specifications/eb_exams blocks for new
work — use seed_exam_corpus.py.

Run (Neo4j curriculum topics only is the supported remaining use):
    python3 -c "from run.initialization.seed_curriculum import seed; seed()"
"""

import os
import time
import uuid
import requests
from typing import Dict, Any, List, Optional

# Both Supabase settings are mandatory: subscripting os.environ raises KeyError
# at import time when either variable is missing.
SUPA_URL = os.environ["SUPABASE_URL"]
SERVICE_KEY = os.environ["SERVICE_ROLE_KEY"]
# Optional; falls back to a local development server address when unset.
API_BASE = os.environ.get("API_BASE_URL", "http://localhost:8000")

# ─── School constants ────────────────────────────────────────────────────────
# School UUIDs; seed() strips the dashes to build each per-school Neo4j
# database name (`cc.institutes.<id without dashes>`).
KEVLARAI_ID = "6585bf91-6ae8-4d72-ab54-cddf3ba4e648"
GREENFIELD_ID = "a1b2c3d4-e5f6-7890-abcd-ef1234567890"

# ─── Exam board specifications ───────────────────────────────────────────────
# Realistic UK exam board data for the subjects we teach.
# Rows posted verbatim to the Supabase `eb_specifications` table, keyed on
# `spec_code`. `exam_board_code` must be spelled identically for every row of
# the same board ("AQA", "Edexcel", "OCR"); a casing variant would be treated
# as a different board by anything that groups or joins on it.
# NOTE(review): most `storage_loc` values still use the legacy
# `cc.public.snapshots/curriculum/...` placeholder path — confirm before reuse.
SPECIFICATIONS = [
    # AQA Physics
    {
        "spec_code": "AQA-PHYS-8201",
        "exam_board_code": "AQA",
        "award_code": "8201",
        "subject_code": "PHYSICS",
        "first_teach": "2016",
        "spec_ver": "1.3",
        "storage_loc": "cc.public.snapshots/curriculum/aqa/physics/8201_spec.pdf",
        "doc_type": "pdf",
    },
    {
        "spec_code": "AQA-PHYS-8203",
        "exam_board_code": "AQA",
        "award_code": "8203",
        "subject_code": "PHYSICS",
        "first_teach": "2016",
        "spec_ver": "1.3",
        "storage_loc": "cc.public.snapshots/curriculum/aqa/physics/8203_spec.pdf",
        "doc_type": "pdf",
    },
    # AQA GCSE Physics 8463 (standalone) — the real spec for the exam-marker test paper
    # (AQA Physics Paper 1H 2022). Spec graph: cc.public.exams Specification AQA-PHYS-8463.
    {
        "spec_code": "AQA-PHYS-8463",
        "exam_board_code": "AQA",
        "award_code": "8463",
        "subject_code": "PHYSICS",
        "first_teach": "2016",
        "spec_ver": "1.0",
        "storage_loc": "cc.examboards/aqa/physics/8463/8463_spec.pdf",  # placeholder (no file yet)
        "doc_type": "pdf",
    },
    # Edexcel Maths
    {
        "spec_code": "EDX-MATH-1MA1",
        "exam_board_code": "Edexcel",
        "award_code": "1MA1",
        "subject_code": "MATHEMATICS",
        "first_teach": "2015",
        "spec_ver": "2.0",
        "storage_loc": "cc.public.snapshots/curriculum/edexcel/maths/1MA1_spec.pdf",
        "doc_type": "pdf",
    },
    # OCR Maths
    {
        "spec_code": "OCR-MATH-FMH1",
        "exam_board_code": "OCR",
        "award_code": "FMH1",
        "subject_code": "MATHEMATICS",
        "first_teach": "2017",
        "spec_ver": "1.1",
        "storage_loc": "cc.public.snapshots/curriculum/ocr/maths/FMH1_spec.pdf",
        "doc_type": "pdf",
    },
    # AQA Computer Science
    {
        "spec_code": "AQA-COMP-7516",
        "exam_board_code": "AQA",
        "award_code": "7516",
        "subject_code": "COMPUTER SCIENCE",
        "first_teach": "2016",
        "spec_ver": "1.2",
        "storage_loc": "cc.public.snapshots/curriculum/aqa/cs/7516_spec.pdf",
        "doc_type": "pdf",
    },
    # Edexcel Computer Science
    {
        "spec_code": "EDX-COMP-X042",
        "exam_board_code": "Edexcel",
        "award_code": "X042",
        "subject_code": "COMPUTER SCIENCE",
        "first_teach": "2016",
        "spec_ver": "1.0",
        "storage_loc": "cc.public.snapshots/curriculum/edexcel/cs/X042_spec.pdf",
        "doc_type": "pdf",
    },
]

# ─── Exam papers ─────────────────────────────────────────────────────────────
# Realistic exam paper references linked to specifications.
# Rows posted verbatim to the Supabase `eb_exams` table, keyed on `exam_code`.
# Every `spec_code` here must match a `spec_code` in SPECIFICATIONS.
# `type_code` values used: QP, MS, ER. `tier` is None for the Computer Science
# papers.
EXAMS = [
    # AQA GCSE Physics 8463/1 Higher — the exam-marker test paper (real PDF uploaded to
    # cc.examboards). Join key for cc.public.exams ExamPaper.exam_code.
    {"exam_code": "AQA-PHYS-8463-1H-22-JUN", "spec_code": "AQA-PHYS-8463",
     "paper_code": "8463/1", "tier": "higher", "session": "June", "type_code": "QP",
     "storage_loc": "cc.examboards/aqa/physics/8463/AQA-PHYS-8463-1H-22-JUN.pdf"},
    # AQA Physics 8201/1 (Foundation)
    {"exam_code": "AQA-PHYS-8201-1-23-JUN", "spec_code": "AQA-PHYS-8201",
     "paper_code": "8201/1", "tier": "foundation", "session": "June", "type_code": "QP"},
    {"exam_code": "AQA-PHYS-8201-MS-23-JUN", "spec_code": "AQA-PHYS-8201",
     "paper_code": "8201/1", "tier": "foundation", "session": "June", "type_code": "MS"},
    {"exam_code": "AQA-PHYS-8201-ER-23-JUN", "spec_code": "AQA-PHYS-8201",
     "paper_code": "8201/1", "tier": "foundation", "session": "June", "type_code": "ER"},
    # AQA Physics 8201/2 (Higher)
    {"exam_code": "AQA-PHYS-8201-2-23-JUN", "spec_code": "AQA-PHYS-8201",
     "paper_code": "8201/2", "tier": "higher", "session": "June", "type_code": "QP"},
    {"exam_code": "AQA-PHYS-8201-MS-23-JUN-H", "spec_code": "AQA-PHYS-8201",
     "paper_code": "8201/2", "tier": "higher", "session": "June", "type_code": "MS"},
    # Edexcel Maths 1MA1/1 (Foundation)
    {"exam_code": "EDX-MATH-1MA1-1-24-JUN", "spec_code": "EDX-MATH-1MA1",
     "paper_code": "1MA1/1F", "tier": "foundation", "session": "June", "type_code": "QP"},
    {"exam_code": "EDX-MATH-1MA1-MS-24-JUN", "spec_code": "EDX-MATH-1MA1",
     "paper_code": "1MA1/1F", "tier": "foundation", "session": "June", "type_code": "MS"},
    # Edexcel Maths 1MA1/2 (Higher)
    {"exam_code": "EDX-MATH-1MA1-2-24-JUN", "spec_code": "EDX-MATH-1MA1",
     "paper_code": "1MA1/2H", "tier": "higher", "session": "June", "type_code": "QP"},
    {"exam_code": "EDX-MATH-1MA1-MS-24-JUN-H", "spec_code": "EDX-MATH-1MA1",
     "paper_code": "1MA1/2H", "tier": "higher", "session": "June", "type_code": "MS"},
    # OCR Maths FMH1/1
    {"exam_code": "OCR-MATH-FMH1-1-24-JUN", "spec_code": "OCR-MATH-FMH1",
     "paper_code": "FMH1/1", "tier": "higher", "session": "June", "type_code": "QP"},
    {"exam_code": "OCR-MATH-FMH1-MS-24-JUN", "spec_code": "OCR-MATH-FMH1",
     "paper_code": "FMH1/1", "tier": "higher", "session": "June", "type_code": "MS"},
    # AQA CS 7516/1
    {"exam_code": "AQA-COMP-7516-1-23-JUN", "spec_code": "AQA-COMP-7516",
     "paper_code": "7516/1", "tier": None, "session": "June", "type_code": "QP"},
    {"exam_code": "AQA-COMP-7516-MS-23-JUN", "spec_code": "AQA-COMP-7516",
     "paper_code": "7516/1", "tier": None, "session": "June", "type_code": "MS"},
    # AQA CS 7516/2
    {"exam_code": "AQA-COMP-7516-2-23-JUN", "spec_code": "AQA-COMP-7516",
     "paper_code": "7516/2", "tier": None, "session": "June", "type_code": "QP"},
    {"exam_code": "AQA-COMP-7516-ER-23-JUN", "spec_code": "AQA-COMP-7516",
     "paper_code": "7516/2", "tier": None, "session": "June", "type_code": "ER"},
]

# ─── Neo4j curriculum topics ─────────────────────────────────────────────────
# Curriculum topics stored in Neo4j school databases (not Supabase).
# Subject name -> list of topic dicts. Each topic is MERGEd into every school
# database as a `CurriculumTopic` node linked from its `Subject` node.
CURRICULUM_TOPICS = {
    "Physics": [
        {"topic_code": "PHYS-KS3-01", "title": "Forces", "year_group": "9", "key_stage": "3",
         "description": "Contact and non-contact forces, resultant forces, moments"},
        {"topic_code": "PHYS-KS3-02", "title": "Energy", "year_group": "9", "key_stage": "3",
         "description": "Energy stores, transfers, conservation, dissipation"},
        {"topic_code": "PHYS-KS3-03", "title": "Waves", "year_group": "9", "key_stage": "3",
         "description": "Transverse and longitudinal waves, reflection, refraction, diffraction"},
        {"topic_code": "PHYS-KS4-01", "title": "Electricity", "year_group": "10", "key_stage": "4",
         "description": "Circuits, current, potential difference, resistance, power"},
        {"topic_code": "PHYS-KS4-02", "title": "Magnetism and Electromagnetism", "year_group": "10", "key_stage": "4",
         "description": "Magnetic fields, electromagnets, motors, generators"},
        {"topic_code": "PHYS-KS4-03", "title": "Atomic Structure", "year_group": "10", "key_stage": "4",
         "description": "Atoms, isotopes, radioactivity, half-life"},
        {"topic_code": "PHYS-KS4-04", "title": "Particle Physics", "year_group": "11", "key_stage": "4",
         "description": "Standard model, quarks, leptons, bosons"},
        {"topic_code": "PHYS-KS4-05", "title": "Cosmology", "year_group": "11", "key_stage": "4",
         "description": "Big Bang, stellar evolution, redshift"},
    ],
    "Mathematics": [
        {"topic_code": "MATH-KS3-01", "title": "Number", "year_group": "9", "key_stage": "3",
         "description": "Integers, fractions, decimals, percentages, ratio, proportion"},
        {"topic_code": "MATH-KS3-02", "title": "Algebra", "year_group": "9", "key_stage": "3",
         "description": "Expressions, equations, inequalities, sequences"},
        {"topic_code": "MATH-KS3-03", "title": "Geometry", "year_group": "9", "key_stage": "3",
         "description": "Angles, polygons, circles, transformations, constructions"},
        {"topic_code": "MATH-KS4-01", "title": "Number and Algebra", "year_group": "10", "key_stage": "4",
         "description": "Surds, indices, standard form, expanding brackets, factorising"},
        {"topic_code": "MATH-KS4-02", "title": "Graphs and Functions", "year_group": "10", "key_stage": "4",
         "description": "Linear, quadratic, cubic graphs, gradients, intercepts"},
        {"topic_code": "MATH-KS4-03", "title": "Statistics and Probability", "year_group": "10", "key_stage": "4",
         "description": "Data types, charts, expected frequency, tree diagrams, two-way tables"},
        {"topic_code": "MATH-KS4-04", "title": "Geometry and Measures", "year_group": "10", "key_stage": "4",
         "description": "Area, volume, surface area, Pythagoras, trigonometry, bearings"},
        {"topic_code": "MATH-KS4-05", "title": "Simultaneous Equations and Quadratics", "year_group": "11", "key_stage": "4",
         "description": "Solving simultaneous equations, completing the square, quadratic formula"},
    ],
    "Computer Science": [
        {"topic_code": "CS-KS4-01", "title": "Data Representation", "year_group": "10", "key_stage": "4",
         "description": "Binary, hexadecimal, bit operations, compression, encryption"},
        {"topic_code": "CS-KS4-02", "title": "Computer Systems", "year_group": "10", "key_stage": "4",
         "description": "CPU architecture, memory, storage, networks, topologies"},
        {"topic_code": "CS-KS4-03", "title": "Algorithms and Programming", "year_group": "10", "key_stage": "4",
         "description": "Algorithms, flowcharts, pseudocode, debugging, testing"},
        {"topic_code": "CS-KS4-04", "title": "Data Types and Structures", "year_group": "11", "key_stage": "4",
         "description": "Strings, arrays, lists, records, 2D arrays"},
        {"topic_code": "CS-KS4-05", "title": "Boolean Logic and Search", "year_group": "11", "key_stage": "4",
         "description": "Boolean operators, linear search, binary search, sorting"},
    ],
}


# ─── Helpers ───────────────────────────────────────────────────────────────────

# Seconds to wait on a single Supabase HTTP call. `requests` has no default
# timeout, so without this a stalled connection would block the seed forever.
_HTTP_TIMEOUT = 30


def _sb_headers() -> Dict:
    """Return the service-role headers sent with every Supabase REST call."""
    return {
        "apikey": SERVICE_KEY,
        "Authorization": f"Bearer {SERVICE_KEY}",
        "Content-Type": "application/json",
    }


def _sign_in(email: str, password: str) -> str:
    """Exchange an email/password pair for a Supabase access token.

    Raises:
        requests.HTTPError: if the auth endpoint answers with an error status.
        requests.RequestException: on transport failure or timeout.
    """
    r = requests.post(
        f"{SUPA_URL}/auth/v1/token?grant_type=password",
        headers={"apikey": SERVICE_KEY, "Content-Type": "application/json"},
        json={"email": email, "password": password},
        timeout=_HTTP_TIMEOUT,
    )
    r.raise_for_status()
    return r.json()["access_token"]


def _neo4j_settings() -> tuple:
    """Return ``(uri, (user, password))`` for the Neo4j driver.

    NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD override the legacy defaults, which
    are kept only so existing invocations keep working.
    """
    # NOTE(review): the fallback password is committed to source control —
    # rotate it and supply NEO4J_PASSWORD from the environment instead.
    uri = os.environ.get("NEO4J_URI", "bolt://192.168.0.209:7687")
    user = os.environ.get("NEO4J_USER", "neo4j")
    password = os.environ.get("NEO4J_PASSWORD", "&%N304j&%")
    return uri, (user, password)


def _seed_rows(table: str, rows: List[Dict[str, Any]], key: str, kind: str,
               describe, errors: List[str]) -> Dict[str, int]:
    """POST each row to a Supabase table and tally the outcomes.

    Args:
        table: REST table name under ``/rest/v1/``.
        rows: row dicts; each gets a fresh ``id`` plus empty ``doc_details`` and
            ``docling_docs`` before posting.
        key: unique column name, sent as ``on_conflict`` and used in messages.
        kind: short label ("spec" / "exam") prefixed to error messages.
        describe: callable turning a row into its success-line text.
        errors: shared list; one message is appended per failed row.

    Returns:
        ``{"created": n, "skipped": m}`` — 200/201 count as created, 409 as
        skipped (row already exists).
    """
    created = 0
    skipped = 0
    for row in rows:
        try:
            r = requests.post(
                f"{SUPA_URL}/rest/v1/{table}",
                headers={**_sb_headers(), "Prefer": "return=representation"},
                json={
                    **row,
                    "id": str(uuid.uuid4()),
                    "doc_details": {},
                    "docling_docs": {},
                },
                params={"on_conflict": key},
                timeout=_HTTP_TIMEOUT,
            )
        except requests.RequestException as exc:
            # Record transport failures like any other row error so the
            # remaining steps (notably the Neo4j seed) still run.
            err = f"{kind} {row[key]}: {exc}"
            print(f" ✗ {err}")
            errors.append(err)
            continue

        if r.status_code in (200, 201):
            created += 1
            print(f" ✓ {describe(row)}")
        elif r.status_code == 409:
            skipped += 1
            print(f" ~ SKIP (exists): {row[key]}")
        else:
            err = f"{kind} {row[key]}: {r.status_code} {r.text[:100]}"
            print(f" ✗ {err}")
            errors.append(err)
    return {"created": created, "skipped": skipped}


def _seed_school_topics(driver, school_id: str, school_name: str) -> tuple:
    """MERGE all subjects and topics into one school's Neo4j database.

    Returns ``(created, matched)`` topic counts for that school.
    """
    db_name = f"cc.institutes.{school_id.replace('-', '')}"
    print(f"\n [{school_name}] -> {db_name}")
    created = 0
    matched = 0
    with driver.session(database=db_name) as s:
        for subject, topics in CURRICULUM_TOPICS.items():
            # Create subject node
            s.run(
                "MERGE (s:Subject {code: $subject}) "
                "SET s.title = $title, s.school_id = $school_id",
                subject=subject,
                title=subject,
                school_id=school_id,
            )
            for topic in topics:
                summary = s.run(
                    "MERGE (t:CurriculumTopic {code: $code}) "
                    "SET t.title = $title, "
                    " t.year_group = $year_group, "
                    " t.key_stage = $key_stage, "
                    " t.description = $description, "
                    " t.subject_code = $subject, "
                    " t.school_id = $school_id "
                    "MERGE (s:Subject {code: $subject}) "
                    "MERGE (s)-[:CONTAINS_TOPIC]->(t)",
                    code=topic["topic_code"],
                    title=topic["title"],
                    year_group=topic["year_group"],
                    key_stage=topic["key_stage"],
                    description=topic["description"],
                    subject=subject,
                    school_id=school_id,
                ).consume()
                # The Subject node already exists at this point, so a created
                # node here means the topic itself was new rather than matched.
                if summary.counters.nodes_created:
                    created += 1
                else:
                    matched += 1
    # Report the number of topics actually processed for this school.
    print(f" ✓ {school_name}: {created + matched} topic nodes")
    return created, matched


# ─── Main seed ─────────────────────────────────────────────────────────────────

def seed() -> Dict[str, Any]:
    """Run all three seeding steps and return a summary dict.

    Steps [1] and [2] post the deprecated hardcoded exam-board rows to
    Supabase; step [3] MERGEs CURRICULUM_TOPICS into each school's Neo4j
    database. Failures are collected rather than raised.

    Returns:
        Dict with ``specifications`` and ``exams`` (each
        ``{"created", "skipped"}``), ``neo4j_topics`` (``{"created",
        "skipped"}`` or ``{"error"}``), ``success`` (True when no errors were
        recorded) and ``errors`` (list of messages).
    """
    print("=" * 60)
    print("Curriculum seed — exam board specs and exams")
    print("=" * 60)

    results: Dict[str, Any] = {}
    errors: List[str] = []

    # ── [1] Seed eb_specifications ──────────────────────────────────────────
    print("\n[1] Seeding exam board specifications...")
    results["specifications"] = _seed_rows(
        "eb_specifications", SPECIFICATIONS, "spec_code", "spec",
        lambda spec: f"{spec['spec_code']} ({spec['exam_board_code']}/{spec['subject_code']})",
        errors,
    )

    # ── [2] Seed eb_exams ───────────────────────────────────────────────────
    print("\n[2] Seeding exam papers...")
    results["exams"] = _seed_rows(
        "eb_exams", EXAMS, "exam_code", "exam",
        lambda exam: f"{exam['exam_code']} ({exam['type_code']})",
        errors,
    )

    # ── [3] Seed Neo4j curriculum topics ────────────────────────────────────
    print("\n[3] Seeding Neo4j curriculum topics...")
    try:
        # Imported lazily so a missing neo4j package is reported as a step
        # error instead of breaking module import.
        from neo4j import GraphDatabase

        uri, auth = _neo4j_settings()
        driver = GraphDatabase.driver(uri, auth=auth)
        topics_created = 0
        topics_skipped = 0
        try:
            for school_id, school_name in [(KEVLARAI_ID, "KevlarAI"), (GREENFIELD_ID, "Greenfield Academy")]:
                created, matched = _seed_school_topics(driver, school_id, school_name)
                topics_created += created
                topics_skipped += matched
        finally:
            # Always release the driver, even when a query fails part-way.
            driver.close()
        results["neo4j_topics"] = {"created": topics_created, "skipped": topics_skipped}
    except Exception as e:
        err = f"neo4j_topics: {e}"
        print(f" ✗ {err}")
        errors.append(err)
        results["neo4j_topics"] = {"error": str(e)}

    # ── Summary ─────────────────────────────────────────────────────────────
    print("\n" + "=" * 60)
    results["success"] = len(errors) == 0
    results["errors"] = errors
    print(f"COMPLETE — {results['specifications']['created']} specs, "
          f"{results['exams']['created']} exams, "
          f"{results.get('neo4j_topics', {}).get('created', '?')} topics")
    if errors:
        print(f"Errors ({len(errors)}):")
        for e in errors:
            print(f" ✗ {e}")
    print("=" * 60)
    return results


if __name__ == "__main__":
    import json
    print(json.dumps(seed(), indent=2, default=str))