feat(seed): expand corpus to 1178 papers + download-only/unseed/granular reset
PRIMARY — corpus breadth (505->1178 papers, 18->60 specs, all URLs HEAD-verified): - AQA (enumerated): Maths, English Lang/Lit, Geography, Computer Science, Business, Psychology, MFL (French/Spanish/German), GCSE + A-level, on top of round-1 sciences. - Edexcel + OCR (confirmed direct URLs via research): Maths, English, Geography, History, Business, Computer Science, GCSE + A-level. - generate_corpus_manifest.py: _subj/_mfl AQA builders, Edexcel/OCR spec+URL tables, derived exam_code (_mk_exam_code) matching the locked convention, concurrent re-verify. Verified on dev .94: eb_specifications=60, eb_exams=1178, QP=469, doc_type all 'pdf', seed idempotent (uploaded=673 new, skipped=505), failed=0. SECONDARY: - --download-only + persistent bucket-shaped local store (manifests/_corpus_store/, gitignored): download-once, seed-many, offline-repeatable; --store-dir/--no-store. (_store_path/_item_bytes/ download_corpus). Verified: store populated, seed reads offline (download_cached). - --unseed [--board/--spec]: inverse loader — storage objects (Storage API; protect_delete blocks raw SQL), first-sweep seed templates, eb_exams, eb_specifications. Verified reversible on .94. - Granular admin reset: POST /admin/reset?scope=all|exam-corpus|timetable. reset_environment.reset(scope) adds EXAM_CORPUS_TABLES (10) + cc.examboards storage cleanup + TIMETABLE_TABLES (13); 'all' now also clears the exam subsystem the legacy reset missed. No schema migration required. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
5750413f43
commit
cdc105ae54
@ -126,13 +126,24 @@ async def platform_stats(
|
||||
|
||||
@router.post("/reset")
async def reset_environment(
    scope: str = "all",
    _: dict = Depends(require_platform_admin),
) -> Dict[str, Any]:
    """DESTRUCTIVE: wipe test data. Platform admin only.

    scope (query param):
      - all         : full wipe (Neo4j + Supabase data + auth users) AND exam subsystem + storage.
      - exam-corpus : ONLY the exam corpus — eb_*/exam_* tables + cc.examboards storage objects
                      (load/unload the public corpus without touching schools/users).
      - timetable   : ONLY timetable/calendar materialization tables.

    Returns:
        ``{"status": "ok"}`` merged with whatever mapping ``reset(scope)`` returns.

    Raises:
        HTTPException: 400 when ``scope`` is not one of the values listed above.
    """
    # Reject unknown scopes before anything destructive is scheduled.
    if scope not in ("all", "exam-corpus", "timetable"):
        raise HTTPException(status_code=400, detail="scope must be one of: all, exam-corpus, timetable")

    import asyncio
    import functools
    from run.initialization.reset_environment import reset as _reset

    # We are inside a coroutine, so a loop is guaranteed to be running;
    # get_running_loop() is the supported way to obtain it here.
    loop = asyncio.get_running_loop()
    # Run reset exactly once, with the validated scope, in the default executor
    # so the event loop stays free while it works.
    result = await loop.run_in_executor(None, functools.partial(_reset, scope))
    return {"status": "ok", **result}
|
||||
|
||||
|
||||
|
||||
3
run/initialization/manifests/_corpus_store/.gitignore
vendored
Normal file
3
run/initialization/manifests/_corpus_store/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
# Persistent local corpus store — PDFs are NOT committed (re-downloadable from manifest).
|
||||
*
|
||||
!.gitignore
|
||||
File diff suppressed because it is too large
Load Diff
@ -70,7 +70,27 @@ def _trilogy(award: str) -> List[Tuple[str, str, Optional[str]]]:
|
||||
def _alevel(award: str, papers=("1", "2", "3")) -> List[Tuple[str, str, Optional[str]]]:
|
||||
return [(f"{award}{p}", f"{award}/{p}", None) for p in papers]
|
||||
|
||||
def _subj(award: str, papers, tiers=(None,)) -> List[Tuple[str, str, Optional[str]]]:
|
||||
"""Generic GCSE/A-level builder. tiers=('F','H') for tiered subjects (Maths/Science),
|
||||
tiers=(None,) for untiered (English/Geography/CS/Business/Psychology)."""
|
||||
out = []
|
||||
for p in papers:
|
||||
for t in tiers:
|
||||
tl = t or ""
|
||||
out.append((f"{award}{p}{tl}", f"{award}/{p}{tl}", t))
|
||||
return out
|
||||
|
||||
def _mfl(award: str) -> List[Tuple[str, str, Optional[str]]]:
|
||||
"""AQA MFL: Listening/Reading/Writing papers, each Foundation/Higher (Speaking is teacher-conducted,
|
||||
no public QP). Filestore code encodes skill+tier, e.g. 8658LH = French Listening Higher."""
|
||||
out = []
|
||||
for skill in ("L", "R", "W"):
|
||||
for t in ("F", "H"):
|
||||
out.append((f"{award}{skill}{t}", f"{award}/{skill}{t}", t))
|
||||
return out
|
||||
|
||||
AQA_SPECS = [
|
||||
# ── Sciences (round 1 — kept at full depth) ──────────────────────────────────────
|
||||
("AQA-BIOL-8461", "BIOLOGY", "8461", "GCSE", "2016", _gcse_single("8461")),
|
||||
("AQA-CHEM-8462", "CHEMISTRY", "8462", "GCSE", "2016", _gcse_single("8462")),
|
||||
("AQA-PHYS-8463", "PHYSICS", "8463", "GCSE", "2016", _gcse_single("8463")),
|
||||
@ -81,6 +101,30 @@ AQA_SPECS = [
|
||||
("AQA-CHEM-7405", "CHEMISTRY", "7405", "A-level", "2015", _alevel("7405")),
|
||||
("AQA-PHYS-7407", "PHYSICS", "7407", "AS", "2015", _alevel("7407", ("1", "2"))),
|
||||
("AQA-PHYS-7408", "PHYSICS", "7408", "A-level", "2015", _alevel("7408")),
|
||||
# ── Round 2 breadth — high-volume core (Maths, English) ───────────────────────────
|
||||
("AQA-MATH-8300", "MATHEMATICS", "8300", "GCSE", "2015", _subj("8300", ("1", "2", "3"), ("F", "H"))),
|
||||
("AQA-MATH-7357", "MATHEMATICS", "7357", "A-level", "2017", _alevel("7357", ("1", "2", "3"))),
|
||||
("AQA-MATH-7356", "MATHEMATICS", "7356", "AS", "2017", _alevel("7356", ("1", "2"))),
|
||||
("AQA-ENGL-8700", "ENGLISH LANGUAGE", "8700", "GCSE", "2015", _subj("8700", ("1", "2"))),
|
||||
("AQA-ENGLIT-8702", "ENGLISH LITERATURE", "8702", "GCSE", "2015", _subj("8702", ("1", "2"))),
|
||||
("AQA-ENGL-7702", "ENGLISH LANGUAGE", "7702", "A-level", "2015", _alevel("7702", ("1", "2"))),
|
||||
("AQA-ENGLIT-7712", "ENGLISH LITERATURE A", "7712", "A-level", "2015", _alevel("7712", ("1", "2"))),
|
||||
# ── Round 2 breadth — humanities / others ─────────────────────────────────────────
|
||||
("AQA-GEOG-8035", "GEOGRAPHY", "8035", "GCSE", "2016", _subj("8035", ("1", "2", "3"))),
|
||||
("AQA-GEOG-7037", "GEOGRAPHY", "7037", "A-level", "2016", _alevel("7037", ("1", "2"))),
|
||||
("AQA-COMP-8525", "COMPUTER SCIENCE", "8525", "GCSE", "2020", _subj("8525", ("1", "2"))),
|
||||
("AQA-COMP-7517", "COMPUTER SCIENCE", "7517", "A-level", "2015", _alevel("7517", ("1", "2"))),
|
||||
("AQA-BUS-8132", "BUSINESS", "8132", "GCSE", "2017", _subj("8132", ("1", "2"))),
|
||||
("AQA-BUS-7132", "BUSINESS", "7132", "A-level", "2015", _alevel("7132", ("1", "2", "3"))),
|
||||
("AQA-PSYC-8182", "PSYCHOLOGY", "8182", "GCSE", "2017", _subj("8182", ("1", "2"))),
|
||||
("AQA-PSYC-7182", "PSYCHOLOGY", "7182", "A-level", "2015", _alevel("7182", ("1", "2", "3"))),
|
||||
# ── Round 2 breadth — modern foreign languages (Listening/Reading/Writing, F+H) ───
|
||||
("AQA-FREN-8658", "FRENCH", "8658", "GCSE", "2016", _mfl("8658")),
|
||||
("AQA-SPAN-8698", "SPANISH", "8698", "GCSE", "2016", _mfl("8698")),
|
||||
("AQA-GERM-8668", "GERMAN", "8668", "GCSE", "2016", _mfl("8668")),
|
||||
("AQA-FREN-7652", "FRENCH", "7652", "A-level", "2016", _alevel("7652", ("1", "2"))),
|
||||
("AQA-SPAN-7692", "SPANISH", "7692", "A-level", "2016", _alevel("7692", ("1", "2"))),
|
||||
("AQA-GERM-7662", "GERMAN", "7662", "A-level", "2016", _alevel("7662", ("1", "2"))),
|
||||
]
|
||||
AQA_SESSIONS = ["JUN18", "JUN19", "NOV20", "NOV21", "JUN22", "JUN23", "JUN24"]
|
||||
AQA_ROLES = ["QP", "MS", "ER"]
|
||||
@ -167,31 +211,113 @@ def build_aqa() -> Dict[str, Any]:
|
||||
|
||||
|
||||
# ─────────────── Edexcel / OCR — confirmed direct URLs (re-verified at build) ───────────────
|
||||
# Each tuple: (spec_code, subject, award, level, first_teach, exam_code, paper_code, tier,
|
||||
# session, role, url, original_name)
|
||||
# These boards aren't templatable (Edexcel has a non-derivable date suffix; OCR uses opaque
|
||||
# doc-ids), so confirmed URLs are listed as 6-tuples: (spec_code, paper_code, tier, session, role,
|
||||
# url). exam_code is DERIVED (see _mk_exam_code) so it always matches the locked convention.
|
||||
EXAM_CODE_PREFIX = {"EDEXCEL": "EDX", "OCR": "OCR"}
|
||||
|
||||
def _ec_token(paper_code: str) -> str:
|
||||
t = paper_code.split("/")[-1]
|
||||
return str(int(t)) if t.isdigit() else t # "01"->"1", "1H"->"1H", "1CH"->"1CH", "11"->"11"
|
||||
|
||||
def _mk_exam_code(prefix: str, award: str, paper_code: str, session: str, role: str) -> str:
|
||||
y, m = session.split("-")
|
||||
return f"{prefix}-{award}-{_ec_token(paper_code)}-{y}{m.upper()}-{role}"
|
||||
|
||||
_PE = "https://qualifications.pearson.com/content/dam/pdf"
|
||||
_EDX = f"{_PE}/GCSE/Science/2016"
|
||||
_OCR = "https://www.ocr.org.uk/Images"
|
||||
|
||||
EDEXCEL_SPECS = {
|
||||
"EDX-BIOL-1BI0": ("BIOLOGY", "1BI0", "GCSE", "2016"),
|
||||
"EDX-CHEM-1CH0": ("CHEMISTRY", "1CH0", "GCSE", "2016"),
|
||||
"EDX-PHYS-1PH0": ("PHYSICS", "1PH0", "GCSE", "2016"),
|
||||
"EDX-COMB-1SC0": ("COMBINED SCIENCE", "1SC0", "GCSE", "2016"),
|
||||
"EDX-MATH-1MA1": ("MATHEMATICS", "1MA1", "GCSE", "2015"),
|
||||
"EDX-ENGL-1EN0": ("ENGLISH LANGUAGE", "1EN0", "GCSE", "2015"),
|
||||
"EDX-ENGLIT-1ET0": ("ENGLISH LITERATURE", "1ET0", "GCSE", "2015"),
|
||||
"EDX-GEOG-1GA0": ("GEOGRAPHY A", "1GA0", "GCSE", "2016"),
|
||||
"EDX-HIST-1HI0": ("HISTORY", "1HI0", "GCSE", "2016"),
|
||||
"EDX-BUS-1BS0": ("BUSINESS", "1BS0", "GCSE", "2017"),
|
||||
"EDX-COMP-1CP2": ("COMPUTER SCIENCE", "1CP2", "GCSE", "2020"),
|
||||
"EDX-MATH-9MA0": ("MATHEMATICS", "9MA0", "A-level", "2017"),
|
||||
"EDX-ENGL-9EN0": ("ENGLISH LANGUAGE", "9EN0", "A-level", "2015"),
|
||||
"EDX-ENGLIT-9ET0": ("ENGLISH LITERATURE", "9ET0", "A-level", "2015"),
|
||||
"EDX-GEOG-9GE0": ("GEOGRAPHY", "9GE0", "A-level", "2016"),
|
||||
}
|
||||
_EDX = "https://qualifications.pearson.com/content/dam/pdf/GCSE/Science/2016"
|
||||
EDEXCEL_PAPERS = [
|
||||
("EDX-BIOL-1BI0", "EDX-1BI0-1H-2024JUN-QP", "1BI0/1H", "H", "2024-Jun", "QP", f"{_EDX}/Exam-materials/1bi0-1h-que-20240511.pdf"),
|
||||
("EDX-BIOL-1BI0", "EDX-1BI0-2F-2023JUN-QP", "1BI0/2F", "F", "2023-Jun", "QP", f"{_EDX}/Exam-materials/1bi0-2f-que-20230610.pdf"),
|
||||
("EDX-BIOL-1BI0", "EDX-1BI0-2H-2023JUN-QP", "1BI0/2H", "H", "2023-Jun", "QP", f"{_EDX}/Exam-materials/1bi0-2h-que-20230610.pdf"),
|
||||
("EDX-BIOL-1BI0", "EDX-1BI0-1F-2023JUN-MS", "1BI0/1F", "F", "2023-Jun", "MS", f"{_EDX}/Exam-materials/1bi0-1f-rms-20230824.pdf"),
|
||||
("EDX-BIOL-1BI0", "EDX-1BI0-1H-2024JUN-MS", "1BI0/1H", "H", "2024-Jun", "MS", f"{_EDX}/Exam-materials/1bi0-1h-rms-20240822.pdf"),
|
||||
("EDX-BIOL-1BI0", "EDX-1BI0-1H-2022JUN-MS", "1BI0/1H", "H", "2022-Jun", "MS", f"{_EDX}/exam-materials/1bi0-1h-rms-20220825.pdf"),
|
||||
("EDX-CHEM-1CH0", "EDX-1CH0-1F-2023JUN-QP", "1CH0/1F", "F", "2023-Jun", "QP", f"{_EDX}/Exam-materials/1ch0-1f-que-20230523.pdf"),
|
||||
("EDX-CHEM-1CH0", "EDX-1CH0-1H-2024JUN-QP", "1CH0/1H", "H", "2024-Jun", "QP", f"{_EDX}/Exam-materials/1ch0-1h-que-20240518.pdf"),
|
||||
("EDX-CHEM-1CH0", "EDX-1CH0-2H-2024JUN-MS", "1CH0/2H", "H", "2024-Jun", "MS", f"{_EDX}/Exam-materials/1ch0-2h-rms-20240822.pdf"),
|
||||
("EDX-PHYS-1PH0", "EDX-1PH0-1H-2023JUN-QP", "1PH0/1H", "H", "2023-Jun", "QP", f"{_EDX}/Exam-materials/1ph0-1h-que-20230526.pdf"),
|
||||
("EDX-PHYS-1PH0", "EDX-1PH0-2F-2023JUN-QP", "1PH0/2F", "F", "2023-Jun", "QP", f"{_EDX}/Exam-materials/1ph0-2f-que-20230617.pdf"),
|
||||
("EDX-PHYS-1PH0", "EDX-1PH0-1H-2024JUN-QP", "1PH0/1H", "H", "2024-Jun", "QP", f"{_EDX}/Exam-materials/1ph0-1h-que-20240523.pdf"),
|
||||
("EDX-PHYS-1PH0", "EDX-1PH0-2H-2023JUN-MS", "1PH0/2H", "H", "2023-Jun", "MS", f"{_EDX}/Exam-materials/1ph0-2h-rms-20230824.pdf"),
|
||||
("EDX-PHYS-1PH0", "EDX-1PH0-2H-2022JUN-MS", "1PH0/2H", "H", "2022-Jun", "MS", f"{_EDX}/exam-materials/1ph0-2h-rms-20220825.pdf"),
|
||||
("EDX-COMB-1SC0", "EDX-1SC0-1CH-2023JUN-MS", "1SC0/1CH", None, "2023-Jun", "MS", f"{_EDX}/Exam-materials/1sc0-1ch-rms-20230824.pdf"),
|
||||
# ── Sciences (round 1) ──
|
||||
("EDX-BIOL-1BI0", "1BI0/1H", "H", "2024-Jun", "QP", f"{_EDX}/Exam-materials/1bi0-1h-que-20240511.pdf"),
|
||||
("EDX-BIOL-1BI0", "1BI0/2F", "F", "2023-Jun", "QP", f"{_EDX}/Exam-materials/1bi0-2f-que-20230610.pdf"),
|
||||
("EDX-BIOL-1BI0", "1BI0/2H", "H", "2023-Jun", "QP", f"{_EDX}/Exam-materials/1bi0-2h-que-20230610.pdf"),
|
||||
("EDX-BIOL-1BI0", "1BI0/1F", "F", "2023-Jun", "MS", f"{_EDX}/Exam-materials/1bi0-1f-rms-20230824.pdf"),
|
||||
("EDX-BIOL-1BI0", "1BI0/1H", "H", "2024-Jun", "MS", f"{_EDX}/Exam-materials/1bi0-1h-rms-20240822.pdf"),
|
||||
("EDX-BIOL-1BI0", "1BI0/1H", "H", "2022-Jun", "MS", f"{_EDX}/exam-materials/1bi0-1h-rms-20220825.pdf"),
|
||||
("EDX-CHEM-1CH0", "1CH0/1F", "F", "2023-Jun", "QP", f"{_EDX}/Exam-materials/1ch0-1f-que-20230523.pdf"),
|
||||
("EDX-CHEM-1CH0", "1CH0/1H", "H", "2024-Jun", "QP", f"{_EDX}/Exam-materials/1ch0-1h-que-20240518.pdf"),
|
||||
("EDX-CHEM-1CH0", "1CH0/2H", "H", "2024-Jun", "MS", f"{_EDX}/Exam-materials/1ch0-2h-rms-20240822.pdf"),
|
||||
("EDX-PHYS-1PH0", "1PH0/1H", "H", "2023-Jun", "QP", f"{_EDX}/Exam-materials/1ph0-1h-que-20230526.pdf"),
|
||||
("EDX-PHYS-1PH0", "1PH0/2F", "F", "2023-Jun", "QP", f"{_EDX}/Exam-materials/1ph0-2f-que-20230617.pdf"),
|
||||
("EDX-PHYS-1PH0", "1PH0/1H", "H", "2024-Jun", "QP", f"{_EDX}/Exam-materials/1ph0-1h-que-20240523.pdf"),
|
||||
("EDX-PHYS-1PH0", "1PH0/2H", "H", "2023-Jun", "MS", f"{_EDX}/Exam-materials/1ph0-2h-rms-20230824.pdf"),
|
||||
("EDX-PHYS-1PH0", "1PH0/2H", "H", "2022-Jun", "MS", f"{_EDX}/exam-materials/1ph0-2h-rms-20220825.pdf"),
|
||||
("EDX-COMB-1SC0", "1SC0/1CH", None, "2023-Jun", "MS", f"{_EDX}/Exam-materials/1sc0-1ch-rms-20230824.pdf"),
|
||||
# ── Maths 1MA1 (round 2) ──
|
||||
("EDX-MATH-1MA1", "1MA1/1H", "H", "2023-Jun", "QP", f"{_PE}/GCSE/Mathematics/2015/Exam-materials/1ma1-1h-que-20230520.pdf"),
|
||||
("EDX-MATH-1MA1", "1MA1/1H", "H", "2023-Jun", "MS", f"{_PE}/GCSE/Mathematics/2015/Exam-materials/1ma1-1h-rms-20230824.pdf"),
|
||||
("EDX-MATH-1MA1", "1MA1/1F", "F", "2023-Jun", "MS", f"{_PE}/GCSE/Mathematics/2015/Exam-materials/1ma1-1f-rms-20230824.pdf"),
|
||||
("EDX-MATH-1MA1", "1MA1/1F", "F", "2024-Jun", "QP", f"{_PE}/GCSE/Mathematics/2015/Exam-materials/1ma1-1f-que-20240517.pdf"),
|
||||
("EDX-MATH-1MA1", "1MA1/1H", "H", "2024-Jun", "QP", f"{_PE}/GCSE/Mathematics/2015/Exam-materials/1ma1-1h-que-20240517.pdf"),
|
||||
("EDX-MATH-1MA1", "1MA1/1F", "F", "2024-Jun", "MS", f"{_PE}/GCSE/Mathematics/2015/Exam-materials/1ma1-1f-rms-20240822.pdf"),
|
||||
("EDX-MATH-1MA1", "1MA1/1H", "H", "2023-Nov", "MS", f"{_PE}/GCSE/Mathematics/2015/Exam-materials/1ma1-1h-rms-20240111.pdf"),
|
||||
("EDX-MATH-1MA1", "1MA1/1H", "H", "2022-Jun", "MS", f"{_PE}/GCSE/mathematics/2015/exam-materials/1ma1-1h-rms-20220825.pdf"),
|
||||
("EDX-MATH-1MA1", "1MA1/3H", "H", "2022-Jun", "MS", f"{_PE}/GCSE/mathematics/2015/exam-materials/1ma1-3h-rms-20220825.pdf"),
|
||||
# ── English Language 1EN0 / Literature 1ET0 (round 2) ──
|
||||
("EDX-ENGL-1EN0", "1EN0/01", None, "2024-Jun", "QP", f"{_PE}/GCSE/English-Language/2015/Exam-materials/1en0-01-que-20240524.pdf"),
|
||||
("EDX-ENGL-1EN0", "1EN0/01", None, "2023-Nov", "QP", f"{_PE}/GCSE/English-Language/2015/Exam-materials/1en0-01-que-20231108.pdf"),
|
||||
("EDX-ENGL-1EN0", "1EN0/01", None, "2024-Jun", "MS", f"{_PE}/GCSE/English-Language/2015/Exam-materials/1en0-01-rms-20240822.pdf"),
|
||||
("EDX-ENGL-1EN0", "1EN0/02", None, "2024-Jun", "MS", f"{_PE}/GCSE/English-Language/2015/Exam-materials/1en0-02-rms-20240822.pdf"),
|
||||
("EDX-ENGL-1EN0", "1EN0/01", None, "2023-Jun", "MS", f"{_PE}/GCSE/English-Language/2015/Exam-materials/1en0-01-rms-20230824.pdf"),
|
||||
("EDX-ENGL-1EN0", "1EN0/02", None, "2023-Jun", "MS", f"{_PE}/GCSE/English-Language/2015/Exam-materials/1en0-02-rms-20230824.pdf"),
|
||||
("EDX-ENGLIT-1ET0", "1ET0/01", None, "2023-Jun", "QP", f"{_PE}/GCSE/English-Literature/2015/Exam-materials/1et0-01-que-20230518.pdf"),
|
||||
("EDX-ENGLIT-1ET0", "1ET0/02", None, "2023-Jun", "QP", f"{_PE}/GCSE/English-Literature/2015/Exam-materials/1et0-02-que-20230525.pdf"),
|
||||
("EDX-ENGLIT-1ET0", "1ET0/02", None, "2024-Jun", "QP", f"{_PE}/GCSE/English-Literature/2015/Exam-materials/1et0-02-que-20240521.pdf"),
|
||||
("EDX-ENGLIT-1ET0", "1ET0/01", None, "2023-Jun", "MS", f"{_PE}/GCSE/English-Literature/2015/Exam-materials/1et0-01-rms-20230824.pdf"),
|
||||
("EDX-ENGLIT-1ET0", "1ET0/01", None, "2024-Jun", "MS", f"{_PE}/GCSE/English-Literature/2015/Exam-materials/1et0-01-rms-20240822.pdf"),
|
||||
# ── A-level Maths 9MA0 / English 9EN0 / 9ET0 (round 2) ──
|
||||
("EDX-MATH-9MA0", "9MA0/01", None, "2023-Jun", "QP", f"{_PE}/A-Level/Mathematics/2017/Exam-materials/9ma0-01-que-20230607.pdf"),
|
||||
("EDX-MATH-9MA0", "9MA0/31", None, "2023-Jun", "QP", f"{_PE}/A-Level/Mathematics/2017/Exam-materials/9ma0-31-que-20230621.pdf"),
|
||||
("EDX-MATH-9MA0", "9MA0/02", None, "2024-Jun", "QP", f"{_PE}/A-Level/Mathematics/2017/Exam-materials/9ma0-02-que-20240612.pdf"),
|
||||
("EDX-MATH-9MA0", "9MA0/31", None, "2023-Jun", "MS", f"{_PE}/A-Level/Mathematics/2017/Exam-materials/9ma0-31-rms-20230817.pdf"),
|
||||
("EDX-MATH-9MA0", "9MA0/01", None, "2024-Jun", "MS", f"{_PE}/A-Level/Mathematics/2017/Exam-materials/9ma0-01-rms-20240815.pdf"),
|
||||
("EDX-ENGL-9EN0", "9EN0/01", None, "2024-Jun", "MS", f"{_PE}/A-Level/English-Language/2015/Exam-materials/9en0-01-rms-20240815.pdf"),
|
||||
("EDX-ENGL-9EN0", "9EN0/02", None, "2024-Jun", "MS", f"{_PE}/A-Level/English-Language/2015/Exam-materials/9en0-02-rms-20240815.pdf"),
|
||||
("EDX-ENGLIT-9ET0", "9ET0/01", None, "2024-Jun", "QP", f"{_PE}/A-Level/English-Literature/2015/Exam-materials/9et0-01-que-20240525.pdf"),
|
||||
("EDX-ENGLIT-9ET0", "9ET0/01", None, "2023-Jun", "MS", f"{_PE}/A-Level/English-Literature/2015/Exam-materials/9et0-01-rms-20230817.pdf"),
|
||||
("EDX-ENGLIT-9ET0", "9ET0/03", None, "2023-Jun", "MS", f"{_PE}/A-Level/English-Literature/2015/Exam-materials/9et0-03-rms-20230817.pdf"),
|
||||
# ── Humanities (round 2) ──
|
||||
("EDX-GEOG-1GA0", "1GA0/01", None, "2023-Jun", "QP", f"{_PE}/GCSE/Geography-A/2016/Exam-materials/1ga0-01-que-20230523.pdf"),
|
||||
("EDX-GEOG-1GA0", "1GA0/01", None, "2023-Jun", "MS", f"{_PE}/GCSE/Geography-A/2016/Exam-materials/1ga0-01-rms-20230824.pdf"),
|
||||
("EDX-GEOG-1GA0", "1GA0/02", None, "2023-Jun", "QP", f"{_PE}/GCSE/Geography-A/2016/Exam-materials/1ga0-02-que-20230610.pdf"),
|
||||
("EDX-GEOG-1GA0", "1GA0/02", None, "2023-Jun", "MS", f"{_PE}/GCSE/Geography-A/2016/Exam-materials/1ga0-02-rms-20230824.pdf"),
|
||||
("EDX-GEOG-1GA0", "1GA0/01", None, "2024-Jun", "MS", f"{_PE}/GCSE/Geography-A/2016/Exam-materials/1ga0-01-rms-20240822.pdf"),
|
||||
("EDX-GEOG-1GA0", "1GA0/03", None, "2024-Jun", "QP", f"{_PE}/GCSE/Geography-A/2016/Exam-materials/1ga0-03-que-20240615.pdf"),
|
||||
("EDX-HIST-1HI0", "1HI0/10", None, "2023-Jun", "QP", f"{_PE}/GCSE/History/2016/Exam-materials/1hi0-10-que-20230519.pdf"),
|
||||
("EDX-HIST-1HI0", "1HI0/10", None, "2023-Jun", "MS", f"{_PE}/GCSE/History/2016/Exam-materials/1hi0-10-rms-20230824.pdf"),
|
||||
("EDX-HIST-1HI0", "1HI0/12", None, "2023-Jun", "MS", f"{_PE}/GCSE/History/2016/Exam-materials/1hi0-12-rms-20230824.pdf"),
|
||||
("EDX-HIST-1HI0", "1HI0/13", None, "2024-Jun", "MS", f"{_PE}/GCSE/History/2016/Exam-materials/1hi0-13-rms-20240822.pdf"),
|
||||
("EDX-HIST-1HI0", "1HI0/33", None, "2023-Jun", "MS", f"{_PE}/GCSE/History/2016/Exam-materials/1hi0-33-rms-20230824.pdf"),
|
||||
("EDX-BUS-1BS0", "1BS0/01", None, "2023-Jun", "QP", f"{_PE}/GCSE/Business/2017/Exam-materials/1bs0-01-que-20230519.pdf"),
|
||||
("EDX-BUS-1BS0", "1BS0/02", None, "2023-Jun", "QP", f"{_PE}/GCSE/Business/2017/Exam-materials/1bs0-02-que-20230613.pdf"),
|
||||
("EDX-BUS-1BS0", "1BS0/02", None, "2023-Jun", "MS", f"{_PE}/GCSE/Business/2017/Exam-materials/1bs0-02-rms-20230824.pdf"),
|
||||
("EDX-BUS-1BS0", "1BS0/02", None, "2024-Jun", "QP", f"{_PE}/GCSE/Business/2017/Exam-materials/1bs0-02-que-20240606.pdf"),
|
||||
("EDX-BUS-1BS0", "1BS0/01", None, "2024-Jun", "MS", f"{_PE}/GCSE/Business/2017/Exam-materials/1bs0-01-rms-20240822.pdf"),
|
||||
("EDX-COMP-1CP2", "1CP2/01", None, "2023-Jun", "QP", f"{_PE}/GCSE/Computer-science/2020/Exam-materials/1cp2-01-que-20230520.pdf"),
|
||||
("EDX-COMP-1CP2", "1CP2/01", None, "2023-Jun", "MS", f"{_PE}/GCSE/Computer-science/2020/Exam-materials/1cp2-01-rms-20230824.pdf"),
|
||||
("EDX-COMP-1CP2", "1CP2/02", None, "2023-Jun", "QP", f"{_PE}/GCSE/Computer-science/2020/Exam-materials/1cp2-02-que-20230526.pdf"),
|
||||
("EDX-COMP-1CP2", "1CP2/01", None, "2024-Jun", "QP", f"{_PE}/GCSE/Computer-Science/2020/Exam-materials/1cp2-01-que-20240702.pdf"),
|
||||
("EDX-COMP-1CP2", "1CP2/01", None, "2024-Jun", "MS", f"{_PE}/GCSE/Computer-science/2020/Exam-materials/1cp2-01-rms-20240822.pdf"),
|
||||
("EDX-GEOG-9GE0", "9GE0/01", None, "2023-Jun", "QP", f"{_PE}/A-Level/Geography/2016/Exam-materials/9ge0-01-que-20230518.pdf"),
|
||||
]
|
||||
|
||||
OCR_SPECS = {
|
||||
@ -199,39 +325,123 @@ OCR_SPECS = {
|
||||
"OCR-CHEM-J248": ("CHEMISTRY", "J248", "GCSE", "2016"),
|
||||
"OCR-PHYS-J249": ("PHYSICS", "J249", "GCSE", "2016"),
|
||||
"OCR-COMB-J250": ("COMBINED SCIENCE", "J250", "GCSE", "2016"),
|
||||
"OCR-MATH-J560": ("MATHEMATICS", "J560", "GCSE", "2015"),
|
||||
"OCR-ENGL-J351": ("ENGLISH LANGUAGE", "J351", "GCSE", "2015"),
|
||||
"OCR-ENGLIT-J352": ("ENGLISH LITERATURE", "J352", "GCSE", "2015"),
|
||||
"OCR-COMP-J277": ("COMPUTER SCIENCE", "J277", "GCSE", "2020"),
|
||||
"OCR-GEOG-J383": ("GEOGRAPHY A", "J383", "GCSE", "2016"),
|
||||
"OCR-BUS-J204": ("BUSINESS", "J204", "GCSE", "2017"),
|
||||
"OCR-HIST-J411": ("HISTORY B (SHP)", "J411", "GCSE", "2016"),
|
||||
"OCR-MATH-H240": ("MATHEMATICS A", "H240", "A-level", "2017"),
|
||||
"OCR-ENGLIT-H472": ("ENGLISH LITERATURE", "H472", "A-level", "2015"),
|
||||
"OCR-ENGL-H470": ("ENGLISH LANGUAGE", "H470", "A-level", "2015"),
|
||||
}
|
||||
_OCR = "https://www.ocr.org.uk/Images"
|
||||
OCR_PAPERS = [
|
||||
("OCR-BIOL-J247", "OCR-J247-1-2024JUN-QP", "J247/01", "F", "2024-Jun", "QP", f"{_OCR}/727713-question-paper-paper-1.pdf"),
|
||||
("OCR-BIOL-J247", "OCR-J247-1-2024JUN-MS", "J247/01", "F", "2024-Jun", "MS", f"{_OCR}/727745-mark-scheme-paper-1.pdf"),
|
||||
("OCR-BIOL-J247", "OCR-J247-3-2024JUN-QP", "J247/03", "H", "2024-Jun", "QP", f"{_OCR}/727715-question-paper-paper-3.pdf"),
|
||||
("OCR-BIOL-J247", "OCR-J247-3-2024JUN-MS", "J247/03", "H", "2024-Jun", "MS", f"{_OCR}/727747-mark-scheme-paper-3.pdf"),
|
||||
("OCR-BIOL-J247", "OCR-J247-1-2023JUN-QP", "J247/01", "F", "2023-Jun", "QP", f"{_OCR}/704945-question-paper-paper-1.pdf"),
|
||||
("OCR-BIOL-J247", "OCR-J247-3-2023JUN-MS", "J247/03", "H", "2023-Jun", "MS", f"{_OCR}/704979-mark-scheme-paper-3.pdf"),
|
||||
("OCR-BIOL-J247", "OCR-J247-3-2022JUN-QP", "J247/03", "H", "2022-Jun", "QP", f"{_OCR}/678031-question-paper-paper-3.pdf"),
|
||||
("OCR-BIOL-J247", "OCR-J247-1-2022JUN-MS", "J247/01", "F", "2022-Jun", "MS", f"{_OCR}/678076-mark-scheme-paper-1.pdf"),
|
||||
("OCR-CHEM-J248", "OCR-J248-1-2024JUN-QP", "J248/01", "F", "2024-Jun", "QP", f"{_OCR}/727718-question-paper-paper-1.pdf"),
|
||||
("OCR-CHEM-J248", "OCR-J248-3-2024JUN-MS", "J248/03", "H", "2024-Jun", "MS", f"{_OCR}/727751-mark-scheme-paper-3.pdf"),
|
||||
("OCR-CHEM-J248", "OCR-J248-1-2023JUN-QP", "J248/01", "F", "2023-Jun", "QP", f"{_OCR}/704950-question-paper-paper-1.pdf"),
|
||||
("OCR-CHEM-J248", "OCR-J248-3-2022JUN-QP", "J248/03", "H", "2022-Jun", "QP", f"{_OCR}/678036-question-paper-paper-3.pdf"),
|
||||
("OCR-PHYS-J249", "OCR-J249-1-2024JUN-QP", "J249/01", "F", "2024-Jun", "QP", f"{_OCR}/727724-question-paper-paper-1.pdf"),
|
||||
("OCR-PHYS-J249", "OCR-J249-3-2024JUN-MS", "J249/03", "H", "2024-Jun", "MS", f"{_OCR}/727755-mark-scheme-paper-3.pdf"),
|
||||
("OCR-PHYS-J249", "OCR-J249-1-2023JUN-QP", "J249/01", "F", "2023-Jun", "QP", f"{_OCR}/704956-question-paper-paper-1.pdf"),
|
||||
("OCR-PHYS-J249", "OCR-J249-3-2022JUN-MS", "J249/03", "H", "2022-Jun", "MS", f"{_OCR}/678086-mark-scheme-paper-3.pdf"),
|
||||
("OCR-COMB-J250", "OCR-J250-1-2024JUN-QP", "J250/01", "F", "2024-Jun", "QP", f"{_OCR}/727730-question-paper-paper-1.pdf"),
|
||||
("OCR-COMB-J250", "OCR-J250-7-2024JUN-MS", "J250/07", "H", "2024-Jun", "MS", f"{_OCR}/727763-mark-scheme-paper-7.pdf"),
|
||||
# ── Sciences (round 1) ──
|
||||
("OCR-BIOL-J247", "J247/01", "F", "2024-Jun", "QP", f"{_OCR}/727713-question-paper-paper-1.pdf"),
|
||||
("OCR-BIOL-J247", "J247/01", "F", "2024-Jun", "MS", f"{_OCR}/727745-mark-scheme-paper-1.pdf"),
|
||||
("OCR-BIOL-J247", "J247/03", "H", "2024-Jun", "QP", f"{_OCR}/727715-question-paper-paper-3.pdf"),
|
||||
("OCR-BIOL-J247", "J247/03", "H", "2024-Jun", "MS", f"{_OCR}/727747-mark-scheme-paper-3.pdf"),
|
||||
("OCR-BIOL-J247", "J247/01", "F", "2023-Jun", "QP", f"{_OCR}/704945-question-paper-paper-1.pdf"),
|
||||
("OCR-BIOL-J247", "J247/03", "H", "2023-Jun", "MS", f"{_OCR}/704979-mark-scheme-paper-3.pdf"),
|
||||
("OCR-BIOL-J247", "J247/03", "H", "2022-Jun", "QP", f"{_OCR}/678031-question-paper-paper-3.pdf"),
|
||||
("OCR-BIOL-J247", "J247/01", "F", "2022-Jun", "MS", f"{_OCR}/678076-mark-scheme-paper-1.pdf"),
|
||||
("OCR-CHEM-J248", "J248/01", "F", "2024-Jun", "QP", f"{_OCR}/727718-question-paper-paper-1.pdf"),
|
||||
("OCR-CHEM-J248", "J248/03", "H", "2024-Jun", "MS", f"{_OCR}/727751-mark-scheme-paper-3.pdf"),
|
||||
("OCR-CHEM-J248", "J248/01", "F", "2023-Jun", "QP", f"{_OCR}/704950-question-paper-paper-1.pdf"),
|
||||
("OCR-CHEM-J248", "J248/03", "H", "2022-Jun", "QP", f"{_OCR}/678036-question-paper-paper-3.pdf"),
|
||||
("OCR-PHYS-J249", "J249/01", "F", "2024-Jun", "QP", f"{_OCR}/727724-question-paper-paper-1.pdf"),
|
||||
("OCR-PHYS-J249", "J249/03", "H", "2024-Jun", "MS", f"{_OCR}/727755-mark-scheme-paper-3.pdf"),
|
||||
("OCR-PHYS-J249", "J249/01", "F", "2023-Jun", "QP", f"{_OCR}/704956-question-paper-paper-1.pdf"),
|
||||
("OCR-PHYS-J249", "J249/03", "H", "2022-Jun", "MS", f"{_OCR}/678086-mark-scheme-paper-3.pdf"),
|
||||
("OCR-COMB-J250", "J250/01", "F", "2024-Jun", "QP", f"{_OCR}/727730-question-paper-paper-1.pdf"),
|
||||
("OCR-COMB-J250", "J250/07", "H", "2024-Jun", "MS", f"{_OCR}/727763-mark-scheme-paper-7.pdf"),
|
||||
# ── Maths J560 (round 2) ──
|
||||
("OCR-MATH-J560", "J560/01", "F", "2024-Jun", "QP", f"{_OCR}/727817-question-paper-paper-1.pdf"),
|
||||
("OCR-MATH-J560", "J560/01", "F", "2024-Jun", "MS", f"{_OCR}/727824-mark-scheme-paper-1.pdf"),
|
||||
("OCR-MATH-J560", "J560/04", "H", "2024-Jun", "QP", f"{_OCR}/727820-question-paper-paper-4.pdf"),
|
||||
("OCR-MATH-J560", "J560/04", "H", "2024-Jun", "MS", f"{_OCR}/727827-mark-scheme-paper-4.pdf"),
|
||||
("OCR-MATH-J560", "J560/01", "F", "2023-Jun", "QP", f"{_OCR}/705050-question-paper-paper-1.pdf"),
|
||||
("OCR-MATH-J560", "J560/01", "F", "2023-Jun", "MS", f"{_OCR}/705057-mark-scheme-paper-1.pdf"),
|
||||
("OCR-MATH-J560", "J560/04", "H", "2023-Jun", "QP", f"{_OCR}/705053-question-paper-paper-4.pdf"),
|
||||
("OCR-MATH-J560", "J560/04", "H", "2023-Jun", "MS", f"{_OCR}/705060-mark-scheme-paper-4.pdf"),
|
||||
("OCR-MATH-J560", "J560/01", "F", "2022-Jun", "QP", f"{_OCR}/678149-question-paper-paper-1.pdf"),
|
||||
("OCR-MATH-J560", "J560/01", "F", "2022-Jun", "MS", f"{_OCR}/678156-mark-scheme-paper-1.pdf"),
|
||||
("OCR-MATH-J560", "J560/04", "H", "2022-Jun", "QP", f"{_OCR}/678152-question-paper-paper-4.pdf"),
|
||||
("OCR-MATH-J560", "J560/04", "H", "2022-Jun", "MS", f"{_OCR}/678159-mark-scheme-paper-4.pdf"),
|
||||
# ── English Language J351 / Literature J352 (round 2) ──
|
||||
("OCR-ENGL-J351", "J351/01", None, "2024-Jun", "QP", f"{_OCR}/727556-question-paper-communicating-information-and-ideas.pdf"),
|
||||
("OCR-ENGL-J351", "J351/01", None, "2024-Jun", "MS", f"{_OCR}/727658-mark-scheme-communication-information-and-ideas.pdf"),
|
||||
("OCR-ENGL-J351", "J351/02", None, "2024-Jun", "QP", f"{_OCR}/727558-question-paper-exploring-effects-and-impact.pdf"),
|
||||
("OCR-ENGL-J351", "J351/02", None, "2024-Jun", "MS", f"{_OCR}/727659-mark-scheme-exploring-effects-and-impact.pdf"),
|
||||
("OCR-ENGL-J351", "J351/01", None, "2023-Jun", "QP", f"{_OCR}/704782-question-paper-communicating-information-and-ideas.pdf"),
|
||||
("OCR-ENGL-J351", "J351/01", None, "2023-Jun", "MS", f"{_OCR}/704888-mark-scheme-communication-information-and-ideas.pdf"),
|
||||
("OCR-ENGL-J351", "J351/01", None, "2022-Jun", "QP", f"{_OCR}/677852-question-paper-communicating-information-and-ideas.pdf"),
|
||||
("OCR-ENGL-J351", "J351/01", None, "2022-Jun", "MS", f"{_OCR}/677967-mark-scheme-communication-information-and-ideas.pdf"),
|
||||
("OCR-ENGLIT-J352", "J352/01", None, "2024-Jun", "QP", f"{_OCR}/727830-question-paper-exploring-modern-and-literary-heritage-texts.pdf"),
|
||||
("OCR-ENGLIT-J352", "J352/01", None, "2024-Jun", "MS", f"{_OCR}/727832-mark-scheme-exploring-modern-and-literary-heritage-texts.pdf"),
|
||||
("OCR-ENGLIT-J352", "J352/02", None, "2024-Jun", "QP", f"{_OCR}/727831-question-paper-exploring-poetry-and-shakespeare.pdf"),
|
||||
("OCR-ENGLIT-J352", "J352/02", None, "2024-Jun", "MS", f"{_OCR}/727833-mark-scheme-exploring-poetry-and-shakespeare.pdf"),
|
||||
("OCR-ENGLIT-J352", "J352/01", None, "2023-Jun", "QP", f"{_OCR}/705069-question-paper-exploring-modern-and-literary-heritage-texts.pdf"),
|
||||
("OCR-ENGLIT-J352", "J352/01", None, "2023-Jun", "MS", f"{_OCR}/705075-mark-scheme-exploring-modern-and-literary-heritage-texts.pdf"),
|
||||
# ── A-level Maths H240 / English Lit H472 / Lang H470 (round 2) ──
|
||||
("OCR-MATH-H240", "H240/01", None, "2024-Jun", "QP", f"{_OCR}/726654-question-paper-pure-mathematics.pdf"),
|
||||
("OCR-MATH-H240", "H240/01", None, "2024-Jun", "MS", f"{_OCR}/726795-mark-scheme-pure-mathematics.pdf"),
|
||||
("OCR-MATH-H240", "H240/02", None, "2024-Jun", "QP", f"{_OCR}/726656-question-paper-pure-mathematics-and-statistics.pdf"),
|
||||
("OCR-MATH-H240", "H240/02", None, "2024-Jun", "MS", f"{_OCR}/726796-mark-scheme-pure-mathematics-and-statistics.pdf"),
|
||||
("OCR-MATH-H240", "H240/01", None, "2023-Jun", "QP", f"{_OCR}/703866-question-paper-pure-mathematics.pdf"),
|
||||
("OCR-MATH-H240", "H240/01", None, "2023-Jun", "MS", f"{_OCR}/704008-mark-scheme-pure-mathematics.pdf"),
|
||||
("OCR-MATH-H240", "H240/01", None, "2022-Jun", "QP", f"{_OCR}/676845-question-paper-pure-mathematics.pdf"),
|
||||
("OCR-MATH-H240", "H240/01", None, "2022-Jun", "MS", f"{_OCR}/677005-mark-scheme-pure-mathematics.pdf"),
|
||||
("OCR-ENGLIT-H472", "H472/01", None, "2024-Jun", "QP", f"{_OCR}/726602-question-paper-drama-and-poetry-pre-1900.pdf"),
|
||||
("OCR-ENGLIT-H472", "H472/01", None, "2024-Jun", "MS", f"{_OCR}/726762-mark-scheme-drama-and-poetry-pre-1900.pdf"),
|
||||
("OCR-ENGLIT-H472", "H472/01", None, "2023-Jun", "QP", f"{_OCR}/703813-question-paper-drama-and-poetry-pre-1900.pdf"),
|
||||
("OCR-ENGLIT-H472", "H472/01", None, "2023-Jun", "MS", f"{_OCR}/703974-mark-scheme-drama-and-poetry-pre-1900.pdf"),
|
||||
("OCR-ENGLIT-H472", "H472/01", None, "2022-Jun", "QP", f"{_OCR}/676783-question-paper-drama-and-poetry-pre-1900.pdf"),
|
||||
("OCR-ENGLIT-H472", "H472/01", None, "2022-Jun", "MS", f"{_OCR}/676965-mark-scheme-drama-and-poetry-pre-1900.pdf"),
|
||||
("OCR-ENGL-H470", "H470/01", None, "2024-Jun", "QP", f"{_OCR}/726595-question-paper-exploring-language.pdf"),
|
||||
("OCR-ENGL-H470", "H470/01", None, "2024-Jun", "MS", f"{_OCR}/726764-mark-scheme-exploring-language.pdf"),
|
||||
("OCR-ENGL-H470", "H470/01", None, "2023-Jun", "QP", f"{_OCR}/703806-question-paper-exploring-language.pdf"),
|
||||
("OCR-ENGL-H470", "H470/01", None, "2023-Jun", "MS", f"{_OCR}/703976-mark-scheme-exploring-language.pdf"),
|
||||
("OCR-ENGL-H470", "H470/01", None, "2022-Jun", "QP", f"{_OCR}/676772-question-paper-exploring-language.pdf"),
|
||||
("OCR-ENGL-H470", "H470/01", None, "2022-Jun", "MS", f"{_OCR}/676967-mark-scheme-exploring-language.pdf"),
|
||||
# ── Humanities (round 2) ──
|
||||
("OCR-COMP-J277", "J277/01", None, "2024-Jun", "QP", f"{_OCR}/727534-question-paper-computer-systems.pdf"),
|
||||
("OCR-COMP-J277", "J277/01", None, "2024-Jun", "MS", f"{_OCR}/727652-mark-scheme-computer-systems.pdf"),
|
||||
("OCR-COMP-J277", "J277/02", None, "2024-Jun", "QP", f"{_OCR}/727535-question-paper-computational-thinking-algorithms-and-programming.pdf"),
|
||||
("OCR-COMP-J277", "J277/02", None, "2024-Jun", "MS", f"{_OCR}/727653-mark-scheme-computational-thinking-algorithms-and-programming.pdf"),
|
||||
("OCR-GEOG-J383", "J383/01", None, "2024-Jun", "QP", f"{_OCR}/727564-question-paper-living-in-the-uk-today.pdf"),
|
||||
("OCR-GEOG-J383", "J383/01", None, "2024-Jun", "MS", f"{_OCR}/727661-mark-scheme-living-in-the-uk-today.pdf"),
|
||||
("OCR-GEOG-J383", "J383/02", None, "2024-Jun", "QP", f"{_OCR}/727566-question-paper-the-world-around-us.pdf"),
|
||||
("OCR-GEOG-J383", "J383/02", None, "2024-Jun", "MS", f"{_OCR}/727662-mark-scheme-the-world-around-us.pdf"),
|
||||
("OCR-BUS-J204", "J204/01", None, "2024-Jun", "QP", f"{_OCR}/727519-question-paper-business-1-business-activity-marketing-and-people.pdf"),
|
||||
("OCR-BUS-J204", "J204/01", None, "2024-Jun", "MS", f"{_OCR}/727634-mark-scheme-business-1-business-activity-marketing-and-people.pdf"),
|
||||
("OCR-BUS-J204", "J204/02", None, "2024-Jun", "QP", f"{_OCR}/727520-question-paper-business-2-operations-finance-and-influences-on-business.pdf"),
|
||||
("OCR-BUS-J204", "J204/02", None, "2024-Jun", "MS", f"{_OCR}/727635-mark-scheme-business-2-operations-finance-and-influences-on-business.pdf"),
|
||||
("OCR-BUS-J204", "J204/01", None, "2023-Jun", "QP", f"{_OCR}/704745-question-paper-business-1-business-activity-marketing-and-people.pdf"),
|
||||
("OCR-BUS-J204", "J204/01", None, "2023-Jun", "MS", f"{_OCR}/704864-mark-scheme-business-1-business-activity-marketing-and-people.pdf"),
|
||||
("OCR-HIST-J411", "J411/11", None, "2024-Jun", "QP", f"{_OCR}/727590-question-paper-the-people-s-health-c.1250-to-present-with-the-norman-conquest-1065-1087.pdf"),
|
||||
("OCR-HIST-J411", "J411/11", None, "2024-Jun", "MS", f"{_OCR}/727678-mark-scheme-the-people-s-health-c.1250-to-present-with-the-norman-conquest-1065-1087.pdf"),
|
||||
]
|
||||
|
||||
|
||||
def build_board(board_code: str, specs_meta: Dict, papers: List) -> Dict[str, Any]:
|
||||
prefix = EXAM_CODE_PREFIX[board_code]
|
||||
print(f"[{board_code}] re-verifying {len(papers)} confirmed URLs...", file=sys.stderr)
|
||||
live: Dict[int, bool] = {}
|
||||
with cf.ThreadPoolExecutor(max_workers=24) as ex:
|
||||
futs = {ex.submit(head_ok, p[5]): i for i, p in enumerate(papers)}
|
||||
for fut in cf.as_completed(futs):
|
||||
live[futs[fut]] = fut.result()
|
||||
by_spec: Dict[str, List[Dict[str, Any]]] = {}
|
||||
for spec_code, exam_code, paper_code, tier, session, role, url in papers:
|
||||
if not head_ok(url):
|
||||
for i, (spec_code, paper_code, tier, session, role, url) in enumerate(papers):
|
||||
if not live.get(i):
|
||||
print(f" DROP (not live): {url}", file=sys.stderr)
|
||||
continue
|
||||
award = specs_meta[spec_code][1]
|
||||
by_spec.setdefault(spec_code, []).append({
|
||||
"exam_code": exam_code, "paper_code": paper_code, "tier": tier,
|
||||
"exam_code": _mk_exam_code(prefix, award, paper_code, session, role),
|
||||
"paper_code": paper_code, "tier": tier,
|
||||
"session": session, "doc_type": role,
|
||||
"file": {"source": f"url:{url}", "original_name": os.path.basename(url),
|
||||
"provenance": {"source_url": url, "fetched": FETCHED,
|
||||
|
||||
@ -82,6 +82,41 @@ SUPABASE_TABLES_TO_CLEAR = [
|
||||
"admin_profiles",
|
||||
]
|
||||
|
||||
# Exam subsystem tables, FK child-first. NOT in the list above — the previous full reset()
|
||||
# never cleared exam data or storage at all; the granular scopes below fold it in.
|
||||
EXAM_CORPUS_TABLES = [
|
||||
"mark_entries",
|
||||
"student_submissions",
|
||||
"marking_batches",
|
||||
"exam_response_areas",
|
||||
"exam_boundaries",
|
||||
"exam_template_layout",
|
||||
"exam_questions",
|
||||
"exam_templates",
|
||||
"eb_exams",
|
||||
"eb_specifications",
|
||||
]
|
||||
|
||||
# Timetable / calendar materialization subset (for scope='timetable').
|
||||
TIMETABLE_TABLES = [
|
||||
"lesson_deliveries",
|
||||
"lesson_collaborators",
|
||||
"taught_lessons",
|
||||
"academic_periods",
|
||||
"academic_days",
|
||||
"academic_weeks",
|
||||
"academic_term_breaks",
|
||||
"academic_terms",
|
||||
"academic_years",
|
||||
"teacher_timetable_slots",
|
||||
"teacher_timetables",
|
||||
"school_timetables",
|
||||
"planned_lessons",
|
||||
]
|
||||
|
||||
# Bucket whose objects the exam-corpus reset clears (via the Storage API — protect_delete blocks raw SQL).
|
||||
EXAM_STORAGE_BUCKET = "cc.examboards"
|
||||
|
||||
|
||||
def _sb_headers():
|
||||
url = os.environ["SUPABASE_URL"]
|
||||
@ -146,13 +181,84 @@ def _supabase_delete_auth_user(url: str, headers: dict, uid: str):
|
||||
logger.warning(f" Delete auth user {uid}: {r.status_code} {r.text[:80]}")
|
||||
|
||||
|
||||
# ─── Granular helpers ───────────────────────────────────────────────────────────
|
||||
|
||||
def _clear_tables(url: str, headers: dict, tables: List[str]) -> "tuple[List[str], List[str]]":
|
||||
cleared, failed = [], []
|
||||
for table in tables:
|
||||
if _sb_clear_table(url, headers, table) in (200, 204):
|
||||
cleared.append(table)
|
||||
logger.info(f" ✓ {table}")
|
||||
else:
|
||||
failed.append(table)
|
||||
return cleared, failed
|
||||
|
||||
|
||||
def _clear_exam_storage() -> Dict[str, Any]:
    """Remove the storage objects referenced by eb_exams / eb_specifications rows.

    Objects are deleted through the Storage API (protect_delete blocks raw SQL deletes).
    The ``storage_loc`` values are read from the tables, so this must run BEFORE the
    rows themselves are cleared.

    Returns:
        ``{"removed": int, "failed": int, "buckets": [...]}`` on a normal run, or
        ``{"removed": 0, "error": str}`` when the Supabase helpers cannot be imported.
        ``removed`` counts the paths submitted in successful remove calls.
    """
    try:
        from modules.database.supabase.utils.client import SupabaseServiceRoleClient
        from modules.database.supabase.utils.storage import StorageAdmin
    except Exception as exc:
        logger.warning(f" exam storage clear skipped (import): {exc}")
        return {"removed": 0, "error": str(exc)}

    sb = SupabaseServiceRoleClient().supabase
    storage = StorageAdmin()

    # PostgREST may cap the rows returned per response (Supabase's default max-rows is
    # 1000), so a single select can silently truncate a large corpus and leave orphaned
    # objects behind. Page until an empty page comes back; advancing by the number of
    # rows actually received keeps this correct whatever the server-side cap is.
    page_size = 1000
    locs: List[str] = []
    for table in ("eb_exams", "eb_specifications"):
        offset = 0
        try:
            while True:
                page = (
                    sb.table(table)
                    .select("storage_loc")
                    .order("storage_loc")  # stable order so pages do not overlap
                    .range(offset, offset + page_size - 1)
                    .execute()
                    .data
                    or []
                )
                if not page:
                    break
                locs += [r["storage_loc"] for r in page if r.get("storage_loc")]
                offset += len(page)
        except Exception as exc:
            # Best-effort: keep whatever was gathered and carry on with the next table.
            logger.warning(f" storage_loc gather {table}: {exc}")

    # storage_loc is '<bucket>/<object path>'; values without a '/' are skipped.
    by_bucket: Dict[str, List[str]] = {}
    for loc in locs:
        if "/" in loc:
            bucket, _, path = loc.partition("/")
            by_bucket.setdefault(bucket, []).append(path)

    removed = 0
    failed = 0
    for bucket, paths in by_bucket.items():
        for i in range(0, len(paths), 100):
            chunk = paths[i:i + 100]
            try:
                storage.client.supabase.storage.from_(bucket).remove(chunk)
                removed += len(chunk)
            except Exception as exc:
                failed += len(chunk)
                logger.warning(f" storage remove {bucket}: {exc}")
    logger.info(f" exam storage removed {removed} objects from {list(by_bucket)}")
    return {"removed": removed, "failed": failed, "buckets": list(by_bucket)}
|
||||
|
||||
|
||||
# ─── Main reset ───────────────────────────────────────────────────────────────
|
||||
|
||||
def reset() -> Dict[str, Any]:
|
||||
def reset(scope: str = "all") -> Dict[str, Any]:
|
||||
"""Destructive reset. scope ∈ {all, exam-corpus, timetable}.
|
||||
|
||||
- all : full wipe (Neo4j + Supabase data + auth users) AND the exam subsystem + storage.
|
||||
- exam-corpus : ONLY eb_*/exam_* tables + cc.examboards storage objects (load/unload the corpus).
|
||||
- timetable : ONLY timetable/calendar materialization tables.
|
||||
"""
|
||||
scope = (scope or "all").lower()
|
||||
if scope not in ("all", "exam-corpus", "timetable"):
|
||||
raise ValueError(f"invalid scope {scope!r} (want all|exam-corpus|timetable)")
|
||||
url, headers = _sb_headers()
|
||||
|
||||
if scope == "exam-corpus":
|
||||
logger.info("RESET (scope=exam-corpus) — exam tables + cc.examboards storage")
|
||||
storage = _clear_exam_storage()
|
||||
cleared, failed = _clear_tables(url, headers, EXAM_CORPUS_TABLES)
|
||||
return {"scope": scope, "exam_storage": storage, "tables_cleared": cleared, "tables_failed": failed}
|
||||
|
||||
if scope == "timetable":
|
||||
logger.info("RESET (scope=timetable) — timetable/calendar tables")
|
||||
cleared, failed = _clear_tables(url, headers, TIMETABLE_TABLES)
|
||||
return {"scope": scope, "tables_cleared": cleared, "tables_failed": failed}
|
||||
|
||||
logger.info("=" * 60)
|
||||
logger.info("RESET ENVIRONMENT — full destructive wipe starting")
|
||||
logger.info("=" * 60)
|
||||
results: Dict[str, Any] = {}
|
||||
results: Dict[str, Any] = {"scope": scope}
|
||||
|
||||
# ── 1. Neo4j: drop everything except system + neo4j ──────────────────────
|
||||
logger.info("\n[Neo4j] Dropping all non-system databases...")
|
||||
@ -213,11 +319,22 @@ def reset() -> Dict[str, Any]:
|
||||
)
|
||||
logger.info(" kcar → admin_profiles restored ✓")
|
||||
|
||||
# ── 5. Exam subsystem: storage objects (Storage API) + exam tables ───────────
|
||||
# (The legacy full reset cleared neither exam tables nor storage — folded in here.)
|
||||
logger.info("\n[Supabase] Clearing exam subsystem (storage + eb_*/exam_* tables)...")
|
||||
exam_storage = _clear_exam_storage()
|
||||
exam_cleared, exam_failed = _clear_tables(url, headers, EXAM_CORPUS_TABLES)
|
||||
|
||||
results["supabase"] = {
|
||||
"tables_cleared": cleared,
|
||||
"tables_failed": failed,
|
||||
"deleted_users": deleted_emails,
|
||||
}
|
||||
results["exam"] = {
|
||||
"storage": exam_storage,
|
||||
"tables_cleared": exam_cleared,
|
||||
"tables_failed": exam_failed,
|
||||
}
|
||||
|
||||
logger.info("\n" + "=" * 60)
|
||||
logger.info("RESET COMPLETE")
|
||||
|
||||
@ -60,6 +60,13 @@ DOC_ROLES = {"QP", "MS", "INSERT", "ER", "SPECIMEN", "GRADE_BOUNDARIES", "DATA_S
|
||||
TIERS = {"H", "F", None}
|
||||
# Default working dir for cached url: downloads (override with --cache-dir / EXAM_CORPUS_CACHE).
|
||||
DEFAULT_CACHE_DIR = os.getenv("EXAM_CORPUS_CACHE", "/tmp/exam-corpus-cache")
|
||||
# Persistent, mountable local store laid out exactly like the bucket (download once, seed many,
|
||||
# offline-repeatable). Override with --store-dir / EXAM_CORPUS_STORE. Distinct from --cache-dir,
|
||||
# which is a throwaway url hash-cache.
|
||||
DEFAULT_STORE_DIR = os.getenv(
|
||||
"EXAM_CORPUS_STORE",
|
||||
os.path.join(os.path.dirname(os.path.abspath(__file__)), "manifests", "_corpus_store"),
|
||||
)
|
||||
|
||||
|
||||
# ─────────────────────────────── canonical storage paths ───────────────────────────────
|
||||
@ -95,12 +102,24 @@ class LoadReport:
|
||||
user_copies: int = 0
|
||||
swept: int = 0
|
||||
sweep_failed: int = 0
|
||||
downloaded: int = 0
|
||||
download_cached: int = 0
|
||||
unseed_objects: int = 0
|
||||
unseed_exams: int = 0
|
||||
unseed_specs: int = 0
|
||||
unseed_templates: int = 0
|
||||
errors: List[str] = field(default_factory=list)
|
||||
|
||||
def as_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"specs_upserted": self.specs_upserted,
|
||||
"papers_upserted": self.papers_upserted,
|
||||
"downloaded": self.downloaded,
|
||||
"download_cached": self.download_cached,
|
||||
"unseed_objects": self.unseed_objects,
|
||||
"unseed_exams": self.unseed_exams,
|
||||
"unseed_specs": self.unseed_specs,
|
||||
"unseed_templates": self.unseed_templates,
|
||||
"files_uploaded": self.files_uploaded,
|
||||
"files_skipped": self.files_skipped,
|
||||
"files_failed": self.files_failed,
|
||||
@ -181,6 +200,70 @@ def _resolve_source_bytes(source: str, *, cache_dir: str) -> bytes:
|
||||
return fh.read()
|
||||
|
||||
|
||||
# ─────────────────────── persistent local store (download-once, seed-many) ───────────────────────
|
||||
def _store_path(store_dir: str, storage_loc: str) -> str:
|
||||
"""Local path mirroring the bucket layout (so the store is directly mountable as the corpus):
|
||||
storage_loc 'cc.examboards/aqa/physics/8463/1h/2022-jun/qp.pdf'
|
||||
-> {store_dir}/aqa/physics/8463/1h/2022-jun/qp.pdf
|
||||
"""
|
||||
_, _, path = storage_loc.partition("/")
|
||||
return os.path.join(store_dir, path)
|
||||
|
||||
def _item_bytes(source: str, storage_loc: str, *, store_dir: Optional[str], cache_dir: str,
                populate: bool = True, rep: Optional[LoadReport] = None) -> bytes:
    """Resolve bytes for an item, preferring the persistent local store when present.

    If store_dir holds a non-empty file at the item's canonical path, it is read and
    returned (offline). Otherwise the source is resolved via ``_resolve_source_bytes``
    and, when populate=True, written into the store for future offline runs.

    Args:
        source: Manifest source string, handed to ``_resolve_source_bytes``.
        storage_loc: Canonical '<bucket>/<path>' location; determines the store path.
        store_dir: Root of the local store, or None to bypass the store entirely.
        cache_dir: Cache directory handed to ``_resolve_source_bytes``.
        populate: Whether to write freshly resolved bytes into the store.
        rep: Optional report; ``download_cached`` is bumped on a store hit and
            ``downloaded`` after a successful store write.

    Returns:
        The item's raw bytes.
    """
    # Compute the store path once; None means "store disabled".
    store_file = _store_path(store_dir, storage_loc) if store_dir else None

    # A zero-byte file is treated as absent so it gets re-fetched.
    if store_file is not None and os.path.exists(store_file) and os.path.getsize(store_file) > 0:
        if rep is not None:
            rep.download_cached += 1
        with open(store_file, "rb") as fh:
            return fh.read()

    data = _resolve_source_bytes(source, cache_dir=cache_dir)

    if store_file is not None and populate:
        os.makedirs(os.path.dirname(store_file), exist_ok=True)
        # Write to a sibling temp file and rename, so a reader never sees a partial file.
        tmp = store_file + ".part"
        try:
            with open(tmp, "wb") as fh:
                fh.write(data)
            os.replace(tmp, store_file)
        except OSError:
            # Do not leave a stray .part file behind when the write or rename fails.
            try:
                os.remove(tmp)
            except OSError:
                pass
            raise
        if rep is not None:
            rep.downloaded += 1
    return data
|
||||
|
||||
def download_corpus(m: Dict[str, Any], *, store_dir: str, board_filter: Optional[str],
                    spec_filter: Optional[str], cache_dir: str, rep: LoadReport) -> None:
    """--download-only: populate the persistent local store from the manifest. No DB/bucket writes.

    A later run with the same --store-dir (e.g. mounted into the container) seeds offline from it.

    Args:
        m: Parsed manifest; reads ``boards[].specifications[].{spec_file, papers[]}``.
        store_dir: Root of the local store to populate.
        board_filter: If set, only boards whose ``exam_board_code`` matches are processed.
        spec_filter: If set, only specifications whose ``spec_code`` matches are processed.
        cache_dir: Cache directory handed through to ``_item_bytes``.
        rep: Report to update; per-item failures are appended to ``rep.errors``
            instead of aborting the run.
    """
    # `or []` also covers keys that are present but null in the YAML.
    for board in m.get("boards") or []:
        if board_filter and board.get("exam_board_code") != board_filter:
            continue
        for spec in board.get("specifications") or []:
            if spec_filter and spec.get("spec_code") != spec_filter:
                continue
            subject = spec.get("subject_code", "")
            award = spec.get("award_code", "")

            sf = spec.get("spec_file")
            if sf and sf.get("source"):
                try:
                    sloc = spec_storage_loc(board["exam_board_code"], subject, award,
                                            spec.get("spec_ver", ""))
                    _item_bytes(sf["source"], sloc, store_dir=store_dir, cache_dir=cache_dir, rep=rep)
                except Exception as exc:
                    rep.errors.append(f"download spec {spec.get('spec_code')}: {exc}")

            for p in spec.get("papers") or []:
                # Path construction and the required-key lookups live inside the try so
                # that one malformed paper entry is recorded as an error rather than
                # aborting the rest of the download.
                try:
                    ploc = paper_storage_loc(board["exam_board_code"], subject, award,
                                             p["paper_code"], p["session"], p["doc_type"])
                    _item_bytes(p["file"]["source"], ploc, store_dir=store_dir, cache_dir=cache_dir, rep=rep)
                except Exception as exc:
                    rep.errors.append(f"download {p.get('exam_code')}: {exc}")
    logger.info(f"download-only done: downloaded={rep.downloaded} already_in_store={rep.download_cached} "
                f"errors={len(rep.errors)} store={store_dir}")
|
||||
|
||||
|
||||
# ─────────────────────────────── storage upload (skip-if-exists + sha256) ───────────────────────────────
|
||||
def _split_loc(storage_loc: str) -> Tuple[str, str]:
|
||||
bucket, _, path = storage_loc.partition("/")
|
||||
@ -491,10 +574,88 @@ def first_sweep(client: SupabaseServiceRoleClient, storage: StorageAdmin,
|
||||
rep.swept += 1
|
||||
|
||||
|
||||
# ─────────────────────────────── unseed (inverse of the loader) ───────────────────────────────
|
||||
def _chunks(seq: List[Any], n: int = 100):
|
||||
for i in range(0, len(seq), n):
|
||||
yield seq[i:i + n]
|
||||
|
||||
def _select_all_rows(build_query, order_by: str, page_size: int = 1000) -> List[Dict[str, Any]]:
    """Run a select page by page and return every matching row.

    ``build_query`` is a zero-argument callable returning a fresh select builder.
    PostgREST may cap the rows per response (Supabase's default max-rows is 1000),
    so a single ``execute()`` can silently truncate. Paging stops on the first empty
    page and advances by the rows actually received, so it is correct for any cap.
    """
    rows: List[Dict[str, Any]] = []
    offset = 0
    while True:
        res = build_query().order(order_by).range(offset, offset + page_size - 1).execute()
        page = getattr(res, "data", None) or []
        if not page:
            return rows
        rows.extend(page)
        offset += len(page)


def unseed(client: SupabaseServiceRoleClient, storage: StorageAdmin, *,
           board_filter: Optional[str], spec_filter: Optional[str],
           drop_specs: bool = True, drop_seed_templates: bool = True, rep: LoadReport) -> None:
    """Inverse of the loader: remove the seeded public corpus, scoped by --board/--spec (or all).

    Deletes (in FK-safe order): cc.examboards storage objects (via the Storage API, since the
    protect_delete trigger blocks direct SQL deletes), first-sweep exam_templates created by the
    seed (title '... (auto-map seed)', cascades children), eb_exams rows, then eb_specifications.

    Args:
        client: Service-role client; ``client.supabase`` is used for table access.
        storage: Storage admin; ``storage.client.supabase.storage`` is used for object removal.
        board_filter: If set, only specifications with this ``exam_board_code``.
        spec_filter: If set, only the specification with this ``spec_code``.
        drop_specs: Also delete the matched eb_specifications rows.
        drop_seed_templates: Also delete exam_templates whose title contains '(auto-map seed)'.
        rep: Report whose ``unseed_*`` counters are updated. Object, exam and spec counters
            count items submitted in successful calls, not rows confirmed deleted.

    Each storage/delete batch is best-effort: a failure is logged and the run continues.
    """
    sb = client.supabase

    def _spec_query():
        q = sb.table("eb_specifications").select("spec_code, storage_loc, exam_board_code")
        if board_filter:
            q = q.eq("exam_board_code", board_filter)
        if spec_filter:
            q = q.eq("spec_code", spec_filter)
        return q

    specs = _select_all_rows(_spec_query, "spec_code")
    spec_codes = [s["spec_code"] for s in specs]
    if not spec_codes:
        logger.info("[unseed] no matching specifications; nothing to do")
        return

    # One chunk of spec codes can match far more exams than a single response returns,
    # so each chunk is itself paginated; otherwise the surplus exams (and their storage
    # objects) would survive the unseed.
    exams: List[Dict[str, Any]] = []
    for chunk in _chunks(spec_codes):
        exams.extend(_select_all_rows(
            lambda chunk=chunk: sb.table("eb_exams")
            .select("id, exam_code, storage_loc").in_("spec_code", chunk),
            "id",
        ))

    # 1) Storage objects (Storage API; batch-remove per bucket). Specs may carry a spec PDF too.
    by_bucket: Dict[str, List[str]] = {}
    for row in exams + specs:
        loc = row.get("storage_loc")
        if not loc or "/" not in loc:
            continue
        bkt, _, path = loc.partition("/")
        by_bucket.setdefault(bkt, []).append(path)
    for bkt, paths in by_bucket.items():
        for chunk in _chunks(paths, 100):
            try:
                storage.client.supabase.storage.from_(bkt).remove(chunk)
                rep.unseed_objects += len(chunk)
            except Exception as exc:
                logger.warning(f"[unseed] storage remove failed ({bkt}, {len(chunk)} objs): {exc}")

    # 2) First-sweep templates created by the seed (cascades questions/regions/boundaries/layout).
    if drop_seed_templates and exams:
        exam_codes = [e["exam_code"] for e in exams if e.get("exam_code")]
        for chunk in _chunks(exam_codes, 100):
            try:
                res = sb.table("exam_templates").delete(count="exact") \
                    .in_("exam_code", chunk).like("title", "%(auto-map seed)%").execute()
                # Prefer the exact count; fall back to the number of returned rows.
                rep.unseed_templates += getattr(res, "count", None) or len(getattr(res, "data", []) or [])
            except Exception as exc:
                logger.warning(f"[unseed] template delete failed: {exc}")

    # 3) Catalogue rows: eb_exams (by id), then eb_specifications (by spec_code).
    exam_ids = [e["id"] for e in exams]
    for chunk in _chunks(exam_ids, 100):
        try:
            sb.table("eb_exams").delete().in_("id", chunk).execute()
            rep.unseed_exams += len(chunk)
        except Exception as exc:
            logger.warning(f"[unseed] eb_exams delete failed: {exc}")
    if drop_specs:
        for chunk in _chunks(spec_codes, 100):
            try:
                sb.table("eb_specifications").delete().in_("spec_code", chunk).execute()
                rep.unseed_specs += len(chunk)
            except Exception as exc:
                logger.warning(f"[unseed] eb_specifications delete failed: {exc}")

    logger.info(f"unseed done: storage_objects={rep.unseed_objects} templates={rep.unseed_templates} "
                f"exams={rep.unseed_exams} specs={rep.unseed_specs}")
|
||||
|
||||
|
||||
# ─────────────────────────────── orchestration ───────────────────────────────
|
||||
def load(manifest_path: str, *, dry_run: bool, force: bool, board_filter: Optional[str],
|
||||
spec_filter: Optional[str], user_subset: bool, do_first_sweep: bool,
|
||||
cache_dir: str = DEFAULT_CACHE_DIR) -> LoadReport:
|
||||
cache_dir: str = DEFAULT_CACHE_DIR, store_dir: Optional[str] = None) -> LoadReport:
|
||||
with open(manifest_path) as f:
|
||||
m = yaml.safe_load(f)
|
||||
rep = LoadReport()
|
||||
@ -526,7 +687,9 @@ def load(manifest_path: str, *, dry_run: bool, force: bool, board_filter: Option
|
||||
spec.get("award_code", ""), spec.get("spec_ver", ""))
|
||||
if not dry_run:
|
||||
try:
|
||||
spec_sha = upload_file(storage, sloc, _resolve_source_bytes(sf["source"], cache_dir=cache_dir),
|
||||
spec_sha = upload_file(storage, sloc,
|
||||
_item_bytes(sf["source"], sloc, store_dir=store_dir,
|
||||
cache_dir=cache_dir, rep=rep),
|
||||
force=force, rep=rep)
|
||||
except Exception as exc:
|
||||
logger.error(f"[spec-file] {spec.get('spec_code')}: {exc}")
|
||||
@ -543,7 +706,9 @@ def load(manifest_path: str, *, dry_run: bool, force: bool, board_filter: Option
|
||||
continue
|
||||
psha = None
|
||||
try:
|
||||
psha = upload_file(storage, ploc, _resolve_source_bytes(p["file"]["source"], cache_dir=cache_dir),
|
||||
psha = upload_file(storage, ploc,
|
||||
_item_bytes(p["file"]["source"], ploc, store_dir=store_dir,
|
||||
cache_dir=cache_dir, rep=rep),
|
||||
force=force, rep=rep)
|
||||
except Exception as exc:
|
||||
logger.error(f"[paper-file] {p.get('exam_code')}: {exc}")
|
||||
@ -563,19 +728,49 @@ def load(manifest_path: str, *, dry_run: bool, force: bool, board_filter: Option
|
||||
|
||||
|
||||
def main() -> None:
|
||||
ap = argparse.ArgumentParser(description="Seed the public exam-paper corpus from a manifest.")
|
||||
ap.add_argument("--manifest", required=True)
|
||||
ap = argparse.ArgumentParser(description="Seed (or unseed) the public exam-paper corpus from a manifest.")
|
||||
ap.add_argument("--manifest", help="corpus manifest (required except for --unseed)")
|
||||
ap.add_argument("--dry-run", action="store_true", help="validate + report, no writes")
|
||||
ap.add_argument("--force", action="store_true", help="re-upload/overwrite existing storage objects")
|
||||
ap.add_argument("--board", default=None, help="only this exam_board_code")
|
||||
ap.add_argument("--spec", default=None, help="only this spec_code")
|
||||
ap.add_argument("--user-subset", action="store_true", help="also seed a user-side test subset")
|
||||
ap.add_argument("--first-sweep", action="store_true", help="run docling/auto-map first pass on seeded papers")
|
||||
ap.add_argument("--cache-dir", default=DEFAULT_CACHE_DIR, help="cache dir for url: downloads")
|
||||
ap.add_argument("--cache-dir", default=DEFAULT_CACHE_DIR, help="throwaway url-hash cache dir")
|
||||
ap.add_argument("--store-dir", default=DEFAULT_STORE_DIR,
|
||||
help="persistent, bucket-shaped local store (download-once, seed-many)")
|
||||
ap.add_argument("--no-store", action="store_true",
|
||||
help="ignore the local store; always fetch from source (don't read/populate the store)")
|
||||
ap.add_argument("--download-only", action="store_true",
|
||||
help="populate the local store from the manifest; no DB/bucket writes")
|
||||
ap.add_argument("--unseed", action="store_true",
|
||||
help="INVERSE: remove seeded eb_*/storage/first-sweep templates (scoped by --board/--spec)")
|
||||
a = ap.parse_args()
|
||||
rep = load(a.manifest, dry_run=a.dry_run, force=a.force, board_filter=a.board, spec_filter=a.spec,
|
||||
user_subset=a.user_subset, do_first_sweep=a.first_sweep, cache_dir=a.cache_dir)
|
||||
store_dir = None if a.no_store else a.store_dir
|
||||
import json
|
||||
|
||||
if a.unseed:
|
||||
rep = LoadReport()
|
||||
unseed(SupabaseServiceRoleClient(), StorageAdmin(),
|
||||
board_filter=a.board, spec_filter=a.spec, rep=rep)
|
||||
print(json.dumps(rep.as_dict(), indent=2))
|
||||
return
|
||||
|
||||
if not a.manifest:
|
||||
ap.error("--manifest is required unless --unseed is given")
|
||||
|
||||
if a.download_only:
|
||||
with open(a.manifest) as f:
|
||||
m = yaml.safe_load(f)
|
||||
rep = LoadReport()
|
||||
download_corpus(m, store_dir=(a.store_dir), board_filter=a.board, spec_filter=a.spec,
|
||||
cache_dir=a.cache_dir, rep=rep)
|
||||
print(json.dumps(rep.as_dict(), indent=2))
|
||||
return
|
||||
|
||||
rep = load(a.manifest, dry_run=a.dry_run, force=a.force, board_filter=a.board, spec_filter=a.spec,
|
||||
user_subset=a.user_subset, do_first_sweep=a.first_sweep, cache_dir=a.cache_dir,
|
||||
store_dir=store_dir)
|
||||
print(json.dumps(rep.as_dict(), indent=2))
|
||||
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user