PRIMARY — corpus breadth (505->1178 papers, 18->60 specs, all URLs HEAD-verified): - AQA (enumerated): Maths, English Lang/Lit, Geography, Computer Science, Business, Psychology, MFL (French/Spanish/German), GCSE + A-level, on top of round-1 sciences. - Edexcel + OCR (confirmed direct URLs via research): Maths, English, Geography, History, Business, Computer Science, GCSE + A-level. - generate_corpus_manifest.py: _subj/_mfl AQA builders, Edexcel/OCR spec+URL tables, derived exam_code (_mk_exam_code) matching the locked convention, concurrent re-verify. Verified on dev .94: eb_specifications=60, eb_exams=1178, QP=469, doc_type all 'pdf', seed idempotent (uploaded=673 new, skipped=505), failed=0. SECONDARY: - --download-only + persistent bucket-shaped local store (manifests/_corpus_store/, gitignored): download-once, seed-many, offline-repeatable; --store-dir/--no-store. (_store_path/_item_bytes/ download_corpus). Verified: store populated, seed reads offline (download_cached). - --unseed [--board/--spec]: inverse loader — storage objects (Storage API; protect_delete blocks raw SQL), first-sweep seed templates, eb_exams, eb_specifications. Verified reversible on .94. - Granular admin reset: POST /admin/reset?scope=all|exam-corpus|timetable. reset_environment.reset(scope) adds EXAM_CORPUS_TABLES (10) + cc.examboards storage cleanup + TIMETABLE_TABLES (13); 'all' now also clears the exam subsystem the legacy reset missed. No schema migration required. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
348 lines
14 KiB
Python
348 lines
14 KiB
Python
"""
|
|
reset_environment.py — DESTRUCTIVE wipe of all non-permanent data.
|
|
|
|
Clears:
|
|
- Neo4j: drops ALL databases except system, neo4j (including gaisdata, cc.users.*, cc.institutes.*)
|
|
- Supabase: deletes ALL data tables except gais_local_authorities and gais_schools
|
|
- Supabase: deletes all auth users except kcar, then re-seeds kcar profile state
|
|
|
|
Safe invariants (never touched):
|
|
- kcar auth account
|
|
- gais_local_authorities and gais_schools Supabase tables
|
|
- system / neo4j Neo4j system databases
|
|
|
|
Run from inside the ccapi container:
|
|
python3 -c "from run.initialization.reset_environment import reset; reset()"
|
|
"""
|
|
import os
|
|
import time
|
|
import requests
|
|
from typing import List, Dict, Any
|
|
|
|
from modules.logger_tool import initialise_logger
|
|
import modules.database.tools.neo4j_driver_tools as dt
|
|
|
|
# Module-level logger; level and output path are read from the LOG_LEVEL / LOG_PATH
# environment variables (either may be unset, in which case None is passed through).
logger = initialise_logger(
    __name__,
    os.getenv("LOG_LEVEL"),
    os.getenv("LOG_PATH"),
    "default",
    True,
)
|
|
|
|
# Identity of the one auth account that the full reset preserves and re-seeds.
KCAR_ID = "d9e1d1a9-04c4-4611-bb05-57babf4a9a28"
KCAR_EMAIL = "kcar@kevlarai.com"

# Neo4j system databases — never drop these
NEO4J_SYSTEM_DBS = {"system", "neo4j"}
|
|
|
|
# Supabase tables wiped by the full reset, listed children-before-parents so that
# foreign-key references are removed before the rows they point at.
# gais_local_authorities and gais_schools are deliberately NOT listed (preserved data).
SUPABASE_TABLES_TO_CLEAR = [
    # -- Transcription (deepest children first) --
    "canvas_events",
    "keyword_events",
    "transcription_summaries",
    "transcription_segments",
    "keyword_watches",
    "transcription_sessions",
    # -- Lesson delivery chain --
    "lesson_deliveries",
    "lesson_collaborators",
    # -- Timetable materialization --
    "taught_lessons",
    # -- Academic calendar (children -> parents) --
    "academic_periods",
    "academic_days",
    "academic_weeks",
    "academic_term_breaks",
    "academic_terms",
    "academic_years",
    # -- Teacher timetables --
    "teacher_timetable_slots",
    "teacher_timetables",
    "school_timetables",
    # -- Lesson plans --
    "planned_lessons",
    # -- Whiteboard rooms --
    "whiteboard_rooms",
    # -- Classes & enrollment --
    "enrollment_requests",
    "class_students",
    "class_teachers",
    "classes",
    # -- Files & brains --
    "document_artefacts",
    "brain_files",
    "cabinet_memberships",
    "files",
    "file_cabinets",
    "brains",
    # -- Invitations & memberships --
    "invitations",
    "institute_memberships",
    "institute_membership_requests",
    # -- Institutes --
    "institutes",
    # -- Profiles (non-kcar profiles are removed separately via auth deletion cascade) --
    "admin_profiles",
]
|
|
|
|
# Exam subsystem tables, ordered FK child-first. These are kept apart from
# SUPABASE_TABLES_TO_CLEAR: the previous full reset() never cleared exam data or
# storage at all, so the granular scopes fold them in as a separate list.
EXAM_CORPUS_TABLES = [
    "mark_entries",
    "student_submissions",
    "marking_batches",
    "exam_response_areas",
    "exam_boundaries",
    "exam_template_layout",
    "exam_questions",
    "exam_templates",
    "eb_exams",
    "eb_specifications",
]
|
|
|
|
# Timetable / calendar materialization subset, cleared when scope='timetable'.
# Same child-first ordering as the corresponding run inside SUPABASE_TABLES_TO_CLEAR.
TIMETABLE_TABLES = [
    # lesson delivery chain
    "lesson_deliveries",
    "lesson_collaborators",
    "taught_lessons",
    # academic calendar, children before parents
    "academic_periods",
    "academic_days",
    "academic_weeks",
    "academic_term_breaks",
    "academic_terms",
    "academic_years",
    # timetables
    "teacher_timetable_slots",
    "teacher_timetables",
    "school_timetables",
    # lesson plans
    "planned_lessons",
]
|
|
|
|
# Bucket whose objects the exam-corpus reset clears. Objects are removed through the
# Storage API because protect_delete blocks raw SQL deletes.
# NOTE(review): the helpers visible in this file derive the bucket from each row's
# storage_loc prefix rather than reading this constant — confirm whether it is used elsewhere.
EXAM_STORAGE_BUCKET = "cc.examboards"
|
|
|
|
|
|
def _sb_headers():
    """Return ``(base_url, headers)`` for service-role calls to the Supabase REST/Auth APIs.

    Reads SUPABASE_URL and SERVICE_ROLE_KEY from the environment; a missing
    variable raises KeyError.
    """
    base_url = os.environ["SUPABASE_URL"]
    service_key = os.environ["SERVICE_ROLE_KEY"]

    request_headers = dict(apikey=service_key, Authorization=f"Bearer {service_key}")
    request_headers["Content-Type"] = "application/json"
    # Ask PostgREST not to echo affected rows back in the response body.
    request_headers["Prefer"] = "return=minimal"
    return base_url, request_headers
|
|
|
|
|
|
# ─── Neo4j helpers ────────────────────────────────────────────────────────────
|
|
|
|
def _neo4j_drop_all_non_system() -> List[str]:
    """Drop every Neo4j database except the system-reserved ones.

    Returns:
        The names of the databases for which DROP DATABASE was issued without
        raising. A database that fails to drop is logged as a warning and
        omitted; the remaining databases are still attempted.
    """
    with dt.get_session(database="system") as s:
        all_dbs = [r["name"] for r in s.run("SHOW DATABASES YIELD name RETURN name")]

    # De-duplicate while keeping order, so a name listed more than once (SHOW DATABASES
    # may presumably repeat a database per server in a cluster) is dropped and reported once.
    to_drop = [db for db in dict.fromkeys(all_dbs) if db not in NEO4J_SYSTEM_DBS]

    dropped: List[str] = []
    for db in to_drop:
        logger.info(f" DROP DATABASE `{db}`")
        try:
            with dt.get_session(database="system") as s:
                s.run(f"DROP DATABASE `{db}` IF EXISTS")
            dropped.append(db)
        except Exception as e:
            # Best-effort: one undroppable database must not abort the whole reset.
            logger.warning(f" Could not drop `{db}`: {e}")
    return dropped
|
|
|
|
|
|
# ─── Supabase helpers ─────────────────────────────────────────────────────────
|
|
|
|
# Tables without an `id` column — map to the column to use as the delete filter.
|
|
# table name -> column used as the "not null" delete filter; tables that are not
# listed here are filtered on "id" by _sb_clear_table.
TABLE_FILTER_COLUMN = dict(
    brain_files="brain_id",
)
|
|
|
|
def _sb_clear_table(url: str, headers: dict, table: str) -> int:
    """Issue a REST DELETE that matches every row of ``table`` and return the HTTP status.

    The DELETE is filtered with ``<column>=not.is.null``; the column is looked up
    in TABLE_FILTER_COLUMN and falls back to ``id``. A status other than 200/204
    is logged as a warning (with the first 120 characters of the response body)
    but is still returned to the caller rather than raised.
    """
    filter_column = TABLE_FILTER_COLUMN.get(table, "id")
    endpoint = f"{url}/rest/v1/{table}"
    response = requests.delete(
        endpoint,
        headers=headers,
        params={filter_column: "not.is.null"},
    )
    status = response.status_code
    if status != 200 and status != 204:
        logger.warning(f" Clear {table}: {status} {response.text[:120]}")
    return status
|
|
|
|
|
|
def _supabase_list_auth_users(url: str, headers: dict) -> List[Dict]:
    """Return every auth user known to the Supabase Auth admin API.

    The admin endpoint is paginated; fetching only the first page would leave
    any users beyond it untouched by the "delete all except kcar" step. Pages
    are requested until one contributes no previously unseen user.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    per_page = 200
    max_pages = 1000  # hard stop so a misbehaving server cannot loop forever

    users: List[Dict] = []
    seen_ids = set()
    for page in range(1, max_pages + 1):
        r = requests.get(
            f"{url}/auth/v1/admin/users",
            headers=headers,
            params={"page": page, "per_page": per_page},
        )
        r.raise_for_status()
        batch = r.json().get("users", [])
        # Stop on an empty page, and also when the server ignores `page` and keeps
        # returning the same users (nothing new on this page).
        fresh = [u for u in batch if u.get("id") not in seen_ids]
        if not fresh:
            break
        seen_ids.update(u.get("id") for u in fresh)
        users.extend(fresh)
    return users
|
|
|
|
|
|
def _supabase_delete_auth_user(url: str, headers: dict, uid: str):
    """Delete one auth user through the admin API.

    A response status other than 200/204 is logged as a warning (with the first
    80 characters of the body); nothing is raised or returned for it.
    """
    response = requests.delete(f"{url}/auth/v1/admin/users/{uid}", headers=headers)
    if response.status_code in (200, 204):
        return
    logger.warning(f" Delete auth user {uid}: {response.status_code} {response.text[:80]}")
|
|
|
|
|
|
# ─── Granular helpers ───────────────────────────────────────────────────────────
|
|
|
|
def _clear_tables(url: str, headers: dict, tables: List[str]) -> "tuple[List[str], List[str]]":
    """Clear each table in order and report which succeeded.

    Returns:
        ``(cleared, failed)`` — table names whose DELETE returned 200/204, and
        those that returned anything else. Every table is attempted even after
        an earlier failure.
    """
    succeeded: List[str] = []
    rejected: List[str] = []
    for name in tables:
        status = _sb_clear_table(url, headers, name)
        if status not in (200, 204):
            # The failing status was already logged by _sb_clear_table.
            rejected.append(name)
            continue
        succeeded.append(name)
        logger.info(f" ✓ {name}")
    return succeeded, rejected
|
|
|
|
|
|
def _clear_exam_storage() -> Dict[str, Any]:
    """Remove exam-corpus storage objects via the Storage API.

    Raw SQL deletes are blocked by protect_delete, so objects are removed with the
    storage client instead. The object list is gathered from the ``storage_loc``
    column of eb_exams / eb_specifications, so this must run BEFORE those rows are
    cleared. Each ``storage_loc`` is read as ``<bucket>/<path>``; objects are
    removed from whichever bucket that prefix names.

    Returns:
        ``{"removed": <count of paths submitted for removal>, "buckets": [...]}``,
        or ``{"removed": 0, "error": "..."}`` when the storage modules cannot be
        imported. Gather and removal failures are logged and skipped (best-effort).
    """
    try:
        from modules.database.supabase.utils.client import SupabaseServiceRoleClient
        from modules.database.supabase.utils.storage import StorageAdmin
    except Exception as exc:
        logger.warning(f" exam storage clear skipped (import): {exc}")
        return {"removed": 0, "error": str(exc)}
    sb = SupabaseServiceRoleClient().supabase
    storage = StorageAdmin()

    # Page through the rows: a single select is truncated at the API's row cap,
    # which would leave the remaining objects orphaned once the rows are deleted.
    # NOTE(review): assumes `sb` is a supabase-py client exposing order()/range() — confirm.
    page_size = 1000
    max_pages = 1000  # hard stop so a misbehaving server cannot loop forever
    locs: List[str] = []
    for table in ("eb_exams", "eb_specifications"):
        try:
            offset = 0
            for _ in range(max_pages):
                rows = (
                    sb.table(table)
                    .select("storage_loc")
                    # Deterministic order keeps offset paging stable; ties share the
                    # same storage_loc, so they cannot hide a distinct value.
                    .order("storage_loc")
                    .range(offset, offset + page_size - 1)
                    .execute()
                    .data
                    or []
                )
                if not rows:
                    break
                locs += [r["storage_loc"] for r in rows if r.get("storage_loc")]
                # Advance by what was actually returned so a smaller server cap still works.
                offset += len(rows)
        except Exception as exc:
            logger.warning(f" storage_loc gather {table}: {exc}")

    by_bucket: Dict[str, List[str]] = {}
    malformed = 0
    # dict.fromkeys de-duplicates while preserving order, so no object is submitted twice.
    for loc in dict.fromkeys(locs):
        b, sep, p = loc.partition("/")
        if not sep:
            malformed += 1
            continue
        by_bucket.setdefault(b, []).append(p)
    if malformed:
        logger.warning(f" exam storage: skipped {malformed} storage_loc value(s) without a bucket prefix")

    removed = 0
    for b, paths in by_bucket.items():
        # Remove in chunks of 100 paths per Storage API call.
        for i in range(0, len(paths), 100):
            chunk = paths[i:i + 100]
            try:
                storage.client.supabase.storage.from_(b).remove(chunk)
                removed += len(chunk)
            except Exception as exc:
                logger.warning(f" storage remove {b}: {exc}")
    logger.info(f" exam storage removed {removed} objects from {list(by_bucket)}")
    return {"removed": removed, "buckets": list(by_bucket)}
|
|
|
|
|
|
# ─── Main reset ───────────────────────────────────────────────────────────────
|
|
|
|
def _warn_unless_ok(response, action: str) -> bool:
    """Log a warning when ``response`` is not a success status; return True on success."""
    if response.ok:
        return True
    logger.warning(f" {action}: {response.status_code} {response.text[:120]}")
    return False


def reset(scope: str = "all") -> Dict[str, Any]:
    """Destructive reset. scope ∈ {all, exam-corpus, timetable}.

    - all         : full wipe (Neo4j + Supabase data + auth users) AND the exam subsystem + storage.
    - exam-corpus : ONLY eb_*/exam_* tables + cc.examboards storage objects (load/unload the corpus).
    - timetable   : ONLY timetable/calendar materialization tables.

    Args:
        scope: one of the values above, case-insensitive; None or "" means "all".

    Returns:
        A summary dict. It always carries "scope". The granular scopes add
        "tables_cleared" / "tables_failed" (plus "exam_storage" for exam-corpus);
        the full reset adds "neo4j", "supabase" and "exam" sections.

    Raises:
        ValueError: if ``scope`` is not one of the accepted values.
    """
    scope = (scope or "all").lower()
    if scope not in ("all", "exam-corpus", "timetable"):
        raise ValueError(f"invalid scope {scope!r} (want all|exam-corpus|timetable)")
    url, headers = _sb_headers()

    if scope == "exam-corpus":
        logger.info("RESET (scope=exam-corpus) — exam tables + cc.examboards storage")
        # Storage first: the object paths are read from rows the table clear deletes.
        storage = _clear_exam_storage()
        cleared, failed = _clear_tables(url, headers, EXAM_CORPUS_TABLES)
        return {"scope": scope, "exam_storage": storage, "tables_cleared": cleared, "tables_failed": failed}

    if scope == "timetable":
        logger.info("RESET (scope=timetable) — timetable/calendar tables")
        cleared, failed = _clear_tables(url, headers, TIMETABLE_TABLES)
        return {"scope": scope, "tables_cleared": cleared, "tables_failed": failed}

    logger.info("=" * 60)
    logger.info("RESET ENVIRONMENT — full destructive wipe starting")
    logger.info("=" * 60)
    results: Dict[str, Any] = {"scope": scope}

    # ── 1. Neo4j: drop everything except system + neo4j ──────────────────────
    logger.info("\n[Neo4j] Dropping all non-system databases...")
    dropped = _neo4j_drop_all_non_system()
    logger.info(f" Dropped {len(dropped)}: {dropped}")
    results["neo4j"] = {"dropped": dropped}

    # ── 2. Supabase: clear all data tables (GAIS preserved) ──────────────────
    logger.info("\n[Supabase] Clearing data tables (preserving gais_*)...")
    cleared, failed = _clear_tables(url, headers, SUPABASE_TABLES_TO_CLEAR)
    logger.info(f" Cleared {len(cleared)} tables, {len(failed)} failed")

    # ── 3. Supabase: delete all auth users except kcar ────────────────────────
    logger.info("\n[Supabase] Deleting test auth users...")
    all_users = _supabase_list_auth_users(url, headers)
    deleted_emails = []
    for u in all_users:
        # .get(): a user record without an email key must not abort the reset.
        email = u.get("email")
        # Protect kcar by id as well as by email, so a changed email cannot expose it.
        if u.get("id") == KCAR_ID or email == KCAR_EMAIL:
            continue
        _supabase_delete_auth_user(url, headers, u["id"])
        deleted_emails.append(email)
        time.sleep(0.05)  # brief pause between admin API calls
    logger.info(f" Deleted {len(deleted_emails)} auth users")

    # Explicit cleanup in case cascade didn't fire
    _warn_unless_ok(
        requests.delete(
            f"{url}/rest/v1/profiles",
            headers=headers,
            params={"id": f"neq.{KCAR_ID}"},
        ),
        "Clear non-kcar profiles",
    )

    # ── 4. Reset kcar profile to known-good platform_admin state ──────────────
    logger.info("\n[Supabase] Resetting kcar profile...")
    patched = requests.patch(
        f"{url}/rest/v1/profiles",
        headers=headers,
        params={"id": f"eq.{KCAR_ID}"},
        json={"school_id": None},
    )
    if _warn_unless_ok(patched, "Reset kcar profile"):
        logger.info(" kcar → school_id: null ✓")

    # Restore admin_profiles row (wiped with other tables above)
    restored = requests.post(
        f"{url}/rest/v1/admin_profiles",
        # Upsert, so a surviving row is merged instead of causing a conflict.
        headers={**headers, "Prefer": "resolution=merge-duplicates"},
        json={
            "id": KCAR_ID,
            "email": KCAR_EMAIL,
            "display_name": "Kevin Carroll",
            "admin_role": "super_admin",
            "is_super_admin": True,
        },
    )
    if _warn_unless_ok(restored, "Restore kcar admin_profiles"):
        logger.info(" kcar → admin_profiles restored ✓")

    # ── 5. Exam subsystem: storage objects (Storage API) + exam tables ───────────
    # (The legacy full reset cleared neither exam tables nor storage — folded in here.)
    logger.info("\n[Supabase] Clearing exam subsystem (storage + eb_*/exam_* tables)...")
    exam_storage = _clear_exam_storage()
    exam_cleared, exam_failed = _clear_tables(url, headers, EXAM_CORPUS_TABLES)

    results["supabase"] = {
        "tables_cleared": cleared,
        "tables_failed": failed,
        "deleted_users": deleted_emails,
    }
    results["exam"] = {
        "storage": exam_storage,
        "tables_cleared": exam_cleared,
        "tables_failed": exam_failed,
    }

    logger.info("\n" + "=" * 60)
    logger.info("RESET COMPLETE")
    logger.info("=" * 60)
    return results
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the default reset (scope="all") and pretty-print the
    # summary; default=str stringifies any value json cannot serialise natively.
    import json

    summary = reset()
    print(json.dumps(summary, indent=2, default=str))
|