""" reset_environment.py — DESTRUCTIVE wipe of all non-permanent data. Clears: - Neo4j: drops ALL databases except system, neo4j (including gaisdata, cc.users.*, cc.institutes.*) - Supabase: deletes ALL data tables except gais_local_authorities and gais_schools - Supabase: deletes all auth users except kcar, then re-seeds kcar profile state - Granular scopes can clear exam corpus, timetable data, or --user-subset seed copies Safe invariants (never touched): - kcar auth account - gais_local_authorities and gais_schools Supabase tables - system / neo4j Neo4j system databases Run from inside the ccapi container: python3 -c "from run.initialization.reset_environment import reset; reset()" """ import os import time import requests from typing import List, Dict, Any from modules.logger_tool import initialise_logger import modules.database.tools.neo4j_driver_tools as dt logger = initialise_logger(__name__, os.getenv("LOG_LEVEL"), os.getenv("LOG_PATH"), "default", True) KCAR_ID = "d9e1d1a9-04c4-4611-bb05-57babf4a9a28" KCAR_EMAIL = "kcar@kevlarai.com" # Neo4j system databases — never drop these NEO4J_SYSTEM_DBS = {"system", "neo4j"} # Supabase tables to clear, in FK child-first order. # gais_local_authorities and gais_schools are intentionally absent. 
# Ordered list of Supabase tables wiped by the full reset. The order is significant:
# tables are listed FK child-first, so the groups below must stay in this sequence.
SUPABASE_TABLES_TO_CLEAR = (
    # Transcription (deepest children first)
    [
        "canvas_events",
        "keyword_events",
        "transcription_summaries",
        "transcription_segments",
        "keyword_watches",
        "transcription_sessions",
    ]
    # Lesson delivery chain
    + ["lesson_deliveries", "lesson_collaborators"]
    # Timetable materialization
    + ["taught_lessons"]
    # Academic calendar (children before parents)
    + [
        "academic_periods",
        "academic_days",
        "academic_weeks",
        "academic_term_breaks",
        "academic_terms",
        "academic_years",
    ]
    # Teacher timetables
    + ["teacher_timetable_slots", "teacher_timetables", "school_timetables"]
    # Lesson plans
    + ["planned_lessons"]
    # Whiteboard rooms
    + ["whiteboard_rooms"]
    # Classes & enrollment
    + ["enrollment_requests", "class_students", "class_teachers", "classes"]
    # Files & brains
    + [
        "document_artefacts",
        "brain_files",
        "cabinet_memberships",
        "files",
        "file_cabinets",
        "brains",
    ]
    # Invitations & memberships
    + ["invitations", "institute_memberships", "institute_membership_requests"]
    # Institutes
    + ["institutes"]
    # Profiles (non-kcar cleared separately via auth deletion cascade)
    + ["admin_profiles"]
)

# Exam-marker subsystem tables, FK child-first. scope="exam-corpus" is deliberately
# broader than "public papers": it wipes public corpus eb_* rows, templates, layouts,
# questions, boundaries, response areas, marking batches, student submissions, and mark
# entries.
# EXAM_CORPUS_TABLES is NOT part of SUPABASE_TABLES_TO_CLEAR — the previous full reset()
# never cleared exam data or storage at all; the granular scopes below fold it in.
EXAM_CORPUS_TABLES = [
    "mark_entries", "student_submissions", "marking_batches",
    "exam_response_areas", "exam_boundaries", "exam_template_layout",
    "exam_questions", "exam_templates",
    "eb_exams", "eb_specifications",
]

# Timetable / calendar materialization subset (for scope='timetable').
TIMETABLE_TABLES = [
    "lesson_deliveries", "lesson_collaborators", "taught_lessons",
    "academic_periods", "academic_days", "academic_weeks", "academic_term_breaks",
    "academic_terms", "academic_years",
    "teacher_timetable_slots", "teacher_timetables", "school_timetables",
    "planned_lessons",
]

# Bucket whose objects scope="exam-corpus" clears for the whole exam-marker subsystem
# (Storage API — protect_delete blocks raw SQL).
EXAM_STORAGE_BUCKET = "cc.examboards"

# Scopes accepted by reset().
_VALID_SCOPES = ("all", "exam-corpus", "timetable", "user-subset")

# (connect, read) timeout in seconds for every HTTP call made by this module, so a
# stalled Supabase endpoint cannot hang the reset forever. The read timeout is generous
# because a single DELETE may have to clear a whole table.
_HTTP_TIMEOUT = (10, 300)

# Page size used when listing auth users through the admin API.
_AUTH_USERS_PAGE_SIZE = 200


def _sb_headers():
    """Return ``(supabase_url, service_role_headers)`` built from the environment.

    Raises:
        KeyError: if SUPABASE_URL or SERVICE_ROLE_KEY is not set.
    """
    url = os.environ["SUPABASE_URL"]
    key = os.environ["SERVICE_ROLE_KEY"]
    return url, {
        "apikey": key,
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
        "Prefer": "return=minimal",
    }


# Markers that identify a production Supabase target. Destructive reset against any of these is
# refused by default (project rule: ".94 only; .156 human-gated") — set RESET_ALLOW_PROD=1 to override.
PROD_TARGET_MARKERS = ("192.168.0.156", "supabase.classroomcopilot")


def _assert_reset_allowed(url: str, scope: str) -> None:
    """Default-deny destructive reset against a production-looking Supabase target.

    The /admin/reset route and this module both act on os.environ['SUPABASE_URL'];
    without this guard a platform-admin call on a prod-deployed API would wipe prod
    data + exam corpus + storage. We refuse when the target matches a known prod
    marker unless an explicit RESET_ALLOW_PROD opt-in is set.

    Raises:
        RuntimeError: if *url* contains a PROD_TARGET_MARKERS entry and
            RESET_ALLOW_PROD is not one of "1", "true", "yes" (case-insensitive).
    """
    target = (url or "").lower()
    looks_prod = any(m in target for m in PROD_TARGET_MARKERS)
    override = os.environ.get("RESET_ALLOW_PROD", "").strip().lower() in ("1", "true", "yes")
    if looks_prod and not override:
        raise RuntimeError(
            f"refusing destructive reset (scope={scope}) against production-looking target {target!r}; "
            f"this is human-gated — set RESET_ALLOW_PROD=1 to override."
        )


# ─── Neo4j helpers ────────────────────────────────────────────────────────────

def _neo4j_drop_all_non_system() -> List[str]:
    """Drop every Neo4j DB except the system-reserved ones.

    Returns:
        Names of the databases whose DROP statement ran without raising. A failed
        drop is logged as a warning and skipped rather than aborting the reset.
    """
    with dt.get_session(database="system") as s:
        all_dbs = [r["name"] for r in s.run("SHOW DATABASES YIELD name RETURN name")]
    to_drop = [db for db in all_dbs if db not in NEO4J_SYSTEM_DBS]

    dropped = []
    for db in to_drop:
        logger.info(f" DROP DATABASE `{db}`")
        try:
            with dt.get_session(database="system") as s:
                s.run(f"DROP DATABASE `{db}` IF EXISTS")
            dropped.append(db)
        except Exception as e:  # best-effort: keep dropping the remaining databases
            logger.warning(f" Could not drop `{db}`: {e}")
    return dropped


# ─── Supabase helpers ─────────────────────────────────────────────────────────

# Tables without an `id` column — map to the column to use as the delete filter.
TABLE_FILTER_COLUMN = {
    "brain_files": "brain_id",
}


def _warn_unless_ok(action: str, response) -> bool:
    """Return True for a non-error HTTP response; otherwise log a warning and return False."""
    if response.ok:
        return True
    logger.warning(f" {action}: {response.status_code} {response.text[:120]}")
    return False


def _sb_clear_table(url: str, headers: dict, table: str) -> int:
    """Delete all rows from a Supabase table. Returns HTTP status.

    The REST call carries a ``<col>=not.is.null`` filter, where ``<col>`` is ``id``
    unless TABLE_FILTER_COLUMN names another column for the table. Any status other
    than 200/204 is logged as a warning; the status code is returned either way.
    """
    col = TABLE_FILTER_COLUMN.get(table, "id")
    r = requests.delete(
        f"{url}/rest/v1/{table}",
        headers=headers,
        params={col: "not.is.null"},
        timeout=_HTTP_TIMEOUT,
    )
    if r.status_code not in (200, 204):
        logger.warning(f" Clear {table}: {r.status_code} {r.text[:120]}")
    return r.status_code


def _supabase_list_auth_users(url: str, headers: dict) -> List[Dict]:
    """Return every auth user, following the admin endpoint's pagination.

    Pages are requested until one yields no user that has not been seen already; this
    stops both on the natural end of the listing and if the server were to ignore the
    ``page`` parameter and keep returning the same rows.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    users: List[Dict] = []
    seen_ids = set()
    page = 1
    while True:
        r = requests.get(
            f"{url}/auth/v1/admin/users",
            headers=headers,
            params={"page": page, "per_page": _AUTH_USERS_PAGE_SIZE},
            timeout=_HTTP_TIMEOUT,
        )
        r.raise_for_status()
        batch = r.json().get("users", [])
        fresh = [u for u in batch if u.get("id") not in seen_ids]
        if not fresh:
            return users
        users.extend(fresh)
        seen_ids.update(u.get("id") for u in fresh)
        page += 1


def _supabase_delete_auth_user(url: str, headers: dict, uid: str) -> bool:
    """Delete one auth user by id; return True on HTTP 200/204, else warn and return False."""
    r = requests.delete(
        f"{url}/auth/v1/admin/users/{uid}",
        headers=headers,
        timeout=_HTTP_TIMEOUT,
    )
    if r.status_code not in (200, 204):
        logger.warning(f" Delete auth user {uid}: {r.status_code} {r.text[:80]}")
        return False
    return True


def _is_kcar(user: Dict[str, Any]) -> bool:
    """True when an auth-user record is the protected kcar account (by id or e-mail)."""
    email = (user.get("email") or "").strip().lower()
    return user.get("id") == KCAR_ID or email == KCAR_EMAIL.lower()


def _delete_non_kcar_auth_users(url: str, headers: dict) -> "tuple[List[str], List[str]]":
    """Delete every auth user except kcar.

    Returns:
        ``(deleted, failed)`` — each entry is the user's e-mail, or the id when the
        record carries no e-mail.
    """
    deleted, failed = [], []
    for user in _supabase_list_auth_users(url, headers):
        if _is_kcar(user):
            continue
        uid = user.get("id")
        label = user.get("email") or str(uid)
        if uid and _supabase_delete_auth_user(url, headers, uid):
            deleted.append(label)
        else:
            failed.append(label)
        time.sleep(0.05)  # brief pause between admin API calls
    return deleted, failed


def _restore_kcar_state(url: str, headers: dict) -> None:
    """Reset the kcar profile and re-create its admin_profiles row.

    Each step logs its "✓" line only when the request succeeded; a failure is logged
    as a warning and does not abort the reset.
    """
    patched = requests.patch(
        f"{url}/rest/v1/profiles",
        headers=headers,
        params={"id": f"eq.{KCAR_ID}"},
        json={"school_id": None},
        timeout=_HTTP_TIMEOUT,
    )
    if _warn_unless_ok("Reset kcar profile", patched):
        logger.info(" kcar → school_id: null ✓")

    # Restore admin_profiles row (wiped with other tables above)
    restored = requests.post(
        f"{url}/rest/v1/admin_profiles",
        headers={**headers, "Prefer": "resolution=merge-duplicates"},
        json={
            "id": KCAR_ID,
            "email": KCAR_EMAIL,
            "display_name": "Kevin Carroll",
            "admin_role": "super_admin",
            "is_super_admin": True,
        },
        timeout=_HTTP_TIMEOUT,
    )
    if _warn_unless_ok("Restore kcar admin_profiles", restored):
        logger.info(" kcar → admin_profiles restored ✓")


# ─── Granular helpers ───────────────────────────────────────────────────────────

def _clear_tables(url: str, headers: dict, tables: List[str]) -> "tuple[List[str], List[str]]":
    """Clear *tables* in the given order; return ``(cleared, failed)`` table names."""
    cleared, failed = [], []
    for table in tables:
        if _sb_clear_table(url, headers, table) in (200, 204):
            cleared.append(table)
            logger.info(f" ✓ {table}")
        else:
            failed.append(table)
    return cleared, failed


def _clear_exam_storage() -> Dict[str, Any]:
    """Remove cc.examboards objects for the exam-marker subsystem.

    scope="exam-corpus" is not limited to public-paper metadata: it also removes the
    storage objects that back exam board corpus files and any downstream exam-marker
    artifacts referenced from eb_exams/eb_specifications. Gathers storage_loc from
    eb_exams/eb_specifications BEFORE the rows are cleared.

    Returns:
        ``{"removed": n, "buckets": [...]}``, or ``{"removed": 0, "error": msg}`` when
        the Supabase helper modules cannot be imported. ``removed`` counts the paths
        submitted in remove calls that did not raise.
    """
    try:
        from modules.database.supabase.utils.client import SupabaseServiceRoleClient
        from modules.database.supabase.utils.storage import StorageAdmin
    except Exception as exc:
        logger.warning(f" exam storage clear skipped (import): {exc}")
        return {"removed": 0, "error": str(exc)}

    sb = SupabaseServiceRoleClient().supabase
    storage = StorageAdmin()

    locs: List[str] = []
    for table in ("eb_exams", "eb_specifications"):
        try:
            rows = sb.table(table).select("storage_loc").execute().data or []
            locs += [r["storage_loc"] for r in rows if r.get("storage_loc")]
        except Exception as exc:
            logger.warning(f" storage_loc gather {table}: {exc}")

    # storage_loc is treated as "<bucket>/<object path>": split on the first slash.
    # NOTE(review): the bucket comes from the row itself; EXAM_STORAGE_BUCKET is not
    # enforced here — confirm whether removal should be restricted to that bucket.
    by_bucket: Dict[str, List[str]] = {}
    skipped = 0
    for loc in locs:
        if "/" not in loc:
            skipped += 1
            continue
        b, _, p = loc.partition("/")
        by_bucket.setdefault(b, []).append(p)
    if skipped:
        logger.warning(f" exam storage: {skipped} storage_loc value(s) without a bucket prefix skipped")

    removed = 0
    for b, paths in by_bucket.items():
        # Remove in batches of 100 paths per Storage API call.
        for i in range(0, len(paths), 100):
            chunk = paths[i:i + 100]
            try:
                storage.client.supabase.storage.from_(b).remove(chunk)
                removed += len(chunk)
            except Exception as exc:
                logger.warning(f" storage remove {b}: {exc}")
    logger.info(f" exam storage removed {removed} objects from {list(by_bucket)}")
    return {"removed": removed, "buckets": list(by_bucket)}


def _clear_user_subset_files() -> Dict[str, Any]:
    """Remove files rows and cc.users storage objects created by --user-subset seeding.

    Reuses the seed/unseed implementation so reset(scope="user-subset") has the same
    storage-before-row deletion order and idempotency guarantees as
    seed_exam_corpus.py --unseed. The helper only targets rows marked by the seeder:
    bucket='cc.users', source='exam-corpus-seed', path LIKE 'exam-marker/%'.

    Returns:
        Counts and errors copied from the seeder's LoadReport, or zero counts plus the
        import error message when the seeder modules cannot be imported.
    """
    try:
        from modules.database.supabase.utils.client import SupabaseServiceRoleClient
        from modules.database.supabase.utils.storage import StorageAdmin
        from run.initialization.seed_exam_corpus import LoadReport, _delete_user_subset_files
    except Exception as exc:
        logger.warning(f" user-subset clear skipped (import): {exc}")
        return {"files_rows_deleted": 0, "storage_objects_removed": 0, "errors": [str(exc)]}

    rep = LoadReport()
    _delete_user_subset_files(
        SupabaseServiceRoleClient(),
        StorageAdmin(),
        exam_codes=None,
        rep=rep,
    )
    return {
        "files_rows_deleted": rep.unseed_user_files,
        "storage_objects_removed": rep.unseed_objects,
        "errors": rep.errors,
    }


# ─── Main reset ───────────────────────────────────────────────────────────────

def reset(scope: str = "all") -> Dict[str, Any]:
    """Destructive reset. scope ∈ {all, exam-corpus, timetable, user-subset}.

    - all         : full wipe (Neo4j + Supabase data + auth users) AND the entire
                    exam-marker subsystem listed below, including --user-subset copies.
    - exam-corpus : ONLY the entire exam-marker subsystem, not just public papers:
                    public corpus/eb_* data, cc.examboards storage objects, exam
                    templates, template layouts, questions, boundaries, response areas,
                    marking batches, student submissions, mark entries, and
                    --user-subset cc.users copies.
    - timetable   : ONLY timetable/calendar materialization tables.
    - user-subset : ONLY files rows and cc.users storage objects created by
                    seed_exam_corpus.py --user-subset.

    The scope is matched case-insensitively with surrounding whitespace ignored;
    None or an empty string means "all".

    Returns:
        A dict describing what was cleared; its keys depend on the scope.

    Raises:
        ValueError: if *scope* is not one of the four supported values.
        RuntimeError: if the Supabase target looks like production and
            RESET_ALLOW_PROD is not set.
        KeyError: if SUPABASE_URL or SERVICE_ROLE_KEY is missing from the environment.
    """
    scope = (scope or "all").strip().lower()
    if scope not in _VALID_SCOPES:
        raise ValueError(f"invalid scope {scope!r} (want all|exam-corpus|timetable|user-subset)")

    url, headers = _sb_headers()
    _assert_reset_allowed(url, scope)

    if scope == "exam-corpus":
        logger.info("RESET (scope=exam-corpus) — entire exam-marker subsystem: public corpus/eb_* data, cc.examboards storage, templates/layout/questions/boundaries/response areas, marking batches, submissions, mark entries, and --user-subset copies")
        user_subset = _clear_user_subset_files()
        storage = _clear_exam_storage()  # must run before eb_* rows are deleted
        cleared, failed = _clear_tables(url, headers, EXAM_CORPUS_TABLES)
        return {"scope": scope, "user_subset": user_subset, "exam_storage": storage,
                "tables_cleared": cleared, "tables_failed": failed}

    if scope == "timetable":
        logger.info("RESET (scope=timetable) — timetable/calendar tables")
        cleared, failed = _clear_tables(url, headers, TIMETABLE_TABLES)
        return {"scope": scope, "tables_cleared": cleared, "tables_failed": failed}

    if scope == "user-subset":
        logger.info("RESET (scope=user-subset) — --user-subset cc.users storage objects and files rows")
        user_subset = _clear_user_subset_files()
        return {"scope": scope, "user_subset": user_subset}

    logger.info("=" * 60)
    logger.info("RESET ENVIRONMENT — full destructive wipe starting")
    logger.info("=" * 60)

    results: Dict[str, Any] = {"scope": scope}

    # ── 1. Neo4j: drop everything except system + neo4j ──────────────────────
    logger.info("\n[Neo4j] Dropping all non-system databases...")
    dropped = _neo4j_drop_all_non_system()
    logger.info(f" Dropped {len(dropped)}: {dropped}")
    results["neo4j"] = {"dropped": dropped}

    # ── 2. Supabase: clear all data tables (GAIS preserved) ──────────────────
    # First remove --user-subset cc.users storage objects (+ their files rows) via the
    # Storage API, so the generic files-table clear below doesn't strand orphaned objects.
    results["user_subset"] = _clear_user_subset_files()

    logger.info("\n[Supabase] Clearing data tables (preserving gais_*)...")
    cleared, failed = _clear_tables(url, headers, SUPABASE_TABLES_TO_CLEAR)
    logger.info(f" Cleared {len(cleared)} tables, {len(failed)} failed")

    # ── 3. Supabase: delete all auth users except kcar ────────────────────────
    logger.info("\n[Supabase] Deleting test auth users...")
    deleted_emails, failed_users = _delete_non_kcar_auth_users(url, headers)
    logger.info(f" Deleted {len(deleted_emails)} auth users")
    if failed_users:
        logger.warning(f" Could not delete {len(failed_users)} auth users: {failed_users}")

    # Explicit cleanup in case cascade didn't fire
    orphan_cleanup = requests.delete(
        f"{url}/rest/v1/profiles",
        headers=headers,
        params={"id": f"neq.{KCAR_ID}"},
        timeout=_HTTP_TIMEOUT,
    )
    _warn_unless_ok("Clear non-kcar profiles", orphan_cleanup)

    # ── 4. Reset kcar profile to known-good platform_admin state ──────────────
    logger.info("\n[Supabase] Resetting kcar profile...")
    _restore_kcar_state(url, headers)

    # ── 5. Exam-marker subsystem: storage objects (Storage API) + all exam tables ──
    # This is the same destructive surface as scope="exam-corpus": public corpus/eb_*
    # rows, cc.examboards storage, templates/layout/questions/boundaries/response
    # areas, marking batches, submissions, and mark entries. (The legacy full reset
    # cleared neither exam tables nor storage — folded in here.)
    # NOTE(review): these tables are cleared after the core tables and auth users; if
    # they hold foreign keys into those without ON DELETE CASCADE, steps 2-3 may report
    # failures — confirm against the schema.
    logger.info("\n[Supabase] Clearing entire exam-marker subsystem (public corpus, storage, templates/layout/questions/boundaries/response areas, marking batches, submissions, mark entries)...")
    exam_storage = _clear_exam_storage()
    exam_cleared, exam_failed = _clear_tables(url, headers, EXAM_CORPUS_TABLES)

    results["supabase"] = {
        "tables_cleared": cleared,
        "tables_failed": failed,
        "deleted_users": deleted_emails,
        "failed_users": failed_users,
    }
    results["exam"] = {
        "storage": exam_storage,
        "tables_cleared": exam_cleared,
        "tables_failed": exam_failed,
    }

    logger.info("\n" + "=" * 60)
    logger.info("RESET COMPLETE")
    logger.info("=" * 60)
    return results


if __name__ == "__main__":
    import json
    print(json.dumps(reset(), indent=2, default=str))