Some checks failed
api-ci-deploy / test-build-deploy (push) Has been cancelled
t_d1600327 added a standalone scope=user-subset, but a full reset (scope=all) and scope=exam-corpus still left the --user-subset cc.users storage objects orphaned (files rows are wiped by the table clear, but the Storage API objects are not). Call the same _clear_user_subset_files() helper in both paths so the finding-#2 gap is fully closed: storage removed before rows, idempotent. Closes overwatch review finding #2 (user-subset not cleaned by reset). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
430 lines
19 KiB
Python
430 lines
19 KiB
Python
"""
|
|
reset_environment.py — DESTRUCTIVE wipe of all non-permanent data.
|
|
|
|
Clears:
|
|
- Neo4j: drops ALL databases except system, neo4j (including gaisdata, cc.users.*, cc.institutes.*)
|
|
- Supabase: deletes ALL data tables except gais_local_authorities and gais_schools
|
|
- Supabase: deletes all auth users except kcar, then re-seeds kcar profile state
|
|
- Granular scopes can clear exam corpus, timetable data, or --user-subset seed copies
|
|
|
|
Safe invariants (never touched):
|
|
- kcar auth account
|
|
- gais_local_authorities and gais_schools Supabase tables
|
|
- system / neo4j Neo4j system databases
|
|
|
|
Run from inside the ccapi container:
|
|
python3 -c "from run.initialization.reset_environment import reset; reset()"
|
|
"""
|
|
import os
|
|
import time
|
|
import requests
|
|
from typing import List, Dict, Any
|
|
|
|
from modules.logger_tool import initialise_logger
|
|
import modules.database.tools.neo4j_driver_tools as dt
|
|
|
|
logger = initialise_logger(__name__, os.getenv("LOG_LEVEL"), os.getenv("LOG_PATH"), "default", True)
|
|
|
|
# The single auth account that reset() keeps: the full wipe skips it when
# deleting auth users and re-seeds its profile rows afterwards.
KCAR_ID = "d9e1d1a9-04c4-4611-bb05-57babf4a9a28"
KCAR_EMAIL = "kcar@kevlarai.com"

# Neo4j database names excluded from the drop-everything pass.
NEO4J_SYSTEM_DBS = {"system", "neo4j"}
|
|
|
|
# Supabase tables to clear, in FK child-first order.
|
|
# gais_local_authorities and gais_schools are intentionally absent.
|
|
SUPABASE_TABLES_TO_CLEAR = [
    # Transcription: dependent tables first, sessions last.
    "canvas_events",
    "keyword_events",
    "transcription_summaries",
    "transcription_segments",
    "keyword_watches",
    "transcription_sessions",
    # Lesson delivery chain.
    "lesson_deliveries",
    "lesson_collaborators",
    # Timetable materialization.
    "taught_lessons",
    # Academic calendar, children before parents.
    "academic_periods",
    "academic_days",
    "academic_weeks",
    "academic_term_breaks",
    "academic_terms",
    "academic_years",
    # Teacher timetables.
    "teacher_timetable_slots",
    "teacher_timetables",
    "school_timetables",
    # Lesson plans.
    "planned_lessons",
    # Whiteboard rooms.
    "whiteboard_rooms",
    # Classes and enrollment.
    "enrollment_requests",
    "class_students",
    "class_teachers",
    "classes",
    # Files and brains.
    "document_artefacts",
    "brain_files",
    "cabinet_memberships",
    "files",
    "file_cabinets",
    "brains",
    # Invitations and memberships.
    "invitations",
    "institute_memberships",
    "institute_membership_requests",
    # Institutes.
    "institutes",
    # Profiles: non-kcar profiles are handled separately by the auth-user
    # deletion step; only admin_profiles is cleared through this list.
    "admin_profiles",
]
|
|
|
|
# Exam-marker subsystem tables, FK child-first. scope="exam-corpus" is deliberately
|
|
# broader than "public papers": it wipes public corpus eb_* rows, templates, layouts,
|
|
# questions, boundaries, response areas, marking batches, student submissions, and mark
|
|
# entries. NOT in the list above — the previous full reset() never cleared exam data
|
|
# or storage at all; the granular scopes below fold it in.
|
|
EXAM_CORPUS_TABLES = [
    # Marking results and submissions.
    "mark_entries",
    "student_submissions",
    "marking_batches",
    # Template structure.
    "exam_response_areas",
    "exam_boundaries",
    "exam_template_layout",
    "exam_questions",
    "exam_templates",
    # Public corpus rows; their storage_loc values are read by
    # _clear_exam_storage() before these are cleared.
    "eb_exams",
    "eb_specifications",
]
|
|
|
|
# Timetable / calendar materialization subset (for scope='timetable').
|
|
TIMETABLE_TABLES = [
    # Lesson delivery chain and materialized lessons.
    "lesson_deliveries",
    "lesson_collaborators",
    "taught_lessons",
    # Academic calendar.
    "academic_periods",
    "academic_days",
    "academic_weeks",
    "academic_term_breaks",
    "academic_terms",
    "academic_years",
    # Timetables and lesson plans.
    "teacher_timetable_slots",
    "teacher_timetables",
    "school_timetables",
    "planned_lessons",
]
|
|
|
|
# Storage bucket holding the exam-marker subsystem's objects. Its objects are
# removed through the Storage API (protect_delete blocks raw SQL).
# NOTE(review): no code visible in this file reads this constant —
# _clear_exam_storage() takes the bucket name from each row's storage_loc.
EXAM_STORAGE_BUCKET = "cc.examboards"
|
|
|
|
|
|
def _sb_headers():
    """Return ``(url, headers)`` for service-role calls to Supabase.

    Both values come from the environment: SUPABASE_URL and SERVICE_ROLE_KEY.
    A missing variable raises KeyError.
    """
    base_url = os.environ["SUPABASE_URL"]
    service_key = os.environ["SERVICE_ROLE_KEY"]
    request_headers = dict(
        apikey=service_key,
        Authorization=f"Bearer {service_key}",
    )
    request_headers["Content-Type"] = "application/json"
    request_headers["Prefer"] = "return=minimal"
    return base_url, request_headers
|
|
|
|
|
|
# Markers that identify a production Supabase target. Destructive reset against any of these is
|
|
# refused by default (project rule: ".94 only; .156 human-gated") — set RESET_ALLOW_PROD=1 to override.
|
|
PROD_TARGET_MARKERS = ("192.168.0.156", "supabase.classroomcopilot")


def _assert_reset_allowed(url: str, scope: str) -> None:
    """Refuse a destructive reset aimed at a production-looking Supabase URL.

    The URL is lower-cased and checked for any PROD_TARGET_MARKERS substring.
    A match is allowed through only when the RESET_ALLOW_PROD environment
    variable is set to 1, true or yes (case-insensitive, surrounding
    whitespace ignored).

    Args:
        url: Supabase base URL the reset would act on; None or "" never matches.
        scope: Reset scope, used only in the error message.

    Raises:
        RuntimeError: the target matches a marker and no override is set.
    """
    normalized = (url or "").lower()
    opt_in = os.environ.get("RESET_ALLOW_PROD", "").strip().lower()
    if opt_in in ("1", "true", "yes"):
        return
    for marker in PROD_TARGET_MARKERS:
        if marker in normalized:
            raise RuntimeError(
                f"refusing destructive reset (scope={scope}) against production-looking target {normalized!r}; "
                "this is human-gated — set RESET_ALLOW_PROD=1 to override."
            )
|
|
|
|
|
|
# ─── Neo4j helpers ────────────────────────────────────────────────────────────
|
|
|
|
def _neo4j_drop_all_non_system() -> List[str]:
    """Drop every Neo4j database except those named in NEO4J_SYSTEM_DBS.

    Database names are listed through a session on the ``system`` database;
    each remaining name gets its own ``DROP DATABASE ... IF EXISTS`` statement.
    A drop that raises is logged as a warning and the loop moves on, so one
    failing database does not abort the rest of the wipe.

    Returns:
        Names of the databases whose drop statement ran without raising.
    """
    with dt.get_session(database="system") as s:
        all_dbs = [r["name"] for r in s.run("SHOW DATABASES YIELD name RETURN name")]

    dropped: List[str] = []
    for db in all_dbs:
        if db in NEO4J_SYSTEM_DBS:
            continue
        logger.info(f" DROP DATABASE `{db}`")
        try:
            with dt.get_session(database="system") as s:
                s.run(f"DROP DATABASE `{db}` IF EXISTS")
            dropped.append(db)
        except Exception as e:
            # Deliberately best-effort: report the failure and keep going.
            logger.warning(f" Could not drop `{db}`: {e}")
    return dropped
|
|
|
|
|
|
# ─── Supabase helpers ─────────────────────────────────────────────────────────
|
|
|
|
# Tables without an `id` column — map to the column to use as the delete filter.
|
|
# table name -> column used in the delete filter; tables not listed here are
# filtered on "id" by _sb_clear_table().
TABLE_FILTER_COLUMN: Dict[str, str] = {"brain_files": "brain_id"}
|
|
|
|
def _sb_clear_table(url: str, headers: dict, table: str) -> int:
    """Send one DELETE to the Supabase REST endpoint for ``table``.

    The request filters on ``<column>=not.is.null``, where the column is the
    table's TABLE_FILTER_COLUMN entry or "id" when it has none. Any status
    other than 200/204 is logged as a warning; nothing is raised for it.

    Returns:
        The HTTP status code of the DELETE response.
    """
    filter_column = TABLE_FILTER_COLUMN.get(table, "id")
    endpoint = f"{url}/rest/v1/{table}"
    response = requests.delete(
        endpoint,
        headers=headers,
        params={filter_column: "not.is.null"},
    )
    status = response.status_code
    if status != 200 and status != 204:
        logger.warning(f" Clear {table}: {status} {response.text[:120]}")
    return status
|
|
|
|
|
|
def _supabase_list_auth_users(url: str, headers: dict) -> List[Dict]:
    """Return every auth user known to the Supabase admin API.

    The admin endpoint is paginated, so a single request with ``per_page=200``
    only ever sees the first 200 accounts. Pages are requested in order until
    one contributes no user that has not been seen already. Stopping on
    "nothing new" rather than on a short page keeps the loop correct if the
    server caps ``per_page`` below the requested size, and terminates it if
    the server ignores ``page`` altogether.

    Args:
        url: Supabase base URL.
        headers: Service-role request headers.

    Returns:
        The user dicts from all pages, in the order the API returned them.

    Raises:
        requests.HTTPError: if any page request returns an error status.
    """
    users: List[Dict] = []
    seen_ids = set()
    page = 1
    while True:
        r = requests.get(
            f"{url}/auth/v1/admin/users",
            headers=headers,
            params={"page": page, "per_page": 200},
        )
        r.raise_for_status()
        fresh = [u for u in r.json().get("users", []) if u.get("id") not in seen_ids]
        if not fresh:
            return users
        seen_ids.update(u.get("id") for u in fresh)
        users.extend(fresh)
        page += 1
|
|
|
|
|
|
def _supabase_delete_auth_user(url: str, headers: dict, uid: str):
    """Delete one auth user through the Supabase admin API.

    A response other than 200/204 is logged as a warning; the function
    returns None in every case and does not raise on an error status.
    """
    outcome = requests.delete(f"{url}/auth/v1/admin/users/{uid}", headers=headers)
    if outcome.status_code in (200, 204):
        return
    logger.warning(f" Delete auth user {uid}: {outcome.status_code} {outcome.text[:80]}")
|
|
|
|
|
|
# ─── Granular helpers ───────────────────────────────────────────────────────────
|
|
|
|
def _clear_tables(url: str, headers: dict, tables: List[str]) -> "tuple[List[str], List[str]]":
    """Clear each table in order and sort the names by outcome.

    Returns:
        ``(cleared, failed)``: tables whose delete returned 200/204, and
        tables whose delete returned anything else. Successful tables are
        also logged at info level.
    """
    succeeded: List[str] = []
    rejected: List[str] = []
    for name in tables:
        status = _sb_clear_table(url, headers, name)
        if status not in (200, 204):
            rejected.append(name)
            continue
        succeeded.append(name)
        logger.info(f" ✓ {name}")
    return succeeded, rejected
|
|
|
|
|
|
def _clear_exam_storage() -> Dict[str, Any]:
    """Delete the storage objects referenced by eb_exams and eb_specifications.

    Must run while those rows still exist: object locations are read from
    their ``storage_loc`` column. Each location is split at its first "/"
    into bucket and object path (locations without a "/" are skipped), and
    the paths are removed through the Storage client in chunks of 100.

    Failures while reading a table or removing a chunk are logged as warnings
    and skipped rather than raised.

    Returns:
        ``{"removed": <count>, "buckets": [...]}`` where the count covers
        every path in a chunk whose remove call did not raise, or
        ``{"removed": 0, "error": <message>}`` when the Supabase helper
        modules cannot be imported.
    """
    try:
        from modules.database.supabase.utils.client import SupabaseServiceRoleClient
        from modules.database.supabase.utils.storage import StorageAdmin
    except Exception as import_error:
        logger.warning(f" exam storage clear skipped (import): {import_error}")
        return {"removed": 0, "error": str(import_error)}

    db_client = SupabaseServiceRoleClient().supabase
    storage_admin = StorageAdmin()

    # Collect every non-empty storage_loc before the rows are wiped.
    storage_locs: List[str] = []
    for source_table in ("eb_exams", "eb_specifications"):
        try:
            result = db_client.table(source_table).select("storage_loc").execute()
            found = [row["storage_loc"] for row in (result.data or []) if row.get("storage_loc")]
        except Exception as gather_error:
            logger.warning(f" storage_loc gather {source_table}: {gather_error}")
        else:
            storage_locs.extend(found)

    # Group object paths under the bucket named by the first path segment.
    paths_by_bucket: Dict[str, List[str]] = {}
    for location in storage_locs:
        if "/" not in location:
            continue
        bucket, _, object_path = location.partition("/")
        paths_by_bucket.setdefault(bucket, []).append(object_path)

    removed_count = 0
    for bucket, object_paths in paths_by_bucket.items():
        for start in range(0, len(object_paths), 100):
            batch = object_paths[start:start + 100]
            try:
                storage_admin.client.supabase.storage.from_(bucket).remove(batch)
            except Exception as remove_error:
                logger.warning(f" storage remove {bucket}: {remove_error}")
            else:
                removed_count += len(batch)

    bucket_names = list(paths_by_bucket)
    logger.info(f" exam storage removed {removed_count} objects from {bucket_names}")
    return {"removed": removed_count, "buckets": bucket_names}
|
|
|
|
|
|
def _clear_user_subset_files() -> Dict[str, Any]:
    """Remove the files rows and storage objects left by --user-subset seeding.

    Delegates to ``_delete_user_subset_files`` from seed_exam_corpus with
    ``exam_codes=None`` and reports the counters that helper records on a
    fresh ``LoadReport``. Which rows the helper targets and in what order it
    deletes is decided in seed_exam_corpus, not here (the original notes
    describe it as storage-before-row and idempotent — confirm there).

    Returns:
        A dict with ``files_rows_deleted``, ``storage_objects_removed`` and
        ``errors``. When the required modules cannot be imported the counts
        are 0 and ``errors`` holds the import error message.
    """
    try:
        from modules.database.supabase.utils.client import SupabaseServiceRoleClient
        from modules.database.supabase.utils.storage import StorageAdmin
        from run.initialization.seed_exam_corpus import LoadReport, _delete_user_subset_files
    except Exception as import_error:
        logger.warning(f" user-subset clear skipped (import): {import_error}")
        return {"files_rows_deleted": 0, "storage_objects_removed": 0, "errors": [str(import_error)]}

    report = LoadReport()
    service_client = SupabaseServiceRoleClient()
    storage_admin = StorageAdmin()
    _delete_user_subset_files(service_client, storage_admin, exam_codes=None, rep=report)

    summary: Dict[str, Any] = {}
    summary["files_rows_deleted"] = report.unseed_user_files
    summary["storage_objects_removed"] = report.unseed_objects
    summary["errors"] = report.errors
    return summary
|
|
|
|
|
|
# ─── Main reset ───────────────────────────────────────────────────────────────
|
|
|
|
def _log_write_outcome(response, success_message: str, action: str) -> None:
    """Log ``success_message`` for a non-error response, else warn that ``action`` failed."""
    if response.ok:
        logger.info(success_message)
    else:
        logger.warning(f" {action} failed: {response.status_code} {response.text[:120]}")


def _delete_non_kcar_auth_users(url: str, headers: dict) -> List[Any]:
    """Delete every listed auth user except kcar; return the emails sent for deletion."""
    deleted_emails: List[Any] = []
    for user in _supabase_list_auth_users(url, headers):
        email = user.get("email")
        # Protect kcar by id as well as by email: the profile and admin_profiles
        # re-seed is keyed on KCAR_ID, so that account must survive even if its
        # email was changed.
        if user.get("id") == KCAR_ID or email == KCAR_EMAIL:
            continue
        _supabase_delete_auth_user(url, headers, user["id"])
        deleted_emails.append(email)
        time.sleep(0.05)
    return deleted_emails


def _restore_kcar_state(url: str, headers: dict) -> None:
    """Remove leftover non-kcar profiles and re-seed kcar's profile and admin row."""
    # Explicit cleanup in case the auth-deletion cascade didn't fire.
    cleanup = requests.delete(
        f"{url}/rest/v1/profiles",
        headers=headers,
        params={"id": f"neq.{KCAR_ID}"},
    )
    if not cleanup.ok:
        logger.warning(f" non-kcar profiles cleanup failed: {cleanup.status_code} {cleanup.text[:120]}")

    logger.info("\n[Supabase] Resetting kcar profile...")
    patched = requests.patch(
        f"{url}/rest/v1/profiles",
        headers=headers,
        params={"id": f"eq.{KCAR_ID}"},
        json={"school_id": None},
    )
    _log_write_outcome(patched, " kcar → school_id: null ✓", "kcar profile reset")

    # admin_profiles is wiped with the other data tables, so put kcar's row back.
    restored = requests.post(
        f"{url}/rest/v1/admin_profiles",
        headers={**headers, "Prefer": "resolution=merge-duplicates"},
        json={
            "id": KCAR_ID,
            "email": KCAR_EMAIL,
            "display_name": "Kevin Carroll",
            "admin_role": "super_admin",
            "is_super_admin": True,
        },
    )
    _log_write_outcome(restored, " kcar → admin_profiles restored ✓", "kcar admin_profiles restore")


def reset(scope: str = "all") -> Dict[str, Any]:
    """Destructive reset. scope ∈ {all, exam-corpus, timetable, user-subset}.

    - all         : full wipe (Neo4j + Supabase data + auth users) AND the entire
                    exam-marker subsystem listed below, including --user-subset copies.
    - exam-corpus : ONLY the entire exam-marker subsystem, not just public papers:
                    public corpus/eb_* data, cc.examboards storage objects, exam
                    templates, template layouts, questions, boundaries, response
                    areas, marking batches, student submissions, mark entries, and
                    --user-subset cc.users copies.
    - timetable   : ONLY timetable/calendar materialization tables.
    - user-subset : ONLY files rows and cc.users storage objects created by
                    seed_exam_corpus.py --user-subset.

    Args:
        scope: One of the values above, case-insensitive; None or "" means "all".

    Returns:
        A summary dict that always carries ``scope``. Granular scopes add
        ``tables_cleared`` / ``tables_failed`` and/or ``user_subset`` /
        ``exam_storage``; the full wipe adds ``neo4j``, ``user_subset``,
        ``supabase`` and ``exam`` sections.

    Raises:
        ValueError: ``scope`` is not a recognised value.
        RuntimeError: the Supabase target looks like production and
            RESET_ALLOW_PROD is not set (see _assert_reset_allowed).
        KeyError: SUPABASE_URL or SERVICE_ROLE_KEY is missing from the environment.
    """
    scope = (scope or "all").lower()
    if scope not in ("all", "exam-corpus", "timetable", "user-subset"):
        raise ValueError(f"invalid scope {scope!r} (want all|exam-corpus|timetable|user-subset)")

    # The URL checked by the guard is the one used for every request below; it
    # is intentionally not re-read from the environment afterwards.
    url, headers = _sb_headers()
    _assert_reset_allowed(url, scope)

    if scope == "exam-corpus":
        logger.info("RESET (scope=exam-corpus) — entire exam-marker subsystem: public corpus/eb_* data, cc.examboards storage, templates/layout/questions/boundaries/response areas, marking batches, submissions, mark entries, and --user-subset copies")
        # Storage objects go first: the table clear would otherwise discard
        # the rows that say where they live.
        user_subset = _clear_user_subset_files()
        storage = _clear_exam_storage()
        cleared, failed = _clear_tables(url, headers, EXAM_CORPUS_TABLES)
        return {"scope": scope, "user_subset": user_subset, "exam_storage": storage, "tables_cleared": cleared, "tables_failed": failed}

    if scope == "timetable":
        logger.info("RESET (scope=timetable) — timetable/calendar tables")
        cleared, failed = _clear_tables(url, headers, TIMETABLE_TABLES)
        return {"scope": scope, "tables_cleared": cleared, "tables_failed": failed}

    if scope == "user-subset":
        logger.info("RESET (scope=user-subset) — --user-subset cc.users storage objects and files rows")
        user_subset = _clear_user_subset_files()
        return {"scope": scope, "user_subset": user_subset}

    logger.info("=" * 60)
    logger.info("RESET ENVIRONMENT — full destructive wipe starting")
    logger.info("=" * 60)
    results: Dict[str, Any] = {"scope": scope}

    # ── 1. Neo4j: drop everything except system + neo4j ──────────────────────
    logger.info("\n[Neo4j] Dropping all non-system databases...")
    dropped = _neo4j_drop_all_non_system()
    logger.info(f" Dropped {len(dropped)}: {dropped}")
    results["neo4j"] = {"dropped": dropped}

    # ── 2. Supabase: clear all data tables (GAIS preserved) ──────────────────
    # Remove --user-subset cc.users storage objects (+ their files rows) via the
    # Storage API first, so the generic files-table clear doesn't strand objects.
    results["user_subset"] = _clear_user_subset_files()
    logger.info("\n[Supabase] Clearing data tables (preserving gais_*)...")
    cleared, failed = _clear_tables(url, headers, SUPABASE_TABLES_TO_CLEAR)
    logger.info(f" Cleared {len(cleared)} tables, {len(failed)} failed")

    # ── 3. Supabase: delete all auth users except kcar ────────────────────────
    logger.info("\n[Supabase] Deleting test auth users...")
    deleted_emails = _delete_non_kcar_auth_users(url, headers)
    logger.info(f" Deleted {len(deleted_emails)} auth users")

    # ── 4. Reset kcar profile to known-good platform_admin state ──────────────
    _restore_kcar_state(url, headers)

    # ── 5. Exam-marker subsystem: storage objects (Storage API) + exam tables ──
    # Same destructive surface as scope="exam-corpus"; storage is cleared before
    # the eb_* rows that reference it.
    logger.info("\n[Supabase] Clearing entire exam-marker subsystem (public corpus, storage, templates/layout/questions/boundaries/response areas, marking batches, submissions, mark entries)...")
    exam_storage = _clear_exam_storage()
    exam_cleared, exam_failed = _clear_tables(url, headers, EXAM_CORPUS_TABLES)

    results["supabase"] = {
        "tables_cleared": cleared,
        "tables_failed": failed,
        "deleted_users": deleted_emails,
    }
    results["exam"] = {
        "storage": exam_storage,
        "tables_cleared": exam_cleared,
        "tables_failed": exam_failed,
    }

    logger.info("\n" + "=" * 60)
    logger.info("RESET COMPLETE")
    logger.info("=" * 60)
    return results
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the default (scope="all") reset and print its
    # summary as indented JSON, stringifying anything JSON cannot encode.
    import json

    summary = reset()
    print(json.dumps(summary, indent=2, default=str))
|