api/run/initialization/seed_file_cabinets.py
Kevin Carter ead4452277 feat: add P0 seed scripts for timetable, planned lessons, file cabinets, and curriculum
- seed_kevlarai_timetable.py: Mirror Greenfield timetable structure for KevlarAI
  (8 classes, 2 teachers, 2 students, full slot/materialize/sync pipeline)
- seed_planned_lessons.py: 2-3 planned lessons per teacher across both schools
  (6 plans total, idempotent via title+subject check)
- seed_file_cabinets.py: One file cabinet per class with sample documents
  (14 cabinets, ~28 files, document_artefacts, cabinet_memberships)
- seed_curriculum.py: Exam board specifications and exams (AQA, Edexcel, OCR)
  (6 specs, 12 exam papers, Neo4j curriculum topics per school)
2026-05-29 21:15:05 +01:00

424 lines
18 KiB
Python

"""
seed_file_cabinets.py — Create one file cabinet per class with sample document references.
Creates file_cabinets, files, and cabinet_memberships rows via Supabase REST API
using the service role key. Also creates document_artefacts entries for sample files.
Each cabinet is owned by the class's primary teacher and shared with students
in that class via cabinet_memberships.
Tables: file_cabinets, files, cabinet_memberships, document_artefacts
Run inside ccapi container:
python3 -c "from run.initialization.seed_file_cabinets import seed; seed()"
"""
import os
import time
import uuid
import requests
from typing import Dict, Any, List, Optional
# ─── Configuration (read once, at import time) ──────────────────────────────
# Both lookups below index os.environ directly, so importing this module raises
# KeyError when either variable is unset.
SUPA_URL = os.environ["SUPABASE_URL"]
# Sent as both the `apikey` header and the Bearer token by _sb_headers().
SERVICE_KEY = os.environ["SERVICE_ROLE_KEY"]
# Base URL for the /database/timetable/... endpoints; optional, with a
# localhost default.
API_BASE = os.environ.get("API_BASE_URL", "http://localhost:8000")
# ─── Passwords (standardized from T4) ────────────────────────────────────────
# NOTE(review): hard-coded credentials. The accounts they are used with in this
# file are *.test addresses, so these are presumably dev/test-only — confirm
# they are never valid in a real environment.
# Only PWD_ADMIN is referenced by the code reviewed here; PWD_TEACHER and
# PWD_STUDENT may be used by other modules — verify before removing.
PWD_ADMIN = "Admin@Cc2025!"
PWD_TEACHER = "Teacher@Cc2025!"
PWD_STUDENT = "Student@Cc2025!"
# ─── Sample file data ────────────────────────────────────────────────────────
# Sample file descriptors, keyed by category. seed() picks ONE category per
# cabinet from the class subject (physics -> "lesson_plans", maths ->
# "worksheets", CS -> "code_samples", English -> "presentations", anything else
# -> "lesson_plans"), so each cabinet gets 1-2 sample files.
#
# Every key MUST be unique: a dict literal silently keeps only the last value
# for a repeated key. The generic keys therefore hold the list that seed()'s
# subject mapping expects, and the remaining per-subject material lives under
# subject-prefixed keys so that no sample data is discarded.
#
# NOTE(review): "size" is a human-readable string here; confirm the `files.size`
# column accepts text rather than a byte count.
SAMPLE_FILES = {
    # Physics
    "lesson_plans": [
        {
            "name": "forces_motion_plan.pdf",
            "path": "cc.public.snapshots/lesson_plans/forces_motion.pdf",
            "mime_type": "application/pdf",
            "size": "245KB",
        },
        {
            "name": "electric_circuits_plan.pdf",
            "path": "cc.public.snapshots/lesson_plans/electric_circuits.pdf",
            "mime_type": "application/pdf",
            "size": "312KB",
        },
    ],
    "physics_worksheets": [
        {
            "name": "worksheet_fma.pdf",
            "path": "cc.public.snapshots/worksheets/fma_practice.pdf",
            "mime_type": "application/pdf",
            "size": "128KB",
        },
        {
            "name": "worksheet_resistance.pdf",
            "path": "cc.public.snapshots/worksheets/resistance_calc.pdf",
            "mime_type": "application/pdf",
            "size": "95KB",
        },
    ],
    "presentations": [
        {
            "name": "intro_forces.pptx",
            "path": "cc.public.snapshots/presentations/forces_intro.pptx",
            "mime_type": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "size": "2.1MB",
        },
    ],
    # Maths
    "maths_lesson_plans": [
        {
            "name": "quadratic_factorisation_plan.pdf",
            "path": "cc.public.snapshots/lesson_plans/quadratics.pdf",
            "mime_type": "application/pdf",
            "size": "278KB",
        },
    ],
    "worksheets": [
        {
            "name": "worksheet_quadratics.pdf",
            "path": "cc.public.snapshots/worksheets/quadratic_practice.pdf",
            "mime_type": "application/pdf",
            "size": "156KB",
        },
        {
            "name": "worksheet_tree_diagrams.pdf",
            "path": "cc.public.snapshots/worksheets/tree_diagrams.pdf",
            "mime_type": "application/pdf",
            "size": "134KB",
        },
    ],
    # CS
    "cs_lesson_plans": [
        {
            "name": "intro_python_plan.pdf",
            "path": "cc.public.snapshots/lesson_plans/intro_python.pdf",
            "mime_type": "application/pdf",
            "size": "198KB",
        },
    ],
    "code_samples": [
        {
            "name": "hello_world.py",
            "path": "cc.public.snapshots/code_samples/hello_world.py",
            "mime_type": "text/x-python",
            "size": "0.5KB",
        },
        {
            "name": "variables.py",
            "path": "cc.public.snapshots/code_samples/variables.py",
            "mime_type": "text/x-python",
            "size": "1.2KB",
        },
    ],
}
# ─── Helpers ───────────────────────────────────────────────────────────────────
def _sb_headers() -> Dict:
    """Build the headers for service-role requests to the Supabase REST API.

    A new dict is returned on every call, so callers may extend it freely
    (e.g. with a ``Prefer`` header) without affecting other requests.
    """
    bearer = f"Bearer {SERVICE_KEY}"
    headers = dict(apikey=SERVICE_KEY)
    headers["Authorization"] = bearer
    headers["Content-Type"] = "application/json"
    return headers
def _sign_in(email: str, password: str, timeout: float = 30.0) -> str:
    """Sign in with email/password and return the session access token.

    Posts the credentials to the Supabase auth password-grant endpoint.

    Args:
        email: Account email address.
        password: Account password.
        timeout: Seconds to wait for the auth server before giving up. Without
            a timeout ``requests`` waits indefinitely, which would hang the
            whole seed run on an unresponsive server.

    Returns:
        The ``access_token`` string from the auth response.

    Raises:
        requests.HTTPError: The auth endpoint answered with a non-2xx status.
        requests.Timeout: No response arrived within ``timeout`` seconds.
        KeyError: The response body carries no ``access_token``.
    """
    r = requests.post(
        f"{SUPA_URL}/auth/v1/token?grant_type=password",
        headers={"apikey": SERVICE_KEY, "Content-Type": "application/json"},
        json={"email": email, "password": password},
        timeout=timeout,
    )
    r.raise_for_status()
    return r.json()["access_token"]
def _get_profile_id(email: str, timeout: float = 30.0) -> Optional[str]:
    """Look up a profile's UUID by email via the Supabase service role.

    Args:
        email: Email to match exactly against ``profiles.email``.
        timeout: Seconds to wait for the REST API; prevents an unresponsive
            server from hanging the seed run.

    Returns:
        The ``id`` of the first matching profile, or ``None`` when the request
        fails, the body is not a list, or no profile matches.

    Raises:
        requests.Timeout: No response arrived within ``timeout`` seconds.
    """
    r = requests.get(
        f"{SUPA_URL}/rest/v1/profiles",
        headers=_sb_headers(),
        params={"email": f"eq.{email}", "select": "id", "limit": "1"},
        timeout=timeout,
    )
    if not r.ok:
        return None
    data = r.json()
    # Only a non-empty list of rows is usable; indexing a dict body with [0]
    # would raise KeyError instead of reporting "not found".
    if not isinstance(data, list) or not data:
        return None
    return data[0]["id"]
def _get_class_info(admin_token: str, class_code: str, timeout: float = 30.0) -> Optional[Dict]:
    """Fetch one class record from the timetable API by class code.

    Args:
        admin_token: Bearer token of a school admin.
        class_code: Class code sent as the ``class_code`` query parameter.
        timeout: Seconds to wait for the API; prevents an unresponsive server
            from hanging the seed run.

    Returns:
        The class record, or ``None`` when the request fails or the body is
        neither a dict nor a non-empty list. A dict body is returned as-is.
        For a list body, the row whose ``class_code`` equals the requested code
        is preferred, falling back to the first row.

    Raises:
        requests.Timeout: No response arrived within ``timeout`` seconds.
    """
    r = requests.get(
        f"{API_BASE}/database/timetable/classes",
        headers={"Authorization": f"Bearer {admin_token}"},
        params={"class_code": class_code},
        timeout=timeout,
    )
    if not r.ok:
        return None
    data = r.json()
    if isinstance(data, dict):
        return data
    if isinstance(data, list) and data:
        # NOTE(review): assumes rows expose a "class_code" field — confirm
        # against the API. If they do not, or nothing matches, this degrades to
        # the first row, so a filter the server ignored cannot be detected here.
        for row in data:
            if isinstance(row, dict) and row.get("class_code") == class_code:
                return row
        return data[0]
    return None
def _get_class_students(admin_token: str, class_id: str, timeout: float = 30.0) -> List[str]:
    """Get the profile IDs of the students enrolled in a class.

    Args:
        admin_token: Bearer token of a school admin.
        class_id: Class identifier placed in the request path.
        timeout: Seconds to wait for the API; prevents an unresponsive server
            from hanging the seed run.

    Returns:
        One ID per usable row, taken from ``student_id`` or, failing that,
        ``id``. Returns an empty list when the request fails or the body is
        not a list.

    Raises:
        requests.Timeout: No response arrived within ``timeout`` seconds.
    """
    r = requests.get(
        f"{API_BASE}/database/timetable/classes/{class_id}/students",
        headers={"Authorization": f"Bearer {admin_token}"},
        timeout=timeout,
    )
    if not r.ok:
        return []
    data = r.json()
    if not isinstance(data, list):
        return []
    student_ids: List[str] = []
    for row in data:
        # Skip malformed rows instead of crashing the whole class on `.get`.
        if not isinstance(row, dict):
            continue
        sid = row.get("student_id") or row.get("id")
        if sid:
            student_ids.append(sid)
    return student_ids
# ─── Main seed ─────────────────────────────────────────────────────────────────
def _pick_file_category(subject: Optional[str]) -> str:
    """Map a class subject to the SAMPLE_FILES category used for its cabinet."""
    text = (subject or "").lower()
    # "cs" must match as a whole word: as a substring it also hits "physics",
    # "economics", "politics", ...
    words = "".join(ch if ch.isalnum() else " " for ch in text).split()
    if "physics" in text:
        return "lesson_plans"
    if "math" in text:
        return "worksheets"
    if "english" in text:
        return "presentations"
    if "computer" in text or "cs" in words:
        return "code_samples"
    # History and any unrecognised subject fall back to lesson plans.
    return "lesson_plans"


def _stable_id(*parts: str) -> str:
    """Derive a deterministic UUID string from the parts, so re-runs reuse the same row ids."""
    return str(uuid.uuid5(uuid.NAMESPACE_URL, "seed_file_cabinets:" + ":".join(parts)))


def _upsert_row(table: str, row: Dict[str, Any], timeout: float = 30.0):
    """Insert-or-update one row, keyed on ``id``, through the Supabase REST API."""
    return requests.post(
        f"{SUPA_URL}/rest/v1/{table}",
        # `on_conflict` only takes effect together with a `resolution=`
        # preference; without it the request is a plain INSERT.
        headers={**_sb_headers(), "Prefer": "resolution=merge-duplicates,return=representation"},
        json=row,
        params={"on_conflict": "id"},
        timeout=timeout,
    )


def _seed_class_cabinet(
    school: str,
    admin_token: str,
    class_code: str,
    teacher_email: str,
    counts: Dict[str, int],
    errors: List[str],
    timeout: float = 30.0,
) -> None:
    """Create the cabinet, sample files, artefacts and student memberships for one class."""
    class_info = _get_class_info(admin_token, class_code)
    if not class_info:
        print(f" ✗ class not found: {class_code}")
        errors.append(f"class_not_found: {class_code}")
        return
    class_id = class_info.get("id")
    if not class_id:
        # Without an id the students URL would be built from the whole record.
        print(f" ✗ class has no id: {class_code}")
        errors.append(f"class_id_missing: {class_code}")
        return
    teacher_pid = _get_profile_id(teacher_email)
    if not teacher_pid:
        print(f" ✗ teacher profile not found: {teacher_email}")
        errors.append(f"teacher_profile_not_found: {teacher_email}")
        return

    student_ids = _get_class_students(admin_token, str(class_id))
    category = _pick_file_category(class_info.get("subject"))
    files_list = SAMPLE_FILES.get(category, SAMPLE_FILES["lesson_plans"])

    # Cabinet: id derived from school + class code so a re-run updates the same row.
    cabinet_id = _stable_id(school, class_code)
    class_name = class_info.get("name")
    if class_name and class_name != class_code:
        cabinet_name = f"{class_code} — {class_name}"
    else:
        cabinet_name = class_code
    r = _upsert_row(
        "file_cabinets",
        {"id": cabinet_id, "user_id": teacher_pid, "name": cabinet_name},
        timeout,
    )
    if r.status_code not in (200, 201):
        print(f" ✗ Cabinet create failed ({class_code}): {r.text[:100]}")
        errors.append(f"cabinet_create: {class_code}")
        return
    print(f" ✓ Cabinet: {cabinet_name}")
    counts["cabinets"] += 1

    # Files, each followed by its document_artefacts row.
    for fi in files_list:
        file_id = _stable_id(cabinet_id, fi["path"])
        r = _upsert_row(
            "files",
            {
                "id": file_id,
                "cabinet_id": cabinet_id,
                "name": fi["name"],
                "path": fi["path"],
                "bucket": "file-cabinets",
                "mime_type": fi.get("mime_type"),
                "size": fi.get("size"),
                "metadata": {},
            },
            timeout,
        )
        if r.status_code in (200, 201):
            counts["files"] += 1
            mime_type = fi.get("mime_type") or "application/octet-stream"
            r = _upsert_row(
                "document_artefacts",
                {
                    "id": _stable_id(file_id, "artefact"),
                    "file_id": file_id,
                    "type": mime_type.split("/")[-1],
                    "rel_path": fi["path"],
                    "status": "processed",
                    "extra": {"seeded": True, "source": "seed_file_cabinets"},
                },
                timeout,
            )
            if r.status_code not in (200, 201):
                print(f" ✗ Artefact create failed ({class_code}/{fi['name']}): {r.text[:100]}")
                errors.append(f"artefact_create: {class_code}/{fi['name']}")
        else:
            print(f" ✗ File create failed ({class_code}/{fi['name']}): {r.text[:100]}")
            errors.append(f"file_create: {class_code}/{fi['name']}")
        time.sleep(0.05)  # light pacing between REST writes

    # Memberships: plain insert as before; a 409 on re-run means the membership
    # already exists and is counted as success.
    failed_memberships = 0
    for sid in student_ids:
        r = requests.post(
            f"{SUPA_URL}/rest/v1/cabinet_memberships",
            headers={**_sb_headers(), "Prefer": "return=minimal"},
            json={
                "cabinet_id": cabinet_id,
                "profile_id": sid,
                "role": "viewer",
            },
            params={"on_conflict": "cabinet_id,profile_id"},
            timeout=timeout,
        )
        if r.status_code in (200, 201, 409):
            counts["memberships"] += 1
        else:
            failed_memberships += 1
    if failed_memberships:
        errors.append(f"membership_create: {class_code} ({failed_memberships} failed)")
    time.sleep(0.1)


def _seed_school_cabinets(
    school: str,
    admin_token: str,
    classes: List[tuple],
    counts: Dict[str, int],
    errors: List[str],
) -> None:
    """Seed every (class_code, teacher_email) pair of one school, isolating per-class failures."""
    for class_code, teacher_email in classes:
        try:
            _seed_class_cabinet(school, admin_token, class_code, teacher_email, counts, errors)
        except Exception as e:
            # Top-level boundary for one class: record the failure and carry on
            # with the remaining classes.
            err = f"cabinet seed {class_code}: {e}"
            print(f"{err}")
            errors.append(err)


def seed() -> Dict[str, Any]:
    """Seed one file cabinet per class for both schools.

    For every configured class this creates a ``file_cabinets`` row owned by
    the class teacher, sample ``files`` rows with a ``document_artefacts`` row
    each, and a ``cabinet_memberships`` row (role ``viewer``) per enrolled
    student.

    Row ids are derived deterministically from school, class code and file
    path, and rows are written as upserts, so running the seed again updates
    the same rows instead of duplicating them. This assumes ``id`` is the
    primary key of ``file_cabinets``, ``files`` and ``document_artefacts`` —
    TODO confirm. Rows created by earlier runs that used random ids are not
    touched.

    A school whose admin sign-in failed is skipped; the sign-in failure is
    already recorded in ``errors``.

    Returns:
        ``{"success": False, "error": ..., "errors": [...]}`` when no admin
        could sign in. Otherwise a dict with ``"kevlarai"`` and
        ``"greenfield"`` counters (``cabinets``, ``files``, ``memberships``),
        ``"success"`` (True only when no error was recorded) and ``"errors"``.
    """
    print("=" * 60)
    print("File cabinets seed — both schools")
    print("=" * 60)
    results: Dict[str, Any] = {}
    errors: List[str] = []

    # ── Sign in as both school admins ───────────────────────────────────────
    print("\n[1] Signing in as school admins...")
    admin_tokens: Dict[str, str] = {}
    for school, email, pwd in [
        ("KevlarAI", "admin@kevlarai.test", PWD_ADMIN),
        ("Greenfield", "admin@greenfieldacademy.test", PWD_ADMIN),
    ]:
        try:
            admin_tokens[school] = _sign_in(email, pwd)
            print(f"{school} admin signed in")
        except Exception as e:
            print(f"{school} admin login failed: {e}")
            errors.append(f"{school}_admin_login: {e}")
    if not admin_tokens:
        return {"success": False, "error": "No admin tokens obtained", "errors": errors}

    # ── Resolve class codes per school ──────────────────────────────────────
    print("\n[2] Resolving classes per school...")
    kevlarai_classes = [
        ("10K/Ph1", "physics@kevlarai.test"),
        ("11K/Ph1", "physics@kevlarai.test"),
        ("10K/Ma1", "maths@kevlarai.test"),
        ("11K/Ma1", "maths@kevlarai.test"),
        ("10K/CS1", "physics@kevlarai.test"),
        ("11K/CS1", "maths@kevlarai.test"),
        ("9K/Ph1", "physics@kevlarai.test"),
        ("9K/Ma1", "maths@kevlarai.test"),
    ]
    # Greenfield classes (subset — just a few for cabinet seeding)
    greenfield_classes = [
        ("9P/Ph1", "physics@greenfieldacademy.test"),
        ("10P/Ph2", "physics@greenfieldacademy.test"),
        ("9M/Ma1", "maths@greenfieldacademy.test"),
        ("10M/Ma1", "maths@greenfieldacademy.test"),
        ("9En/1", "teacher1@greenfieldacademy.test"),
        ("10Hs/1", "teacher2@greenfieldacademy.test"),
    ]

    # ── Seed cabinets, one school at a time ─────────────────────────────────
    plan = [
        ("KevlarAI", "kevlarai", kevlarai_classes),
        ("Greenfield", "greenfield", greenfield_classes),
    ]
    for step, (school, result_key, classes) in enumerate(plan, start=3):
        print(f"\n[{step}] Seeding {school} file cabinets...")
        # Counters always exist so the summary below can add them up.
        results[result_key] = {"cabinets": 0, "files": 0, "memberships": 0}
        token = admin_tokens.get(school)
        if token is None:
            print(f" ✗ skipped: no {school} admin token")
            continue
        _seed_school_cabinets(school, token, classes, results[result_key], errors)

    # ── Summary ─────────────────────────────────────────────────────────────
    print("\n" + "=" * 60)
    results["success"] = len(errors) == 0
    results["errors"] = errors
    total_cabinets = results["kevlarai"]["cabinets"] + results["greenfield"]["cabinets"]
    total_files = results["kevlarai"]["files"] + results["greenfield"]["files"]
    total_memberships = results["kevlarai"]["memberships"] + results["greenfield"]["memberships"]
    print(f"COMPLETE — {total_cabinets} cabinets, {total_files} files, {total_memberships} memberships")
    if errors:
        print(f"Errors ({len(errors)}):")
        for e in errors:
            print(f"{e}")
    print("=" * 60)
    return results
if __name__ == "__main__":
    # Script entry point: run the seed and dump its result dict as JSON.
    import json

    outcome = seed()
    # default=str stringifies any value json cannot serialise natively.
    rendered = json.dumps(outcome, indent=2, default=str)
    print(rendered)