api/run/initialization/buckets.py
CC Worker 9aabc12062 feat(seed): provision taxonomy buckets (infra) + exam-corpus loader skeleton
infra (buckets.py): add cc.public / cc.institutes / cc.admin to the bucket
provisioner alongside cc.examboards; make initialize_buckets idempotent
(already-exists treated as success). Bucket provisioning stays in infra init.

new (seed_exam_corpus.py): manifest-driven loader scaffold that USES the buckets
(does not create them) — validate -> upload to cc.examboards (canonical path) ->
upsert eb_specifications/eb_exams -> optional user test subset -> optional
--first-sweep auto-map pass. TODOs marked for the gathering task to complete.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-07 22:22:48 +00:00

153 lines
5.7 KiB
Python

import os
from modules.logger_tool import initialise_logger
from modules.database.supabase.utils.client import SupabaseServiceRoleClient, CreateBucketOptions
# Module-level logger shared by everything in this file. The LOG_LEVEL and
# LOG_PATH environment variables are passed straight through (None when unset).
# NOTE(review): the meaning of the trailing 'default' and True arguments is
# defined by initialise_logger - confirm against modules.logger_tool.
logger = initialise_logger(__name__, os.getenv("LOG_LEVEL"), os.getenv("LOG_PATH"), 'default', True)
# Upload ceiling applied to every bucket: 1000 MiB (1,048,576,000 bytes),
# i.e. roughly - but not exactly - 1 GB.
_BUCKET_FILE_SIZE_LIMIT = 1000 * 1024 * 1024

# One row per bucket: (bucket id, display name, allowed MIME types).
# ``None`` for the MIME types means the option is not passed at all.
_BUCKET_SPECS = (
    # TLDraw snapshot files
    ("cc.public.snapshots", "ClassroomCopilot Public TLDraw Snapshots", ("application/json",)),
    # User cabinet files
    ("cc.users", "Classroom Copilot Users - Private", None),
    # Exam Board files (admin-curated public exam corpus: QP/MS/insert/ER + specs)
    ("cc.examboards", "Classroom Copilot Exam Board Files", None),
    # -- Storage taxonomy bins (access scoped by RLS on bucket + leading path segment; RLS = D1) --
    # Platform-managed public/shared assets (readable by all authenticated users).
    ("cc.public", "Classroom Copilot Public", None),
    # Institute-scoped operational assets: cc.institutes/{institute_id}/...
    ("cc.institutes", "Classroom Copilot Institutes", None),
    # Platform-admin-only assets, seeds, intake/staging for unidentified papers.
    ("cc.admin", "Classroom Copilot Admin", None),
)

# Lower-cased substrings of an exception message that mean "bucket is already there".
_ALREADY_EXISTS_MARKERS = ("already exists", "duplicate", "resource already")


def _build_bucket_definitions() -> list:
    """Expand ``_BUCKET_SPECS`` into ``{"id": ..., "options": CreateBucketOptions}`` entries."""
    definitions = []
    for bucket_id, display_name, mime_types in _BUCKET_SPECS:
        option_kwargs = {
            "name": display_name,
            "public": False,
            "file_size_limit": _BUCKET_FILE_SIZE_LIMIT,
        }
        if mime_types is not None:
            # Only pass the restriction where one is defined, so the other
            # buckets are created without the option, exactly as before.
            option_kwargs["allowed_mime_types"] = list(mime_types)
        definitions.append({
            "id": bucket_id,
            "options": CreateBucketOptions(**option_kwargs),
        })
    return definitions


def initialize_buckets() -> dict:
    """
    Provision the ClassroomCopilot storage buckets.

    Attempts to create each of the following non-public buckets, all with the
    same file size limit (1000 MiB):
    - cc.public.snapshots: TLDraw snapshot files (application/json only)
    - cc.users: user cabinet files
    - cc.examboards: exam board files
    - cc.public: platform-managed public/shared assets
    - cc.institutes: institute-scoped operational assets
    - cc.admin: platform-admin-only assets

    Safe to re-run: when creating a bucket raises an error whose message
    indicates the bucket already exists, that bucket is recorded with status
    "exists" and counted as a success. A failure on one bucket does not stop
    the remaining buckets from being attempted.

    Returns:
        dict: Always contains
            - "success": True only if every bucket was created or already existed
            - "message": human-readable summary (note that "created" in the
              summary also covers buckets that already existed)
            - "results": per-bucket dict keyed by bucket id, each with a
              "status" of "success", "exists" or "error"
            - "success_count" / "total_count": counters behind "success"
            and additionally "error" when initialization itself failed
            (for example the storage client could not be constructed).
    """
    logger.info("Starting storage bucket initialization...")

    # Initialised up front so the failure path below can return the same keys
    # as the normal path instead of a differently shaped dict.
    results = {}
    success_count = 0
    total_count = 0

    try:
        storage_client = SupabaseServiceRoleClient()
        buckets = _build_bucket_definitions()
        total_count = len(buckets)

        for bucket in buckets:
            bucket_id = bucket["id"]
            try:
                logger.info(f"Creating bucket: {bucket_id}")
                result = storage_client.create_bucket(bucket_id, bucket["options"])
                if result:
                    results[bucket_id] = {
                        "status": "success",
                        "result": result
                    }
                    success_count += 1
                    logger.info(f"Successfully created bucket: {bucket_id}")
                else:
                    # A falsy return value is treated as a failed creation.
                    results[bucket_id] = {
                        "status": "error",
                        "error": "Failed to create bucket"
                    }
                    logger.error(f"Failed to create bucket: {bucket_id}")
            except Exception as e:
                # Idempotent: an already-existing bucket is not a failure on re-run.
                error_text = str(e)
                lowered = error_text.lower()
                if any(marker in lowered for marker in _ALREADY_EXISTS_MARKERS):
                    results[bucket_id] = {"status": "exists", "result": error_text}
                    success_count += 1
                    logger.info(f"Bucket already exists (ok): {bucket_id}")
                else:
                    results[bucket_id] = {
                        "status": "error",
                        "error": error_text
                    }
                    logger.error(f"Error creating bucket {bucket_id}: {error_text}")

        # Determine overall success
        if success_count == total_count:
            message = f"All {total_count} storage buckets created successfully"
            success = True
        elif success_count > 0:
            message = f"Created {success_count}/{total_count} storage buckets. Some failed."
            success = False
        else:
            message = f"Failed to create any storage buckets ({total_count} attempted)"
            success = False

        logger.info(f"Bucket initialization completed: {message}")
        return {
            "success": success,
            "message": message,
            "results": results,
            "success_count": success_count,
            "total_count": total_count
        }
    except Exception as e:
        # Reached when setup itself fails (client construction, building the
        # bucket options, ...); per-bucket errors are handled inside the loop.
        error_msg = f"Failed to initialize storage buckets: {str(e)}"
        logger.error(error_msg)
        return {
            "success": False,
            "message": error_msg,
            "error": str(e),
            "results": results,
            "success_count": success_count,
            "total_count": total_count
        }