#!/usr/bin/env python3 """ ClassroomCopilot Initialization System This script orchestrates the initialization of all system components. """ import os import sys import json import time import requests import csv from typing import Dict, Any, Optional from datetime import datetime, timedelta from modules.database.services.neo4j_service import Neo4jService from modules.database.supabase.utils.client import SupabaseServiceRoleClient, CreateBucketOptions from modules.database.init.init_user import NonSchoolUserCreator from modules.database.init.init_calendar import create_calendar import modules.database.init.init_school as init_school import modules.database.init.init_school_timetable as init_school_timetable import modules.database.init.init_school_curriculum as init_school_curriculum import modules.database.init.xl_tools as xl from modules.database.schemas.nodes.schools.schools import SchoolNode from modules.logger_tool import initialise_logger from modules.database.tools.neo4j_driver_tools import get_session import modules.database.schemas.nodes.structures.schools as school_structures import modules.database.tools.neontology_tools as neon import modules.database.schemas.relationships.entity_relationships as entity_relationships import modules.database.schemas.relationships.structures.schools as structure_relationships logger = initialise_logger(__name__, os.getenv("LOG_LEVEL"), os.getenv("LOG_PATH"), 'default', True) # Configuration SUPABASE_URL = os.environ.get("SUPABASE_URL") SERVICE_ROLE_KEY = os.environ.get("SERVICE_ROLE_KEY") ADMIN_EMAIL = os.environ.get("ADMIN_EMAIL") ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD") ADMIN_NAME = os.environ.get("ADMIN_NAME", "Super Admin") ADMIN_FULL_NAME = os.environ.get("ADMIN_FULL_NAME", "Super Admin Full Name") POSTGRES_PASSWORD = os.environ.get("POSTGRES_PASSWORD") POSTGRES_DB = os.environ.get("POSTGRES_DB") ADMIN_USERNAME = os.environ.get("ADMIN_USERNAME", "superadmin") ADMIN_DISPLAY_NAME = os.environ.get("ADMIN_DISPLAY_NAME", "Super Admin") class InitializationSystem: """Main initialization system that orchestrates all components""" def __init__(self, manager=None): if manager: self.manager = manager self.supabase_headers = manager.supabase_headers self.status = manager.status else: # Fallback to original standalone behavior self.supabase_headers = { "apikey": SERVICE_ROLE_KEY, "Authorization": f"Bearer {SERVICE_ROLE_KEY}", "Content-Type": "application/json" } self.manager = None self.status = self._load_status() self.admin_token = None self.init_dir = "/app/data/init" # Ensure data directory exists os.makedirs(self.init_dir, exist_ok=True) # Copy template files if they don't exist self._ensure_data_files() self.neo4j_service = Neo4jService() def _ensure_data_files(self): """Ensure required data files exist""" # Check for schools data file csv_path = os.path.join(self.init_dir, "sample_schools.csv") if not os.path.exists(csv_path): logger.warning(f"Schools data file not found at {csv_path}") def _load_status(self) -> Dict: """Load initialization status from file or create default""" try: with open("/init/status.json", "r") as f: status = json.load(f) # Verify all required keys exist default_status = { "super_admin_created": False, "admin_token_obtained": False, "storage": { "initialized": False, "buckets": { "cc.users": False, "cc.institutes": False } }, "neo4j": { "initialized": False, "database_created": False, "schema_initialized": False, "schools_imported": False }, "cc_database": { "initialized": False, "database_created": False, "schema_initialized": False, "calendar_created": False }, "completed": False, "timestamp": None, "steps": [] } # Recursively update status with any missing keys def update_dict(current: Dict, default: Dict) -> Dict: for key, value in default.items(): if key not in current: current[key] = value elif isinstance(value, dict) and isinstance(current[key], dict): current[key] = update_dict(current[key], value) return current status = update_dict(status, default_status) self._save_status(status) return status except (FileNotFoundError, json.JSONDecodeError): default_status = { "super_admin_created": False, "admin_token_obtained": False, "storage": { "initialized": False, "buckets": { "cc.users": False, "cc.institutes": False } }, "neo4j": { "initialized": False, "database_created": False, "schema_initialized": False, "schools_imported": False }, "cc_database": { "initialized": False, "database_created": False, "schema_initialized": False, "calendar_created": False }, "completed": False, "timestamp": None, "steps": [] } self._save_status(default_status) return default_status def _save_status(self, status: Dict) -> None: """Save initialization status to file""" if self.manager: self.manager._save_status(status) else: # Fallback to direct file save os.makedirs(os.path.dirname("/init/status.json"), exist_ok=True) with open("/init/status.json", "w") as f: json.dump(status, f, indent=2) def update_status(self, key: str, value: Any) -> None: """Update a specific status key and save""" if isinstance(value, dict): if key not in self.status: self.status[key] = {} self.status[key].update(value) else: self.status[key] = value self.status["timestamp"] = time.time() self._save_status(self.status) def wait_for_services(self) -> bool: """Wait for required services to be available""" logger.info("Waiting for services to be available...") def check_supabase_endpoint(endpoint: str, description: str) -> bool: """Check a specific Supabase endpoint with retry logic""" max_retries = 30 retry_count = 0 initial_delay = 5 max_delay = 30 while retry_count < max_retries: try: url = f"{SUPABASE_URL}/{endpoint}" logger.info(f"Checking Supabase {description} at {url}") response = requests.get( url, headers={"apikey": SERVICE_ROLE_KEY}, timeout=10 # Add timeout to prevent hanging ) if response.status_code < 500: logger.info(f"Supabase {description} is available") return True logger.warning( f"Supabase {description} returned status {response.status_code}: {response.text}" ) except requests.RequestException as e: logger.warning(f"Error checking Supabase {description}: {str(e)}") retry_count += 1 if retry_count < max_retries: # Calculate delay with exponential backoff delay = min(initial_delay * (2 ** (retry_count - 1)), max_delay) logger.info( f"Waiting for Supabase {description}... " f"({retry_count}/{max_retries}, next retry in {delay}s)" ) time.sleep(delay) logger.error(f"Supabase {description} is not available after {max_retries} attempts") return False # Check multiple Supabase endpoints endpoints = [ ("rest/v1/", "REST API"), ("auth/v1/", "Auth API"), ("storage/v1/", "Storage API") ] for endpoint, description in endpoints: if not check_supabase_endpoint(endpoint, description): logger.error(f"Failed to connect to Supabase {description}") return False logger.info("All Supabase services are available") return True def check_super_admin_exists(self) -> bool: """Check if super admin exists in both auth and profiles""" try: # Ensure Supabase headers are properly set self._ensure_supabase_headers() # 1. Check auth.users table response = self._supabase_request_with_retry( 'get', f"{SUPABASE_URL}/auth/v1/admin/users", headers=self.supabase_headers ) if response.status_code != 200: logger.error(f"Failed to check auth users: {response.text}") return False try: # Parse the response auth_data = response.json() # Check if we have the expected structure if not isinstance(auth_data, dict) or 'users' not in auth_data: logger.error(f"Unexpected auth users response structure: {auth_data}") return False # Find our admin in the list of users auth_user = next( (user for user in auth_data['users'] if isinstance(user, dict) and user.get("email") == ADMIN_EMAIL), None ) if not auth_user: logger.info("Super admin not found in auth.users") return False user_id = auth_user.get("id") logger.info(f"Found auth user with ID: {user_id}") # Verify the user has the correct metadata app_metadata = auth_user.get("app_metadata", {}) if app_metadata.get("role") != "supabase_admin": logger.info("User exists but is not a supabase_admin") return False # 2. Check public.profiles table response = self._supabase_request_with_retry( 'get', f"{SUPABASE_URL}/rest/v1/profiles", headers=self.supabase_headers, params={ "select": "*", "email": f"eq.{ADMIN_EMAIL}" } ) if response.status_code != 200: logger.error(f"Failed to check profiles: {response.text}") return False try: profiles = response.json() if not isinstance(profiles, list): logger.error(f"Unexpected profiles response format: {profiles}") return False if not profiles: logger.info("Super admin not found in public.profiles") return False profile = profiles[0] if not isinstance(profile, dict): logger.error(f"Unexpected profile format: {profile}") return False # Verify admin status and username user_type = profile.get("user_type") username = profile.get("username") if user_type != "admin" or not username: logger.info(f"User exists but is not properly configured (type: {user_type}, username: {username})") return False logger.info("Super admin exists and is properly configured") return True except json.JSONDecodeError as e: logger.error(f"Failed to parse profiles response: {str(e)}") return False except json.JSONDecodeError as e: logger.error(f"Failed to parse auth users response: {str(e)}") return False except Exception as e: logger.error(f"Error checking super admin existence: {str(e)}") return False def create_super_admin(self) -> bool: """Create the super admin user""" if self.status.get("super_admin_created"): if self.check_super_admin_exists(): logger.info("Super admin already exists and is properly configured") return True else: logger.warning("Status indicates super admin created but verification failed") logger.info(f"Creating super admin user with email: {os.getenv('ADMIN_EMAIL')}") try: # Create the main users database first try: with get_session() as session: logger.info("Creating main users database cc.users") session.run("CREATE DATABASE cc.users IF NOT EXISTS") # Wait for database to be available time.sleep(2) # Give Neo4j time to create the database # Verify database exists result = session.run("SHOW DATABASES") databases = [record["name"] for record in result] if "cc.users" not in databases: logger.error("Failed to create cc.users database") return False logger.info("Successfully created cc.users database") except Exception as e: logger.error(f"Failed to create cc.users database: {str(e)}") return False # Create user data structure directly user_data = { "email": os.getenv('ADMIN_EMAIL'), "password": os.getenv('ADMIN_PASSWORD'), "email_confirm": True, "user_metadata": { "name": os.getenv('ADMIN_NAME'), "username": ADMIN_USERNAME, "full_name": os.getenv('ADMIN_FULL_NAME'), "display_name": ADMIN_DISPLAY_NAME, "user_type": "admin" # Set this explicitly for admin }, "app_metadata": { "provider": "email", "providers": ["email"], "role": "supabase_admin" } } # Create user via Auth API response = self._supabase_request_with_retry( 'post', f"{SUPABASE_URL}/auth/v1/admin/users", headers=self.supabase_headers, json=user_data ) if response.status_code not in (200, 201): logger.error(f"Failed to create admin user: {response.text}") return False user_id = response.json().get("id") logger.info(f"Created auth user with ID: {user_id}") # Add a small delay to ensure user is created time.sleep(2) # Call setup_initial_admin function to set admin profile response = self._supabase_request_with_retry( 'post', f"{SUPABASE_URL}/rest/v1/rpc/setup_initial_admin", headers=self.supabase_headers, json={ "admin_email": os.getenv('ADMIN_EMAIL') } ) if response.status_code not in (200, 201, 204): logger.error(f"Failed to set up admin profile: {response.text}") return False admin_profile = response.json() logger.info(f"Updated user profile to admin type: {admin_profile}") # Get admin access token login_data = { "email": os.getenv('ADMIN_EMAIL'), "password": os.getenv('ADMIN_PASSWORD') } response = self._supabase_request_with_retry( 'post', f"{SUPABASE_URL}/auth/v1/token?grant_type=password", headers={ "apikey": os.getenv('SERVICE_ROLE_KEY'), "Content-Type": "application/json" }, json=login_data ) if response.status_code not in (200, 201): logger.error(f"Failed to get admin access token: {response.text}") return False admin_access_token = response.json().get("access_token") if not admin_access_token: logger.error("No access token in response") return False # Store admin user ID and access token in status for later use self.status["admin_user_id"] = user_id self.status["admin_access_token"] = admin_access_token self._save_status(self.status) # Create Neo4j database and graph for super admin try: # Create the admin's private database and graph def safe_parse_date(date_str): """Parse date string in YYYY-MM-DD format, return None if invalid""" if not date_str: return None try: return datetime.strptime(date_str, '%Y-%m-%d').date() except ValueError: logger.warning(f"Invalid date format: {date_str}. Expected YYYY-MM-DD") return None cc_users_db_name = "cc.users" user_type = "admin" worker_type = "superadmin" user_email = os.getenv('SUPER_ADMIN_EMAIL') worker_email = os.getenv('SUPER_ADMIN_WORKER_EMAIL') cc_username = os.getenv('SUPER_ADMIN_USERNAME') user_name = os.getenv('SUPER_ADMIN_NAME') worker_name = os.getenv('SUPER_ADMIN_DISPLAY_NAME') calendar_start_date = os.getenv('SUPER_ADMIN_CALENDAR_START_DATE') # Expected format: 2025-01-01 calendar_end_date = os.getenv('SUPER_ADMIN_CALENDAR_END_DATE') # Expected format: 2025-01-01 admin_creator = NonSchoolUserCreator( user_id=user_id, cc_users_db_name=cc_users_db_name, # Main users database user_type=user_type, worker_type=worker_type, user_email=user_email, worker_email=worker_email, cc_username=cc_username, user_name=user_name, worker_name=worker_name, calendar_start_date=safe_parse_date(calendar_start_date), calendar_end_date=safe_parse_date(calendar_end_date) ) # Create the user nodes and relationships user_nodes = admin_creator.create_user(access_token=admin_access_token) logger.info(f"Initialised super admin user nodes: {user_nodes}") if not user_nodes.get(f'default_user_node'): logger.error("Failed to create admin user node in the default database") return False if not user_nodes.get(f'private_user_node'): logger.error(f"Failed to create admin user node in the {admin_creator.user_type} database") return False if not user_nodes.get('worker_node'): logger.error("Failed to create admin super admin node") return False logger.info(f"Created Neo4j nodes for admin user: {user_nodes}") except Exception as e: logger.error(f"Failed to create admin Neo4j database and graph: {str(e)}") return False # Wait for changes to propagate logger.info("Waiting for changes to propagate...") time.sleep(2) # Verify the setup if self.check_super_admin_exists(): logger.info("Super admin exists and is properly configured") self.status["super_admin_created"] = True self._save_status(self.status) logger.info("Super admin created and verified successfully") return True else: logger.error("Failed to verify super admin setup") return False except Exception as e: logger.error(f"Error creating super admin: {str(e)}") return False def get_admin_token(self) -> bool: """Get an access token for the admin user""" if self.status.get("admin_token_obtained"): logger.info("Admin token already obtained, skipping...") return True logger.info("Getting admin access token...") # Add a small delay to ensure auth system is ready time.sleep(2) # Try multiple times with increasing delays max_retries = 5 for retry in range(max_retries): try: # Sign in with admin credentials login_data = { "email": ADMIN_EMAIL, "password": ADMIN_PASSWORD } logger.info(f"Attempting to login as {ADMIN_EMAIL} (attempt {retry+1}/{max_retries})") # Use the retry mechanism but with custom headers response = self._supabase_request_with_retry( 'post', f"{SUPABASE_URL}/auth/v1/token?grant_type=password", headers={ "apikey": SERVICE_ROLE_KEY, "Content-Type": "application/json" }, json=login_data ) if response.status_code in (200, 201): # Extract the access token self.admin_token = response.json().get("access_token") if self.admin_token: logger.info("Admin token obtained successfully") self.update_status("admin_token_obtained", True) return True else: logger.error("No access token in response") else: logger.error(f"Failed to get admin token (attempt {retry+1}): {response.text}") # Increase delay with each retry wait_time = (retry + 1) * 2 logger.info(f"Waiting {wait_time} seconds before next attempt...") time.sleep(wait_time) except Exception as e: logger.error(f"Error getting admin token: {str(e)}") time.sleep((retry + 1) * 2) # If we get here, all retries failed logger.error("Failed to get admin token after multiple attempts") # As a fallback, try to use the service role key directly logger.info("Falling back to using service role key for API calls") self.admin_token = SERVICE_ROLE_KEY self.update_status("admin_token_obtained", True) return True def log_step(self, step: str, success: bool, message: Optional[str] = None) -> None: """Log a step in the initialization process""" step_log = { "step": step, "success": success, "timestamp": time.time(), "message": message } if "steps" not in self.status: self.status["steps"] = [] self.status["steps"].append(step_log) self._save_status(self.status) if success: logger.info(f"Step '{step}' completed successfully") else: logger.error(f"Step '{step}' failed: {message}") def initialize_storage(self) -> bool: """Initialize storage buckets and policies""" if self.status["storage"]["initialized"]: logger.info("Storage already initialized, skipping...") return True logger.info("Initializing storage buckets and policies...") try: # Initialize storage client with admin user ID and access token admin_user_id = self.status.get("admin_user_id") admin_access_token = self.status.get("admin_access_token") if not admin_user_id or not admin_access_token: raise ValueError("Admin user ID and access token are required for bucket initialization") # Create Supabase client with service role access and admin token supabase = SupabaseServiceRoleClient.for_admin(admin_access_token) # Define core buckets core_buckets = [ { "id": "cc.users", "name": "CC Users", "public": False, "file_size_limit": 50 * 1024 * 1024, # 50MB "allowed_mime_types": [ 'image/*', 'video/*', 'application/pdf', 'application/msword', 'application/vnd.openxmlformats-officedocument.*', 'text/plain', 'text/csv', 'application/json' ] }, { "id": "cc.institutes", "name": "CC Institutes", "public": False, "file_size_limit": 50 * 1024 * 1024, # 50MB "allowed_mime_types": [ 'image/*', 'video/*', 'application/pdf', 'application/msword', 'application/vnd.openxmlformats-officedocument.*', 'text/plain', 'text/csv', 'application/json' ] } ] # Create each core bucket results = [] for bucket in core_buckets: try: # Create bucket with properly typed options options = CreateBucketOptions( public=bucket["public"], file_size_limit=bucket["file_size_limit"], allowed_mime_types=bucket["allowed_mime_types"], name=bucket["name"] ) result = supabase.create_bucket(bucket["id"], options=options) results.append({ "bucket": bucket["id"], "status": "success", "result": result }) except Exception as e: logger.error(f"Error creating bucket {bucket['id']}: {str(e)}") results.append({ "bucket": bucket["id"], "status": "error", "error": str(e) }) # Update status based on results for result in results: bucket_id = result["bucket"] success = result["status"] == "success" self.status["storage"]["buckets"][bucket_id] = success if success: self.log_step(f"storage_bucket_{bucket_id}", True, "Bucket created successfully") else: self.log_step(f"storage_bucket_{bucket_id}", False, f"Failed to create bucket {bucket_id}: {result.get('error', 'Unknown error')}") return False # Check if all buckets were created successfully if all(self.status["storage"]["buckets"].values()): logger.info("Storage initialization completed successfully") self.status["storage"]["initialized"] = True self._save_status(self.status) return True else: logger.error("Some buckets failed to initialize") return False except Exception as e: self.log_step("storage_initialization", False, str(e)) return False def create_schools_database(self) -> bool: """Create the schools Neo4j database""" if self.status.get("schools_db_created"): logger.info("Schools database already created, skipping...") return True logger.info("Creating schools Neo4j database...") # For now, we'll just mark this as done since we can't easily create the Neo4j database directly # In a production environment, you would need to use the Neo4j Admin API or a direct connection logger.info("Schools database creation marked as completed") self.update_status("schools_db_created", True) return True def initialize_schema(self) -> bool: """Initialize Neo4j schema (constraints and indexes)""" if self.status.get("schema_initialized"): logger.info("Schema already initialized, skipping...") return True logger.info("Initializing Neo4j schema...") # For now, we'll just mark this as done since we can't easily initialize the schema directly # In a production environment, you would need to use the Neo4j Cypher API or a direct connection logger.info("Schema initialization marked as completed") self.update_status("schema_initialized", True) return True def import_sample_schools(self) -> bool: """Import sample schools data""" if self.status.get("neo4j", {}).get("schools_imported"): logger.info("Sample schools already imported, skipping...") return True logger.info("Importing sample schools data...") try: # Ensure Supabase headers are properly set self._ensure_supabase_headers() # Check if schools CSV exists csv_path = os.path.join(self.init_dir, "sample_schools.csv") if not os.path.exists(csv_path): logger.warning("No schools CSV file found, skipping import") self.status["neo4j"]["schools_imported"] = True self._save_status(self.status) return True # Read and parse the CSV file with open(csv_path, "r") as f: csv_reader = csv.DictReader(f) schools = list(csv_reader) logger.info(f"Found {len(schools)} schools in CSV file") # Add a date format conversion function def convert_date_format(date_str: str) -> Optional[str]: """Convert date from DD-MM-YYYY to YYYY-MM-DD format""" if not date_str or date_str == "": return None try: if "-" in date_str: day, month, year = date_str.split("-") return f"{year}-{month}-{day}" return None except: return None # Import each school success_count = 0 for school in schools: try: # Format the school data school_data = { "urn": school.get("URN"), "establishment_name": school.get("EstablishmentName"), "la_code": school.get("LA (code)"), "la_name": school.get("LA (name)"), "establishment_number": school.get("EstablishmentNumber"), "establishment_type": school.get("TypeOfEstablishment (name)"), "establishment_type_group": school.get("EstablishmentTypeGroup (name)"), "establishment_status": school.get("EstablishmentStatus (name)"), "reason_establishment_opened": school.get("ReasonEstablishmentOpened (name)"), "open_date": school.get("OpenDate"), "reason_establishment_closed": school.get("ReasonEstablishmentClosed (name)"), "close_date": school.get("CloseDate"), "phase_of_education": school.get("PhaseOfEducation (name)"), "statutory_low_age": school.get("StatutoryLowAge"), "statutory_high_age": school.get("StatutoryHighAge"), "boarders": school.get("Boarders (name)"), "nursery_provision": school.get("NurseryProvision (name)"), "official_sixth_form": school.get("OfficialSixthForm (name)"), "gender": school.get("Gender (name)"), "religious_character": school.get("ReligiousCharacter (name)"), "religious_ethos": school.get("ReligiousEthos (name)"), "diocese": school.get("Diocese (name)"), "admissions_policy": school.get("AdmissionsPolicy (name)"), "school_capacity": school.get("SchoolCapacity"), "special_classes": school.get("SpecialClasses (name)"), "census_date": school.get("CensusDate"), "number_of_pupils": school.get("NumberOfPupils"), "number_of_boys": school.get("NumberOfBoys"), "number_of_girls": school.get("NumberOfGirls"), "percentage_fsm": school.get("PercentageFSM"), "trust_school_flag": school.get("TrustSchoolFlag (name)"), "trusts_name": school.get("Trusts (name)"), "school_sponsor_flag": school.get("SchoolSponsorFlag (name)"), "school_sponsors_name": school.get("SchoolSponsors (name)"), "federation_flag": school.get("FederationFlag (name)"), "federations_name": school.get("Federations (name)"), "ukprn": school.get("UKPRN"), "fehe_identifier": school.get("FEHEIdentifier"), "further_education_type": school.get("FurtherEducationType (name)"), "ofsted_last_inspection": school.get("OfstedLastInsp"), "last_changed_date": school.get("LastChangedDate"), "street": school.get("Street"), "locality": school.get("Locality"), "address3": school.get("Address3"), "town": school.get("Town"), "county": school.get("County (name)"), "postcode": school.get("Postcode"), "school_website": school.get("SchoolWebsite"), "telephone_num": school.get("TelephoneNum"), "head_title": school.get("HeadTitle (name)"), "head_first_name": school.get("HeadFirstName"), "head_last_name": school.get("HeadLastName"), "head_preferred_job_title": school.get("HeadPreferredJobTitle"), "gssla_code": school.get("GSSLACode (name)"), "parliamentary_constituency": school.get("ParliamentaryConstituency (name)"), "urban_rural": school.get("UrbanRural (name)"), "rsc_region": school.get("RSCRegion (name)"), "country": school.get("Country (name)"), "uprn": school.get("UPRN"), "sen_stat": school.get("SENStat") == "true", "sen_no_stat": school.get("SENNoStat") == "true", "sen_unit_on_roll": school.get("SenUnitOnRoll"), "sen_unit_capacity": school.get("SenUnitCapacity"), "resourced_provision_on_roll": school.get("ResourcedProvisionOnRoll"), "resourced_provision_capacity": school.get("ResourcedProvisionCapacity") } # Update the data type conversion section for key, value in school_data.items(): if value == "": school_data[key] = None elif key in ["statutory_low_age", "statutory_high_age", "school_capacity", "number_of_pupils", "number_of_boys", "number_of_girls", "sen_unit_on_roll", "sen_unit_capacity", "resourced_provision_on_roll", "resourced_provision_capacity"]: try: if value is not None and value != "": school_data[key] = int(float(value)) # Handle both integer and decimal strings except (ValueError, TypeError): school_data[key] = None elif key in ["percentage_fsm"]: try: if value is not None and value != "": school_data[key] = float(value) except (ValueError, TypeError): school_data[key] = None elif key in ["open_date", "close_date", "census_date", "ofsted_last_inspection", "last_changed_date"]: if value and value != "": # Convert date format converted_date = convert_date_format(value) if converted_date: school_data[key] = converted_date else: school_data[key] = None else: school_data[key] = None # Insert the school into the institute_imports table response = self._supabase_request_with_retry( 'post', f"{SUPABASE_URL}/rest/v1/institute_imports", headers=self.supabase_headers, json=school_data ) if response.status_code in (200, 201): try: response_data = response.json() success_count += 1 logger.info(f"Successfully imported school {school.get('URN')}: {school.get('EstablishmentName')}") except json.JSONDecodeError as e: # If we can't parse the response but the status code was successful, consider it a success if response.status_code in (200, 201): success_count += 1 logger.info(f"Successfully imported school {school.get('URN')}: {school.get('EstablishmentName')} (response not JSON)") else: logger.error(f"Failed to parse response for school {school.get('URN')}: {e}, Response: {response.text}") else: logger.error(f"Failed to import school {school.get('URN')}: {response.text}") except Exception as e: logger.error(f"Error importing school {school.get('URN')}: {str(e)}") logger.info(f"Successfully imported {success_count} out of {len(schools)} schools") # Mark as completed even if some schools failed self.status["neo4j"]["schools_imported"] = True self._save_status(self.status) return True except Exception as e: logger.error(f"Error importing sample schools: {str(e)}") return False def initialize_calendar_database(self) -> bool: """Initialize the main calendar database""" if self.status.get("calendar_database", {}).get("initialized"): logger.info("Calendar database already initialized, skipping...") return True try: # Initialize the status dictionaries if they don't exist self.status.setdefault("calendar_database", {}) self.status.setdefault("cc_database", {}) # 1. Create main calendar database logger.info("Creating main calendar database...") result = self.neo4j_service.create_database("cc.calendar") if result["status"] != "success": self.log_step("calendar_database_creation", False, result["message"]) return False self.status["calendar_database"]["database_created"] = True self.log_step("calendar_database_creation", True) # 2. Create the cc.calendar storage bucket logger.info("Creating cc.calendar storage bucket...") admin_access_token = self.status.get("admin_access_token") if not admin_access_token: self.log_step("calendar_storage_creation", False, "Admin access token not found") return False storage_client = SupabaseServiceRoleClient.for_admin(admin_access_token) try: bucket_options = CreateBucketOptions( name="CC Calendar Files", public=True, file_size_limit=50 * 1024 * 1024, # 50MB allowed_mime_types=[ 'image/*', 'video/*', 'application/pdf', 'application/msword', 'application/vnd.openxmlformats-officedocument.*', 'text/plain', 'text/csv', 'application/json' ] ) storage_client.create_bucket("cc.calendar", bucket_options) self.status["calendar_database"]["storage_created"] = True self.log_step("calendar_storage_creation", True) except Exception as e: self.log_step("calendar_storage_creation", False, str(e)) return False # 3. Initialize schema on calendar database logger.info("Initializing Neo4j schema on calendar database...") result = self.neo4j_service.initialize_schema("cc.calendar") if result["status"] != "success": self.log_step("calendar_schema_initialization", False, result["message"]) return False self.status["calendar_database"]["schema_initialized"] = True self.log_step("calendar_schema_initialization", True) # 4. Create test calendar logger.info("Creating test calendar...") start_date = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) end_date = start_date + timedelta(days=90) # 3 months calendar_nodes = create_calendar( db_name="cc.calendar", start_date=start_date, end_date=end_date, attach_to_calendar_node=True, owner_node=None, time_chunk_node_length=0 ) if calendar_nodes: self.status["cc_database"]["calendar_created"] = True self.log_step("cc_calendar_creation", True) else: self.log_step("cc_calendar_creation", False, "Failed to create calendar") return False self.status["calendar_database"]["initialized"] = True self.status["cc_database"]["initialized"] = True self._save_status(self.status) return True except Exception as e: self.log_step("cc_database_initialization", False, str(e)) return False def initialize_neo4j(self) -> bool: """Initialize Neo4j databases and schema""" if self.status["neo4j"]["initialized"]: logger.info("Neo4j already initialized, skipping...") return True try: # 1. Create main schools database logger.info("Creating main schools database...") result = self.neo4j_service.create_database("cc.institutes") if result["status"] != "success": self.log_step("neo4j_database_creation", False, result["message"]) return False self.status["neo4j"]["database_created"] = True self.log_step("neo4j_database_creation", True) # 2. Initialize schema on the custom database logger.info("Initializing Neo4j schema on cc.institutes database...") result = self.neo4j_service.initialize_schema("cc.institutes") if result["status"] != "success": self.log_step("neo4j_schema_initialization", False, result["message"]) return False self.status["neo4j"]["schema_initialized"] = True self.log_step("neo4j_schema_initialization", True) self.status["neo4j"]["initialized"] = True self._save_status(self.status) return True except Exception as e: self.log_step("neo4j_initialization", False, str(e)) return False def initialize_default_school(self) -> bool: """Initialize the default school structure with public and private databases""" if self.status.get("default_school", {}).get("initialized"): logger.info("Default school already initialized, skipping...") return True try: # Default school configuration school_config = { "id": "default", "type": "development", "name": "Default School", "website": "https://example.com", "timetable_file": "default_institute/default_timetable.xlsx", "curriculum_file": "default_institute/default_curriculum.xlsx" } # Ensure Supabase headers are properly set self._ensure_supabase_headers() # Define database names following the convention base_path = f"cc.institutes.{school_config['type']}.{school_config['id']}" public_db_name = base_path private_db_name = base_path # Removed .private suffix curriculum_db_name = f"{base_path}.curriculum" # Create public school node first logger.info("Creating public school node...") public_school_node = SchoolNode( unique_id=f'School_{school_config["id"]}', tldraw_snapshot="", id=school_config["id"], school_type=school_config["type"], name=school_config["name"], website=school_config["website"] ) # 1. Create storage buckets for school logger.info("Creating storage buckets for default school...") admin_access_token = self.status.get("admin_access_token") if not admin_access_token: self.log_step("default_school_storage", False, "Admin access token not found") return False bucket_results = init_school.create_school_buckets( school_id=school_config["id"], school_type=school_config["type"], school_name=school_config["name"], admin_access_token=admin_access_token ) # Check if all buckets were created successfully expected_buckets = [ f"{base_path}.public", # Public bucket f"{base_path}.private", # Private bucket f"{base_path}.curriculum.public", # Public curriculum bucket f"{base_path}.curriculum.private" # Private curriculum bucket ] for bucket_id in expected_buckets: if bucket_id not in bucket_results or bucket_results[bucket_id]["status"] != "success": self.log_step("default_school_storage", False, f"Failed to create bucket {bucket_id}") return False self.status.setdefault("default_school", {})["storage_created"] = True self.log_step("default_school_storage", True) # 2. Create school node in public database result = init_school.create_school( db_name=public_db_name, id=school_config["id"], name=school_config["name"], website=school_config["website"], school_type=school_config["type"], is_public=True, school_node=public_school_node ) if not result: self.log_step("default_school_public_creation", False, "Failed to create public school node") return False self.status.setdefault("default_school", {})["public_created"] = True self.log_step("default_school_public_creation", True) # 3. Create private school database and node logger.info(f"Creating private school database: {private_db_name}") result = self.neo4j_service.create_database(private_db_name) if result["status"] != "success": self.log_step("default_school_private_creation", False, result["message"]) return False # Create private school node with more details logger.info("Creating private school node...") private_school_node = SchoolNode( unique_id=f'School_{school_config["id"]}', tldraw_snapshot="", id=school_config["id"], school_type=school_config["type"], name=school_config["name"], website=school_config["website"], # Add required private fields with default values establishment_number="0000", establishment_name=school_config["name"], establishment_type="Development", establishment_status="Open", phase_of_education="All", statutory_low_age=11, statutory_high_age=18, school_capacity=1000 ) # Create school node in private database result = init_school.create_school( db_name=private_db_name, id=school_config["id"], name=school_config["name"], website=school_config["website"], school_type=school_config["type"], is_public=False, school_node=private_school_node ) if not result: self.log_step("default_school_private_creation", False, "Failed to create private school node") return False # Create curriculum database logger.info(f"Creating curriculum database: {curriculum_db_name}") result = self.neo4j_service.create_database(curriculum_db_name) if result["status"] != "success": self.log_step("default_school_curriculum_creation", False, result["message"]) return False # 4. Import timetable data logger.info("Importing timetable data...") timetable_path = os.path.join(self.init_dir, school_config["timetable_file"]) if not os.path.exists(timetable_path): self.log_step("default_school_timetable", False, f"Timetable file not found: {timetable_path}") return False school_timetable_dataframes = xl.create_dataframes(timetable_path) init_school_timetable.create_school_timetable( dataframes=school_timetable_dataframes, db_name=private_db_name, school_node=private_school_node ) self.status["default_school"]["timetable_imported"] = True self.log_step("default_school_timetable", True) # 5. Import curriculum data logger.info("Importing curriculum data...") curriculum_path = os.path.join(self.init_dir, school_config["curriculum_file"]) if not os.path.exists(curriculum_path): self.log_step("default_school_curriculum", False, f"Curriculum file not found: {curriculum_path}") return False school_curriculum_dataframes = xl.create_dataframes(curriculum_path) init_school_curriculum.create_curriculum( dataframes=school_curriculum_dataframes, db_name=private_db_name, curriculum_db_name=curriculum_db_name, school_node=private_school_node ) self.status["default_school"]["curriculum_imported"] = True self.log_step("default_school_curriculum", True) # 6. Add the default school to Supabase institutes table logger.info("Adding default school to Supabase institutes table...") try: # Check if the school already exists in the institutes table response = self._supabase_request_with_retry( 'get', f"{SUPABASE_URL}/rest/v1/institutes", headers=self.supabase_headers, params={ "select": "*", "name": f"eq.{school_config['name']}" } ) if response.status_code != 200: logger.error(f"Failed to check institutes table: {response.text}") self.log_step("default_school_supabase", False, f"Failed to check institutes table: {response.text}") return False existing_institutes = response.json() institute_id = None if existing_institutes and len(existing_institutes) > 0: # School already exists, use its ID institute_id = existing_institutes[0]["id"] logger.info(f"Default school already exists in institutes table with ID: {institute_id}") else: # Create the school in the institutes table address_json = {"street": "123 Dev Street", "city": "Development City", "postcode": "DEV123"} metadata_json = { "school_type": school_config["type"], "id": school_config["id"] } school_data = { "name": school_config["name"], "urn": f"DEV-{school_config['id']}", "status": "active", "website": school_config["website"], "address": json.dumps(address_json), "metadata": json.dumps(metadata_json), "neo4j_unique_id": private_school_node.unique_id, "neo4j_public_sync_status": "synced", "neo4j_public_sync_at": datetime.now().isoformat(), "neo4j_private_sync_status": "synced", "neo4j_private_sync_at": datetime.now().isoformat() } # Log the request data for debugging logger.debug(f"Sending request to create institute with data: {school_data}") response = self._supabase_request_with_retry( 'post', f"{SUPABASE_URL}/rest/v1/institutes", headers=self.supabase_headers, json=school_data ) if response.status_code not in (200, 201): logger.error(f"Failed to add default school to institutes table: {response.text}") self.log_step("default_school_supabase", False, f"Failed to add default school to institutes table: {response.text}") return False # Handle the response more carefully try: response_data = response.json() if isinstance(response_data, list) and len(response_data) > 0: institute_id = response_data[0]["id"] elif isinstance(response_data, dict) and "id" in response_data: institute_id = response_data["id"] else: logger.error(f"Unexpected response format: {response_data}") self.log_step("default_school_supabase", False, f"Unexpected response format: {response_data}") return False logger.info(f"Added default school to institutes table with ID: {institute_id}") except json.JSONDecodeError as e: logger.error(f"Failed to parse response from institutes table: {e}, Response: {response.text}") self.log_step("default_school_supabase", False, f"Failed to parse response from institutes table: {e}") # If we can't parse the response but the status code was successful, try to continue # by querying for the school we just created try: # Query for the school we just created query_response = self._supabase_request_with_retry( 'get', f"{SUPABASE_URL}/rest/v1/institutes", headers=self.supabase_headers, params={ "select": "*", "name": f"eq.{school_config['name']}" } ) if query_response.status_code == 200: query_data = query_response.json() if query_data and len(query_data) > 0: institute_id = query_data[0]["id"] logger.info(f"Retrieved default school from institutes table with ID: {institute_id}") else: logger.error("School was created but could not be retrieved") self.log_step("default_school_supabase", False, "School was created but could not be retrieved") return False else: logger.error(f"Failed to retrieve created school: {query_response.text}") self.log_step("default_school_supabase", False, f"Failed to retrieve created school: {query_response.text}") return False except Exception as query_error: logger.error(f"Error retrieving created school: {str(query_error)}") self.log_step("default_school_supabase", False, f"Error retrieving created school: {str(query_error)}") return False # 7. Add super admin to the default school in Neo4j logger.info("Adding super admin to default school in Neo4j...") # Get the super admin worker node from the status admin_user_id = self.status.get("admin_user_id") if not admin_user_id: logger.error("Admin user ID not found in status") self.log_step("default_school_admin", False, "Admin user ID not found in status") return False # Initialize Neontology connection neon.init_neontology_connection() # Get the super admin worker node from the admin's private database admin_db_name = f"cc.users.admin.{os.getenv('SUPER_ADMIN_USERNAME', 'superadmin')}" # First, update the worker_db_name property in the super admin node to include the default school with get_session(database=admin_db_name) as session: # Get the super admin worker node result = session.run( """ MATCH (n:SuperAdmin {unique_id: $unique_id}) RETURN n """, unique_id=f"SuperAdmin_{admin_user_id}" ) admin_node_record = result.single() if not admin_node_record: logger.error(f"Super admin node not found in database {admin_db_name}") self.log_step("default_school_admin", False, f"Super admin node not found in database {admin_db_name}") return False admin_node = admin_node_record["n"] # Update the worker_db_name property to include the default school database worker_db_name = f"{private_db_name}" # Update the worker_db_name property session.run( """ MATCH (n:SuperAdmin {unique_id: $unique_id}) SET n.worker_db_name = $worker_db_name """, unique_id=f"SuperAdmin_{admin_user_id}", worker_db_name=worker_db_name, database=admin_db_name ) logger.info(f"Updated super admin worker_db_name to {worker_db_name} in {admin_db_name}") # Create a copy of the super admin node in the default school database from modules.database.schemas.nodes.workers.workers import SuperAdminNode # Create the super admin node in the default school database super_admin_node = SuperAdminNode( unique_id=f"SuperAdmin_{admin_user_id}", tldraw_snapshot="", worker_name=os.getenv('SUPER_ADMIN_DISPLAY_NAME', 'Super Admin'), worker_email=os.getenv('SUPER_ADMIN_WORKER_EMAIL') or os.getenv('ADMIN_EMAIL', 'admin@example.com'), worker_db_name=worker_db_name, worker_type="superadmin" ) # Create the super admin node in the default school database neon.create_or_merge_neontology_node( node=super_admin_node, database=private_db_name, operation='merge' ) logger.info(f"Created super admin node in default school database {private_db_name}") # Create the necessary structure nodes for the school if they don't exist logger.info("Creating structure nodes for the default school...") # Create Staff Structure Node staff_structure_node = school_structures.StaffStructureNode( unique_id=f"StaffStructure_{school_config['id']}", tldraw_snapshot="" ) neon.create_or_merge_neontology_node( node=staff_structure_node, database=private_db_name, operation='merge' ) # Create Student Structure Node student_structure_node = school_structures.StudentStructureNode( unique_id=f"StudentStructure_{school_config['id']}", tldraw_snapshot="" ) neon.create_or_merge_neontology_node( node=student_structure_node, database=private_db_name, operation='merge' ) # Create IT Admin Structure Node it_admin_structure_node = school_structures.ITAdminStructureNode( unique_id=f"ITAdminStructure_{school_config['id']}", tldraw_snapshot="" ) neon.create_or_merge_neontology_node( node=it_admin_structure_node, database=private_db_name, operation='merge' ) # Create relationships between school and structure nodes # School has Staff Structure school_staff_rel = structure_relationships.SchoolHasStaffStructure( source=private_school_node, target=staff_structure_node ) neon.create_or_merge_neontology_relationship( relationship=school_staff_rel, database=private_db_name, operation='merge' ) # School has Student Structure school_student_rel = structure_relationships.SchoolHasStudentStructure( source=private_school_node, target=student_structure_node ) neon.create_or_merge_neontology_relationship( relationship=school_student_rel, database=private_db_name, operation='merge' ) # School has IT Admin Structure school_it_admin_rel = structure_relationships.SchoolHasITAdminStructure( source=private_school_node, target=it_admin_structure_node ) neon.create_or_merge_neontology_relationship( relationship=school_it_admin_rel, database=private_db_name, operation='merge' ) # Connect super admin to IT Admin Structure admin_structure_rel = structure_relationships.SuperAdminBelongsToITAdminStructure( source=super_admin_node, target=it_admin_structure_node ) neon.create_or_merge_neontology_relationship( relationship=admin_structure_rel, database=private_db_name, operation='merge' ) logger.info("Created structure nodes and relationships for default school") # 8. Add super admin to the institute_memberships table in Supabase logger.info("Adding super admin to institute_memberships table...") # Check if the membership already exists response = self._supabase_request_with_retry( 'get', f"{SUPABASE_URL}/rest/v1/institute_memberships", headers=self.supabase_headers, params={ "select": "*", "profile_id": f"eq.{admin_user_id}", "institute_id": f"eq.{institute_id}" } ) if response.status_code != 200: logger.error(f"Failed to check institute_memberships table: {response.text}") self.log_step("default_school_admin_membership", False, f"Failed to check institute_memberships table: {response.text}") return False try: existing_memberships = response.json() if not existing_memberships or len(existing_memberships) == 0: # Create the membership membership_data = { "profile_id": admin_user_id, "institute_id": institute_id, "role": "admin", "tldraw_preferences": "{}", # Use string instead of json.dumps({}) "metadata": json.dumps({ "worker_type": "superadmin", "neo4j_unique_id": f"SuperAdmin_{admin_user_id}" }) } # Log the request data for debugging logger.debug(f"Sending request to create institute membership with data: {membership_data}") response = self._supabase_request_with_retry( 'post', f"{SUPABASE_URL}/rest/v1/institute_memberships", headers=self.supabase_headers, json=membership_data ) if response.status_code not in (200, 201): logger.error(f"Failed to add super admin to institute_memberships table: {response.text}") self.log_step("default_school_admin_membership", False, f"Failed to add super admin to institute_memberships table: {response.text}") return False logger.info("Added super admin to institute_memberships table") else: logger.info("Super admin already exists in institute_memberships table") except json.JSONDecodeError as e: logger.error(f"Failed to parse response from institute_memberships table: {e}, Response: {response.text}") self.log_step("default_school_admin_membership", False, f"Failed to parse response from institute_memberships table: {e}") return False self.status["default_school"]["supabase_updated"] = True self.log_step("default_school_supabase", True) except Exception as e: logger.error(f"Error updating Supabase tables: {str(e)}") self.log_step("default_school_supabase", False, f"Error updating Supabase tables: {str(e)}") return False # Mark completion self.status["default_school"]["initialized"] = True self._save_status(self.status) return True except Exception as e: self.log_step("default_school_initialization", False, str(e)) return False def _ensure_supabase_headers(self): """Ensure Supabase headers are properly set""" if not self.supabase_headers or 'apikey' not in self.supabase_headers: logger.info("Initializing Supabase headers") self.supabase_headers = { "apikey": SERVICE_ROLE_KEY, "Authorization": f"Bearer {SERVICE_ROLE_KEY}", "Content-Type": "application/json", "Prefer": "return=representation" # This ensures Supabase returns the created record } elif 'Prefer' not in self.supabase_headers: # Add the Prefer header if it's missing self.supabase_headers['Prefer'] = "return=representation" logger.debug(f"Using Supabase headers: {self.supabase_headers}") def _supabase_request_with_retry(self, method, url, **kwargs): """Make a request to Supabase with retry logic""" max_retries = 3 retry_delay = 2 # seconds for attempt in range(max_retries): try: if method.lower() == 'get': response = requests.get(url, **kwargs) elif method.lower() == 'post': response = requests.post(url, **kwargs) elif method.lower() == 'put': response = requests.put(url, **kwargs) elif method.lower() == 'delete': response = requests.delete(url, **kwargs) else: raise ValueError(f"Unsupported HTTP method: {method}") # If successful or client error (4xx), don't retry if response.status_code < 500: return response # Server error (5xx), retry after delay logger.warning(f"Supabase server error (attempt {attempt+1}/{max_retries}): {response.status_code} - {response.text}") time.sleep(retry_delay * (attempt + 1)) # Exponential backoff except requests.RequestException as e: logger.warning(f"Supabase request exception (attempt {attempt+1}/{max_retries}): {str(e)}") if attempt == max_retries - 1: raise time.sleep(retry_delay * (attempt + 1)) # If we get here, all retries failed with server errors raise requests.RequestException(f"Failed after {max_retries} attempts to {method} {url}") def check_completion(self) -> bool: """Check if all initialization steps are complete""" try: # Initialize required status dictionaries if they don't exist self.status.setdefault("cc_database", {}) self.status.setdefault("default_school", {}) # Add default school check to existing checks if not (self.status["super_admin_created"] and self.status["admin_token_obtained"] and self.status["storage"]["initialized"] and self.status["neo4j"]["initialized"] and self.status["neo4j"]["schools_imported"] and self.status["cc_database"].get("initialized", False) and self.status["default_school"].get("initialized", False)): return False # Check cc.institutes database result = self.neo4j_service.check_database_exists("cc.institutes") if not result.get("exists"): logger.warning("cc.institutes database does not exist") self.status["neo4j"]["initialized"] = False self._save_status(self.status) return False # Check cc database and calendar result = self.neo4j_service.check_database_exists("cc") if not result.get("exists"): logger.warning("cc database does not exist") self.status["cc_database"]["initialized"] = False self._save_status(self.status) return False # Verify calendar exists in cc database try: result = self.neo4j_service.check_node_exists("cc", "Calendar") if not result["exists"]: logger.warning("No calendar found in cc database") self.status["cc_database"]["calendar_created"] = False self._save_status(self.status) return False except Exception as e: logger.error(f"Error checking calendar: {str(e)}") return False return True except Exception as e: logger.error(f"Error checking completion: {str(e)}") return False def run(self) -> bool: """Run the complete initialization process""" # Check if super admin initialization is required init_super_admin = os.getenv("SUPER_ADMIN_CHECK", "true").lower() == "true" if init_super_admin: if self.check_completion(): logger.info("System already initialized, skipping...") return True else: logger.info("Skipping super admin check due to INIT_SUPER_ADMIN being false") # Wait for services if not self.wait_for_services(): return False # Ensure Supabase headers are properly set self._ensure_supabase_headers() # Run initialization steps in order steps = [ self.create_super_admin if init_super_admin else lambda: True, self.get_admin_token, self.initialize_storage, self.initialize_neo4j, self.import_sample_schools, self.initialize_calendar_database, self.initialize_default_school ] success = True for step in steps: if not step(): success = False break if success: logger.info("System initialization completed successfully") self.status["completed"] = True self.status["timestamp"] = time.time() self._save_status(self.status) else: logger.error("System initialization failed") return success if __name__ == "__main__": init_system = InitializationSystem() success = init_system.run() sys.exit(0 if success else 1)