api/routers/simple_upload.py
2025-11-14 14:47:19 +00:00

544 lines
22 KiB
Python

"""
Simple Upload Router
===================
Handles file and directory uploads without automatic processing.
Just stores files in Supabase storage and creates database records.
Features:
- Single file upload
- Directory/folder upload with manifest
- No automatic processing
- Immediate response to users
- Directory structure preservation
"""
import os
import uuid
import json
import tempfile
import logging
from typing import Dict, List, Optional, Any
from pathlib import Path
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, BackgroundTasks
from fastapi.responses import JSONResponse
from modules.auth.supabase_bearer import SupabaseBearer
from modules.database.supabase.utils.client import SupabaseServiceRoleClient
from modules.database.supabase.utils.storage import StorageAdmin
from modules.logger_tool import initialise_logger
# Router that the upload / listing / deletion endpoints in this module register on.
router = APIRouter()
# Bearer-auth dependency; every endpoint below receives its result as `payload`.
auth = SupabaseBearer()
# Module logger; level and path come from the LOG_LEVEL / LOG_PATH environment variables.
logger = initialise_logger(__name__, os.getenv("LOG_LEVEL"), os.getenv("LOG_PATH"), 'default', True)
def _choose_bucket(scope: str, user_id: str, school_id: Optional[str]) -> str:
"""Choose appropriate bucket based on scope - matches old system logic."""
scope = (scope or 'teacher').lower()
if scope == 'school' and school_id:
return f"cc.institutes.{school_id}.private"
# teacher / student fall back to users bucket for now
return 'cc.users'
@router.post("/files/upload")
async def upload_single_file(
    cabinet_id: str = Form(...),
    path: str = Form(...),
    scope: str = Form(...),
    file: UploadFile = File(...),
    payload: Dict[str, Any] = Depends(auth)
):
    """
    Simple single file upload - no automatic processing.

    Stores the uploaded bytes in storage and inserts one row into the
    ``files`` table. If the row cannot be created, the stored object is
    removed again on a best-effort basis.

    Args:
        cabinet_id: Cabinet the file belongs to; also the first segment of
            the storage path.
        path: Client-supplied path, used as the file name only when the
            upload itself carries no filename.
        scope: Accepted from the form but not consulted here; the bucket is
            always chosen with the 'teacher' scope.
        file: The uploaded file.
        payload: Auth payload; the user id is read from 'sub' or 'user_id'.

    Returns:
        A dict with the upload status, the created file record and
        processing hints.

    Raises:
        HTTPException: 401 when the payload has no user id; 500 when the
            storage upload or the database insert fails.
    """
    try:
        user_id = payload.get('sub') or payload.get('user_id')
        if not user_id:
            raise HTTPException(status_code=401, detail="User ID required")

        # Read file content (the whole upload is held in memory)
        file_bytes = await file.read()
        file_size = len(file_bytes)
        mime_type = file.content_type or 'application/octet-stream'
        filename = file.filename or path
        logger.info(f"📤 Simple upload: {filename} ({file_size} bytes) for user {user_id}")

        # Initialize services
        client = SupabaseServiceRoleClient()
        storage = StorageAdmin()

        # Generate file ID and storage path
        file_id = str(uuid.uuid4())
        # Use same bucket logic as old system for consistency
        bucket = _choose_bucket('teacher', user_id, None)  # Default to teacher scope
        storage_path = f"{cabinet_id}/{file_id}/{filename}"

        # Store file in Supabase storage
        try:
            storage.upload_file(bucket, storage_path, file_bytes, mime_type, upsert=True)
        except Exception as e:
            logger.error(f"Storage upload failed for {file_id}: {e}")
            raise HTTPException(status_code=500, detail=f"Storage upload failed: {str(e)}") from e

        # Create database record. Only the insert call itself sits inside the
        # try so that the "no data returned" HTTPException below is not caught
        # and re-wrapped as a generic database error.
        try:
            insert_res = client.supabase.table('files').insert({
                'id': file_id,
                'name': filename,
                'cabinet_id': cabinet_id,
                'bucket': bucket,
                'path': storage_path,
                'mime_type': mime_type,
                'uploaded_by': user_id,
                'size_bytes': file_size,
                'source': 'classroomcopilot-web',
                'is_directory': False,
                'processing_status': 'uploaded',
                'relative_path': filename  # For single files, relative path is just the filename
            }).execute()
        except Exception as e:
            logger.error(f"Database insert failed for {file_id}: {e}")
            _discard_stored_upload(storage, bucket, storage_path)
            raise HTTPException(status_code=500, detail=f"Database error: {str(e)}") from e

        if not insert_res.data:
            # Clean up storage on DB failure
            logger.error(f"Database insert returned no data for {file_id}")
            _discard_stored_upload(storage, bucket, storage_path)
            raise HTTPException(status_code=500, detail="Failed to create file record")

        file_record = insert_res.data[0]
        logger.info(f"✅ Simple upload completed: {file_id}")
        return {
            'status': 'success',
            'message': 'File uploaded successfully',
            'file': file_record,
            'processing_required': False,  # No automatic processing
            'next_steps': 'File is ready for manual processing if needed'
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Upload error: {e}")
        raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}") from e


def _discard_stored_upload(storage, bucket: str, storage_path: str) -> None:
    """Best-effort removal of a stored object whose database record could not be created."""
    try:
        storage.delete_file(bucket, storage_path)
    except Exception as cleanup_error:
        # Cleanup must never mask the original failure, so only log it.
        logger.warning(f"Failed to clean up storage object {storage_path}: {cleanup_error}")
@router.post("/files/upload-directory")
async def upload_directory(
    cabinet_id: str = Form(...),
    scope: str = Form(...),
    directory_name: str = Form(...),
    files: List[UploadFile] = File(...),
    file_paths: str = Form(...),  # JSON string of relative paths
    payload: Dict[str, Any] = Depends(auth)
):
    """
    Upload a complete directory/folder with all files.

    Creates one ``files`` row per directory found in the relative paths
    (parents before children), uploads every file to storage, attaches a
    manifest to the first top-level directory and finally inserts all file
    rows in one batch. A file that fails to upload is skipped and reported
    in ``failed_files``; it does not abort the whole request.

    Args:
        cabinet_id: Cabinet the upload belongs to.
        scope: Accepted from the form but not consulted here; the bucket is
            always chosen with the 'teacher' scope.
        directory_name: Used for logging only.
        files: Uploaded files, in the same order as ``file_paths``.
        file_paths: JSON array of '/'-separated relative paths, one per file.
        payload: Auth payload; the user id is read from 'sub' or 'user_id'.

    Returns:
        A dict summarising the upload (counts, total size, root directory id,
        session id and the relative paths of files that failed).

    Raises:
        HTTPException: 401 without a user id; 400 for malformed
            ``file_paths`` or a count mismatch; 500 for any other failure.
    """
    # Function-scope import: the timestamp needs datetime, which this module
    # does not import at file level.
    from datetime import datetime, timezone

    try:
        user_id = payload.get('sub') or payload.get('user_id')
        if not user_id:
            raise HTTPException(status_code=401, detail="User ID required")

        # Parse file paths
        try:
            relative_paths = json.loads(file_paths)
        except json.JSONDecodeError:
            raise HTTPException(status_code=400, detail="Invalid file_paths JSON")

        # Valid JSON is not enough: a dict, number or list of non-strings
        # would otherwise fail later with an opaque 500.
        if not isinstance(relative_paths, list) or not all(
            isinstance(item, str) and item for item in relative_paths
        ):
            raise HTTPException(
                status_code=400,
                detail="file_paths must be a JSON array of non-empty strings"
            )

        if len(files) != len(relative_paths):
            raise HTTPException(status_code=400, detail="Files and paths count mismatch")

        logger.info(f"📁 Directory upload: {directory_name} ({len(files)} files) for user {user_id}")

        # Initialize services
        client = SupabaseServiceRoleClient()
        storage = StorageAdmin()

        # Generate session ID for this directory upload
        upload_session_id = str(uuid.uuid4())
        # Use same bucket logic as old system for consistency
        bucket = _choose_bucket('teacher', user_id, None)

        total_size = 0
        directory_structure = {}
        uploaded_files = []
        failed_files = []  # Relative paths of files that could not be uploaded
        directory_records = {}  # Track created directories: {relative_path: directory_id}

        # First, analyze all paths to determine directory structure
        all_directories = set()
        for relative_path in relative_paths:
            path_parts = relative_path.split('/')
            # Every proper prefix of the path is a directory (the last part is the filename)
            for depth in range(1, len(path_parts)):
                all_directories.add('/'.join(path_parts[:depth]))

        logger.info(f"📁 Creating directory structure with {len(all_directories)} directories: {sorted(all_directories)}")

        try:
            # Create directory records for all directories (sorted to create parents first)
            sorted_directories = sorted(all_directories, key=lambda x: (len(x.split('/')), x))
            for dir_path in sorted_directories:
                dir_id = str(uuid.uuid4())
                path_parts = dir_path.split('/')
                dir_name = path_parts[-1]  # Last part is the directory name

                # Determine parent directory
                parent_id = None
                if len(path_parts) > 1:
                    parent_path = '/'.join(path_parts[:-1])
                    parent_id = directory_records.get(parent_path)

                directory_record = {
                    'id': dir_id,
                    'name': dir_name,
                    'cabinet_id': cabinet_id,
                    'bucket': bucket,
                    'path': f"{cabinet_id}/{dir_id}/",
                    'mime_type': 'inode/directory',
                    'uploaded_by': user_id,
                    'size_bytes': 0,
                    'source': 'classroomcopilot-web',
                    'is_directory': True,
                    'parent_directory_id': parent_id,
                    'upload_session_id': upload_session_id,
                    'processing_status': 'uploaded',
                    'relative_path': dir_path
                }
                directory_records[dir_path] = dir_id

                # Insert directory record
                client.supabase.table('files').insert(directory_record).execute()
                logger.info(f"📁 Created directory: {dir_path} (ID: {dir_id}, Parent: {parent_id})")

            # Process each file
            for i, (file, relative_path) in enumerate(zip(files, relative_paths)):
                try:
                    # Read file content
                    file_bytes = await file.read()
                    file_size = len(file_bytes)
                    mime_type = file.content_type or 'application/octet-stream'
                    filename = file.filename or f"file_{i}"

                    # Generate file ID and determine parent directory
                    file_id = str(uuid.uuid4())
                    path_parts = relative_path.split('/')
                    if len(path_parts) > 1:
                        parent_dir_path = '/'.join(path_parts[:-1])
                        parent_directory_id = directory_records.get(parent_dir_path)
                    else:
                        parent_directory_id = None  # File is in root cabinet

                    # Use parent directory ID for storage path if exists, otherwise use a generated path
                    if parent_directory_id:
                        storage_path = f"{cabinet_id}/{parent_directory_id}/{path_parts[-1]}"
                    else:
                        storage_path = f"{cabinet_id}/{file_id}"

                    # Store file in Supabase storage
                    storage.upload_file(bucket, storage_path, file_bytes, mime_type, upsert=True)

                    # Count the size only once the upload has succeeded, so the
                    # reported total matches the files that were actually stored.
                    total_size += file_size

                    # Create file record (inserted in one batch after the loop)
                    uploaded_files.append({
                        'id': file_id,
                        'name': filename,
                        'cabinet_id': cabinet_id,
                        'bucket': bucket,
                        'path': storage_path,
                        'mime_type': mime_type,
                        'uploaded_by': user_id,
                        'size_bytes': file_size,
                        'source': 'classroomcopilot-web',
                        'is_directory': False,
                        'parent_directory_id': parent_directory_id,
                        'relative_path': relative_path,
                        'upload_session_id': upload_session_id,
                        'processing_status': 'uploaded'
                    })

                    # Build directory structure for manifest
                    _add_to_directory_structure(directory_structure, relative_path, {
                        'size': file_size,
                        'mime_type': mime_type,
                        'file_id': file_id
                    })
                    logger.info(f"📄 Uploaded file {i+1}/{len(files)}: {relative_path}")
                except Exception as e:
                    logger.error(f"Failed to upload file {relative_path}: {e}")
                    # Continue with other files, don't fail entire upload
                    failed_files.append(relative_path)
                    continue

            # Create directory manifest
            directory_manifest = {
                'total_files': len(uploaded_files),
                'total_size_bytes': total_size,
                'directory_structure': directory_structure,
                # Actual UTC upload time, in the same 'YYYY-MM-DDTHH:MM:SSZ' shape as before
                'upload_timestamp': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
                'upload_method': 'directory_picker',
                'upload_session_id': upload_session_id
            }

            # Update root directory with manifest and total size (if root directory exists)
            root_directory_id = directory_records.get(sorted_directories[0]) if sorted_directories else None
            if root_directory_id:
                update_res = client.supabase.table('files').update({
                    'size_bytes': total_size,
                    'directory_manifest': directory_manifest
                }).eq('id', root_directory_id).execute()
                if not update_res.data:
                    logger.warning("Failed to update root directory with manifest")

            # Insert all file records in batch
            if uploaded_files:
                files_insert_res = client.supabase.table('files').insert(uploaded_files).execute()
                if not files_insert_res.data:
                    logger.warning("Some file records failed to insert")

            logger.info(f"✅ Directory upload completed: {root_directory_id} ({len(uploaded_files)} files, {len(sorted_directories)} directories)")
            return {
                'status': 'success',
                'message': f'Directory uploaded successfully with {len(uploaded_files)} files in {len(sorted_directories)} directories',
                'directories_created': len(sorted_directories),
                'files_count': len(uploaded_files),
                'total_size_bytes': total_size,
                'root_directory_id': root_directory_id,
                'upload_session_id': upload_session_id,
                'failed_files': failed_files,
                'processing_required': False,  # No automatic processing
                'next_steps': 'Files are ready for manual processing if needed'
            }
        except Exception as e:
            logger.error(f"Directory upload failed: {e}")
            # TODO: Implement cleanup of partially uploaded files
            raise HTTPException(status_code=500, detail=f"Directory upload failed: {str(e)}") from e
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Directory upload error: {e}")
        raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}") from e
def _add_to_directory_structure(structure: Dict, relative_path: str, file_info: Dict):
"""Add a file to the directory structure manifest."""
path_parts = relative_path.split('/')
current_level = structure
# Navigate/create directory structure
for i, part in enumerate(path_parts):
if i == len(path_parts) - 1:
# This is the file itself
current_level[part] = file_info
else:
# This is a directory
if part not in current_level:
current_level[part] = {}
current_level = current_level[part]
@router.get("/files")
def list_files(
    cabinet_id: str,
    include_directories: bool = True,
    parent_directory_id: Optional[str] = None,
    page: int = 1,
    per_page: int = 20,
    search: Optional[str] = None,
    sort_by: str = 'created_at',
    sort_order: str = 'desc',
    payload: Dict[str, Any] = Depends(auth)
):
    """
    List files with pagination, search, and sorting support.

    Args:
        cabinet_id: Cabinet to list files from
        include_directories: Whether to include directory entries
        parent_directory_id: Filter by parent directory
        page: Page number (1-based); values below 1 are clamped to 1
        per_page: Items per page (clamped to 1..100)
        search: Search term for filename (case-insensitive substring match)
        sort_by: Field to sort by (name, size_bytes, created_at,
            processing_status, mime_type); anything else falls back to created_at
        sort_order: Sort order (asc, desc); anything else falls back to desc
        payload: Auth payload (required for access, not otherwise read here)

    Returns:
        A dict with the page of ``files``, ``pagination`` metadata and the
        effective ``filters``.

    Raises:
        HTTPException: 500 when the query fails.
    """
    try:
        client = SupabaseServiceRoleClient()

        # Validate pagination parameters
        page = max(1, page)
        per_page = min(max(1, per_page), 100)  # Limit to 100 items per page
        offset = (page - 1) * per_page

        # Validate sort parameters
        valid_sort_fields = ['name', 'size_bytes', 'created_at', 'processing_status', 'mime_type']
        if sort_by not in valid_sort_fields:
            sort_by = 'created_at'
        if sort_order.lower() not in ['asc', 'desc']:
            sort_order = 'desc'

        def _apply_filters(q):
            """Apply the same filters to the data query and the count query."""
            q = q.eq('cabinet_id', cabinet_id)
            if parent_directory_id:
                q = q.eq('parent_directory_id', parent_directory_id)
            # Independent of the parent filter: excluding directories must also
            # work while listing the contents of a specific directory.
            if not include_directories:
                q = q.eq('is_directory', False)
            if search:
                q = q.ilike('name', f"%{search}%")
            return q

        # Build filtered queries
        query = _apply_filters(client.supabase.table('files').select('*'))
        count_query = _apply_filters(client.supabase.table('files').select('id', count='exact'))

        # Get total count; fall back to the number of returned rows when the
        # response carries no usable count (attribute missing or None).
        count_res = count_query.execute()
        total_count = getattr(count_res, 'count', None)
        if total_count is None:
            total_count = len(count_res.data or [])

        # Apply sorting and pagination
        query = query.order(sort_by, desc=(sort_order.lower() == 'desc'))
        query = query.range(offset, offset + per_page - 1)
        res = query.execute()
        files = res.data or []

        # Calculate pagination metadata
        total_pages = (total_count + per_page - 1) // per_page
        has_next = page < total_pages
        has_prev = page > 1

        return {
            'files': files,
            'pagination': {
                'page': page,
                'per_page': per_page,
                'total_count': total_count,
                'total_pages': total_pages,
                'has_next': has_next,
                'has_prev': has_prev,
                'offset': offset
            },
            'filters': {
                'search': search,
                'sort_by': sort_by,
                'sort_order': sort_order,
                'include_directories': include_directories,
                'parent_directory_id': parent_directory_id
            }
        }
    except Exception as e:
        logger.error(f"List files error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.get("/files/{file_id}")
def get_file_details(file_id: str, payload: Dict[str, Any] = Depends(auth)):
    """Get detailed information about a file or directory.

    Args:
        file_id: Id of the ``files`` row to fetch.
        payload: Auth payload (required for access, not otherwise read here).

    Returns:
        The file row as a dict. For directories a ``contents`` key is added
        holding the rows whose ``parent_directory_id`` is this directory.

    Raises:
        HTTPException: 404 when no row has this id; 500 on any other failure.
    """
    try:
        client = SupabaseServiceRoleClient()
        # limit(1) rather than single(): single() raises when no row matches,
        # which would surface as a 500 instead of the intended 404.
        res = client.supabase.table('files').select('*').eq('id', file_id).limit(1).execute()
        rows = res.data or []
        if not rows:
            raise HTTPException(status_code=404, detail="File not found")
        file_data = rows[0]

        # If it's a directory, also get its contents (direct children only)
        if file_data.get('is_directory'):
            contents_res = client.supabase.table('files').select('*').eq('parent_directory_id', file_id).execute()
            file_data['contents'] = contents_res.data or []

        return file_data
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Get file details error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.delete("/files/{file_id}")
def delete_file(file_id: str, payload: Dict[str, Any] = Depends(auth)):
    """Delete a file or directory and its contents.

    For a directory the whole subtree is removed: stored objects of every
    nested file are deleted (best effort, failures are only logged), then the
    database rows are deleted from the deepest level upwards, and finally the
    directory's own row.

    Args:
        file_id: Id of the ``files`` row to delete.
        payload: Auth payload (required for access, not otherwise read here).

    Returns:
        A dict with ``status`` and a human-readable ``message``.

    Raises:
        HTTPException: 404 when no row has this id; 500 on any other failure.
    """
    try:
        client = SupabaseServiceRoleClient()
        storage = StorageAdmin()

        # Get file info. limit(1) rather than single(): single() raises when
        # no row matches, which would surface as a 500 instead of a 404.
        res = client.supabase.table('files').select('*').eq('id', file_id).limit(1).execute()
        rows = res.data or []
        if not rows:
            raise HTTPException(status_code=404, detail="File not found")
        file_data = rows[0]
        is_directory = bool(file_data.get('is_directory'))

        if is_directory:
            directory_ids, nested_files = _collect_directory_tree(client, file_id)

            # Delete every nested file from storage
            for content_file in nested_files:
                try:
                    storage.delete_file(content_file['bucket'], content_file['path'])
                except Exception as e:
                    logger.warning(f"Failed to delete file from storage: {content_file['path']}: {e}")

            # Directories were collected parents-first; walking them in reverse
            # removes the deepest children before the rows that they reference.
            for directory_id in reversed(directory_ids):
                client.supabase.table('files').delete().eq('parent_directory_id', directory_id).execute()
        else:
            # Delete single file from storage
            try:
                storage.delete_file(file_data['bucket'], file_data['path'])
            except Exception as e:
                logger.warning(f"Failed to delete file from storage: {file_data['path']}: {e}")

        # Delete the main record
        client.supabase.table('files').delete().eq('id', file_id).execute()

        logger.info(f"🗑️ Deleted {'directory' if is_directory else 'file'}: {file_id}")
        return {
            'status': 'success',
            'message': f"{'Directory' if is_directory else 'File'} deleted successfully"
        }
    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Delete file error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e


def _collect_directory_tree(client, root_directory_id: str):
    """Walk a directory breadth-first and return (directory ids parents-first, nested file rows)."""
    directory_ids = [root_directory_id]
    seen = {root_directory_id}  # Guards against cycles in parent links
    nested_files = []
    cursor = 0
    while cursor < len(directory_ids):
        current_id = directory_ids[cursor]
        cursor += 1
        children_res = client.supabase.table('files').select('*').eq('parent_directory_id', current_id).execute()
        for child in children_res.data or []:
            if child.get('is_directory'):
                if child['id'] not in seen:
                    seen.add(child['id'])
                    directory_ids.append(child['id'])
            else:
                nested_files.append(child)
    return directory_ids, nested_files