"""Transcription sessions router — CRUD endpoints for transcription sessions and segments."""

# NOTE(review): this is the reviewed post-image of the three hunks changed in
# routers/transcribe/sessions.py (3d507cb..dcd66c7). Unchanged code that the
# hunks only show as context (the `modules.transcription.models` import list,
# `get_user_id`, `create_session`, the `router` object, `get_supabase_client`)
# is cut off by the hunk boundaries and is not reproduced here.

from datetime import datetime
import io
import json
import os
import tempfile
from typing import Optional, List
from urllib.parse import quote

from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import FileResponse, Response

from modules.auth.supabase_bearer import SupabaseBearer

# NOTE(review): `io`, `os`, `tempfile` and `FileResponse` are not referenced
# by the code visible in these hunks; they are kept in case code outside the
# view relies on them.


# Media type sent for each supported export format.
_EXPORT_MEDIA_TYPES = {
    "srt": "application/x-subrip",
    "txt": "text/plain",
    "json": "application/json",
}


def _to_seconds(value) -> float:
    """Return *value* as a float, treating ``None``/missing as ``0.0``."""
    return float(value or 0)


def seconds_to_srt_timestamp(seconds: float) -> str:
    """Convert seconds to SRT timestamp format: ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond before it is split into
    fields, so binary float noise (``1.001 % 1`` is ``0.000999...``) cannot
    drop a millisecond. ``None`` is treated as zero and negative values are
    clamped to zero.
    """
    total_ms = max(0, round(_to_seconds(seconds) * 1000))
    total_secs, millis = divmod(total_ms, 1000)
    total_mins, secs = divmod(total_secs, 60)
    hours, minutes = divmod(total_mins, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def generate_srt(segments: List[dict]) -> str:
    """Generate SRT (SubRip subtitle) content from segments.

    Segments whose text is empty or whitespace-only are skipped. Cue numbers
    are assigned to the cues actually written, so they stay consecutive even
    when segments are skipped. Returns ``""`` when nothing is written.
    """
    entries = []
    for seg in segments or []:
        raw_text = (seg.get("text") or "").strip()
        if not raw_text:
            continue
        # A cue must not contain blank lines; fold every line break
        # (including "\r\n") into a single space.
        text = " ".join(
            part.strip() for part in raw_text.splitlines() if part.strip()
        )
        start_ts = seconds_to_srt_timestamp(_to_seconds(seg.get("start_seconds")))
        end_ts = seconds_to_srt_timestamp(_to_seconds(seg.get("end_seconds")))
        cue_number = len(entries) + 1
        entries.append(f"{cue_number}\n{start_ts} --> {end_ts}\n{text}")
    if not entries:
        return ""
    return "\n\n".join(entries) + "\n"


def generate_txt(segments: List[dict]) -> str:
    """Generate a plain-text transcript, one ``[HH:MM:SS,mmm] text`` line per segment.

    Segments whose text is empty or whitespace-only are skipped. Returns
    ``""`` when nothing is written.
    """
    lines = []
    for seg in segments or []:
        text = (seg.get("text") or "").strip()
        if not text:
            continue
        ts = seconds_to_srt_timestamp(_to_seconds(seg.get("start_seconds")))
        lines.append(f"[{ts}] {text}")
    if not lines:
        return ""
    return "\n".join(lines) + "\n"


def generate_json_export(session: dict, segments: List[dict],
                         summaries: List[dict],
                         canvas_events: List[dict]) -> str:
    """Generate structured JSON export with segments, metadata, and canvas events.

    Only the fields listed below are copied from each row; any other column
    on the input dicts is left out of the export. Values that ``json`` cannot
    serialize natively are stringified (``default=str``).

    Returns:
        The export as an indented JSON string.
    """
    clean_segments = [
        {
            "sequence_index": seg.get("sequence_index"),
            "text": seg.get("text", ""),
            "start_seconds": _to_seconds(seg.get("start_seconds")),
            "end_seconds": _to_seconds(seg.get("end_seconds")),
            "is_final": seg.get("is_final", True),
            "speaker_label": seg.get("speaker_label"),
            "keyword_matches": seg.get("keyword_matches"),
        }
        for seg in segments or []
    ]

    clean_summaries = [
        {
            "id": s.get("id"),
            "summary_type": s.get("summary_type"),
            "content": s.get("content", ""),
            "llm_provider": s.get("llm_provider"),
            "llm_model": s.get("llm_model"),
            "created_at": s.get("created_at"),
        }
        for s in summaries or []
    ]

    clean_events = []
    for ev in canvas_events or []:
        elapsed = ev.get("session_elapsed_seconds")
        clean_events.append({
            "id": ev.get("id"),
            "event_type": ev.get("event_type"),
            # Compare against None explicitly: an event at 0 seconds is a
            # real value and must not be exported as null.
            "session_elapsed_seconds": float(elapsed) if elapsed is not None else None,
            "timestamp": ev.get("timestamp"),
            "event_payload": ev.get("event_payload", {}),
        })

    export_data = {
        "session": {
            "id": session.get("id"),
            "title": session.get("title"),
            "canvas_type": session.get("canvas_type"),
            "started_at": session.get("started_at"),
            "ended_at": session.get("ended_at"),
            "duration_seconds": session.get("duration_seconds"),
            "timetable_period_id": session.get("timetable_period_id"),
            "timetable_event_type": session.get("timetable_event_type"),
            "timetable_event_label": session.get("timetable_event_label"),
            "auto_tagged": session.get("auto_tagged", False),
            "llm_provider": session.get("llm_provider"),
            "llm_model": session.get("llm_model"),
            "word_count": session.get("word_count", 0),
            "segment_count": session.get("segment_count", 0),
        },
        "segments": clean_segments,
        "summaries": clean_summaries,
        "canvas_events": clean_events,
    }

    return json.dumps(export_data, indent=2, default=str)


def sanitize_filename(name: str) -> str:
    """Remove or replace characters that are unsafe in filenames.

    Every character that is not alphanumeric or one of ``" _-."`` becomes
    ``_``. The result is cut to 100 characters and stripped of leading and
    trailing spaces/dots; if nothing is left (or *name* is ``None``/empty),
    ``"export"`` is returned.
    """
    safe = "".join(
        c if c.isalnum() or c in " _-." else "_" for c in str(name or "")
    )
    return safe[:100].strip(" .") or "export"


def _export_filename(session: dict, session_id: str, extension: str) -> str:
    """Build ``<title>_<first 10 chars of started_at>.<extension>`` for a session."""
    # `or` rather than a .get() default: the column can be present but NULL.
    title = session.get("title") or session_id
    day = str(session.get("started_at") or "export")[:10]
    return f"{sanitize_filename(title)}_{day}.{extension}"


def _download_response(content: str, media_type: str, filename: str) -> Response:
    """Wrap in-memory text in an attachment response."""
    # filename* carries a percent-encoded UTF-8 value; the sanitized name may
    # still contain spaces or non-ASCII letters, which are not valid raw.
    encoded_name = quote(filename, safe="")
    return Response(
        content=content.encode("utf-8"),
        media_type=media_type,
        headers={
            "Content-Disposition": f"attachment; filename*=UTF-8''{encoded_name}",
        },
    )


# NOTE(review): the route decorator and the first parameter of this endpoint
# sit above the visible hunk. `session_id` is reconstructed from its use in
# the body and assumed to be a `str` path parameter — TODO confirm against
# the file and keep the existing decorator.
async def export_session(
    session_id: str,
    export_format: ExportFormat,
    user_id: str = Depends(get_user_id),
):
    """Export session as SRT, TXT, or JSON file download.

    Looks up the session for ``session_id`` owned by ``user_id``, loads its
    segments (plus summaries and canvas events for JSON), renders the
    requested format in memory and returns it as an attachment.

    Raises:
        HTTPException: 404 when no session matches ``session_id`` and
            ``user_id``; 400 when ``export_format.format`` is not one of
            ``srt``, ``txt`` or ``json``.
    """
    supabase = get_supabase_client()

    # Verify ownership
    session_check = (
        supabase.supabase.table("transcription_sessions")
        .select("*")
        .eq("id", session_id)
        .eq("user_id", user_id)
        .execute()
    )
    if not session_check.data:
        raise HTTPException(status_code=404, detail="Session not found")
    session = session_check.data[0]

    # Reject unknown formats before running any further queries.
    fmt = export_format.format.lower()
    media_type = _EXPORT_MEDIA_TYPES.get(fmt)
    if media_type is None:
        raise HTTPException(status_code=400, detail=f"Unsupported format: {export_format.format}")

    # Get segments
    segments_result = (
        supabase.supabase.table("transcription_segments")
        .select("*")
        .eq("session_id", session_id)
        .order("sequence_index")
        .execute()
    )
    segments = segments_result.data or []

    if fmt == "json":
        # Summaries and canvas events are only part of the JSON export, so
        # they are only fetched for it.
        summaries_result = (
            supabase.supabase.table("transcription_summaries")
            .select("*")
            .eq("session_id", session_id)
            .execute()
        )
        canvas_result = (
            supabase.supabase.table("canvas_events")
            .select("*")
            .eq("session_id", session_id)
            .order("timestamp")
            .execute()
        )
        content = generate_json_export(
            session,
            segments,
            summaries_result.data or [],
            canvas_result.data or [],
        )
    elif fmt == "srt":
        content = generate_srt(segments)
    else:
        content = generate_txt(segments)

    # The content already lives in memory, so a plain Response is used;
    # FileResponse serves a file from a filesystem path and cannot take a
    # BytesIO object.
    return _download_response(
        content, media_type, _export_filename(session, session_id, fmt)
    )