diff --git a/.env b/.env index 2e9b354..94f3c18 100644 --- a/.env +++ b/.env @@ -14,7 +14,7 @@ RUN_INIT=true # - full: Full setup including demo school and users (infra → demo-school → demo-users → gais-data) # - infra,demo-school,demo-users: Custom combination (comma-separated) # - infra,gais-data: Infrastructure + GAIS data import -INIT_MODE=full +INIT_MODE=infra,demo-school,demo-users ## ===================================================== ## APP INFORMATION & METADATA @@ -93,7 +93,7 @@ GOOGLE_CLIENT_SECRETS_FILE=Users/kcar/ClassroomCopilot/backend/app/secrets/googl # External Service Endpoints TIKA_URL=https://tika.kevlarai.com TIKA_TIMEOUT=300 -DOCLING_URL=http://ubuntu-server:5001 +DOCLING_URL=https://docling.kevlarai.com ## ===================================================== ## DOCUMENT STRUCTURE DISCOVERY & ANALYSIS @@ -200,8 +200,8 @@ DOCLING_VLM_DO_PICTURE_DESCRIPTION=true ## ===================================================== VITE_APP_URL=https://app.classroomcopilot.ai APP_API_URL=https://api.classroomcopilot.ai -APP_GRAPH_URL=https://graph.classroomcopilot.ai -APP_BOLT_URL=bolt://bolt.classroomcopilot.ai +APP_GRAPH_URL=https://192.168.0.208 +APP_BOLT_URL=bolt://192.168.0.208 ## ===================================================== ## REDIS CONFIGURATION & ENVIRONMENT ISOLATION @@ -271,4 +271,4 @@ LOG_LEVEL=debug ## API KEYS ## ===================================================== OPENAI_BASE_URL=https://api.openai.com/v1 -OPENAI_API_KEY=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 \ No newline at end of file +OPENAI_API_KEY=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 diff --git a/docker-compose.yml b/docker-compose.yml index 2134126..a10c829 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -10,7 +10,7 @@ services: networks: - kevlarai-network healthcheck: - test: ["CMD", "redis-cli", "ping"] + test: [ "CMD", "redis-cli", "ping" ] interval: 5s timeout: 3s retries: 5 @@ -26,16 +26,16 @@ services: environment: - REDIS_HOST=redis - RUN_INIT=true - - INIT_MODE=${INIT_MODE:-infra} # Set via .env or override: INIT_MODE=infra,gais-data - - INIT_ONLY=true # Exit after init, don't start server - command: ["./docker-entrypoint.sh", "init-only"] + - INIT_MODE=${INIT_MODE:-infra} # Set via .env or override: INIT_MODE=infra,gais-data + - INIT_ONLY=true # Exit after init, don't start server + command: [ "./docker-entrypoint.sh", "init-only" ] depends_on: redis: condition: service_healthy networks: - kevlarai-network profiles: - - init # Only run when explicitly requested: docker compose --profile init up + - init # Only run when explicitly requested: docker compose --profile init up backend: container_name: api @@ -46,10 +46,10 @@ services: - .env environment: - REDIS_HOST=redis - - RUN_INIT=${RUN_INIT:-false} # Set to 'true' to run init on startup - - INIT_MODE=${INIT_MODE:-infra} # Which init tasks to run + - RUN_INIT=${RUN_INIT:-false} # Set to 'true' to run init on startup + - INIT_MODE=${INIT_MODE:-infra} # Which init tasks to run ports: - - 8000:8000 + - 8080:8080 depends_on: redis: condition: service_healthy @@ -59,6 +59,7 @@ services: volumes: redis-data: + networks: kevlarai-network: name: kevlarai-network diff --git a/requirements.txt b/requirements.txt index 4c194d4..7d1aced 100644 --- a/requirements.txt +++ b/requirements.txt @@ -53,7 +53,7 @@ w3lib scikit-learn # Google APIs -youtube-transcript-api +youtube-transcript-api>=1.2.3 google-api-python-client google-auth-oauthlib diff --git a/routers/external/youtube.py b/routers/external/youtube.py index 8411b6b..4a848e8 100644 --- a/routers/external/youtube.py +++ b/routers/external/youtube.py @@ -28,9 +28,9 @@ youtube = build('youtube', 'v3', developerKey=os.getenv('YOUTUBE_API_KEY')) @router.get("/youtube-proxy") async def youtube_proxy(videoId: str): try: - # Fetch transcript using youtube-transcript-api - transcript = YouTubeTranscriptApi.get_transcript(videoId, languages=['en']) - transcript_lines = [{"start": entry["start"], "duration": entry["duration"], "text": entry["text"]} for entry in transcript] + transcript_list = YouTubeTranscriptApi().list(videoId) + transcript = transcript_list.find_transcript(['en']).fetch() + transcript_lines = [{"start": entry.start, "duration": entry.duration, "text": entry.text} for entry in transcript] # Fetch video details using YouTube Data API video_response = youtube.videos().list( diff --git a/run/initialization/buckets.py b/run/initialization/buckets.py index fc7d6d7..351df04 100644 --- a/run/initialization/buckets.py +++ b/run/initialization/buckets.py @@ -46,6 +46,15 @@ def initialize_buckets() -> dict: file_size_limit=1000 * 1024 * 1024, # 1GB ) }, + # Exam Board files + { + "id": "cc.examboards", + "options": CreateBucketOptions( + name="Classroom Copilot Exam Board Files", + public=False, + file_size_limit=1000 * 1024 * 1024, # 1GB + ) + }, ] results = {}