From 83edfff9d33f93328ca31ee8a1e8cabd6e08c3ea Mon Sep 17 00:00:00 2001 From: kcar Date: Wed, 13 May 2026 22:33:35 +0000 Subject: [PATCH] feat: apply local modifications to WhisperLive-Server --- .archive/Dockerfile.macos.dev | 51 - .archive/Dockerfile.macos.prod | 45 - .archive/Dockerfile.win.prod | 49 - .archive/docker-compose.yml | 1191 -------------------- .env | 5 +- Dockerfile | 20 +- HYBRID_SERVER_README.md | 260 +++++ __pycache__/hybrid_server.cpython-314.pyc | Bin 0 -> 52983 bytes batch_transcribe.py | 270 +++++ docker-compose.yaml | 12 +- hybrid_server.py | 1229 +++++++++++++++++++++ openapi.json | 866 +++++++++++++++ requirements/server.txt | 6 +- scratch/dashboard.html | 727 ++++++++++++ scratch/test_ws.py | 9 + test_form.html | 727 ++++++++++++ test_http_endpoints.py | 159 +++ 17 files changed, 4274 insertions(+), 1352 deletions(-) delete mode 100644 .archive/Dockerfile.macos.dev delete mode 100644 .archive/Dockerfile.macos.prod delete mode 100644 .archive/Dockerfile.win.prod delete mode 100644 .archive/docker-compose.yml create mode 100644 HYBRID_SERVER_README.md create mode 100644 __pycache__/hybrid_server.cpython-314.pyc create mode 100644 batch_transcribe.py create mode 100644 hybrid_server.py create mode 100644 openapi.json create mode 100644 scratch/dashboard.html create mode 100644 scratch/test_ws.py create mode 100644 test_form.html create mode 100644 test_http_endpoints.py diff --git a/.archive/Dockerfile.macos.dev b/.archive/Dockerfile.macos.dev deleted file mode 100644 index 4926c61..0000000 --- a/.archive/Dockerfile.macos.dev +++ /dev/null @@ -1,51 +0,0 @@ -FROM python:3.10-bookworm - -ARG DEBIAN_FRONTEND=noninteractive - -# Create log directories with proper permissions -RUN mkdir -p /app/logs && \ - touch /app/logs/whisperlive.log && \ - touch /app/logs/connections.log && \ - chmod 666 /app/logs/whisperlive.log && \ - chmod 666 /app/logs/connections.log - -# install lib required for pyaudio -RUN apt update && apt install -y portaudio19-dev 
&& apt-get clean && rm -rf /var/lib/apt/lists/* - -# update pip to support for whl.metadata -> less downloading -RUN pip install --no-cache-dir -U "pip>=24" - -# create a working directory -WORKDIR /app - -# install the requirements for running the whisper-live server -COPY requirements/server.txt /app/ -RUN pip install -r server.txt && rm server.txt - -COPY whisper_live /app/whisper_live -COPY run_server.py /app - -# Port options -EXPOSE ${PORT_WHISPERLIVE} -EXPOSE ${PORT_WHISPERLIVE_SSL} -ARG PORT_WHISPERLIVE -ENV PORT_WHISPERLIVE=${PORT_WHISPERLIVE} -ARG PORT_WHISPERLIVE_SSL -ENV PORT_WHISPERLIVE_SSL=${PORT_WHISPERLIVE_SSL} - -# SSL options -ARG WHISPERLIVE_SSL -ENV WHISPERLIVE_SSL=${WHISPERLIVE_SSL} - -# Model options -ARG WHISPL_USE_CUSTOM_MODEL -ENV WHISPL_USE_CUSTOM_MODEL=${WHISPL_USE_CUSTOM_MODEL} -ARG FASTERWHISPER_MODEL -ENV FASTERWHISPER_MODEL=${FASTERWHISPER_MODEL} - -CMD ["sh", "-c", "\ - if [ \"$WHISPERLIVE_SSL\" = \"true\" ]; then \ - python3 -u run_server.py --port $PORT_WHISPERLIVE_SSL --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL --ssl_cert_path /app/ssl; \ - else \ - python3 -u run_server.py --port $PORT_WHISPERLIVE --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL --no_single_model; \ - fi"] diff --git a/.archive/Dockerfile.macos.prod b/.archive/Dockerfile.macos.prod deleted file mode 100644 index 9ab915a..0000000 --- a/.archive/Dockerfile.macos.prod +++ /dev/null @@ -1,45 +0,0 @@ -FROM python:3.10-bookworm - -ARG DEBIAN_FRONTEND=noninteractive - -# Create log directories with proper permissions -RUN mkdir -p /app/logs && \ - touch /app/logs/whisperlive.log && \ - touch /app/logs/connections.log && \ - chmod 666 /app/logs/whisperlive.log && \ - chmod 666 /app/logs/connections.log - -# install lib required for pyaudio -RUN apt update && apt install -y portaudio19-dev && apt-get clean && rm -rf /var/lib/apt/lists/* - -# update pip to support for whl.metadata 
-> less downloading -RUN pip install --no-cache-dir -U "pip>=24" - -# create a working directory -WORKDIR /app - -# install the requirements for running the whisper-live server -COPY requirements/server.txt /app/ -RUN pip install -r server.txt && rm server.txt - -COPY whisper_live /app/whisper_live -COPY run_server.py /app - -# Copy application files -EXPOSE ${PORT_WHISPERLIVE} -EXPOSE ${PORT_WHISPERLIVE_SSL} -ARG PORT_WHISPERLIVE -ENV PORT_WHISPERLIVE=${PORT_WHISPERLIVE} -ARG PORT_WHISPERLIVE_SSL -ENV PORT_WHISPERLIVE_SSL=${PORT_WHISPERLIVE_SSL} -ARG FASTERWHISPER_MODEL -ENV FASTERWHISPER_MODEL=${FASTERWHISPER_MODEL} -ARG WHISPERLIVE_SSL -ENV WHISPERLIVE_SSL=${WHISPERLIVE_SSL} - -CMD ["sh", "-c", "\ - if [ \"$WHISPERLIVE_SSL\" = \"true\" ]; then \ - python3 -u run_server.py --port $PORT_WHISPERLIVE_SSL --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL --ssl_cert_path /app/ssl; \ - else \ - python3 -u run_server.py --port $PORT_WHISPERLIVE --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL; \ - fi"] diff --git a/.archive/Dockerfile.win.prod b/.archive/Dockerfile.win.prod deleted file mode 100644 index 79cb85a..0000000 --- a/.archive/Dockerfile.win.prod +++ /dev/null @@ -1,49 +0,0 @@ -FROM python:3.10-bookworm - -ARG DEBIAN_FRONTEND=noninteractive - -# Create log directories with proper permissions -RUN mkdir -p /app/logs && \ - touch /app/logs/whisperlive.log && \ - touch /app/logs/connections.log && \ - chmod 666 /app/logs/whisperlive.log && \ - chmod 666 /app/logs/connections.log - -# install lib required for pyaudio -RUN apt update && apt install -y portaudio19-dev && apt-get clean && rm -rf /var/lib/apt/lists/* - -# update pip to support for whl.metadata -> less downloading -RUN pip install --no-cache-dir -U "pip>=24" - -# create a working directory -WORKDIR /app - -# install the requirements for running the whisper-live server -COPY requirements/server.txt /app/ -RUN pip 
install -r server.txt && rm server.txt - -# make the paths of the nvidia libs installed as wheels visible. equivalent to: -# export LD_LIBRARY_PATH=`python3 -c 'import os; import nvidia.cublas.lib; import nvidia.cudnn.lib; print(os.path.dirname(nvidia.cublas.lib.__file__) + ":" + os.path.dirname(nvidia.cudnn.lib.__file__))'` -ENV LD_LIBRARY_PATH="/usr/local/lib/python3.10/site-packages/nvidia/cublas/lib:/usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib" - -COPY whisper_live /app/whisper_live -COPY run_server.py /app - -# Copy application files -EXPOSE ${PORT_WHISPERLIVE} -EXPOSE ${PORT_WHISPERLIVE_SSL} -ARG PORT_WHISPERLIVE -ENV PORT_WHISPERLIVE=${PORT_WHISPERLIVE} -ARG PORT_WHISPERLIVE_SSL -ENV PORT_WHISPERLIVE_SSL=${PORT_WHISPERLIVE_SSL} -ARG FASTERWHISPER_MODEL -ENV FASTERWHISPER_MODEL=${FASTERWHISPER_MODEL} -ARG WHISPERLIVE_SSL -ENV WHISPERLIVE_SSL=${WHISPERLIVE_SSL} - -CMD ["sh", "-c", "\ - if [ \"$WHISPERLIVE_SSL\" = \"true\" ]; then \ - python3 -u run_server.py --port $PORT_WHISPERLIVE_SSL --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL --ssl_cert_path /app/ssl; \ - else \ - python3 -u run_server.py --port $PORT_WHISPERLIVE --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL; \ - fi"] diff --git a/.archive/docker-compose.yml b/.archive/docker-compose.yml deleted file mode 100644 index 404508a..0000000 --- a/.archive/docker-compose.yml +++ /dev/null @@ -1,1191 +0,0 @@ -services: - nginx: - profiles: - - core - container_name: nginx-proxy-manager-${NGINX_MODE:-dev} - image: 'jc21/nginx-proxy-manager:latest' - ports: - - '80:80' - - '81:81' - - '443:443' - volumes: - - ./cc-volumes/nginx-proxy-manager/${BUILD_OS}/${NGINX_MODE:-dev}/data:/data - - ./cc-volumes/nginx-proxy-manager/${BUILD_OS}/${NGINX_MODE:-dev}/letsencrypt:/etc/letsencrypt - - ./cc-volumes/nginx-proxy-manager/${BUILD_OS}/${NGINX_MODE:-dev}/snippets:/snippets:ro - environment: - TZ: Europe/London - 
networks: - - cc-network - - keycloak: - profiles: - - core - - database - container_name: keycloak-${NGINX_MODE:-dev} - build: - context: ./cc-volumes/keycloak/${NGINX_MODE:-dev}/docker - dockerfile: Dockerfile.${BUILD_OS}.${NGINX_MODE:-dev} - args: - KC_BOOTSTRAP_ADMIN_PASSWORD: ${KEYCLOAK_ADMIN_PASSWORD} - KC_BOOTSTRAP_ADMIN_USERNAME: ${KEYCLOAK_ADMIN} - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} - KC_DB: postgres - KC_DB_URL: jdbc:postgresql://db:5432/postgres - KC_DB_USERNAME: keycloak - KC_DB_PASSWORD: keycloak - KC_DB_SCHEMA: keycloak - KC_HOSTNAME: ${KEYCLOAK_URL} - KC_HOSTNAME_STRICT: "false" - KC_PROXY_HEADERS: xforwarded - KC_PROXY_PROTOCOL_ENABLED: "false" - KC_HTTP_ENABLED: "true" - KC_HTTPS_ENABLED: "false" - KC_HOSTNAME_ADMIN: ${KEYCLOAK_ADMIN_URL} - KC_HOSTNAME_DEBUG: "true" - KC_HEALTH_ENABLED: "true" - KC_HOSTNAME_BACKCHANNEL_DYNAMIC: "false" - KC_METRICS_ENABLED: "true" - KC_LOG_LEVEL: DEBUG - KC_HTTP_RELATIVE_PATH: / - depends_on: - db: - condition: service_healthy - restart: unless-stopped - ports: - - "${KEYCLOAK_MANAGEMENT_PORT}:9000" - - "${KEYCLOAK_PORT}:8080" - - "${KEYCLOAK_SSL_PORT}:8443" - volumes: - - ./cc-volumes/keycloak/${NGINX_MODE:-dev}/conf:/opt/keycloak/conf:ro - - ./cc-volumes/keycloak/${NGINX_MODE:-dev}/providers:/opt/keycloak/providers:ro - - ./cc-volumes/keycloak/${NGINX_MODE:-dev}/themes:/opt/keycloak/themes:ro - - ./cc-volumes/keycloak/${NGINX_MODE:-dev}/master-realm-${NGINX_MODE:-dev}-${BUILD_OS}.json:/opt/keycloak/data/import/master-realm.json:ro - - ./cc-volumes/keycloak/${NGINX_MODE:-dev}/classroomcopilot-realm-${NGINX_MODE:-dev}-${BUILD_OS}.json:/opt/keycloak/data/import/classroomcopilot-realm.json:ro - networks: - - cc-network - - oauth2-proxy-admin: - image: quay.io/oauth2-proxy/oauth2-proxy:v7.6.0 - container_name: oauth2-proxy-admin - restart: unless-stopped - environment: - OAUTH2_PROXY_PROVIDER: oidc - OAUTH2_PROXY_OIDC_ISSUER_URL: https://keycloak.classroomcopilot.test/realms/classroomcopilot - 
OAUTH2_PROXY_CLIENT_ID: admin-app - OAUTH2_PROXY_CLIENT_SECRET: ${KEYCLOAK_SECRET_ADMIN} - OAUTH2_PROXY_COOKIE_SECRET: ${COOKIE_SECRET_ADMIN} - OAUTH2_PROXY_COOKIE_DOMAIN: .classroomcopilot.test - OAUTH2_PROXY_UPSTREAMS: http://cc-admin:3000 - OAUTH2_PROXY_REDIRECT_URL: https://admin.classroomcopilot.test/oauth2/callback - OAUTH2_PROXY_EMAIL_DOMAINS: "*" - OAUTH2_PROXY_ALLOWED_GROUPS: "admin" - OAUTH2_PROXY_SKIP_PROVIDER_BUTTON: "true" - OAUTH2_PROXY_PASS_ACCESS_TOKEN: "true" - OAUTH2_PROXY_SET_XAUTHREQUEST: "true" - ports: - - "4181:4180" - networks: - - cc-network - - whisperlive-frontend: - profiles: - - core - - frontend - container_name: whisperlive-frontend-${NGINX_MODE:-dev} - build: - context: . - dockerfile: ./whisperlive-frontend/Dockerfile - args: - BUILD_OS: ${BUILD_OS} - NGINX_MODE: ${NGINX_MODE} - environment: - - VITE_APP_URL=${APP_URL} - - VITE_APP_PROTOCOL=${APP_PROTOCOL} - - VITE_APP_NAME=${APP_NAME} - - VITE_DEV=${DEV_MODE} - - VITE_WHISPERLIVE_URL=${WHISPERLIVE_URL} - ports: - - "${PORT_WHISPERLIVE_FRONTEND}:${PORT_WHISPERLIVE_FRONTEND}" - - "${PORT_WHISPERLIVE_FRONTEND_SSL}:${PORT_WHISPERLIVE_FRONTEND_SSL}" - volumes: - - ./whisperlive-frontend:/app - - /app/node_modules - - ./cc-volumes/whisperlive/frontend/ssl/fullchain1.pem:/etc/nginx/ssl/fullchain.pem:ro - - ./cc-volumes/whisperlive/frontend/ssl/privkey1.pem:/etc/nginx/ssl/privkey.pem:ro - networks: - - cc-network - - whisperlive-win: - profiles: - - none - container_name: whisperlive-${NGINX_MODE:-dev} - build: - context: ./WhisperLive/server - dockerfile: Dockerfile.${NGINX_MODE:-dev} - args: - PORT_WHISPERLIVE: ${PORT_WHISPERLIVE} - PORT_WHISPERLIVE_SSL: ${PORT_WHISPERLIVE_SSL} - WHISPERLIVE_SSL: ${WHISPERLIVE_SSL:-false} - WHISPERLIVE_MODEL: ${WHISPERLIVE_MODEL:-base} - env_file: - - .env - environment: - WHISPERLIVE_SSL: ${WHISPERLIVE_SSL:-false} - LOG_PATH: /app/logs - NVIDIA_VISIBLE_DEVICES: all - NVIDIA_DRIVER_CAPABILITIES: compute,utility - volumes: - - 
./cc-volumes/whisperlive/models:/app/models - - ./cc-volumes/whisperlive/${NGINX_MODE:-dev}/ssl:/app/ssl - - ./local/logs/whisperlive:/app/logs - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: 1 - capabilities: [gpu] - ports: - - ${PORT_WHISPERLIVE}:${PORT_WHISPERLIVE} - - ${PORT_WHISPERLIVE_SSL}:${PORT_WHISPERLIVE_SSL} - networks: - - cc-network - - whisperlive-macos: - profiles: - - core - container_name: whisperlive-${NGINX_MODE:-dev} - build: - context: ./WhisperLive/server - dockerfile: Dockerfile.${BUILD_OS}.${NGINX_MODE:-dev} - args: - PORT_WHISPERLIVE: ${PORT_WHISPERLIVE} - PORT_WHISPERLIVE_SSL: ${PORT_WHISPERLIVE_SSL} - WHISPERLIVE_SSL: ${WHISPERLIVE_SSL:-false} - WHISPL_USE_CUSTOM_MODEL: ${WHISPL_USE_CUSTOM_MODEL:-false} - FASTERWHISPER_MODEL: ${FASTERWHISPER_MODEL:-base} - env_file: - - .env - environment: - WHISPERLIVE_SSL: ${WHISPERLIVE_SSL:-false} - LOG_PATH: /app/logs - NVIDIA_VISIBLE_DEVICES: all - NVIDIA_DRIVER_CAPABILITIES: compute,utility - volumes: - - ./local/data/whisperlive/models:/app/models - - ./local/data/whisperlive/auto-download:/root/.cache/huggingface/hub - - ./cc-volumes/whisperlive/${NGINX_MODE:-dev}/ssl:/app/ssl - - ./local/logs/whisperlive:/app/logs - deploy: - resources: - limits: - cpus: '4' - memory: 8G - ports: - - ${PORT_WHISPERLIVE}:${PORT_WHISPERLIVE} - - ${PORT_WHISPERLIVE_SSL}:${PORT_WHISPERLIVE_SSL} - networks: - - cc-network - - whisperlive-cpu: - profiles: - - none - container_name: whisperlive-cpu-${NGINX_MODE:-dev} - image: ghcr.io/collabora/whisperlive-cpu:latest - environment: - LOG_PATH: /app/logs - volumes: - - ./cc-volumes/whisperlive/models:/app/models - - ./cc-volumes/whisperlive/${NGINX_MODE:-dev}/ssl:/app/ssl - - ./local/logs/whisperlive-cpu:/app/logs - deploy: - resources: - limits: - cpus: '4' - memory: 8G - ports: - - ${PORT_WHISPERLIVE}:9090 - networks: - - cc-network - - whisperlive-gpu: - profiles: - - none - container_name: whisperlive-gpu-${NGINX_MODE:-dev} - image: 
ghcr.io/collabora/whisperlive-gpu:latest - environment: - LOG_PATH: /app/logs - NVIDIA_VISIBLE_DEVICES: all - NVIDIA_DRIVER_CAPABILITIES: compute,utility - volumes: - - ./cc-volumes/whisperlive/models:/app/models - - ./cc-volumes/whisperlive/${NGINX_MODE:-dev}/ssl:/app/ssl - - ./local/logs/whisperlive-gpu:/app/logs - deploy: - resources: - limits: - cpus: '4' - memory: 16G - ports: - - ${PORT_WHISPERLIVE}:9090 - networks: - - cc-network - - solid-proxy-internal: - profiles: - - core - container_name: solid-proxy-internal-${NGINX_MODE:-dev} - image: nginx:alpine - ports: - - 3007:3007 - volumes: - - ./cc-volumes/solid-css/${NGINX_MODE:-dev}/nginx/solid-internal.conf:/etc/nginx/conf.d/default.conf:ro - - ./cc-volumes/cloudflare-origin-certs/solid_cc_cert.pem:/etc/nginx/ssl/cert.pem:ro - - ./cc-volumes/cloudflare-origin-certs/solid_cc_key.pem:/etc/nginx/ssl/key.pem:ro - - ./local/logs/${NGINX_MODE:-dev}/solid-proxy-internal:/var/log/nginx - networks: - - cc-network - - cc-marketing-site: - profiles: - - core - - frontend - container_name: cc-marketing-${NGINX_MODE:-dev} - build: - context: ./cc-marketing - dockerfile: Dockerfile.${NGINX_MODE:-dev} - env_file: - - .env - environment: - - VITE_APP_URL=${APP_URL} - - VITE_APP_SITE_URL=${SITE_URL} - - VITE_APP_APP_URL=${APP_URL} someone check - ports: - - "${PORT_MARKETING_SITE}:${PORT_MARKETING_SITE}" - - "${PORT_MARKETING_SITE_SSL}:${PORT_MARKETING_SITE_SSL}" - networks: - - cc-network - - frontend: - profiles: - - core - - frontend - container_name: frontend-${NGINX_MODE:-dev} - build: - context: ./frontend - dockerfile: Dockerfile.${NGINX_MODE:-dev} - args: - VITE_APP_URL: ${VITE_APP_URL} - environment: - - VITE_FRONTEND_SITE_URL=${SITE_URL} - - VITE_APP_PROTOCOL=${APP_PROTOCOL} - - VITE_APP_NAME=${APP_NAME} - - VITE_SUPER_ADMIN_EMAIL=${APP_AUTHOR_EMAIL} - - VITE_DEV=${DEV_MODE} - - VITE_SUPABASE_URL=${SUPABASE_URL} - - VITE_SUPABASE_ANON_KEY=${ANON_KEY} - - VITE_STRICT_MODE=${STRICT_MODE} - - APP_URL=${APP_URL} - - 
PORT_FRONTEND=${PORT_FRONTEND} - ports: - - "${PORT_FRONTEND}:${PORT_FRONTEND}" - volumes: - - ./frontend:/app - - /app/node_modules - networks: - - cc-network - - storybook: - profiles: - - core - - frontend - container_name: storybook-${NGINX_MODE:-dev} - build: - context: ./frontend - dockerfile: Dockerfile.storybook.macos.${NGINX_MODE:-dev} - environment: - - NODE_ENV=${NGINX_MODE:-dev} - ports: - - "${PORT_STORYBOOK:-6006}:6006" - volumes: - - ./frontend:/app - - /app/node_modules - networks: - - cc-network - depends_on: - - frontend - - cc-admin: - profiles: - - core - - frontend - container_name: cc-admin-${NGINX_MODE:-dev} - build: - context: ./cc-admin - dockerfile: Dockerfile.${NGINX_MODE:-dev} - args: - PORT: ${PORT_CC_ADMIN} - PORT_DEVTOOLS: ${PORT_CC_ADMIN_DEVTOOLS} - SUPABASE_URL: ${SUPABASE_URL} - ANON_KEY: ${ANON_KEY} - SERVICE_ROLE_KEY: ${SERVICE_ROLE_KEY} - VITE_CC_ADMIN_URL: ${CC_ADMIN_URL} - environment: - APP_URL: ${APP_URL} - PORT_CC_ADMIN: ${PORT_CC_ADMIN} - PORT_CC_ADMIN_DEVTOOLS: ${PORT_CC_ADMIN_DEVTOOLS} - env_file: - - .env - - ./cc-admin/.env.${NGINX_MODE:-dev} - ports: - - "${PORT_CC_ADMIN}:${PORT_CC_ADMIN}" - volumes: - - ./cc-admin:/app - - /app/node_modules - networks: - - cc-network - - backend: - profiles: - - core - - backend - container_name: backend-${NGINX_MODE:-dev} - build: - context: ./backend - dockerfile: Dockerfile.${BUILD_OS}.${NGINX_MODE:-dev} - env_file: - - .env - environment: - ADMIN_EMAIL: ${SUPER_ADMIN_EMAIL} - ADMIN_PASSWORD: ${SUPER_ADMIN_PASSWORD} - ADMIN_NAME: ${SUPER_ADMIN_NAME} - ADMIN_USERNAME: ${SUPER_ADMIN_USERNAME} - ADMIN_DISPLAY_NAME: ${SUPER_ADMIN_DISPLAY_NAME} - SUPABASE_URL: ${SUPABASE_URL} - SERVICE_ROLE_KEY: ${SERVICE_ROLE_KEY} - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} - POSTGRES_DB: ${POSTGRES_DB} - UVICORN_TIMEOUT: 300 - volumes: - - /var/run/docker.sock:/var/run/docker.sock - - ./backend/:/app/backend - - ./cc-volumes/init:/init:rw - - ./local/logs/container/backend:/logs - - 
./local/input:/app/local/input:rw - - ./local/output:/app/local/output:rw - ports: - - "${PORT_BACKEND}:${PORT_BACKEND}" - extra_hosts: - - "supa.classroomcopilot.test:172.23.0.1" - networks: - - cc-network - deploy: - resources: - limits: - cpus: '2' - memory: 4G - - tldraw-sync: - profiles: - - core - - backend - container_name: tldraw-sync-${NGINX_MODE:-dev} - build: - context: ./tldraw-sync - dockerfile: Dockerfile - env_file: - - .env - environment: - - LOG_PATH=/app/logs - ports: - - "5002:5002" - volumes: - - ./tldraw-sync:/app - - ./cc-volumes/tldraw-sync/bunfig.toml:/app/bunfig.toml:ro - - ./local/data/tldraw-sync/.assets:/app/.assets - - ./local/data/tldraw-sync/.rooms:/app/.rooms - - ./local/logs/container/tldraw-sync:/app/logs - networks: - - cc-network - - neo4j: - profiles: - - database - - backend - image: neo4j:enterprise - container_name: neo4j-${NGINX_MODE:-dev} - env_file: - - .env - environment: - - NEO4J_ACCEPT_LICENSE_AGREEMENT=yes - - NEO4J_PLUGINS='["apoc"]' - ports: - - ${PORT_NEO4J_HTTP}:${PORT_NEO4J_HTTP} - - ${PORT_NEO4J_HTTPS}:${PORT_NEO4J_HTTPS} - - ${PORT_NEO4J_BOLT}:${PORT_NEO4J_BOLT} - volumes: - - neo4j-data:/data - - neo4j-logs:/logs - - ./cc-volumes/neo4j/conf/${NGINX_MODE:-dev}/neo4j.conf:/conf/neo4j.conf:ro - - ./cc-volumes/cloudflare-origin-certs/graph_cc_key.pem:/certificates/https/private.key:ro - - ./cc-volumes/cloudflare-origin-certs/graph_cc_cert.pem:/certificates/https/public.crt:ro - - ./cc-volumes/letsencrypt-certs/bolt.classroomcopilot/privkey1.pem:/certificates/bolt/private.key:ro - - ./cc-volumes/letsencrypt-certs/bolt.classroomcopilot/fullchain1.pem:/certificates/bolt/public.crt:ro - - ./cc-volumes/letsencrypt-certs/bolt.classroomcopilot/fullchain1.pem:/certificates/bolt/trusted/public.crt:ro - - ./cc-volumes/neo4j/plugins:/plugins:rw - - ./local/logs/container/neo4j:/logs - healthcheck: - test: ["CMD-SHELL", "neo4j status || exit 1"] - interval: 10s - timeout: 5s - retries: 10 - networks: - - cc-network - - 
solid-css: - profiles: - - solid - image: solidproject/community-server:latest - container_name: solid-css-${NGINX_MODE:-dev} - restart: unless-stopped - ports: - - "${PORT_SOLID_CSS}:3000" - volumes: - - ./cc-volumes/solid-css/${NGINX_MODE:-dev}/config:/config:ro - - ./cc-volumes/solid-css/${NGINX_MODE:-dev}/data:/data - command: - - --config - - /config/docker.json - networks: - - cc-network - - redis: - profiles: - - database - - backend - image: redis:alpine - container_name: redis-${NGINX_MODE:-dev} - networks: - - cc-network - ports: - - "${PORT_REDIS:-6379}:6379" - command: redis-server --appendonly yes - volumes: - - redis-data:/data - - searxng: - profiles: - - core - - services - - backend - image: searxng/searxng - container_name: searxng-${NGINX_MODE:-dev} - ports: - - "${PORT_SEARXNG}:${PORT_SEARXNG}" - env_file: - - .env - volumes: - - ./cc-volumes/searxng/limiter.toml:/etc/searxng/limiter.toml - - ./cc-volumes/searxng/settings.yml:/etc/searxng/settings.yml - networks: - - cc-network - - mailhog: - profiles: - - core - container_name: mailhog-${NGINX_MODE:-dev} - image: mailhog/mailhog - ports: - - "${PORT_MAILHOG_SMTP}:1025" # SMTP port - - "${PORT_MAILHOG_WEB}:8025" # Web UI port - env_file: - - .env - volumes: - - ./local/logs/mailhog:/var/mailhog - - ./local/data/mailhog:/var/mailhog/mailhog - networks: - - cc-network - - postfix: - profiles: - - prod - image: catatnight/postfix - environment: - - maildomain=${APP_URL} - - smtp_user=user:password - ports: - - "25:25" - - minecraft-server: - profiles: - - none - image: itzg/minecraft-server - container_name: cc-minecraft-forge-${NGINX_MODE:-dev} - environment: - EULA: "TRUE" - TYPE: VANILLA - ONLINE_MODE: "false" - PROXY: "minecraft.kevlarai.com" - - # ✅ Set custom server host details - MOTD: "Welcome to KevlarAI's Minecraft Forge Server" - - # ✅ Optional extras (customize as desired) - MAX_PLAYERS: 20 - ALLOW_NETHER: "TRUE" - ENABLE_COMMAND_BLOCK: "TRUE" - DIFFICULTY: "normal" - MODE: "survival" - 
LEVEL_TYPE: "minecraft:default" - LEVEL: "world" - PVP: "TRUE" - ports: - - 25575:25575 - - 25565:25565 - volumes: - - ./cc-volumes/minecraft/${NGINX_MODE:-dev}/vanilla/data:/data - restart: unless-stopped - networks: - - cc-network - - # Supabase containers - studio: - profiles: - - database - - supabase - container_name: supabase-studio-${NGINX_MODE:-dev} - image: supabase/studio:20250113-83c9420 - restart: unless-stopped - healthcheck: - test: - [ - "CMD", - "node", - "-e", - "fetch('http://studio:3000/api/profile').then((r) => {if (r.status !== 200) throw new Error(r.status)})", - ] - timeout: 10s - interval: 5s - retries: 3 - depends_on: - analytics: - condition: service_healthy - ports: - - ${PORT_SUPABASE_STUDIO}:3000 - env_file: - - .env - environment: - STUDIO_PG_META_URL: http://meta:8080 - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} - DEFAULT_PROJECT_ID: "ClassroomCopilot" - DEFAULT_ORGANIZATION_NAME: ${STUDIO_DEFAULT_ORGANIZATION} - DEFAULT_PROJECT_NAME: ${STUDIO_DEFAULT_PROJECT} - OPENAI_API_KEY: ${OPENAI_API_KEY:-} - SUPABASE_URL: ${SUPABASE_URL} - SUPABASE_PUBLIC_URL: ${SUPABASE_PUBLIC_URL} - SUPABASE_ANON_KEY: ${ANON_KEY} - SUPABASE_SERVICE_KEY: ${SERVICE_ROLE_KEY} - LOGFLARE_API_KEY: ${LOGFLARE_API_KEY} - LOGFLARE_URL: http://analytics:4000 - NEXT_PUBLIC_ENABLE_LOGS: true - NEXT_ANALYTICS_BACKEND_PROVIDER: postgres - networks: - - cc-network - - kong: - profiles: - - database - - supabase - container_name: supabase-kong-${NGINX_MODE:-dev} - image: kong:2.8.1 - restart: unless-stopped - entrypoint: bash -c 'eval "echo \"$$(cat ~/temp.yml)\"" > ~/kong.yml && /docker-entrypoint.sh kong docker-start' - ports: - - ${KONG_HTTP_PORT}:8000/tcp - - ${KONG_HTTPS_PORT}:8443/tcp - depends_on: - analytics: - condition: service_healthy - env_file: - - .env - environment: - KONG_DATABASE: "off" - KONG_DECLARATIVE_CONFIG: /home/kong/kong.yml - KONG_DNS_ORDER: LAST,A,CNAME - KONG_PLUGINS: request-transformer,cors,key-auth,acl,basic-auth - 
KONG_NGINX_PROXY_PROXY_BUFFER_SIZE: 160k - KONG_NGINX_PROXY_PROXY_BUFFERS: 64 160k - SUPABASE_ANON_KEY: ${ANON_KEY} - SUPABASE_SERVICE_KEY: ${SERVICE_ROLE_KEY} - DASHBOARD_USERNAME: ${DASHBOARD_USERNAME} - DASHBOARD_PASSWORD: ${DASHBOARD_PASSWORD} - KONG_PROXY_ACCESS_LOG: "/dev/stdout" - KONG_ADMIN_ACCESS_LOG: "/dev/stdout" - KONG_PROXY_ERROR_LOG: "/dev/stderr" - KONG_ADMIN_ERROR_LOG: "/dev/stderr" - KONG_CORS_ORIGINS: "*" - KONG_CORS_METHODS: "GET,HEAD,PUT,PATCH,POST,DELETE,OPTIONS" - KONG_CORS_HEADERS: "DNT,X-Auth-Token,Keep-Alive,User-Agent,X-Requested-With,If-Modified-Since,Cache-Control,Content-Type,Range,Authorization,apikey,x-client-info" - KONG_CORS_EXPOSED_HEADERS: "Content-Length,Content-Range" - KONG_CORS_MAX_AGE: 3600 - volumes: - - ./supabase/api/kong.yml:/home/kong/temp.yml:ro - networks: - - cc-network - - auth: - profiles: - - database - - supabase - container_name: supabase-auth-${NGINX_MODE:-dev} - image: supabase/gotrue:v2.167.0 - depends_on: - db: - condition: service_healthy - analytics: - condition: service_healthy - healthcheck: - test: - [ - "CMD", - "wget", - "--no-verbose", - "--tries=1", - "--spider", - "http://localhost:9999/health", - ] - timeout: 5s - interval: 5s - retries: 3 - restart: unless-stopped - env_file: - - .env - environment: - GOTRUE_API_HOST: 0.0.0.0 - GOTRUE_API_PORT: 9999 - API_EXTERNAL_URL: ${API_EXTERNAL_URL} - GOTRUE_DB_DRIVER: postgres - GOTRUE_DB_DATABASE_URL: postgres://supabase_auth_admin:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB} - GOTRUE_SITE_URL: ${SITE_URL} - GOTRUE_URI_ALLOW_LIST: ${ADDITIONAL_REDIRECT_URLS} - GOTRUE_DISABLE_SIGNUP: ${DISABLE_SIGNUP} - GOTRUE_JWT_ADMIN_ROLES: service_role - GOTRUE_JWT_AUD: authenticated - GOTRUE_JWT_DEFAULT_GROUP_NAME: authenticated - GOTRUE_JWT_EXP: ${JWT_EXPIRY} - GOTRUE_JWT_SECRET: ${JWT_SECRET} - GOTRUE_LOG_LEVEL: ${AUTH_LOG_LEVEL} - GOTRUE_SMTP_ADMIN_EMAIL: ${SMTP_ADMIN_EMAIL} - GOTRUE_SMTP_HOST: ${SMTP_HOST} - GOTRUE_SMTP_PORT: ${SMTP_PORT} - 
GOTRUE_SMTP_USER: ${SMTP_USER} - GOTRUE_SMTP_PASS: ${SMTP_PASS} - GOTRUE_SMTP_SENDER_NAME: ${SMTP_SENDER_NAME} - GOTRUE_MAILER_URLPATHS_INVITE: ${MAILER_URLPATHS_INVITE} - GOTRUE_MAILER_URLPATHS_CONFIRMATION: ${MAILER_URLPATHS_CONFIRMATION} - GOTRUE_MAILER_URLPATHS_RECOVERY: ${MAILER_URLPATHS_RECOVERY} - GOTRUE_MAILER_URLPATHS_EMAIL_CHANGE: ${MAILER_URLPATHS_EMAIL_CHANGE} - GOTRUE_MAILER_AUTOCONFIRM: ${ENABLE_EMAIL_AUTOCONFIRM} - GOTRUE_MAILER_SECURE_EMAIL_CHANGE_ENABLED: ${MAILER_SECURE_EMAIL_CHANGE_ENABLED} - GOTRUE_MAILER_EXTERNAL_HOSTS: "localhost,admin.localhost,kong,supabase.classroomcopilot.ai,classroomcopilot.ai" - GOTRUE_MAILER_EXTERNAL_HOSTS_ALLOW_REGEX: ".*\\.classroomcopilot\\.ai$" - GOTRUE_SMS_AUTOCONFIRM: ${ENABLE_PHONE_AUTOCONFIRM} - GOTRUE_EXTERNAL_EMAIL_ENABLED: ${ENABLE_EMAIL_SIGNUP} - GOTRUE_EXTERNAL_ANONYMOUS_USERS_ENABLED: ${ENABLE_ANONYMOUS_USERS} - GOTRUE_EXTERNAL_PHONE_ENABLED: ${ENABLE_PHONE_SIGNUP} - GOTRUE_EXTERNAL_AZURE_ENABLED: ${AZURE_ENABLED} - GOTRUE_EXTERNAL_AZURE_CLIENT_ID: ${AZURE_CLIENT_ID} - GOTRUE_EXTERNAL_AZURE_SECRET: ${AZURE_SECRET} - GOTRUE_EXTERNAL_AZURE_REDIRECT_URI: ${AZURE_REDIRECT_URI} - networks: - - cc-network - - rest: - profiles: - - database - - supabase - container_name: supabase-rest-${NGINX_MODE:-dev} - image: postgrest/postgrest:v12.2.0 - depends_on: - db: - condition: service_healthy - analytics: - condition: service_healthy - restart: unless-stopped - env_file: - - .env - environment: - PGRST_DB_URI: postgres://authenticator:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB} - PGRST_DB_SCHEMAS: ${PGRST_DB_SCHEMAS} - PGRST_DB_ANON_ROLE: anon - PGRST_JWT_SECRET: ${JWT_SECRET} - PGRST_DB_USE_LEGACY_GUCS: "false" - PGRST_APP_SETTINGS_JWT_SECRET: ${JWT_SECRET} - PGRST_APP_SETTINGS_JWT_EXP: ${JWT_EXPIRY} - command: "postgrest" - networks: - - cc-network - - realtime: - profiles: - - database - - supabase - container_name: realtime-dev-${NGINX_MODE:-dev}.supabase-realtime - image: 
supabase/realtime:v2.34.7 - depends_on: - db: - condition: service_healthy - analytics: - condition: service_healthy - healthcheck: - test: - [ - "CMD", - "curl", - "-sSfL", - "--head", - "-o", - "/dev/null", - "-H", - "Authorization: Bearer ${ANON_KEY}", - "http://localhost:4000/api/tenants/realtime-dev/health", - ] - timeout: 5s - interval: 5s - retries: 3 - restart: unless-stopped - env_file: - - .env - environment: - PORT: 4000 - DB_HOST: ${POSTGRES_HOST} - DB_PORT: ${POSTGRES_PORT} - DB_USER: supabase_admin - DB_PASSWORD: ${POSTGRES_PASSWORD} - DB_NAME: ${POSTGRES_DB} - DB_AFTER_CONNECT_QUERY: "SET search_path TO _realtime" - DB_ENC_KEY: supabaserealtime - API_JWT_SECRET: ${JWT_SECRET} - SECRET_KEY_BASE: ${SECRET_KEY_BASE} - ERL_AFLAGS: -proto_dist inet_tcp - DNS_NODES: "''" - RLIMIT_NOFILE: "10000" - APP_NAME: realtime - SEED_SELF_HOST: true - RUN_JANITOR: true - networks: - - cc-network - - storage: - profiles: - - database - - supabase - container_name: supabase-storage-${NGINX_MODE:-dev} - image: supabase/storage-api:v1.14.5 - depends_on: - db: - condition: service_healthy - rest: - condition: service_started - imgproxy: - condition: service_started - healthcheck: - test: - [ - "CMD", - "wget", - "--no-verbose", - "--tries=1", - "--spider", - "http://storage:5000/status", - ] - timeout: 5s - interval: 5s - retries: 3 - restart: unless-stopped - env_file: - - .env - environment: - ANON_KEY: ${ANON_KEY} - SERVICE_KEY: ${SERVICE_ROLE_KEY} - POSTGREST_URL: http://rest:3000 - PGRST_JWT_SECRET: ${JWT_SECRET} - DATABASE_URL: postgres://supabase_storage_admin:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB} - FILE_SIZE_LIMIT: 52428800 - STORAGE_BACKEND: file - FILE_STORAGE_BACKEND_PATH: /var/lib/storage - TENANT_ID: stub - REGION: stub - GLOBAL_S3_BUCKET: stub - ENABLE_IMAGE_TRANSFORMATION: "true" - IMGPROXY_URL: http://imgproxy:5001 - networks: - - cc-network - - imgproxy: - profiles: - - database - - supabase - container_name: 
supabase-imgproxy-${NGINX_MODE:-dev} - image: darthsim/imgproxy:v3.8.0 - healthcheck: - test: ["CMD", "imgproxy", "health"] - timeout: 10s - interval: 5s - retries: 10 - env_file: - - .env - environment: - IMGPROXY_BIND: ":5001" - IMGPROXY_LOCAL_FILESYSTEM_ROOT: / - IMGPROXY_USE_ETAG: "true" - IMGPROXY_ENABLE_WEBP_DETECTION: ${IMGPROXY_ENABLE_WEBP_DETECTION} - volumes: - - ./local/data/supabase/storage-${NGINX_MODE:-dev}:/var/lib/storage:z - networks: - - cc-network - - meta: - profiles: - - database - - supabase - container_name: supabase-meta-${NGINX_MODE:-dev} - image: supabase/postgres-meta:v0.84.2 - depends_on: - db: - condition: service_healthy - analytics: - condition: service_healthy - restart: unless-stopped - env_file: - - .env - environment: - PG_META_PORT: 8080 - PG_META_DB_HOST: ${POSTGRES_HOST} - PG_META_DB_PORT: ${POSTGRES_PORT} - PG_META_DB_NAME: ${POSTGRES_DB} - PG_META_DB_USER: supabase_admin - PG_META_DB_PASSWORD: ${POSTGRES_PASSWORD} - networks: - - cc-network - - functions: - profiles: - - database - - supabase - container_name: supabase-edge-functions-${NGINX_MODE:-dev} - image: supabase/edge-runtime:v1.67.0 - restart: unless-stopped - depends_on: - analytics: - condition: service_healthy - env_file: - - .env - environment: - JWT_SECRET: ${JWT_SECRET} - SUPABASE_URL: ${SUPABASE_URL} - SUPABASE_ANON_KEY: ${ANON_KEY} - SUPABASE_SERVICE_ROLE_KEY: ${SERVICE_ROLE_KEY} - SUPABASE_DB_URL: postgresql://postgres:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB} - VERIFY_JWT: "${FUNCTIONS_VERIFY_JWT}" - volumes: - - ./supabase/functions:/home/deno/functions:Z - command: - - start - - --main-service - - /home/deno/functions/main - networks: - - cc-network - - analytics: - profiles: - - database - - supabase - container_name: supabase-analytics-${NGINX_MODE:-dev} - image: supabase/logflare:1.4.0 - healthcheck: - test: ["CMD", "curl", "http://localhost:4000/health"] - timeout: 10s - interval: 5s - retries: 10 - restart: unless-stopped - 
depends_on: - db: - condition: service_healthy - env_file: - - .env - environment: - LOGFLARE_NODE_HOST: 127.0.0.1 - DB_USERNAME: supabase_admin - DB_DATABASE: _supabase - DB_HOSTNAME: ${POSTGRES_HOST} - DB_PORT: ${POSTGRES_PORT} - DB_PASSWORD: ${POSTGRES_PASSWORD} - DB_SCHEMA: _analytics - LOGFLARE_API_KEY: ${LOGFLARE_API_KEY} - LOGFLARE_SINGLE_TENANT: true - LOGFLARE_SUPABASE_MODE: true - LOGFLARE_MIN_CLUSTER_SIZE: 1 - POSTGRES_BACKEND_URL: postgresql://supabase_admin:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/_supabase - POSTGRES_BACKEND_SCHEMA: _analytics - LOGFLARE_FEATURE_FLAG_OVERRIDE: multibackend=true - ports: - - 4000:4000 - networks: - - cc-network - - db: - profiles: - - database - - supabase - container_name: supabase-db-${NGINX_MODE:-dev} - image: supabase/postgres:15.8.1.020 - healthcheck: - test: ["CMD-SHELL", "pg_isready -U postgres -h localhost || exit 1"] - interval: 10s - timeout: 5s - retries: 20 - start_period: 30s - depends_on: - vector: - condition: service_healthy - command: - - postgres - - -c - - config_file=/etc/postgresql/postgresql.conf - - -c - - log_min_messages=fatal - restart: unless-stopped - env_file: - - .env - environment: - POSTGRES_HOST: /var/run/postgresql - PGPORT: ${POSTGRES_PORT} - POSTGRES_PORT: ${POSTGRES_PORT} - PGPASSWORD: ${POSTGRES_PASSWORD} - POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} - PGDATABASE: ${POSTGRES_DB} - POSTGRES_DB: ${POSTGRES_DB} - JWT_SECRET: ${JWT_SECRET} - JWT_EXP: ${JWT_EXPIRY} - volumes: - - ./supabase/db/migrations/supabase/50-_supabase.sql:/docker-entrypoint-initdb.d/migrations/50-_supabase.sql - - ./supabase/db/migrations/supabase/52-realtime.sql:/docker-entrypoint-initdb.d/migrations/52-realtime.sql - - ./supabase/db/migrations/supabase/52-pooler.sql:/docker-entrypoint-initdb.d/migrations/52-pooler.sql - - ./supabase/db/migrations/supabase/52-logs.sql:/docker-entrypoint-initdb.d/migrations/52-logs.sql - - 
./supabase/db/init-scripts/51-webhooks.sql:/docker-entrypoint-initdb.d/init-scripts/51-webhooks.sql - - ./supabase/db/init-scripts/52-roles.sql:/docker-entrypoint-initdb.d/init-scripts/52-roles.sql - - ./supabase/db/init-scripts/52-jwt.sql:/docker-entrypoint-initdb.d/init-scripts/52-jwt.sql - - ./supabase/db/migrations/core/60-create-databases.sql:/docker-entrypoint-initdb.d/migrations/60-create-databases.sql - - ./supabase/db/migrations/core/61-core-schema.sql:/docker-entrypoint-initdb.d/migrations/61-core-schema.sql - - ./supabase/db/migrations/core/62-functions-triggers.sql:/docker-entrypoint-initdb.d/migrations/62-functions-triggers.sql - - ./supabase/db/migrations/core/63-storage-policies.sql:/docker-entrypoint-initdb.d/migrations/63-storage-policies.sql - - ./supabase/db/migrations/core/64-initial-admin.sql:/docker-entrypoint-initdb.d/migrations/64-initial-admin.sql - - ./supabase/db/migrations/core/65-keycloak-setup.sql:/docker-entrypoint-initdb.d/migrations/65-keycloak-setup.sql - - supabase-db-data:/var/lib/postgresql/data - - supabase-db-config:/etc/postgresql-custom - networks: - - cc-network - - vector: - profiles: - - database - - supabase - container_name: supabase-vector-${NGINX_MODE:-dev} - image: timberio/vector:0.28.1-alpine - healthcheck: - test: - [ - "CMD", - "wget", - "--no-verbose", - "--tries=1", - "--spider", - "http://vector:9001/health", - ] - timeout: 10s - interval: 10s - retries: 10 - volumes: - - ./supabase/logs/vector.yml:/etc/vector/vector.yml:ro - - /var/run/docker.sock:/var/run/docker.sock:ro - env_file: - - .env - environment: - LOGFLARE_API_KEY: ${LOGFLARE_API_KEY} - command: ["--config", "/etc/vector/vector.yml"] - networks: - - cc-network - - supavisor: - profiles: - - database - - supabase - container_name: supabase-pooler-${NGINX_MODE:-dev} - image: supabase/supavisor:1.1.56 - healthcheck: - test: curl -sSfL --head -o /dev/null "http://127.0.0.1:4000/api/health" - interval: 10s - timeout: 10s - retries: 10 - depends_on: - 
db: - condition: service_healthy - analytics: - condition: service_healthy - command: - - /bin/sh - - -c - - /app/bin/migrate && /app/bin/supavisor eval "$$(cat /etc/pooler/pooler.exs)" && /app/bin/server - restart: unless-stopped - ports: - - ${POSTGRES_PORT}:5432 - - ${POOLER_PROXY_PORT_TRANSACTION}:6543 - env_file: - - .env - environment: - - PORT=4000 - - POSTGRES_PORT=${POSTGRES_PORT} - - POSTGRES_DB=${POSTGRES_DB} - - POSTGRES_PASSWORD=${POSTGRES_PASSWORD} - - DATABASE_URL=ecto://supabase_admin:${POSTGRES_PASSWORD}@db:${POSTGRES_PORT}/_supabase - - CLUSTER_POSTGRES=true - - SECRET_KEY_BASE=${SECRET_KEY_BASE} - - VAULT_ENC_KEY=${VAULT_ENC_KEY} - - API_JWT_SECRET=${JWT_SECRET} - - METRICS_JWT_SECRET=${JWT_SECRET} - - REGION=local - - ERL_AFLAGS=-proto_dist inet_tcp - - POOLER_TENANT_ID=${POOLER_TENANT_ID} - - POOLER_DEFAULT_POOL_SIZE=${POOLER_DEFAULT_POOL_SIZE} - - POOLER_MAX_CLIENT_CONN=${POOLER_MAX_CLIENT_CONN} - - POOLER_POOL_MODE=transaction - volumes: - - ./supabase/pooler/pooler.exs:/etc/pooler/pooler.exs:ro - networks: - - cc-network - - ollama: - profiles: - - none - - ai_services - container_name: ollama-${NGINX_MODE:-dev} - build: - context: ./cc-volumes/ollama/docker - dockerfile: Dockerfile.${BUILD_OS}.${NGINX_MODE:-dev} - ports: - - "${PORT_OLLAMA}:11434" - volumes: - - ./local/data/ollama:/root/.ollama - - ./local/logs/ollama:/var/log/ollama - environment: - - OLLAMA_HOST=0.0.0.0 - - OLLAMA_ORIGINS=* - networks: - - cc-network - deploy: - resources: - limits: - cpus: '4' - memory: 8G - - open-webui: - profiles: - - core - - ai_services - container_name: open-webui-${NGINX_MODE:-dev} - image: ghcr.io/open-webui/open-webui:main - ports: - - "${PORT_OPEN_WEBUI:-3333}:8080" - volumes: - - ./local/${BUILD_OS}/${NGINX_MODE:-dev}/data/open-webui:/app/backend/data - - ./local/${BUILD_OS}/${NGINX_MODE:-dev}/logs/open-webui:/app/backend/logs - environment: - - OLLAMA_LOG_LEVEL=DEBUG - - WEBUI_URL=http://open-webui.classroomcopilot.test - - DEFAULT_LOCALE=en 
- - DEFAULT_USER_ROLE=pending where features - - ENABLE_OAUTH_SIGNUP=true - - OAUTH_CLIENT_ID=open-webui - - OAUTH_CLIENT_SECRET=${KEYCLOAK_SECRET_OPENWEBUI} - - OAUTH_PROVIDER_NAME=Keycloak - - OAUTH_SCOPES=openid,email,profile - # Optional - - OAUTH_MERGE_ACCOUNTS_BY_EMAIL=true - - OAUTH_ROLES_CLAIM=realm_access.roles - - ENABLE_OAUTH_ROLE_MANAGEMENT=true - - OAUTH_ALLOWED_ROLES=user,admin,superadmin - - OAUTH_ADMIN_ROLES=superadmin,admin - - OAUTH_ALLOWED_DOMAINS=kevlarai.test - # Keycloak - - OPENID_PROVIDER_URL=http://keycloak.kevlarai.test/realms/ClassroomCopilot/.well-known/openid-configuration - - OLLAMA_BASE_URL=http://${HOST_OLLAMA}:11434 - - PORT=8080 - - WEBUI_PORT=8080 - - HOST=0.0.0.0 - env_file: - - .env - extra_hosts: - - "keycloak.kevlarai.test=${HOST_IP}" - networks: - - cc-network - deploy: - resources: - limits: - cpus: '2' - memory: 4G - - n8n: - profiles: - - none - - ai_services - container_name: n8n-${NGINX_MODE:-dev} - build: - context: ./cc-volumes/n8n/docker - dockerfile: Dockerfile.${BUILD_OS}.${NGINX_MODE:-dev} - ports: - - "5678:5678" - volumes: - - ./local/data/n8n:/home/node/.n8n - - ./local/logs/n8n:/home/node/.n8n/logs - environment: - - N8N_HOST=0.0.0.0 - - N8N_PORT=5678 - - N8N_PROTOCOL=http - - N8N_USER_MANAGEMENT_DISABLED=true - - N8N_BASIC_AUTH_ACTIVE=false - - N8N_SECURE_COOKIE=false - - NODE_ENV=production - networks: - - cc-network - deploy: - resources: - limits: - cpus: '2' - memory: 4G - - -volumes: - supabase-db-config: - driver: local - supabase-db-data: - driver: local - neo4j-data: - driver: local - neo4j-logs: - driver: local - frontend-node-modules: - driver: local - frontend-dist: - driver: local - tldraw-sync-node-modules: - driver: local - redis-data: - driver: local - jupyter-user-data: - driver: local - -networks: - cc-network: - name: cc-network - driver: bridge diff --git a/.env b/.env index 630b92e..ec83ddf 100644 --- a/.env +++ b/.env @@ -1,9 +1,10 @@ # Whisper live settings APP_WS_PROTOCOL=wss 
-APP_URL=kevlarai.com +APP_URL=classroomcopilot.ai -PORT_WHISPERLIVE=5050 +PORT_WHISPERLIVE=5000 PORT_WHISPERLIVE_SSL=5053 +HTTP_PORT=8080 WHISPERLIVE_SSL=false WHISPL_USE_CUSTOM_MODEL=false diff --git a/Dockerfile b/Dockerfile index f62a710..d56e2ac 100644 --- a/Dockerfile +++ b/Dockerfile @@ -20,22 +20,24 @@ WORKDIR /app # install the requirements for running the whisper-live server COPY requirements/server.txt /app/ -RUN pip install -r server.txt && rm server.txt +RUN pip install --no-cache-dir "setuptools<70.0.0" wheel +RUN pip install -r server.txt +RUN pip install --no-build-isolation openai-whisper==20240930 +RUN rm server.txt # make the paths of the nvidia libs installed as wheels visible -ENV LD_LIBRARY_PATH="/usr/local/lib/python3.10/site-packages/nvidia/cublas/lib:/usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib" +RUN pip install --no-cache-dir nvidia-cublas-cu12 nvidia-cudnn-cu12 +ENV LD_LIBRARY_PATH="/usr/local/lib/python3.10/site-packages/nvidia/cublas/lib:/usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib:/usr/local/lib/python3.10/site-packages/torch/lib:${LD_LIBRARY_PATH}" COPY whisper_live /app/whisper_live COPY run_server.py /app +COPY hybrid_server.py /app -# Copy application files -EXPOSE ${PORT_WHISPERLIVE} -ARG PORT_WHISPERLIVE -ENV PORT_WHISPERLIVE=${PORT_WHISPERLIVE} -ARG FASTERWHISPER_MODEL -ENV FASTERWHISPER_MODEL=${FASTERWHISPER_MODEL} +# Expose both WebSocket and HTTP ports +EXPOSE 5000 8080 -CMD ["python3", "-u", "run_server.py", "--port", "${PORT_WHISPERLIVE}", "--backend", "faster_whisper"] +# Use the hybrid server by default +CMD python3 -u hybrid_server.py --websocket-port 5000 --http-port 8080 --backend faster_whisper # CMD ["python3", "-u", "run_server.py", "--port", "${PORT_WHISPERLIVE}", "--backend", "faster_whisper", "--faster_whisper_custom_model_path", "/app/models/${FASTERWHISPER_MODEL}", "--ssl_cert_path", "/app/ssl"] diff --git a/HYBRID_SERVER_README.md b/HYBRID_SERVER_README.md new file mode 100644 index 
0000000..640ab38 --- /dev/null +++ b/HYBRID_SERVER_README.md @@ -0,0 +1,260 @@ +# WhisperLive Hybrid Server + +This hybrid server extends the original WhisperLive-Server to support both WebSocket connections (for real-time audio streaming) and HTTP endpoints (for file transcription) in a single container. + +## Features + +- **WebSocket Server**: Original real-time audio transcription functionality +- **HTTP Server**: New file upload and transcription endpoints +- **Single Container**: Both services run in the same Docker container +- **GPU Sharing**: Both services share the same GPU resources + +## Architecture + +The hybrid server runs two services simultaneously: +1. **WebSocket Server**: Handles real-time audio streaming transcription +2. **HTTP Server**: Handles file uploads and transcription requests + +Both services use the same WhisperLive transcriber instance, ensuring efficient resource usage. + +## Ports + +- **WebSocket Port**: Default 5050 (configurable via `PORT_WHISPERLIVE`) +- **HTTP Port**: Default 8080 (configurable via `HTTP_PORT`) + +## HTTP Endpoints + +### 1. Health Check +``` +GET /health +``` +Returns server health status. + +**Response:** +```json +{ + "status": "healthy", + "service": "WhisperLive Hybrid Server" +} +``` + +### 2. OpenAI Compatible Endpoints +``` +POST /v1/audio/transcriptions +POST /v1/audio/translations +``` +Fully compatible drop-in replacements for the standard OpenAI Whisper API. 
+ +**Parameters:** +- `file` (required): Audio file (WAV, MP3, FLAC, M4A, OGG, WEBM, MP4, MPEG, MPGA) +- `model` (optional): Model size (default: "base") +- `language` (optional): Language code (e.g., "en", "es", "fr") +- `prompt` (optional): Text to guide the model's style +- `response_format` (optional): "json", "text", "srt", "verbose_json", "vtt" (default: "json") +- `temperature` (optional): Sampling temperature (0.0 to 1.0) + +**Example Request:** +```bash +curl -X POST http://localhost:8080/v1/audio/transcriptions \ + -H "Content-Type: multipart/form-data" \ + -F "file=@audio.wav" \ + -F "model=whisper-1" \ + -F "response_format=json" +``` + +**Response (JSON format):** +```json +{ + "text": "Hello, this is a test." +} +``` + +### 3. Legacy File Transcription +``` +POST /transcribe +``` +Transcribes an uploaded audio file. + +**Parameters:** +- `file` (required): Audio file (WAV, MP3, FLAC, M4A, OGG, WEBM) +- `language` (optional): Language code (e.g., "en", "es", "fr") +- `task` (optional): "transcribe" or "translate" (default: "transcribe") +- `model` (optional): Model size (default: "base") + +**Example Request:** +```bash +curl -X POST http://localhost:8080/transcribe \ + -F "file=@audio.wav" \ + -F "language=en" \ + -F "task=transcribe" \ + -F "model=base" +``` + +**Response:** +```json +{ + "success": true, + "segments": [ + { + "start": 0.0, + "end": 2.5, + "text": "Hello, this is a test.", + "no_speech_prob": 0.1 + } + ], + "info": { + "language": "en", + "language_probability": 0.95, + "duration": 10.5, + "duration_after_vad": 10.5, + "transcription_options": {} + }, + "filename": "audio.wav" +} +``` + +### 4. URL Transcription (Placeholder) +``` +POST /transcribe/url +``` +Endpoint for transcribing audio from URLs (ready for implementation). 
+ +## Usage Examples + +### Python Client +```python +import requests + +# Transcribe a file +with open('audio.wav', 'rb') as f: + response = requests.post('http://localhost:8080/transcribe', + files={'file': f}, + data={'language': 'en', 'model': 'base'}) + +if response.status_code == 200: + result = response.json() + print(f"Transcription: {result['segments']}") +``` + +### JavaScript/Node.js +```javascript +const FormData = require('form-data'); +const fs = require('fs'); + +const form = new FormData(); +form.append('file', fs.createReadStream('audio.wav')); +form.append('language', 'en'); +form.append('model', 'base'); + +fetch('http://localhost:8080/transcribe', { + method: 'POST', + body: form +}) +.then(response => response.json()) +.then(result => console.log(result)); +``` + +### cURL +```bash +# Basic transcription +curl -X POST http://localhost:8080/transcribe \ + -F "file=@audio.wav" + +# With parameters +curl -X POST http://localhost:8080/transcribe \ + -F "file=@audio.wav" \ + -F "language=es" \ + -F "task=translate" \ + -F "model=small" +``` + +## Configuration + +### Environment Variables +- `PORT_WHISPERLIVE`: WebSocket port (default: 5050) +- `HTTP_PORT`: HTTP port (default: 8080) +- `FASTERWHISPER_MODEL`: Custom model path +- `OMP_NUM_THREADS`: OpenMP thread count + +### Docker Compose +```yaml +services: + whisperlive: + ports: + - "5050:5050" # WebSocket + - "8080:8080" # HTTP + environment: + PORT_WHISPERLIVE: 5050 + HTTP_PORT: 8080 +``` + +## Testing + +### 1. Test Script +Run the Python test script: +```bash +python3 test_http_endpoints.py +``` + +### 2. Web Interface +Open `test_form.html` in a web browser to test the HTTP endpoints with a user-friendly interface. + +### 3. 
Health Check +```bash +curl http://localhost:8080/health +``` + +## Backend Support + +Currently, the HTTP endpoints support: +- **faster_whisper**: Full support for all features +- **tensorrt**: Basic support (needs adaptation) +- **openvino**: Basic support (needs adaptation) + +## File Size Limits + +- Maximum file size: 100MB +- Supported formats: WAV, MP3, FLAC, M4A, OGG, WEBM + +## Performance Considerations + +- File transcription uses the same model instance as WebSocket connections +- Temporary files are automatically cleaned up after processing +- Both services share GPU memory efficiently +- HTTP requests are processed in separate threads + +## Troubleshooting + +### Common Issues + +1. **Port Already in Use** + - Check if ports 5050 or 8080 are available + - Use different ports via environment variables + +2. **File Upload Errors** + - Ensure file size is under 100MB + - Check file format is supported + - Verify file is not corrupted + +3. **GPU Memory Issues** + - Monitor GPU memory usage + - Consider using smaller model sizes + - Restart container if needed + +### Logs +Check container logs for detailed error information: +```bash +docker logs whisperlive +``` + +## Migration from Original Server + +The hybrid server is fully backward compatible. Your existing WebSocket clients will continue to work without changes. The HTTP endpoints are additional functionality that doesn't interfere with the original service. 
+ +## Future Enhancements + +- [ ] Support for more audio formats +- [ ] Batch file processing +- [ ] Progress tracking for long files +- [ ] Authentication and rate limiting +- [ ] WebSocket support for file transcription progress diff --git a/__pycache__/hybrid_server.cpython-314.pyc b/__pycache__/hybrid_server.cpython-314.pyc new file mode 100644 index 0000000000000000000000000000000000000000..730b98fd1e7212edb04a3430a776222014d6a33f GIT binary patch literal 52983 zcmd_TeN5#@0%JSNE~S76s4OJalkf+b z?s9)jm$SR3(>Xmg>Flmqle4vxG5G#&2*pLXA>_JFXBeTjT<*^+#4T_1^GD~o|C`znZM*e za@_w)Ka?j|_I&zz7RTMOBlS; zXeoQ|jPCSgm2uHso@}^f9xL2(j}30c<(=+>O3{<=y&S?1Cx0tGPfmwP^4Cy|BT9Jk zjg-{~Kg;w^BOKPS386Oeqd(V+^^> zQ@#b{3eWB|$d%iGT;-|0yx+6OQ?re@yk{@snmqPx#I5zzvAFxT5qH0*-qYZ5cn)+} zw!lN9r-?yzZUd^z(|oz^izZLYHsZE=+E9XaT?x)AEth;NL>g_l+~D>k!xf+BAW_p) z8Kb2J_m~o%d{5ihw?z~T+R7Sj=`T&Mk5YW z!s7NtXC0Y|%)x*!awU;13SXWVBGE+lmm;B{fBHtk77>C|-f4e8NLW2WBpeDx1WGp= zn!JK{VMLgm7X=m}=$jKJ<>7@@gwSWk-=}{E=N5NK{Wi{dxF(r2DW-zVut^sO!?f~z zvCA+;hBgJ_Xi}(9&DuCxzY?Q~i40Os9#fM%@X&81M%XfL@?_*=&>zo`-)8o0|CGMD?)|X&*MoH&EA+0{ZrnEAYK(jSNKMva5^N;`J&#ae@^g5 z#Hd(_$VK@3lzQ|n?r~n>9n+n$uiJiiVS2UbcNZwU7T5( zSiE$9$NjE%Gh%K1vD_0e%ZX2sANT+7Mi=vd0uh4@C!+N^u@Fwer0=(a|`_^wXa}EHz~}%h#Q>dxNGQNV(leL z!=6hjRgZ&mo2U@-qtLkW4qG-zW!^MZCg57MG2Gco4#~H|&ca#ei@_W-$1QNP#xt2e z3vPCswTW;kKVj49S&p-Dd$@71KZiBJf$3yMqCOCPE#f51U{484RJa~Zm~uYPKRjR-SyLNFRhWQFFV;dwytQ*UH} zi5!**>tYEj@j&Wq;y&aVp?a~d$@5ZU=FJE}mJ#cWZTP&0;=KNlPiyfyfESIwgJ zzG-b&+p9UZO^K>$Ni2+jmK8@b*=2|zMb>9#Qx1gOJ( zg}d%t{pVjZJz4m0a@;sFY1q<}23iTqA-n#pM)rcUiF${`|g;dp+< zt-+5>$~dn2YeI$fIoUe zJdRjYq(~nejI6mkUhDpy?r-e6x9810-`ch2IJWFKwvyX*>qI<%$7|kKy?6afrLk() zN`CXL!MMfx)lbHjDxjXK>@anOcWtY?^0ZM%oG=EAN_>jeAE^6{{&_Z-Z&AYe| z=dpS+%D8|jYVnvcW6$(h;AVL;;bt?jILf4k zkx4D8sp|N%e)mAF%5US8nq1UD z1uw$VM-r^Z!e+hD)KrnSD08Uf zWNA1Da(q@e7b#RsB~uXZpl1qYtwMfAaga7=FE^g6shL_1j~O!r{5h;YodTFd@tNKW z-u|;AV*?{&-r<3f=f_ThNtBFD46y|Fi?80NmPS)c`^Z=2~7!s zgk{1P5&Rr?Jj<~&QE*L|kUAmjF$vpjG#Zw?*%Ll=PK?QXDnRe#d?Xs0^Ue`8Zy2+V 
z#7;RcZ&V0ILLzz|3oeSnH-{dI4j7!7_XQGp5qJWEls1tYi3Ggp!rDe z*PAf=!r?>~K2G~*M51`Hha%)fgy?+OD`J?3z+Ix`8}MHhT#{5R4pO`V<%<-UD2~kf zL;*v;jIjwpOq45eSZd7|@gzVv^1x+;z_fUZGCxhuFga()86k)CCn7Ol@da`yUzReF zBcUeZUl1ZBQtnqC%mE}Og?jpf{wV5MWzfZ4%GYB^9QhucTikjUSL0l@R>mvrUn_{0 z?fF_>yv4n0b;kE~u3GJJzGc-~9j|IwwN}I{_pe&ZpJZ{R72hcT{pW65zGjWL^xn3- zW_{KATH&jOvGS&6tMhSb`Q6fc~&;?}%1YsIp)0$Kch7H8}Fa8K*Xp7z@o z{6^Nb_2jFG=N7)!^=enFvT-%H@yB`HA6L}eJ{2!4St~q<|FOb@@#?*6)qTs=eRpy` z+~0P;b7g%WF!PVu;@`?!|*U`8c2T7B|Wn4QFnZVd{}b{a-33+1ffd~_By89Ot~gMCJMQa;QNOmkdL*WMFn`^PT22l(0OT%aeX zi(Y)d7o6#~3qgBN4nhRqR8J1+F}i?uA3q6>iVAhx&y5W_k046&6Y=nF`&GYijl?iJ z&v<+gQLp)@qO;vo0-@K*el+pK2>rf*GXiV~-L0+`1ys}@4G2BjHaOvn%ua-S;#60& z^ij$aqstY3RurbY?KFHvI-8rPF{(sdGocVh8(-KTaZQHinkOTXw&$jOP|n=w9>S<2 zc3zvAjUI1rX*qTfe~0jQ7=IltE&Hb+aRq!gx+B+oVLLAhfo^*wdLtl2W(6Ut6p)2T z^`^RailI=Hf0>nue$(T0hDHCJPrT8|*B+cccvv`e%=o)=HUyOr#cFRqGU?ODnwW7; zh5{jq&@$cH(Uulr5?FCg%yjbN%!IF@y`_mi*xJM&I@-jyxH=qqa5UmN=Y0Mk;7uQy z_DxS3$G|8%g(yc4`r0Rs=%Y-8#3?E=%dzdyp(Z}*w76Q+GL6hnP709-;I+0)96i#i zhc^XkV<-@&58}~(zUC8ykRGVShmHy@6Upy4b5xuj(BXd~blr(@n5C1->qJm8DYX%r zXNEA*k{TTLO--?Mns7}{6QQXay3B~^oDzL?^7V|u>YMloWHcvOihnv4K8aT|5M+W= zO4DBTi49IPqV!vbKe_p(IBNUpB0&@BoE7|_>Q273rRD1EhFO9rXM|1_Z-fVWMBfzk z#Tj}-q0-=s@-6%Dze#H44$xBvXsfM_5mxIV2j3FD-o!KEDGUiK808PO>~l122QUh_ zk4mWkZ#~?^w;ll*waU;p0KQ?Je9@?Ha+VZ9o&2=_x-gX*Q*F>TQJ719(zGeKL@$tSW_v8_svH`$!}WF#6HvDvUcYs3QPY>zU@D$Gcz5#jEtOy>#2FRlFt|qrBCq+mAk}|bq(5C;AE-1HGn&F3 zpA*0;`34PZ=s0`?&8REPCgr5EZd{<$!rWAAs40Vs1UO(IG_NOXeG8?u?*5>x!uFyAe96ulT8`3-H2q8jT6gi5Vt1&q>LNi=GQMAk;q*kKK^v*@xI z^(_5%n@Gcurpt88wqq<8f_VrdlP?fJx!WR1OlT-bi&_coozo$xh?2;c5G;l2&Q}2IfY== zzAzmSbOOK(u?nNc&=GkmH_=p68R54@1Tqb7YuH0QoD8ai}dcM z#kcWvspXI{Z7O-Yd>8?thDasJr_Ox){pcY2&|w;|i6mK>b;AF_O$sbe_;saKJ5V}B z`GS~`NZe)`a`I3Ry{SFI3s6-%F{c5aNOA{F&t=$=Gc(4Jnx}36(*QYsMYu67LS+$w zrkmxRisWfAG^eWxk#{KUn}l5{ZQe-V7-`Yq3^A89&LnMIY=G#a!5PKVl!7G-ZI+L2 z!pss7@U(nd(sH4=^l0(})9ahZv?7?w_crGhn~{%9i%_aZ4OzJnliwDL*hW`QVHBjp 
zNnzZZGK$T)03l5!A%BntD`z_UXEQh_>OBWp(r6y{C74XG1`DGZI2BD#jUsJ0h!_eZ znx=H2(F}Q$mhGf8ouqBjkcl1|>g<~c2ztpnN%Jn9B1^$>Fcft{UI<;&&c1cfosy~AI&{nXv<M}|naP?K%qB6<>r~9pt zxiO{;un?M_roNzSFD-MDh_M|i-osGq(H||t5Z4p?qrpcJ4k$yDT&`6n5 zf|pZd6sW>e11OX9nQef~)Sk$Ni!{TO?T-z$N9c`aTQVT&&4p3=5b{nFmItE=g8iP`Ls*z4i z={G`~aAaC1r%Jg)>x3ECmKt0g|4W@bYmhgIWjZ?2Vue)gRQ*`Q?uTV!M7RcPmHexT zp9=*;5lnH6so8}ExYWa(k+2_&^z`-;m8+%?JhCv`hWTY`vnXS8`WFQ+?J8c3hfA#* znHpNxnlMbI<|u!Mz@Vs^3??s{j!t#uTT^DG+wSu$ZE7!p2_-dv%B-0=iXpHwp`rb7 z%aky)Nk5@vUCpvFx{H=bz)qjjMFvXQC_m-D%1@H%qT5b05N6?$jo2))=DTD_(tKnQ zW?Qv(c;wNVG<&lM;hvMQ207i3&oJxt1t$f_urMT!kz~+2KjjbcW2(#wrD9ieI0>Pv z83jO*2@*YBd!0^M^FNP8-=z3=Gm{IJgeN0u3av5Aq?Np#XPD`>8+91T04CYTT3xIU z*?an>Wv7(ws1S{U$|I<8dopWQMosp>sbY!HWv`E!)YltCWRaN?zUSoFn49l)5Ao+b z!+gVFNaP2hwdT)-soU}U>64?YS&FM-gwiWd7gNjf8f`f5pOThGoD&0fn1%xL(3306 zB|!JI!jvt9V}ydG40DsAus;xrx_o|v!pd!?4Yzs4#uuH=c^gX7EPtKmBbsgn39ZT7h+4vmEpnO(~I>i&M6@Z3u#xg^wAhsn?RxU zg9BLh?T^epI}EUjQShu^2FA1q_JAcyliMr?xFjBmQnVt4?&v{0UJw`NMmf+XoOX=XRW3o4c$~qle;vxviEF4 zXT~E1%L0pF#W{^wt>n*Gm%aKEhZnyFu{{ zf{j{D9pTzKC2hDzRqt##$SH%i+NzpC&Y)7-3Tc_Sf))MLL9$~T0Ma+@wtD^3^7t-I zkEe=mlfQSR^7g0Z%KG$A%aL{IpN1Q6&It_YD23FYK^oZ>_tm$i2TJF87Oraq z>X+fT&xB*6{;Ku?c}AqN6lOQ&nYNtGw6U%T3tx3+(~UZnEU^>YlGMAzlr=n?2j6ze z);E=#$+{+HD%t=rD=l$wCPPY~hrk2uJ?r;=@0&6k*UY-47-O`)tOv6%F$z70tZm+m z=WooWQwu=-`6(uK3WHK{%=B&co{>;gotY_pL3yKcw5wLYCJHukrA6GdQRU90P_CBk zQ`xm_KMgReDh+%xWu(}Ew3A@AHW+4P?Yig`^B%LcY|^6rG)Lrh?Lz{{gn*^~umxfz zO|lBm^k7nKT-?-Iv4TSbmSco9b?9pEl(acCw@aBet0*DDmS=)0S#L3D&2FmNviF<~ z3&GwYDAMMz8`BSKK9wO|lf{Y<-nvcngCNsmr0BbkQqTsia$d8Q3bV`z@56tdt%OE{ zhyoVmP71VipEf1&Qqk)pyqt_m?&2?o=J`qV)Ona&e8C$ivM)G=eJ^r!!v=)5fPhZi zp#4fgH2w_R14VmaN!^s*>P$142C8fJpn@SC9uy^yqS&9Dm^vi10uAmP*qavQ8$xIX zpC49-$t!4m=(A=IrVEo@uw(ReFW@8IpK9GGhMs1E|co-ofomKX`3p*=BusE_8dn}4%9BQSo=B}6K@wM><(&F5}T!cB`F<@?*_A`rf^BAQa)mPmMD;}Tg5v;=_V(+jFfGv?Y2|N zq3I_h=2|C<|@FxtIw8 zDAWTCVPBCym};q`#K}h6BB@IrO&XfNNEtRa@%7gtC{H6_4=sOvD&*w83d)1+-95?n 
z7&mCBhCa=Q(QgE~vqrREZ)(~%rH0r*IB*r%ANJ!w1{g^j>e(11z|1PQt}es$O!%8R z>LFF{C{ul7C{u}NdOj#^kV$IAAv_FC=^H`qDmWWXyfMnuTOqMG5NN2^E~Tw^(9V(p zY^G~K2kYUZE++O-J%HRfn7|wAB~@{~Lx-lVa)6kwfj&XdC!PViuB%Ix)xZJjlSNEz zaU84zUIEb^Ft$#iv81E-Nu(ilLKXSG$c^A6uQovn4e2^?RMhVoWmLzw8dxlPgYTNp z4;e^+ZKL6(x|dVviS?M9@*en?bciQF)P(`Y6}qBtc=}2Y!IRjMRP_<+-00d6rLYHH zXE|c)c69wk0+RxW22pvyAy9w!jeo|U-6-izi48?(#n3fAh*gaPq8Jjjy@@io2{8`{ z;2Q*^dOTGfq{U7KmC!)ZSKwcNjX!IcF3BuX$>JR24Un|4)I#A9DtV}cq30kv+K`4O z5^pfJ0g>rV@4{&;k@Jwh8tR+D&z>`i62Z(zXu!rgXRCwnG>DPf8Px8o>rnM65-56X zDvn|OI80{GxTm-9N2;4_bi`CYL^;;*TB#1ys)QI~iqALn(BQ}{g=vOMrB>?xPtZsJJ zy-ev{Sat@^D@Gfz8?e5m-f?q7#B^`_QPG95X>g!-?3`zS8G?s~2lz2h@5pGsXUIJ^ zbaq4=aof3RMdNn+xhiX*WnWv}W^~6@e5% zuCa?L^(ZPGoPv_qvmIBukyou+dUR45ZDP!S^<=vm!Qj^Dk*_xpH}6f!3JYwVPJ#J#{ZrWB8)7fH4EUgfZH2lXvoIpU|+5o08tj z5Fj%O(P%%z#APKaGa+aOdwtQ3h=AhAb0rnKOj)U$G-zQx0}A94GrSY{?G!mV3DRbs zGTgY>ns9c`7|mnm0_jvPSMKFBN+zHi1CyH9$bKYm7Q|8`no)M5^;oS0`t_U(n|qn< z<&{_XQ=?}`(8|yT&tOZYM7S)L(zXUom0IWI^pt=b5%e>3$6u<>BSQ2VmSsw_X{tM` zkWy>8Rdw~ciGeYQt(ry}3EiMSjdSXBqdqm$h9kc`lv5n#Nn%j1(Rv1PR3q)uqL88S z0UqZ>)7ilTW4}_lmzdu*68(eo0SysTFdJd|NzZ zREv2E*VK=-$yP|0oHhH>btAMU71Q8us_rvXd1)Y|s(|W2`?QWgT2ZbS+>OO1^xIY! 
z?d+h0yt?jRMWwBKp#qcM^L`wbXvfN8rya%yreHwdfPtoglm%b~z!kRhI9!+*O9N0t zhrv;K3P?qy@7%m^4l9zsJY`5)Ocg-Nn?&a=fN7=wjRCRQ5~@lPx5gH$`GO zWCu_YD*!B~5kX4lqHrV9qtvRPrsj!RihXpzGd|$);zaus7hxec7I-MRrl)P$S0$DS za?uR?7vL0o`gqvK<^{fC zAcC_xWJywP)tfxDdJdf|wGj?dDB;A=D2{L+8R*}!XH7Wkk*XRLK-?ASr9h>m6Y?8qHfqVnT$%T8lEjWWGEoGSoDT^bx4qeUC0KM z+?bDS3zeFR|B`6bK+>U59`a~Q;Q-AU43M`Wn%K=3Ta0L1R@4YOhC$hIb#5q)4QoUy zC^rwEdj13DLN0~y=txWMo$TG}U&P~iuPva^VoXE?E zrul;{M-O8u;}lXzH)CMz!J?*S=@u)tWD!TrrjVz0+GtQ>UB(TR15Q-Ne|>RPKoFW4 zAUOb-P)*jx26z&5_G%dfHAMs~Lwr*=MnU1yav_ybyUw}?Mv`V1rJa?x)C%lIulBZH zQDnvg0h1vhqK{#VsHY#RH-U;1kiQ!AMmmC|h>Th|E(zJX!9mXorS_*|T4{)DP|>1G zriQCAQ?NgU^Yo!kp$Ua1p{94KIMsDbU7ymTHGSSsH6`q;g)w4K>PIrhDAm#wih$6g z#zTvzsxRnkl10!j%2Z4Gx>8dV&1Y&bO!gb= zXgtC=7W|Rg8qA_Wn(iXCTs`!6a6E%_sm)4I4D?$RtRY?cxy4v7@4V2B(9bl`)4q$R zgR{{npRzjxAXE4#wbDidELoRy(;`V7mD) zTuutbD)TXzaBVbC+r;prBvJb|0*p;G5_6A&N>9>-B5VzXY-3?-N^$#v05voY8idIR zP-ux$>IZ4{Hj$wAfx*wv1cQv8*{1VS3VkTK!FH!>0Q$1It)=c|rN%c6ODUUOsJ7jmnLZV#`EySP$*v1IYr?!O${-ybXKkLC5Vi*y!p)#dXO z0&Y--jW*?Com{|SR|%ElzRfPvE!+q0+2UeFcI$Q#-TIcirj@Q8YBnkN8+pv_8MRwL z!KKQYYhCF&A2;1MqC$t{+$1-;IaK$!sh+yWgWIf<*Fb8ni`;H{tH-9M&v=IP zIi6fKy}5<+?4L8?>SjYcxYZj0la)~J+;Y<;zPjfUFeW|6OfO`d;-+wKQWaOjxj0W= z88>bzQt$s7G~;sZjMKQMj4rM{lyxQ_cdzT>DDA4emAn6D>}gbx?nvS=vF887~P(T@)V}rooGNOO3BYrEu=^Hq>24Lp03P+vG_K;)^{`?M z-b>F4ZY%-1`+@^6hpc{fh5GmQn+y1*MzWHIV3fS>1yh9MyC+xZt#QgA%))x`g<7 zA#2=(`*TY>WQ?$jp0A%dox(YGYU;J?AG2OAE^Hgm#vQ{aaK~`YxXn%1>gpa(*?5k- zMHh&lo^ph@8H86LT5=Xz?5`Bbv>8k*WBbtb9om!nRe zYh*VYQ+;M|>tYq>+5(HrUCfNA&#$IR87-^6oIU&d$cL1UU8JGiN>~*u-w5tL zhkY)Q6P4}hkVrC_p-rSJrCbF~{GuIypHkPcqup+CW85I_tPkV9;c-(FLY3z_cPkTp z1tT7fV05qWA+z7~PKHP_(g>5x-pNR0xLR=k`t@i86oIRhr3(#hU33}x^|08J7~Z}- zmMp%Pu#&vyP=)3}@jop`Mn6TlW5=eJUzmKq_Cf!jtNB-;4AQW0=* z+beDui9)%?t{Q~$ZR)2_&?t@!F^Y)dJjHJ5gIa>79~AQj=2M!Su}DM8U0v${J9 zIAr!|uWa_jy_C`gdSmQr^MnP*&4v|hTc}``1$FJz1X;4^V<-RlE(HDgxeEloQ#z)V?Qql8r!iG7ARKd9Q#CwKU^Y{U`5!qm*Q&_V4;O` z2{TkZxUC$mAttQA9&JFIO=RMdUi1jdmvA*l!bY72kO;B0e 
zzDmB|A?J6=`5K&rZQwc`5Qa8PWCc)Zblfn7n_-oA6o?^;s1k+Dwn-fOzGbF-=j1o$#(QAQ)YG6H51 zlDcD|l4e4X@_dR(k}hcvkRG91z(2z*$uexp_n6`(72hm-yZWu_)e_g^O8esTG3Rr! z+T$ygz3;ZiDhFewgFh+T^Ty5BZ!V5MDr>r9iI=4PO6^!|{9>&3;uE)tE8qRbwb!q`@oTUD+M<80tmR=@%j5Fh_iTS)TXZ}sZ(28V z<*n;yOxyw2n&bGgNO*@8}tOL7ZrTbBbs@iz@ z-o=AR5UcBmm+y)3ZE9jYlNXdX0UY^U!RPbRj7^IyzS1P^9ZrI-z*(D!rd)^^Y&plf4--$3$OPod2+kD;ePLE!Pd|QGn|tP4w?Vv zkQx5JIg;zPnSW$H=C+uB)MJ7_X32!RP^eLRG~?8PNt8={XoAm_9{)FnH&S_MChsI3 z=$^gF=_1q=s&EP@RZy0UbK6yjj3ai6(!&HHI$M_+etImPOjR+Eu|XWI^1{499Ib-F zG7jt*{AV}#Zo;ey(QBv*Dc*!15t|yNKG7l#H&1MBwEeGK`M~S0~wwagm zlrSTu1*Z8sl<#Ai3am{;+oFx)Ys$Mr7Hu=bna^cHv}JFNXj_sngFM*axKIeWmX0G< z#9C2Aao8|9!UQNb9*fr~kObC+;-Mh6u;6k=N+d0^rwhB;I_!ly3RFfVNh*QxAnoz+ zU4?u?`;8zAgrdU2nK7Il!>ga+XO>2GPz+8(kpF6thsmO%_EDa z?n44NbjM1A`SsVYFJ>)a>^}rqqI&PW3vXUnoLR!CfA~($dKOn+Nkd_5zh|v%^kLbk zoI!7_wr{1f|J~78<;hs-$tPB>ye3{+9jh6LmsZ?ukC*O_Rb5%nw(!Lm3d>8@Im?cc z&z|JT9316WIQZ$m?vJ@&_`%@&M`G1ut3~Hz7C!LaBiVeU03`opAog-a5G7*>)R+iHUUyB(Qu zQ>JcZNY1$HyXfstp;F$J#&>X&J@GSYtS9}$u6@z|Ie&M4=cZRJcO ziS98}HG@kgIl5-F*=~ApS2ex)8PXSO^0z!g`XbK`HUG?KNMHO#PEDVM@m5u5CzU?o zEU3WSp#sk~ZY#GsHY5>#Q1%+BoG+Cs@zPY_rBH!G&8MU`RDtW_D5H_`R!2)C+Sw`7 z&MpJ0MSSUTlYT(=K-FD#sZt3v#)DD=z!p- zyD>fNf=#K3(qgj8I|)gvOOgRV<*lZZRNkIksIjXd!4;2ZHJhOvwy5wR3srIFPkHtb zNaGs1iKXhEz1#&`yQ!HQ&*?K+IHnl?vomKk>Q=;o+k3g7KmzwU;Ai!Fax#6neS`3gtq#(h)V^pkyvIQjbINTxh5YuS2+z zDtzB~k!L@?b!mXhUbJPQp*p-C;l~Z)Hz2&vAl!lQ6Y1g2ISTDTJqbaG)RW@{MoL`; z0l-qGY#KVH>l6FN3z8$?)AxfUJ*#vJ{sTGxDfELF7M)G_uHif-;jC(Iz%u-%VxZZ= zYtz#;+NsTkcDF>6FdFV{NF22 zroGHOmax!an~7rOBQ{q--w(@2MA9eN2}*>VD4da`J|U5X8T(u~nkZCGLy(XBgp~oO zA|!H2886_vC~Q%hK~nalOuaCh{pGA?;tfDxCNpsXKCQ7#yorFsh3(I@v7C7-Ai%am zhr@c=+EWEl1}3w2QFiewl>03>3%iWXVu=z3t7L7F(K9IiI;DG_oZo^&?E-XT=QnH; zw2?<@7pBZ$dYnXoY$hX%6%a1|HYNQkCC%_pB`gs^xS}(+B(kC*n9d@JT;J6hFDxoZ zIg!{giR5T3GM0GRJzmT#vyj6UhT)VL?4vRztaDhi#|Xw85u*w7)o3)4r#($YGTx9J zx=^2{x;YE{;f!O3Kc?nkS{v~WV)xjn+^>@Z@tW*h;e+Du)6aiN&eti-0+N{_cg83F zD+D;4Oiv?1dgGF&F7Yn?BxQ|w08S!joNmuzWfA{?LjRDQZ;I 
z>ff2sV?V}p$9y=F^1*7X6cg=J<125MwxpikUJ_SP_ zA-_9glwMWL-8q`Eg>CGa;cjR{GqjdHuHf%gy;-$Z;aIM4EH&Ieu~KpLPS#_)qrM|Z5YeYxu3#}21z&wJ=N{r=Fh zrt_mi6aOWF4wvF=dp%hynV1*})ZtBql=VU!BUk5R~Y#zlJZ7c-uWZLfeY zm?SG-oVVW@l1zOy`|rK-<|}tCWOEyPRPMNI(^&ub#rn4oymdfh^fR&to`_e~EzZ0h zd@D$m?pSN=S#In}@iyNtTWdYF+NFa4YSE=$a8xSyj}QK;R@e+XXxXy z{Y(2-VGji^vhLer^_?p<$DZupj>&I*AGh1FG`L#P_PC~g$@XA)wPxsveOoE_Qp$nV zidLljR$lDT$(4qo*w{;}HNGc%wgsSiV*tCi1)zcejI364qT(_c?O(EdJO7>hc$Mm5 z)U`j>a12J%hTWfXS$yS_UE2ZwB%2JxNqj=(zO@DVvnN9tT)F)TXWFqpZr}I&r`OH! zeg5+TtzmUxTlAMStRBx$)$)VyMc!}y?$vi^W7TI@i`+~rzvPH@ocZg?m}m3{)9+u1 z9U6~aKu^6Eb6k(r+*m1Fh`sWaSlL$|6@7(i=#OW79{EogX@0Zxf|a{V&L4Hb`SSyP zJYMhR=8o5J@9pZX!jJE@)91gicEkP4%!cvy%)fM6$$z-@d^Y!f{W18zUsnkK_YWS1 z`&S1jgpvg@BW)t~an7=*uyv6oGj}75#**zCB&GAeMk6UvuR-5A`&s{7xe^_jS z|HJZ3xC`ZUN-7h8RI4sCs6{4$2lc_|C)piJ;UT-jC!Uy`g}B=+LhqC%RmHX>Ws!Jp`(v{qfNTr0p;;V8<-aX)Exoc*(iO|=l2+&M($!2p$?a+D z1wn_FZP7__lJNdmSGN*v(d0}L-OpAtIeD(GUN5hd9E;^0lj>#0G2|&TlPnYn@{#4! z18Al$laKRd-NIVE(QG$9=pUNeg-w?-lyEm>8vWzR7&oO(0X^mrnHDG~_P9w{)ICt_ zs=E|IS>svbTziJcQpN?W;f$og+MTCMhM!28L8d50#T7P>n~jXvB$v{|)_!xF87;8Z z-!hFlR&i&>F9A2wGj2&5E)mJB-^msBP&Ph?;nL#CW=b0L1}NSQ z(|D}PWEyEO#YU`(Bt*rR0y#jj)bbP)Y^-t5GBPYAF|kEUYSg42Gg~^aTge+rk6Owj zoT-6fuwLI|8_P_k2CQZ^lgWShg7i*K>7i87!Uq|XQl*+39!siIGs>;Z7ZOcO}nGre;^>vU^f+FM=}PRW6emrrUqckILs* zEEHZN3w>)_hpPquT00k9NnjHUdM7(}RNhI8$!rs?fw+EymJR^?ND4oqq7%7(`fIS8 zcV*D}8IV8*R(Pd73JEal23EUh5CF96A0rDbBVk4Yqh_P^E3j$h>fvAojoQgI1U`mN zu~i8SI=-e8ZBPPD9>2YfJXDQdvfS;aqxdlc)>y@z#w^x@?NcD9A|(d80kAHSDScDP z!#uW-IUk*N9vN}i5;p8$z3Qcv6bak4h*!FdCSmFA8yQUGo^|)1Ju%=N8@Mn=3Xb3u zEhuJ+s(krV-_S^}=VBsH{@Fi#c630bd9}lm$O?riuJ`~y6IO-X5*fI7ni`VlZ^?t` z4q4+84S6SU06u;tMz%l9pwtpQCKW_!@b)0X$g9Yd&2n>gR>$|);f3>C`bAY$A-^zw`nfp%e{pVL2x<25$<9qABb@bak z@ARxSKEK@f{OaE4)oA}VpFR$Kd*q#wwWjBno1Tv~p88<#sSo*mpFOUugLt^JI#FH) z;c#d5=Mk_$?l=2t`<&c&oVk5iAaTqDcZ?QoOP4SJ*FIb-5lrN1H$6v)+Oz=+<@$a} zC%@29j|;W>FP(I17dxE{Yk}C7=qX2$cnT0DHOTLg?|*{>B^5iefp#Uz@gXkE(gGuN 
z??l!Z^D+%gA{!JWfOjNv{5Y%ut9Ee9eIj$>2DU9Ga*1fXSaXeK&Eii9I@7s?$=3zP zVHN)`$}Uf#4qBNbuCpLfz=6qLDN{y6Y*~-^e^S~P;Y4Vto@N@8<1!%;1xvJ$XnzI* zDj_Nv2TS^#c1d!3oJ(VC&g09@px`W?x=pUo8uj8HO&uenqwU&V=tUrtr@?Q z7cZ(>E3z*a*;k9|kX2b_yt4XU=bN2>*mdUw=H&n4m3VpeGTdD|0<8bH&T~ zweqIr@}{M+56auHbat_4wXzk{%C)MN<*JtZH6K*|@64NLmg+vJYQ}`Rv}!%W z3{o%W?-jgRuv*>#waJdMH*#OkjaBu2P}KL?lO1dckSV`gooaMEFx`*Ds*kM|buAZl z{Q|U_-~9_L6{_0D$oE5;e1GxtY#sPVjc#eCoF!w~4`MurcCpygUwPoV-dmWh+(q$(9bGZZg=I`a1 z$StxB7Ms7fyJ(=%^1TBl@;BxV9L@gT;SBO0F_Hgh1^gopvp9-O6PY4T0}w>o#)LIz zL_Oj(IakQJPtJcJ=f~vyEjj-WIUmA_*x*Q1#`DM7N;2^rf)e~z1V$>uhy&F0pKoz- zTmEb1ua>{I=hZ!TZA$}79k=(a*xG-VVY0nw`qa$j99(B!P(n_D8qvHcES{%`&Yxv` zA=h?-#dNJp{`G}j+PL}mvzD$?+#^pi&X{Z-7We475^=5<+QE&^-u+ zG_HR<<_>wL0@I!a)6~obr)M7bUCnBRK4~1JgWG%%>&T`=_f0e2yIAhnJ5I z$Bvwdm5!`h&&I8JYu3tTYvr1?X4zU3&)XT_v2$%l-SUn)+Q=~c`f%LYy5{sOJ3Vpx z{xy5ovb_rnGS{lxmaE(171e7M2bL=i#LKJJ${Uu;8`gJlHvS(tn=L1uh_g`^M#SG> zg}MY+dffkrL@ce6?2x+)h$st0l)X@*5D}XyNc6B!t&)l+l$H}Gsbp{r0-uF$IAhe3 z$ea=;=4Y_Si4mJHHR8zB5XfoNpD||9f_4`(;V`9&OaO({th8uYY5UrZ+FlcBl$!kr z0Bf4P{r-tR9r@#tSbKl0^u(%l0FC~d^;PR@d9UW(bu86=+x3p?-ArtOxt;fc^;~+x z(WHen-0v{3nub%g4EP{FnClyI3p(XMo&VF4QX&W4qfB|QaL$kuCBM^L7i+7FS&QJ< zd>kGxHBw|u`~*-5&p*do{EVC|!fq}3^5Da^KI&?TqGaEankU6#OVJY*MhJ&!Cq^$M z(M(pDeb~kHu(I*~!L|0`<@Vv&xr?inFUG9paclXSb_-*x|*r$QOxB`6r{3xH{15g?%<(6NqGh!O7A=AAu*Q6MvBgIG=qLMEN9RPv&!< z95>AtX7-yt8LP?5%=x6soH=g#c}{j_%{m8%p@t<*SWz7qg1z2_ChZlmQnTQgUb0a0 z4N1=3$WzyJpkTQ;a!a|kVWG(BR2$n#y>-FtycSLrxzBpWyys62jk*UsbWXVcH_+@0 zbxC-Nk^nuCjVbEL;DSi?XQ)#mTfQw3yWoSzv*Cn|O5~-S{C8>qy@oN5 zjR7y%oVaL>?KNG5fJO%|hm5=#8I z`Z#ngC}Gq^7vuEu93F*2!@6i!twbOT2i(9l z%IlW)oAW+i+IOQw(6(Ord7l^rtWJ%_j8W=B1#G0cm#)Fb8o8c-Xs8q9#gG z_JXZhLH|M?ZqD^zl^Xz!Xr(V~!*4bM$+N(Zjt&!MXn6oHO}VGA!FrNTz^8QsHx~8| zvR%o9e0BpWO8_{ufd`0-bn=P9vuE7ik#lFfV<+i2<VSSV&R$S+2k$`g++D~ zAp|ApPFPVEcEU7vA4dYx!VntWrO}PnlO(c<_yhh4wx$FdvB9ISh!~zAP>@4XzDR>D zTL6;CVc)$ZREjjGOB7&l_LT);e#SMAE*lYZDA1;|OQr`Aze9o9v>6&G6KR#b$dl7X 
z4h_qRTx_oLPxgm`(|#ZWNxqTD0kTnWBECe4+jDm8Z2#F|@7VCDS2~?LQAi{wCBPM}{-C72p~WkSY!u5c zLOWHc;~&y+PSb-#p^iD=7y>qMNg|0r^L;&MLXY#(v?A8|$a`D3p9f9HB0aXmlgDu2u!_=v0ih|B$mEBM5cX&NwnQXsk6 zrs7YE;b6b5$-2gsJmgC5*57M-v*{66AGZ~)*>*p)?Y?_t#a4GK>l2H`RQB^cp~zJ8 zGmhN#k-eNH@AkqYb45IV=URTOU|05Y}ry4D?hL_5U=Jxqa-TcLQZJMPk4-T%>9be&l z&tB+j?Qt52&@&S;~=%DWy2h_h4|ukKvWWEp2(5a&%PFdR#V6&N11Bx9*1X*$C0d~_|>`7qbXKG!Y|Egp-xo{trrT(u5;T)gYn>3DYTT6V>9 zcE!W&ed`%ETXB4M^*z@gxRy$!#VIShdvBkP=asGH)i3AO$E*0YD%WzAD^}UObR$-A zl&z+5E>}8ZyIo6xSotwjGQR=DkYDgKE05_+1%B&T;XCK=W>N}b*D34TJvOwtsv{x z;Lmn(nK`k%&IdiQ&Qq~dmtvQvW0z)PIkT%4|A!Xa?V&qUZv^WN?<Jr~d0K`Rt&#h*vWjQGB*cgLA3?uS*`XFAP4tSvrs$o#`YnehL=5ASO# literal 0 HcmV?d00001 diff --git a/batch_transcribe.py b/batch_transcribe.py new file mode 100644 index 0000000..4b78acf --- /dev/null +++ b/batch_transcribe.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python3 +""" +Batch Transcription Script for WhisperLive +Processes all audio files in a folder using the HTTP transcription endpoint +""" + +import os +import sys +import json +import time +import argparse +import requests +from pathlib import Path +from typing import List, Dict, Optional +import logging + +# Configure logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +class BatchTranscriber: + def __init__(self, server_url: str = "http://localhost:8080"): + self.server_url = server_url + self.supported_formats = {'.wav', '.mp3', '.flac', '.m4a', '.ogg', '.webm'} + + def get_audio_files(self, folder_path: str) -> List[Path]: + """Get all audio files from the specified folder""" + folder = Path(folder_path) + if not folder.exists(): + raise FileNotFoundError(f"Folder not found: {folder_path}") + + audio_files = [] + for file_path in folder.iterdir(): + if file_path.is_file() and file_path.suffix.lower() in self.supported_formats: + audio_files.append(file_path) + + return sorted(audio_files) + + def transcribe_file(self, file_path: 
Path, language: Optional[str] = None, + task: str = "transcribe", model: str = "base") -> Dict: + """Transcribe a single audio file""" + try: + logger.info(f"Transcribing: {file_path.name}") + + with open(file_path, 'rb') as f: + files = {'file': f} + data = { + 'language': language, + 'task': task, + 'model': model + } + + response = requests.post(f"{self.server_url}/transcribe", + files=files, data=data, timeout=300) + + if response.status_code == 200: + result = response.json() + logger.info(f"✅ Successfully transcribed: {file_path.name}") + return result + else: + error_msg = response.text + logger.error(f"❌ Failed to transcribe {file_path.name}: {error_msg}") + return {'error': error_msg, 'status_code': response.status_code} + + except Exception as e: + logger.error(f"❌ Error transcribing {file_path.name}: {str(e)}") + return {'error': str(e)} + + def save_transcript(self, transcript_data: Dict, output_path: Path, + format_type: str = "txt") -> bool: + """Save transcript in specified format""" + try: + if 'error' in transcript_data: + return False + + if format_type == "txt": + with open(output_path, 'w', encoding='utf-8') as f: + f.write(f"Transcription of: {transcript_data.get('filename', 'Unknown')}\n") + f.write(f"Language: {transcript_data['info'].get('language', 'Auto-detected')}\n") + f.write(f"Duration: {transcript_data['info'].get('duration', 0):.2f} seconds\n") + f.write("=" * 50 + "\n\n") + + for segment in transcript_data['segments']: + f.write(f"[{segment['start']:.2f}s - {segment['end']:.2f}s] {segment['text']}\n") + + elif format_type == "json": + with open(output_path, 'w', encoding='utf-8') as f: + json.dump(transcript_data, f, indent=2, ensure_ascii=False) + + elif format_type == "srt": + with open(output_path, 'w', encoding='utf-8') as f: + for i, segment in enumerate(transcript_data['segments'], 1): + start_time = self.format_srt_time(segment['start']) + end_time = self.format_srt_time(segment['end']) + f.write(f"{i}\n{start_time} --> 
{end_time}\n{segment['text']}\n\n") + + elif format_type == "vtt": + with open(output_path, 'w', encoding='utf-8') as f: + f.write("WEBVTT\n\n") + for segment in transcript_data['segments']: + start_time = self.format_vtt_time(segment['start']) + end_time = self.format_vtt_time(segment['end']) + f.write(f"{start_time} --> {end_time}\n{segment['text']}\n\n") + + logger.info(f"💾 Saved transcript: {output_path}") + return True + + except Exception as e: + logger.error(f"❌ Error saving transcript {output_path}: {str(e)}") + return False + + def format_srt_time(self, seconds: float) -> str: + """Format time for SRT subtitles""" + hours = int(seconds // 3600) + minutes = int((seconds % 3600) // 60) + secs = int(seconds % 60) + millisecs = int((seconds % 1) * 1000) + return f"{hours:02d}:{minutes:02d}:{secs:02d},{millisecs:03d}" + + def format_vtt_time(self, seconds: float) -> str: + """Format time for VTT subtitles""" + hours = int(seconds // 3600) + minutes = int((seconds % 3600) // 60) + secs = int(seconds % 60) + millisecs = int((seconds % 1) * 1000) + return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millisecs:03d}" + + def batch_transcribe(self, input_folder: str, output_folder: str, + language: Optional[str] = None, task: str = "transcribe", + model: str = "base", format_type: str = "txt", + delay: float = 1.0) -> Dict: + """Process all audio files in the input folder""" + + # Create output folder if it doesn't exist + output_path = Path(output_folder) + output_path.mkdir(parents=True, exist_ok=True) + + # Get all audio files + audio_files = self.get_audio_files(input_folder) + if not audio_files: + logger.warning(f"No audio files found in: {input_folder}") + return {'processed': 0, 'successful': 0, 'failed': 0} + + logger.info(f"Found {len(audio_files)} audio files to process") + + results = { + 'processed': len(audio_files), + 'successful': 0, + 'failed': 0, + 'files': [] + } + + for i, audio_file in enumerate(audio_files, 1): + logger.info(f"Processing 
{i}/{len(audio_files)}: {audio_file.name}") + + # Transcribe the file + transcript_data = self.transcribe_file(audio_file, language, task, model) + + if 'error' not in transcript_data: + # Create output filename + base_name = audio_file.stem + output_file = output_path / f"{base_name}.{format_type}" + + # Save transcript + if self.save_transcript(transcript_data, output_file, format_type): + results['successful'] += 1 + results['files'].append({ + 'input': str(audio_file), + 'output': str(output_file), + 'status': 'success' + }) + else: + results['failed'] += 1 + results['files'].append({ + 'input': str(audio_file), + 'output': str(output_file), + 'status': 'failed' + }) + else: + results['failed'] += 1 + results['files'].append({ + 'input': str(audio_file), + 'output': None, + 'status': 'failed', + 'error': transcript_data.get('error', 'Unknown error') + }) + + # Add delay between requests to avoid overwhelming the server + if i < len(audio_files): + time.sleep(delay) + + return results + +def main(): + parser = argparse.ArgumentParser(description='Batch transcribe audio files using WhisperLive') + parser.add_argument('input_folder', help='Folder containing audio files') + parser.add_argument('output_folder', help='Folder to save transcripts') + parser.add_argument('--server', '-s', default='http://localhost:8080', + help='WhisperLive server URL (default: http://localhost:8080)') + parser.add_argument('--language', '-l', help='Language code (e.g., en, es, fr)') + parser.add_argument('--task', '-t', choices=['transcribe', 'translate'], default='transcribe', + help='Task to perform (default: transcribe)') + parser.add_argument('--model', '-m', default='base', + help='Model size (default: base)') + parser.add_argument('--format', '-f', choices=['txt', 'json', 'srt', 'vtt'], default='txt', + help='Output format (default: txt)') + parser.add_argument('--delay', '-d', type=float, default=1.0, + help='Delay between requests in seconds (default: 1.0)') + 
parser.add_argument('--verbose', '-v', action='store_true', + help='Verbose output') + + args = parser.parse_args() + + if args.verbose: + logging.getLogger().setLevel(logging.DEBUG) + + try: + # Initialize transcriber + transcriber = BatchTranscriber(args.server) + + # Check server health + try: + response = requests.get(f"{args.server}/health", timeout=5) + if response.status_code != 200: + logger.error(f"Server health check failed: {response.status_code}") + sys.exit(1) + logger.info("✅ Server health check passed") + except requests.exceptions.RequestException as e: + logger.error(f"❌ Cannot connect to server: {e}") + sys.exit(1) + + # Process files + results = transcriber.batch_transcribe( + input_folder=args.input_folder, + output_folder=args.output_folder, + language=args.language, + task=args.task, + model=args.model, + format_type=args.format, + delay=args.delay + ) + + # Print summary + logger.info("\n" + "=" * 50) + logger.info("BATCH TRANSCRIPTION COMPLETED") + logger.info("=" * 50) + logger.info(f"Total files processed: {results['processed']}") + logger.info(f"Successful: {results['successful']}") + logger.info(f"Failed: {results['failed']}") + logger.info(f"Output folder: {args.output_folder}") + logger.info(f"Output format: {args.format}") + + if results['failed'] > 0: + logger.warning("\nFailed files:") + for file_info in results['files']: + if file_info['status'] == 'failed': + logger.warning(f" - {file_info['input']}: {file_info.get('error', 'Unknown error')}") + + if results['successful'] > 0: + logger.info(f"\n✅ Successfully processed {results['successful']} files!") + + except KeyboardInterrupt: + logger.info("\n⚠️ Process interrupted by user") + sys.exit(1) + except Exception as e: + logger.error(f"❌ Unexpected error: {str(e)}") + sys.exit(1) + +if __name__ == "__main__": + main() diff --git a/docker-compose.yaml b/docker-compose.yaml index 6d13566..9874e5a 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -15,6 +15,8 @@ services: 
LOG_PATH: /app/logs NVIDIA_VISIBLE_DEVICES: all NVIDIA_DRIVER_CAPABILITIES: compute,utility + PORT_WHISPERLIVE: ${PORT_WHISPERLIVE} + HTTP_PORT: ${HTTP_PORT:-8080} volumes: - ./models:/app/models - ./ssl:/app/ssl @@ -26,11 +28,15 @@ services: - driver: nvidia count: 1 capabilities: [gpu] + options: + memory: "4G" # Match the main docker-compose.yml allocation ports: - - ${PORT_WHISPERLIVE}:${PORT_WHISPERLIVE} + - "${PORT_WHISPERLIVE}:${PORT_WHISPERLIVE}" + - "${HTTP_PORT:-8080}:8080" + restart: unless-stopped networks: - - audio-network + - default networks: - audio-network: + default: driver: bridge diff --git a/hybrid_server.py b/hybrid_server.py new file mode 100644 index 0000000..a679630 --- /dev/null +++ b/hybrid_server.py @@ -0,0 +1,1229 @@ +import argparse +import ssl +import os +import socket +import threading +import tempfile +from pathlib import Path + +from flask import Flask, request, jsonify, send_file, Response +from flask_sock import Sock +from werkzeug.utils import secure_filename +import websocket as ws_client +import json +import logging + +def format_time_srt(s): + hours = int(s // 3600) + minutes = int((s % 3600) // 60) + seconds = int(s % 60) + milliseconds = int((s - int(s)) * 1000) + return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}" + +def format_time_vtt(s): + hours = int(s // 3600) + minutes = int((s % 3600) // 60) + seconds = int(s % 60) + milliseconds = int((s - int(s)) * 1000) + return f"{hours:02}:{minutes:02}:{seconds:02}.{milliseconds:03}" + +def generate_srt(segments): + output = "" + for i, segment in enumerate(segments, start=1): + start_time = format_time_srt(float(segment['start'])) + end_time = format_time_srt(float(segment['end'])) + text = segment['text'].strip() + output += f"{i}\n{start_time} --> {end_time}\n{text}\n\n" + return output + +def generate_vtt(segments): + output = "WEBVTT\n\n" + for segment in segments: + start_time = format_time_vtt(float(segment['start'])) + end_time = 
format_time_vtt(float(segment['end'])) + text = segment['text'].strip() + output += f"{start_time} --> {end_time}\n{text}\n\n" + return output + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +def check_port_availability(port): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + result = sock.connect_ex(('0.0.0.0', port)) + sock.close() + return result != 0 + +class HybridWhisperServer: + def __init__(self, websocket_port, http_port, backend="faster_whisper", + faster_whisper_custom_model_path=None, whisper_tensorrt_path=None, + trt_multilingual=False, single_model=True, ssl_context=None): + self.websocket_port = websocket_port + self.http_port = http_port + self.backend = backend + self.faster_whisper_custom_model_path = faster_whisper_custom_model_path + self.whisper_tensorrt_path = whisper_tensorrt_path + self.trt_multilingual = trt_multilingual + self.single_model = single_model + self.ssl_context = ssl_context + + # Initialize Flask app + self.app = Flask(__name__) + self.app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024 # 100MB max file size + self.sock = Sock(self.app) + self.setup_routes() + + # Initialize WhisperLive server + from whisper_live.server import TranscriptionServer + self.whisper_server = TranscriptionServer() + + # Create a shared transcriber instance for HTTP requests + self.shared_transcriber = None + if self.backend == "faster_whisper": + from whisper_live.transcriber import WhisperModel + # Use base model as default for HTTP requests + model_size = "base" + if self.faster_whisper_custom_model_path: + model_size = self.faster_whisper_custom_model_path + self.shared_transcriber = WhisperModel(model_size) + + def setup_routes(self): + @self.app.route('/health', methods=['GET']) + def health_check(): + return jsonify({'status': 'healthy', 'service': 'WhisperLive Hybrid Server'}) + + @self.app.route('/', methods=['GET']) + def serve_test_form(): + """Serve the HTML test form""" 
+ html_content = """ + + + + + + + WhisperLive Dashboard + + + + + +
+
+

WhisperLive

+

High-Performance Real-Time Audio Transcription

+
+ + +
+

Connection Settings

+
+
+ + +
+
+ + +
+
+
+ HTTP Status: Checking... +
+
+ + +
+
+ + + +
+ + +
+
+
+ + +
+ +
+
+ + +
+
+ + +
+
+ + +
+ + +
+ + +
+
+
+ + +
+
+ + +
+
+ +
+ + +
+ +
+
+ Click Start Recording to begin live transcription... +
+
+
+ + +
+

OpenAI Compatible API

+

+ WhisperLive acts as a drop-in replacement for OpenAI's Whisper API. You can use any standard OpenAI + client by changing the base URL. +

+ +

Python (openai package)

+
from openai import OpenAI
+
+client = OpenAI(
+    api_key="sk-no-key-required",
+    base_url="https://whisperlive.classroomcopilot.ai/v1/"
+)
+
+with open("audio.wav", "rb") as file:
+    transcription = client.audio.transcriptions.create(
+        file=file,
+        model="base",
+        response_format="verbose_json"
+    )
+    
+print(transcription.text)
+ +

cURL

+
curl https://whisperlive.classroomcopilot.ai/v1/audio/transcriptions \
+  -H "Content-Type: multipart/form-data" \
+  -F file="@audio.wav" \
+  -F model="base" \
+  -F response_format="verbose_json"
+
+
+
+ + + + + + """ + return html_content, 200, {'Content-Type': 'text/html'} + + @self.app.route('/transcribe', methods=['POST']) + def transcribe_file(): + try: + if 'file' not in request.files: + return jsonify({'error': 'No file provided'}), 400 + + file = request.files['file'] + if file.filename == '': + return jsonify({'error': 'No file selected'}), 400 + + # Get optional parameters + language = request.form.get('language', None) + task = request.form.get('task', 'transcribe') # 'transcribe' or 'translate' + model_size = request.form.get('model', 'base') + + # For now, we'll use the shared transcriber regardless of the requested model size + # In the future, we could create different transcriber instances for different models + + # Validate file type + allowed_extensions = {'wav', 'mp3', 'flac', 'm4a', 'ogg', 'webm', 'opus', 'oga'} + if not file.filename.lower().endswith(tuple('.' + ext for ext in allowed_extensions)): + return jsonify({'error': f'Unsupported file type. Allowed: {", ".join(allowed_extensions)}'}), 400 + + # Save file temporarily + with tempfile.NamedTemporaryFile(delete=False, suffix=Path(file.filename).suffix) as temp_file: + file.save(temp_file.name) + temp_path = temp_file.name + + try: + # Transcribe the file using WhisperLive + if self.backend == "faster_whisper": + # Use the shared transcriber instance + if self.shared_transcriber is None: + return jsonify({'error': 'Transcriber not initialized'}), 500 + + segments, info = self.shared_transcriber.transcribe( + temp_path, + language=language, + task=task + ) + else: + # For other backends, use the server's transcriber + # This would need to be adapted based on your specific backend setup + return jsonify({'error': 'Backend not yet supported for file transcription'}), 501 + + # Convert segments to serializable format + transcript_segments = [] + for segment in segments: + transcript_segments.append({ + 'start': segment.start, + 'end': segment.end, + 'text': segment.text, + 'no_speech_prob': 
segment.no_speech_prob + }) + + # Get transcription info + transcription_info = { + 'language': info.language, + 'language_probability': info.language_probability, + 'duration': info.duration, + 'duration_after_vad': info.duration_after_vad, + 'transcription_options': info.transcription_options + } + + return jsonify({ + 'success': True, + 'segments': transcript_segments, + 'info': transcription_info, + 'filename': file.filename + }) + + finally: + # Clean up temporary file + if os.path.exists(temp_path): + os.unlink(temp_path) + + except Exception as e: + logger.error(f"Error transcribing file: {str(e)}") + return jsonify({'error': f'Transcription failed: {str(e)}'}), 500 + + @self.app.route('/transcribe/url', methods=['POST']) + def transcribe_url(): + try: + data = request.get_json() + if not data or 'url' not in data: + return jsonify({'error': 'No URL provided'}), 400 + + url = data['url'] + language = data.get('language', None) + task = data.get('task', 'transcribe') + model_size = data.get('model', 'base') + + # Validate URL + if not url.startswith(('http://', 'https://', 'rtsp://', 'hls://')): + return jsonify({'error': 'Invalid URL format'}), 400 + + # For now, we'll return a message that this endpoint is available + # but the actual implementation would depend on your specific needs + return jsonify({ + 'message': 'URL transcription endpoint available', + 'url': url, + 'note': 'This endpoint is ready for implementation based on your specific requirements' + }) + + except Exception as e: + logger.error(f"Error processing URL transcription request: {str(e)}") + return jsonify({'error': f'URL transcription failed: {str(e)}'}), 500 + + def handle_openai_audio_request(task_type): + try: + if 'file' not in request.files: + return jsonify({'error': {'message': 'No file provided', 'type': 'invalid_request_error', 'code': 'invalid_parameters'}}), 400 + + file = request.files['file'] + if file.filename == '': + return jsonify({'error': {'message': 'No file 
selected', 'type': 'invalid_request_error', 'code': 'invalid_parameters'}}), 400 + + # Get OpenAI specific parameters + language = request.form.get('language', None) + model_size = request.form.get('model', 'base') + prompt = request.form.get('prompt', None) + response_format = request.form.get('response_format', 'json') + temperature = request.form.get('temperature', 0) + + try: + temperature = float(temperature) + except ValueError: + temperature = 0.0 + + allowed_extensions = {'wav', 'mp3', 'flac', 'm4a', 'ogg', 'webm', 'mp4', 'mpeg', 'mpga', 'opus', 'oga'} + if not file.filename.lower().endswith(tuple('.' + ext for ext in allowed_extensions)): + return jsonify({'error': {'message': 'Unsupported file type.', 'type': 'invalid_request_error', 'code': 'invalid_file_format'}}), 400 + + with tempfile.NamedTemporaryFile(delete=False, suffix=Path(file.filename).suffix) as temp_file: + file.save(temp_file.name) + temp_path = temp_file.name + + try: + if self.backend == "faster_whisper": + if self.shared_transcriber is None: + return jsonify({'error': {'message': 'Transcriber not initialized', 'type': 'internal_server_error'}}), 500 + + kwargs = { + "language": language, + "task": task_type, + "temperature": temperature + } + if prompt: + kwargs["initial_prompt"] = prompt + + segments, info = self.shared_transcriber.transcribe(temp_path, **kwargs) + else: + return jsonify({'error': {'message': 'Backend not yet supported for file transcription', 'type': 'internal_server_error'}}), 501 + + transcript_segments = [] + full_text = "" + for segment in segments: + text = segment.text + full_text += text + transcript_segments.append({ + 'id': segment.id, + 'seek': segment.seek, + 'start': segment.start, + 'end': segment.end, + 'text': text, + 'tokens': segment.tokens, + 'temperature': segment.temperature, + 'avg_logprob': segment.avg_logprob, + 'compression_ratio': segment.compression_ratio, + 'no_speech_prob': segment.no_speech_prob + }) + + full_text = full_text.strip() + + if 
response_format == 'json': + return jsonify({'text': full_text}) + elif response_format == 'text': + return Response(full_text, mimetype='text/plain') + elif response_format == 'srt': + return Response(generate_srt(transcript_segments), mimetype='text/plain') + elif response_format == 'vtt': + return Response(generate_vtt(transcript_segments), mimetype='text/plain') + elif response_format == 'verbose_json': + return jsonify({ + 'task': task_type, + 'language': info.language, + 'duration': info.duration, + 'text': full_text, + 'segments': transcript_segments + }) + else: + return jsonify({'text': full_text}) + + finally: + if os.path.exists(temp_path): + os.unlink(temp_path) + + except Exception as e: + logger.error(f"Error processing OpenAI audio request: {str(e)}") + return jsonify({'error': {'message': f'Transcription failed: {str(e)}', 'type': 'internal_server_error'}}), 500 + + @self.app.route('/v1/audio/transcriptions', methods=['POST']) + def openai_transcriptions(): + return handle_openai_audio_request('transcribe') + + @self.app.route('/v1/audio/translations', methods=['POST']) + def openai_translations(): + return handle_openai_audio_request('translate') + + @self.app.route('/v1/models', methods=['GET']) + def list_models(): + # Standard Whisper models supported by faster-whisper + model_names = [ + "whisper-1", "tiny", "tiny.en", "base", "base.en", + "small", "small.en", "medium", "medium.en", + "large", "large-v1", "large-v2", "large-v3" + ] + + models = [] + for name in model_names: + models.append({ + "id": name, + "object": "model", + "created": 1677532384, + "owned_by": "openai" if name == "whisper-1" else "local", + "permission": [], + "root": name, + "parent": None + }) + + return jsonify({ + "object": "list", + "data": models + }) + + # ===== WebSocket Bridge ===== + # Bridges browser WebSocket connections on the HTTP port (8080) + # to the internal WhisperLive WebSocket server (port 5000). 
+ # This allows live transcription through a single HTTPS port via NPM. + @self.sock.route('/ws') + def ws_bridge(ws): + """Bridge WebSocket from HTTP port to internal WhisperLive WS server""" + internal_url = f"ws://127.0.0.1:{self.websocket_port}" + logger.info(f"WebSocket bridge: new connection, proxying to {internal_url}") + + internal = None + try: + internal = ws_client.create_connection(internal_url) + + # Thread: internal server → browser + def server_to_browser(): + try: + while True: + opcode, data = internal.recv_data() + if opcode == ws_client.ABNF.OPCODE_TEXT: + ws.send(data.decode('utf-8')) + elif opcode == ws_client.ABNF.OPCODE_BINARY: + ws.send(data) + elif opcode in (ws_client.ABNF.OPCODE_CLOSE, ): + break + except Exception: + pass + + relay_thread = threading.Thread(target=server_to_browser, daemon=True) + relay_thread.start() + + # Main thread: browser → internal server + while True: + data = ws.receive() + if data is None: + break + if isinstance(data, bytes): + internal.send_binary(data) + else: + if data == "END_OF_AUDIO": + internal.send_binary(b"END_OF_AUDIO") + else: + internal.send(data) + + except Exception as e: + logger.error(f"WebSocket bridge error: {e}") + finally: + if internal: + try: + internal.close() + except Exception: + pass + logger.info("WebSocket bridge: connection closed") + + def run_websocket_server(self): + """Run the WebSocket server in a separate thread""" + logger.info(f"Starting WebSocket server on port {self.websocket_port}") + self.whisper_server.run( + "0.0.0.0", + port=self.websocket_port, + backend=self.backend, + faster_whisper_custom_model_path=self.faster_whisper_custom_model_path, + whisper_tensorrt_path=self.whisper_tensorrt_path, + trt_multilingual=self.trt_multilingual, + single_model=self.single_model, + ssl_context=self.ssl_context + ) + + def run_http_server(self): + """Run the HTTP server""" + logger.info(f"Starting HTTP server on port {self.http_port}") + self.app.run(host='0.0.0.0', 
port=self.http_port, debug=False, threaded=True) + + def start(self): + """Start both servers""" + # Start WebSocket server in a separate thread + websocket_thread = threading.Thread(target=self.run_websocket_server, daemon=True) + websocket_thread.start() + + # Start HTTP server in main thread + self.run_http_server() + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='WhisperLive Hybrid Server (WebSocket + HTTP)') + parser.add_argument('--websocket-port', '-wp', + type=int, + default=int(os.getenv('PORT_WHISPERLIVE', 9090)), + help="WebSocket port to run the server on.") + parser.add_argument('--http-port', '-hp', + type=int, + default=int(os.getenv('HTTP_PORT', 8080)), + help="HTTP port to run the server on.") + parser.add_argument('--backend', '-b', + type=str, + default='faster_whisper', + help='Backends from ["tensorrt", "faster_whisper"]') + parser.add_argument('--faster_whisper_custom_model_path', '-fw', + type=str, default=None, + help="Custom Faster Whisper Model") + parser.add_argument('--trt_model_path', '-trt', + type=str, + default=None, + help='Whisper TensorRT model path') + parser.add_argument('--trt_multilingual', '-m', + action="store_true", + help='Boolean only for TensorRT model. True if multilingual.') + parser.add_argument('--ssl_cert_path', '-ssl', + type=str, + default=None, + help='Path to cert.pem and key.pem if ssl should be used.') + parser.add_argument('--omp_num_threads', '-omp', + type=int, + default=1, + help="Number of threads to use for OpenMP") + parser.add_argument('--no_single_model', '-nsm', + action='store_true', + help='Set this if every connection should instantiate its own model. 
Only relevant for custom model, passed using -trt or -fw.') + + args = parser.parse_args() + + if args.backend == "tensorrt": + if args.trt_model_path is None: + raise ValueError("Please Provide a valid tensorrt model path") + + websocket_port = args.websocket_port + http_port = args.http_port + + if not check_port_availability(websocket_port): + print(f"Warning: WebSocket port {websocket_port} might already be in use!") + if not check_port_availability(http_port): + print(f"Warning: HTTP port {http_port} might already be in use!") + + ssl_context = None + if args.ssl_cert_path is not None: + try: + ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + ssl_context.load_cert_chain( + certfile=f"{args.ssl_cert_path}/cert.pem", + keyfile=f"{args.ssl_cert_path}/privkey.pem" + ) + print("SSL context created successfully") + except Exception as e: + print(f"Failed to load SSL certificates: {str(e)}") + raise + + if "OMP_NUM_THREADS" not in os.environ: + print(f"Setting OMP_NUM_THREADS to {args.omp_num_threads}") + os.environ["OMP_NUM_THREADS"] = str(args.omp_num_threads) + + print(f"Running hybrid server with args: {args}") + server = HybridWhisperServer( + websocket_port=websocket_port, + http_port=http_port, + backend=args.backend, + faster_whisper_custom_model_path=args.faster_whisper_custom_model_path, + whisper_tensorrt_path=args.trt_model_path, + trt_multilingual=args.trt_multilingual, + single_model=not args.no_single_model, + ssl_context=ssl_context + ) + + print(f"Starting hybrid server with WebSocket on port {websocket_port} and HTTP on port {http_port}") + print(f"Backend: {args.backend}, SSL: {args.ssl_cert_path is not None}") + + server.start() diff --git a/openapi.json b/openapi.json new file mode 100644 index 0000000..15a7bf5 --- /dev/null +++ b/openapi.json @@ -0,0 +1,866 @@ +{ + "openapi": "3.1.0", + "info": { + "title": "WhisperLive API", + "description": "A high-performance speech-to-text API based on OpenAI's Whisper model.\nSupports real-time 
transcription via WebSocket and batch processing via HTTP.\n\n## Features\n- Real-time audio transcription\n- Batch file processing\n- Multiple language support\n- Translation capabilities\n- Multiple model sizes\n- WebSocket and HTTP interfaces\n", + "version": "1.0.0", + "contact": { + "name": "WhisperLive Support", + "url": "https://github.com/collabora/WhisperLive" + }, + "license": { + "name": "MIT", + "url": "https://opensource.org/licenses/MIT" + } + }, + "servers": [ + { + "url": "http://localhost:8080", + "description": "Local development server" + }, + { + "url": "https://api.whisperlive.com/v1", + "description": "Production server" + } + ], + "security": [ + { + "ApiKeyAuth": [] + } + ], + "paths": { + "/v1/audio/transcriptions": { + "post": { + "summary": "Create transcription", + "description": "Transcribes audio into the input language. The response will include the transcribed text\nand additional metadata such as language detection, confidence scores, and timestamps.\n", + "operationId": "createTranscription", + "tags": [ + "Audio" + ], + "requestBody": { + "required": true, + "content": { + "multipart/form-data": { + "schema": { + "type": "object", + "required": [ + "file" + ], + "properties": { + "file": { + "type": "string", + "format": "binary", + "description": "The audio file object (not file name) to transcribe, in one of these formats: \nflac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.\n" + }, + "model": { + "type": "string", + "enum": [ + "tiny", + "base", + "small", + "medium", + "large" + ], + "default": "base", + "description": "ID of the model to use. Only whisper-1 is currently available." + }, + "language": { + "type": "string", + "pattern": "^[a-z]{2}$", + "description": "The language of the input audio. 
Supplying the input language in ISO-639-1 format will improve accuracy and latency.\nSupported languages: en, es, fr, de, it, pt, ru, ja, ko, zh, hi, ar\n" + }, + "prompt": { + "type": "string", + "description": "An optional text to guide the model's style or continue a previous audio segment.\nThe prompt should match the audio language.\n" + }, + "response_format": { + "type": "string", + "enum": [ + "json", + "text", + "srt", + "verbose_json", + "vtt" + ], + "default": "json", + "description": "The format of the transcript output." + }, + "temperature": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 0, + "description": "The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic." + }, + "timestamp_granularities": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "word", + "segment" + ] + }, + "description": "The timestamp granularities to populate for this transcription." 
+ } + } + } + } + } + }, + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/TranscriptionResponse" + }, + { + "$ref": "#/components/schemas/TranscriptionTextResponse" + }, + { + "$ref": "#/components/schemas/TranscriptionSrtResponse" + }, + { + "$ref": "#/components/schemas/TranscriptionVttResponse" + } + ] + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest" + }, + "401": { + "$ref": "#/components/responses/Unauthorized" + }, + "413": { + "$ref": "#/components/responses/FileTooLarge" + }, + "422": { + "$ref": "#/components/responses/ValidationError" + }, + "429": { + "$ref": "#/components/responses/RateLimitExceeded" + }, + "500": { + "$ref": "#/components/responses/InternalServerError" + } + } + } + }, + "/v1/audio/translations": { + "post": { + "summary": "Create translation", + "description": "Translates audio into English. The response will include the translated text\nand additional metadata such as confidence scores and timestamps.\n", + "operationId": "createTranslation", + "tags": [ + "Audio" + ], + "requestBody": { + "required": true, + "content": { + "multipart/form-data": { + "schema": { + "type": "object", + "required": [ + "file" + ], + "properties": { + "file": { + "type": "string", + "format": "binary", + "description": "The audio file object (not file name) to translate, in one of these formats: \nflac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.\n" + }, + "model": { + "type": "string", + "enum": [ + "tiny", + "base", + "small", + "medium", + "large" + ], + "default": "base", + "description": "ID of the model to use. Only whisper-1 is currently available." 
+ }, + "prompt": { + "type": "string", + "description": "An optional text to guide the model's style or continue a previous audio segment.\nThe prompt should be in English.\n" + }, + "response_format": { + "type": "string", + "enum": [ + "json", + "text", + "srt", + "verbose_json", + "vtt" + ], + "default": "json", + "description": "The format of the transcript output." + }, + "temperature": { + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 0, + "description": "The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic." + }, + "timestamp_granularities": { + "type": "array", + "items": { + "type": "string", + "enum": [ + "word", + "segment" + ] + }, + "description": "The timestamp granularities to populate for this translation." + } + } + } + } + } + }, + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "oneOf": [ + { + "$ref": "#/components/schemas/TranscriptionResponse" + }, + { + "$ref": "#/components/schemas/TranscriptionTextResponse" + }, + { + "$ref": "#/components/schemas/TranscriptionSrtResponse" + }, + { + "$ref": "#/components/schemas/TranscriptionVttResponse" + } + ] + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest" + }, + "401": { + "$ref": "#/components/responses/Unauthorized" + }, + "413": { + "$ref": "#/components/responses/FileTooLarge" + }, + "422": { + "$ref": "#/components/responses/ValidationError" + }, + "429": { + "$ref": "#/components/responses/RateLimitExceeded" + }, + "500": { + "$ref": "#/components/responses/InternalServerError" + } + } + } + }, + "/v1/models": { + "get": { + "summary": "List models", + "description": "Lists the currently available models, and provides basic information about each one such as the owner and availability.", + "operationId": "listModels", + "tags": [ + "Models" + ], + "responses": { + "200": { + 
"description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ListModelsResponse" + } + } + } + }, + "401": { + "$ref": "#/components/responses/Unauthorized" + }, + "500": { + "$ref": "#/components/responses/InternalServerError" + } + } + } + }, + "/v1/models/{model}": { + "get": { + "summary": "Retrieve model", + "description": "Retrieves a model instance, providing basic information about the model such as the owner and permissioning.", + "operationId": "retrieveModel", + "tags": [ + "Models" + ], + "parameters": [ + { + "name": "model", + "in": "path", + "required": true, + "description": "The ID of the model to use for this request", + "schema": { + "type": "string", + "enum": [ + "tiny", + "base", + "small", + "medium", + "large" + ] + } + } + ], + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Model" + } + } + } + }, + "401": { + "$ref": "#/components/responses/Unauthorized" + }, + "404": { + "$ref": "#/components/responses/NotFound" + }, + "500": { + "$ref": "#/components/responses/InternalServerError" + } + } + } + }, + "/v1/health": { + "get": { + "summary": "Health check", + "description": "Check the health status of the API server", + "operationId": "healthCheck", + "tags": [ + "System" + ], + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/HealthResponse" + } + } + } + } + } + } + }, + "/v1/websocket": { + "get": { + "summary": "WebSocket connection", + "description": "Establishes a WebSocket connection for real-time audio transcription.\nSend audio data as binary frames and receive transcription results.\n", + "operationId": "websocketConnection", + "tags": [ + "Real-time" + ], + "parameters": [ + { + "name": "model", + "in": "query", + "description": "The model to use for transcription", + "schema": { + "type": "string", + "enum": [ + 
"tiny", + "base", + "small", + "medium", + "large" + ], + "default": "base" + } + }, + { + "name": "language", + "in": "query", + "description": "The language of the input audio", + "schema": { + "type": "string", + "pattern": "^[a-z]{2}$" + } + }, + { + "name": "task", + "in": "query", + "description": "The task to perform", + "schema": { + "type": "string", + "enum": [ + "transcribe", + "translate" + ], + "default": "transcribe" + } + } + ], + "responses": { + "101": { + "description": "Switching Protocols", + "headers": { + "Upgrade": { + "schema": { + "type": "string", + "example": "websocket" + } + }, + "Connection": { + "schema": { + "type": "string", + "example": "Upgrade" + } + } + } + }, + "400": { + "$ref": "#/components/responses/BadRequest" + }, + "401": { + "$ref": "#/components/responses/Unauthorized" + } + } + } + } + }, + "components": { + "securitySchemes": { + "ApiKeyAuth": { + "type": "apiKey", + "in": "header", + "name": "Authorization", + "description": "API key authentication. 
Include your API key in the Authorization header.\nExample: `Authorization: Bearer your-api-key-here`\n" + } + }, + "schemas": { + "TranscriptionResponse": { + "type": "object", + "properties": { + "text": { + "type": "string", + "description": "The transcribed text" + }, + "language": { + "type": "string", + "description": "The language of the input audio" + }, + "duration": { + "type": "number", + "description": "The duration of the input audio in seconds" + }, + "words": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Word" + }, + "description": "Extracted words and their corresponding timestamps" + }, + "segments": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Segment" + }, + "description": "Segments of the transcribed text with timestamps" + } + }, + "required": [ + "text" + ] + }, + "TranscriptionTextResponse": { + "type": "string", + "description": "The transcribed text as plain text" + }, + "TranscriptionSrtResponse": { + "type": "string", + "description": "The transcribed text in SRT subtitle format" + }, + "TranscriptionVttResponse": { + "type": "string", + "description": "The transcribed text in VTT subtitle format" + }, + "Word": { + "type": "object", + "properties": { + "word": { + "type": "string", + "description": "The text content of the word" + }, + "start": { + "type": "number", + "description": "Start time of the word in seconds" + }, + "end": { + "type": "number", + "description": "End time of the word in seconds" + }, + "probability": { + "type": "number", + "description": "Confidence score of the word (0-1)" + } + }, + "required": [ + "word", + "start", + "end" + ] + }, + "Segment": { + "type": "object", + "properties": { + "id": { + "type": "integer", + "description": "Unique identifier for the segment" + }, + "seek": { + "type": "number", + "description": "Seek offset of the segment in seconds" + }, + "start": { + "type": "number", + "description": "Start time of the segment in seconds" + }, + "end": { + 
"type": "number", + "description": "End time of the segment in seconds" + }, + "text": { + "type": "string", + "description": "The text content of the segment" + }, + "tokens": { + "type": "array", + "items": { + "type": "integer" + }, + "description": "Array of token IDs for the segment" + }, + "temperature": { + "type": "number", + "description": "Temperature parameter used for generating this segment" + }, + "avg_logprob": { + "type": "number", + "description": "Average log probability of the segment" + }, + "compression_ratio": { + "type": "number", + "description": "Compression ratio of the segment" + }, + "no_speech_prob": { + "type": "number", + "description": "Probability of no speech in this segment" + }, + "words": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Word" + }, + "description": "Words in this segment" + } + }, + "required": [ + "id", + "seek", + "start", + "end", + "text" + ] + }, + "Model": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "The model identifier" + }, + "object": { + "type": "string", + "enum": [ + "model" + ], + "description": "The object type, which is always \"model\"" + }, + "created": { + "type": "integer", + "description": "The Unix timestamp (in seconds) when the model was created" + }, + "owned_by": { + "type": "string", + "description": "The organization that owns the model" + }, + "permission": { + "type": "array", + "items": { + "type": "object" + }, + "description": "The permissions associated with the model" + }, + "root": { + "type": "string", + "description": "The root of the model" + }, + "parent": { + "type": "string", + "description": "The parent of the model" + } + }, + "required": [ + "id", + "object", + "created", + "owned_by" + ] + }, + "ListModelsResponse": { + "type": "object", + "properties": { + "object": { + "type": "string", + "enum": [ + "list" + ], + "description": "The object type, which is always \"list\"" + }, + "data": { + "type": "array", 
+ "items": { + "$ref": "#/components/schemas/Model" + }, + "description": "The list of models" + } + }, + "required": [ + "object", + "data" + ] + }, + "HealthResponse": { + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": [ + "healthy", + "unhealthy" + ], + "description": "The health status of the service" + }, + "service": { + "type": "string", + "description": "The name of the service" + }, + "version": { + "type": "string", + "description": "The version of the service" + }, + "timestamp": { + "type": "string", + "format": "date-time", + "description": "The current timestamp" + }, + "uptime": { + "type": "number", + "description": "The uptime in seconds" + } + }, + "required": [ + "status", + "service" + ] + }, + "Error": { + "type": "object", + "properties": { + "error": { + "type": "object", + "properties": { + "message": { + "type": "string", + "description": "A human-readable error message" + }, + "type": { + "type": "string", + "description": "The type of error" + }, + "code": { + "type": "string", + "description": "The error code" + }, + "param": { + "type": "string", + "description": "The parameter that caused the error" + } + } + } + }, + "required": [ + "error" + ] + } + }, + "responses": { + "BadRequest": { + "description": "Bad Request", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "error": { + "message": "Invalid request parameters", + "type": "invalid_request_error", + "code": "invalid_parameters" + } + } + } + } + }, + "Unauthorized": { + "description": "Unauthorized", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "error": { + "message": "Invalid API key", + "type": "authentication_error", + "code": "invalid_api_key" + } + } + } + } + }, + "FileTooLarge": { + "description": "File Too Large", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + 
}, + "example": { + "error": { + "message": "File size exceeds maximum allowed size", + "type": "invalid_request_error", + "code": "file_too_large" + } + } + } + } + }, + "ValidationError": { + "description": "Validation Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "error": { + "message": "Invalid file format", + "type": "invalid_request_error", + "code": "invalid_file_format" + } + } + } + } + }, + "RateLimitExceeded": { + "description": "Rate Limit Exceeded", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "error": { + "message": "Rate limit exceeded", + "type": "rate_limit_error", + "code": "rate_limit_exceeded" + } + } + } + } + }, + "InternalServerError": { + "description": "Internal Server Error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "error": { + "message": "An internal server error occurred", + "type": "server_error", + "code": "internal_error" + } + } + } + } + }, + "NotFound": { + "description": "Not Found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "error": { + "message": "Model not found", + "type": "invalid_request_error", + "code": "model_not_found" + } + } + } + } + } + } + }, + "tags": [ + { + "name": "Audio", + "description": "Audio transcription and translation operations" + }, + { + "name": "Models", + "description": "Model management operations" + }, + { + "name": "System", + "description": "System health and status operations" + }, + { + "name": "Real-time", + "description": "Real-time audio processing via WebSocket" + } + ] +} \ No newline at end of file diff --git a/requirements/server.txt b/requirements/server.txt index 37df171..35276f8 100644 --- a/requirements/server.txt +++ b/requirements/server.txt @@ -9,5 +9,7 @@ av jiwer evaluate numpy<2 
-openai-whisper==20240930 -tokenizers==0.20.3 \ No newline at end of file +tokenizers==0.20.3 +flask==3.0.0 +flask-sock +websocket-client \ No newline at end of file diff --git a/scratch/dashboard.html b/scratch/dashboard.html new file mode 100644 index 0000000..1a38087 --- /dev/null +++ b/scratch/dashboard.html @@ -0,0 +1,727 @@ + + + + + + + WhisperLive Dashboard + + + + + +
+
+

WhisperLive

+

High-Performance Real-Time Audio Transcription

+
+ + +
+

Connection Settings

+
+
+ + +
+
+ + +
+
+
+ HTTP Status: Checking... +
+
+ + +
+
+ + + +
+ + +
+
+
+ + +
+ +
+
+ + +
+
+ + +
+
+ + +
+ + +
+ + +
+
+
+ + +
+
+ + +
+
+ +
+ + +
+ +
+
+ Click Start Recording to begin live transcription... +
+
+
+ + +
+

OpenAI Compatible API

+

+ WhisperLive acts as a drop-in replacement for OpenAI's Whisper API. You can use any standard OpenAI + client by changing the base URL. +

+ +

Python (openai package)

+
from openai import OpenAI
+
+client = OpenAI(
+    api_key="sk-no-key-required",
+    base_url="https://whisperlive.classroomcopilot.ai/v1/"
+)
+
+with open("audio.wav", "rb") as file:
+    transcription = client.audio.transcriptions.create(
+        file=file,
+        model="base",
+        response_format="verbose_json"
+    )
+    
+print(transcription.text)
+ +

cURL

+
curl https://whisperlive.classroomcopilot.ai/v1/audio/transcriptions \
+  -H "Content-Type: multipart/form-data" \
+  -F file="@audio.wav" \
+  -F model="base" \
+  -F response_format="verbose_json"
+
+
+
+ + + + + \ No newline at end of file
diff --git a/scratch/test_ws.py b/scratch/test_ws.py
new file mode 100644
index 0000000..e1e101b
--- /dev/null
+++ b/scratch/test_ws.py
@@ -0,0 +1,19 @@
+"""Scratch WebSocket server that prints the request path of each connection."""
+import websockets
+from websockets.sync.server import serve
+
+HOST = "127.0.0.1"
+PORT = 8765
+
+def handler(websocket):
+    """Print the path the client connected on, then send a greeting."""
+    print("Path:", websocket.request.path)
+    websocket.send("Hello")
+
+def main():
+    """Run the server on HOST:PORT until the process is interrupted."""
+    with serve(handler, HOST, PORT) as server:
+        server.serve_forever()
+
+if __name__ == "__main__":
+    main()
diff --git a/test_form.html b/test_form.html
new file mode 100644
index 0000000..1a38087
--- /dev/null
+++ b/test_form.html
@@ -0,0 +1,727 @@
+ + + + + + + WhisperLive Dashboard + + + + + +
+
+

WhisperLive

+

High-Performance Real-Time Audio Transcription

+
+ + +
+

Connection Settings

+
+
+ + +
+
+ + +
+
+
+ HTTP Status: Checking... +
+
+ + +
+
+ + + +
+ + +
+
+
+ + +
+ +
+
+ + +
+
+ + +
+
+ + +
+ + +
+ + +
+
+
+ + +
+
+ + +
+
+ +
+ + +
+ +
+
+ Click Start Recording to begin live transcription... +
+
+
+ + +
+

OpenAI Compatible API

+

+ WhisperLive acts as a drop-in replacement for OpenAI's Whisper API. You can use any standard OpenAI + client by changing the base URL. +

+ +

Python (openai package)

+
from openai import OpenAI
+
+client = OpenAI(
+    api_key="sk-no-key-required",
+    base_url="https://whisperlive.classroomcopilot.ai/v1/"
+)
+
+with open("audio.wav", "rb") as file:
+    transcription = client.audio.transcriptions.create(
+        file=file,
+        model="base",
+        response_format="verbose_json"
+    )
+    
+print(transcription.text)
+ +

cURL

+
curl https://whisperlive.classroomcopilot.ai/v1/audio/transcriptions \
+  -H "Content-Type: multipart/form-data" \
+  -F file="@audio.wav" \
+  -F model="base" \
+  -F response_format="verbose_json"
+
+
+
+ + + + + \ No newline at end of file
diff --git a/test_http_endpoints.py b/test_http_endpoints.py
new file mode 100644
index 0000000..5eed1c4
--- /dev/null
+++ b/test_http_endpoints.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python3
+"""
+Test script for WhisperLive HTTP endpoints
+This script demonstrates how to use the new HTTP API for file transcription
+"""
+
+import requests
+import json
+import os
+from pathlib import Path
+
+# Configuration
+HTTP_BASE_URL = "http://localhost:8080"  # Adjust if using different port
+WEBSOCKET_PORT = 5050  # Your existing WebSocket port
+
+# Timeouts in seconds. requests applies no timeout by default, so without
+# these a hung or unreachable server would block the script forever.
+HEALTH_TIMEOUT = 10
+TRANSCRIBE_TIMEOUT = 300
+
+def test_health_endpoint():
+    """Test the health check endpoint.
+
+    Returns True when GET /health answers with HTTP 200 and a JSON body,
+    False otherwise (any exception, including timeouts, is reported and
+    treated as failure).
+    """
+    print("Testing health endpoint...")
+    try:
+        response = requests.get(f"{HTTP_BASE_URL}/health", timeout=HEALTH_TIMEOUT)
+        print(f"Status: {response.status_code}")
+        print(f"Response: {response.json()}")
+        return response.status_code == 200
+    except Exception as e:
+        print(f"Error: {e}")
+        return False
+
+def test_file_transcription(audio_file_path, language=None, task="transcribe", model="base"):
+    """Test file transcription endpoint.
+
+    Uploads audio_file_path to POST /transcribe and prints a summary of the
+    response. Returns True on HTTP 200, False on any other status, a
+    missing file, or an exception.
+    """
+    print("\nTesting file transcription endpoint...")
+    print(f"File: {audio_file_path}")
+    print(f"Language: {language or 'auto-detect'}")
+    print(f"Task: {task}")
+    print(f"Model: {model}")
+
+    if not os.path.exists(audio_file_path):
+        print(f"Error: File {audio_file_path} not found")
+        return False
+
+    try:
+        # Prepare the request
+        data = {
+            'language': language,
+            'task': task,
+            'model': model
+        }
+
+        # Make the request. The context manager closes the upload handle even
+        # when the request raises, so the file descriptor is never leaked.
+        with open(audio_file_path, 'rb') as audio_file:
+            response = requests.post(
+                f"{HTTP_BASE_URL}/transcribe",
+                files={'file': audio_file},
+                data=data,
+                timeout=TRANSCRIBE_TIMEOUT,
+            )
+
+        print(f"Status: {response.status_code}")
+
+        if response.status_code == 200:
+            result = response.json()
+            print("Transcription successful!")
+            print(f"Filename: {result.get('filename')}")
+            print(f"Language: {result['info'].get('language')}")
+            print(f"Duration: {result['info'].get('duration')} seconds")
+            print(f"Number of segments: {len(result['segments'])}")
+
+            # Print first few segments
+            for i, segment in enumerate(result['segments'][:3]):
+                print(f"Segment {i+1}: [{segment['start']:.2f}s - {segment['end']:.2f}s] {segment['text']}")
+
+            if len(result['segments']) > 3:
+                print(f"... and {len(result['segments']) - 3} more segments")
+
+            return True
+        else:
+            print(f"Error: {response.text}")
+            return False
+
+    except Exception as e:
+        print(f"Error: {e}")
+        return False
+
+def test_url_transcription():
+    """Test URL transcription endpoint (placeholder).
+
+    Posts a fixed example URL to /transcribe/url and returns True only on
+    HTTP 200 with a JSON body.
+    """
+    print("\nTesting URL transcription endpoint...")
+    try:
+        data = {
+            'url': 'https://example.com/audio.mp3',
+            'language': 'en',
+            'task': 'transcribe',
+            'model': 'base'
+        }
+
+        response = requests.post(
+            f"{HTTP_BASE_URL}/transcribe/url",
+            json=data,
+            timeout=TRANSCRIBE_TIMEOUT,
+        )
+        print(f"Status: {response.status_code}")
+        print(f"Response: {response.json()}")
+        return response.status_code == 200
+
+    except Exception as e:
+        print(f"Error: {e}")
+        return False
+
+def test_openai_endpoint(audio_file_path):
+    """Test the OpenAI compatible endpoint.
+
+    Uploads audio_file_path to POST /v1/audio/transcriptions and returns
+    True on HTTP 200, False otherwise.
+    """
+    print("\nTesting OpenAI compatible endpoint...")
+    print(f"File: {audio_file_path}")
+
+    if not os.path.exists(audio_file_path):
+        print(f"Error: File {audio_file_path} not found")
+        return False
+
+    try:
+        data = {
+            'model': 'whisper-1',
+            'response_format': 'json'
+        }
+
+        # Close the upload handle as soon as the request finishes or fails.
+        with open(audio_file_path, 'rb') as audio_file:
+            response = requests.post(
+                f"{HTTP_BASE_URL}/v1/audio/transcriptions",
+                files={'file': audio_file},
+                data=data,
+                timeout=TRANSCRIBE_TIMEOUT,
+            )
+        print(f"Status: {response.status_code}")
+
+        if response.status_code == 200:
+            result = response.json()
+            print("OpenAI endpoint successful!")
+            print(f"Response: {result}")
+            return True
+        else:
+            print(f"Error: {response.text}")
+            return False
+
+    except Exception as e:
+        print(f"Error: {e}")
+        return False
+
+def main():
+    """Main test function"""
+    print("WhisperLive HTTP Endpoints Test")
+    print("=" * 40)
+
+    # Test health endpoint
+    if not test_health_endpoint():
+        print("Health check failed. Make sure the server is running.")
+        return
+
+    # Test file transcription with a sample audio file
+    # You can replace this with any audio file you have
+    sample_audio = "assets/jfk.flac"  # Adjust path as needed
+
+    if os.path.exists(sample_audio):
+        test_file_transcription(sample_audio, language="en", task="transcribe", model="base")
+        test_openai_endpoint(sample_audio)
+    else:
+        print(f"\nSample audio file not found at {sample_audio}")
+        print("You can test with any audio file by calling:")
+        print("test_file_transcription('path/to/your/audio.wav')")
+
+    # Test URL transcription endpoint
+    test_url_transcription()
+
+    print("\n" + "=" * 40)
+    print("Test completed!")
+
+if __name__ == "__main__":
+    main()