feat: apply local modifications to WhisperLive-Server

2026-05-13 22:33:35 +00:00 · 2026-05-13 22:33:35 +00:00 · 83edfff9d3
commit 83edfff9d3
parent 05648af633
17 changed files with 4274 additions and 1352 deletions
--- a/.archive/Dockerfile.macos.dev
+++ b/.archive/Dockerfile.macos.dev
@ -1,51 +0,0 @@
 FROM python:3.10-bookworm
 ARG DEBIAN_FRONTEND=noninteractive
 # Create log directories with proper permissions
 RUN mkdir -p /app/logs && \
    touch /app/logs/whisperlive.log && \
    touch /app/logs/connections.log && \
    chmod 666 /app/logs/whisperlive.log && \
    chmod 666 /app/logs/connections.log
 # install lib required for pyaudio
 RUN apt update && apt install -y portaudio19-dev && apt-get clean && rm -rf /var/lib/apt/lists/*
 # update pip to support whl.metadata -> less downloading
 RUN pip install --no-cache-dir -U "pip>=24"
 # create a working directory
 WORKDIR /app
 # install the requirements for running the whisper-live server
 COPY requirements/server.txt /app/
 RUN pip install -r server.txt && rm server.txt
 COPY whisper_live /app/whisper_live
 COPY run_server.py /app
 # Port options
 EXPOSE ${PORT_WHISPERLIVE}
 EXPOSE ${PORT_WHISPERLIVE_SSL}
 ARG PORT_WHISPERLIVE
 ENV PORT_WHISPERLIVE=${PORT_WHISPERLIVE}
 ARG PORT_WHISPERLIVE_SSL
 ENV PORT_WHISPERLIVE_SSL=${PORT_WHISPERLIVE_SSL}
 # SSL options
 ARG WHISPERLIVE_SSL
 ENV WHISPERLIVE_SSL=${WHISPERLIVE_SSL}
 # Model options
 ARG WHISPL_USE_CUSTOM_MODEL
 ENV WHISPL_USE_CUSTOM_MODEL=${WHISPL_USE_CUSTOM_MODEL}
 ARG FASTERWHISPER_MODEL
 ENV FASTERWHISPER_MODEL=${FASTERWHISPER_MODEL}
 CMD ["sh", "-c", "\
    if [ \"$WHISPERLIVE_SSL\" = \"true\" ]; then \
        python3 -u run_server.py --port $PORT_WHISPERLIVE_SSL --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL --ssl_cert_path /app/ssl; \
    else \
        python3 -u run_server.py --port $PORT_WHISPERLIVE --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL --no_single_model; \
    fi"]
--- a/.archive/Dockerfile.macos.prod
+++ b/.archive/Dockerfile.macos.prod
@ -1,45 +0,0 @@
 FROM python:3.10-bookworm
 ARG DEBIAN_FRONTEND=noninteractive
 # Create log directories with proper permissions
 RUN mkdir -p /app/logs && \
    touch /app/logs/whisperlive.log && \
    touch /app/logs/connections.log && \
    chmod 666 /app/logs/whisperlive.log && \
    chmod 666 /app/logs/connections.log
 # install lib required for pyaudio
 RUN apt update && apt install -y portaudio19-dev && apt-get clean && rm -rf /var/lib/apt/lists/*
 # update pip to support whl.metadata -> less downloading
 RUN pip install --no-cache-dir -U "pip>=24"
 # create a working directory
 WORKDIR /app
 # install the requirements for running the whisper-live server
 COPY requirements/server.txt /app/
 RUN pip install -r server.txt && rm server.txt
 COPY whisper_live /app/whisper_live
 COPY run_server.py /app
 # Port options
 EXPOSE ${PORT_WHISPERLIVE}
 EXPOSE ${PORT_WHISPERLIVE_SSL}
 ARG PORT_WHISPERLIVE
 ENV PORT_WHISPERLIVE=${PORT_WHISPERLIVE}
 ARG PORT_WHISPERLIVE_SSL
 ENV PORT_WHISPERLIVE_SSL=${PORT_WHISPERLIVE_SSL}
 ARG FASTERWHISPER_MODEL
 ENV FASTERWHISPER_MODEL=${FASTERWHISPER_MODEL}
 ARG WHISPERLIVE_SSL
 ENV WHISPERLIVE_SSL=${WHISPERLIVE_SSL}
 CMD ["sh", "-c", "\
    if [ \"$WHISPERLIVE_SSL\" = \"true\" ]; then \
        python3 -u run_server.py --port $PORT_WHISPERLIVE_SSL --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL --ssl_cert_path /app/ssl; \
    else \
        python3 -u run_server.py --port $PORT_WHISPERLIVE --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL; \
    fi"]
--- a/.archive/Dockerfile.win.prod
+++ b/.archive/Dockerfile.win.prod
@ -1,49 +0,0 @@
 FROM python:3.10-bookworm
 ARG DEBIAN_FRONTEND=noninteractive
 # Create log directories with proper permissions
 RUN mkdir -p /app/logs && \
    touch /app/logs/whisperlive.log && \
    touch /app/logs/connections.log && \
    chmod 666 /app/logs/whisperlive.log && \
    chmod 666 /app/logs/connections.log
 # install lib required for pyaudio
 RUN apt update && apt install -y portaudio19-dev && apt-get clean && rm -rf /var/lib/apt/lists/*
 # update pip to support whl.metadata -> less downloading
 RUN pip install --no-cache-dir -U "pip>=24"
 # create a working directory
 WORKDIR /app
 # install the requirements for running the whisper-live server
 COPY requirements/server.txt /app/
 RUN pip install -r server.txt && rm server.txt
 # make the paths of the nvidia libs installed as wheels visible. equivalent to:
 # export LD_LIBRARY_PATH=`python3 -c 'import os; import nvidia.cublas.lib; import nvidia.cudnn.lib; print(os.path.dirname(nvidia.cublas.lib.__file__) + ":" + os.path.dirname(nvidia.cudnn.lib.__file__))'`
 ENV LD_LIBRARY_PATH="/usr/local/lib/python3.10/site-packages/nvidia/cublas/lib:/usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib"
 COPY whisper_live /app/whisper_live
 COPY run_server.py /app
 # Port options
 EXPOSE ${PORT_WHISPERLIVE}
 EXPOSE ${PORT_WHISPERLIVE_SSL}
 ARG PORT_WHISPERLIVE
 ENV PORT_WHISPERLIVE=${PORT_WHISPERLIVE}
 ARG PORT_WHISPERLIVE_SSL
 ENV PORT_WHISPERLIVE_SSL=${PORT_WHISPERLIVE_SSL}
 ARG FASTERWHISPER_MODEL
 ENV FASTERWHISPER_MODEL=${FASTERWHISPER_MODEL}
 ARG WHISPERLIVE_SSL
 ENV WHISPERLIVE_SSL=${WHISPERLIVE_SSL}
 CMD ["sh", "-c", "\
    if [ \"$WHISPERLIVE_SSL\" = \"true\" ]; then \
        python3 -u run_server.py --port $PORT_WHISPERLIVE_SSL --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL --ssl_cert_path /app/ssl; \
    else \
        python3 -u run_server.py --port $PORT_WHISPERLIVE --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL; \
    fi"]
--- a/.archive/docker-compose.yml
+++ b/.archive/docker-compose.yml
--- a/.env
+++ b/.env
@ -1,9 +1,10 @@
 # Whisper live settings
 APP_WS_PROTOCOL=wss
-APP_URL=kevlarai.com
+APP_URL=classroomcopilot.ai
-PORT_WHISPERLIVE=5050
+PORT_WHISPERLIVE=5000
 PORT_WHISPERLIVE_SSL=5053
 HTTP_PORT=8080
 WHISPERLIVE_SSL=false
 WHISPL_USE_CUSTOM_MODEL=false
--- a/20
+++ b/20
@ -20,22 +20,24 @@ WORKDIR /app
 # install the requirements for running the whisper-live server
 COPY requirements/server.txt /app/
-RUN pip install -r server.txt && rm server.txt
+RUN pip install --no-cache-dir "setuptools<70.0.0" wheel
 RUN pip install -r server.txt
 RUN pip install --no-build-isolation openai-whisper==20240930
 RUN rm server.txt
 # make the paths of the nvidia libs installed as wheels visible
-ENV LD_LIBRARY_PATH="/usr/local/lib/python3.10/site-packages/nvidia/cublas/lib:/usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib"
+RUN pip install --no-cache-dir nvidia-cublas-cu12 nvidia-cudnn-cu12
 ENV LD_LIBRARY_PATH="/usr/local/lib/python3.10/site-packages/nvidia/cublas/lib:/usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib:/usr/local/lib/python3.10/site-packages/torch/lib:${LD_LIBRARY_PATH}"
 COPY whisper_live /app/whisper_live
 COPY run_server.py /app
 COPY hybrid_server.py /app
-# Copy application files
+# Expose both WebSocket and HTTP ports
-EXPOSE ${PORT_WHISPERLIVE}
+EXPOSE 5000 8080
 ARG PORT_WHISPERLIVE
 ENV PORT_WHISPERLIVE=${PORT_WHISPERLIVE}
 ARG FASTERWHISPER_MODEL
 ENV FASTERWHISPER_MODEL=${FASTERWHISPER_MODEL}
-CMD ["python3", "-u", "run_server.py", "--port", "${PORT_WHISPERLIVE}", "--backend", "faster_whisper"]
+# Use the hybrid server by default
 CMD python3 -u hybrid_server.py --websocket-port 5000 --http-port 8080 --backend faster_whisper
 # CMD ["python3", "-u", "run_server.py", "--port", "${PORT_WHISPERLIVE}", "--backend", "faster_whisper", "--faster_whisper_custom_model_path", "/app/models/${FASTERWHISPER_MODEL}", "--ssl_cert_path", "/app/ssl"]
--- a/HYBRID_SERVER_README.md
+++ b/HYBRID_SERVER_README.md
@ -0,0 +1,260 @@
 # WhisperLive Hybrid Server
 This hybrid server extends the original WhisperLive-Server to support both WebSocket connections (for real-time audio streaming) and HTTP endpoints (for file transcription) in a single container.
 ## Features
 - **WebSocket Server**: Original real-time audio transcription functionality
 - **HTTP Server**: New file upload and transcription endpoints
 - **Single Container**: Both services run in the same Docker container
 - **GPU Sharing**: Both services share the same GPU resources
 ## Architecture
 The hybrid server runs two services simultaneously:
 1. **WebSocket Server**: Handles real-time audio streaming transcription
 2. **HTTP Server**: Handles file uploads and transcription requests
 Both services use the same WhisperLive transcriber instance, ensuring efficient resource usage.
 ## Ports
 - **WebSocket Port**: Default 5050 (configurable via `PORT_WHISPERLIVE`; the bundled `.env` and Dockerfile use 5000)
 - **HTTP Port**: Default 8080 (configurable via `HTTP_PORT`)
 ## HTTP Endpoints
 ### 1. Health Check
 ```
 GET /health
 ```
 Returns server health status.
 **Response:**
 ```json
 {
  "status": "healthy",
  "service": "WhisperLive Hybrid Server"
 }
 ```
 ### 2. OpenAI Compatible Endpoints
 ```
 POST /v1/audio/transcriptions
 POST /v1/audio/translations
 ```
 Fully compatible drop-in replacements for the standard OpenAI Whisper API.
 **Parameters:**
 - `file` (required): Audio file (WAV, MP3, FLAC, M4A, OGG, WEBM, MP4, MPEG, MPGA)
 - `model` (optional): Model size (default: "base")
 - `language` (optional): Language code (e.g., "en", "es", "fr")
 - `prompt` (optional): Text to guide the model's style
 - `response_format` (optional): "json", "text", "srt", "verbose_json", "vtt" (default: "json")
 - `temperature` (optional): Sampling temperature (0.0 to 1.0)
 **Example Request:**
 ```bash
 curl -X POST http://localhost:8080/v1/audio/transcriptions \
  -H "Content-Type: multipart/form-data" \
  -F "file=@audio.wav" \
  -F "model=whisper-1" \
  -F "response_format=json"
 ```
 **Response (JSON format):**
 ```json
 {
  "text": "Hello, this is a test."
 }
 ```
 ### 3. Legacy File Transcription
 ```
 POST /transcribe
 ```
 Transcribes an uploaded audio file.
 **Parameters:**
 - `file` (required): Audio file (WAV, MP3, FLAC, M4A, OGG, WEBM)
 - `language` (optional): Language code (e.g., "en", "es", "fr")
 - `task` (optional): "transcribe" or "translate" (default: "transcribe")
 - `model` (optional): Model size (default: "base")
 **Example Request:**
 ```bash
 curl -X POST http://localhost:8080/transcribe \
  -F "file=@audio.wav" \
  -F "language=en" \
  -F "task=transcribe" \
  -F "model=base"
 ```
 **Response:**
 ```json
 {
  "success": true,
  "segments": [
    {
      "start": 0.0,
      "end": 2.5,
      "text": "Hello, this is a test.",
      "no_speech_prob": 0.1
    }
  ],
  "info": {
    "language": "en",
    "language_probability": 0.95,
    "duration": 10.5,
    "duration_after_vad": 10.5,
    "transcription_options": {}
  },
  "filename": "audio.wav"
 }
 ```
 ### 4. URL Transcription (Placeholder)
 ```
 POST /transcribe/url
 ```
 Placeholder endpoint for transcribing audio from URLs; it is not implemented yet.
 ## Usage Examples
 ### Python Client
 ```python
 import requests
 # Transcribe a file
 with open('audio.wav', 'rb') as f:
    response = requests.post('http://localhost:8080/transcribe', 
                           files={'file': f},
                           data={'language': 'en', 'model': 'base'})
 if response.status_code == 200:
    result = response.json()
    print(f"Transcription: {result['segments']}")
 ```
 ### JavaScript/Node.js
 ```javascript
 const FormData = require('form-data');
 const fs = require('fs');
 const form = new FormData();
 form.append('file', fs.createReadStream('audio.wav'));
 form.append('language', 'en');
 form.append('model', 'base');
 fetch('http://localhost:8080/transcribe', {
    method: 'POST',
    body: form
 })
 .then(response => response.json())
 .then(result => console.log(result));
 ```
 ### cURL
 ```bash
 # Basic transcription
 curl -X POST http://localhost:8080/transcribe \
  -F "file=@audio.wav"
 # With parameters
 curl -X POST http://localhost:8080/transcribe \
  -F "file=@audio.wav" \
  -F "language=es" \
  -F "task=translate" \
  -F "model=small"
 ```
 ## Configuration
 ### Environment Variables
 - `PORT_WHISPERLIVE`: WebSocket port (default: 5050)
 - `HTTP_PORT`: HTTP port (default: 8080)
 - `FASTERWHISPER_MODEL`: Custom model path
 - `OMP_NUM_THREADS`: OpenMP thread count
 ### Docker Compose
 ```yaml
 services:
  whisperlive:
    ports:
      - "5050:5050"  # WebSocket
      - "8080:8080"  # HTTP
    environment:
      PORT_WHISPERLIVE: 5050
      HTTP_PORT: 8080
 ```
 ## Testing
 ### 1. Test Script
 Run the Python test script:
 ```bash
 python3 test_http_endpoints.py
 ```
 ### 2. Web Interface
 Open `test_form.html` in a web browser to test the HTTP endpoints with a user-friendly interface.
 ### 3. Health Check
 ```bash
 curl http://localhost:8080/health
 ```
 ## Backend Support
 Currently, the HTTP endpoints support:
 - **faster_whisper**: Full support for all features
 - **tensorrt**: Basic support (needs adaptation)
 - **openvino**: Basic support (needs adaptation)
 ## File Size Limits
 - Maximum file size: 100MB
 - Supported formats: WAV, MP3, FLAC, M4A, OGG, WEBM
 ## Performance Considerations
 - File transcription uses the same model instance as WebSocket connections
 - Temporary files are automatically cleaned up after processing
 - Both services share GPU memory efficiently
 - HTTP requests are processed in separate threads
 ## Troubleshooting
 ### Common Issues
 1. **Port Already in Use**
   - Check if ports 5050 or 8080 are available
   - Use different ports via environment variables
 2. **File Upload Errors**
   - Ensure file size is under 100MB
   - Check file format is supported
   - Verify file is not corrupted
 3. **GPU Memory Issues**
   - Monitor GPU memory usage
   - Consider using smaller model sizes
   - Restart container if needed
 ### Logs
 Check container logs for detailed error information:
 ```bash
 docker logs whisperlive
 ```
 ## Migration from Original Server
 The hybrid server is fully backward compatible. Your existing WebSocket clients will continue to work without changes. The HTTP endpoints are additional functionality that doesn't interfere with the original service.
 ## Future Enhancements
 - [ ] Support for more audio formats
 - [ ] Batch file processing
 - [ ] Progress tracking for long files
 - [ ] Authentication and rate limiting
 - [ ] WebSocket support for file transcription progress
--- a/pycache/hybrid_server.cpython-314.pyc
+++ b/pycache/hybrid_server.cpython-314.pyc
--- a/batch_transcribe.py
+++ b/batch_transcribe.py
@ -0,0 +1,270 @@
 #!/usr/bin/env python3
 """
 Batch Transcription Script for WhisperLive
 Processes all audio files in a folder using the HTTP transcription endpoint
 """
 import os
 import sys
 import json
 import time
 import argparse
 import requests
 from pathlib import Path
 from typing import List, Dict, Optional
 import logging
 # Configure root logging once at import time (INFO level, timestamped lines)
 # and create the module-level logger used by every function below.
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 class BatchTranscriber:
    """Client that transcribes a folder of audio files through an HTTP server.

    Each file is uploaded to ``{server_url}/transcribe`` and the JSON reply is
    written to disk as txt, json, srt or vtt.
    """

    # Output formats that save_transcript knows how to write.
    _OUTPUT_FORMATS = ("txt", "json", "srt", "vtt")

    def __init__(self, server_url: str = "http://localhost:8080"):
        """Store the server base URL and the set of accepted file suffixes."""
        self.server_url = server_url
        self.supported_formats = {'.wav', '.mp3', '.flac', '.m4a', '.ogg', '.webm'}

    def get_audio_files(self, folder_path: str) -> List[Path]:
        """Return the supported audio files directly inside *folder_path*, sorted.

        Only regular files whose lower-cased suffix is in
        ``self.supported_formats`` are returned; sub-folders are not searched.

        Raises:
            FileNotFoundError: if *folder_path* does not exist.
        """
        folder = Path(folder_path)
        if not folder.exists():
            raise FileNotFoundError(f"Folder not found: {folder_path}")
        return sorted(
            entry for entry in folder.iterdir()
            if entry.is_file() and entry.suffix.lower() in self.supported_formats
        )

    def transcribe_file(self, file_path: Path, language: Optional[str] = None, 
                       task: str = "transcribe", model: str = "base") -> Dict:
        """Upload one audio file and return the server's decoded JSON reply.

        Never raises: on a non-200 response the result is
        ``{'error': <body text>, 'status_code': <code>}`` and on any exception
        it is ``{'error': <message>}``, so callers detect failure by the
        presence of the ``'error'`` key.
        """
        try:
            logger.info(f"Transcribing: {file_path.name}")
            with open(file_path, 'rb') as f:
                files = {'file': f}
                data = {
                    'language': language,
                    'task': task,
                    'model': model
                }
                response = requests.post(f"{self.server_url}/transcribe", 
                                      files=files, data=data, timeout=300)
                if response.status_code == 200:
                    result = response.json()
                    logger.info(f"✅ Successfully transcribed: {file_path.name}")
                    return result
                else:
                    error_msg = response.text
                    logger.error(f"❌ Failed to transcribe {file_path.name}: {error_msg}")
                    return {'error': error_msg, 'status_code': response.status_code}
        except Exception as e:
            logger.error(f"❌ Error transcribing {file_path.name}: {str(e)}")
            return {'error': str(e)}

    def save_transcript(self, transcript_data: Dict, output_path: Path, 
                       format_type: str = "txt") -> bool:
        """Write *transcript_data* to *output_path* in the requested format.

        Returns True when the file was written. Returns False when the data
        carries an ``'error'`` key, when *format_type* is not one of
        txt/json/srt/vtt, or when writing fails (the failure is logged).
        """
        try:
            if 'error' in transcript_data:
                return False
            if format_type not in self._OUTPUT_FORMATS:
                # An unknown format used to fall through every branch and be
                # reported as saved even though no file was written.
                logger.error(f"❌ Unsupported output format: {format_type}")
                return False
            with open(output_path, 'w', encoding='utf-8') as f:
                if format_type == "txt":
                    info = transcript_data['info']
                    f.write(f"Transcription of: {transcript_data.get('filename', 'Unknown')}\n")
                    f.write(f"Language: {info.get('language', 'Auto-detected')}\n")
                    f.write(f"Duration: {info.get('duration', 0):.2f} seconds\n")
                    f.write("=" * 50 + "\n\n")
                    for segment in transcript_data['segments']:
                        f.write(f"[{segment['start']:.2f}s - {segment['end']:.2f}s] {segment['text']}\n")
                elif format_type == "json":
                    json.dump(transcript_data, f, indent=2, ensure_ascii=False)
                elif format_type == "srt":
                    # SRT cues are numbered starting at 1.
                    for i, segment in enumerate(transcript_data['segments'], 1):
                        start_time = self.format_srt_time(segment['start'])
                        end_time = self.format_srt_time(segment['end'])
                        f.write(f"{i}\n{start_time} --> {end_time}\n{segment['text']}\n\n")
                else:  # "vtt"
                    f.write("WEBVTT\n\n")
                    for segment in transcript_data['segments']:
                        start_time = self.format_vtt_time(segment['start'])
                        end_time = self.format_vtt_time(segment['end'])
                        f.write(f"{start_time} --> {end_time}\n{segment['text']}\n\n")
            logger.info(f"💾 Saved transcript: {output_path}")
            return True
        except Exception as e:
            logger.error(f"❌ Error saving transcript {output_path}: {str(e)}")
            return False

    @staticmethod
    def _split_timestamp(seconds: float):
        """Split *seconds* into (hours, minutes, seconds, milliseconds) integers.

        The value is rounded to whole milliseconds once, up front. Taking
        ``int((seconds % 1) * 1000)`` instead truncates on float error, so
        1.001 would come out with 0 ms. Negative input is clamped to zero.
        """
        total_ms = max(0, round(seconds * 1000))
        total_secs, millisecs = divmod(total_ms, 1000)
        total_mins, secs = divmod(total_secs, 60)
        hours, minutes = divmod(total_mins, 60)
        return hours, minutes, secs, millisecs

    def format_srt_time(self, seconds: float) -> str:
        """Format *seconds* as an SRT timestamp, ``HH:MM:SS,mmm``."""
        hours, minutes, secs, millisecs = self._split_timestamp(seconds)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millisecs:03d}"

    def format_vtt_time(self, seconds: float) -> str:
        """Format *seconds* as a WebVTT timestamp, ``HH:MM:SS.mmm``."""
        hours, minutes, secs, millisecs = self._split_timestamp(seconds)
        return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millisecs:03d}"

    def batch_transcribe(self, input_folder: str, output_folder: str, 
                        language: Optional[str] = None, task: str = "transcribe", 
                        model: str = "base", format_type: str = "txt", 
                        delay: float = 1.0) -> Dict:
        """Transcribe every audio file in *input_folder* into *output_folder*.

        Files are processed one at a time, sleeping *delay* seconds between
        requests. Each transcript is saved as ``<input stem>.<format_type>``.

        Returns a summary dict with the keys ``processed``, ``successful``,
        ``failed`` and ``files`` (one entry per input with its status).
        """
        # Create output folder if it doesn't exist
        output_path = Path(output_folder)
        output_path.mkdir(parents=True, exist_ok=True)
        # Get all audio files
        audio_files = self.get_audio_files(input_folder)
        if not audio_files:
            logger.warning(f"No audio files found in: {input_folder}")
            # Same keys as the normal summary so callers can read 'files' safely.
            return {'processed': 0, 'successful': 0, 'failed': 0, 'files': []}
        logger.info(f"Found {len(audio_files)} audio files to process")
        results = {
            'processed': len(audio_files),
            'successful': 0,
            'failed': 0,
            'files': []
        }
        for i, audio_file in enumerate(audio_files, 1):
            logger.info(f"Processing {i}/{len(audio_files)}: {audio_file.name}")
            # Transcribe the file
            transcript_data = self.transcribe_file(audio_file, language, task, model)
            if 'error' not in transcript_data:
                # Create output filename
                base_name = audio_file.stem
                output_file = output_path / f"{base_name}.{format_type}"
                # Save transcript
                if self.save_transcript(transcript_data, output_file, format_type):
                    results['successful'] += 1
                    results['files'].append({
                        'input': str(audio_file),
                        'output': str(output_file),
                        'status': 'success'
                    })
                else:
                    results['failed'] += 1
                    results['files'].append({
                        'input': str(audio_file),
                        'output': str(output_file),
                        'status': 'failed'
                    })
            else:
                results['failed'] += 1
                results['files'].append({
                    'input': str(audio_file),
                    'output': None,
                    'status': 'failed',
                    'error': transcript_data.get('error', 'Unknown error')
                })
            # Add delay between requests to avoid overwhelming the server
            if i < len(audio_files):
                time.sleep(delay)
        return results
 def main():
    """Command-line entry point for batch transcription.

    Parses the arguments, checks the server's ``/health`` endpoint, runs
    ``BatchTranscriber.batch_transcribe`` and logs a summary. Exits with
    status 1 when the server is unreachable or unhealthy, on Ctrl-C, or on
    any unexpected error.
    """
    cli = argparse.ArgumentParser(description='Batch transcribe audio files using WhisperLive')
    cli.add_argument('input_folder', help='Folder containing audio files')
    cli.add_argument('output_folder', help='Folder to save transcripts')
    cli.add_argument('--server', '-s', default='http://localhost:8080',
                     help='WhisperLive server URL (default: http://localhost:8080)')
    cli.add_argument('--language', '-l', help='Language code (e.g., en, es, fr)')
    cli.add_argument('--task', '-t', choices=['transcribe', 'translate'], default='transcribe',
                     help='Task to perform (default: transcribe)')
    cli.add_argument('--model', '-m', default='base',
                     help='Model size (default: base)')
    cli.add_argument('--format', '-f', choices=['txt', 'json', 'srt', 'vtt'], default='txt',
                     help='Output format (default: txt)')
    cli.add_argument('--delay', '-d', type=float, default=1.0,
                     help='Delay between requests in seconds (default: 1.0)')
    cli.add_argument('--verbose', '-v', action='store_true',
                     help='Verbose output')
    args = cli.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    try:
        worker = BatchTranscriber(args.server)

        # Probe the server before touching any files so a dead server fails fast.
        try:
            health = requests.get(f"{args.server}/health", timeout=5)
        except requests.exceptions.RequestException as e:
            logger.error(f"❌ Cannot connect to server: {e}")
            sys.exit(1)
        if health.status_code != 200:
            logger.error(f"Server health check failed: {health.status_code}")
            sys.exit(1)
        logger.info("✅ Server health check passed")

        summary = worker.batch_transcribe(
            input_folder=args.input_folder,
            output_folder=args.output_folder,
            language=args.language,
            task=args.task,
            model=args.model,
            format_type=args.format,
            delay=args.delay
        )

        # Report the outcome, one log record per line.
        rule = "=" * 50
        report = (
            "\n" + rule,
            "BATCH TRANSCRIPTION COMPLETED",
            rule,
            f"Total files processed: {summary['processed']}",
            f"Successful: {summary['successful']}",
            f"Failed: {summary['failed']}",
            f"Output folder: {args.output_folder}",
            f"Output format: {args.format}",
        )
        for line in report:
            logger.info(line)

        if summary['failed'] > 0:
            logger.warning("\nFailed files:")
            failures = (entry for entry in summary['files'] if entry['status'] == 'failed')
            for entry in failures:
                logger.warning(f"  - {entry['input']}: {entry.get('error', 'Unknown error')}")

        if summary['successful'] > 0:
            logger.info(f"\n✅ Successfully processed {summary['successful']} files!")
    except KeyboardInterrupt:
        logger.info("\n⚠️  Process interrupted by user")
        sys.exit(1)
    except Exception as e:
        # Last-resort boundary: log the message and signal failure to the shell.
        logger.error(f"❌ Unexpected error: {str(e)}")
        sys.exit(1)


 if __name__ == "__main__":
    main()
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@ -15,6 +15,8 @@ services:
      LOG_PATH: /app/logs
      NVIDIA_VISIBLE_DEVICES: all
      NVIDIA_DRIVER_CAPABILITIES: compute,utility
      PORT_WHISPERLIVE: ${PORT_WHISPERLIVE}
      HTTP_PORT: ${HTTP_PORT:-8080}
    volumes:
      - ./models:/app/models
      - ./ssl:/app/ssl
@ -26,11 +28,15 @@ services:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
              options:
                memory: "4G"  # Match the main docker-compose.yml allocation
    ports:
-      - ${PORT_WHISPERLIVE}:${PORT_WHISPERLIVE}
+      - "${PORT_WHISPERLIVE}:${PORT_WHISPERLIVE}"
      - "${HTTP_PORT:-8080}:8080"
    restart: unless-stopped
    networks:
-      - audio-network
+      - default
 networks:
-  audio-network:
+  default:
    driver: bridge
--- a/hybrid_server.py
+++ b/hybrid_server.py
--- a/openapi.json
+++ b/openapi.json
@ -0,0 +1,866 @@
 {
  "openapi": "3.1.0",
  "info": {
    "title": "WhisperLive API",
    "description": "A high-performance speech-to-text API based on OpenAI's Whisper model.\nSupports real-time transcription via WebSocket and batch processing via HTTP.\n\n## Features\n- Real-time audio transcription\n- Batch file processing\n- Multiple language support\n- Translation capabilities\n- Multiple model sizes\n- WebSocket and HTTP interfaces\n",
    "version": "1.0.0",
    "contact": {
      "name": "WhisperLive Support",
      "url": "https://github.com/collabora/WhisperLive"
    },
    "license": {
      "name": "MIT",
      "url": "https://opensource.org/licenses/MIT"
    }
  },
  "servers": [
    {
      "url": "http://localhost:8080",
      "description": "Local development server"
    },
    {
      "url": "https://api.whisperlive.com/v1",
      "description": "Production server"
    }
  ],
  "security": [
    {
      "ApiKeyAuth": []
    }
  ],
  "paths": {
    "/v1/audio/transcriptions": {
      "post": {
        "summary": "Create transcription",
        "description": "Transcribes audio into the input language. The response will include the transcribed text\nand additional metadata such as language detection, confidence scores, and timestamps.\n",
        "operationId": "createTranscription",
        "tags": [
          "Audio"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "multipart/form-data": {
              "schema": {
                "type": "object",
                "required": [
                  "file"
                ],
                "properties": {
                  "file": {
                    "type": "string",
                    "format": "binary",
                    "description": "The audio file object (not file name) to transcribe, in one of these formats: \nflac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.\n"
                  },
                  "model": {
                    "type": "string",
                    "enum": [
                      "tiny",
                      "base",
                      "small",
                      "medium",
                      "large"
                    ],
                    "default": "base",
                    "description": "ID of the model to use. One of: tiny, base, small, medium, large (default: base)."
                  },
                  "language": {
                    "type": "string",
                    "pattern": "^[a-z]{2}$",
                    "description": "The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.\nSupported languages: en, es, fr, de, it, pt, ru, ja, ko, zh, hi, ar\n"
                  },
                  "prompt": {
                    "type": "string",
                    "description": "An optional text to guide the model's style or continue a previous audio segment.\nThe prompt should match the audio language.\n"
                  },
                  "response_format": {
                    "type": "string",
                    "enum": [
                      "json",
                      "text",
                      "srt",
                      "verbose_json",
                      "vtt"
                    ],
                    "default": "json",
                    "description": "The format of the transcript output."
                  },
                  "temperature": {
                    "type": "number",
                    "minimum": 0,
                    "maximum": 1,
                    "default": 0,
                    "description": "The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic."
                  },
                  "timestamp_granularities": {
                    "type": "array",
                    "items": {
                      "type": "string",
                      "enum": [
                        "word",
                        "segment"
                      ]
                    },
                    "description": "The timestamp granularities to populate for this transcription."
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "oneOf": [
                    {
                      "$ref": "#/components/schemas/TranscriptionResponse"
                    },
                    {
                      "$ref": "#/components/schemas/TranscriptionTextResponse"
                    },
                    {
                      "$ref": "#/components/schemas/TranscriptionSrtResponse"
                    },
                    {
                      "$ref": "#/components/schemas/TranscriptionVttResponse"
                    }
                  ]
                }
              }
            }
          },
          "400": {
            "$ref": "#/components/responses/BadRequest"
          },
          "401": {
            "$ref": "#/components/responses/Unauthorized"
          },
          "413": {
            "$ref": "#/components/responses/FileTooLarge"
          },
          "422": {
            "$ref": "#/components/responses/ValidationError"
          },
          "429": {
            "$ref": "#/components/responses/RateLimitExceeded"
          },
          "500": {
            "$ref": "#/components/responses/InternalServerError"
          }
        }
      }
    },
    "/v1/audio/translations": {
      "post": {
        "summary": "Create translation",
        "description": "Translates audio into English. The response will include the translated text\nand additional metadata such as confidence scores and timestamps.\n",
        "operationId": "createTranslation",
        "tags": [
          "Audio"
        ],
        "requestBody": {
          "required": true,
          "content": {
            "multipart/form-data": {
              "schema": {
                "type": "object",
                "required": [
                  "file"
                ],
                "properties": {
                  "file": {
                    "type": "string",
                    "format": "binary",
                    "description": "The audio file object (not file name) to translate, in one of these formats: \nflac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.\n"
                  },
                  "model": {
                    "type": "string",
                    "enum": [
                      "tiny",
                      "base",
                      "small",
                      "medium",
                      "large"
                    ],
                    "default": "base",
                    "description": "ID of the model to use. One of: tiny, base, small, medium, large (default: base)."
                  },
                  "prompt": {
                    "type": "string",
                    "description": "An optional text to guide the model's style or continue a previous audio segment.\nThe prompt should be in English.\n"
                  },
                  "response_format": {
                    "type": "string",
                    "enum": [
                      "json",
                      "text",
                      "srt",
                      "verbose_json",
                      "vtt"
                    ],
                    "default": "json",
                    "description": "The format of the transcript output."
                  },
                  "temperature": {
                    "type": "number",
                    "minimum": 0,
                    "maximum": 1,
                    "default": 0,
                    "description": "The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic."
                  },
                  "timestamp_granularities": {
                    "type": "array",
                    "items": {
                      "type": "string",
                      "enum": [
                        "word",
                        "segment"
                      ]
                    },
                    "description": "The timestamp granularities to populate for this translation."
                  }
                }
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "oneOf": [
                    {
                      "$ref": "#/components/schemas/TranscriptionResponse"
                    },
                    {
                      "$ref": "#/components/schemas/TranscriptionTextResponse"
                    },
                    {
                      "$ref": "#/components/schemas/TranscriptionSrtResponse"
                    },
                    {
                      "$ref": "#/components/schemas/TranscriptionVttResponse"
                    }
                  ]
                }
              }
            }
          },
          "400": {
            "$ref": "#/components/responses/BadRequest"
          },
          "401": {
            "$ref": "#/components/responses/Unauthorized"
          },
          "413": {
            "$ref": "#/components/responses/FileTooLarge"
          },
          "422": {
            "$ref": "#/components/responses/ValidationError"
          },
          "429": {
            "$ref": "#/components/responses/RateLimitExceeded"
          },
          "500": {
            "$ref": "#/components/responses/InternalServerError"
          }
        }
      }
    },
    "/v1/models": {
      "get": {
        "summary": "List models",
        "description": "Lists the currently available models, and provides basic information about each one such as the owner and availability.",
        "operationId": "listModels",
        "tags": [
          "Models"
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ListModelsResponse"
                }
              }
            }
          },
          "401": {
            "$ref": "#/components/responses/Unauthorized"
          },
          "500": {
            "$ref": "#/components/responses/InternalServerError"
          }
        }
      }
    },
    "/v1/models/{model}": {
      "get": {
        "summary": "Retrieve model",
        "description": "Retrieves a model instance, providing basic information about the model such as the owner and permissioning.",
        "operationId": "retrieveModel",
        "tags": [
          "Models"
        ],
        "parameters": [
          {
            "name": "model",
            "in": "path",
            "required": true,
            "description": "The ID of the model to use for this request",
            "schema": {
              "type": "string",
              "enum": [
                "tiny",
                "base",
                "small",
                "medium",
                "large"
              ]
            }
          }
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/Model"
                }
              }
            }
          },
          "401": {
            "$ref": "#/components/responses/Unauthorized"
          },
          "404": {
            "$ref": "#/components/responses/NotFound"
          },
          "500": {
            "$ref": "#/components/responses/InternalServerError"
          }
        }
      }
    },
    "/v1/health": {
      "get": {
        "summary": "Health check",
        "description": "Check the health status of the API server",
        "operationId": "healthCheck",
        "tags": [
          "System"
        ],
        "responses": {
          "200": {
            "description": "OK",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/HealthResponse"
                }
              }
            }
          }
        }
      }
    },
    "/v1/websocket": {
      "get": {
        "summary": "WebSocket connection",
        "description": "Establishes a WebSocket connection for real-time audio transcription.\nSend audio data as binary frames and receive transcription results.\n",
        "operationId": "websocketConnection",
        "tags": [
          "Real-time"
        ],
        "parameters": [
          {
            "name": "model",
            "in": "query",
            "description": "The model to use for transcription",
            "schema": {
              "type": "string",
              "enum": [
                "tiny",
                "base",
                "small",
                "medium",
                "large"
              ],
              "default": "base"
            }
          },
          {
            "name": "language",
            "in": "query",
            "description": "The language of the input audio",
            "schema": {
              "type": "string",
              "pattern": "^[a-z]{2}$"
            }
          },
          {
            "name": "task",
            "in": "query",
            "description": "The task to perform",
            "schema": {
              "type": "string",
              "enum": [
                "transcribe",
                "translate"
              ],
              "default": "transcribe"
            }
          }
        ],
        "responses": {
          "101": {
            "description": "Switching Protocols",
            "headers": {
              "Upgrade": {
                "schema": {
                  "type": "string",
                  "example": "websocket"
                }
              },
              "Connection": {
                "schema": {
                  "type": "string",
                  "example": "Upgrade"
                }
              }
            }
          },
          "400": {
            "$ref": "#/components/responses/BadRequest"
          },
          "401": {
            "$ref": "#/components/responses/Unauthorized"
          }
        }
      }
    }
  },
  "components": {
    "securitySchemes": {
      "ApiKeyAuth": {
        "type": "apiKey",
        "in": "header",
        "name": "Authorization",
        "description": "API key authentication. Include your API key in the Authorization header.\nExample: `Authorization: Bearer your-api-key-here`\n"
      }
    },
    "schemas": {
      "TranscriptionResponse": {
        "type": "object",
        "properties": {
          "text": {
            "type": "string",
            "description": "The transcribed text"
          },
          "language": {
            "type": "string",
            "description": "The language of the input audio"
          },
          "duration": {
            "type": "number",
            "description": "The duration of the input audio in seconds"
          },
          "words": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/Word"
            },
            "description": "Extracted words and their corresponding timestamps"
          },
          "segments": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/Segment"
            },
            "description": "Segments of the transcribed text with timestamps"
          }
        },
        "required": [
          "text"
        ]
      },
      "TranscriptionTextResponse": {
        "type": "string",
        "description": "The transcribed text as plain text"
      },
      "TranscriptionSrtResponse": {
        "type": "string",
        "description": "The transcribed text in SRT subtitle format"
      },
      "TranscriptionVttResponse": {
        "type": "string",
        "description": "The transcribed text in VTT subtitle format"
      },
      "Word": {
        "type": "object",
        "properties": {
          "word": {
            "type": "string",
            "description": "The text content of the word"
          },
          "start": {
            "type": "number",
            "description": "Start time of the word in seconds"
          },
          "end": {
            "type": "number",
            "description": "End time of the word in seconds"
          },
          "probability": {
            "type": "number",
            "description": "Confidence score of the word (0-1)"
          }
        },
        "required": [
          "word",
          "start",
          "end"
        ]
      },
      "Segment": {
        "type": "object",
        "properties": {
          "id": {
            "type": "integer",
            "description": "Unique identifier for the segment"
          },
          "seek": {
            "type": "number",
            "description": "Seek offset of the segment in seconds"
          },
          "start": {
            "type": "number",
            "description": "Start time of the segment in seconds"
          },
          "end": {
            "type": "number",
            "description": "End time of the segment in seconds"
          },
          "text": {
            "type": "string",
            "description": "The text content of the segment"
          },
          "tokens": {
            "type": "array",
            "items": {
              "type": "integer"
            },
            "description": "Array of token IDs for the segment"
          },
          "temperature": {
            "type": "number",
            "description": "Temperature parameter used for generating this segment"
          },
          "avg_logprob": {
            "type": "number",
            "description": "Average log probability of the segment"
          },
          "compression_ratio": {
            "type": "number",
            "description": "Compression ratio of the segment"
          },
          "no_speech_prob": {
            "type": "number",
            "description": "Probability of no speech in this segment"
          },
          "words": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/Word"
            },
            "description": "Words in this segment"
          }
        },
        "required": [
          "id",
          "seek",
          "start",
          "end",
          "text"
        ]
      },
      "Model": {
        "type": "object",
        "properties": {
          "id": {
            "type": "string",
            "description": "The model identifier"
          },
          "object": {
            "type": "string",
            "enum": [
              "model"
            ],
            "description": "The object type, which is always \"model\""
          },
          "created": {
            "type": "integer",
            "description": "The Unix timestamp (in seconds) when the model was created"
          },
          "owned_by": {
            "type": "string",
            "description": "The organization that owns the model"
          },
          "permission": {
            "type": "array",
            "items": {
              "type": "object"
            },
            "description": "The permissions associated with the model"
          },
          "root": {
            "type": "string",
            "description": "The root of the model"
          },
          "parent": {
            "type": "string",
            "description": "The parent of the model"
          }
        },
        "required": [
          "id",
          "object",
          "created",
          "owned_by"
        ]
      },
      "ListModelsResponse": {
        "type": "object",
        "properties": {
          "object": {
            "type": "string",
            "enum": [
              "list"
            ],
            "description": "The object type, which is always \"list\""
          },
          "data": {
            "type": "array",
            "items": {
              "$ref": "#/components/schemas/Model"
            },
            "description": "The list of models"
          }
        },
        "required": [
          "object",
          "data"
        ]
      },
      "HealthResponse": {
        "type": "object",
        "properties": {
          "status": {
            "type": "string",
            "enum": [
              "healthy",
              "unhealthy"
            ],
            "description": "The health status of the service"
          },
          "service": {
            "type": "string",
            "description": "The name of the service"
          },
          "version": {
            "type": "string",
            "description": "The version of the service"
          },
          "timestamp": {
            "type": "string",
            "format": "date-time",
            "description": "The current timestamp"
          },
          "uptime": {
            "type": "number",
            "description": "The uptime in seconds"
          }
        },
        "required": [
          "status",
          "service"
        ]
      },
      "Error": {
        "type": "object",
        "properties": {
          "error": {
            "type": "object",
            "properties": {
              "message": {
                "type": "string",
                "description": "A human-readable error message"
              },
              "type": {
                "type": "string",
                "description": "The type of error"
              },
              "code": {
                "type": "string",
                "description": "The error code"
              },
              "param": {
                "type": "string",
                "description": "The parameter that caused the error"
              }
            }
          }
        },
        "required": [
          "error"
        ]
      }
    },
    "responses": {
      "BadRequest": {
        "description": "Bad Request",
        "content": {
          "application/json": {
            "schema": {
              "$ref": "#/components/schemas/Error"
            },
            "example": {
              "error": {
                "message": "Invalid request parameters",
                "type": "invalid_request_error",
                "code": "invalid_parameters"
              }
            }
          }
        }
      },
      "Unauthorized": {
        "description": "Unauthorized",
        "content": {
          "application/json": {
            "schema": {
              "$ref": "#/components/schemas/Error"
            },
            "example": {
              "error": {
                "message": "Invalid API key",
                "type": "authentication_error",
                "code": "invalid_api_key"
              }
            }
          }
        }
      },
      "FileTooLarge": {
        "description": "File Too Large",
        "content": {
          "application/json": {
            "schema": {
              "$ref": "#/components/schemas/Error"
            },
            "example": {
              "error": {
                "message": "File size exceeds maximum allowed size",
                "type": "invalid_request_error",
                "code": "file_too_large"
              }
            }
          }
        }
      },
      "ValidationError": {
        "description": "Validation Error",
        "content": {
          "application/json": {
            "schema": {
              "$ref": "#/components/schemas/Error"
            },
            "example": {
              "error": {
                "message": "Invalid file format",
                "type": "invalid_request_error",
                "code": "invalid_file_format"
              }
            }
          }
        }
      },
      "RateLimitExceeded": {
        "description": "Rate Limit Exceeded",
        "content": {
          "application/json": {
            "schema": {
              "$ref": "#/components/schemas/Error"
            },
            "example": {
              "error": {
                "message": "Rate limit exceeded",
                "type": "rate_limit_error",
                "code": "rate_limit_exceeded"
              }
            }
          }
        }
      },
      "InternalServerError": {
        "description": "Internal Server Error",
        "content": {
          "application/json": {
            "schema": {
              "$ref": "#/components/schemas/Error"
            },
            "example": {
              "error": {
                "message": "An internal server error occurred",
                "type": "server_error",
                "code": "internal_error"
              }
            }
          }
        }
      },
      "NotFound": {
        "description": "Not Found",
        "content": {
          "application/json": {
            "schema": {
              "$ref": "#/components/schemas/Error"
            },
            "example": {
              "error": {
                "message": "Model not found",
                "type": "invalid_request_error",
                "code": "model_not_found"
              }
            }
          }
        }
      }
    }
  },
  "tags": [
    {
      "name": "Audio",
      "description": "Audio transcription and translation operations"
    },
    {
      "name": "Models",
      "description": "Model management operations"
    },
    {
      "name": "System",
      "description": "System health and status operations"
    },
    {
      "name": "Real-time",
      "description": "Real-time audio processing via WebSocket"
    }
  ]
 }
--- a/requirements/server.txt
+++ b/requirements/server.txt
@ -9,5 +9,7 @@ av
 jiwer
 evaluate
 numpy<2
-openai-whisper==20240930
+tokenizers==0.20.3
-tokenizers==0.20.3
+flask==3.0.0
 flask-sock
 websocket-client
--- a/scratch/dashboard.html
+++ b/scratch/dashboard.html
@ -0,0 +1,727 @@
 <!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>WhisperLive Dashboard</title>
    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
    <style>
        /* Theme palette: custom properties that the rules in this stylesheet read through var(). */
        :root {
            --primary: #4f46e5;
            --primary-hover: #4338ca; /* hover shade, used by .btn:hover */
            --bg-color: #0f172a; /* page background, applied on body */
            --card-bg: rgba(30, 41, 59, 0.7); /* partially transparent panel fill, used by .glass-panel */
            --text-main: #f8fafc;
            --text-muted: #94a3b8;
            --border: rgba(255, 255, 255, 0.1);
            --success: #10b981;
            --danger: #ef4444;
            --warning: #f59e0b;
        }
        * {
            box-sizing: border-box;
            margin: 0;
            padding: 0;
        }
        body {
            font-family: 'Inter', sans-serif;
            background-color: var(--bg-color);
            color: var(--text-main);
            min-height: 100vh;
            background-image:
                radial-gradient(at 0% 0%, rgba(79, 70, 229, 0.15) 0px, transparent 50%),
                radial-gradient(at 100% 100%, rgba(16, 185, 129, 0.1) 0px, transparent 50%);
            background-attachment: fixed;
            padding: 2rem;
        }
        .container {
            max-width: 1000px;
            margin: 0 auto;
        }
        .header {
            text-align: center;
            margin-bottom: 2rem;
        }
        /* Page title drawn as gradient text: the gradient background is clipped to the
           glyph shapes and the text fill is made transparent so the gradient shows through. */
        .header h1 {
            font-size: 2.5rem;
            font-weight: 700;
            background: linear-gradient(to right, #818cf8, #34d399);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            margin-bottom: 0.5rem;
        }
        .header p {
            color: var(--text-muted);
        }
        /* Card container: partially transparent fill (--card-bg) over a 12px blur of
           whatever is behind it, with a thin border, rounded corners and a drop shadow. */
        .glass-panel {
            background: var(--card-bg);
            backdrop-filter: blur(12px);
            -webkit-backdrop-filter: blur(12px); /* -webkit- prefixed duplicate of the line above */
            border: 1px solid var(--border);
            border-radius: 1rem;
            padding: 1.5rem;
            margin-bottom: 1.5rem;
            box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.3);
        }
        /* Config Section */
        .config-grid {
            display: grid;
            grid-template-columns: 1fr 1fr;
            gap: 1rem;
        }
        /* On viewports 768px wide or narrower, collapse the two-column config grid to one column. */
        @media (max-width: 768px) {
            .config-grid {
                grid-template-columns: 1fr;
            }
        }
        .form-group {
            margin-bottom: 1rem;
        }
        .form-group label {
            display: block;
            font-size: 0.875rem;
            font-weight: 500;
            margin-bottom: 0.5rem;
            color: var(--text-muted);
        }
        input[type="text"],
        input[type="file"],
        select {
            width: 100%;
            padding: 0.75rem 1rem;
            background: rgba(15, 23, 42, 0.6);
            border: 1px solid var(--border);
            border-radius: 0.5rem;
            color: var(--text-main);
            font-size: 0.875rem;
            transition: all 0.2s;
        }
        input[type="text"]:focus,
        select:focus {
            outline: none;
            border-color: var(--primary);
            box-shadow: 0 0 0 2px rgba(79, 70, 229, 0.2);
        }
        /* Tabs */
        .tabs {
            display: flex;
            gap: 0.5rem;
            margin-bottom: 1rem;
            border-bottom: 1px solid var(--border);
            padding-bottom: 0.5rem;
        }
        .tab-btn {
            background: transparent;
            border: none;
            color: var(--text-muted);
            padding: 0.75rem 1.5rem;
            font-size: 1rem;
            font-weight: 500;
            cursor: pointer;
            border-radius: 0.5rem;
            transition: all 0.2s;
        }
        .tab-btn:hover {
            color: var(--text-main);
            background: rgba(255, 255, 255, 0.05);
        }
        .tab-btn.active {
            color: var(--text-main);
            background: var(--primary);
            box-shadow: 0 4px 6px -1px rgba(79, 70, 229, 0.4);
        }
        /* Tab panels are hidden by default; the .tab-content.active rule switches a panel
           to display: block. The fadeIn keyframes are attached here as the panel animation. */
        .tab-content {
            display: none;
            animation: fadeIn 0.3s ease-in-out;
        }
        .tab-content.active {
            display: block;
        }
        /* fadeIn: start fully transparent and shifted 5px down, end fully opaque at the
           element's normal position. Referenced by .tab-content. */
        @keyframes fadeIn {
            from {
                opacity: 0;
                transform: translateY(5px);
            }
            to {
                opacity: 1;
                transform: translateY(0);
            }
        }
        /* Buttons */
        .btn {
            background: var(--primary);
            color: white;
            border: none;
            padding: 0.75rem 1.5rem;
            border-radius: 0.5rem;
            font-weight: 600;
            cursor: pointer;
            transition: all 0.2s;
            display: inline-flex;
            align-items: center;
            justify-content: center;
            gap: 0.5rem;
            width: 100%;
        }
        .btn:hover {
            background: var(--primary-hover);
        }
        .btn:disabled {
            opacity: 0.5;
            cursor: not-allowed;
        }
        .btn-danger {
            background: var(--danger);
        }
        .btn-danger:hover {
            background: #dc2626;
        }
        .btn-success {
            background: var(--success);
        }
        .btn-success:hover {
            background: #059669;
        }
        /* Results / Live View */
        .transcript-box {
            background: rgba(15, 23, 42, 0.6);
            border: 1px solid var(--border);
            border-radius: 0.5rem;
            padding: 1.5rem;
            min-height: 200px;
            max-height: 400px;
            overflow-y: auto;
            margin-top: 1rem;
            line-height: 1.6;
        }
        .segment {
            margin-bottom: 0.75rem;
            padding-bottom: 0.75rem;
            border-bottom: 1px solid rgba(255, 255, 255, 0.05);
        }
        .segment:last-child {
            border-bottom: none;
            margin-bottom: 0;
            padding-bottom: 0;
        }
        .segment-time {
            font-size: 0.75rem;
            color: var(--primary);
            font-weight: 600;
            margin-bottom: 0.25rem;
        }
        .status-badge {
            display: inline-flex;
            align-items: center;
            gap: 0.3rem;
            padding: 0.25rem 0.75rem;
            border-radius: 9999px;
            font-size: 0.75rem;
            font-weight: 600;
        }
        .status-offline {
            background: rgba(239, 68, 68, 0.2);
            color: #fca5a5;
        }
        .status-online {
            background: rgba(16, 185, 129, 0.2);
            color: #6ee7b7;
        }
        .status-recording {
            background: rgba(239, 68, 68, 0.2);
            color: #fca5a5;
            animation: pulse 2s infinite;
        }
        @keyframes pulse {
            0% {
                box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.4);
            }
            70% {
                box-shadow: 0 0 0 10px rgba(239, 68, 68, 0);
            }
            100% {
                box-shadow: 0 0 0 0 rgba(239, 68, 68, 0);
            }
        }
        /* Code snippets */
        pre {
            background: #1e293b;
            padding: 1rem;
            border-radius: 0.5rem;
            overflow-x: auto;
            font-size: 0.875rem;
            color: #e2e8f0;
            border: 1px solid var(--border);
            margin-bottom: 1rem;
        }
        code {
            font-family: 'Courier New', Courier, monospace;
        }
        .loading-spinner {
            display: none;
            width: 24px;
            height: 24px;
            border: 3px solid rgba(255, 255, 255, 0.3);
            border-radius: 50%;
            border-top-color: white;
            animation: spin 1s ease-in-out infinite;
        }
        @keyframes spin {
            to {
                transform: rotate(360deg);
            }
        }
    </style>
 </head>
 <body>
    <div class="container">
        <div class="header">
            <h1>WhisperLive</h1>
            <p>High-Performance Real-Time Audio Transcription</p>
        </div>
        <!-- Configuration Panel -->
        <div class="glass-panel">
            <h3 style="margin-bottom: 1rem; font-size: 1.1rem;">Connection Settings</h3>
            <div class="config-grid">
                <div class="form-group">
                    <label>HTTP API URL (For File Upload & API)</label>
                    <input type="text" id="httpUrl" value="https://whisperlive.classroomcopilot.ai">
                </div>
                <div class="form-group">
                    <label>WebSocket URL (For Live Audio)</label>
                    <input type="text" id="wsUrl" value="wss://whisperlive.classroomcopilot.ai/ws">
                </div>
            </div>
            <div style="margin-top: 0.5rem; font-size: 0.8rem; color: var(--text-muted);">
                HTTP Status: <span id="httpStatus" class="status-badge status-offline">Checking...</span>
            </div>
        </div>
        <!-- Main Workspace -->
        <div class="glass-panel">
            <div class="tabs">
                <button class="tab-btn active" onclick="switchTab('file-tab')">File Upload</button>
                <button class="tab-btn" onclick="switchTab('live-tab')">Live Microphone</button>
                <button class="tab-btn" onclick="switchTab('api-tab')">API Usage</button>
            </div>
            <!-- Tab 1: File Upload -->
            <div id="file-tab" class="tab-content active">
                <form id="fileForm">
                    <div class="form-group">
                        <label>Audio File</label>
                        <input type="file" id="audioFile" accept=".wav,.mp3,.flac,.m4a,.ogg,.webm" required>
                    </div>
                    <div class="config-grid">
                        <div class="form-group">
                            <label>Language</label>
                            <select id="fileLanguage">
                                <option value="">Auto-detect</option>
                                <option value="en">English</option>
                                <option value="es">Spanish</option>
                                <option value="fr">French</option>
                            </select>
                        </div>
                        <div class="form-group">
                            <label>Task</label>
                            <select id="fileTask">
                                <option value="transcribe">Transcribe</option>
                                <option value="translate">Translate to English</option>
                            </select>
                        </div>
                    </div>
                    <button type="submit" class="btn" id="fileSubmitBtn">
                        <span>Transcribe File</span>
                        <div class="loading-spinner" id="fileSpinner"></div>
                    </button>
                </form>
                <div id="fileResult" style="display: none;">
                    <div class="transcript-box" id="fileTranscript"></div>
                </div>
            </div>
            <!-- Tab 2: Live Recording -->
            <div id="live-tab" class="tab-content">
                <div class="config-grid" style="margin-bottom: 1.5rem;">
                    <div class="form-group">
                        <label>Language</label>
                        <select id="liveLanguage">
                            <option value="en">English</option>
                            <option value="es">Spanish</option>
                            <option value="fr">French</option>
                        </select>
                    </div>
                    <div class="form-group">
                        <label>Task</label>
                        <select id="liveTask">
                            <option value="transcribe">Transcribe</option>
                            <option value="translate">Translate to English</option>
                        </select>
                    </div>
                </div>
                <div style="display: flex; gap: 1rem; align-items: center;">
                    <button id="recordBtn" class="btn btn-success" style="width: auto;">
                        <span id="recordIcon">🎤</span> <span id="recordText">Start Recording</span>
                    </button>
                    <span id="liveStatus" class="status-badge status-offline" style="display: none;">Not
                        connected</span>
                </div>
                <div class="transcript-box" id="liveTranscript">
                    <div style="color: var(--text-muted); text-align: center; margin-top: 3rem;">
                        Click Start Recording to begin live transcription...
                    </div>
                </div>
            </div>
            <!-- Tab 3: API Usage -->
            <div id="api-tab" class="tab-content">
                <h3 style="margin-bottom: 1rem;">OpenAI Compatible API</h3>
                <p style="color: var(--text-muted); margin-bottom: 1rem; font-size: 0.9rem;">
                    WhisperLive acts as a drop-in replacement for OpenAI's Whisper API. You can use any standard OpenAI
                    client by changing the base URL.
                </p>
                <h4 style="margin-bottom: 0.5rem; color: #cbd5e1;">Python (openai package)</h4>
                <pre><code id="pythonSnippet">from openai import OpenAI
 client = OpenAI(
    api_key="sk-no-key-required",
    base_url="https://whisperlive.classroomcopilot.ai/v1/"
 )
 with open("audio.wav", "rb") as file:
    transcription = client.audio.transcriptions.create(
        file=file,
        model="base",
        response_format="verbose_json"
    )
 print(transcription.text)</code></pre>
                <h4 style="margin-bottom: 0.5rem; color: #cbd5e1;">cURL</h4>
                <pre><code id="curlSnippet">curl https://whisperlive.classroomcopilot.ai/v1/audio/transcriptions \
  -H "Content-Type: multipart/form-data" \
  -F file="@audio.wav" \
  -F model="base" \
  -F response_format="verbose_json"</code></pre>
            </div>
        </div>
    </div>
    <script>
        // Cached references to the connection-settings controls used throughout the script
        const httpUrlInput = document.getElementById('httpUrl');
        const wsUrlInput = document.getElementById('wsUrl');
        const httpStatus = document.getElementById('httpStatus');
        // Page start-up: when the page is served from a real (non-localhost) host,
        // point both URL fields at that host; otherwise keep the defaults from the markup.
        window.onload = function () {
            const { hostname, origin } = window.location;
            const servedFromRemoteHost = hostname !== '' && hostname !== 'localhost';
            if (servedFromRemoteHost) {
                httpUrlInput.value = origin;
                // http -> ws and https -> wss, because only the leading "http" is replaced
                wsUrlInput.value = origin.replace(/^http/, 'ws') + '/ws';
            }
            checkHealth();
            updateSnippets();
        };
        // Re-probe the server and refresh the code samples whenever the HTTP URL is edited
        httpUrlInput.addEventListener('change', function () {
            checkHealth();
            updateSnippets();
        });
        // Tab Switching
        /**
         * Show the tab panel with id `tabId` and highlight its button.
         *
         * @param {string} tabId - id of the `.tab-content` element to activate.
         * @param {Element} [triggerBtn] - button to highlight; when omitted, the
         *     `.tab-btn` whose inline `onclick` mentions `tabId` is used.
         *
         * The button is looked up explicitly instead of through the deprecated
         * implicit `window.event`, so the function also works when called from
         * code rather than from a click, and when the click lands on a child node.
         */
        function switchTab(tabId, triggerBtn) {
            const panel = document.getElementById(tabId);
            if (!panel) return; // unknown tab: leave the current selection untouched
            document.querySelectorAll('.tab-content').forEach(t => t.classList.remove('active'));
            const buttons = Array.from(document.querySelectorAll('.tab-btn'));
            buttons.forEach(b => b.classList.remove('active'));
            panel.classList.add('active');
            const activeBtn = triggerBtn
                || buttons.find(b => (b.getAttribute('onclick') || '').includes(`'${tabId}'`));
            if (activeBtn) activeBtn.classList.add('active');
        }
        // Health Check
        /**
         * Probe `<HTTP API URL>/health` and update the HTTP status badge.
         *
         * The badge shows "Online" only for a 2xx response; any other status or a
         * failed request (network error, CORS, bad URL) shows "Offline".
         */
        async function checkHealth() {
            // Strip one trailing slash, like the other URL consumers on this page,
            // so "https://host/" does not turn into "https://host//health".
            const baseUrl = httpUrlInput.value.endsWith('/') ? httpUrlInput.value.slice(0, -1) : httpUrlInput.value;
            let online = false;
            try {
                const res = await fetch(`${baseUrl}/health`);
                online = res.ok;
            } catch (e) {
                online = false;
            }
            if (online) {
                httpStatus.className = 'status-badge status-online';
                httpStatus.textContent = '✅ Online';
            } else {
                httpStatus.className = 'status-badge status-offline';
                httpStatus.textContent = '❌ Offline';
            }
        }
        // Update Code Snippets
        // Regenerates the Python and cURL examples on the API tab so that they
        // target the URL currently entered in the HTTP API field.
        function updateSnippets() {
            // Remove a single trailing slash so the joined paths never contain "//"
            const baseUrl = httpUrlInput.value.replace(/\/$/, '');
            const pythonExample = `from openai import OpenAI\n\nclient = OpenAI(\n    api_key="sk-no-key-required",\n    base_url="${baseUrl}/v1/"\n)\n\nwith open("audio.wav", "rb") as file:\n    transcription = client.audio.transcriptions.create(\n        file=file,\n        model="base",\n        response_format="verbose_json"\n    )\n    \nprint(transcription.text)`;
            const curlExample = `curl ${baseUrl}/v1/audio/transcriptions \\\n  -H "Content-Type: multipart/form-data" \\\n  -F file="@audio.wav" \\\n  -F model="base" \\\n  -F response_format="verbose_json"`;
            document.getElementById('pythonSnippet').textContent = pythonExample;
            document.getElementById('curlSnippet').textContent = curlExample;
        }
        // Utility: Format Time
        /**
         * Format a duration in seconds as "M:SS.ss".
         *
         * @param {number} seconds - duration in seconds.
         * @returns {string} "0:00" for falsy input (0, undefined, null, NaN),
         *     otherwise minutes, a colon, and zero-padded seconds with two decimals.
         *
         * The value is rounded to hundredths once, before splitting into minutes
         * and seconds. Rounding the seconds part on its own produced outputs such
         * as "0:60.00" for 59.999.
         */
        function formatTime(seconds) {
            if (!seconds) return "0:00";
            const totalHundredths = Math.round(seconds * 100);
            const mins = Math.floor(totalHundredths / 6000);
            const secs = ((totalHundredths % 6000) / 100).toFixed(2);
            return `${mins}:${secs.padStart(5, '0')}`;
        }
        // ==========================================
        // FEATURE 1: FILE TRANSCRIPTION
        // ==========================================
        // Submit handler for the upload form: posts the selected audio file to the
        // transcription (or translation) endpoint and renders the returned
        // segments, or an error message, into the result box.
        document.getElementById('fileForm').addEventListener('submit', async (e) => {
            e.preventDefault();
            const file = document.getElementById('audioFile').files[0];
            if (!file) return;
            // Transcript and error text come from the server and are inserted via
            // innerHTML below, so they are escaped first to prevent markup or
            // script injection.
            const escapeHtml = (value) => String(value)
                .replace(/&/g, '&amp;')
                .replace(/</g, '&lt;')
                .replace(/>/g, '&gt;')
                .replace(/"/g, '&quot;')
                .replace(/'/g, '&#39;');
            const btn = document.getElementById('fileSubmitBtn');
            const spinner = document.getElementById('fileSpinner');
            const resultBox = document.getElementById('fileResult');
            const transcriptBox = document.getElementById('fileTranscript');
            // Busy state: block double submission and hide any previous result
            btn.disabled = true;
            spinner.style.display = 'block';
            resultBox.style.display = 'none';
            const formData = new FormData();
            formData.append('file', file);
            formData.append('model', 'base');
            formData.append('response_format', 'verbose_json');
            const lang = document.getElementById('fileLanguage').value;
            // An empty value means auto-detect, so the field is omitted entirely
            if (lang) formData.append('language', lang);
            const task = document.getElementById('fileTask').value;
            const baseUrl = httpUrlInput.value.endsWith('/') ? httpUrlInput.value.slice(0, -1) : httpUrlInput.value;
            const endpoint = task === 'translate' ? `${baseUrl}/v1/audio/translations` : `${baseUrl}/v1/audio/transcriptions`;
            try {
                const response = await fetch(endpoint, { method: 'POST', body: formData });
                const data = await response.json();
                resultBox.style.display = 'block';
                if (response.ok) {
                    let html = '';
                    if (data.segments && data.segments.length > 0) {
                        data.segments.forEach(seg => {
                            html += `<div class="segment"><div class="segment-time">${formatTime(seg.start)} - ${formatTime(seg.end)}</div><div class="segment-text">${escapeHtml(seg.text)}</div></div>`;
                        });
                    } else if (data.text) {
                        // No per-segment timing in the response: show the plain text
                        html += `<div class="segment"><div class="segment-text">${escapeHtml(data.text)}</div></div>`;
                    }
                    transcriptBox.innerHTML = html;
                } else {
                    transcriptBox.innerHTML = `<div style="color: var(--danger)">Error: ${escapeHtml(data.error?.message || JSON.stringify(data.error))}</div>`;
                }
            } catch (error) {
                // Reached for failed requests and for response bodies that are not JSON
                resultBox.style.display = 'block';
                transcriptBox.innerHTML = `<div style="color: var(--danger)">Network Error: ${escapeHtml(error.message)}</div>`;
            } finally {
                btn.disabled = false;
                spinner.style.display = 'none';
            }
        });
        // ==========================================
        // FEATURE 2: LIVE WEBSOCKET TRANSCRIPTION
        // ==========================================
        // Shared state of the current live session; every value is reset by stopRecording()
        let ws = null;            // WebSocket to the transcription server
        let audioContext = null;  // AudioContext that drives the capture graph
        let mediaStream = null;   // microphone MediaStream
        let processor = null;     // ScriptProcessorNode that forwards audio buffers
        let isRecording = false;  // true between startRecording() and stopRecording()
        const recordBtn = document.getElementById('recordBtn');
        const liveStatus = document.getElementById('liveStatus');
        const liveTranscript = document.getElementById('liveTranscript');
        // The single record button toggles between starting and stopping a session
        recordBtn.addEventListener('click', async function () {
            if (!isRecording) {
                startRecording();
                return;
            }
            stopRecording();
        });
        /**
         * Open the live-transcription WebSocket and switch the UI to recording mode.
         *
         * Microphone capture starts only after the server answers "SERVER_READY".
         * Each handler checks that its socket is still the session's socket, so
         * late events from a previous session cannot stop or overwrite a newer one.
         * A failed WebSocket construction is reported as "Connection Error", and a
         * failed microphone start as "Microphone Error".
         */
        async function startRecording() {
            liveTranscript.innerHTML = '';
            liveStatus.style.display = 'inline-flex';
            liveStatus.className = 'status-badge status-offline';
            liveStatus.textContent = 'Connecting...';
            const showOffline = (text) => {
                liveStatus.className = 'status-badge status-offline';
                liveStatus.textContent = text;
            };
            let socket;
            try {
                // 1. Connect WebSocket (the constructor throws on a malformed URL)
                socket = new WebSocket(wsUrlInput.value);
            } catch (err) {
                console.error(err);
                showOffline('Connection Error');
                stopRecording();
                return;
            }
            ws = socket;
            // "active": this socket still belongs to the current session.
            // "superseded": a newer session has replaced it.
            // Neither (ws === null) means the session was stopped and the socket is
            // only draining its final messages.
            const isActive = () => ws === socket;
            const isSuperseded = () => ws !== null && ws !== socket;
            socket.onopen = () => {
                if (!isActive()) {
                    // Stopped or replaced while still connecting: do not start a session
                    socket.close();
                    return;
                }
                // Send options to server
                const options = {
                    uid: "web-" + Math.random().toString(36).substring(7),
                    language: document.getElementById('liveLanguage').value,
                    task: document.getElementById('liveTask').value,
                    model: "base",
                    use_vad: true
                };
                socket.send(JSON.stringify(options));
            };
            socket.onmessage = async (event) => {
                if (isSuperseded()) return;
                let data;
                try {
                    data = JSON.parse(event.data);
                } catch (parseErr) {
                    console.error('Ignoring non-JSON WebSocket message', parseErr);
                    return;
                }
                if (data.message === "SERVER_READY") {
                    if (!isActive()) return;
                    liveStatus.className = 'status-badge status-recording';
                    liveStatus.innerHTML = '🔴 Recording';
                    try {
                        await startAudioCapture();
                    } catch (micErr) {
                        // Permission denied, no input device, unsupported browser, ...
                        console.error(micErr);
                        stopRecording();
                        showOffline('Microphone Error');
                    }
                } else if (data.segments) {
                    // Still rendered after a stop so the final segments are shown
                    renderLiveSegments(data.segments);
                } else if (data.status === "WAIT") {
                    if (isActive()) {
                        liveStatus.textContent = `Waiting in queue (Est: ${data.message} min)`;
                    }
                } else if (data.message === "DISCONNECT") {
                    if (isActive()) {
                        stopRecording();
                        showOffline('Disconnected by server');
                    }
                }
            };
            socket.onerror = (err) => {
                if (!isActive()) return;
                console.error('WebSocket Error', err);
                stopRecording();
                showOffline('Connection Error');
            };
            socket.onclose = () => {
                if (isActive()) stopRecording();
            };
            // Update UI
            isRecording = true;
            recordBtn.className = 'btn btn-danger';
            document.getElementById('recordIcon').textContent = '⏹';
            document.getElementById('recordText').textContent = 'Stop Recording';
        }
        /**
         * Acquire the microphone and forward its audio to the WebSocket.
         *
         * Creates an AudioContext requesting a 16000 Hz sample rate and sends every
         * 4096-frame mono buffer to the server as raw Float32 bytes.
         * Rejects if microphone access is denied or unavailable.
         */
        async function startAudioCapture() {
            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
            if (!isRecording) {
                // Recording was stopped while the permission prompt was open.
                // stopRecording() has already run, so release the stream here or the
                // microphone would stay open.
                stream.getTracks().forEach(track => track.stop());
                return;
            }
            mediaStream = stream;
            audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
            const source = audioContext.createMediaStreamSource(mediaStream);
            // Create a ScriptProcessorNode with bufferSize of 4096 and a single input/output channel
            processor = audioContext.createScriptProcessor(4096, 1, 1);
            processor.onaudioprocess = function (e) {
                if (!isRecording || !ws || ws.readyState !== WebSocket.OPEN) return;
                const float32Array = e.inputBuffer.getChannelData(0);
                ws.send(float32Array.buffer);
            };
            source.connect(processor);
            // Connected to the destination as in the original graph; presumably
            // needed for onaudioprocess to fire in some browsers — TODO confirm
            processor.connect(audioContext.destination);
        }
        /**
         * Tear down the live session and reset the record button.
         *
         * Disconnects the audio graph, stops the microphone tracks, closes the
         * AudioContext, and shuts the WebSocket down. Calling it when nothing is
         * running is harmless.
         */
        function stopRecording() {
            isRecording = false;
            if (processor) {
                processor.disconnect();
                processor = null;
            }
            if (mediaStream) {
                mediaStream.getTracks().forEach(track => track.stop());
                mediaStream = null;
            }
            if (audioContext) {
                audioContext.close();
                audioContext = null;
            }
            if (ws) {
                // Keep a local reference. The delayed close below must act on THIS
                // socket; by the time the timer fires the global `ws` is null or
                // belongs to a newer session.
                const socket = ws;
                ws = null;
                if (socket.readyState === WebSocket.OPEN) {
                    socket.send("END_OF_AUDIO");
                    // Grace period before closing, so the server can still deliver
                    // its final messages
                    setTimeout(() => socket.close(), 1000);
                } else if (socket.readyState === WebSocket.CONNECTING) {
                    // Abort a pending connection. Handlers are detached first so
                    // the resulting error/close events do not change the UI.
                    socket.onopen = socket.onmessage = socket.onerror = socket.onclose = null;
                    socket.close();
                }
            }
            recordBtn.className = 'btn btn-success';
            document.getElementById('recordIcon').textContent = '🎤';
            document.getElementById('recordText').textContent = 'Start Recording';
            // Only replace the "Recording" badge; error states set by callers are kept
            if (liveStatus.textContent === '🔴 Recording') {
                liveStatus.className = 'status-badge status-offline';
                liveStatus.textContent = 'Stopped';
            }
        }
        let liveSegments = [];
        /**
         * Replace the live transcript with `segments` and scroll to the bottom.
         *
         * @param {Array<{start?: *, end?: *, text: *}>} segments - segments as
         *     received from the server; the time line is shown only when both
         *     `start` and `end` are present.
         *
         * Segment text comes from the server and is inserted via innerHTML, so it
         * is HTML-escaped first to prevent markup or script injection.
         */
        function renderLiveSegments(segments) {
            const escapeHtml = (value) => String(value)
                .replace(/&/g, '&amp;')
                .replace(/</g, '&lt;')
                .replace(/>/g, '&gt;')
                .replace(/"/g, '&quot;')
                .replace(/'/g, '&#39;');
            const html = segments.map(seg => {
                const timeHtml = (seg.start !== undefined && seg.end !== undefined)
                    ? `<div class="segment-time">${formatTime(seg.start)} - ${formatTime(seg.end)}</div>`
                    : '';
                return `<div class="segment">${timeHtml}<div class="segment-text">${escapeHtml(seg.text)}</div></div>`;
            }).join('');
            liveTranscript.innerHTML = html;
            // Keep the newest segment in view
            liveTranscript.scrollTop = liveTranscript.scrollHeight;
        }
    </script>
 </body>
 </html>
--- a/scratch/test_ws.py
+++ b/scratch/test_ws.py
@ -0,0 +1,9 @@
 import websockets
 from websockets.sync.server import serve
 def handler(websocket):
    """Print the request path of a new connection, then send it "Hello"."""
    request_path = websocket.request.path
    print("Path:", request_path)
    websocket.send("Hello")
 # Listen on the loopback interface only and serve connections until interrupted.
 LISTEN_ADDRESS = ("127.0.0.1", 8765)
 with serve(handler, *LISTEN_ADDRESS) as ws_server:
    ws_server.serve_forever()
--- a/test_form.html
+++ b/test_form.html
@ -0,0 +1,727 @@
 <!DOCTYPE html>
 <html lang="en">
 <head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>WhisperLive Dashboard</title>
    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
    <style>
        :root {
            --primary: #4f46e5;
            --primary-hover: #4338ca;
            --bg-color: #0f172a;
            --card-bg: rgba(30, 41, 59, 0.7);
            --text-main: #f8fafc;
            --text-muted: #94a3b8;
            --border: rgba(255, 255, 255, 0.1);
            --success: #10b981;
            --danger: #ef4444;
            --warning: #f59e0b;
        }
        * {
            box-sizing: border-box;
            margin: 0;
            padding: 0;
        }
        body {
            font-family: 'Inter', sans-serif;
            background-color: var(--bg-color);
            color: var(--text-main);
            min-height: 100vh;
            background-image:
                radial-gradient(at 0% 0%, rgba(79, 70, 229, 0.15) 0px, transparent 50%),
                radial-gradient(at 100% 100%, rgba(16, 185, 129, 0.1) 0px, transparent 50%);
            background-attachment: fixed;
            padding: 2rem;
        }
        .container {
            max-width: 1000px;
            margin: 0 auto;
        }
        .header {
            text-align: center;
            margin-bottom: 2rem;
        }
        .header h1 {
            font-size: 2.5rem;
            font-weight: 700;
            background: linear-gradient(to right, #818cf8, #34d399);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            margin-bottom: 0.5rem;
        }
        .header p {
            color: var(--text-muted);
        }
        .glass-panel {
            background: var(--card-bg);
            backdrop-filter: blur(12px);
            -webkit-backdrop-filter: blur(12px);
            border: 1px solid var(--border);
            border-radius: 1rem;
            padding: 1.5rem;
            margin-bottom: 1.5rem;
            box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.3);
        }
        /* Config Section */
        .config-grid {
            display: grid;
            grid-template-columns: 1fr 1fr;
            gap: 1rem;
        }
        @media (max-width: 768px) {
            .config-grid {
                grid-template-columns: 1fr;
            }
        }
        .form-group {
            margin-bottom: 1rem;
        }
        .form-group label {
            display: block;
            font-size: 0.875rem;
            font-weight: 500;
            margin-bottom: 0.5rem;
            color: var(--text-muted);
        }
        input[type="text"],
        input[type="file"],
        select {
            width: 100%;
            padding: 0.75rem 1rem;
            background: rgba(15, 23, 42, 0.6);
            border: 1px solid var(--border);
            border-radius: 0.5rem;
            color: var(--text-main);
            font-size: 0.875rem;
            transition: all 0.2s;
        }
        input[type="text"]:focus,
        select:focus {
            outline: none;
            border-color: var(--primary);
            box-shadow: 0 0 0 2px rgba(79, 70, 229, 0.2);
        }
        /* Tabs */
        .tabs {
            display: flex;
            gap: 0.5rem;
            margin-bottom: 1rem;
            border-bottom: 1px solid var(--border);
            padding-bottom: 0.5rem;
        }
        .tab-btn {
            background: transparent;
            border: none;
            color: var(--text-muted);
            padding: 0.75rem 1.5rem;
            font-size: 1rem;
            font-weight: 500;
            cursor: pointer;
            border-radius: 0.5rem;
            transition: all 0.2s;
        }
        .tab-btn:hover {
            color: var(--text-main);
            background: rgba(255, 255, 255, 0.05);
        }
        .tab-btn.active {
            color: var(--text-main);
            background: var(--primary);
            box-shadow: 0 4px 6px -1px rgba(79, 70, 229, 0.4);
        }
        .tab-content {
            display: none;
            animation: fadeIn 0.3s ease-in-out;
        }
        .tab-content.active {
            display: block;
        }
        @keyframes fadeIn {
            from {
                opacity: 0;
                transform: translateY(5px);
            }
            to {
                opacity: 1;
                transform: translateY(0);
            }
        }
        /* Buttons */
        .btn {
            background: var(--primary);
            color: white;
            border: none;
            padding: 0.75rem 1.5rem;
            border-radius: 0.5rem;
            font-weight: 600;
            cursor: pointer;
            transition: all 0.2s;
            display: inline-flex;
            align-items: center;
            justify-content: center;
            gap: 0.5rem;
            width: 100%;
        }
        .btn:hover {
            background: var(--primary-hover);
        }
        .btn:disabled {
            opacity: 0.5;
            cursor: not-allowed;
        }
        .btn-danger {
            background: var(--danger);
        }
        .btn-danger:hover {
            background: #dc2626;
        }
        .btn-success {
            background: var(--success);
        }
        .btn-success:hover {
            background: #059669;
        }
        /* Results / Live View */
        .transcript-box {
            background: rgba(15, 23, 42, 0.6);
            border: 1px solid var(--border);
            border-radius: 0.5rem;
            padding: 1.5rem;
            min-height: 200px;
            max-height: 400px;
            overflow-y: auto;
            margin-top: 1rem;
            line-height: 1.6;
        }
        .segment {
            margin-bottom: 0.75rem;
            padding-bottom: 0.75rem;
            border-bottom: 1px solid rgba(255, 255, 255, 0.05);
        }
        .segment:last-child {
            border-bottom: none;
            margin-bottom: 0;
            padding-bottom: 0;
        }
        .segment-time {
            font-size: 0.75rem;
            color: var(--primary);
            font-weight: 600;
            margin-bottom: 0.25rem;
        }
        .status-badge {
            display: inline-flex;
            align-items: center;
            gap: 0.3rem;
            padding: 0.25rem 0.75rem;
            border-radius: 9999px;
            font-size: 0.75rem;
            font-weight: 600;
        }
        .status-offline {
            background: rgba(239, 68, 68, 0.2);
            color: #fca5a5;
        }
        .status-online {
            background: rgba(16, 185, 129, 0.2);
            color: #6ee7b7;
        }
        .status-recording {
            background: rgba(239, 68, 68, 0.2);
            color: #fca5a5;
            animation: pulse 2s infinite;
        }
        @keyframes pulse {
            0% {
                box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.4);
            }
            70% {
                box-shadow: 0 0 0 10px rgba(239, 68, 68, 0);
            }
            100% {
                box-shadow: 0 0 0 0 rgba(239, 68, 68, 0);
            }
        }
        /* Code snippets */
        pre {
            background: #1e293b;
            padding: 1rem;
            border-radius: 0.5rem;
            overflow-x: auto;
            font-size: 0.875rem;
            color: #e2e8f0;
            border: 1px solid var(--border);
            margin-bottom: 1rem;
        }
        code {
            font-family: 'Courier New', Courier, monospace;
        }
        .loading-spinner {
            display: none;
            width: 24px;
            height: 24px;
            border: 3px solid rgba(255, 255, 255, 0.3);
            border-radius: 50%;
            border-top-color: white;
            animation: spin 1s ease-in-out infinite;
        }
        @keyframes spin {
            to {
                transform: rotate(360deg);
            }
        }
    </style>
 </head>
 <body>
    <div class="container">
        <div class="header">
            <h1>WhisperLive</h1>
            <p>High-Performance Real-Time Audio Transcription</p>
        </div>
        <!-- Configuration Panel -->
        <div class="glass-panel">
            <h3 style="margin-bottom: 1rem; font-size: 1.1rem;">Connection Settings</h3>
            <div class="config-grid">
                <div class="form-group">
                    <label>HTTP API URL (For File Upload & API)</label>
                    <input type="text" id="httpUrl" value="https://whisperlive.classroomcopilot.ai">
                </div>
                <div class="form-group">
                    <label>WebSocket URL (For Live Audio)</label>
                    <input type="text" id="wsUrl" value="wss://whisperlive.classroomcopilot.ai/ws">
                </div>
            </div>
            <div style="margin-top: 0.5rem; font-size: 0.8rem; color: var(--text-muted);">
                HTTP Status: <span id="httpStatus" class="status-badge status-offline">Checking...</span>
            </div>
        </div>
        <!-- Main Workspace -->
        <div class="glass-panel">
            <div class="tabs">
                <button class="tab-btn active" onclick="switchTab('file-tab')">File Upload</button>
                <button class="tab-btn" onclick="switchTab('live-tab')">Live Microphone</button>
                <button class="tab-btn" onclick="switchTab('api-tab')">API Usage</button>
            </div>
            <!-- Tab 1: File Upload -->
            <!-- Every <label> is tied to its control through for/id, so clicking the label
                 focuses the control and assistive technology can announce the field name. -->
            <div id="file-tab" class="tab-content active">
                <form id="fileForm">
                    <div class="form-group">
                        <label for="audioFile">Audio File</label>
                        <input type="file" id="audioFile" accept=".wav,.mp3,.flac,.m4a,.ogg,.webm" required>
                    </div>
                    <div class="config-grid">
                        <div class="form-group">
                            <label for="fileLanguage">Language</label>
                            <select id="fileLanguage">
                                <option value="">Auto-detect</option>
                                <option value="en">English</option>
                                <option value="es">Spanish</option>
                                <option value="fr">French</option>
                            </select>
                        </div>
                        <div class="form-group">
                            <label for="fileTask">Task</label>
                            <select id="fileTask">
                                <option value="transcribe">Transcribe</option>
                                <option value="translate">Translate to English</option>
                            </select>
                        </div>
                    </div>
                    <button type="submit" class="btn" id="fileSubmitBtn">
                        <span>Transcribe File</span>
                        <div class="loading-spinner" id="fileSpinner"></div>
                    </button>
                </form>
                <!-- Hidden until the submit handler has a transcript or an error to show -->
                <div id="fileResult" style="display: none;">
                    <div class="transcript-box" id="fileTranscript"></div>
                </div>
            </div>
            <!-- Tab 2: Live Recording -->
            <!-- Every <label> is tied to its control through for/id, so clicking the label
                 focuses the control and assistive technology can announce the field name. -->
            <div id="live-tab" class="tab-content">
                <div class="config-grid" style="margin-bottom: 1.5rem;">
                    <div class="form-group">
                        <label for="liveLanguage">Language</label>
                        <select id="liveLanguage">
                            <option value="en">English</option>
                            <option value="es">Spanish</option>
                            <option value="fr">French</option>
                        </select>
                    </div>
                    <div class="form-group">
                        <label for="liveTask">Task</label>
                        <select id="liveTask">
                            <option value="transcribe">Transcribe</option>
                            <option value="translate">Translate to English</option>
                        </select>
                    </div>
                </div>
                <div style="display: flex; gap: 1rem; align-items: center;">
                    <button id="recordBtn" class="btn btn-success" style="width: auto;">
                        <span id="recordIcon">🎤</span> <span id="recordText">Start Recording</span>
                    </button>
                    <!-- Hidden until a recording session is started; the script then updates its class and text -->
                    <span id="liveStatus" class="status-badge status-offline" style="display: none;">Not
                        connected</span>
                </div>
                <div class="transcript-box" id="liveTranscript">
                    <div style="color: var(--text-muted); text-align: center; margin-top: 3rem;">
                        Click Start Recording to begin live transcription...
                    </div>
                </div>
            </div>
            <!-- Tab 3: API Usage -->
            <!-- The text inside #pythonSnippet and #curlSnippet is only a placeholder:
                 updateSnippets() overwrites both on page load and whenever the HTTP URL
                 field changes, substituting the configured base URL. -->
            <div id="api-tab" class="tab-content">
                <h3 style="margin-bottom: 1rem;">OpenAI Compatible API</h3>
                <p style="color: var(--text-muted); margin-bottom: 1rem; font-size: 0.9rem;">
                    WhisperLive acts as a drop-in replacement for OpenAI's Whisper API. You can use any standard OpenAI
                    client by changing the base URL.
                </p>
                <h4 style="margin-bottom: 0.5rem; color: #cbd5e1;">Python (openai package)</h4>
                <pre><code id="pythonSnippet">from openai import OpenAI
 client = OpenAI(
    api_key="sk-no-key-required",
    base_url="https://whisperlive.classroomcopilot.ai/v1/"
 )
 with open("audio.wav", "rb") as file:
    transcription = client.audio.transcriptions.create(
        file=file,
        model="base",
        response_format="verbose_json"
    )
 print(transcription.text)</code></pre>
                <h4 style="margin-bottom: 0.5rem; color: #cbd5e1;">cURL</h4>
                <pre><code id="curlSnippet">curl https://whisperlive.classroomcopilot.ai/v1/audio/transcriptions \
  -H "Content-Type: multipart/form-data" \
  -F file="@audio.wav" \
  -F model="base" \
  -F response_format="verbose_json"</code></pre>
            </div>
        </div>
    </div>
    <script>
        // DOM Elements
        const [httpUrlInput, wsUrlInput, httpStatus] =
            ['httpUrl', 'wsUrl', 'httpStatus'].map((id) => document.getElementById(id));
        // Initialization
        window.onload = function () {
            // When the page is served from a named host other than localhost, point both
            // URL fields at that host; otherwise the fields keep whatever values they hold.
            const { hostname, origin } = window.location;
            const servedFromHost = !(hostname === '' || hostname === 'localhost');
            if (servedFromHost) {
                httpUrlInput.value = origin;
                wsUrlInput.value = origin.replace(/^http/, 'ws') + '/ws';
            }
            checkHealth();
            updateSnippets();
        };
        // Re-probe the server and refresh the code samples whenever the HTTP URL is edited
        httpUrlInput.addEventListener('change', function () {
            checkHealth();
            updateSnippets();
        });
        // Tab Switching
        /**
         * Show the tab panel with the given id and highlight the matching tab button.
         *
         * The button is resolved explicitly instead of through the implicit global
         * `event`, which is deprecated and is undefined when this function is called
         * from anywhere other than an event handler.
         *
         * @param {string} tabId - id of the `.tab-content` element to show.
         * @param {Element} [button] - tab button to highlight. When omitted, the
         *     `.tab-btn` whose inline onclick attribute names `tabId` is used.
         */
        function switchTab(tabId, button) {
            const panel = document.getElementById(tabId);
            // Unknown id: keep the current selection rather than blanking every panel
            if (!panel) return;
            const buttons = Array.from(document.querySelectorAll('.tab-btn'));
            const trigger = button ||
                buttons.find(b => (b.getAttribute('onclick') || '').includes(`'${tabId}'`));
            document.querySelectorAll('.tab-content').forEach(t => t.classList.remove('active'));
            buttons.forEach(b => b.classList.remove('active'));
            panel.classList.add('active');
            if (trigger) trigger.classList.add('active');
        }
        // Health Check
        /**
         * Probe `<base URL>/health` and reflect the outcome in the HTTP status badge.
         *
         * Any non-2xx response or fetch failure is shown as Offline. A trailing slash
         * on the configured URL is dropped first, matching how the snippet generator
         * and the upload handler build their URLs, so the request never goes to
         * `//health`.
         */
        async function checkHealth() {
            const baseUrl = httpUrlInput.value.endsWith('/') ? httpUrlInput.value.slice(0, -1) : httpUrlInput.value;
            try {
                const res = await fetch(`${baseUrl}/health`);
                if (res.ok) {
                    httpStatus.className = 'status-badge status-online';
                    httpStatus.textContent = '✅ Online';
                } else throw new Error();
            } catch (e) {
                httpStatus.className = 'status-badge status-offline';
                httpStatus.textContent = '❌ Offline';
            }
        }
        // Update Code Snippets
        /**
         * Rewrite the Python and cURL samples on the API tab so that they point at the
         * URL currently entered in the HTTP URL field (one trailing slash removed).
         */
        function updateSnippets() {
            const enteredUrl = httpUrlInput.value;
            const serverUrl = enteredUrl.endsWith('/') ? enteredUrl.slice(0, -1) : enteredUrl;
            const pythonLines = [
                'from openai import OpenAI',
                '',
                'client = OpenAI(',
                '    api_key="sk-no-key-required",',
                `    base_url="${serverUrl}/v1/"`,
                ')',
                '',
                'with open("audio.wav", "rb") as file:',
                '    transcription = client.audio.transcriptions.create(',
                '        file=file,',
                '        model="base",',
                '        response_format="verbose_json"',
                '    )',
                '    ',
                'print(transcription.text)'
            ];
            const curlParts = [
                `curl ${serverUrl}/v1/audio/transcriptions`,
                '  -H "Content-Type: multipart/form-data"',
                '  -F file="@audio.wav"',
                '  -F model="base"',
                '  -F response_format="verbose_json"'
            ];
            document.getElementById('pythonSnippet').textContent = pythonLines.join('\n');
            // Each cURL argument line ends with " \" so the command can be pasted as one shell command
            document.getElementById('curlSnippet').textContent = curlParts.join(' \\\n');
        }
        // Utility: Format Time
        /**
         * Format a duration in seconds as `m:ss.cc` (minutes, zero-padded seconds,
         * hundredths).
         *
         * The value is rounded to hundredths BEFORE it is split into minutes and
         * seconds; rounding afterwards turned inputs such as 59.999 into "0:60.00".
         *
         * @param {number|string} seconds - duration in seconds; numeric strings are accepted.
         * @returns {string} formatted time, or "0:00" for a missing, zero or non-numeric value.
         */
        function formatTime(seconds) {
            if (!seconds) return "0:00";
            const totalHundredths = Math.round(Number(seconds) * 100);
            if (!Number.isFinite(totalHundredths)) return "0:00";
            const mins = Math.floor(totalHundredths / 6000);
            const secs = ((totalHundredths % 6000) / 100).toFixed(2);
            return `${mins}:${secs.padStart(5, '0')}`;
        }
        // ==========================================
        // FEATURE 1: FILE TRANSCRIPTION
        // ==========================================
        // Submit handler: uploads the selected file to the transcription or translation
        // endpoint (chosen by the Task selector) and renders the returned segments, or
        // an error message, into #fileTranscript.
        document.getElementById('fileForm').addEventListener('submit', async (e) => {
            e.preventDefault();
            const file = document.getElementById('audioFile').files[0];
            if (!file) return;
            const btn = document.getElementById('fileSubmitBtn');
            const spinner = document.getElementById('fileSpinner');
            const resultBox = document.getElementById('fileResult');
            const transcriptBox = document.getElementById('fileTranscript');
            // Server-provided text is always inserted through textContent, never innerHTML,
            // so markup inside a transcript or an error message is shown literally instead
            // of being parsed as HTML.
            const makeDiv = (className, text) => {
                const el = document.createElement('div');
                if (className) el.className = className;
                if (text !== undefined) el.textContent = text;
                return el;
            };
            const showNodes = (...nodes) => {
                transcriptBox.textContent = '';
                nodes.forEach(node => transcriptBox.appendChild(node));
            };
            const showError = (message) => {
                const el = makeDiv('', message);
                el.setAttribute('style', 'color: var(--danger)');
                showNodes(el);
            };
            btn.disabled = true;
            spinner.style.display = 'block';
            resultBox.style.display = 'none';
            const formData = new FormData();
            formData.append('file', file);
            formData.append('model', 'base');
            formData.append('response_format', 'verbose_json');
            const lang = document.getElementById('fileLanguage').value;
            // An empty value means auto-detect: the language field is then left out entirely
            if (lang) formData.append('language', lang);
            const task = document.getElementById('fileTask').value;
            const baseUrl = httpUrlInput.value.endsWith('/') ? httpUrlInput.value.slice(0, -1) : httpUrlInput.value;
            const endpoint = task === 'translate' ? `${baseUrl}/v1/audio/translations` : `${baseUrl}/v1/audio/transcriptions`;
            try {
                const response = await fetch(endpoint, { method: 'POST', body: formData });
                // The body is not guaranteed to be JSON (e.g. a proxy error page); a parse
                // failure must not be reported as a network error.
                let data = null;
                try {
                    data = await response.json();
                } catch (parseError) {
                    data = null;
                }
                resultBox.style.display = 'block';
                if (data === null || typeof data !== 'object') {
                    showError(`Error: unexpected non-JSON response (HTTP ${response.status})`);
                } else if (response.ok) {
                    const fragment = document.createDocumentFragment();
                    const addSegment = (text, timeLabel) => {
                        const row = makeDiv('segment');
                        if (timeLabel) row.appendChild(makeDiv('segment-time', timeLabel));
                        row.appendChild(makeDiv('segment-text', text));
                        fragment.appendChild(row);
                    };
                    if (data.segments && data.segments.length > 0) {
                        data.segments.forEach(seg => {
                            addSegment(seg.text, `${formatTime(seg.start)} - ${formatTime(seg.end)}`);
                        });
                    } else if (data.text) {
                        addSegment(data.text);
                    }
                    showNodes(fragment);
                } else {
                    showError(`Error: ${data.error?.message || JSON.stringify(data.error ?? data)}`);
                }
            } catch (error) {
                resultBox.style.display = 'block';
                showError(`Network Error: ${error.message}`);
            } finally {
                btn.disabled = false;
                spinner.style.display = 'none';
            }
        });
        // ==========================================
        // FEATURE 2: LIVE WEBSOCKET TRANSCRIPTION
        // ==========================================
        // Session state shared by startRecording, startAudioCapture and stopRecording.
        // Everything is null / false while no recording is in progress.
        let ws = null,
            audioContext = null,
            mediaStream = null,
            processor = null,
            isRecording = false;
        const recordBtn = document.getElementById('recordBtn'),
            liveStatus = document.getElementById('liveStatus'),
            liveTranscript = document.getElementById('liveTranscript');
        // The record button is a toggle: it stops an active session, otherwise starts one
        recordBtn.addEventListener('click', async () => {
            const action = isRecording ? stopRecording : startRecording;
            action();
        });
        /**
         * Open a WebSocket session to the URL in the WS URL field and switch the UI to
         * its recording state. Microphone capture only starts once the server sends
         * SERVER_READY.
         *
         * All handlers use their own `socket` reference and check that it is still the
         * active session (`ws === socket`) before touching shared state. Otherwise a
         * late close/error event from a previous socket would tear down a session that
         * was started after it.
         */
        async function startRecording() {
            liveTranscript.innerHTML = '';
            liveStatus.style.display = 'inline-flex';
            liveStatus.className = 'status-badge status-offline';
            liveStatus.textContent = 'Connecting...';
            let socket;
            try {
                // 1. Connect WebSocket (the constructor throws synchronously on a malformed URL)
                socket = new WebSocket(wsUrlInput.value);
            } catch (err) {
                console.error(err);
                liveStatus.className = 'status-badge status-offline';
                liveStatus.textContent = 'Connection Error';
                stopRecording();
                return;
            }
            ws = socket;
            const isCurrent = () => ws === socket;
            socket.onopen = () => {
                // Recording was stopped while the connection was still being established
                if (!isCurrent()) {
                    socket.close();
                    return;
                }
                // Send options to server
                const options = {
                    uid: "web-" + Math.random().toString(36).substring(7),
                    language: document.getElementById('liveLanguage').value,
                    task: document.getElementById('liveTask').value,
                    model: "base",
                    use_vad: true
                };
                socket.send(JSON.stringify(options));
            };
            socket.onmessage = async (event) => {
                // A newer session owns the transcript box and the status badge now
                if (ws !== null && !isCurrent()) return;
                const data = JSON.parse(event.data);
                if (data.message !== "SERVER_READY" && data.segments) {
                    // Still rendered after Stop so segments that arrive late are shown
                    renderLiveSegments(data.segments);
                    return;
                }
                if (!isCurrent()) return;
                if (data.message === "SERVER_READY") {
                    try {
                        await startAudioCapture();
                    } catch (err) {
                        // e.g. microphone permission denied or no input device
                        console.error(err);
                        if (!isCurrent()) return;
                        stopRecording();
                        liveStatus.className = 'status-badge status-offline';
                        liveStatus.textContent = 'Microphone Error';
                        return;
                    }
                    // The user may have pressed Stop while the permission prompt was open
                    if (!isCurrent()) return;
                    liveStatus.className = 'status-badge status-recording';
                    liveStatus.innerHTML = '🔴 Recording';
                } else if (data.status === "WAIT") {
                    liveStatus.textContent = `Waiting in queue (Est: ${data.message} min)`;
                } else if (data.message === "DISCONNECT") {
                    stopRecording();
                    liveStatus.className = 'status-badge status-offline';
                    liveStatus.textContent = 'Disconnected by server';
                }
            };
            socket.onerror = (err) => {
                console.error('WebSocket Error', err);
                if (!isCurrent()) return;
                stopRecording();
                liveStatus.className = 'status-badge status-offline';
                liveStatus.textContent = 'Connection Error';
            };
            socket.onclose = () => {
                if (!isCurrent()) return;
                stopRecording();
            };
            // Update UI
            isRecording = true;
            recordBtn.className = 'btn btn-danger';
            document.getElementById('recordIcon').textContent = '⏹';
            document.getElementById('recordText').textContent = 'Stop Recording';
        }
        /**
         * Ask for microphone access and stream the captured audio to the open WebSocket.
         *
         * An AudioContext is requested at 16000 Hz and a ScriptProcessorNode (4096-frame
         * buffer, one input and one output channel) sends the ArrayBuffer behind each
         * block of Float32 samples of channel 0 over `ws`.
         *
         * Rejects with whatever getUserMedia rejects with (permission denied, no device).
         */
        async function startAudioCapture() {
            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
            if (!isRecording) {
                // Recording was stopped while the permission prompt was open. stopRecording()
                // has already run and could not see this stream, so release the microphone here.
                stream.getTracks().forEach(track => track.stop());
                return;
            }
            mediaStream = stream;
            audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
            const source = audioContext.createMediaStreamSource(mediaStream);
            // Create a ScriptProcessorNode with bufferSize of 4096 and a single input/output channel
            processor = audioContext.createScriptProcessor(4096, 1, 1);
            processor.onaudioprocess = function (e) {
                // `ws` is reset to null by stopRecording(); a callback may still be in flight
                if (!isRecording || !ws || ws.readyState !== WebSocket.OPEN) return;
                const float32Array = e.inputBuffer.getChannelData(0);
                ws.send(float32Array.buffer);
            };
            source.connect(processor);
            processor.connect(audioContext.destination);
        }
        /**
         * Tear down the current recording session and reset the record button.
         *
         * Releases the audio processor, microphone tracks and AudioContext, tells the
         * server the audio has ended and closes the socket one second later. Safe to
         * call when nothing is running: every resource is checked first.
         */
        function stopRecording() {
            isRecording = false;
            if (processor) {
                processor.disconnect();
                processor = null;
            }
            if (mediaStream) {
                mediaStream.getTracks().forEach(track => track.stop());
                mediaStream = null;
            }
            if (audioContext) {
                audioContext.close();
                audioContext = null;
            }
            if (ws) {
                // Keep a local reference. `ws` is cleared right away, so a timer that read
                // `ws` later would hit null, or close the socket of a session started in
                // the meantime.
                const socket = ws;
                ws = null;
                if (socket.readyState === WebSocket.OPEN) {
                    socket.send("END_OF_AUDIO");
                    // Delay the close by one second after END_OF_AUDIO
                    setTimeout(() => socket.close(), 1000);
                } else if (socket.readyState === WebSocket.CONNECTING) {
                    // Abandon a connection that never opened. Its handlers are detached
                    // first so the abort is not reported to the user as a connection error.
                    socket.onopen = socket.onmessage = socket.onerror = socket.onclose = null;
                    socket.close();
                }
            }
            recordBtn.className = 'btn btn-success';
            document.getElementById('recordIcon').textContent = '🎤';
            document.getElementById('recordText').textContent = 'Start Recording';
            // Only a live "Recording" badge is replaced; error or disconnect messages stay visible
            if (liveStatus.textContent === '🔴 Recording') {
                liveStatus.className = 'status-badge status-offline';
                liveStatus.textContent = 'Stopped';
            }
        }
        // NOTE(review): liveSegments is not read or written by any code visible in this script
        let liveSegments = [];
        /**
         * Replace the live transcript with the given segments and scroll to the bottom.
         *
         * Segment text comes from the server, so nodes are built with textContent
         * rather than assembled into an HTML string; markup inside a transcript is
         * displayed literally instead of being parsed.
         *
         * @param {Array<{text: string, start?: (number|string), end?: (number|string)}>} segments
         *     segments to show; the time row is omitted when start or end is undefined.
         */
        function renderLiveSegments(segments) {
            const fragment = document.createDocumentFragment();
            segments.forEach(seg => {
                const row = document.createElement('div');
                row.className = 'segment';
                if (seg.start !== undefined && seg.end !== undefined) {
                    const time = document.createElement('div');
                    time.className = 'segment-time';
                    time.textContent = `${formatTime(seg.start)} - ${formatTime(seg.end)}`;
                    row.appendChild(time);
                }
                const text = document.createElement('div');
                text.className = 'segment-text';
                text.textContent = seg.text;
                row.appendChild(text);
                fragment.appendChild(row);
            });
            liveTranscript.textContent = '';
            liveTranscript.appendChild(fragment);
            liveTranscript.scrollTop = liveTranscript.scrollHeight;
        }
    </script>
 </body>
 </html>
--- a/test_http_endpoints.py
+++ b/test_http_endpoints.py
@ -0,0 +1,159 @@
 #!/usr/bin/env python3
 """
 Test script for WhisperLive HTTP endpoints
 This script demonstrates how to use the new HTTP API for file transcription
 """
 import requests
 import json
 import os
 from pathlib import Path
 # Configuration
 # Base URL every request in this script is sent to; endpoint paths are appended to it.
 HTTP_BASE_URL = "http://localhost:8080"  # Adjust if using different port
 # Not referenced by any function in this script.
 WEBSOCKET_PORT = 5050  # Your existing WebSocket port
 def test_health_endpoint():
    """Probe GET /health and report whether the server answered with HTTP 200.

    The status code and the decoded JSON body are printed. Any exception raised
    while sending the request or decoding the body is printed and reported as
    a failure.
    """
    print("Testing health endpoint...")
    try:
        health_url = f"{HTTP_BASE_URL}/health"
        reply = requests.get(health_url)
        print(f"Status: {reply.status_code}")
        print(f"Response: {reply.json()}")
    except Exception as e:
        print(f"Error: {e}")
        return False
    else:
        return reply.status_code == 200
 def test_file_transcription(audio_file_path, language=None, task="transcribe", model="base"):
    """Upload an audio file to POST /transcribe and print a summary of the result.

    Args:
        audio_file_path: Path of the audio file to upload.
        language: Language code sent with the request; None is printed as
            "auto-detect".
        task: Value sent as the ``task`` form field.
        model: Value sent as the ``model`` form field.

    Returns:
        True when the server answered 200 and the response had the expected
        shape; False when the file is missing, the server answered with another
        status, or any exception occurred (it is printed).
    """
    print("\nTesting file transcription endpoint...")
    print(f"File: {audio_file_path}")
    print(f"Language: {language or 'auto-detect'}")
    print(f"Task: {task}")
    print(f"Model: {model}")
    if not os.path.exists(audio_file_path):
        print(f"Error: File {audio_file_path} not found")
        return False
    try:
        # Prepare the request
        data = {
            'language': language,
            'task': task,
            'model': model
        }
        # Make the request. The context manager closes the upload handle even when
        # the request raises, instead of leaving it open until garbage collection.
        with open(audio_file_path, 'rb') as audio_file:
            response = requests.post(
                f"{HTTP_BASE_URL}/transcribe",
                files={'file': audio_file},
                data=data,
            )
        print(f"Status: {response.status_code}")
        if response.status_code == 200:
            result = response.json()
            print("Transcription successful!")
            print(f"Filename: {result.get('filename')}")
            print(f"Language: {result['info'].get('language')}")
            print(f"Duration: {result['info'].get('duration')} seconds")
            print(f"Number of segments: {len(result['segments'])}")
            # Print first few segments
            for i, segment in enumerate(result['segments'][:3]):
                print(f"Segment {i+1}: [{segment['start']:.2f}s - {segment['end']:.2f}s] {segment['text']}")
            if len(result['segments']) > 3:
                print(f"... and {len(result['segments']) - 3} more segments")
            return True
        else:
            print(f"Error: {response.text}")
            return False
    except Exception as e:
        print(f"Error: {e}")
        return False
 def test_url_transcription():
    """Send a fixed example URL to POST /transcribe/url (placeholder test).

    Prints the status code and the decoded JSON body. Returns True only for an
    HTTP 200 answer; any exception is printed and reported as False.
    """
    print(f"\nTesting URL transcription endpoint...")
    payload = dict(
        url='https://example.com/audio.mp3',
        language='en',
        task='transcribe',
        model='base',
    )
    try:
        reply = requests.post(f"{HTTP_BASE_URL}/transcribe/url", json=payload)
        print(f"Status: {reply.status_code}")
        print(f"Response: {reply.json()}")
    except Exception as e:
        print(f"Error: {e}")
        return False
    else:
        return reply.status_code == 200
 def test_openai_endpoint(audio_file_path):
    """Upload an audio file to the OpenAI-compatible POST /v1/audio/transcriptions.

    Args:
        audio_file_path: Path of the audio file to upload.

    Returns:
        True when the server answered 200; False when the file is missing, the
        server answered with another status, or any exception occurred (it is
        printed).
    """
    print("\nTesting OpenAI compatible endpoint...")
    print(f"File: {audio_file_path}")
    if not os.path.exists(audio_file_path):
        print(f"Error: File {audio_file_path} not found")
        return False
    try:
        data = {
            'model': 'whisper-1',
            'response_format': 'json'
        }
        # The context manager closes the upload handle even when the request raises,
        # instead of leaving it open until garbage collection.
        with open(audio_file_path, 'rb') as audio_file:
            response = requests.post(
                f"{HTTP_BASE_URL}/v1/audio/transcriptions",
                files={'file': audio_file},
                data=data,
            )
        print(f"Status: {response.status_code}")
        if response.status_code == 200:
            result = response.json()
            print("OpenAI endpoint successful!")
            print(f"Response: {result}")
            return True
        else:
            print(f"Error: {response.text}")
            return False
    except Exception as e:
        print(f"Error: {e}")
        return False
 def main():
    """Run every endpoint check in order, stopping early if the server is down.

    The file-based checks only run when the bundled sample audio exists;
    otherwise a hint on how to run them manually is printed. The URL check
    always runs once the health check has passed.
    """
    separator = "=" * 40
    print("WhisperLive HTTP Endpoints Test")
    print(separator)
    # Nothing else can succeed without a reachable server
    server_up = test_health_endpoint()
    if not server_up:
        print("Health check failed. Make sure the server is running.")
        return
    # Sample file used for the upload checks; replace with any audio file you have
    sample_audio = "assets/jfk.flac"  # Adjust path as needed
    if not os.path.exists(sample_audio):
        print(f"\nSample audio file not found at {sample_audio}")
        print("You can test with any audio file by calling:")
        print("test_file_transcription('path/to/your/audio.wav')")
    else:
        test_file_transcription(sample_audio, language="en", task="transcribe", model="base")
        test_openai_endpoint(sample_audio)
    # Test URL transcription endpoint
    test_url_transcription()
    print("\n" + separator)
    print("Test completed!")
 if __name__ == "__main__":
    main()