feat: apply local modifications to WhisperLive-Server

2026-05-13 22:33:35 +00:00 · 2026-05-13 22:33:35 +00:00 · 83edfff9d3
commit 83edfff9d3
parent 05648af633
17 changed files with 4274 additions and 1352 deletions
--- a/.archive/Dockerfile.macos.dev
+++ b/.archive/Dockerfile.macos.dev
@ -1,51 +0,0 @@
-FROM python:3.10-bookworm
-
-ARG DEBIAN_FRONTEND=noninteractive
-
-# Create log directories with proper permissions
-RUN mkdir -p /app/logs && \
-    touch /app/logs/whisperlive.log && \
-    touch /app/logs/connections.log && \
-    chmod 666 /app/logs/whisperlive.log && \
-    chmod 666 /app/logs/connections.log
-
-# install lib required for pyaudio
-RUN apt update && apt install -y portaudio19-dev && apt-get clean && rm -rf /var/lib/apt/lists/*
-
-# update pip to support for whl.metadata -> less downloading
-RUN pip install --no-cache-dir -U "pip>=24"
-
-# create a working directory
-WORKDIR /app
-
-# install the requirements for running the whisper-live server
-COPY requirements/server.txt /app/
-RUN pip install -r server.txt && rm server.txt
-
-COPY whisper_live /app/whisper_live
-COPY run_server.py /app
-
-# Port options
-EXPOSE ${PORT_WHISPERLIVE}
-EXPOSE ${PORT_WHISPERLIVE_SSL}
-ARG PORT_WHISPERLIVE
-ENV PORT_WHISPERLIVE=${PORT_WHISPERLIVE}
-ARG PORT_WHISPERLIVE_SSL
-ENV PORT_WHISPERLIVE_SSL=${PORT_WHISPERLIVE_SSL}
-
-# SSL options
-ARG WHISPERLIVE_SSL
-ENV WHISPERLIVE_SSL=${WHISPERLIVE_SSL}
-
-# Model options
-ARG WHISPL_USE_CUSTOM_MODEL
-ENV WHISPL_USE_CUSTOM_MODEL=${WHISPL_USE_CUSTOM_MODEL}
-ARG FASTERWHISPER_MODEL
-ENV FASTERWHISPER_MODEL=${FASTERWHISPER_MODEL}
-
-CMD ["sh", "-c", "\
-    if [ \"$WHISPERLIVE_SSL\" = \"true\" ]; then \
-        python3 -u run_server.py --port $PORT_WHISPERLIVE_SSL --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL --ssl_cert_path /app/ssl; \
-    else \
-        python3 -u run_server.py --port $PORT_WHISPERLIVE --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL --no_single_model; \
-    fi"]
--- a/.archive/Dockerfile.macos.prod
+++ b/.archive/Dockerfile.macos.prod
@ -1,45 +0,0 @@
-FROM python:3.10-bookworm
-
-ARG DEBIAN_FRONTEND=noninteractive
-
-# Create log directories with proper permissions
-RUN mkdir -p /app/logs && \
-    touch /app/logs/whisperlive.log && \
-    touch /app/logs/connections.log && \
-    chmod 666 /app/logs/whisperlive.log && \
-    chmod 666 /app/logs/connections.log
-
-# install lib required for pyaudio
-RUN apt update && apt install -y portaudio19-dev && apt-get clean && rm -rf /var/lib/apt/lists/*
-
-# update pip to support for whl.metadata -> less downloading
-RUN pip install --no-cache-dir -U "pip>=24"
-
-# create a working directory
-WORKDIR /app
-
-# install the requirements for running the whisper-live server
-COPY requirements/server.txt /app/
-RUN pip install -r server.txt && rm server.txt
-
-COPY whisper_live /app/whisper_live
-COPY run_server.py /app
-
-# Copy application files
-EXPOSE ${PORT_WHISPERLIVE}
-EXPOSE ${PORT_WHISPERLIVE_SSL}
-ARG PORT_WHISPERLIVE
-ENV PORT_WHISPERLIVE=${PORT_WHISPERLIVE}
-ARG PORT_WHISPERLIVE_SSL
-ENV PORT_WHISPERLIVE_SSL=${PORT_WHISPERLIVE_SSL}
-ARG FASTERWHISPER_MODEL
-ENV FASTERWHISPER_MODEL=${FASTERWHISPER_MODEL}
-ARG WHISPERLIVE_SSL
-ENV WHISPERLIVE_SSL=${WHISPERLIVE_SSL}
-
-CMD ["sh", "-c", "\
-    if [ \"$WHISPERLIVE_SSL\" = \"true\" ]; then \
-        python3 -u run_server.py --port $PORT_WHISPERLIVE_SSL --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL --ssl_cert_path /app/ssl; \
-    else \
-        python3 -u run_server.py --port $PORT_WHISPERLIVE --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL; \
-    fi"]
--- a/.archive/Dockerfile.win.prod
+++ b/.archive/Dockerfile.win.prod
@ -1,49 +0,0 @@
-FROM python:3.10-bookworm
-
-ARG DEBIAN_FRONTEND=noninteractive
-
-# Create log directories with proper permissions
-RUN mkdir -p /app/logs && \
-    touch /app/logs/whisperlive.log && \
-    touch /app/logs/connections.log && \
-    chmod 666 /app/logs/whisperlive.log && \
-    chmod 666 /app/logs/connections.log
-
-# install lib required for pyaudio
-RUN apt update && apt install -y portaudio19-dev && apt-get clean && rm -rf /var/lib/apt/lists/*
-
-# update pip to support for whl.metadata -> less downloading
-RUN pip install --no-cache-dir -U "pip>=24"
-
-# create a working directory
-WORKDIR /app
-
-# install the requirements for running the whisper-live server
-COPY requirements/server.txt /app/
-RUN pip install -r server.txt && rm server.txt
-
-# make the paths of the nvidia libs installed as wheels visible. equivalent to:
-# export LD_LIBRARY_PATH=`python3 -c 'import os; import nvidia.cublas.lib; import nvidia.cudnn.lib; print(os.path.dirname(nvidia.cublas.lib.__file__) + ":" + os.path.dirname(nvidia.cudnn.lib.__file__))'`
-ENV LD_LIBRARY_PATH="/usr/local/lib/python3.10/site-packages/nvidia/cublas/lib:/usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib"
-
-COPY whisper_live /app/whisper_live
-COPY run_server.py /app
-
-# Copy application files
-EXPOSE ${PORT_WHISPERLIVE}
-EXPOSE ${PORT_WHISPERLIVE_SSL}
-ARG PORT_WHISPERLIVE
-ENV PORT_WHISPERLIVE=${PORT_WHISPERLIVE}
-ARG PORT_WHISPERLIVE_SSL
-ENV PORT_WHISPERLIVE_SSL=${PORT_WHISPERLIVE_SSL}
-ARG FASTERWHISPER_MODEL
-ENV FASTERWHISPER_MODEL=${FASTERWHISPER_MODEL}
-ARG WHISPERLIVE_SSL
-ENV WHISPERLIVE_SSL=${WHISPERLIVE_SSL}
-
-CMD ["sh", "-c", "\
-    if [ \"$WHISPERLIVE_SSL\" = \"true\" ]; then \
-        python3 -u run_server.py --port $PORT_WHISPERLIVE_SSL --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL --ssl_cert_path /app/ssl; \
-    else \
-        python3 -u run_server.py --port $PORT_WHISPERLIVE --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL; \
-    fi"]
--- a/.archive/docker-compose.yml
+++ b/.archive/docker-compose.yml
--- a/.env
+++ b/.env
@ -1,9 +1,10 @@
 # Whisper live settings
 APP_WS_PROTOCOL=wss
-APP_URL=kevlarai.com
+APP_URL=classroomcopilot.ai

-PORT_WHISPERLIVE=5050
+PORT_WHISPERLIVE=5000
 PORT_WHISPERLIVE_SSL=5053
+HTTP_PORT=8080
 WHISPERLIVE_SSL=false

 WHISPL_USE_CUSTOM_MODEL=false
--- a/20
+++ b/20
@ -20,22 +20,24 @@ WORKDIR /app

 # install the requirements for running the whisper-live server
 COPY requirements/server.txt /app/
-RUN pip install -r server.txt && rm server.txt
+RUN pip install --no-cache-dir "setuptools<70.0.0" wheel
+RUN pip install --no-cache-dir -r server.txt && rm server.txt
+# --no-build-isolation: build openai-whisper against the setuptools/wheel installed above
+RUN pip install --no-cache-dir --no-build-isolation openai-whisper==20240930

 # make the paths of the nvidia libs installed as wheels visible
-ENV LD_LIBRARY_PATH="/usr/local/lib/python3.10/site-packages/nvidia/cublas/lib:/usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib"
+RUN pip install --no-cache-dir nvidia-cublas-cu12 nvidia-cudnn-cu12
+ENV LD_LIBRARY_PATH="/usr/local/lib/python3.10/site-packages/nvidia/cublas/lib:/usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib:/usr/local/lib/python3.10/site-packages/torch/lib:${LD_LIBRARY_PATH}"

 COPY whisper_live /app/whisper_live
 COPY run_server.py /app
+COPY hybrid_server.py /app

-# Copy application files
-EXPOSE ${PORT_WHISPERLIVE}
-ARG PORT_WHISPERLIVE
-ENV PORT_WHISPERLIVE=${PORT_WHISPERLIVE}
-ARG FASTERWHISPER_MODEL
-ENV FASTERWHISPER_MODEL=${FASTERWHISPER_MODEL}
+# Expose both WebSocket and HTTP ports
+EXPOSE 5000 8080

-CMD ["python3", "-u", "run_server.py", "--port", "${PORT_WHISPERLIVE}", "--backend", "faster_whisper"]
+# Use the hybrid server by default (exec form: python runs as PID 1 and receives SIGTERM from `docker stop`)
+CMD ["python3", "-u", "hybrid_server.py", "--websocket-port", "5000", "--http-port", "8080", "--backend", "faster_whisper"]

 # CMD ["python3", "-u", "run_server.py", "--port", "${PORT_WHISPERLIVE}", "--backend", "faster_whisper", "--faster_whisper_custom_model_path", "/app/models/${FASTERWHISPER_MODEL}", "--ssl_cert_path", "/app/ssl"]

--- a/HYBRID_SERVER_README.md
+++ b/HYBRID_SERVER_README.md
@ -0,0 +1,260 @@
+# WhisperLive Hybrid Server
+
+This hybrid server extends the original WhisperLive-Server to support both WebSocket connections (for real-time audio streaming) and HTTP endpoints (for file transcription) in a single container.
+
+## Features
+
+- **WebSocket Server**: Original real-time audio transcription functionality
+- **HTTP Server**: New file upload and transcription endpoints
+- **Single Container**: Both services run in the same Docker container
+- **GPU Sharing**: Both services share the same GPU resources
+
+## Architecture
+
+The hybrid server runs two services simultaneously:
+1. **WebSocket Server**: Handles real-time audio streaming transcription
+2. **HTTP Server**: Handles file uploads and transcription requests
+
+Both services use the same WhisperLive transcriber instance, ensuring efficient resource usage.
+
+## Ports
+
+- **WebSocket Port**: Default 5050 (configurable via `PORT_WHISPERLIVE`)
+- **HTTP Port**: Default 8080 (configurable via `HTTP_PORT`)
+
+## HTTP Endpoints
+
+### 1. Health Check
+```
+GET /health
+```
+Returns server health status.
+
+**Response:**
+```json
+{
+  "status": "healthy",
+  "service": "WhisperLive Hybrid Server"
+}
+```
+
+### 2. OpenAI Compatible Endpoints
+```
+POST /v1/audio/transcriptions
+POST /v1/audio/translations
+```
+Fully compatible drop-in replacements for the standard OpenAI Whisper API.
+
+**Parameters:**
+- `file` (required): Audio file (WAV, MP3, FLAC, M4A, OGG, WEBM, MP4, MPEG, MPGA)
+- `model` (optional): Model size (default: "base")
+- `language` (optional): Language code (e.g., "en", "es", "fr")
+- `prompt` (optional): Text to guide the model's style
+- `response_format` (optional): "json", "text", "srt", "verbose_json", "vtt" (default: "json")
+- `temperature` (optional): Sampling temperature (0.0 to 1.0)
+
+**Example Request:**
+```bash
+curl -X POST http://localhost:8080/v1/audio/transcriptions \
+  -H "Content-Type: multipart/form-data" \
+  -F "file=@audio.wav" \
+  -F "model=whisper-1" \
+  -F "response_format=json"
+```
+
+**Response (JSON format):**
+```json
+{
+  "text": "Hello, this is a test."
+}
+```
+
+### 3. Legacy File Transcription
+```
+POST /transcribe
+```
+Transcribes an uploaded audio file.
+
+**Parameters:**
+- `file` (required): Audio file (WAV, MP3, FLAC, M4A, OGG, WEBM)
+- `language` (optional): Language code (e.g., "en", "es", "fr")
+- `task` (optional): "transcribe" or "translate" (default: "transcribe")
+- `model` (optional): Model size (default: "base")
+
+**Example Request:**
+```bash
+curl -X POST http://localhost:8080/transcribe \
+  -F "file=@audio.wav" \
+  -F "language=en" \
+  -F "task=transcribe" \
+  -F "model=base"
+```
+
+**Response:**
+```json
+{
+  "success": true,
+  "segments": [
+    {
+      "start": 0.0,
+      "end": 2.5,
+      "text": "Hello, this is a test.",
+      "no_speech_prob": 0.1
+    }
+  ],
+  "info": {
+    "language": "en",
+    "language_probability": 0.95,
+    "duration": 10.5,
+    "duration_after_vad": 10.5,
+    "transcription_options": {}
+  },
+  "filename": "audio.wav"
+}
+```
+
+### 4. URL Transcription (Placeholder)
+```
+POST /transcribe/url
+```
+Placeholder endpoint for transcribing audio from a URL; it is not implemented yet.
+
+## Usage Examples
+
+### Python Client
+```python
+import requests
+
+# Transcribe a file
+with open('audio.wav', 'rb') as f:
+    response = requests.post('http://localhost:8080/transcribe', 
+                           files={'file': f},
+                           data={'language': 'en', 'model': 'base'})
+    
+if response.status_code == 200:
+    result = response.json()
+    print(f"Transcription: {result['segments']}")
+```
+
+### JavaScript/Node.js
+```javascript
+const FormData = require('form-data');
+const fs = require('fs');
+
+const form = new FormData();
+form.append('file', fs.createReadStream('audio.wav'));
+form.append('language', 'en');
+form.append('model', 'base');
+
+fetch('http://localhost:8080/transcribe', {
+    method: 'POST',
+    body: form
+})
+.then(response => response.json())
+.then(result => console.log(result));
+```
+
+### cURL
+```bash
+# Basic transcription
+curl -X POST http://localhost:8080/transcribe \
+  -F "file=@audio.wav"
+
+# With parameters
+curl -X POST http://localhost:8080/transcribe \
+  -F "file=@audio.wav" \
+  -F "language=es" \
+  -F "task=translate" \
+  -F "model=small"
+```
+
+## Configuration
+
+### Environment Variables
+- `PORT_WHISPERLIVE`: WebSocket port (default: 5050)
+- `HTTP_PORT`: HTTP port (default: 8080)
+- `FASTERWHISPER_MODEL`: Custom model path
+- `OMP_NUM_THREADS`: OpenMP thread count
+
+### Docker Compose
+```yaml
+services:
+  whisperlive:
+    ports:
+      - "5050:5050"  # WebSocket
+      - "8080:8080"  # HTTP
+    environment:
+      PORT_WHISPERLIVE: 5050
+      HTTP_PORT: 8080
+```
+
+## Testing
+
+### 1. Test Script
+Run the Python test script:
+```bash
+python3 test_http_endpoints.py
+```
+
+### 2. Web Interface
+Open `test_form.html` in a web browser to test the HTTP endpoints with a user-friendly interface.
+
+### 3. Health Check
+```bash
+curl http://localhost:8080/health
+```
+
+## Backend Support
+
+Currently, the HTTP endpoints support:
+- **faster_whisper**: Full support for all features
+- **tensorrt**: Basic support (needs adaptation)
+- **openvino**: Basic support (needs adaptation)
+
+## File Size Limits
+
+- Maximum file size: 100MB
+- Supported formats: WAV, MP3, FLAC, M4A, OGG, WEBM
+
+## Performance Considerations
+
+- File transcription uses the same model instance as WebSocket connections
+- Temporary files are automatically cleaned up after processing
+- Both services share GPU memory efficiently
+- HTTP requests are processed in separate threads
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Port Already in Use**
+   - Check if ports 5050 or 8080 are available
+   - Use different ports via environment variables
+
+2. **File Upload Errors**
+   - Ensure file size is under 100MB
+   - Check file format is supported
+   - Verify file is not corrupted
+
+3. **GPU Memory Issues**
+   - Monitor GPU memory usage
+   - Consider using smaller model sizes
+   - Restart container if needed
+
+### Logs
+Check container logs for detailed error information:
+```bash
+docker logs whisperlive
+```
+
+## Migration from Original Server
+
+The hybrid server is fully backward compatible. Your existing WebSocket clients will continue to work without changes. The HTTP endpoints are additional functionality that doesn't interfere with the original service.
+
+## Future Enhancements
+
+- [ ] Support for more audio formats
+- [ ] Batch file processing
+- [ ] Progress tracking for long files
+- [ ] Authentication and rate limiting
+- [ ] WebSocket support for file transcription progress
--- a/pycache/hybrid_server.cpython-314.pyc
+++ b/pycache/hybrid_server.cpython-314.pyc
--- a/batch_transcribe.py
+++ b/batch_transcribe.py
@ -0,0 +1,270 @@
+#!/usr/bin/env python3
+"""
+Batch Transcription Script for WhisperLive
+Processes all audio files in a folder using the HTTP transcription endpoint
+"""
+
+import os
+import sys
+import json
+import time
+import argparse
+import requests
+from pathlib import Path
+from typing import List, Dict, Optional
+import logging
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+class BatchTranscriber:
+    def __init__(self, server_url: str = "http://localhost:8080"):
+        self.supported_formats = {'.wav', '.mp3', '.flac', '.m4a', '.ogg', '.webm'}  # compared against lower-cased suffixes
+        self.server_url = server_url  # base URL; endpoint paths are appended to it
+        
+    def get_audio_files(self, folder_path: str) -> List[Path]:
+        """Return the supported audio files directly inside ``folder_path``, sorted by path.
+
+        Matching is by lower-cased file suffix against ``self.supported_formats``;
+        sub-directories are not descended into.
+
+        Raises FileNotFoundError when ``folder_path`` does not exist.
+        """
+        root = Path(folder_path)
+        if not root.exists():
+            raise FileNotFoundError(f"Folder not found: {folder_path}")
+        return sorted(p for p in root.iterdir() if p.is_file() and p.suffix.lower() in self.supported_formats)
+    
+    def transcribe_file(self, file_path: Path, language: Optional[str] = None,
+                       task: str = "transcribe", model: str = "base") -> Dict:
+        """Upload one audio file to the server's ``/transcribe`` endpoint.
+
+        Returns the decoded JSON body on HTTP 200. Any other status yields
+        ``{'error': <response text>, 'status_code': <status>}``; any exception
+        (unreadable file, request failure, invalid JSON, ...) yields
+        ``{'error': <message>}``. Failures are logged and never raised.
+        """
+        try:
+            logger.info(f"Transcribing: {file_path.name}")
+            endpoint = f"{self.server_url}/transcribe"
+            form_fields = {'language': language, 'task': task, 'model': model}
+
+            with open(file_path, 'rb') as audio:
+                reply = requests.post(endpoint, files={'file': audio},
+                                      data=form_fields, timeout=300)
+
+            if reply.status_code != 200:
+                failure = reply.text
+                logger.error(f"❌ Failed to transcribe {file_path.name}: {failure}")
+                return {'error': failure, 'status_code': reply.status_code}
+
+            payload = reply.json()
+            logger.info(f"✅ Successfully transcribed: {file_path.name}")
+            return payload
+        except Exception as e:
+            logger.error(f"❌ Error transcribing {file_path.name}: {str(e)}")
+            return {'error': str(e)}
+    
+    def save_transcript(self, transcript_data: Dict, output_path: Path,
+                       format_type: str = "txt") -> bool:
+        """Write ``transcript_data`` to ``output_path`` as txt, json, srt or vtt.
+
+        Returns True once the file has been written. Returns False, without
+        writing, when the data carries an 'error' key or ``format_type`` is not
+        one of the four formats; also returns False when writing raises. Problems
+        are logged, never raised.
+        """
+        try:
+            if 'error' in transcript_data:
+                return False
+            if format_type not in ("txt", "json", "srt", "vtt"):
+                logger.error(f"❌ Unsupported transcript format: {format_type}")
+                return False
+            with open(output_path, 'w', encoding='utf-8') as f:
+                if format_type == "txt":
+                    f.write(f"Transcription of: {transcript_data.get('filename', 'Unknown')}\n")
+                    f.write(f"Language: {transcript_data['info'].get('language', 'Auto-detected')}\n")
+                    f.write(f"Duration: {transcript_data['info'].get('duration', 0):.2f} seconds\n")
+                    f.write("=" * 50 + "\n\n")
+                    for segment in transcript_data['segments']:
+                        f.write(f"[{segment['start']:.2f}s - {segment['end']:.2f}s] {segment['text']}\n")
+                elif format_type == "json":
+                    json.dump(transcript_data, f, indent=2, ensure_ascii=False)
+                elif format_type == "srt":
+                    for i, segment in enumerate(transcript_data['segments'], 1):
+                        start_time = self.format_srt_time(segment['start'])
+                        end_time = self.format_srt_time(segment['end'])
+                        f.write(f"{i}\n{start_time} --> {end_time}\n{segment['text']}\n\n")
+                else:  # "vtt"
+                    f.write("WEBVTT\n\n")
+                    for segment in transcript_data['segments']:
+                        start_time = self.format_vtt_time(segment['start'])
+                        end_time = self.format_vtt_time(segment['end'])
+                        f.write(f"{start_time} --> {end_time}\n{segment['text']}\n\n")
+
+            logger.info(f"💾 Saved transcript: {output_path}")
+            return True
+        except Exception as e:
+            logger.error(f"❌ Error saving transcript {output_path}: {str(e)}")
+            return False
+    
+    def format_srt_time(self, seconds: float) -> str:
+        """Format ``seconds`` as an SRT timestamp ``HH:MM:SS,mmm``, rounded to the nearest millisecond."""
+        # Convert to whole milliseconds first: int((seconds % 1) * 1000) truncates float error, e.g. 2.3 s -> ",299".
+        total_ms = int(round(seconds * 1000))
+        hours, remainder = divmod(total_ms, 3_600_000)
+        minutes, remainder = divmod(remainder, 60_000)
+        return f"{hours:02d}:{minutes:02d}:{remainder // 1000:02d},{remainder % 1000:03d}"
+    
+    def format_vtt_time(self, seconds: float) -> str:
+        """Format ``seconds`` as a WebVTT timestamp ``HH:MM:SS.mmm``, rounded to the nearest millisecond."""
+        # Convert to whole milliseconds first: int((seconds % 1) * 1000) truncates float error, e.g. 2.3 s -> ".299".
+        total_ms = int(round(seconds * 1000))
+        hours, remainder = divmod(total_ms, 3_600_000)
+        minutes, remainder = divmod(remainder, 60_000)
+        return f"{hours:02d}:{minutes:02d}:{remainder // 1000:02d}.{remainder % 1000:03d}"
+    
+    def batch_transcribe(self, input_folder: str, output_folder: str,
+                        language: Optional[str] = None, task: str = "transcribe",
+                        model: str = "base", format_type: str = "txt",
+                        delay: float = 1.0) -> Dict:
+        """Transcribe every supported audio file in ``input_folder``, one at a time.
+
+        Each transcript is written to ``output_folder`` (created if missing) as
+        ``<input stem>.<format_type>``. ``language``, ``task`` and ``model`` are
+        passed through to ``transcribe_file``; ``delay`` seconds are slept between
+        consecutive files.
+
+        Returns a summary dict with 'processed', 'successful' and 'failed' counts
+        plus 'files', a list of per-file records ('input', 'output', 'status' and,
+        when the transcription request failed, 'error'). 'files' is always
+        present, including when the folder holds no audio files.
+
+        Raises FileNotFoundError (from ``get_audio_files``) if ``input_folder``
+        does not exist.
+        """
+        # Create output folder if it doesn't exist
+        destination = Path(output_folder)
+        destination.mkdir(parents=True, exist_ok=True)
+
+        audio_files = self.get_audio_files(input_folder)
+        total = len(audio_files)
+        results = {
+            'processed': total,
+            'successful': 0,
+            'failed': 0,
+            'files': []
+        }
+
+        if not audio_files:
+            logger.warning(f"No audio files found in: {input_folder}")
+            return results
+
+        logger.info(f"Found {total} audio files to process")
+
+        for i, audio_file in enumerate(audio_files, 1):
+            logger.info(f"Processing {i}/{total}: {audio_file.name}")
+            transcript_data = self.transcribe_file(audio_file, language, task, model)
+
+            # Start from a failed record; it is upgraded only after a successful save.
+            record = {'input': str(audio_file), 'output': None, 'status': 'failed'}
+            if 'error' in transcript_data:
+                # The request itself failed, so there is nothing to write.
+                record['error'] = transcript_data.get('error', 'Unknown error')
+            else:
+                # The transcript is named after the input file's stem.
+                output_file = destination / f"{audio_file.stem}.{format_type}"
+                record['output'] = str(output_file)
+                if self.save_transcript(transcript_data, output_file, format_type):
+                    record['status'] = 'success'
+
+            if record['status'] == 'success':
+                results['successful'] += 1
+            else:
+                results['failed'] += 1
+            results['files'].append(record)
+
+            # Pause between requests (not after the last one) to avoid overwhelming the server.
+            if i < total:
+                time.sleep(delay)
+
+        return results
+
+def main():
+    """Command-line entry point: parse arguments, check the server, run the batch, log a summary.
+
+    Exits with status 1 when the ``/health`` check fails or the server cannot be
+    reached, on Ctrl-C, and on any unexpected exception.
+    """
+    cli = argparse.ArgumentParser(description='Batch transcribe audio files using WhisperLive')
+    cli.add_argument('input_folder', help='Folder containing audio files')
+    cli.add_argument('output_folder', help='Folder to save transcripts')
+    cli.add_argument('--server', '-s', default='http://localhost:8080',
+                     help='WhisperLive server URL (default: http://localhost:8080)')
+    cli.add_argument('--language', '-l', help='Language code (e.g., en, es, fr)')
+    cli.add_argument('--task', '-t', choices=['transcribe', 'translate'], default='transcribe',
+                     help='Task to perform (default: transcribe)')
+    cli.add_argument('--model', '-m', default='base', help='Model size (default: base)')
+    cli.add_argument('--format', '-f', choices=['txt', 'json', 'srt', 'vtt'], default='txt',
+                     help='Output format (default: txt)')
+    cli.add_argument('--delay', '-d', type=float, default=1.0,
+                     help='Delay between requests in seconds (default: 1.0)')
+    cli.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
+    args = cli.parse_args()
+
+    if args.verbose:
+        logging.getLogger().setLevel(logging.DEBUG)
+
+    try:
+        transcriber = BatchTranscriber(args.server)
+
+        # Refuse to start the batch unless the server answers its health endpoint.
+        try:
+            health = requests.get(f"{args.server}/health", timeout=5)
+        except requests.exceptions.RequestException as e:
+            logger.error(f"❌ Cannot connect to server: {e}")
+            sys.exit(1)
+        if health.status_code != 200:
+            logger.error(f"Server health check failed: {health.status_code}")
+            sys.exit(1)
+        logger.info("✅ Server health check passed")
+
+        results = transcriber.batch_transcribe(
+            input_folder=args.input_folder,
+            output_folder=args.output_folder,
+            language=args.language,
+            task=args.task,
+            model=args.model,
+            format_type=args.format,
+            delay=args.delay
+        )
+
+        rule = "=" * 50
+        logger.info("\n" + rule)
+        logger.info("BATCH TRANSCRIPTION COMPLETED")
+        logger.info(rule)
+        logger.info(f"Total files processed: {results['processed']}")
+        logger.info(f"Successful: {results['successful']}")
+        logger.info(f"Failed: {results['failed']}")
+        logger.info(f"Output folder: {args.output_folder}")
+        logger.info(f"Output format: {args.format}")
+
+        if results['failed'] > 0:
+            logger.warning("\nFailed files:")
+            for entry in results['files']:
+                if entry['status'] != 'failed':
+                    continue
+                logger.warning(f"  - {entry['input']}: {entry.get('error', 'Unknown error')}")
+
+        if results['successful'] > 0:
+            logger.info(f"\n✅ Successfully processed {results['successful']} files!")
+    except KeyboardInterrupt:
+        logger.info("\n⚠️  Process interrupted by user")
+        sys.exit(1)
+    except Exception as e:
+        logger.error(f"❌ Unexpected error: {str(e)}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@ -15,6 +15,8 @@ services:
      LOG_PATH: /app/logs
      NVIDIA_VISIBLE_DEVICES: all
      NVIDIA_DRIVER_CAPABILITIES: compute,utility
+      PORT_WHISPERLIVE: ${PORT_WHISPERLIVE}
+      HTTP_PORT: ${HTTP_PORT:-8080}
    volumes:
      - ./models:/app/models
      - ./ssl:/app/ssl
@ -26,11 +28,15 @@ services:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
+              options:
+                memory: "4G"  # Match the main docker-compose.yml allocation
    ports:
-      - ${PORT_WHISPERLIVE}:${PORT_WHISPERLIVE}
+      - "${PORT_WHISPERLIVE}:${PORT_WHISPERLIVE}"
+      - "${HTTP_PORT:-8080}:8080"
+    restart: unless-stopped
    networks:
-      - audio-network
+      - default

 networks:
-  audio-network:
+  default:
    driver: bridge
--- a/hybrid_server.py
+++ b/hybrid_server.py
--- a/openapi.json
+++ b/openapi.json
@ -0,0 +1,866 @@
+{
+  "openapi": "3.1.0",
+  "info": {
+    "title": "WhisperLive API",
+    "description": "A high-performance speech-to-text API based on OpenAI's Whisper model.\nSupports real-time transcription via WebSocket and batch processing via HTTP.\n\n## Features\n- Real-time audio transcription\n- Batch file processing\n- Multiple language support\n- Translation capabilities\n- Multiple model sizes\n- WebSocket and HTTP interfaces\n",
+    "version": "1.0.0",
+    "contact": {
+      "name": "WhisperLive Support",
+      "url": "https://github.com/collabora/WhisperLive"
+    },
+    "license": {
+      "name": "MIT",
+      "url": "https://opensource.org/licenses/MIT"
+    }
+  },
+  "servers": [
+    {
+      "url": "http://localhost:8080",
+      "description": "Local development server"
+    },
+    {
+      "url": "https://api.whisperlive.com/v1",
+      "description": "Production server"
+    }
+  ],
+  "security": [
+    {
+      "ApiKeyAuth": []
+    }
+  ],
+  "paths": {
+    "/v1/audio/transcriptions": {
+      "post": {
+        "summary": "Create transcription",
+        "description": "Transcribes audio into the input language. The response will include the transcribed text\nand additional metadata such as language detection, confidence scores, and timestamps.\n",
+        "operationId": "createTranscription",
+        "tags": [
+          "Audio"
+        ],
+        "requestBody": {
+          "required": true,
+          "content": {
+            "multipart/form-data": {
+              "schema": {
+                "type": "object",
+                "required": [
+                  "file"
+                ],
+                "properties": {
+                  "file": {
+                    "type": "string",
+                    "format": "binary",
+                    "description": "The audio file object (not file name) to transcribe, in one of these formats: \nflac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.\n"
+                  },
+                  "model": {
+                    "type": "string",
+                    "enum": [
+                      "tiny",
+                      "base",
+                      "small",
+                      "medium",
+                      "large"
+                    ],
+                    "default": "base",
+                    "description": "ID of the Whisper model size to use. One of: tiny, base, small, medium, large. Defaults to base."
+                  },
+                  "language": {
+                    "type": "string",
+                    "pattern": "^[a-z]{2}$",
+                    "description": "The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.\nSupported languages: en, es, fr, de, it, pt, ru, ja, ko, zh, hi, ar\n"
+                  },
+                  "prompt": {
+                    "type": "string",
+                    "description": "An optional text to guide the model's style or continue a previous audio segment.\nThe prompt should match the audio language.\n"
+                  },
+                  "response_format": {
+                    "type": "string",
+                    "enum": [
+                      "json",
+                      "text",
+                      "srt",
+                      "verbose_json",
+                      "vtt"
+                    ],
+                    "default": "json",
+                    "description": "The format of the transcript output."
+                  },
+                  "temperature": {
+                    "type": "number",
+                    "minimum": 0,
+                    "maximum": 1,
+                    "default": 0,
+                    "description": "The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic."
+                  },
+                  "timestamp_granularities": {
+                    "type": "array",
+                    "items": {
+                      "type": "string",
+                      "enum": [
+                        "word",
+                        "segment"
+                      ]
+                    },
+                    "description": "The timestamp granularities to populate for this transcription."
+                  }
+                }
+              }
+            }
+          }
+        },
+        "responses": {
+          "200": {
+            "description": "OK",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "oneOf": [
+                    {
+                      "$ref": "#/components/schemas/TranscriptionResponse"
+                    },
+                    {
+                      "$ref": "#/components/schemas/TranscriptionTextResponse"
+                    },
+                    {
+                      "$ref": "#/components/schemas/TranscriptionSrtResponse"
+                    },
+                    {
+                      "$ref": "#/components/schemas/TranscriptionVttResponse"
+                    }
+                  ]
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest"
+          },
+          "401": {
+            "$ref": "#/components/responses/Unauthorized"
+          },
+          "413": {
+            "$ref": "#/components/responses/FileTooLarge"
+          },
+          "422": {
+            "$ref": "#/components/responses/ValidationError"
+          },
+          "429": {
+            "$ref": "#/components/responses/RateLimitExceeded"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError"
+          }
+        }
+      }
+    },
+    "/v1/audio/translations": {
+      "post": {
+        "summary": "Create translation",
+        "description": "Translates audio into English. The response will include the translated text\nand additional metadata such as confidence scores and timestamps.\n",
+        "operationId": "createTranslation",
+        "tags": [
+          "Audio"
+        ],
+        "requestBody": {
+          "required": true,
+          "content": {
+            "multipart/form-data": {
+              "schema": {
+                "type": "object",
+                "required": [
+                  "file"
+                ],
+                "properties": {
+                  "file": {
+                    "type": "string",
+                    "format": "binary",
+                    "description": "The audio file object (not file name) to translate, in one of these formats: \nflac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.\n"
+                  },
+                  "model": {
+                    "type": "string",
+                    "enum": [
+                      "tiny",
+                      "base",
+                      "small",
+                      "medium",
+                      "large"
+                    ],
+                    "default": "base",
+                    "description": "ID of the Whisper model size to use. One of: tiny, base, small, medium, large. Defaults to base."
+                  },
+                  "prompt": {
+                    "type": "string",
+                    "description": "An optional text to guide the model's style or continue a previous audio segment.\nThe prompt should be in English.\n"
+                  },
+                  "response_format": {
+                    "type": "string",
+                    "enum": [
+                      "json",
+                      "text",
+                      "srt",
+                      "verbose_json",
+                      "vtt"
+                    ],
+                    "default": "json",
+                    "description": "The format of the transcript output."
+                  },
+                  "temperature": {
+                    "type": "number",
+                    "minimum": 0,
+                    "maximum": 1,
+                    "default": 0,
+                    "description": "The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic."
+                  },
+                  "timestamp_granularities": {
+                    "type": "array",
+                    "items": {
+                      "type": "string",
+                      "enum": [
+                        "word",
+                        "segment"
+                      ]
+                    },
+                    "description": "The timestamp granularities to populate for this translation."
+                  }
+                }
+              }
+            }
+          }
+        },
+        "responses": {
+          "200": {
+            "description": "OK",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "oneOf": [
+                    {
+                      "$ref": "#/components/schemas/TranscriptionResponse"
+                    },
+                    {
+                      "$ref": "#/components/schemas/TranscriptionTextResponse"
+                    },
+                    {
+                      "$ref": "#/components/schemas/TranscriptionSrtResponse"
+                    },
+                    {
+                      "$ref": "#/components/schemas/TranscriptionVttResponse"
+                    }
+                  ]
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest"
+          },
+          "401": {
+            "$ref": "#/components/responses/Unauthorized"
+          },
+          "413": {
+            "$ref": "#/components/responses/FileTooLarge"
+          },
+          "422": {
+            "$ref": "#/components/responses/ValidationError"
+          },
+          "429": {
+            "$ref": "#/components/responses/RateLimitExceeded"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError"
+          }
+        }
+      }
+    },
+    "/v1/models": {
+      "get": {
+        "summary": "List models",
+        "description": "Lists the currently available models, and provides basic information about each one such as the owner and availability.",
+        "operationId": "listModels",
+        "tags": [
+          "Models"
+        ],
+        "responses": {
+          "200": {
+            "description": "OK",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ListModelsResponse"
+                }
+              }
+            }
+          },
+          "401": {
+            "$ref": "#/components/responses/Unauthorized"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError"
+          }
+        }
+      }
+    },
+    "/v1/models/{model}": {
+      "get": {
+        "summary": "Retrieve model",
+        "description": "Retrieves a model instance, providing basic information about the model such as the owner and permissioning.",
+        "operationId": "retrieveModel",
+        "tags": [
+          "Models"
+        ],
+        "parameters": [
+          {
+            "name": "model",
+            "in": "path",
+            "required": true,
+            "description": "The ID of the model to use for this request",
+            "schema": {
+              "type": "string",
+              "enum": [
+                "tiny",
+                "base",
+                "small",
+                "medium",
+                "large"
+              ]
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "OK",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/Model"
+                }
+              }
+            }
+          },
+          "401": {
+            "$ref": "#/components/responses/Unauthorized"
+          },
+          "404": {
+            "$ref": "#/components/responses/NotFound"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError"
+          }
+        }
+      }
+    },
+    "/v1/health": {
+      "get": {
+        "summary": "Health check",
+        "description": "Check the health status of the API server",
+        "operationId": "healthCheck",
+        "tags": [
+          "System"
+        ],
+        "responses": {
+          "200": {
+            "description": "OK",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/HealthResponse"
+                }
+              }
+            }
+          }
+        }
+      }
+    },
+    "/v1/websocket": {
+      "get": {
+        "summary": "WebSocket connection",
+        "description": "Establishes a WebSocket connection for real-time audio transcription.\nSend audio data as binary frames and receive transcription results.\n",
+        "operationId": "websocketConnection",
+        "tags": [
+          "Real-time"
+        ],
+        "parameters": [
+          {
+            "name": "model",
+            "in": "query",
+            "description": "The model to use for transcription",
+            "schema": {
+              "type": "string",
+              "enum": [
+                "tiny",
+                "base",
+                "small",
+                "medium",
+                "large"
+              ],
+              "default": "base"
+            }
+          },
+          {
+            "name": "language",
+            "in": "query",
+            "description": "The language of the input audio",
+            "schema": {
+              "type": "string",
+              "pattern": "^[a-z]{2}$"
+            }
+          },
+          {
+            "name": "task",
+            "in": "query",
+            "description": "The task to perform",
+            "schema": {
+              "type": "string",
+              "enum": [
+                "transcribe",
+                "translate"
+              ],
+              "default": "transcribe"
+            }
+          }
+        ],
+        "responses": {
+          "101": {
+            "description": "Switching Protocols",
+            "headers": {
+              "Upgrade": {
+                "schema": {
+                  "type": "string",
+                  "example": "websocket"
+                }
+              },
+              "Connection": {
+                "schema": {
+                  "type": "string",
+                  "example": "Upgrade"
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest"
+          },
+          "401": {
+            "$ref": "#/components/responses/Unauthorized"
+          }
+        }
+      }
+    }
+  },
+  "components": {
+    "securitySchemes": {
+      "ApiKeyAuth": {
+        "type": "apiKey",
+        "in": "header",
+        "name": "Authorization",
+        "description": "API key authentication. Include your API key in the Authorization header.\nExample: `Authorization: Bearer your-api-key-here`\n"
+      }
+    },
+    "schemas": {
+      "TranscriptionResponse": {
+        "type": "object",
+        "properties": {
+          "text": {
+            "type": "string",
+            "description": "The transcribed text"
+          },
+          "language": {
+            "type": "string",
+            "description": "The language of the input audio"
+          },
+          "duration": {
+            "type": "number",
+            "description": "The duration of the input audio in seconds"
+          },
+          "words": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/Word"
+            },
+            "description": "Extracted words and their corresponding timestamps"
+          },
+          "segments": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/Segment"
+            },
+            "description": "Segments of the transcribed text with timestamps"
+          }
+        },
+        "required": [
+          "text"
+        ]
+      },
+      "TranscriptionTextResponse": {
+        "type": "string",
+        "description": "The transcribed text as plain text"
+      },
+      "TranscriptionSrtResponse": {
+        "type": "string",
+        "description": "The transcribed text in SRT subtitle format"
+      },
+      "TranscriptionVttResponse": {
+        "type": "string",
+        "description": "The transcribed text in VTT subtitle format"
+      },
+      "Word": {
+        "type": "object",
+        "properties": {
+          "word": {
+            "type": "string",
+            "description": "The text content of the word"
+          },
+          "start": {
+            "type": "number",
+            "description": "Start time of the word in seconds"
+          },
+          "end": {
+            "type": "number",
+            "description": "End time of the word in seconds"
+          },
+          "probability": {
+            "type": "number",
+            "description": "Confidence score of the word (0-1)"
+          }
+        },
+        "required": [
+          "word",
+          "start",
+          "end"
+        ]
+      },
+      "Segment": {
+        "type": "object",
+        "properties": {
+          "id": {
+            "type": "integer",
+            "description": "Unique identifier for the segment"
+          },
+          "seek": {
+            "type": "number",
+            "description": "Seek offset of the segment in seconds"
+          },
+          "start": {
+            "type": "number",
+            "description": "Start time of the segment in seconds"
+          },
+          "end": {
+            "type": "number",
+            "description": "End time of the segment in seconds"
+          },
+          "text": {
+            "type": "string",
+            "description": "The text content of the segment"
+          },
+          "tokens": {
+            "type": "array",
+            "items": {
+              "type": "integer"
+            },
+            "description": "Array of token IDs for the segment"
+          },
+          "temperature": {
+            "type": "number",
+            "description": "Temperature parameter used for generating this segment"
+          },
+          "avg_logprob": {
+            "type": "number",
+            "description": "Average log probability of the segment"
+          },
+          "compression_ratio": {
+            "type": "number",
+            "description": "Compression ratio of the segment"
+          },
+          "no_speech_prob": {
+            "type": "number",
+            "description": "Probability of no speech in this segment"
+          },
+          "words": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/Word"
+            },
+            "description": "Words in this segment"
+          }
+        },
+        "required": [
+          "id",
+          "seek",
+          "start",
+          "end",
+          "text"
+        ]
+      },
+      "Model": {
+        "type": "object",
+        "properties": {
+          "id": {
+            "type": "string",
+            "description": "The model identifier"
+          },
+          "object": {
+            "type": "string",
+            "enum": [
+              "model"
+            ],
+            "description": "The object type, which is always \"model\""
+          },
+          "created": {
+            "type": "integer",
+            "description": "The Unix timestamp (in seconds) when the model was created"
+          },
+          "owned_by": {
+            "type": "string",
+            "description": "The organization that owns the model"
+          },
+          "permission": {
+            "type": "array",
+            "items": {
+              "type": "object"
+            },
+            "description": "The permissions associated with the model"
+          },
+          "root": {
+            "type": "string",
+            "description": "The root of the model"
+          },
+          "parent": {
+            "type": "string",
+            "description": "The parent of the model"
+          }
+        },
+        "required": [
+          "id",
+          "object",
+          "created",
+          "owned_by"
+        ]
+      },
+      "ListModelsResponse": {
+        "type": "object",
+        "properties": {
+          "object": {
+            "type": "string",
+            "enum": [
+              "list"
+            ],
+            "description": "The object type, which is always \"list\""
+          },
+          "data": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/Model"
+            },
+            "description": "The list of models"
+          }
+        },
+        "required": [
+          "object",
+          "data"
+        ]
+      },
+      "HealthResponse": {
+        "type": "object",
+        "properties": {
+          "status": {
+            "type": "string",
+            "enum": [
+              "healthy",
+              "unhealthy"
+            ],
+            "description": "The health status of the service"
+          },
+          "service": {
+            "type": "string",
+            "description": "The name of the service"
+          },
+          "version": {
+            "type": "string",
+            "description": "The version of the service"
+          },
+          "timestamp": {
+            "type": "string",
+            "format": "date-time",
+            "description": "The current timestamp"
+          },
+          "uptime": {
+            "type": "number",
+            "description": "The uptime in seconds"
+          }
+        },
+        "required": [
+          "status",
+          "service"
+        ]
+      },
+      "Error": {
+        "type": "object",
+        "properties": {
+          "error": {
+            "type": "object",
+            "properties": {
+              "message": {
+                "type": "string",
+                "description": "A human-readable error message"
+              },
+              "type": {
+                "type": "string",
+                "description": "The type of error"
+              },
+              "code": {
+                "type": "string",
+                "description": "The error code"
+              },
+              "param": {
+                "type": "string",
+                "description": "The parameter that caused the error"
+              }
+            }
+          }
+        },
+        "required": [
+          "error"
+        ]
+      }
+    },
+    "responses": {
+      "BadRequest": {
+        "description": "Bad Request",
+        "content": {
+          "application/json": {
+            "schema": {
+              "$ref": "#/components/schemas/Error"
+            },
+            "example": {
+              "error": {
+                "message": "Invalid request parameters",
+                "type": "invalid_request_error",
+                "code": "invalid_parameters"
+              }
+            }
+          }
+        }
+      },
+      "Unauthorized": {
+        "description": "Unauthorized",
+        "content": {
+          "application/json": {
+            "schema": {
+              "$ref": "#/components/schemas/Error"
+            },
+            "example": {
+              "error": {
+                "message": "Invalid API key",
+                "type": "authentication_error",
+                "code": "invalid_api_key"
+              }
+            }
+          }
+        }
+      },
+      "FileTooLarge": {
+        "description": "File Too Large",
+        "content": {
+          "application/json": {
+            "schema": {
+              "$ref": "#/components/schemas/Error"
+            },
+            "example": {
+              "error": {
+                "message": "File size exceeds maximum allowed size",
+                "type": "invalid_request_error",
+                "code": "file_too_large"
+              }
+            }
+          }
+        }
+      },
+      "ValidationError": {
+        "description": "Validation Error",
+        "content": {
+          "application/json": {
+            "schema": {
+              "$ref": "#/components/schemas/Error"
+            },
+            "example": {
+              "error": {
+                "message": "Invalid file format",
+                "type": "invalid_request_error",
+                "code": "invalid_file_format"
+              }
+            }
+          }
+        }
+      },
+      "RateLimitExceeded": {
+        "description": "Rate Limit Exceeded",
+        "content": {
+          "application/json": {
+            "schema": {
+              "$ref": "#/components/schemas/Error"
+            },
+            "example": {
+              "error": {
+                "message": "Rate limit exceeded",
+                "type": "rate_limit_error",
+                "code": "rate_limit_exceeded"
+              }
+            }
+          }
+        }
+      },
+      "InternalServerError": {
+        "description": "Internal Server Error",
+        "content": {
+          "application/json": {
+            "schema": {
+              "$ref": "#/components/schemas/Error"
+            },
+            "example": {
+              "error": {
+                "message": "An internal server error occurred",
+                "type": "server_error",
+                "code": "internal_error"
+              }
+            }
+          }
+        }
+      },
+      "NotFound": {
+        "description": "Not Found",
+        "content": {
+          "application/json": {
+            "schema": {
+              "$ref": "#/components/schemas/Error"
+            },
+            "example": {
+              "error": {
+                "message": "Model not found",
+                "type": "invalid_request_error",
+                "code": "model_not_found"
+              }
+            }
+          }
+        }
+      }
+    }
+  },
+  "tags": [
+    {
+      "name": "Audio",
+      "description": "Audio transcription and translation operations"
+    },
+    {
+      "name": "Models",
+      "description": "Model management operations"
+    },
+    {
+      "name": "System",
+      "description": "System health and status operations"
+    },
+    {
+      "name": "Real-time",
+      "description": "Real-time audio processing via WebSocket"
+    }
+  ]
+}
--- a/requirements/server.txt
+++ b/requirements/server.txt
@ -9,5 +9,7 @@ av
 jiwer
 evaluate
 numpy<2
-openai-whisper==20240930
 tokenizers==0.20.3
+flask==3.0.0
+flask-sock
+websocket-client
--- a/scratch/dashboard.html
+++ b/scratch/dashboard.html
@ -0,0 +1,727 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>WhisperLive Dashboard</title>
+    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
+    <style>
+        :root {
+            --primary: #4f46e5;
+            --primary-hover: #4338ca;
+            --bg-color: #0f172a;
+            --card-bg: rgba(30, 41, 59, 0.7);
+            --text-main: #f8fafc;
+            --text-muted: #94a3b8;
+            --border: rgba(255, 255, 255, 0.1);
+            --success: #10b981;
+            --danger: #ef4444;
+            --warning: #f59e0b;
+        }
+
+        * {
+            box-sizing: border-box;
+            margin: 0;
+            padding: 0;
+        }
+
+        body {
+            font-family: 'Inter', sans-serif;
+            background-color: var(--bg-color);
+            color: var(--text-main);
+            min-height: 100vh;
+            background-image:
+                radial-gradient(at 0% 0%, rgba(79, 70, 229, 0.15) 0px, transparent 50%),
+                radial-gradient(at 100% 100%, rgba(16, 185, 129, 0.1) 0px, transparent 50%);
+            background-attachment: fixed;
+            padding: 2rem;
+        }
+
+        .container {
+            max-width: 1000px;
+            margin: 0 auto;
+        }
+
+        .header {
+            text-align: center;
+            margin-bottom: 2rem;
+        }
+
+        .header h1 {
+            font-size: 2.5rem;
+            font-weight: 700;
+            background: linear-gradient(to right, #818cf8, #34d399);
+            -webkit-background-clip: text;
+            -webkit-text-fill-color: transparent;
+            margin-bottom: 0.5rem;
+        }
+
+        .header p {
+            color: var(--text-muted);
+        }
+
+        .glass-panel {
+            background: var(--card-bg);
+            backdrop-filter: blur(12px);
+            -webkit-backdrop-filter: blur(12px);
+            border: 1px solid var(--border);
+            border-radius: 1rem;
+            padding: 1.5rem;
+            margin-bottom: 1.5rem;
+            box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.3);
+        }
+
+        /* Config Section */
+        .config-grid {
+            display: grid;
+            grid-template-columns: 1fr 1fr;
+            gap: 1rem;
+        }
+
+        @media (max-width: 768px) {
+            .config-grid {
+                grid-template-columns: 1fr;
+            }
+        }
+
+        .form-group {
+            margin-bottom: 1rem;
+        }
+
+        .form-group label {
+            display: block;
+            font-size: 0.875rem;
+            font-weight: 500;
+            margin-bottom: 0.5rem;
+            color: var(--text-muted);
+        }
+
+        input[type="text"],
+        input[type="file"],
+        select {
+            width: 100%;
+            padding: 0.75rem 1rem;
+            background: rgba(15, 23, 42, 0.6);
+            border: 1px solid var(--border);
+            border-radius: 0.5rem;
+            color: var(--text-main);
+            font-size: 0.875rem;
+            transition: all 0.2s;
+        }
+
+        input[type="text"]:focus,
+        select:focus {
+            outline: none;
+            border-color: var(--primary);
+            box-shadow: 0 0 0 2px rgba(79, 70, 229, 0.2);
+        }
+
+        /* Tabs */
+        .tabs {
+            display: flex;
+            gap: 0.5rem;
+            margin-bottom: 1rem;
+            border-bottom: 1px solid var(--border);
+            padding-bottom: 0.5rem;
+        }
+
+        .tab-btn {
+            background: transparent;
+            border: none;
+            color: var(--text-muted);
+            padding: 0.75rem 1.5rem;
+            font-size: 1rem;
+            font-weight: 500;
+            cursor: pointer;
+            border-radius: 0.5rem;
+            transition: all 0.2s;
+        }
+
+        .tab-btn:hover {
+            color: var(--text-main);
+            background: rgba(255, 255, 255, 0.05);
+        }
+
+        .tab-btn.active {
+            color: var(--text-main);
+            background: var(--primary);
+            box-shadow: 0 4px 6px -1px rgba(79, 70, 229, 0.4);
+        }
+
+        .tab-content {
+            display: none;
+            animation: fadeIn 0.3s ease-in-out;
+        }
+
+        .tab-content.active {
+            display: block;
+        }
+
+        @keyframes fadeIn {
+            from {
+                opacity: 0;
+                transform: translateY(5px);
+            }
+
+            to {
+                opacity: 1;
+                transform: translateY(0);
+            }
+        }
+
+        /* Buttons */
+        .btn {
+            background: var(--primary);
+            color: white;
+            border: none;
+            padding: 0.75rem 1.5rem;
+            border-radius: 0.5rem;
+            font-weight: 600;
+            cursor: pointer;
+            transition: all 0.2s;
+            display: inline-flex;
+            align-items: center;
+            justify-content: center;
+            gap: 0.5rem;
+            width: 100%;
+        }
+
+        .btn:hover {
+            background: var(--primary-hover);
+        }
+
+        .btn:disabled {
+            opacity: 0.5;
+            cursor: not-allowed;
+        }
+
+        .btn-danger {
+            background: var(--danger);
+        }
+
+        .btn-danger:hover {
+            background: #dc2626;
+        }
+
+        .btn-success {
+            background: var(--success);
+        }
+
+        .btn-success:hover {
+            background: #059669;
+        }
+
+        /* Results / Live View */
+        .transcript-box {
+            background: rgba(15, 23, 42, 0.6);
+            border: 1px solid var(--border);
+            border-radius: 0.5rem;
+            padding: 1.5rem;
+            min-height: 200px;
+            max-height: 400px;
+            overflow-y: auto;
+            margin-top: 1rem;
+            line-height: 1.6;
+        }
+
+        .segment {
+            margin-bottom: 0.75rem;
+            padding-bottom: 0.75rem;
+            border-bottom: 1px solid rgba(255, 255, 255, 0.05);
+        }
+
+        .segment:last-child {
+            border-bottom: none;
+            margin-bottom: 0;
+            padding-bottom: 0;
+        }
+
+        .segment-time {
+            font-size: 0.75rem;
+            color: var(--primary);
+            font-weight: 600;
+            margin-bottom: 0.25rem;
+        }
+
+        .status-badge {
+            display: inline-flex;
+            align-items: center;
+            gap: 0.3rem;
+            padding: 0.25rem 0.75rem;
+            border-radius: 9999px;
+            font-size: 0.75rem;
+            font-weight: 600;
+        }
+
+        .status-offline {
+            background: rgba(239, 68, 68, 0.2);
+            color: #fca5a5;
+        }
+
+        .status-online {
+            background: rgba(16, 185, 129, 0.2);
+            color: #6ee7b7;
+        }
+
+        .status-recording {
+            background: rgba(239, 68, 68, 0.2);
+            color: #fca5a5;
+            animation: pulse 2s infinite;
+        }
+
+        @keyframes pulse {
+            0% {
+                box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.4);
+            }
+
+            70% {
+                box-shadow: 0 0 0 10px rgba(239, 68, 68, 0);
+            }
+
+            100% {
+                box-shadow: 0 0 0 0 rgba(239, 68, 68, 0);
+            }
+        }
+
+        /* Code snippets */
+        pre {
+            background: #1e293b;
+            padding: 1rem;
+            border-radius: 0.5rem;
+            overflow-x: auto;
+            font-size: 0.875rem;
+            color: #e2e8f0;
+            border: 1px solid var(--border);
+            margin-bottom: 1rem;
+        }
+
+        code {
+            font-family: 'Courier New', Courier, monospace;
+        }
+
+        .loading-spinner {
+            display: none;
+            width: 24px;
+            height: 24px;
+            border: 3px solid rgba(255, 255, 255, 0.3);
+            border-radius: 50%;
+            border-top-color: white;
+            animation: spin 1s ease-in-out infinite;
+        }
+
+        @keyframes spin {
+            to {
+                transform: rotate(360deg);
+            }
+        }
+    </style>
+</head>
+
+<body>
+    <div class="container">
+        <div class="header">
+            <h1>WhisperLive</h1>
+            <p>High-Performance Real-Time Audio Transcription</p>
+        </div>
+
+        <!-- Configuration Panel -->
+        <div class="glass-panel">
+            <h3 style="margin-bottom: 1rem; font-size: 1.1rem;">Connection Settings</h3>
+            <div class="config-grid">
+                <div class="form-group">
+                    <label>HTTP API URL (For File Upload & API)</label>
+                    <input type="text" id="httpUrl" value="https://whisperlive.classroomcopilot.ai">
+                </div>
+                <div class="form-group">
+                    <label>WebSocket URL (For Live Audio)</label>
+                    <input type="text" id="wsUrl" value="wss://whisperlive.classroomcopilot.ai/ws">
+                </div>
+            </div>
+            <div style="margin-top: 0.5rem; font-size: 0.8rem; color: var(--text-muted);">
+                HTTP Status: <span id="httpStatus" class="status-badge status-offline">Checking...</span>
+            </div>
+        </div>
+
+        <!-- Main Workspace -->
+        <div class="glass-panel">
+            <div class="tabs">
+                <button class="tab-btn active" onclick="switchTab('file-tab')">File Upload</button>
+                <button class="tab-btn" onclick="switchTab('live-tab')">Live Microphone</button>
+                <button class="tab-btn" onclick="switchTab('api-tab')">API Usage</button>
+            </div>
+
+            <!-- Tab 1: File Upload -->
+            <div id="file-tab" class="tab-content active">
+                <form id="fileForm">
+                    <div class="form-group">
+                        <label>Audio File</label>
+                        <input type="file" id="audioFile" accept=".wav,.mp3,.flac,.m4a,.ogg,.webm" required>
+                    </div>
+
+                    <div class="config-grid">
+                        <div class="form-group">
+                            <label>Language</label>
+                            <select id="fileLanguage">
+                                <option value="">Auto-detect</option>
+                                <option value="en">English</option>
+                                <option value="es">Spanish</option>
+                                <option value="fr">French</option>
+                            </select>
+                        </div>
+                        <div class="form-group">
+                            <label>Task</label>
+                            <select id="fileTask">
+                                <option value="transcribe">Transcribe</option>
+                                <option value="translate">Translate to English</option>
+                            </select>
+                        </div>
+                    </div>
+
+                    <button type="submit" class="btn" id="fileSubmitBtn">
+                        <span>Transcribe File</span>
+                        <div class="loading-spinner" id="fileSpinner"></div>
+                    </button>
+                </form>
+
+                <div id="fileResult" style="display: none;">
+                    <div class="transcript-box" id="fileTranscript"></div>
+                </div>
+            </div>
+
+            <!-- Tab 2: Live Recording -->
+            <div id="live-tab" class="tab-content">
+                <div class="config-grid" style="margin-bottom: 1.5rem;">
+                    <div class="form-group">
+                        <label>Language</label>
+                        <select id="liveLanguage">
+                            <option value="en">English</option>
+                            <option value="es">Spanish</option>
+                            <option value="fr">French</option>
+                        </select>
+                    </div>
+                    <div class="form-group">
+                        <label>Task</label>
+                        <select id="liveTask">
+                            <option value="transcribe">Transcribe</option>
+                            <option value="translate">Translate to English</option>
+                        </select>
+                    </div>
+                </div>
+
+                <div style="display: flex; gap: 1rem; align-items: center;">
+                    <button id="recordBtn" class="btn btn-success" style="width: auto;">
+                        <span id="recordIcon">🎤</span> <span id="recordText">Start Recording</span>
+                    </button>
+                    <span id="liveStatus" class="status-badge status-offline" style="display: none;">Not
+                        connected</span>
+                </div>
+
+                <div class="transcript-box" id="liveTranscript">
+                    <div style="color: var(--text-muted); text-align: center; margin-top: 3rem;">
+                        Click Start Recording to begin live transcription...
+                    </div>
+                </div>
+            </div>
+
+            <!-- Tab 3: API Usage -->
+            <div id="api-tab" class="tab-content">
+                <h3 style="margin-bottom: 1rem;">OpenAI Compatible API</h3>
+                <p style="color: var(--text-muted); margin-bottom: 1rem; font-size: 0.9rem;">
+                    WhisperLive acts as a drop-in replacement for OpenAI's Whisper API. You can use any standard OpenAI
+                    client by changing the base URL.
+                </p>
+
+                <h4 style="margin-bottom: 0.5rem; color: #cbd5e1;">Python (openai package)</h4>
+                <pre><code id="pythonSnippet">from openai import OpenAI
+
+client = OpenAI(
+    api_key="sk-no-key-required",
+    base_url="https://whisperlive.classroomcopilot.ai/v1/"
+)
+
+with open("audio.wav", "rb") as file:
+    transcription = client.audio.transcriptions.create(
+        file=file,
+        model="base",
+        response_format="verbose_json"
+    )
+    
+print(transcription.text)</code></pre>
+
+                <h4 style="margin-bottom: 0.5rem; color: #cbd5e1;">cURL</h4>
+                <pre><code id="curlSnippet">curl https://whisperlive.classroomcopilot.ai/v1/audio/transcriptions \
+  -H "Content-Type: multipart/form-data" \
+  -F file="@audio.wav" \
+  -F model="base" \
+  -F response_format="verbose_json"</code></pre>
+            </div>
+        </div>
+    </div>
+
+    <script>
+        // DOM Elements
+        const httpUrlInput = document.getElementById('httpUrl');
+        const wsUrlInput = document.getElementById('wsUrl');
+        const httpStatus = document.getElementById('httpStatus');
+
+        // Initialization
+        window.onload = () => {
+            // Check if on same domain to set default URL intelligently, else leave defaults
+            if (window.location.hostname !== '' && window.location.hostname !== 'localhost') {
+                httpUrlInput.value = window.location.origin;
+                wsUrlInput.value = window.location.origin.replace(/^http/, 'ws') + '/ws';
+            }
+            checkHealth();
+            updateSnippets();
+        };
+
+        httpUrlInput.addEventListener('change', () => { checkHealth(); updateSnippets(); });
+
+        // Tab Switching
+        function switchTab(tabId) {
+            document.querySelectorAll('.tab-content').forEach(t => t.classList.remove('active'));
+            document.querySelectorAll('.tab-btn').forEach(b => b.classList.remove('active'));
+            document.getElementById(tabId).classList.add('active');
+            event.target.classList.add('active');
+        }
+
+        // Health Check
+        async function checkHealth() {
+            try {
+                const res = await fetch(`${httpUrlInput.value}/health`);
+                if (res.ok) {
+                    httpStatus.className = 'status-badge status-online';
+                    httpStatus.textContent = '✅ Online';
+                } else throw new Error();
+            } catch (e) {
+                httpStatus.className = 'status-badge status-offline';
+                httpStatus.textContent = '❌ Offline';
+            }
+        }
+
+        // Update Code Snippets
+        function updateSnippets() {
+            const baseUrl = httpUrlInput.value.endsWith('/') ? httpUrlInput.value.slice(0, -1) : httpUrlInput.value;
+            document.getElementById('pythonSnippet').textContent = `from openai import OpenAI\n\nclient = OpenAI(\n    api_key="sk-no-key-required",\n    base_url="${baseUrl}/v1/"\n)\n\nwith open("audio.wav", "rb") as file:\n    transcription = client.audio.transcriptions.create(\n        file=file,\n        model="base",\n        response_format="verbose_json"\n    )\n    \nprint(transcription.text)`;
+            document.getElementById('curlSnippet').textContent = `curl ${baseUrl}/v1/audio/transcriptions \\\n  -H "Content-Type: multipart/form-data" \\\n  -F file="@audio.wav" \\\n  -F model="base" \\\n  -F response_format="verbose_json"`;
+        }
+
+        // Utility: Format Time
+        function formatTime(seconds) {
+            if (!seconds) return "0:00";
+            const mins = Math.floor(seconds / 60);
+            const secs = (seconds % 60).toFixed(2);
+            return `${mins}:${secs.padStart(5, '0')}`;
+        }
+
+        // ==========================================
+        // FEATURE 1: FILE TRANSCRIPTION
+        // ==========================================
+        document.getElementById('fileForm').addEventListener('submit', async (e) => {
+            e.preventDefault();
+            const file = document.getElementById('audioFile').files[0];
+            if (!file) return;
+
+            const btn = document.getElementById('fileSubmitBtn');
+            const spinner = document.getElementById('fileSpinner');
+            const resultBox = document.getElementById('fileResult');
+            const transcriptBox = document.getElementById('fileTranscript');
+
+            btn.disabled = true;
+            spinner.style.display = 'block';
+            resultBox.style.display = 'none';
+
+            const formData = new FormData();
+            formData.append('file', file);
+            formData.append('model', 'base');
+            formData.append('response_format', 'verbose_json');
+
+            const lang = document.getElementById('fileLanguage').value;
+            if (lang) formData.append('language', lang);
+
+            const task = document.getElementById('fileTask').value;
+            const baseUrl = httpUrlInput.value.endsWith('/') ? httpUrlInput.value.slice(0, -1) : httpUrlInput.value;
+            const endpoint = task === 'translate' ? `${baseUrl}/v1/audio/translations` : `${baseUrl}/v1/audio/transcriptions`;
+
+            try {
+                const response = await fetch(endpoint, { method: 'POST', body: formData });
+                const data = await response.json();
+
+                resultBox.style.display = 'block';
+                if (response.ok) {
+                    let html = '';
+                    if (data.segments && data.segments.length > 0) {
+                        data.segments.forEach(seg => {
+                            html += `<div class="segment"><div class="segment-time">${formatTime(seg.start)} - ${formatTime(seg.end)}</div><div class="segment-text">${seg.text}</div></div>`;
+                        });
+                    } else if (data.text) {
+                        html += `<div class="segment"><div class="segment-text">${data.text}</div></div>`;
+                    }
+                    transcriptBox.innerHTML = html;
+                } else {
+                    transcriptBox.innerHTML = `<div style="color: var(--danger)">Error: ${data.error?.message || JSON.stringify(data.error)}</div>`;
+                }
+            } catch (error) {
+                resultBox.style.display = 'block';
+                transcriptBox.innerHTML = `<div style="color: var(--danger)">Network Error: ${error.message}</div>`;
+            } finally {
+                btn.disabled = false;
+                spinner.style.display = 'none';
+            }
+        });
+
+        // ==========================================
+        // FEATURE 2: LIVE WEBSOCKET TRANSCRIPTION
+        // ==========================================
+        let ws = null;
+        let audioContext = null;
+        let mediaStream = null;
+        let processor = null;
+        let isRecording = false;
+
+        const recordBtn = document.getElementById('recordBtn');
+        const liveStatus = document.getElementById('liveStatus');
+        const liveTranscript = document.getElementById('liveTranscript');
+
+        recordBtn.addEventListener('click', async () => {
+            if (isRecording) {
+                stopRecording();
+            } else {
+                startRecording();
+            }
+        });
+
+        async function startRecording() {
+            liveTranscript.innerHTML = '';
+            liveStatus.style.display = 'inline-flex';
+            liveStatus.className = 'status-badge status-offline';
+            liveStatus.textContent = 'Connecting...';
+
+            try {
+                // 1. Connect WebSocket
+                ws = new WebSocket(wsUrlInput.value);
+
+                ws.onopen = () => {
+                    // Send options to server
+                    const options = {
+                        uid: "web-" + Math.random().toString(36).substring(7),
+                        language: document.getElementById('liveLanguage').value,
+                        task: document.getElementById('liveTask').value,
+                        model: "base",
+                        use_vad: true
+                    };
+                    ws.send(JSON.stringify(options));
+                };
+
+                ws.onmessage = async (event) => {
+                    const data = JSON.parse(event.data);
+
+                    if (data.message === "SERVER_READY") {
+                        liveStatus.className = 'status-badge status-recording';
+                        liveStatus.innerHTML = '🔴 Recording';
+                        await startAudioCapture();
+                    } else if (data.segments) {
+                        renderLiveSegments(data.segments);
+                    } else if (data.status === "WAIT") {
+                        liveStatus.textContent = `Waiting in queue (Est: ${data.message} min)`;
+                    } else if (data.message === "DISCONNECT") {
+                        stopRecording();
+                        liveStatus.className = 'status-badge status-offline';
+                        liveStatus.textContent = 'Disconnected by server';
+                    }
+                };
+
+                ws.onerror = (err) => {
+                    console.error('WebSocket Error', err);
+                    stopRecording();
+                    liveStatus.className = 'status-badge status-offline';
+                    liveStatus.textContent = 'Connection Error';
+                };
+
+                ws.onclose = () => {
+                    stopRecording();
+                };
+
+                // Update UI
+                isRecording = true;
+                recordBtn.className = 'btn btn-danger';
+                document.getElementById('recordIcon').textContent = '⏹';
+                document.getElementById('recordText').textContent = 'Stop Recording';
+
+            } catch (err) {
+                console.error(err);
+                liveStatus.className = 'status-badge status-offline';
+                liveStatus.textContent = 'Microphone Error';
+                stopRecording();
+            }
+        }
+
+        async function startAudioCapture() {
+            mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
+            audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
+            const source = audioContext.createMediaStreamSource(mediaStream);
+
+            // Create a ScriptProcessorNode with bufferSize of 4096 and a single input/output channel
+            processor = audioContext.createScriptProcessor(4096, 1, 1);
+
+            processor.onaudioprocess = function (e) {
+                if (!isRecording || ws.readyState !== WebSocket.OPEN) return;
+                const float32Array = e.inputBuffer.getChannelData(0);
+                ws.send(float32Array.buffer);
+            };
+
+            source.connect(processor);
+            processor.connect(audioContext.destination);
+        }
+
+        function stopRecording() {
+            isRecording = false;
+
+            if (processor) {
+                processor.disconnect();
+                processor = null;
+            }
+            if (mediaStream) {
+                mediaStream.getTracks().forEach(track => track.stop());
+                mediaStream = null;
+            }
+            if (audioContext) {
+                audioContext.close();
+                audioContext = null;
+            }
+            if (ws) {
+                if (ws.readyState === WebSocket.OPEN) {
+                    ws.send("END_OF_AUDIO");
+                    setTimeout(() => ws.close(), 1000);
+                }
+                ws = null;
+            }
+
+            recordBtn.className = 'btn btn-success';
+            document.getElementById('recordIcon').textContent = '🎤';
+            document.getElementById('recordText').textContent = 'Start Recording';
+
+            if (liveStatus.textContent === '🔴 Recording') {
+                liveStatus.className = 'status-badge status-offline';
+                liveStatus.textContent = 'Stopped';
+            }
+        }
+
+        let liveSegments = [];
+        function renderLiveSegments(segments) {
+            let html = '';
+            segments.forEach(seg => {
+                const timeHtml = (seg.start !== undefined && seg.end !== undefined)
+                    ? `<div class="segment-time">${formatTime(seg.start)} - ${formatTime(seg.end)}</div>`
+                    : '';
+                html += `<div class="segment">${timeHtml}<div class="segment-text">${seg.text}</div></div>`;
+            });
+            liveTranscript.innerHTML = html;
+            liveTranscript.scrollTop = liveTranscript.scrollHeight;
+        }
+
+    </script>
+</body>
+
+</html>
--- a/scratch/test_ws.py
+++ b/scratch/test_ws.py
@ -0,0 +1,9 @@
+import websockets
+from websockets.sync.server import serve
+
+def handler(websocket):
+    print("Path:", websocket.request.path)
+    websocket.send("Hello")
+
+# Scratch server: bind `handler` to 127.0.0.1:8765 and block in serve_forever().
+with serve(handler, "127.0.0.1", 8765) as server:
+    server.serve_forever()
--- a/test_form.html
+++ b/test_form.html
@ -0,0 +1,727 @@
+<!DOCTYPE html>
+<html lang="en">
+
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>WhisperLive Dashboard</title>
+    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
+    <style>
+        :root {
+            --primary: #4f46e5;
+            --primary-hover: #4338ca;
+            --bg-color: #0f172a;
+            --card-bg: rgba(30, 41, 59, 0.7);
+            --text-main: #f8fafc;
+            --text-muted: #94a3b8;
+            --border: rgba(255, 255, 255, 0.1);
+            --success: #10b981;
+            --danger: #ef4444;
+            --warning: #f59e0b;
+        }
+
+        * {
+            box-sizing: border-box;
+            margin: 0;
+            padding: 0;
+        }
+
+        body {
+            font-family: 'Inter', sans-serif;
+            background-color: var(--bg-color);
+            color: var(--text-main);
+            min-height: 100vh;
+            background-image:
+                radial-gradient(at 0% 0%, rgba(79, 70, 229, 0.15) 0px, transparent 50%),
+                radial-gradient(at 100% 100%, rgba(16, 185, 129, 0.1) 0px, transparent 50%);
+            background-attachment: fixed;
+            padding: 2rem;
+        }
+
+        .container {
+            max-width: 1000px;
+            margin: 0 auto;
+        }
+
+        .header {
+            text-align: center;
+            margin-bottom: 2rem;
+        }
+
+        .header h1 {
+            font-size: 2.5rem;
+            font-weight: 700;
+            background: linear-gradient(to right, #818cf8, #34d399);
+            -webkit-background-clip: text;
+            -webkit-text-fill-color: transparent;
+            margin-bottom: 0.5rem;
+        }
+
+        .header p {
+            color: var(--text-muted);
+        }
+
+        .glass-panel {
+            background: var(--card-bg);
+            backdrop-filter: blur(12px);
+            -webkit-backdrop-filter: blur(12px);
+            border: 1px solid var(--border);
+            border-radius: 1rem;
+            padding: 1.5rem;
+            margin-bottom: 1.5rem;
+            box-shadow: 0 10px 25px -5px rgba(0, 0, 0, 0.3);
+        }
+
+        /* Config Section */
+        .config-grid {
+            display: grid;
+            grid-template-columns: 1fr 1fr;
+            gap: 1rem;
+        }
+
+        @media (max-width: 768px) {
+            .config-grid {
+                grid-template-columns: 1fr;
+            }
+        }
+
+        .form-group {
+            margin-bottom: 1rem;
+        }
+
+        .form-group label {
+            display: block;
+            font-size: 0.875rem;
+            font-weight: 500;
+            margin-bottom: 0.5rem;
+            color: var(--text-muted);
+        }
+
+        input[type="text"],
+        input[type="file"],
+        select {
+            width: 100%;
+            padding: 0.75rem 1rem;
+            background: rgba(15, 23, 42, 0.6);
+            border: 1px solid var(--border);
+            border-radius: 0.5rem;
+            color: var(--text-main);
+            font-size: 0.875rem;
+            transition: all 0.2s;
+        }
+
+        input[type="text"]:focus,
+        select:focus {
+            outline: none;
+            border-color: var(--primary);
+            box-shadow: 0 0 0 2px rgba(79, 70, 229, 0.2);
+        }
+
+        /* Tabs */
+        .tabs {
+            display: flex;
+            gap: 0.5rem;
+            margin-bottom: 1rem;
+            border-bottom: 1px solid var(--border);
+            padding-bottom: 0.5rem;
+        }
+
+        .tab-btn {
+            background: transparent;
+            border: none;
+            color: var(--text-muted);
+            padding: 0.75rem 1.5rem;
+            font-size: 1rem;
+            font-weight: 500;
+            cursor: pointer;
+            border-radius: 0.5rem;
+            transition: all 0.2s;
+        }
+
+        .tab-btn:hover {
+            color: var(--text-main);
+            background: rgba(255, 255, 255, 0.05);
+        }
+
+        .tab-btn.active {
+            color: var(--text-main);
+            background: var(--primary);
+            box-shadow: 0 4px 6px -1px rgba(79, 70, 229, 0.4);
+        }
+
+        .tab-content {
+            display: none;
+            animation: fadeIn 0.3s ease-in-out;
+        }
+
+        .tab-content.active {
+            display: block;
+        }
+
+        @keyframes fadeIn {
+            from {
+                opacity: 0;
+                transform: translateY(5px);
+            }
+
+            to {
+                opacity: 1;
+                transform: translateY(0);
+            }
+        }
+
+        /* Buttons */
+        .btn {
+            background: var(--primary);
+            color: white;
+            border: none;
+            padding: 0.75rem 1.5rem;
+            border-radius: 0.5rem;
+            font-weight: 600;
+            cursor: pointer;
+            transition: all 0.2s;
+            display: inline-flex;
+            align-items: center;
+            justify-content: center;
+            gap: 0.5rem;
+            width: 100%;
+        }
+
+        .btn:hover {
+            background: var(--primary-hover);
+        }
+
+        .btn:disabled {
+            opacity: 0.5;
+            cursor: not-allowed;
+        }
+
+        .btn-danger {
+            background: var(--danger);
+        }
+
+        .btn-danger:hover {
+            background: #dc2626;
+        }
+
+        .btn-success {
+            background: var(--success);
+        }
+
+        .btn-success:hover {
+            background: #059669;
+        }
+
+        /* Results / Live View */
+        .transcript-box {
+            background: rgba(15, 23, 42, 0.6);
+            border: 1px solid var(--border);
+            border-radius: 0.5rem;
+            padding: 1.5rem;
+            min-height: 200px;
+            max-height: 400px;
+            overflow-y: auto;
+            margin-top: 1rem;
+            line-height: 1.6;
+        }
+
+        .segment {
+            margin-bottom: 0.75rem;
+            padding-bottom: 0.75rem;
+            border-bottom: 1px solid rgba(255, 255, 255, 0.05);
+        }
+
+        .segment:last-child {
+            border-bottom: none;
+            margin-bottom: 0;
+            padding-bottom: 0;
+        }
+
+        .segment-time {
+            font-size: 0.75rem;
+            color: var(--primary);
+            font-weight: 600;
+            margin-bottom: 0.25rem;
+        }
+
+        .status-badge {
+            display: inline-flex;
+            align-items: center;
+            gap: 0.3rem;
+            padding: 0.25rem 0.75rem;
+            border-radius: 9999px;
+            font-size: 0.75rem;
+            font-weight: 600;
+        }
+
+        .status-offline {
+            background: rgba(239, 68, 68, 0.2);
+            color: #fca5a5;
+        }
+
+        .status-online {
+            background: rgba(16, 185, 129, 0.2);
+            color: #6ee7b7;
+        }
+
+        .status-recording {
+            background: rgba(239, 68, 68, 0.2);
+            color: #fca5a5;
+            animation: pulse 2s infinite;
+        }
+
+        @keyframes pulse {
+            0% {
+                box-shadow: 0 0 0 0 rgba(239, 68, 68, 0.4);
+            }
+
+            70% {
+                box-shadow: 0 0 0 10px rgba(239, 68, 68, 0);
+            }
+
+            100% {
+                box-shadow: 0 0 0 0 rgba(239, 68, 68, 0);
+            }
+        }
+
+        /* Code snippets */
+        pre {
+            background: #1e293b;
+            padding: 1rem;
+            border-radius: 0.5rem;
+            overflow-x: auto;
+            font-size: 0.875rem;
+            color: #e2e8f0;
+            border: 1px solid var(--border);
+            margin-bottom: 1rem;
+        }
+
+        code {
+            font-family: 'Courier New', Courier, monospace;
+        }
+
+        .loading-spinner {
+            display: none;
+            width: 24px;
+            height: 24px;
+            border: 3px solid rgba(255, 255, 255, 0.3);
+            border-radius: 50%;
+            border-top-color: white;
+            animation: spin 1s ease-in-out infinite;
+        }
+
+        @keyframes spin {
+            to {
+                transform: rotate(360deg);
+            }
+        }
+    </style>
+</head>
+
+<body>
+    <div class="container">
+        <div class="header">
+            <h1>WhisperLive</h1>
+            <p>High-Performance Real-Time Audio Transcription</p>
+        </div>
+
+        <!-- Configuration Panel -->
+        <div class="glass-panel">
+            <h3 style="margin-bottom: 1rem; font-size: 1.1rem;">Connection Settings</h3>
+            <div class="config-grid">
+                <div class="form-group">
+                    <label>HTTP API URL (For File Upload & API)</label>
+                    <input type="text" id="httpUrl" value="https://whisperlive.classroomcopilot.ai">
+                </div>
+                <div class="form-group">
+                    <label>WebSocket URL (For Live Audio)</label>
+                    <input type="text" id="wsUrl" value="wss://whisperlive.classroomcopilot.ai/ws">
+                </div>
+            </div>
+            <div style="margin-top: 0.5rem; font-size: 0.8rem; color: var(--text-muted);">
+                HTTP Status: <span id="httpStatus" class="status-badge status-offline">Checking...</span>
+            </div>
+        </div>
+
+        <!-- Main Workspace -->
+        <div class="glass-panel">
+            <div class="tabs">
+                <button class="tab-btn active" onclick="switchTab('file-tab')">File Upload</button>
+                <button class="tab-btn" onclick="switchTab('live-tab')">Live Microphone</button>
+                <button class="tab-btn" onclick="switchTab('api-tab')">API Usage</button>
+            </div>
+
+            <!-- Tab 1: File Upload -->
+            <div id="file-tab" class="tab-content active">
+                <form id="fileForm">
+                    <div class="form-group">
+                        <label>Audio File</label>
+                        <input type="file" id="audioFile" accept=".wav,.mp3,.flac,.m4a,.ogg,.webm" required>
+                    </div>
+
+                    <div class="config-grid">
+                        <div class="form-group">
+                            <label>Language</label>
+                            <select id="fileLanguage">
+                                <option value="">Auto-detect</option>
+                                <option value="en">English</option>
+                                <option value="es">Spanish</option>
+                                <option value="fr">French</option>
+                            </select>
+                        </div>
+                        <div class="form-group">
+                            <label>Task</label>
+                            <select id="fileTask">
+                                <option value="transcribe">Transcribe</option>
+                                <option value="translate">Translate to English</option>
+                            </select>
+                        </div>
+                    </div>
+
+                    <button type="submit" class="btn" id="fileSubmitBtn">
+                        <span>Transcribe File</span>
+                        <div class="loading-spinner" id="fileSpinner"></div>
+                    </button>
+                </form>
+
+                <div id="fileResult" style="display: none;">
+                    <div class="transcript-box" id="fileTranscript"></div>
+                </div>
+            </div>
+
+            <!-- Tab 2: Live Recording -->
+            <div id="live-tab" class="tab-content">
+                <div class="config-grid" style="margin-bottom: 1.5rem;">
+                    <div class="form-group">
+                        <label>Language</label>
+                        <select id="liveLanguage">
+                            <option value="en">English</option>
+                            <option value="es">Spanish</option>
+                            <option value="fr">French</option>
+                        </select>
+                    </div>
+                    <div class="form-group">
+                        <label>Task</label>
+                        <select id="liveTask">
+                            <option value="transcribe">Transcribe</option>
+                            <option value="translate">Translate to English</option>
+                        </select>
+                    </div>
+                </div>
+
+                <div style="display: flex; gap: 1rem; align-items: center;">
+                    <button id="recordBtn" class="btn btn-success" style="width: auto;">
+                        <span id="recordIcon">🎤</span> <span id="recordText">Start Recording</span>
+                    </button>
+                    <span id="liveStatus" class="status-badge status-offline" style="display: none;">Not
+                        connected</span>
+                </div>
+
+                <div class="transcript-box" id="liveTranscript">
+                    <div style="color: var(--text-muted); text-align: center; margin-top: 3rem;">
+                        Click Start Recording to begin live transcription...
+                    </div>
+                </div>
+            </div>
+
+            <!-- Tab 3: API Usage -->
+            <div id="api-tab" class="tab-content">
+                <h3 style="margin-bottom: 1rem;">OpenAI Compatible API</h3>
+                <p style="color: var(--text-muted); margin-bottom: 1rem; font-size: 0.9rem;">
+                    WhisperLive acts as a drop-in replacement for OpenAI's Whisper API. You can use any standard OpenAI
+                    client by changing the base URL.
+                </p>
+
+                <h4 style="margin-bottom: 0.5rem; color: #cbd5e1;">Python (openai package)</h4>
+                <pre><code id="pythonSnippet">from openai import OpenAI
+
+client = OpenAI(
+    api_key="sk-no-key-required",
+    base_url="https://whisperlive.classroomcopilot.ai/v1/"
+)
+
+with open("audio.wav", "rb") as file:
+    transcription = client.audio.transcriptions.create(
+        file=file,
+        model="base",
+        response_format="verbose_json"
+    )
+    
+print(transcription.text)</code></pre>
+
+                <h4 style="margin-bottom: 0.5rem; color: #cbd5e1;">cURL</h4>
+                <pre><code id="curlSnippet">curl https://whisperlive.classroomcopilot.ai/v1/audio/transcriptions \
+  -H "Content-Type: multipart/form-data" \
+  -F file="@audio.wav" \
+  -F model="base" \
+  -F response_format="verbose_json"</code></pre>
+            </div>
+        </div>
+    </div>
+
+    <script>
+        // DOM Elements: the two connection-settings inputs and the HTTP status badge,
+        // looked up once and shared by the functions below.
+        const httpUrlInput = document.getElementById('httpUrl');
+        const wsUrlInput = document.getElementById('wsUrl');
+        const httpStatus = document.getElementById('httpStatus');
+
+        // Initialization
+        window.onload = () => {
+            // Check if on same domain to set default URL intelligently, else leave defaults
+            if (window.location.hostname !== '' && window.location.hostname !== 'localhost') {
+                httpUrlInput.value = window.location.origin;
+                wsUrlInput.value = window.location.origin.replace(/^http/, 'ws') + '/ws';
+            }
+            checkHealth();
+            updateSnippets();
+        };
+
+        httpUrlInput.addEventListener('change', () => { checkHealth(); updateSnippets(); });
+
+        // Tab Switching
+        function switchTab(tabId) {
+            document.querySelectorAll('.tab-content').forEach(t => t.classList.remove('active'));
+            document.querySelectorAll('.tab-btn').forEach(b => b.classList.remove('active'));
+            document.getElementById(tabId).classList.add('active');
+            event.target.classList.add('active');
+        }
+
+        // Health Check
+        async function checkHealth() {
+            try {
+                const res = await fetch(`${httpUrlInput.value}/health`);
+                if (res.ok) {
+                    httpStatus.className = 'status-badge status-online';
+                    httpStatus.textContent = '✅ Online';
+                } else throw new Error();
+            } catch (e) {
+                httpStatus.className = 'status-badge status-offline';
+                httpStatus.textContent = '❌ Offline';
+            }
+        }
+
+        // Update Code Snippets
+        function updateSnippets() {
+            const baseUrl = httpUrlInput.value.endsWith('/') ? httpUrlInput.value.slice(0, -1) : httpUrlInput.value;
+            document.getElementById('pythonSnippet').textContent = `from openai import OpenAI\n\nclient = OpenAI(\n    api_key="sk-no-key-required",\n    base_url="${baseUrl}/v1/"\n)\n\nwith open("audio.wav", "rb") as file:\n    transcription = client.audio.transcriptions.create(\n        file=file,\n        model="base",\n        response_format="verbose_json"\n    )\n    \nprint(transcription.text)`;
+            document.getElementById('curlSnippet').textContent = `curl ${baseUrl}/v1/audio/transcriptions \\\n  -H "Content-Type: multipart/form-data" \\\n  -F file="@audio.wav" \\\n  -F model="base" \\\n  -F response_format="verbose_json"`;
+        }
+
+        // Utility: Format Time
+        function formatTime(seconds) {
+            if (!seconds) return "0:00";
+            const mins = Math.floor(seconds / 60);
+            const secs = (seconds % 60).toFixed(2);
+            return `${mins}:${secs.padStart(5, '0')}`;
+        }
+
+        // ==========================================
+        // FEATURE 1: FILE TRANSCRIPTION
+        // ==========================================
+        document.getElementById('fileForm').addEventListener('submit', async (e) => {
+            e.preventDefault();
+            const file = document.getElementById('audioFile').files[0];
+            if (!file) return;
+
+            const btn = document.getElementById('fileSubmitBtn');
+            const spinner = document.getElementById('fileSpinner');
+            const resultBox = document.getElementById('fileResult');
+            const transcriptBox = document.getElementById('fileTranscript');
+
+            btn.disabled = true;
+            spinner.style.display = 'block';
+            resultBox.style.display = 'none';
+
+            const formData = new FormData();
+            formData.append('file', file);
+            formData.append('model', 'base');
+            formData.append('response_format', 'verbose_json');
+
+            const lang = document.getElementById('fileLanguage').value;
+            if (lang) formData.append('language', lang);
+
+            const task = document.getElementById('fileTask').value;
+            const baseUrl = httpUrlInput.value.endsWith('/') ? httpUrlInput.value.slice(0, -1) : httpUrlInput.value;
+            const endpoint = task === 'translate' ? `${baseUrl}/v1/audio/translations` : `${baseUrl}/v1/audio/transcriptions`;
+
+            try {
+                const response = await fetch(endpoint, { method: 'POST', body: formData });
+                const data = await response.json();
+
+                resultBox.style.display = 'block';
+                if (response.ok) {
+                    let html = '';
+                    if (data.segments && data.segments.length > 0) {
+                        data.segments.forEach(seg => {
+                            html += `<div class="segment"><div class="segment-time">${formatTime(seg.start)} - ${formatTime(seg.end)}</div><div class="segment-text">${seg.text}</div></div>`;
+                        });
+                    } else if (data.text) {
+                        html += `<div class="segment"><div class="segment-text">${data.text}</div></div>`;
+                    }
+                    transcriptBox.innerHTML = html;
+                } else {
+                    transcriptBox.innerHTML = `<div style="color: var(--danger)">Error: ${data.error?.message || JSON.stringify(data.error)}</div>`;
+                }
+            } catch (error) {
+                resultBox.style.display = 'block';
+                transcriptBox.innerHTML = `<div style="color: var(--danger)">Network Error: ${error.message}</div>`;
+            } finally {
+                btn.disabled = false;
+                spinner.style.display = 'none';
+            }
+        });
+
+        // ==========================================
+        // FEATURE 2: LIVE WEBSOCKET TRANSCRIPTION
+        // ==========================================
+        let ws = null;
+        let audioContext = null;
+        let mediaStream = null;
+        let processor = null;
+        let isRecording = false;
+
+        const recordBtn = document.getElementById('recordBtn');
+        const liveStatus = document.getElementById('liveStatus');
+        const liveTranscript = document.getElementById('liveTranscript');
+
+        recordBtn.addEventListener('click', async () => {
+            if (isRecording) {
+                stopRecording();
+            } else {
+                startRecording();
+            }
+        });
+
+        async function startRecording() {
+            liveTranscript.innerHTML = '';
+            liveStatus.style.display = 'inline-flex';
+            liveStatus.className = 'status-badge status-offline';
+            liveStatus.textContent = 'Connecting...';
+
+            try {
+                // 1. Connect WebSocket
+                ws = new WebSocket(wsUrlInput.value);
+
+                ws.onopen = () => {
+                    // Send options to server
+                    const options = {
+                        uid: "web-" + Math.random().toString(36).substring(7),
+                        language: document.getElementById('liveLanguage').value,
+                        task: document.getElementById('liveTask').value,
+                        model: "base",
+                        use_vad: true
+                    };
+                    ws.send(JSON.stringify(options));
+                };
+
+                ws.onmessage = async (event) => {
+                    const data = JSON.parse(event.data);
+
+                    if (data.message === "SERVER_READY") {
+                        liveStatus.className = 'status-badge status-recording';
+                        liveStatus.innerHTML = '🔴 Recording';
+                        await startAudioCapture();
+                    } else if (data.segments) {
+                        renderLiveSegments(data.segments);
+                    } else if (data.status === "WAIT") {
+                        liveStatus.textContent = `Waiting in queue (Est: ${data.message} min)`;
+                    } else if (data.message === "DISCONNECT") {
+                        stopRecording();
+                        liveStatus.className = 'status-badge status-offline';
+                        liveStatus.textContent = 'Disconnected by server';
+                    }
+                };
+
+                ws.onerror = (err) => {
+                    console.error('WebSocket Error', err);
+                    stopRecording();
+                    liveStatus.className = 'status-badge status-offline';
+                    liveStatus.textContent = 'Connection Error';
+                };
+
+                ws.onclose = () => {
+                    stopRecording();
+                };
+
+                // Update UI
+                isRecording = true;
+                recordBtn.className = 'btn btn-danger';
+                document.getElementById('recordIcon').textContent = '⏹';
+                document.getElementById('recordText').textContent = 'Stop Recording';
+
+            } catch (err) {
+                console.error(err);
+                liveStatus.className = 'status-badge status-offline';
+                liveStatus.textContent = 'Microphone Error';
+                stopRecording();
+            }
+        }
+
+        async function startAudioCapture() {
+            mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
+            audioContext = new (window.AudioContext || window.webkitAudioContext)({ sampleRate: 16000 });
+            const source = audioContext.createMediaStreamSource(mediaStream);
+
+            // Create a ScriptProcessorNode with bufferSize of 4096 and a single input/output channel
+            processor = audioContext.createScriptProcessor(4096, 1, 1);
+
+            processor.onaudioprocess = function (e) {
+                if (!isRecording || ws.readyState !== WebSocket.OPEN) return;
+                const float32Array = e.inputBuffer.getChannelData(0);
+                ws.send(float32Array.buffer);
+            };
+
+            source.connect(processor);
+            processor.connect(audioContext.destination);
+        }
+
+        function stopRecording() {
+            isRecording = false;
+
+            if (processor) {
+                processor.disconnect();
+                processor = null;
+            }
+            if (mediaStream) {
+                mediaStream.getTracks().forEach(track => track.stop());
+                mediaStream = null;
+            }
+            if (audioContext) {
+                audioContext.close();
+                audioContext = null;
+            }
+            if (ws) {
+                if (ws.readyState === WebSocket.OPEN) {
+                    ws.send("END_OF_AUDIO");
+                    setTimeout(() => ws.close(), 1000);
+                }
+                ws = null;
+            }
+
+            recordBtn.className = 'btn btn-success';
+            document.getElementById('recordIcon').textContent = '🎤';
+            document.getElementById('recordText').textContent = 'Start Recording';
+
+            if (liveStatus.textContent === '🔴 Recording') {
+                liveStatus.className = 'status-badge status-offline';
+                liveStatus.textContent = 'Stopped';
+            }
+        }
+
+        let liveSegments = [];
+        function renderLiveSegments(segments) {
+            let html = '';
+            segments.forEach(seg => {
+                const timeHtml = (seg.start !== undefined && seg.end !== undefined)
+                    ? `<div class="segment-time">${formatTime(seg.start)} - ${formatTime(seg.end)}</div>`
+                    : '';
+                html += `<div class="segment">${timeHtml}<div class="segment-text">${seg.text}</div></div>`;
+            });
+            liveTranscript.innerHTML = html;
+            liveTranscript.scrollTop = liveTranscript.scrollHeight;
+        }
+
+    </script>
+</body>
+
+</html>
--- a/test_http_endpoints.py
+++ b/test_http_endpoints.py
@ -0,0 +1,159 @@
+#!/usr/bin/env python3
+"""
+Test script for WhisperLive HTTP endpoints
+This script demonstrates how to use the new HTTP API for file transcription
+"""
+
+import requests
+import json
+import os
+from pathlib import Path
+
+# Configuration
+# Base URL of the HTTP API; every request in this script is built from it.
+HTTP_BASE_URL = "http://localhost:8080"  # Adjust if using different port
+# NOTE(review): not referenced anywhere else in this script.
+WEBSOCKET_PORT = 5050  # Your existing WebSocket port
+
+def test_health_endpoint(timeout=10):
+    """Check that the ``/health`` endpoint answers with HTTP 200.
+
+    Args:
+        timeout: Seconds passed to ``requests`` as the request timeout, so
+            an unresponsive server cannot hang the script indefinitely.
+
+    Returns:
+        True if the response status is 200; False for any other status or
+        if the request or the JSON decoding of the response raised.
+    """
+    print("Testing health endpoint...")
+    try:
+        response = requests.get(f"{HTTP_BASE_URL}/health", timeout=timeout)
+        print(f"Status: {response.status_code}")
+        print(f"Response: {response.json()}")
+        return response.status_code == 200
+    except Exception as e:
+        # Best-effort check: report the failure instead of crashing the script
+        print(f"Error: {e}")
+        return False
+
+def test_file_transcription(audio_file_path, language=None, task="transcribe", model="base", timeout=300):
+    """Upload an audio file to the ``/transcribe`` endpoint and print the result.
+
+    Args:
+        audio_file_path: Path of the audio file to upload.
+        language: Language value sent in the form data; ``None`` is printed
+            as "auto-detect".
+        task: Task value sent in the form data.
+        model: Model value sent in the form data.
+        timeout: Seconds passed to ``requests`` as the request timeout, so
+            a stalled server cannot hang the script indefinitely.
+
+    Returns:
+        True if the server answered 200 and the result could be printed;
+        False if the file does not exist, the status is not 200, or any
+        exception was raised while sending or reading the response.
+    """
+    print("\nTesting file transcription endpoint...")
+    print(f"File: {audio_file_path}")
+    print(f"Language: {language or 'auto-detect'}")
+    print(f"Task: {task}")
+    print(f"Model: {model}")
+
+    if not os.path.exists(audio_file_path):
+        print(f"Error: File {audio_file_path} not found")
+        return False
+
+    try:
+        # Prepare the request
+        data = {
+            'language': language,
+            'task': task,
+            'model': model
+        }
+
+        # Open the upload in a context manager so the handle is closed even
+        # when the request raises.
+        with open(audio_file_path, 'rb') as audio_file:
+            response = requests.post(
+                f"{HTTP_BASE_URL}/transcribe",
+                files={'file': audio_file},
+                data=data,
+                timeout=timeout,
+            )
+
+        print(f"Status: {response.status_code}")
+
+        if response.status_code != 200:
+            print(f"Error: {response.text}")
+            return False
+
+        result = response.json()
+        segments = result['segments']
+        print("Transcription successful!")
+        print(f"Filename: {result.get('filename')}")
+        print(f"Language: {result['info'].get('language')}")
+        print(f"Duration: {result['info'].get('duration')} seconds")
+        print(f"Number of segments: {len(segments)}")
+
+        # Print first few segments
+        for i, segment in enumerate(segments[:3]):
+            print(f"Segment {i+1}: [{segment['start']:.2f}s - {segment['end']:.2f}s] {segment['text']}")
+
+        if len(segments) > 3:
+            print(f"... and {len(segments) - 3} more segments")
+
+        return True
+
+    except Exception as e:
+        # Covers network errors as well as a response missing expected keys
+        print(f"Error: {e}")
+        return False
+
+def test_url_transcription(timeout=60):
+    """Test URL transcription endpoint (placeholder).
+
+    Posts a fixed JSON payload pointing at ``https://example.com/audio.mp3``
+    to ``/transcribe/url`` and prints the status and decoded response.
+
+    Args:
+        timeout: Seconds passed to ``requests`` as the request timeout, so
+            a stalled server cannot hang the script indefinitely.
+
+    Returns:
+        True if the response status is 200; False for any other status or
+        if the request or the JSON decoding of the response raised.
+    """
+    print("\nTesting URL transcription endpoint...")
+    try:
+        data = {
+            'url': 'https://example.com/audio.mp3',
+            'language': 'en',
+            'task': 'transcribe',
+            'model': 'base'
+        }
+
+        response = requests.post(f"{HTTP_BASE_URL}/transcribe/url", json=data, timeout=timeout)
+        print(f"Status: {response.status_code}")
+        print(f"Response: {response.json()}")
+        return response.status_code == 200
+
+    except Exception as e:
+        # Best-effort check: report the failure instead of crashing the script
+        print(f"Error: {e}")
+        return False
+
+def test_openai_endpoint(audio_file_path, timeout=300):
+    """Upload an audio file to the OpenAI compatible ``/v1/audio/transcriptions`` endpoint.
+
+    The form data always requests model ``whisper-1`` with the ``json``
+    response format.
+
+    Args:
+        audio_file_path: Path of the audio file to upload.
+        timeout: Seconds passed to ``requests`` as the request timeout, so
+            a stalled server cannot hang the script indefinitely.
+
+    Returns:
+        True if the server answered 200 and the JSON body could be decoded;
+        False if the file does not exist, the status is not 200, or any
+        exception was raised while sending or reading the response.
+    """
+    print("\nTesting OpenAI compatible endpoint...")
+    print(f"File: {audio_file_path}")
+
+    if not os.path.exists(audio_file_path):
+        print(f"Error: File {audio_file_path} not found")
+        return False
+
+    try:
+        data = {
+            'model': 'whisper-1',
+            'response_format': 'json'
+        }
+
+        # Open the upload in a context manager so the handle is closed even
+        # when the request raises.
+        with open(audio_file_path, 'rb') as audio_file:
+            response = requests.post(
+                f"{HTTP_BASE_URL}/v1/audio/transcriptions",
+                files={'file': audio_file},
+                data=data,
+                timeout=timeout,
+            )
+        print(f"Status: {response.status_code}")
+
+        if response.status_code != 200:
+            print(f"Error: {response.text}")
+            return False
+
+        result = response.json()
+        print("OpenAI endpoint successful!")
+        print(f"Response: {result}")
+        return True
+
+    except Exception as e:
+        # Best-effort check: report the failure instead of crashing the script
+        print(f"Error: {e}")
+        return False
+
+def main():
+    """Run the endpoint checks in order: health, file upload, OpenAI compatible, URL.
+
+    Stops right after the health check when it fails. The two upload checks
+    only run when the sample audio file exists; the URL check always runs
+    once the health check has passed.
+    """
+    banner = "=" * 40
+    print("WhisperLive HTTP Endpoints Test")
+    print(banner)
+
+    # Nothing else is attempted when the server does not answer the health check
+    server_ok = test_health_endpoint()
+    if not server_ok:
+        print("Health check failed. Make sure the server is running.")
+        return
+
+    # Sample recording used for the upload-based checks; adjust path as needed
+    sample_audio = "assets/jfk.flac"
+
+    if not os.path.exists(sample_audio):
+        print(f"\nSample audio file not found at {sample_audio}")
+        print("You can test with any audio file by calling:")
+        print("test_file_transcription('path/to/your/audio.wav')")
+    else:
+        test_file_transcription(sample_audio, language="en", task="transcribe", model="base")
+        test_openai_endpoint(sample_audio)
+
+    # The URL check runs whether or not the sample file was found
+    test_url_transcription()
+
+    print("\n" + banner)
+    print("Test completed!")
+
+# Run the checks only when executed as a script, not when imported
+if __name__ == "__main__":
+    main()