import argparse
import json
import logging
import os
import socket
import ssl
import tempfile
import threading
from pathlib import Path

from flask import Flask, request, jsonify, send_file, Response
from flask_sock import Sock
from werkzeug.utils import secure_filename
import websocket as ws_client


def _split_timestamp(s):
    """Break a duration in seconds into ``(hours, minutes, seconds, ms)``.

    The value is rounded to the nearest millisecond *before* it is split.
    Splitting first and truncating afterwards loses a millisecond whenever
    the float is stored slightly low (``2.3`` is really ``2.2999...``) and
    never carries a rounded-up fraction into the seconds field.

    Negative inputs are clamped to zero: the timestamp layouts produced by
    the callers have no way to express a negative offset.

    Args:
        s: Offset in seconds (anything ``float()`` accepts).

    Returns:
        A 4-tuple of non-negative ints ``(hours, minutes, seconds, ms)``.
    """
    total_ms = max(0, round(float(s) * 1000))
    total_seconds, milliseconds = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return hours, minutes, seconds, milliseconds


def format_time_srt(s):
    """Format ``s`` seconds as an SRT timestamp, ``HH:MM:SS,mmm``.

    Args:
        s: Offset in seconds.

    Returns:
        The timestamp string, using a comma before the milliseconds.
    """
    hours, minutes, seconds, milliseconds = _split_timestamp(s)
    return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"


def format_time_vtt(s):
    """Format ``s`` seconds as a WebVTT timestamp, ``HH:MM:SS.mmm``.

    Args:
        s: Offset in seconds.

    Returns:
        The timestamp string, using a dot before the milliseconds.
    """
    hours, minutes, seconds, milliseconds = _split_timestamp(s)
    return f"{hours:02}:{minutes:02}:{seconds:02}.{milliseconds:03}"


def generate_srt(segments):
    """Render transcription segments as SRT subtitle text.

    Args:
        segments: Iterable of mappings, each providing ``'start'`` and
            ``'end'`` (convertible to float, in seconds) and ``'text'``.

    Returns:
        The SRT document as one string; cues are numbered from 1 and each
        is followed by a blank line. An empty input yields ``""``.
    """
    cues = []
    for index, segment in enumerate(segments, start=1):
        start_time = format_time_srt(float(segment['start']))
        end_time = format_time_srt(float(segment['end']))
        text = segment['text'].strip()
        cues.append(f"{index}\n{start_time} --> {end_time}\n{text}\n\n")
    # Join once instead of growing a string with += on every cue.
    return "".join(cues)


def generate_vtt(segments):
    """Render transcription segments as WebVTT subtitle text.

    Args:
        segments: Iterable of mappings, each providing ``'start'`` and
            ``'end'`` (convertible to float, in seconds) and ``'text'``.

    Returns:
        The WebVTT document as one string, starting with the ``WEBVTT``
        header. An empty input yields just the header block.
    """
    parts = ["WEBVTT\n\n"]
    for segment in segments:
        start_time = format_time_vtt(float(segment['start']))
        end_time = format_time_vtt(float(segment['end']))
        text = segment['text'].strip()
        parts.append(f"{start_time} --> {end_time}\n{text}\n\n")
    return "".join(parts)


# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def check_port_availability(port):
    """Report whether nothing is currently accepting connections on ``port``.

    The check is a TCP connect attempt: if the connect succeeds, something
    is already listening and the port is considered taken.

    Args:
        port: TCP port number to probe.

    Returns:
        ``True`` when the connect attempt fails (port looks free),
        ``False`` when it succeeds (port is in use).
    """
    # The context manager closes the socket even if connect_ex raises
    # (for example on an out-of-range port number).
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        # NOTE(review): connecting to 0.0.0.0 is kept from the original;
        # confirm it behaves as intended on every target platform.
        result = sock.connect_ex(('0.0.0.0', port))
    return result != 0


class HybridWhisperServer:
    def __init__(self, websocket_port, http_port, backend="faster_whisper",
                 faster_whisper_custom_model_path=None,
                 whisper_tensorrt_path=None, trt_multilingual=False,
                 single_model=True, ssl_context=None):
        self.websocket_port = websocket_port
        self.http_port = http_port
        self.backend = backend
        self.faster_whisper_custom_model_path = faster_whisper_custom_model_path
        self.whisper_tensorrt_path = whisper_tensorrt_path
self.trt_multilingual = trt_multilingual self.single_model = single_model self.ssl_context = ssl_context # Initialize Flask app self.app = Flask(__name__) self.app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024 # 100MB max file size self.sock = Sock(self.app) self.setup_routes() # Initialize WhisperLive server from whisper_live.server import TranscriptionServer self.whisper_server = TranscriptionServer() # Create a shared transcriber instance for HTTP requests self.shared_transcriber = None if self.backend == "faster_whisper": from whisper_live.transcriber import WhisperModel # Use base model as default for HTTP requests model_size = "base" if self.faster_whisper_custom_model_path: model_size = self.faster_whisper_custom_model_path self.shared_transcriber = WhisperModel(model_size) def setup_routes(self): @self.app.route('/health', methods=['GET']) def health_check(): # Get GPU memory from nvidia-smi (GPU 1) import subprocess try: gpu_mem = float(subprocess.check_output( 'nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i 1', shell=True ).decode().strip()) / 1024.0 except Exception: gpu_mem = 0.0 # Get active WS connections active = len(self.whisper_server.clients) if hasattr(self.whisper_server, 'clients') else 0 return jsonify({ 'status': 'healthy', 'service': 'WhisperLive Hybrid Server', 'model_loaded': self.shared_transcriber is not None, 'gpu_memory_used_gb': round(gpu_mem, 1), 'active_connections': active }) @self.app.route('/', methods=['GET']) def serve_test_form(): """Serve the HTML test form""" html_content = """
High-Performance Real-Time Audio Transcription
WhisperLive acts as a drop-in replacement for OpenAI's Whisper API. You can use any standard OpenAI client by changing the base URL.
from openai import OpenAI
client = OpenAI(
api_key="sk-no-key-required",
base_url="https://whisperlive.classroomcopilot.ai/v1/"
)
with open("audio.wav", "rb") as file:
transcription = client.audio.transcriptions.create(
file=file,
model="base",
response_format="verbose_json"
)
print(transcription.text)
curl https://whisperlive.classroomcopilot.ai/v1/audio/transcriptions \
-H "Content-Type: multipart/form-data" \
-F file="@audio.wav" \
-F model="base" \
-F response_format="verbose_json"