From 335b4f498c0c606d07da87e96283940527aed4b4 Mon Sep 17 00:00:00 2001
From: kcar
Date: Wed, 20 May 2026 20:46:52 +0000
Subject: [PATCH] fix: vad control, session metadata, health endpoint

- /health additionally reports model_loaded, gpu_memory_used_gb (queried
  from nvidia-smi for GPU index 1) and active_connections.
- The transcription request handler reads a use_vad form field (default
  "true") and passes it to the shared transcriber as vad_filter.
- Text messages that parse as JSON and carry session_metadata have their
  session_id and teacher_id logged; the message is forwarded unchanged.

---
Review notes (this section is ignored by git am):
- nvidia-smi is invoked with an argument list and a 5 second timeout
  instead of a shell string, so a hung driver cannot stall /health.
- The metadata probe also tolerates undecodable bytes and non-string
  payloads (ValueError / TypeError) instead of only JSONDecodeError.
- NOTE(review): this patch was rebuilt from a whitespace-collapsed copy.
  Indentation of context and added lines in the hunks at 841, 865 and
  1104 is inferred; verify it against hybrid_server.py before applying
  (git apply --ignore-whitespace tolerates context-only drift). The
  post-image id on the index line predates these review changes.

 hybrid_server.py | 42 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 40 insertions(+), 2 deletions(-)

diff --git a/hybrid_server.py b/hybrid_server.py
index a679630..a6085ff 100644
--- a/hybrid_server.py
+++ b/hybrid_server.py
@@ -91,7 +91,29 @@ class HybridWhisperServer:
     def setup_routes(self):
         @self.app.route('/health', methods=['GET'])
         def health_check():
-            return jsonify({'status': 'healthy', 'service': 'WhisperLive Hybrid Server'})
+            # Report used memory on GPU index 1 via nvidia-smi (MiB -> GiB).
+            # Best effort: the probe must never fail or stall the health check,
+            # so any error or timeout is reported as 0.0.
+            import subprocess
+            try:
+                gpu_mem = float(subprocess.check_output(
+                    ['nvidia-smi', '--query-gpu=memory.used',
+                     '--format=csv,noheader,nounits', '-i', '1'],
+                    timeout=5
+                ).decode().strip()) / 1024.0
+            except (OSError, subprocess.SubprocessError, ValueError):
+                gpu_mem = 0.0
+
+            # Count active WS connections; 0 when the attribute is missing or None
+            active = len(getattr(self.whisper_server, 'clients', None) or ())
+
+            return jsonify({
+                'status': 'healthy',
+                'service': 'WhisperLive Hybrid Server',
+                'model_loaded': self.shared_transcriber is not None,
+                'gpu_memory_used_gb': round(gpu_mem, 1),
+                'active_connections': active
+            })
 
         @self.app.route('/', methods=['GET'])
         def serve_test_form():
@@ -841,6 +863,7 @@ print(transcription.text)
                 language = request.form.get('language', None)
                 task = request.form.get('task', 'transcribe')  # 'transcribe' or 'translate'
                 model_size = request.form.get('model', 'base')
+                use_vad = request.form.get('use_vad', 'true').lower() == 'true'
 
                 # For now, we'll use the shared transcriber regardless of the requested model size
                 # In the future, we could create different transcriber instances for different models
@@ -865,7 +888,8 @@ print(transcription.text)
                     segments, info = self.shared_transcriber.transcribe(
                         temp_path,
                         language=language,
-                        task=task
+                        task=task,
+                        vad_filter=use_vad
                     )
                 else:
                     # For other backends, use the server's transcriber
@@ -1104,6 +1128,20 @@ print(transcription.text)
                         if data == "END_OF_AUDIO":
                             internal.send_binary(b"END_OF_AUDIO")
                         else:
+                            # Log session_metadata when the message parses as a JSON
+                            # object; the message itself is forwarded unchanged below.
+                            try:
+                                msg = json.loads(data)
+                                session_metadata = msg.get('session_metadata')
+                                if session_metadata:
+                                    logger.info(
+                                        "Session metadata received: session_id=%s, teacher_id=%s",
+                                        session_metadata.get('session_id'),
+                                        session_metadata.get('teacher_id'),
+                                    )
+                            except (ValueError, TypeError, AttributeError):
+                                # Not JSON, or not the expected shape: nothing to log.
+                                pass
                             internal.send(data)
 
                     except Exception as e: