merge: restore WhisperLive production fixes

2026-05-27 15:51:33 +00:00 · 2026-05-27 15:51:33 +00:00 · 558d96ba1d
commit 558d96ba1d
parent 335b4f498c 65e8426cf7
2 changed files with 18 additions and 9 deletions
--- a/hybrid_server.py
+++ b/hybrid_server.py
@ -859,7 +859,7 @@ print(transcription.text)</code></pre>
                language = request.form.get('language', None)
                task = request.form.get('task', 'transcribe')  # 'transcribe' or 'translate'
                model_size = request.form.get('model', 'base')
-                use_vad = request.form.get('use_vad', 'true').lower() == 'true'
+                use_vad = request.args.get('use_vad', request.form.get('use_vad', 'true')).lower() == 'true'
                # For now, we'll use the shared transcriber regardless of the requested model size
                # In the future, we could create different transcriber instances for different models
--- a/whisper_live/server.py
+++ b/whisper_live/server.py
@ -427,7 +427,7 @@ class ServeClientBase(object):
        self.show_prev_out_thresh = 5   # if pause(no output from whisper) show previous output for 5 seconds
        self.add_pause_thresh = 3       # add a blank to segment list as a pause(no speech) for 3 seconds
        self.transcript = []
-        self.send_last_n_segments = 10
+        self.send_last_n_segments = 30
        # text formatting
        self.pick_previous_segments = 2
@ -461,9 +461,9 @@ class ServeClientBase(object):
        """
        self.lock.acquire()
-        if self.frames_np is not None and self.frames_np.shape[0] > 45*self.RATE:
+        if self.frames_np is not None and self.frames_np.shape[0] > 90*self.RATE:
-            self.frames_offset += 30.0
+            self.frames_offset += 60.0
-            self.frames_np = self.frames_np[int(30*self.RATE):]
+            self.frames_np = self.frames_np[int(60*self.RATE):]
            # check timestamp offset(should be >= self.frame_offset)
            # this basically means that there is no speech as timestamp offset hasnt updated
            # and is less than frame_offset
@ -482,7 +482,7 @@ class ServeClientBase(object):
        no valid segment for the last 30 seconds from whisper
        """
        with self.lock:
-            if self.frames_np[int((self.timestamp_offset - self.frames_offset)*self.RATE):].shape[0] > 25 * self.RATE:
+            if self.frames_np[int((self.timestamp_offset - self.frames_offset)*self.RATE):].shape[0] > 60 * self.RATE:
                duration = self.frames_np.shape[0] / self.RATE
                self.timestamp_offset = self.frames_offset + duration - 5
@ -807,10 +807,19 @@ class ServeClientFasterWhisper(ServeClientBase):
        self.same_output_threshold = 10
        self.end_time_for_same_output = None
-        device = "cuda" if torch.cuda.is_available() else "cpu"
+        # torch.cuda.is_available() fails when torch was compiled against a newer CUDA
+        # than the driver provides. Use ctranslate2's own CUDA probe instead, since
+        # faster_whisper relies on ctranslate2 — not torch — for inference.
+        try:
+            import ctranslate2 as _ct2
+            _cuda_types = _ct2.get_supported_compute_types("cuda")
+            device = "cuda" if _cuda_types else "cpu"
+        except Exception:
+            device = "cpu"
        if device == "cuda":
-            major, _ = torch.cuda.get_device_capability(device)
+            # Use int8 to stay within shared GPU memory budget (GPU 1 is shared with TTS/ComfyUI)
-            self.compute_type = "float16" if major >= 7 else "float32"
+            self.compute_type = "int8"
        else:
            self.compute_type = "int8"