merge: restore WhisperLive production fixes

This commit is contained in:
kcar 2026-05-27 15:51:33 +00:00
commit 558d96ba1d
2 changed files with 18 additions and 9 deletions

View File

@ -859,7 +859,7 @@ print(transcription.text)</code></pre>
language = request.form.get('language', None) language = request.form.get('language', None)
task = request.form.get('task', 'transcribe') # 'transcribe' or 'translate' task = request.form.get('task', 'transcribe') # 'transcribe' or 'translate'
model_size = request.form.get('model', 'base') model_size = request.form.get('model', 'base')
use_vad = request.form.get('use_vad', 'true').lower() == 'true' use_vad = request.args.get('use_vad', request.form.get('use_vad', 'true')).lower() == 'true'
# For now, we'll use the shared transcriber regardless of the requested model size # For now, we'll use the shared transcriber regardless of the requested model size
# In the future, we could create different transcriber instances for different models # In the future, we could create different transcriber instances for different models

View File

@ -427,7 +427,7 @@ class ServeClientBase(object):
self.show_prev_out_thresh = 5 # if pause(no output from whisper) show previous output for 5 seconds self.show_prev_out_thresh = 5 # if pause(no output from whisper) show previous output for 5 seconds
self.add_pause_thresh = 3 # add a blank to segment list as a pause(no speech) for 3 seconds self.add_pause_thresh = 3 # add a blank to segment list as a pause(no speech) for 3 seconds
self.transcript = [] self.transcript = []
self.send_last_n_segments = 10 self.send_last_n_segments = 30
# text formatting # text formatting
self.pick_previous_segments = 2 self.pick_previous_segments = 2
@ -461,9 +461,9 @@ class ServeClientBase(object):
""" """
self.lock.acquire() self.lock.acquire()
if self.frames_np is not None and self.frames_np.shape[0] > 45*self.RATE: if self.frames_np is not None and self.frames_np.shape[0] > 90*self.RATE:
self.frames_offset += 30.0 self.frames_offset += 60.0
self.frames_np = self.frames_np[int(30*self.RATE):] self.frames_np = self.frames_np[int(60*self.RATE):]
# check timestamp offset (should be >= self.frames_offset) # check timestamp offset (should be >= self.frames_offset)
# this basically means that there is no speech, as the timestamp offset hasn't updated # this basically means that there is no speech, as the timestamp offset hasn't updated
# and is less than frames_offset # and is less than frames_offset
@ -482,7 +482,7 @@ class ServeClientBase(object):
no valid segment for the last 30 seconds from whisper no valid segment for the last 30 seconds from whisper
""" """
with self.lock: with self.lock:
if self.frames_np[int((self.timestamp_offset - self.frames_offset)*self.RATE):].shape[0] > 25 * self.RATE: if self.frames_np[int((self.timestamp_offset - self.frames_offset)*self.RATE):].shape[0] > 60 * self.RATE:
duration = self.frames_np.shape[0] / self.RATE duration = self.frames_np.shape[0] / self.RATE
self.timestamp_offset = self.frames_offset + duration - 5 self.timestamp_offset = self.frames_offset + duration - 5
@ -807,10 +807,19 @@ class ServeClientFasterWhisper(ServeClientBase):
self.same_output_threshold = 10 self.same_output_threshold = 10
self.end_time_for_same_output = None self.end_time_for_same_output = None
device = "cuda" if torch.cuda.is_available() else "cpu" # torch.cuda.is_available() fails when torch was compiled against a newer CUDA
# than the driver provides. Use ctranslate2's own CUDA probe instead, since
# faster_whisper relies on ctranslate2 — not torch — for inference.
try:
import ctranslate2 as _ct2
_cuda_types = _ct2.get_supported_compute_types("cuda")
device = "cuda" if _cuda_types else "cpu"
except Exception:
device = "cpu"
if device == "cuda": if device == "cuda":
major, _ = torch.cuda.get_device_capability(device) # Use int8 to stay within shared GPU memory budget (GPU 1 is shared with TTS/ComfyUI)
self.compute_type = "float16" if major >= 7 else "float32" self.compute_type = "int8"
else: else:
self.compute_type = "int8" self.compute_type = "int8"