fix: tune WhisperLive buffering and CUDA detection
This commit is contained in:
parent
83b09b7a4a
commit
65e8426cf7
@ -427,7 +427,7 @@ class ServeClientBase(object):
|
||||
self.show_prev_out_thresh = 5 # if pause(no output from whisper) show previous output for 5 seconds
|
||||
self.add_pause_thresh = 3 # add a blank to segment list as a pause(no speech) for 3 seconds
|
||||
self.transcript = []
|
||||
self.send_last_n_segments = 10
|
||||
self.send_last_n_segments = 30
|
||||
|
||||
# text formatting
|
||||
self.pick_previous_segments = 2
|
||||
@ -461,9 +461,9 @@ class ServeClientBase(object):
|
||||
|
||||
"""
|
||||
self.lock.acquire()
|
||||
if self.frames_np is not None and self.frames_np.shape[0] > 45*self.RATE:
|
||||
self.frames_offset += 30.0
|
||||
self.frames_np = self.frames_np[int(30*self.RATE):]
|
||||
if self.frames_np is not None and self.frames_np.shape[0] > 90*self.RATE:
|
||||
self.frames_offset += 60.0
|
||||
self.frames_np = self.frames_np[int(60*self.RATE):]
|
||||
# check timestamp offset (should be >= self.frames_offset)
|
||||
# this basically means that there is no speech, as timestamp offset hasn't updated
|
||||
# and is less than frames_offset
|
||||
@ -482,7 +482,7 @@ class ServeClientBase(object):
|
||||
no valid segment for the last 30 seconds from whisper
|
||||
"""
|
||||
with self.lock:
|
||||
if self.frames_np[int((self.timestamp_offset - self.frames_offset)*self.RATE):].shape[0] > 25 * self.RATE:
|
||||
if self.frames_np[int((self.timestamp_offset - self.frames_offset)*self.RATE):].shape[0] > 60 * self.RATE:
|
||||
duration = self.frames_np.shape[0] / self.RATE
|
||||
self.timestamp_offset = self.frames_offset + duration - 5
|
||||
|
||||
@ -807,10 +807,19 @@ class ServeClientFasterWhisper(ServeClientBase):
|
||||
self.same_output_threshold = 10
|
||||
self.end_time_for_same_output = None
|
||||
|
||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
# torch.cuda.is_available() fails when torch was compiled against a newer CUDA
|
||||
# than the driver provides. Use ctranslate2's own CUDA probe instead, since
|
||||
# faster_whisper relies on ctranslate2 — not torch — for inference.
|
||||
try:
|
||||
import ctranslate2 as _ct2
|
||||
_cuda_types = _ct2.get_supported_compute_types("cuda")
|
||||
device = "cuda" if _cuda_types else "cpu"
|
||||
except Exception:
|
||||
device = "cpu"
|
||||
|
||||
if device == "cuda":
|
||||
major, _ = torch.cuda.get_device_capability(device)
|
||||
self.compute_type = "float16" if major >= 7 else "float32"
|
||||
# Use int8 to stay within shared GPU memory budget (GPU 1 is shared with TTS/ComfyUI)
|
||||
self.compute_type = "int8"
|
||||
else:
|
||||
self.compute_type = "int8"
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user