First commit of files

K Car 2025-06-07 13:06:08 +01:00
commit 05648af633
61 changed files with 10357 additions and 0 deletions


@@ -0,0 +1,51 @@
FROM python:3.10-bookworm
ARG DEBIAN_FRONTEND=noninteractive
# Create log directories with proper permissions
RUN mkdir -p /app/logs && \
touch /app/logs/whisperlive.log && \
touch /app/logs/connections.log && \
chmod 666 /app/logs/whisperlive.log && \
chmod 666 /app/logs/connections.log
# install lib required for pyaudio
RUN apt update && apt install -y portaudio19-dev && apt-get clean && rm -rf /var/lib/apt/lists/*
# update pip to add support for whl.metadata -> less downloading
RUN pip install --no-cache-dir -U "pip>=24"
# create a working directory
WORKDIR /app
# install the requirements for running the whisper-live server
COPY requirements/server.txt /app/
RUN pip install -r server.txt && rm server.txt
COPY whisper_live /app/whisper_live
COPY run_server.py /app
# Port options (declare the args before EXPOSE so the variables are defined when referenced)
ARG PORT_WHISPERLIVE
ENV PORT_WHISPERLIVE=${PORT_WHISPERLIVE}
ARG PORT_WHISPERLIVE_SSL
ENV PORT_WHISPERLIVE_SSL=${PORT_WHISPERLIVE_SSL}
EXPOSE ${PORT_WHISPERLIVE}
EXPOSE ${PORT_WHISPERLIVE_SSL}
# SSL options
ARG WHISPERLIVE_SSL
ENV WHISPERLIVE_SSL=${WHISPERLIVE_SSL}
# Model options
ARG WHISPL_USE_CUSTOM_MODEL
ENV WHISPL_USE_CUSTOM_MODEL=${WHISPL_USE_CUSTOM_MODEL}
ARG FASTERWHISPER_MODEL
ENV FASTERWHISPER_MODEL=${FASTERWHISPER_MODEL}
CMD ["sh", "-c", "\
if [ \"$WHISPERLIVE_SSL\" = \"true\" ]; then \
python3 -u run_server.py --port $PORT_WHISPERLIVE_SSL --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL --ssl_cert_path /app/ssl; \
else \
python3 -u run_server.py --port $PORT_WHISPERLIVE --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL --no_single_model; \
fi"]


@@ -0,0 +1,45 @@
FROM python:3.10-bookworm
ARG DEBIAN_FRONTEND=noninteractive
# Create log directories with proper permissions
RUN mkdir -p /app/logs && \
touch /app/logs/whisperlive.log && \
touch /app/logs/connections.log && \
chmod 666 /app/logs/whisperlive.log && \
chmod 666 /app/logs/connections.log
# install lib required for pyaudio
RUN apt update && apt install -y portaudio19-dev && apt-get clean && rm -rf /var/lib/apt/lists/*
# update pip to add support for whl.metadata -> less downloading
RUN pip install --no-cache-dir -U "pip>=24"
# create a working directory
WORKDIR /app
# install the requirements for running the whisper-live server
COPY requirements/server.txt /app/
RUN pip install -r server.txt && rm server.txt
COPY whisper_live /app/whisper_live
COPY run_server.py /app
# Port options (declare the args before EXPOSE so the variables are defined when referenced)
ARG PORT_WHISPERLIVE
ENV PORT_WHISPERLIVE=${PORT_WHISPERLIVE}
ARG PORT_WHISPERLIVE_SSL
ENV PORT_WHISPERLIVE_SSL=${PORT_WHISPERLIVE_SSL}
EXPOSE ${PORT_WHISPERLIVE}
EXPOSE ${PORT_WHISPERLIVE_SSL}
ARG FASTERWHISPER_MODEL
ENV FASTERWHISPER_MODEL=${FASTERWHISPER_MODEL}
ARG WHISPERLIVE_SSL
ENV WHISPERLIVE_SSL=${WHISPERLIVE_SSL}
CMD ["sh", "-c", "\
if [ \"$WHISPERLIVE_SSL\" = \"true\" ]; then \
python3 -u run_server.py --port $PORT_WHISPERLIVE_SSL --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL --ssl_cert_path /app/ssl; \
else \
python3 -u run_server.py --port $PORT_WHISPERLIVE --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL; \
fi"]


@@ -0,0 +1,49 @@
FROM python:3.10-bookworm
ARG DEBIAN_FRONTEND=noninteractive
# Create log directories with proper permissions
RUN mkdir -p /app/logs && \
touch /app/logs/whisperlive.log && \
touch /app/logs/connections.log && \
chmod 666 /app/logs/whisperlive.log && \
chmod 666 /app/logs/connections.log
# install lib required for pyaudio
RUN apt update && apt install -y portaudio19-dev && apt-get clean && rm -rf /var/lib/apt/lists/*
# update pip to add support for whl.metadata -> less downloading
RUN pip install --no-cache-dir -U "pip>=24"
# create a working directory
WORKDIR /app
# install the requirements for running the whisper-live server
COPY requirements/server.txt /app/
RUN pip install -r server.txt && rm server.txt
# make the paths of the nvidia libs installed as wheels visible. equivalent to:
# export LD_LIBRARY_PATH=`python3 -c 'import os; import nvidia.cublas.lib; import nvidia.cudnn.lib; print(os.path.dirname(nvidia.cublas.lib.__file__) + ":" + os.path.dirname(nvidia.cudnn.lib.__file__))'`
ENV LD_LIBRARY_PATH="/usr/local/lib/python3.10/site-packages/nvidia/cublas/lib:/usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib"
COPY whisper_live /app/whisper_live
COPY run_server.py /app
# Port options (declare the args before EXPOSE so the variables are defined when referenced)
ARG PORT_WHISPERLIVE
ENV PORT_WHISPERLIVE=${PORT_WHISPERLIVE}
ARG PORT_WHISPERLIVE_SSL
ENV PORT_WHISPERLIVE_SSL=${PORT_WHISPERLIVE_SSL}
EXPOSE ${PORT_WHISPERLIVE}
EXPOSE ${PORT_WHISPERLIVE_SSL}
ARG FASTERWHISPER_MODEL
ENV FASTERWHISPER_MODEL=${FASTERWHISPER_MODEL}
ARG WHISPERLIVE_SSL
ENV WHISPERLIVE_SSL=${WHISPERLIVE_SSL}
CMD ["sh", "-c", "\
if [ \"$WHISPERLIVE_SSL\" = \"true\" ]; then \
python3 -u run_server.py --port $PORT_WHISPERLIVE_SSL --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL --ssl_cert_path /app/ssl; \
else \
python3 -u run_server.py --port $PORT_WHISPERLIVE --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL; \
fi"]

1191
.archive/docker-compose.yml Normal file

File diff suppressed because it is too large.

11
.env Normal file

@@ -0,0 +1,11 @@
# Whisper live settings
APP_WS_PROTOCOL=wss
APP_URL=kevlarai.com
PORT_WHISPERLIVE=5050
PORT_WHISPERLIVE_SSL=5053
WHISPERLIVE_SSL=false
WHISPL_USE_CUSTOM_MODEL=false
FASTERWHISPER_MODEL=faster-whisper-large-v3
WHISPERLIVE_URL=${APP_WS_PROTOCOL}://whisperlive.${APP_URL}

42
Dockerfile Normal file

@@ -0,0 +1,42 @@
FROM python:3.10-bookworm
ARG DEBIAN_FRONTEND=noninteractive
# Create log directories with proper permissions
RUN mkdir -p /app/logs && \
touch /app/logs/whisperlive.log && \
touch /app/logs/connections.log && \
chmod 666 /app/logs/whisperlive.log && \
chmod 666 /app/logs/connections.log
# install lib required for pyaudio
RUN apt update && apt install -y portaudio19-dev && apt-get clean && rm -rf /var/lib/apt/lists/*
# update pip to add support for whl.metadata -> less downloading
RUN pip install --no-cache-dir -U "pip>=24"
# create a working directory
WORKDIR /app
# install the requirements for running the whisper-live server
COPY requirements/server.txt /app/
RUN pip install -r server.txt && rm server.txt
# make the paths of the nvidia libs installed as wheels visible
ENV LD_LIBRARY_PATH="/usr/local/lib/python3.10/site-packages/nvidia/cublas/lib:/usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib"
COPY whisper_live /app/whisper_live
COPY run_server.py /app
# Port options (declare the arg before EXPOSE so the variable is defined when referenced)
ARG PORT_WHISPERLIVE
ENV PORT_WHISPERLIVE=${PORT_WHISPERLIVE}
EXPOSE ${PORT_WHISPERLIVE}
ARG FASTERWHISPER_MODEL
ENV FASTERWHISPER_MODEL=${FASTERWHISPER_MODEL}
CMD ["python3", "-u", "run_server.py", "--port", "${PORT_WHISPERLIVE}", "--backend", "faster_whisper"]
# CMD ["python3", "-u", "run_server.py", "--port", "${PORT_WHISPERLIVE}", "--backend", "faster_whisper", "--faster_whisper_custom_model_path", "/app/models/${FASTERWHISPER_MODEL}", "--ssl_cert_path", "/app/ssl"]
# CMD ["python3", "-u", "run_server.py", "--port", "${PORT_WHISPERLIVE_SSL}", "--backend", "faster_whisper", "--faster_whisper_custom_model_path", "/app/models/${FASTERWHISPER_MODEL}", "--ssl_cert_path", "/app/ssl"]

21
LICENSE Normal file

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2023 Vineet Suryan, Collabora Ltd.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

219
README.md Normal file

@@ -0,0 +1,219 @@
# WhisperLive
<h2 align="center">
<a href="https://www.youtube.com/watch?v=0PHWCApIcCI"><img
src="https://img.youtube.com/vi/0PHWCApIcCI/0.jpg" style="background-color:rgba(0,0,0,0);" height=300 alt="WhisperLive"></a>
<a href="https://www.youtube.com/watch?v=0f5oiG4oPWQ"><img
src="https://img.youtube.com/vi/0f5oiG4oPWQ/0.jpg" style="background-color:rgba(0,0,0,0);" height=300 alt="WhisperLive"></a>
<br><br>A nearly-live implementation of OpenAI's Whisper.
<br><br>
</h2>
This project is a real-time transcription application that uses the OpenAI Whisper model
to convert speech input into text output. It can be used to transcribe both live audio
input from a microphone and pre-recorded audio files.
- [Installation](#installation)
- [Getting Started](#getting-started)
- [Running the Server](#running-the-server)
- [Running the Client](#running-the-client)
- [Browser Extensions](#browser-extensions)
- [Whisper Live Server in Docker](#whisper-live-server-in-docker)
- [Future Work](#future-work)
- [Blog Posts](#blog-posts)
- [Contact](#contact)
- [Citations](#citations)
## Installation
- Install PyAudio
```bash
bash scripts/setup.sh
```
- Install whisper-live from pip
```bash
pip install whisper-live
```
### Setting up NVIDIA/TensorRT-LLM for TensorRT backend
- Please follow the [TensorRT_whisper readme](https://github.com/collabora/WhisperLive/blob/main/TensorRT_whisper.md) for setting up [NVIDIA/TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) and for building the Whisper-TensorRT engine.
## Getting Started
The server supports three backends: `faster_whisper`, `tensorrt`, and `openvino`. If running the `tensorrt` backend, follow the [TensorRT_whisper readme](https://github.com/collabora/WhisperLive/blob/main/TensorRT_whisper.md)
### Running the Server
- [Faster Whisper](https://github.com/SYSTRAN/faster-whisper) backend
```bash
python3 run_server.py --port 9090 \
--backend faster_whisper
# running with custom model and cache_dir to save auto-converted ctranslate2 models
python3 run_server.py --port 9090 \
--backend faster_whisper \
-fw "/path/to/custom/faster/whisper/model"
-c ~/.cache/whisper-live/
```
- TensorRT backend. Currently, we recommend using only the Docker setup for TensorRT. Follow the [TensorRT_whisper readme](https://github.com/collabora/WhisperLive/blob/main/TensorRT_whisper.md), which works as expected. Make sure to build your TensorRT engines before running the server with the TensorRT backend.
```bash
# Run English only model
python3 run_server.py -p 9090 \
-b tensorrt \
-trt /home/TensorRT-LLM/examples/whisper/whisper_small_en
# Run Multilingual model
python3 run_server.py -p 9090 \
-b tensorrt \
-trt /home/TensorRT-LLM/examples/whisper/whisper_small \
-m
```
- WhisperLive now supports the [OpenVINO](https://github.com/openvinotoolkit/openvino) backend for efficient inference on Intel CPUs, iGPUs and dGPUs. So far, we have tested the models uploaded to [Hugging Face by OpenVINO](https://huggingface.co/OpenVINO?search_models=whisper).
- > **Docker Recommended:** Running WhisperLive with OpenVINO inside Docker automatically enables GPU support (iGPU/dGPU) without requiring additional host setup.
- > **Native (non-Docker) Use:** If you prefer running outside Docker, ensure the Intel drivers and OpenVINO runtime are installed and properly configured on your system. Refer to the documentation for [installing OpenVINO](https://docs.openvino.ai/2025/get-started/install-openvino.html?PACKAGE=OPENVINO_BASE&VERSION=v_2025_0_0&OP_SYSTEM=LINUX&DISTRIBUTION=PIP#).
```
python3 run_server.py -p 9090 -b openvino
```
#### Controlling OpenMP Threads
To control the number of threads used by OpenMP, you can set the `OMP_NUM_THREADS` environment variable. This is useful for managing CPU resources and ensuring consistent performance. If not specified, `OMP_NUM_THREADS` is set to `1` by default. You can change this by using the `--omp_num_threads` argument:
```bash
python3 run_server.py --port 9090 \
--backend faster_whisper \
--omp_num_threads 4
```
#### Single model mode
By default, when running the server without specifying a model, the server will instantiate a new whisper model for every client connection. This has the advantage that the server can use different model sizes, based on the client's requested model size. On the other hand, it also means you have to wait for the model to be loaded upon client connection and you will have increased (V)RAM usage.
When serving a custom TensorRT model using the `-trt` or a custom faster_whisper model using the `-fw` option, the server will instead only instantiate the custom model once and then reuse it for all client connections.
If you don't want this, set `--no_single_model`.
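For example, the two modes side by side (the model path is a placeholder; a sketch based on the flags described above):
```bash
# Single-model mode: the custom model is loaded once and shared by all clients
python3 run_server.py --port 9090 --backend faster_whisper \
    -fw "/path/to/custom/faster/whisper/model"

# Per-client models instead, even with a custom model specified
python3 run_server.py --port 9090 --backend faster_whisper \
    -fw "/path/to/custom/faster/whisper/model" --no_single_model
```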
### Running the Client
- Initialize the client with the parameters below:
- `lang`: Language of the input audio, applicable only if using a multilingual model.
- `translate`: If set to `True` then translate from any language to `en`.
- `model`: Whisper model size.
- `use_vad`: Whether to use `Voice Activity Detection` on the server.
- `save_output_recording`: Set to True to save the microphone input as a `.wav` file during live transcription. This option is helpful for recording sessions for later playback or analysis. Defaults to `False`.
- `output_recording_filename`: Specifies the `.wav` file path where the microphone input will be saved if `save_output_recording` is set to `True`.
- `max_clients`: Specifies the maximum number of clients the server should allow. Defaults to 4.
- `max_connection_time`: Maximum connection time for each client in seconds. Defaults to 600.
- `mute_audio_playback`: Whether to mute audio playback when transcribing an audio file. Defaults to False.
```python
from whisper_live.client import TranscriptionClient
client = TranscriptionClient(
"localhost",
9090,
lang="en",
translate=False,
model="small", # also support hf_model => `Systran/faster-whisper-small`
use_vad=False,
save_output_recording=True, # Only used for microphone input, False by Default
output_recording_filename="./output_recording.wav", # Only used for microphone input
max_clients=4,
max_connection_time=600,
mute_audio_playback=False, # Only used for file input, False by Default
)
```
This connects to the server running on localhost at port 9090. When using a multilingual model, the language of the transcription will be detected automatically. You can also use the language option to specify the target language for the transcription, in this case, English ("en"). The translate option should be set to `True` if we want to translate from the source language to English and `False` if we want to transcribe in the source language.
- Transcribe an audio file:
```python
client("tests/jfk.wav")
```
- To transcribe from microphone:
```python
client()
```
- To transcribe from an RTSP stream:
```python
client(rtsp_url="rtsp://admin:admin@192.168.0.1/rtsp")
```
- To transcribe from an HLS stream:
```python
client(hls_url="http://as-hls-ww-live.akamaized.net/pool_904/live/ww/bbc_1xtra/bbc_1xtra.isml/bbc_1xtra-audio%3d96000.norewind.m3u8")
```
## Browser Extensions
- Run the server with your desired backend as shown [here](https://github.com/collabora/WhisperLive?tab=readme-ov-file#running-the-server).
- Transcribe audio directly from your browser using our Chrome or Firefox extensions. Refer to [Audio-Transcription-Chrome](https://github.com/collabora/whisper-live/tree/main/Audio-Transcription-Chrome#readme) and https://github.com/collabora/WhisperLive/blob/main/TensorRT_whisper.md
## Whisper Live Server in Docker
- GPU
- Faster-Whisper
```bash
docker run -it --gpus all -p 9090:9090 ghcr.io/collabora/whisperlive-gpu:latest
```
- TensorRT. Refer to [TensorRT_whisper readme](https://github.com/collabora/WhisperLive/blob/main/TensorRT_whisper.md) for setup and more tensorrt backend configurations.
```bash
docker build . -f docker/Dockerfile.tensorrt -t whisperlive-tensorrt
docker run -p 9090:9090 --runtime=nvidia --entrypoint /bin/bash -it whisperlive-tensorrt
# Build small.en engine
bash build_whisper_tensorrt.sh /app/TensorRT-LLM-examples small.en # float16
bash build_whisper_tensorrt.sh /app/TensorRT-LLM-examples small.en int8 # int8 weight only quantization
bash build_whisper_tensorrt.sh /app/TensorRT-LLM-examples small.en int4 # int4 weight only quantization
# Run server with small.en; pass a single --trt_model_path matching the engine built above
python3 run_server.py --port 9090 \
    --backend tensorrt \
    --trt_model_path "/app/TensorRT-LLM-examples/whisper/whisper_small_en_float16"
# for the quantized engines, use instead:
#   --trt_model_path "/app/TensorRT-LLM-examples/whisper/whisper_small_en_int8"
#   --trt_model_path "/app/TensorRT-LLM-examples/whisper/whisper_small_en_int4"
```
- OpenVINO
```
docker run -it --device=/dev/dri -p 9090:9090 ghcr.io/collabora/whisperlive-openvino
```
- CPU
- Faster-whisper
```bash
docker run -it -p 9090:9090 ghcr.io/collabora/whisperlive-cpu:latest
```
## Future Work
- [ ] Add translation to other languages on top of transcription.
## Blog Posts
- [Transforming speech technology with WhisperLive](https://www.collabora.com/news-and-blog/blog/2024/05/28/transforming-speech-technology-with-whisperlive/)
- [WhisperFusion: Ultra-low latency conversations with an AI chatbot](https://www.collabora.com/news-and-blog/news-and-events/whisperfusion-ultra-low-latency-conversations-with-an-ai-chatbot.html) powered by WhisperLive
- [Breaking language barriers 2.0: Moving closer towards fully reliable, production-ready Hindi ASR](https://www.collabora.com/news-and-blog/news-and-events/breaking-language-barriers-20-moving-closer-production-ready-hindi-asr.html), which is used in WhisperLive for Hindi.
## Contact
We are available to help you with both Open Source and proprietary AI projects. You can reach us via the Collabora website or [vineet.suryan@collabora.com](mailto:vineet.suryan@collabora.com) and [marcus.edel@collabora.com](mailto:marcus.edel@collabora.com).
## Citations
```bibtex
@article{Whisper,
title = {Robust Speech Recognition via Large-Scale Weak Supervision},
url = {https://arxiv.org/abs/2212.04356},
author = {Radford, Alec and Kim, Jong Wook and Xu, Tao and Brockman, Greg and McLeavey, Christine and Sutskever, Ilya},
publisher = {arXiv},
year = {2022},
}
```
```bibtex
@misc{SileroVAD,
author = {Silero Team},
title = {Silero VAD: pre-trained enterprise-grade Voice Activity Detector (VAD), Number Detector and Language Classifier},
year = {2021},
publisher = {GitHub},
journal = {GitHub repository},
howpublished = {\url{https://github.com/snakers4/silero-vad}},
email = {hello@silero.ai}
}
```

0
__init__.py Normal file

BIN
assets/jfk.flac Normal file

Binary file not shown.

16
check_cudnn.py Normal file

@@ -0,0 +1,16 @@
import tensorflow as tf

# Was TF compiled with CUDA support?
if tf.test.is_built_with_cuda():
    print("TF is built with CUDA")
else:
    print("TF is not built with CUDA")

# Is a GPU actually visible at runtime? (tf.test.is_gpu_available is deprecated)
if tf.config.list_physical_devices("GPU"):
    print("CUDA is available in TF")
else:
    print("CUDA is not available in TF")

# tf.test has no is_built_with_cudnn(); the build info exposes the cuDNN version instead
if tf.sysconfig.get_build_info().get("cudnn_version"):
    print("cuDNN is available")
else:
    print("cuDNN is not available")

36
docker-compose.yaml Normal file

@@ -0,0 +1,36 @@
version: '3.8'
services:
whisperlive:
container_name: whisperlive
build:
context: .
dockerfile: Dockerfile
args:
PORT_WHISPERLIVE: ${PORT_WHISPERLIVE}
FASTERWHISPER_MODEL: ${FASTERWHISPER_MODEL}
env_file:
- .env
environment:
LOG_PATH: /app/logs
NVIDIA_VISIBLE_DEVICES: all
NVIDIA_DRIVER_CAPABILITIES: compute,utility
volumes:
- ./models:/app/models
- ./ssl:/app/ssl
- ./logs:/app/logs
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
ports:
- ${PORT_WHISPERLIVE}:${PORT_WHISPERLIVE}
networks:
- audio-network
networks:
audio-network:
driver: bridge
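Assuming the `.env` shown above sits next to this compose file and a converted model directory exists at `./models/faster-whisper-large-v3`, bringing the service up would look roughly like this (not part of the commit):

```bash
# Build the image with the args interpolated from .env and start the service
docker compose up -d --build whisperlive

# Tail the container output; file logs, if the server writes them, land in ./logs via the volume mount
docker compose logs -f whisperlive
```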

35
docker/Dockerfile Normal file

@@ -0,0 +1,35 @@
FROM python:3.10-bookworm
ARG DEBIAN_FRONTEND=noninteractive
# install lib required for pyaudio
RUN apt update && apt install -y portaudio19-dev && apt-get clean && rm -rf /var/lib/apt/lists/*
# update pip to add support for whl.metadata -> less downloading
RUN pip install --no-cache-dir -U "pip>=24"
# create a working directory
RUN mkdir /app
WORKDIR /app
# install the requirements for running the whisper-live server
COPY requirements/server.txt /app/
RUN pip install --no-cache-dir -r server.txt && rm server.txt
# make the paths of the nvidia libs installed as wheels visible. equivalent to:
# export LD_LIBRARY_PATH=`python3 -c 'import os; import nvidia.cublas.lib; import nvidia.cudnn.lib; print(os.path.dirname(nvidia.cublas.lib.__file__) + ":" + os.path.dirname(nvidia.cudnn.lib.__file__))'`
ENV LD_LIBRARY_PATH="/usr/local/lib/python3.10/site-packages/nvidia/cublas/lib:/usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib"
COPY whisper_live /app/whisper_live
COPY models /app/models
COPY run_server.py /app
# declare the port arg before EXPOSE so the variable is defined when referenced
ARG WHISPERLIVE_PORT
ENV WHISPERLIVE_PORT=${WHISPERLIVE_PORT}
EXPOSE ${WHISPERLIVE_PORT}
ARG FASTERWHISPER_MODEL
ENV FASTERWHISPER_MODEL=${FASTERWHISPER_MODEL}
CMD python3 run_server.py --port $WHISPERLIVE_PORT --backend faster_whisper --faster_whisper_custom_model_path /app/models/$FASTERWHISPER_MODEL

25
docker/Dockerfile.cpu Normal file

@@ -0,0 +1,25 @@
FROM python:3.10-bookworm
ARG DEBIAN_FRONTEND=noninteractive
# install lib required for pyaudio
RUN apt update && apt install -y portaudio19-dev && apt-get clean && rm -rf /var/lib/apt/lists/*
# update pip to add support for whl.metadata -> less downloading
RUN pip install --no-cache-dir -U "pip>=24"
# create a working directory
RUN mkdir /app
WORKDIR /app
# install pytorch, but without the nvidia-libs that are only necessary for gpu
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
# install the requirements for running the whisper-live server
COPY requirements/server.txt /app/
RUN pip install --no-cache-dir -r server.txt && rm server.txt
COPY whisper_live /app/whisper_live
COPY run_server.py /app
CMD ["python", "run_server.py"]

26
docker/Dockerfile.gpu Normal file

@@ -0,0 +1,26 @@
FROM python:3.10-bookworm
ARG DEBIAN_FRONTEND=noninteractive
# install lib required for pyaudio
RUN apt update && apt install -y portaudio19-dev && apt-get clean && rm -rf /var/lib/apt/lists/*
# update pip to add support for whl.metadata -> less downloading
RUN pip install --no-cache-dir -U "pip>=24"
# create a working directory
RUN mkdir /app
WORKDIR /app
# install the requirements for running the whisper-live server
COPY requirements/server.txt /app/
RUN pip install --no-cache-dir -r server.txt && rm server.txt
# make the paths of the nvidia libs installed as wheels visible. equivalent to:
# export LD_LIBRARY_PATH=`python3 -c 'import os; import nvidia.cublas.lib; import nvidia.cudnn.lib; print(os.path.dirname(nvidia.cublas.lib.__file__) + ":" + os.path.dirname(nvidia.cudnn.lib.__file__))'`
ENV LD_LIBRARY_PATH="/usr/local/lib/python3.10/site-packages/nvidia/cublas/lib:/usr/local/lib/python3.10/site-packages/nvidia/cudnn/lib"
COPY whisper_live /app/whisper_live
COPY run_server.py /app
CMD ["python", "run_server.py"]


@@ -0,0 +1,37 @@
FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04
ARG DEBIAN_FRONTEND=noninteractive
# Remove any third-party apt sources to avoid issues with expiring keys.
RUN rm -f /etc/apt/sources.list.d/*.list
# Install some basic utilities.
RUN apt-get update && apt-get install -y \
python3.10 python3-pip openmpi-bin libopenmpi-dev git wget \
&& rm -rf /var/lib/apt/lists/*
RUN pip3 install --no-cache-dir -U tensorrt_llm==0.9.0 --extra-index-url https://pypi.nvidia.com
WORKDIR /app
RUN git clone -b v0.9.0 --depth 1 https://github.com/NVIDIA/TensorRT-LLM.git && \
mv TensorRT-LLM/examples ./TensorRT-LLM-examples && \
rm -rf TensorRT-LLM
COPY assets/ ./assets
RUN wget -nc -P assets/ https://raw.githubusercontent.com/openai/whisper/main/whisper/assets/mel_filters.npz
COPY scripts/setup.sh ./
RUN apt update && bash setup.sh && rm setup.sh
COPY requirements/server.txt .
RUN pip install --no-cache-dir -r server.txt && rm server.txt
COPY whisper_live ./whisper_live
COPY scripts/build_whisper_tensorrt.sh .
COPY run_server.py .
# Build the TensorRT engine
RUN bash build_whisper_tensorrt.sh /app/TensorRT-LLM-examples small.en
# Set the command to run the server
CMD ["python3", "run_server.py", "--port", "9090", "--backend", "tensorrt", "--trt_model_path", "/app/TensorRT-LLM-examples/whisper/whisper_small_en"]


@@ -0,0 +1,28 @@
services:
whisperlive-server:
runtime: nvidia
build:
context: ./backend/whisperlive/server
dockerfile: Dockerfile.tensorrt # Override to use Dockerfile.tensorrt
args:
WHISPERLIVE_PORT: ${WHISPERLIVE_PORT}
env_file:
- ./.env
environment:
WHISPERLIVE_PORT: ${WHISPERLIVE_PORT}
NVIDIA_VISIBLE_DEVICES: all
NVIDIA_DRIVER_CAPABILITIES: compute,utility
volumes:
- data_volume:/data
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
ports:
- ${WHISPERLIVE_PORT}:${WHISPERLIVE_PORT}
networks:
- app-network

0
docs/.nojekyll Normal file

Binary file not shown.

BIN
docs/doctrees/index.doctree Normal file

Binary file not shown.

4
docs/html/.buildinfo Normal file

@@ -0,0 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 7b818b47e6f359b937e5a2517f120d43
tags: 645f666f9bcd5a90fca523b33c5a78b7


@@ -0,0 +1,26 @@
.. whisper_live documentation master file, created by
sphinx-quickstart on Fri Sep 22 11:39:30 2023.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to Whisper Live documentation!
========================================
.. toctree::
:maxdepth: 2
.. automodule:: whisper_live.server
:members:
.. automodule:: whisper_live.client
:members:
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
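The prebuilt `docs/html` and `docs/doctrees` directories elsewhere in this commit are the usual Sphinx outputs for a source like this; regenerating them would look roughly as follows (the source directory is an assumption, as it is not shown in this diff):

```bash
# Rebuild the HTML docs; <docs-source-dir> is wherever conf.py and this index.rst live
sphinx-build -b html -d docs/doctrees <docs-source-dir> docs/html
```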


@@ -0,0 +1,703 @@
@import url("basic.css");
/* -- page layout ----------------------------------------------------------- */
body {
font-family: Georgia, serif;
font-size: 17px;
background-color: #fff;
color: #000;
margin: 0;
padding: 0;
}
div.document {
width: 940px;
margin: 30px auto 0 auto;
}
div.documentwrapper {
float: left;
width: 100%;
}
div.bodywrapper {
margin: 0 0 0 220px;
}
div.sphinxsidebar {
width: 220px;
font-size: 14px;
line-height: 1.5;
}
hr {
border: 1px solid #B1B4B6;
}
div.body {
background-color: #fff;
color: #3E4349;
padding: 0 30px 0 30px;
}
div.body > .section {
text-align: left;
}
div.footer {
width: 940px;
margin: 20px auto 30px auto;
font-size: 14px;
color: #888;
text-align: right;
}
div.footer a {
color: #888;
}
p.caption {
font-family: inherit;
font-size: inherit;
}
div.relations {
display: none;
}
div.sphinxsidebar a {
color: #444;
text-decoration: none;
border-bottom: 1px dotted #999;
}
div.sphinxsidebar a:hover {
border-bottom: 1px solid #999;
}
div.sphinxsidebarwrapper {
padding: 18px 10px;
}
div.sphinxsidebarwrapper p.logo {
padding: 0;
margin: -10px 0 0 0px;
text-align: center;
}
div.sphinxsidebarwrapper h1.logo {
margin-top: -10px;
text-align: center;
margin-bottom: 5px;
text-align: left;
}
div.sphinxsidebarwrapper h1.logo-name {
margin-top: 0px;
}
div.sphinxsidebarwrapper p.blurb {
margin-top: 0;
font-style: normal;
}
div.sphinxsidebar h3,
div.sphinxsidebar h4 {
font-family: Georgia, serif;
color: #444;
font-size: 24px;
font-weight: normal;
margin: 0 0 5px 0;
padding: 0;
}
div.sphinxsidebar h4 {
font-size: 20px;
}
div.sphinxsidebar h3 a {
color: #444;
}
div.sphinxsidebar p.logo a,
div.sphinxsidebar h3 a,
div.sphinxsidebar p.logo a:hover,
div.sphinxsidebar h3 a:hover {
border: none;
}
div.sphinxsidebar p {
color: #555;
margin: 10px 0;
}
div.sphinxsidebar ul {
margin: 10px 0;
padding: 0;
color: #000;
}
div.sphinxsidebar ul li.toctree-l1 > a {
font-size: 120%;
}
div.sphinxsidebar ul li.toctree-l2 > a {
font-size: 110%;
}
div.sphinxsidebar input {
border: 1px solid #CCC;
font-family: Georgia, serif;
font-size: 1em;
}
div.sphinxsidebar hr {
border: none;
height: 1px;
color: #AAA;
background: #AAA;
text-align: left;
margin-left: 0;
width: 50%;
}
div.sphinxsidebar .badge {
border-bottom: none;
}
div.sphinxsidebar .badge:hover {
border-bottom: none;
}
/* To address an issue with donation coming after search */
div.sphinxsidebar h3.donation {
margin-top: 10px;
}
/* -- body styles ----------------------------------------------------------- */
a {
color: #004B6B;
text-decoration: underline;
}
a:hover {
color: #6D4100;
text-decoration: underline;
}
div.body h1,
div.body h2,
div.body h3,
div.body h4,
div.body h5,
div.body h6 {
font-family: Georgia, serif;
font-weight: normal;
margin: 30px 0px 10px 0px;
padding: 0;
}
div.body h1 { margin-top: 0; padding-top: 0; font-size: 240%; }
div.body h2 { font-size: 180%; }
div.body h3 { font-size: 150%; }
div.body h4 { font-size: 130%; }
div.body h5 { font-size: 100%; }
div.body h6 { font-size: 100%; }
a.headerlink {
color: #DDD;
padding: 0 4px;
text-decoration: none;
}
a.headerlink:hover {
color: #444;
background: #EAEAEA;
}
div.body p, div.body dd, div.body li {
line-height: 1.4em;
}
div.admonition {
margin: 20px 0px;
padding: 10px 30px;
background-color: #EEE;
border: 1px solid #CCC;
}
div.admonition tt.xref, div.admonition code.xref, div.admonition a tt {
background-color: #FBFBFB;
border-bottom: 1px solid #fafafa;
}
div.admonition p.admonition-title {
font-family: Georgia, serif;
font-weight: normal;
font-size: 24px;
margin: 0 0 10px 0;
padding: 0;
line-height: 1;
}
div.admonition p.last {
margin-bottom: 0;
}
div.highlight {
background-color: #fff;
}
dt:target, .highlight {
background: #FAF3E8;
}
div.warning {
background-color: #FCC;
border: 1px solid #FAA;
}
div.danger {
background-color: #FCC;
border: 1px solid #FAA;
-moz-box-shadow: 2px 2px 4px #D52C2C;
-webkit-box-shadow: 2px 2px 4px #D52C2C;
box-shadow: 2px 2px 4px #D52C2C;
}
div.error {
background-color: #FCC;
border: 1px solid #FAA;
-moz-box-shadow: 2px 2px 4px #D52C2C;
-webkit-box-shadow: 2px 2px 4px #D52C2C;
box-shadow: 2px 2px 4px #D52C2C;
}
div.caution {
background-color: #FCC;
border: 1px solid #FAA;
}
div.attention {
background-color: #FCC;
border: 1px solid #FAA;
}
div.important {
background-color: #EEE;
border: 1px solid #CCC;
}
div.note {
background-color: #EEE;
border: 1px solid #CCC;
}
div.tip {
background-color: #EEE;
border: 1px solid #CCC;
}
div.hint {
background-color: #EEE;
border: 1px solid #CCC;
}
div.seealso {
background-color: #EEE;
border: 1px solid #CCC;
}
div.topic {
background-color: #EEE;
}
p.admonition-title {
display: inline;
}
p.admonition-title:after {
content: ":";
}
pre, tt, code {
font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
font-size: 0.9em;
}
.hll {
background-color: #FFC;
margin: 0 -12px;
padding: 0 12px;
display: block;
}
img.screenshot {
}
tt.descname, tt.descclassname, code.descname, code.descclassname {
font-size: 0.95em;
}
tt.descname, code.descname {
padding-right: 0.08em;
}
img.screenshot {
-moz-box-shadow: 2px 2px 4px #EEE;
-webkit-box-shadow: 2px 2px 4px #EEE;
box-shadow: 2px 2px 4px #EEE;
}
table.docutils {
border: 1px solid #888;
-moz-box-shadow: 2px 2px 4px #EEE;
-webkit-box-shadow: 2px 2px 4px #EEE;
box-shadow: 2px 2px 4px #EEE;
}
table.docutils td, table.docutils th {
border: 1px solid #888;
padding: 0.25em 0.7em;
}
table.field-list, table.footnote {
border: none;
-moz-box-shadow: none;
-webkit-box-shadow: none;
box-shadow: none;
}
table.footnote {
margin: 15px 0;
width: 100%;
border: 1px solid #EEE;
background: #FDFDFD;
font-size: 0.9em;
}
table.footnote + table.footnote {
margin-top: -15px;
border-top: none;
}
table.field-list th {
padding: 0 0.8em 0 0;
}
table.field-list td {
padding: 0;
}
table.field-list p {
margin-bottom: 0.8em;
}
/* Cloned from
* https://github.com/sphinx-doc/sphinx/commit/ef60dbfce09286b20b7385333d63a60321784e68
*/
.field-name {
-moz-hyphens: manual;
-ms-hyphens: manual;
-webkit-hyphens: manual;
hyphens: manual;
}
table.footnote td.label {
width: .1px;
padding: 0.3em 0 0.3em 0.5em;
}
table.footnote td {
padding: 0.3em 0.5em;
}
dl {
margin-left: 0;
margin-right: 0;
margin-top: 0;
padding: 0;
}
dl dd {
margin-left: 30px;
}
blockquote {
margin: 0 0 0 30px;
padding: 0;
}
ul, ol {
/* Matches the 30px from the narrow-screen "li > ul" selector below */
margin: 10px 0 10px 30px;
padding: 0;
}
pre {
background: #EEE;
padding: 7px 30px;
margin: 15px 0px;
line-height: 1.3em;
}
div.viewcode-block:target {
background: #ffd;
}
dl pre, blockquote pre, li pre {
margin-left: 0;
padding-left: 30px;
}
tt, code {
background-color: #ecf0f3;
color: #222;
/* padding: 1px 2px; */
}
tt.xref, code.xref, a tt {
background-color: #FBFBFB;
border-bottom: 1px solid #fff;
}
a.reference {
text-decoration: none;
border-bottom: 1px dotted #004B6B;
}
/* Don't put an underline on images */
a.image-reference, a.image-reference:hover {
border-bottom: none;
}
a.reference:hover {
border-bottom: 1px solid #6D4100;
}
a.footnote-reference {
text-decoration: none;
font-size: 0.7em;
vertical-align: top;
border-bottom: 1px dotted #004B6B;
}
a.footnote-reference:hover {
border-bottom: 1px solid #6D4100;
}
a:hover tt, a:hover code {
background: #EEE;
}
@media screen and (max-width: 870px) {
div.sphinxsidebar {
display: none;
}
div.document {
width: 100%;
}
div.documentwrapper {
margin-left: 0;
margin-top: 0;
margin-right: 0;
margin-bottom: 0;
}
div.bodywrapper {
margin-top: 0;
margin-right: 0;
margin-bottom: 0;
margin-left: 0;
}
ul {
margin-left: 0;
}
li > ul {
/* Matches the 30px from the "ul, ol" selector above */
margin-left: 30px;
}
.document {
width: auto;
}
.footer {
width: auto;
}
.bodywrapper {
margin: 0;
}
.footer {
width: auto;
}
.github {
display: none;
}
}
@media screen and (max-width: 875px) {
body {
margin: 0;
padding: 20px 30px;
}
div.documentwrapper {
float: none;
background: #fff;
}
div.sphinxsidebar {
display: block;
float: none;
width: 102.5%;
margin: 50px -30px -20px -30px;
padding: 10px 20px;
background: #333;
color: #FFF;
}
div.sphinxsidebar h3, div.sphinxsidebar h4, div.sphinxsidebar p,
div.sphinxsidebar h3 a {
color: #fff;
}
div.sphinxsidebar a {
color: #AAA;
}
div.sphinxsidebar p.logo {
display: none;
}
div.document {
width: 100%;
margin: 0;
}
div.footer {
display: none;
}
div.bodywrapper {
margin: 0;
}
div.body {
min-height: 0;
padding: 0;
}
.rtd_doc_footer {
display: none;
}
.document {
width: auto;
}
.footer {
width: auto;
}
.footer {
width: auto;
}
.github {
display: none;
}
}
/* misc. */
.revsys-inline {
display: none!important;
}
/* Make nested-list/multi-paragraph items look better in Releases changelog
* pages. Without this, docutils' magical list fuckery causes inconsistent
* formatting between different release sub-lists.
*/
div#changelog > div.section > ul > li > p:only-child {
margin-bottom: 0;
}
/* Hide fugly table cell borders in ..bibliography:: directive output */
table.docutils.citation, table.docutils.citation td, table.docutils.citation th {
border: none;
/* Below needed in some edge cases; if not applied, bottom shadows appear */
-moz-box-shadow: none;
-webkit-box-shadow: none;
box-shadow: none;
}
/* relbar */
.related {
line-height: 30px;
width: 100%;
font-size: 0.9rem;
}
.related.top {
border-bottom: 1px solid #EEE;
margin-bottom: 20px;
}
.related.bottom {
border-top: 1px solid #EEE;
}
.related ul {
padding: 0;
margin: 0;
list-style: none;
}
.related li {
display: inline;
}
nav#rellinks {
float: right;
}
nav#rellinks li+li:before {
content: "|";
}
nav#breadcrumbs li+li:before {
content: "\00BB";
}
/* Hide certain items when printing */
@media print {
div.related {
display: none;
}
}

925
docs/html/_static/basic.css Normal file

@@ -0,0 +1,925 @@
/*
* basic.css
* ~~~~~~~~~
*
* Sphinx stylesheet -- basic theme.
*
* :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS.
* :license: BSD, see LICENSE for details.
*
*/
/* -- main layout ----------------------------------------------------------- */
div.clearer {
clear: both;
}
div.section::after {
display: block;
content: '';
clear: left;
}
/* -- relbar ---------------------------------------------------------------- */
div.related {
width: 100%;
font-size: 90%;
}
div.related h3 {
display: none;
}
div.related ul {
margin: 0;
padding: 0 0 0 10px;
list-style: none;
}
div.related li {
display: inline;
}
div.related li.right {
float: right;
margin-right: 5px;
}
/* -- sidebar --------------------------------------------------------------- */
div.sphinxsidebarwrapper {
padding: 10px 5px 0 10px;
}
div.sphinxsidebar {
float: left;
width: 230px;
margin-left: -100%;
font-size: 90%;
word-wrap: break-word;
overflow-wrap : break-word;
}
div.sphinxsidebar ul {
list-style: none;
}
div.sphinxsidebar ul ul,
div.sphinxsidebar ul.want-points {
margin-left: 20px;
list-style: square;
}
div.sphinxsidebar ul ul {
margin-top: 0;
margin-bottom: 0;
}
div.sphinxsidebar form {
margin-top: 10px;
}
div.sphinxsidebar input {
border: 1px solid #98dbcc;
font-family: sans-serif;
font-size: 1em;
}
div.sphinxsidebar #searchbox form.search {
overflow: hidden;
}
div.sphinxsidebar #searchbox input[type="text"] {
float: left;
width: 80%;
padding: 0.25em;
box-sizing: border-box;
}
div.sphinxsidebar #searchbox input[type="submit"] {
float: left;
width: 20%;
border-left: none;
padding: 0.25em;
box-sizing: border-box;
}
img {
border: 0;
max-width: 100%;
}
/* -- search page ----------------------------------------------------------- */
ul.search {
margin: 10px 0 0 20px;
padding: 0;
}
ul.search li {
padding: 5px 0 5px 20px;
background-image: url(file.png);
background-repeat: no-repeat;
background-position: 0 7px;
}
ul.search li a {
font-weight: bold;
}
ul.search li p.context {
color: #888;
margin: 2px 0 0 30px;
text-align: left;
}
ul.keywordmatches li.goodmatch a {
font-weight: bold;
}
/* -- index page ------------------------------------------------------------ */
table.contentstable {
width: 90%;
margin-left: auto;
margin-right: auto;
}
table.contentstable p.biglink {
line-height: 150%;
}
a.biglink {
font-size: 1.3em;
}
span.linkdescr {
font-style: italic;
padding-top: 5px;
font-size: 90%;
}
/* -- general index --------------------------------------------------------- */
table.indextable {
width: 100%;
}
table.indextable td {
text-align: left;
vertical-align: top;
}
table.indextable ul {
margin-top: 0;
margin-bottom: 0;
list-style-type: none;
}
table.indextable > tbody > tr > td > ul {
padding-left: 0em;
}
table.indextable tr.pcap {
height: 10px;
}
table.indextable tr.cap {
margin-top: 10px;
background-color: #f2f2f2;
}
img.toggler {
margin-right: 3px;
margin-top: 3px;
cursor: pointer;
}
div.modindex-jumpbox {
border-top: 1px solid #ddd;
border-bottom: 1px solid #ddd;
margin: 1em 0 1em 0;
padding: 0.4em;
}
div.genindex-jumpbox {
border-top: 1px solid #ddd;
border-bottom: 1px solid #ddd;
margin: 1em 0 1em 0;
padding: 0.4em;
}
/* -- domain module index --------------------------------------------------- */
table.modindextable td {
padding: 2px;
border-collapse: collapse;
}
/* -- general body styles --------------------------------------------------- */
div.body {
min-width: 360px;
max-width: 800px;
}
div.body p, div.body dd, div.body li, div.body blockquote {
-moz-hyphens: auto;
-ms-hyphens: auto;
-webkit-hyphens: auto;
hyphens: auto;
}
a.headerlink {
visibility: hidden;
}
a:visited {
color: #551A8B;
}
h1:hover > a.headerlink,
h2:hover > a.headerlink,
h3:hover > a.headerlink,
h4:hover > a.headerlink,
h5:hover > a.headerlink,
h6:hover > a.headerlink,
dt:hover > a.headerlink,
caption:hover > a.headerlink,
p.caption:hover > a.headerlink,
div.code-block-caption:hover > a.headerlink {
visibility: visible;
}
div.body p.caption {
text-align: inherit;
}
div.body td {
text-align: left;
}
.first {
margin-top: 0 !important;
}
p.rubric {
margin-top: 30px;
font-weight: bold;
}
img.align-left, figure.align-left, .figure.align-left, object.align-left {
clear: left;
float: left;
margin-right: 1em;
}
img.align-right, figure.align-right, .figure.align-right, object.align-right {
clear: right;
float: right;
margin-left: 1em;
}
img.align-center, figure.align-center, .figure.align-center, object.align-center {
display: block;
margin-left: auto;
margin-right: auto;
}
img.align-default, figure.align-default, .figure.align-default {
display: block;
margin-left: auto;
margin-right: auto;
}
.align-left {
text-align: left;
}
.align-center {
text-align: center;
}
.align-default {
text-align: center;
}
.align-right {
text-align: right;
}
/* -- sidebars -------------------------------------------------------------- */
div.sidebar,
aside.sidebar {
margin: 0 0 0.5em 1em;
border: 1px solid #ddb;
padding: 7px;
background-color: #ffe;
width: 40%;
float: right;
clear: right;
overflow-x: auto;
}
p.sidebar-title {
font-weight: bold;
}
nav.contents,
aside.topic,
div.admonition, div.topic, blockquote {
clear: left;
}
/* -- topics ---------------------------------------------------------------- */
nav.contents,
aside.topic,
div.topic {
border: 1px solid #ccc;
padding: 7px;
margin: 10px 0 10px 0;
}
p.topic-title {
font-size: 1.1em;
font-weight: bold;
margin-top: 10px;
}
/* -- admonitions ----------------------------------------------------------- */
div.admonition {
margin-top: 10px;
margin-bottom: 10px;
padding: 7px;
}
div.admonition dt {
font-weight: bold;
}
p.admonition-title {
margin: 0px 10px 5px 0px;
font-weight: bold;
}
div.body p.centered {
text-align: center;
margin-top: 25px;
}
/* -- content of sidebars/topics/admonitions -------------------------------- */
div.sidebar > :last-child,
aside.sidebar > :last-child,
nav.contents > :last-child,
aside.topic > :last-child,
div.topic > :last-child,
div.admonition > :last-child {
margin-bottom: 0;
}
div.sidebar::after,
aside.sidebar::after,
nav.contents::after,
aside.topic::after,
div.topic::after,
div.admonition::after,
blockquote::after {
display: block;
content: '';
clear: both;
}
/* -- tables ---------------------------------------------------------------- */
table.docutils {
margin-top: 10px;
margin-bottom: 10px;
border: 0;
border-collapse: collapse;
}
table.align-center {
margin-left: auto;
margin-right: auto;
}
table.align-default {
margin-left: auto;
margin-right: auto;
}
table caption span.caption-number {
font-style: italic;
}
table caption span.caption-text {
}
table.docutils td, table.docutils th {
padding: 1px 8px 1px 5px;
border-top: 0;
border-left: 0;
border-right: 0;
border-bottom: 1px solid #aaa;
}
th {
text-align: left;
padding-right: 5px;
}
table.citation {
border-left: solid 1px gray;
margin-left: 1px;
}
table.citation td {
border-bottom: none;
}
th > :first-child,
td > :first-child {
margin-top: 0px;
}
th > :last-child,
td > :last-child {
margin-bottom: 0px;
}
/* -- figures --------------------------------------------------------------- */
div.figure, figure {
margin: 0.5em;
padding: 0.5em;
}
div.figure p.caption, figcaption {
padding: 0.3em;
}
div.figure p.caption span.caption-number,
figcaption span.caption-number {
font-style: italic;
}
div.figure p.caption span.caption-text,
figcaption span.caption-text {
}
/* -- field list styles ----------------------------------------------------- */
table.field-list td, table.field-list th {
border: 0 !important;
}
.field-list ul {
margin: 0;
padding-left: 1em;
}
.field-list p {
margin: 0;
}
.field-name {
-moz-hyphens: manual;
-ms-hyphens: manual;
-webkit-hyphens: manual;
hyphens: manual;
}
/* -- hlist styles ---------------------------------------------------------- */
table.hlist {
margin: 1em 0;
}
table.hlist td {
vertical-align: top;
}
/* -- object description styles --------------------------------------------- */
.sig {
font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace;
}
.sig-name, code.descname {
background-color: transparent;
font-weight: bold;
}
.sig-name {
font-size: 1.1em;
}
code.descname {
font-size: 1.2em;
}
.sig-prename, code.descclassname {
background-color: transparent;
}
.optional {
font-size: 1.3em;
}
.sig-paren {
font-size: larger;
}
.sig-param.n {
font-style: italic;
}
/* C++ specific styling */
.sig-inline.c-texpr,
.sig-inline.cpp-texpr {
font-family: unset;
}
.sig.c .k, .sig.c .kt,
.sig.cpp .k, .sig.cpp .kt {
color: #0033B3;
}
.sig.c .m,
.sig.cpp .m {
color: #1750EB;
}
.sig.c .s, .sig.c .sc,
.sig.cpp .s, .sig.cpp .sc {
color: #067D17;
}
/* -- other body styles ----------------------------------------------------- */
ol.arabic {
list-style: decimal;
}
ol.loweralpha {
list-style: lower-alpha;
}
ol.upperalpha {
list-style: upper-alpha;
}
ol.lowerroman {
list-style: lower-roman;
}
ol.upperroman {
list-style: upper-roman;
}
:not(li) > ol > li:first-child > :first-child,
:not(li) > ul > li:first-child > :first-child {
margin-top: 0px;
}
:not(li) > ol > li:last-child > :last-child,
:not(li) > ul > li:last-child > :last-child {
margin-bottom: 0px;
}
ol.simple ol p,
ol.simple ul p,
ul.simple ol p,
ul.simple ul p {
margin-top: 0;
}
ol.simple > li:not(:first-child) > p,
ul.simple > li:not(:first-child) > p {
margin-top: 0;
}
ol.simple p,
ul.simple p {
margin-bottom: 0;
}
aside.footnote > span,
div.citation > span {
float: left;
}
aside.footnote > span:last-of-type,
div.citation > span:last-of-type {
padding-right: 0.5em;
}
aside.footnote > p {
margin-left: 2em;
}
div.citation > p {
margin-left: 4em;
}
aside.footnote > p:last-of-type,
div.citation > p:last-of-type {
margin-bottom: 0em;
}
aside.footnote > p:last-of-type:after,
div.citation > p:last-of-type:after {
content: "";
clear: both;
}
dl.field-list {
display: grid;
grid-template-columns: fit-content(30%) auto;
}
dl.field-list > dt {
font-weight: bold;
word-break: break-word;
padding-left: 0.5em;
padding-right: 5px;
}
dl.field-list > dd {
padding-left: 0.5em;
margin-top: 0em;
margin-left: 0em;
margin-bottom: 0em;
}
dl {
margin-bottom: 15px;
}
dd > :first-child {
margin-top: 0px;
}
dd ul, dd table {
margin-bottom: 10px;
}
dd {
margin-top: 3px;
margin-bottom: 10px;
margin-left: 30px;
}
.sig dd {
margin-top: 0px;
margin-bottom: 0px;
}
.sig dl {
margin-top: 0px;
margin-bottom: 0px;
}
dl > dd:last-child,
dl > dd:last-child > :last-child {
margin-bottom: 0;
}
dt:target, span.highlighted {
background-color: #fbe54e;
}
rect.highlighted {
fill: #fbe54e;
}
dl.glossary dt {
font-weight: bold;
font-size: 1.1em;
}
.versionmodified {
font-style: italic;
}
.system-message {
background-color: #fda;
padding: 5px;
border: 3px solid red;
}
.footnote:target {
background-color: #ffa;
}
.line-block {
display: block;
margin-top: 1em;
margin-bottom: 1em;
}
.line-block .line-block {
margin-top: 0;
margin-bottom: 0;
margin-left: 1.5em;
}
.guilabel, .menuselection {
font-family: sans-serif;
}
.accelerator {
text-decoration: underline;
}
.classifier {
font-style: oblique;
}
.classifier:before {
font-style: normal;
margin: 0 0.5em;
content: ":";
display: inline-block;
}
abbr, acronym {
border-bottom: dotted 1px;
cursor: help;
}
.translated {
background-color: rgba(207, 255, 207, 0.2)
}
.untranslated {
background-color: rgba(255, 207, 207, 0.2)
}
/* -- code displays --------------------------------------------------------- */
pre {
overflow: auto;
overflow-y: hidden; /* fixes display issues on Chrome browsers */
}
pre, div[class*="highlight-"] {
clear: both;
}
span.pre {
-moz-hyphens: none;
-ms-hyphens: none;
-webkit-hyphens: none;
hyphens: none;
white-space: nowrap;
}
div[class*="highlight-"] {
margin: 1em 0;
}
td.linenos pre {
border: 0;
background-color: transparent;
color: #aaa;
}
table.highlighttable {
display: block;
}
table.highlighttable tbody {
display: block;
}
table.highlighttable tr {
display: flex;
}
table.highlighttable td {
margin: 0;
padding: 0;
}
table.highlighttable td.linenos {
padding-right: 0.5em;
}
table.highlighttable td.code {
flex: 1;
overflow: hidden;
}
.highlight .hll {
display: block;
}
div.highlight pre,
table.highlighttable pre {
margin: 0;
}
div.code-block-caption + div {
margin-top: 0;
}
div.code-block-caption {
margin-top: 1em;
padding: 2px 5px;
font-size: small;
}
div.code-block-caption code {
background-color: transparent;
}
table.highlighttable td.linenos,
span.linenos,
div.highlight span.gp { /* gp: Generic.Prompt */
user-select: none;
-webkit-user-select: text; /* Safari fallback only */
-webkit-user-select: none; /* Chrome/Safari */
-moz-user-select: none; /* Firefox */
-ms-user-select: none; /* IE10+ */
}
div.code-block-caption span.caption-number {
padding: 0.1em 0.3em;
font-style: italic;
}
div.code-block-caption span.caption-text {
}
div.literal-block-wrapper {
margin: 1em 0;
}
code.xref, a code {
background-color: transparent;
font-weight: bold;
}
h1 code, h2 code, h3 code, h4 code, h5 code, h6 code {
background-color: transparent;
}
.viewcode-link {
float: right;
}
.viewcode-back {
float: right;
font-family: sans-serif;
}
div.viewcode-block:target {
margin: -1px -10px;
padding: 0 10px;
}
/* -- math display ---------------------------------------------------------- */
img.math {
vertical-align: middle;
}
div.body div.math p {
text-align: center;
}
span.eqno {
float: right;
}
span.eqno a.headerlink {
position: absolute;
z-index: 1;
}
div.math:hover a.headerlink {
visibility: visible;
}
/* -- printout stylesheet --------------------------------------------------- */
@media print {
div.document,
div.documentwrapper,
div.bodywrapper {
margin: 0 !important;
width: 100%;
}
div.sphinxsidebar,
div.related,
div.footer,
#top-link {
display: none;
}
}


@@ -0,0 +1 @@
/* This file intentionally left blank. */


@@ -0,0 +1,156 @@
/*
* doctools.js
* ~~~~~~~~~~~
*
* Base JavaScript utilities for all Sphinx HTML documentation.
*
* :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS.
* :license: BSD, see LICENSE for details.
*
*/
"use strict";
const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([
"TEXTAREA",
"INPUT",
"SELECT",
"BUTTON",
]);
const _ready = (callback) => {
if (document.readyState !== "loading") {
callback();
} else {
document.addEventListener("DOMContentLoaded", callback);
}
};
/**
* Small JavaScript module for the documentation.
*/
const Documentation = {
init: () => {
Documentation.initDomainIndexTable();
Documentation.initOnKeyListeners();
},
/**
* i18n support
*/
TRANSLATIONS: {},
PLURAL_EXPR: (n) => (n === 1 ? 0 : 1),
LOCALE: "unknown",
// gettext and ngettext don't access this so that the functions
// can safely bound to a different name (_ = Documentation.gettext)
gettext: (string) => {
const translated = Documentation.TRANSLATIONS[string];
switch (typeof translated) {
case "undefined":
return string; // no translation
case "string":
return translated; // translation exists
default:
return translated[0]; // (singular, plural) translation tuple exists
}
},
ngettext: (singular, plural, n) => {
const translated = Documentation.TRANSLATIONS[singular];
if (typeof translated !== "undefined")
return translated[Documentation.PLURAL_EXPR(n)];
return n === 1 ? singular : plural;
},
addTranslations: (catalog) => {
Object.assign(Documentation.TRANSLATIONS, catalog.messages);
Documentation.PLURAL_EXPR = new Function(
"n",
`return (${catalog.plural_expr})`
);
Documentation.LOCALE = catalog.locale;
},
/**
* helper function to focus on search bar
*/
focusSearchBar: () => {
document.querySelectorAll("input[name=q]")[0]?.focus();
},
/**
* Initialise the domain index toggle buttons
*/
initDomainIndexTable: () => {
const toggler = (el) => {
const idNumber = el.id.substr(7);
const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`);
if (el.src.substr(-9) === "minus.png") {
el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`;
toggledRows.forEach((el) => (el.style.display = "none"));
} else {
el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`;
toggledRows.forEach((el) => (el.style.display = ""));
}
};
const togglerElements = document.querySelectorAll("img.toggler");
togglerElements.forEach((el) =>
el.addEventListener("click", (event) => toggler(event.currentTarget))
);
togglerElements.forEach((el) => (el.style.display = ""));
if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler);
},
initOnKeyListeners: () => {
// only install a listener if it is really needed
if (
!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS &&
!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS
)
return;
document.addEventListener("keydown", (event) => {
// bail for input elements
if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return;
// bail with special keys
if (event.altKey || event.ctrlKey || event.metaKey) return;
if (!event.shiftKey) {
switch (event.key) {
case "ArrowLeft":
if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break;
const prevLink = document.querySelector('link[rel="prev"]');
if (prevLink && prevLink.href) {
window.location.href = prevLink.href;
event.preventDefault();
}
break;
case "ArrowRight":
if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break;
const nextLink = document.querySelector('link[rel="next"]');
if (nextLink && nextLink.href) {
window.location.href = nextLink.href;
event.preventDefault();
}
break;
}
}
// some keyboard layouts may need Shift to get /
switch (event.key) {
case "/":
if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break;
Documentation.focusSearchBar();
event.preventDefault();
}
});
},
};
// quick alias for translations
const _ = Documentation.gettext;
_ready(Documentation.init);


@@ -0,0 +1,13 @@
const DOCUMENTATION_OPTIONS = {
VERSION: '',
LANGUAGE: 'en',
COLLAPSE_INDEX: false,
BUILDER: 'html',
FILE_SUFFIX: '.html',
LINK_SUFFIX: '.html',
HAS_SOURCE: true,
SOURCELINK_SUFFIX: '.txt',
NAVIGATION_WITH_KEYS: false,
SHOW_SEARCH_SUMMARY: true,
ENABLE_SEARCH_SHORTCUTS: true,
};

BIN
docs/html/_static/file.png Normal file

Binary file not shown.



@@ -0,0 +1,199 @@
/*
* language_data.js
* ~~~~~~~~~~~~~~~~
*
* This script contains the language-specific data used by searchtools.js,
* namely the list of stopwords, stemmer, scorer and splitter.
*
* :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS.
* :license: BSD, see LICENSE for details.
*
*/
var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"];
/* Non-minified version is copied as a separate JS file, if available */
/**
* Porter Stemmer
*/
var Stemmer = function() {
var step2list = {
ational: 'ate',
tional: 'tion',
enci: 'ence',
anci: 'ance',
izer: 'ize',
bli: 'ble',
alli: 'al',
entli: 'ent',
eli: 'e',
ousli: 'ous',
ization: 'ize',
ation: 'ate',
ator: 'ate',
alism: 'al',
iveness: 'ive',
fulness: 'ful',
ousness: 'ous',
aliti: 'al',
iviti: 'ive',
biliti: 'ble',
logi: 'log'
};
var step3list = {
icate: 'ic',
ative: '',
alize: 'al',
iciti: 'ic',
ical: 'ic',
ful: '',
ness: ''
};
var c = "[^aeiou]"; // consonant
var v = "[aeiouy]"; // vowel
var C = c + "[^aeiouy]*"; // consonant sequence
var V = v + "[aeiou]*"; // vowel sequence
var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0
var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1
var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1
var s_v = "^(" + C + ")?" + v; // vowel in stem
this.stemWord = function (w) {
var stem;
var suffix;
var firstch;
var origword = w;
if (w.length < 3)
return w;
var re;
var re2;
var re3;
var re4;
firstch = w.substr(0,1);
if (firstch == "y")
w = firstch.toUpperCase() + w.substr(1);
// Step 1a
re = /^(.+?)(ss|i)es$/;
re2 = /^(.+?)([^s])s$/;
if (re.test(w))
w = w.replace(re,"$1$2");
else if (re2.test(w))
w = w.replace(re2,"$1$2");
// Step 1b
re = /^(.+?)eed$/;
re2 = /^(.+?)(ed|ing)$/;
if (re.test(w)) {
var fp = re.exec(w);
re = new RegExp(mgr0);
if (re.test(fp[1])) {
re = /.$/;
w = w.replace(re,"");
}
}
else if (re2.test(w)) {
var fp = re2.exec(w);
stem = fp[1];
re2 = new RegExp(s_v);
if (re2.test(stem)) {
w = stem;
re2 = /(at|bl|iz)$/;
re3 = new RegExp("([^aeiouylsz])\\1$");
re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
if (re2.test(w))
w = w + "e";
else if (re3.test(w)) {
re = /.$/;
w = w.replace(re,"");
}
else if (re4.test(w))
w = w + "e";
}
}
// Step 1c
re = /^(.+?)y$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = new RegExp(s_v);
if (re.test(stem))
w = stem + "i";
}
// Step 2
re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
suffix = fp[2];
re = new RegExp(mgr0);
if (re.test(stem))
w = stem + step2list[suffix];
}
// Step 3
re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
suffix = fp[2];
re = new RegExp(mgr0);
if (re.test(stem))
w = stem + step3list[suffix];
}
// Step 4
re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
re2 = /^(.+?)(s|t)(ion)$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = new RegExp(mgr1);
if (re.test(stem))
w = stem;
}
else if (re2.test(w)) {
var fp = re2.exec(w);
stem = fp[1] + fp[2];
re2 = new RegExp(mgr1);
if (re2.test(stem))
w = stem;
}
// Step 5
re = /^(.+?)e$/;
if (re.test(w)) {
var fp = re.exec(w);
stem = fp[1];
re = new RegExp(mgr1);
re2 = new RegExp(meq1);
re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
w = stem;
}
re = /ll$/;
re2 = new RegExp(mgr1);
if (re.test(w) && re2.test(w)) {
re = /.$/;
w = w.replace(re,"");
}
// and turn initial Y back to y
if (firstch == "y")
w = firstch.toLowerCase() + w.substr(1);
return w;
}
}

BIN docs/html/_static/minus.png Normal file (binary image, 90 B; not shown)

BIN docs/html/_static/plus.png Normal file (binary image, 90 B; not shown)

View File

@ -0,0 +1,84 @@
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight { background: #f8f8f8; }
.highlight .c { color: #8f5902; font-style: italic } /* Comment */
.highlight .err { color: #a40000; border: 1px solid #ef2929 } /* Error */
.highlight .g { color: #000000 } /* Generic */
.highlight .k { color: #004461; font-weight: bold } /* Keyword */
.highlight .l { color: #000000 } /* Literal */
.highlight .n { color: #000000 } /* Name */
.highlight .o { color: #582800 } /* Operator */
.highlight .x { color: #000000 } /* Other */
.highlight .p { color: #000000; font-weight: bold } /* Punctuation */
.highlight .ch { color: #8f5902; font-style: italic } /* Comment.Hashbang */
.highlight .cm { color: #8f5902; font-style: italic } /* Comment.Multiline */
.highlight .cp { color: #8f5902 } /* Comment.Preproc */
.highlight .cpf { color: #8f5902; font-style: italic } /* Comment.PreprocFile */
.highlight .c1 { color: #8f5902; font-style: italic } /* Comment.Single */
.highlight .cs { color: #8f5902; font-style: italic } /* Comment.Special */
.highlight .gd { color: #a40000 } /* Generic.Deleted */
.highlight .ge { color: #000000; font-style: italic } /* Generic.Emph */
.highlight .ges { color: #000000 } /* Generic.EmphStrong */
.highlight .gr { color: #ef2929 } /* Generic.Error */
.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
.highlight .gi { color: #00A000 } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #745334 } /* Generic.Prompt */
.highlight .gs { color: #000000; font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
.highlight .gt { color: #a40000; font-weight: bold } /* Generic.Traceback */
.highlight .kc { color: #004461; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #004461; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #004461; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #004461; font-weight: bold } /* Keyword.Pseudo */
.highlight .kr { color: #004461; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #004461; font-weight: bold } /* Keyword.Type */
.highlight .ld { color: #000000 } /* Literal.Date */
.highlight .m { color: #990000 } /* Literal.Number */
.highlight .s { color: #4e9a06 } /* Literal.String */
.highlight .na { color: #c4a000 } /* Name.Attribute */
.highlight .nb { color: #004461 } /* Name.Builtin */
.highlight .nc { color: #000000 } /* Name.Class */
.highlight .no { color: #000000 } /* Name.Constant */
.highlight .nd { color: #888888 } /* Name.Decorator */
.highlight .ni { color: #ce5c00 } /* Name.Entity */
.highlight .ne { color: #cc0000; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #000000 } /* Name.Function */
.highlight .nl { color: #f57900 } /* Name.Label */
.highlight .nn { color: #000000 } /* Name.Namespace */
.highlight .nx { color: #000000 } /* Name.Other */
.highlight .py { color: #000000 } /* Name.Property */
.highlight .nt { color: #004461; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #000000 } /* Name.Variable */
.highlight .ow { color: #004461; font-weight: bold } /* Operator.Word */
.highlight .pm { color: #000000; font-weight: bold } /* Punctuation.Marker */
.highlight .w { color: #f8f8f8; text-decoration: underline } /* Text.Whitespace */
.highlight .mb { color: #990000 } /* Literal.Number.Bin */
.highlight .mf { color: #990000 } /* Literal.Number.Float */
.highlight .mh { color: #990000 } /* Literal.Number.Hex */
.highlight .mi { color: #990000 } /* Literal.Number.Integer */
.highlight .mo { color: #990000 } /* Literal.Number.Oct */
.highlight .sa { color: #4e9a06 } /* Literal.String.Affix */
.highlight .sb { color: #4e9a06 } /* Literal.String.Backtick */
.highlight .sc { color: #4e9a06 } /* Literal.String.Char */
.highlight .dl { color: #4e9a06 } /* Literal.String.Delimiter */
.highlight .sd { color: #8f5902; font-style: italic } /* Literal.String.Doc */
.highlight .s2 { color: #4e9a06 } /* Literal.String.Double */
.highlight .se { color: #4e9a06 } /* Literal.String.Escape */
.highlight .sh { color: #4e9a06 } /* Literal.String.Heredoc */
.highlight .si { color: #4e9a06 } /* Literal.String.Interpol */
.highlight .sx { color: #4e9a06 } /* Literal.String.Other */
.highlight .sr { color: #4e9a06 } /* Literal.String.Regex */
.highlight .s1 { color: #4e9a06 } /* Literal.String.Single */
.highlight .ss { color: #4e9a06 } /* Literal.String.Symbol */
.highlight .bp { color: #3465a4 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #000000 } /* Name.Function.Magic */
.highlight .vc { color: #000000 } /* Name.Variable.Class */
.highlight .vg { color: #000000 } /* Name.Variable.Global */
.highlight .vi { color: #000000 } /* Name.Variable.Instance */
.highlight .vm { color: #000000 } /* Name.Variable.Magic */
.highlight .il { color: #990000 } /* Literal.Number.Integer.Long */

View File

@ -0,0 +1,574 @@
/*
* searchtools.js
* ~~~~~~~~~~~~~~~~
*
* Sphinx JavaScript utilities for the full-text search.
*
* :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS.
* :license: BSD, see LICENSE for details.
*
*/
"use strict";
/**
* Simple result scoring code.
*/
if (typeof Scorer === "undefined") {
var Scorer = {
// Implement the following function to further tweak the score for each result
// The function takes a result array [docname, title, anchor, descr, score, filename]
// and returns the new score.
/*
score: result => {
const [docname, title, anchor, descr, score, filename] = result
return score
},
*/
// query matches the full name of an object
objNameMatch: 11,
// or matches in the last dotted part of the object name
objPartialMatch: 6,
// Additive scores depending on the priority of the object
objPrio: {
0: 15, // used to be importantResults
1: 5, // used to be objectResults
2: -5, // used to be unimportantResults
},
// Used when the priority is not in the mapping.
objPrioDefault: 0,
// query found in title
title: 15,
partialTitle: 7,
// query found in terms
term: 5,
partialTerm: 2,
};
}
const _removeChildren = (element) => {
while (element && element.lastChild) element.removeChild(element.lastChild);
};
/**
* See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping
*/
const _escapeRegExp = (string) =>
string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string
const _displayItem = (item, searchTerms, highlightTerms) => {
const docBuilder = DOCUMENTATION_OPTIONS.BUILDER;
const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX;
const docLinkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX;
const showSearchSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY;
const contentRoot = document.documentElement.dataset.content_root;
const [docName, title, anchor, descr, score, _filename] = item;
let listItem = document.createElement("li");
let requestUrl;
let linkUrl;
if (docBuilder === "dirhtml") {
// dirhtml builder
let dirname = docName + "/";
if (dirname.match(/\/index\/$/))
dirname = dirname.substring(0, dirname.length - 6);
else if (dirname === "index/") dirname = "";
requestUrl = contentRoot + dirname;
linkUrl = requestUrl;
} else {
// normal html builders
requestUrl = contentRoot + docName + docFileSuffix;
linkUrl = docName + docLinkSuffix;
}
let linkEl = listItem.appendChild(document.createElement("a"));
linkEl.href = linkUrl + anchor;
linkEl.dataset.score = score;
linkEl.innerHTML = title;
if (descr) {
listItem.appendChild(document.createElement("span")).innerHTML =
" (" + descr + ")";
// highlight search terms in the description
if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js
highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted"));
}
else if (showSearchSummary)
fetch(requestUrl)
.then((responseData) => responseData.text())
.then((data) => {
if (data)
listItem.appendChild(
Search.makeSearchSummary(data, searchTerms)
);
// highlight search terms in the summary
if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js
highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted"));
});
Search.output.appendChild(listItem);
};
const _finishSearch = (resultCount) => {
Search.stopPulse();
Search.title.innerText = _("Search Results");
if (!resultCount)
Search.status.innerText = Documentation.gettext(
"Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories."
);
else
Search.status.innerText = _(
`Search finished, found ${resultCount} page(s) matching the search query.`
);
};
const _displayNextItem = (
results,
resultCount,
searchTerms,
highlightTerms,
) => {
// results left, load the summary and display it
// this is intended to be dynamic (don't sub resultsCount)
if (results.length) {
_displayItem(results.pop(), searchTerms, highlightTerms);
setTimeout(
() => _displayNextItem(results, resultCount, searchTerms, highlightTerms),
5
);
}
// search finished, update title and status message
else _finishSearch(resultCount);
};
/**
* Default splitQuery function. Can be overridden in ``sphinx.search`` with a
* custom function per language.
*
* The regular expression works by splitting the string on consecutive characters
* that are not Unicode letters, numbers, underscores, or emoji characters.
* This is the same as ``\W+`` in Python, preserving the surrogate pair area.
*/
if (typeof splitQuery === "undefined") {
var splitQuery = (query) => query
.split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu)
.filter(term => term) // remove remaining empty strings
}
/**
* Search Module
*/
const Search = {
_index: null,
_queued_query: null,
_pulse_status: -1,
htmlToText: (htmlString) => {
const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html');
htmlElement.querySelectorAll(".headerlink").forEach((el) => { el.remove() });
const docContent = htmlElement.querySelector('[role="main"]');
if (docContent) return docContent.textContent; // querySelector yields null (not undefined) when the block is missing
console.warn(
"Content block not found. Sphinx search tries to obtain it via '[role=main]'. Could you check your theme or template."
);
return "";
},
init: () => {
const query = new URLSearchParams(window.location.search).get("q");
document
.querySelectorAll('input[name="q"]')
.forEach((el) => (el.value = query));
if (query) Search.performSearch(query);
},
loadIndex: (url) =>
(document.body.appendChild(document.createElement("script")).src = url),
setIndex: (index) => {
Search._index = index;
if (Search._queued_query !== null) {
const query = Search._queued_query;
Search._queued_query = null;
Search.query(query);
}
},
hasIndex: () => Search._index !== null,
deferQuery: (query) => (Search._queued_query = query),
stopPulse: () => (Search._pulse_status = -1),
startPulse: () => {
if (Search._pulse_status >= 0) return;
const pulse = () => {
Search._pulse_status = (Search._pulse_status + 1) % 4;
Search.dots.innerText = ".".repeat(Search._pulse_status);
if (Search._pulse_status >= 0) window.setTimeout(pulse, 500);
};
pulse();
},
/**
* perform a search for something (or wait until index is loaded)
*/
performSearch: (query) => {
// create the required interface elements
const searchText = document.createElement("h2");
searchText.textContent = _("Searching");
const searchSummary = document.createElement("p");
searchSummary.classList.add("search-summary");
searchSummary.innerText = "";
const searchList = document.createElement("ul");
searchList.classList.add("search");
const out = document.getElementById("search-results");
Search.title = out.appendChild(searchText);
Search.dots = Search.title.appendChild(document.createElement("span"));
Search.status = out.appendChild(searchSummary);
Search.output = out.appendChild(searchList);
const searchProgress = document.getElementById("search-progress");
// Some themes don't use the search progress node
if (searchProgress) {
searchProgress.innerText = _("Preparing search...");
}
Search.startPulse();
// index already loaded, the browser was quick!
if (Search.hasIndex()) Search.query(query);
else Search.deferQuery(query);
},
/**
* execute search (requires search index to be loaded)
*/
query: (query) => {
const filenames = Search._index.filenames;
const docNames = Search._index.docnames;
const titles = Search._index.titles;
const allTitles = Search._index.alltitles;
const indexEntries = Search._index.indexentries;
// stem the search terms and add them to the correct list
const stemmer = new Stemmer();
const searchTerms = new Set();
const excludedTerms = new Set();
const highlightTerms = new Set();
const objectTerms = new Set(splitQuery(query.toLowerCase().trim()));
splitQuery(query.trim()).forEach((queryTerm) => {
const queryTermLower = queryTerm.toLowerCase();
// maybe skip this "word"
// stopwords array is from language_data.js
if (
stopwords.indexOf(queryTermLower) !== -1 ||
queryTerm.match(/^\d+$/)
)
return;
// stem the word
let word = stemmer.stemWord(queryTermLower);
// select the correct list
if (word[0] === "-") excludedTerms.add(word.substr(1));
else {
searchTerms.add(word);
highlightTerms.add(queryTermLower);
}
});
if (SPHINX_HIGHLIGHT_ENABLED) { // set in sphinx_highlight.js
localStorage.setItem("sphinx_highlight_terms", [...highlightTerms].join(" "))
}
// console.debug("SEARCH: searching for:");
// console.info("required: ", [...searchTerms]);
// console.info("excluded: ", [...excludedTerms]);
// array of [docname, title, anchor, descr, score, filename]
let results = [];
_removeChildren(document.getElementById("search-progress"));
const queryLower = query.toLowerCase();
for (const [title, foundTitles] of Object.entries(allTitles)) {
if (title.toLowerCase().includes(queryLower) && (queryLower.length >= title.length/2)) {
for (const [file, id] of foundTitles) {
let score = Math.round(100 * queryLower.length / title.length)
results.push([
docNames[file],
titles[file] !== title ? `${titles[file]} > ${title}` : title,
id !== null ? "#" + id : "",
null,
score,
filenames[file],
]);
}
}
}
// search for explicit entries in index directives
for (const [entry, foundEntries] of Object.entries(indexEntries)) {
if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) {
for (const [file, id] of foundEntries) {
let score = Math.round(100 * queryLower.length / entry.length)
results.push([
docNames[file],
titles[file],
id ? "#" + id : "",
null,
score,
filenames[file],
]);
}
}
}
// lookup as object
objectTerms.forEach((term) =>
results.push(...Search.performObjectSearch(term, objectTerms))
);
// lookup as search terms in fulltext
results.push(...Search.performTermsSearch(searchTerms, excludedTerms));
// let the scorer override scores with a custom scoring function
if (Scorer.score) results.forEach((item) => (item[4] = Scorer.score(item)));
// now sort the results by score (in opposite order of appearance, since the
// display function below uses pop() to retrieve items) and then
// alphabetically
results.sort((a, b) => {
const leftScore = a[4];
const rightScore = b[4];
if (leftScore === rightScore) {
// same score: sort alphabetically
const leftTitle = a[1].toLowerCase();
const rightTitle = b[1].toLowerCase();
if (leftTitle === rightTitle) return 0;
return leftTitle > rightTitle ? -1 : 1; // inverted is intentional
}
return leftScore > rightScore ? 1 : -1;
});
// remove duplicate search results
// note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept
let seen = new Set();
results = results.reverse().reduce((acc, result) => {
let resultStr = result.slice(0, 4).concat([result[5]]).map(v => String(v)).join(',');
if (!seen.has(resultStr)) {
acc.push(result);
seen.add(resultStr);
}
return acc;
}, []);
results = results.reverse();
// for debugging
//Search.lastresults = results.slice(); // a copy
// console.info("search results:", Search.lastresults);
// print the results
_displayNextItem(results, results.length, searchTerms, highlightTerms);
},
/**
* search for object names
*/
performObjectSearch: (object, objectTerms) => {
const filenames = Search._index.filenames;
const docNames = Search._index.docnames;
const objects = Search._index.objects;
const objNames = Search._index.objnames;
const titles = Search._index.titles;
const results = [];
const objectSearchCallback = (prefix, match) => {
const name = match[4]
const fullname = (prefix ? prefix + "." : "") + name;
const fullnameLower = fullname.toLowerCase();
if (fullnameLower.indexOf(object) < 0) return;
let score = 0;
const parts = fullnameLower.split(".");
// check for different match types: exact matches of full name or
// "last name" (i.e. last dotted part)
if (fullnameLower === object || parts.slice(-1)[0] === object)
score += Scorer.objNameMatch;
else if (parts.slice(-1)[0].indexOf(object) > -1)
score += Scorer.objPartialMatch; // matches in last name
const objName = objNames[match[1]][2];
const title = titles[match[0]];
// If more than one term searched for, we require other words to be
// found in the name/title/description
const otherTerms = new Set(objectTerms);
otherTerms.delete(object);
if (otherTerms.size > 0) {
const haystack = `${prefix} ${name} ${objName} ${title}`.toLowerCase();
if (
[...otherTerms].some((otherTerm) => haystack.indexOf(otherTerm) < 0)
)
return;
}
let anchor = match[3];
if (anchor === "") anchor = fullname;
else if (anchor === "-") anchor = objNames[match[1]][1] + "-" + fullname;
const descr = objName + _(", in ") + title;
// add custom score for some objects according to scorer
if (Scorer.objPrio.hasOwnProperty(match[2]))
score += Scorer.objPrio[match[2]];
else score += Scorer.objPrioDefault;
results.push([
docNames[match[0]],
fullname,
"#" + anchor,
descr,
score,
filenames[match[0]],
]);
};
Object.keys(objects).forEach((prefix) =>
objects[prefix].forEach((array) =>
objectSearchCallback(prefix, array)
)
);
return results;
},
/**
* search for full-text terms in the index
*/
performTermsSearch: (searchTerms, excludedTerms) => {
// prepare search
const terms = Search._index.terms;
const titleTerms = Search._index.titleterms;
const filenames = Search._index.filenames;
const docNames = Search._index.docnames;
const titles = Search._index.titles;
const scoreMap = new Map();
const fileMap = new Map();
// perform the search on the required terms
searchTerms.forEach((word) => {
const files = [];
const arr = [
{ files: terms[word], score: Scorer.term },
{ files: titleTerms[word], score: Scorer.title },
];
// add support for partial matches
if (word.length > 2) {
const escapedWord = _escapeRegExp(word);
Object.keys(terms).forEach((term) => {
if (term.match(escapedWord) && !terms[word])
arr.push({ files: terms[term], score: Scorer.partialTerm });
});
Object.keys(titleTerms).forEach((term) => {
if (term.match(escapedWord) && !titleTerms[word])
arr.push({ files: titleTerms[term], score: Scorer.partialTitle });
});
}
// no match but word was a required one
if (arr.every((record) => record.files === undefined)) return;
// found search word in contents
arr.forEach((record) => {
if (record.files === undefined) return;
let recordFiles = record.files;
if (recordFiles.length === undefined) recordFiles = [recordFiles];
files.push(...recordFiles);
// set score for the word in each file
recordFiles.forEach((file) => {
if (!scoreMap.has(file)) scoreMap.set(file, {});
scoreMap.get(file)[word] = record.score;
});
});
// create the mapping
files.forEach((file) => {
if (!fileMap.has(file)) fileMap.set(file, [word]);
else if (fileMap.get(file).indexOf(word) === -1) fileMap.get(file).push(word);
});
});
// now check if the files don't contain excluded terms
const results = [];
for (const [file, wordList] of fileMap) {
// check if all requirements are matched
// as search terms with length < 3 are discarded
const filteredTermCount = [...searchTerms].filter(
(term) => term.length > 2
).length;
if (
wordList.length !== searchTerms.size &&
wordList.length !== filteredTermCount
)
continue;
// ensure that none of the excluded terms is in the search result
if (
[...excludedTerms].some(
(term) =>
terms[term] === file ||
titleTerms[term] === file ||
(terms[term] || []).includes(file) ||
(titleTerms[term] || []).includes(file)
)
)
break;
// select one (max) score for the file.
const score = Math.max(...wordList.map((w) => scoreMap.get(file)[w]));
// add result to the result list
results.push([
docNames[file],
titles[file],
"",
null,
score,
filenames[file],
]);
}
return results;
},
/**
* helper function to return a node containing the
* search summary for a given text. keywords is a list
* of stemmed words.
*/
makeSearchSummary: (htmlText, keywords) => {
const text = Search.htmlToText(htmlText);
if (text === "") return null;
const textLower = text.toLowerCase();
const actualStartPosition = [...keywords]
.map((k) => textLower.indexOf(k.toLowerCase()))
.filter((i) => i > -1)
.slice(-1)[0];
const startWithContext = Math.max(actualStartPosition - 120, 0);
const top = startWithContext === 0 ? "" : "...";
const tail = startWithContext + 240 < text.length ? "..." : "";
let summary = document.createElement("p");
summary.classList.add("context");
summary.textContent = top + text.substr(startWithContext, 240).trim() + tail;
return summary;
},
};
_ready(Search.init);

View File

@ -0,0 +1,154 @@
/* Highlighting utilities for Sphinx HTML documentation. */
"use strict";
const SPHINX_HIGHLIGHT_ENABLED = true
/**
* highlight a given string on a node by wrapping it in
* span elements with the given class name.
*/
const _highlight = (node, addItems, text, className) => {
if (node.nodeType === Node.TEXT_NODE) {
const val = node.nodeValue;
const parent = node.parentNode;
const pos = val.toLowerCase().indexOf(text);
if (
pos >= 0 &&
!parent.classList.contains(className) &&
!parent.classList.contains("nohighlight")
) {
let span;
const closestNode = parent.closest("body, svg, foreignObject");
const isInSVG = closestNode && closestNode.matches("svg");
if (isInSVG) {
span = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
} else {
span = document.createElement("span");
span.classList.add(className);
}
span.appendChild(document.createTextNode(val.substr(pos, text.length)));
const rest = document.createTextNode(val.substr(pos + text.length));
parent.insertBefore(
span,
parent.insertBefore(
rest,
node.nextSibling
)
);
node.nodeValue = val.substr(0, pos);
/* There may be more occurrences of search term in this node. So call this
* function recursively on the remaining fragment.
*/
_highlight(rest, addItems, text, className);
if (isInSVG) {
const rect = document.createElementNS(
"http://www.w3.org/2000/svg",
"rect"
);
const bbox = parent.getBBox();
rect.x.baseVal.value = bbox.x;
rect.y.baseVal.value = bbox.y;
rect.width.baseVal.value = bbox.width;
rect.height.baseVal.value = bbox.height;
rect.setAttribute("class", className);
addItems.push({ parent: parent, target: rect });
}
}
} else if (node.matches && !node.matches("button, select, textarea")) {
node.childNodes.forEach((el) => _highlight(el, addItems, text, className));
}
};
const _highlightText = (thisNode, text, className) => {
let addItems = [];
_highlight(thisNode, addItems, text, className);
addItems.forEach((obj) =>
obj.parent.insertAdjacentElement("beforebegin", obj.target)
);
};
/**
* Small JavaScript module for the documentation.
*/
const SphinxHighlight = {
/**
* highlight the search words provided in localstorage in the text
*/
highlightSearchWords: () => {
if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight
// get and clear terms from localstorage
const url = new URL(window.location);
const highlight =
localStorage.getItem("sphinx_highlight_terms")
|| url.searchParams.get("highlight")
|| "";
localStorage.removeItem("sphinx_highlight_terms")
url.searchParams.delete("highlight");
window.history.replaceState({}, "", url);
// get individual terms from highlight string
const terms = highlight.toLowerCase().split(/\s+/).filter(x => x);
if (terms.length === 0) return; // nothing to do
// There should never be more than one element matching "div.body"
const divBody = document.querySelectorAll("div.body");
const body = divBody.length ? divBody[0] : document.querySelector("body");
window.setTimeout(() => {
terms.forEach((term) => _highlightText(body, term, "highlighted"));
}, 10);
const searchBox = document.getElementById("searchbox");
if (searchBox === null) return;
searchBox.appendChild(
document
.createRange()
.createContextualFragment(
'<p class="highlight-link">' +
'<a href="javascript:SphinxHighlight.hideSearchWords()">' +
_("Hide Search Matches") +
"</a></p>"
)
);
},
/**
* helper function to hide the search marks again
*/
hideSearchWords: () => {
document
.querySelectorAll("#searchbox .highlight-link")
.forEach((el) => el.remove());
document
.querySelectorAll("span.highlighted")
.forEach((el) => el.classList.remove("highlighted"));
localStorage.removeItem("sphinx_highlight_terms")
},
initEscapeListener: () => {
// only install a listener if it is really needed
if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return;
document.addEventListener("keydown", (event) => {
// bail for input elements
if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return;
// bail with special keys
if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return;
if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) {
SphinxHighlight.hideSearchWords();
event.preventDefault();
}
});
},
};
_ready(() => {
/* Do not call highlightSearchWords() when we are on the search page.
* It will highlight words from the *previous* search query.
*/
if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords();
SphinxHighlight.initEscapeListener();
});

281
docs/html/genindex.html Normal file
View File

@ -0,0 +1,281 @@
<!DOCTYPE html>
<html lang="en" data-content_root="./">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Index &#8212; whisper_live documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=4f649999" />
<link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=039e1c02" />
<script src="_static/documentation_options.js?v=5929fcd5"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<link rel="index" title="Index" href="#" />
<link rel="search" title="Search" href="search.html" />
<link rel="stylesheet" href="_static/custom.css" type="text/css" />
<meta name="viewport" content="width=device-width, initial-scale=0.9, maximum-scale=0.9" />
</head><body>
<div class="document">
<div class="documentwrapper">
<div class="bodywrapper">
<div class="body" role="main">
<h1 id="index">Index</h1>
<div class="genindex-jumpbox">
<a href="#A"><strong>A</strong></a>
| <a href="#B"><strong>B</strong></a>
| <a href="#C"><strong>C</strong></a>
| <a href="#D"><strong>D</strong></a>
| <a href="#F"><strong>F</strong></a>
| <a href="#G"><strong>G</strong></a>
| <a href="#M"><strong>M</strong></a>
| <a href="#O"><strong>O</strong></a>
| <a href="#P"><strong>P</strong></a>
| <a href="#R"><strong>R</strong></a>
| <a href="#S"><strong>S</strong></a>
| <a href="#T"><strong>T</strong></a>
| <a href="#U"><strong>U</strong></a>
| <a href="#W"><strong>W</strong></a>
</div>
<h2 id="A">A</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.server.ServeClient.add_frames">add_frames() (whisper_live.server.ServeClient method)</a>
</li>
</ul></td>
</tr></table>
<h2 id="B">B</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.client.Client.bytes_to_float_array">bytes_to_float_array() (whisper_live.client.Client static method)</a>
</li>
</ul></td>
</tr></table>
<h2 id="C">C</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.server.ServeClient.cleanup">cleanup() (whisper_live.server.ServeClient method)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.client.Client">Client (class in whisper_live.client)</a>
</li>
<li><a href="index.html#whisper_live.client.Client.close_websocket">close_websocket() (whisper_live.client.Client method)</a>
</li>
</ul></td>
</tr></table>
<h2 id="D">D</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.server.ServeClient.disconnect">disconnect() (whisper_live.server.ServeClient method)</a>
</li>
</ul></td>
</tr></table>
<h2 id="F">F</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.server.ServeClient.fill_output">fill_output() (whisper_live.server.ServeClient method)</a>
</li>
</ul></td>
</tr></table>
<h2 id="G">G</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.client.Client.get_client_socket">get_client_socket() (whisper_live.client.Client method)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.server.TranscriptionServer.get_wait_time">get_wait_time() (whisper_live.server.TranscriptionServer method)</a>
</li>
</ul></td>
</tr></table>
<h2 id="M">M</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li>
module
<ul>
<li><a href="index.html#module-whisper_live.client">whisper_live.client</a>
</li>
<li><a href="index.html#module-whisper_live.server">whisper_live.server</a>
</li>
</ul></li>
</ul></td>
</tr></table>
<h2 id="O">O</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.client.Client.on_message">on_message() (whisper_live.client.Client method)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.client.Client.on_open">on_open() (whisper_live.client.Client method)</a>
</li>
</ul></td>
</tr></table>
<h2 id="P">P</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.client.Client.play_file">play_file() (whisper_live.client.Client method)</a>
</li>
</ul></td>
</tr></table>
<h2 id="R">R</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.client.Client.record">record() (whisper_live.client.Client method)</a>
</li>
<li><a href="index.html#whisper_live.server.TranscriptionServer.recv_audio">recv_audio() (whisper_live.server.TranscriptionServer method)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.client.resample">resample() (in module whisper_live.client)</a>
</li>
<li><a href="index.html#whisper_live.server.TranscriptionServer.run">run() (whisper_live.server.TranscriptionServer method)</a>
</li>
</ul></td>
</tr></table>
<h2 id="S">S</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.client.Client.send_packet_to_server">send_packet_to_server() (whisper_live.client.Client method)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.server.ServeClient">ServeClient (class in whisper_live.server)</a>
</li>
<li><a href="index.html#whisper_live.server.ServeClient.speech_to_text">speech_to_text() (whisper_live.server.ServeClient method)</a>
</li>
</ul></td>
</tr></table>
<h2 id="T">T</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.client.TranscriptionClient">TranscriptionClient (class in whisper_live.client)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.server.TranscriptionServer">TranscriptionServer (class in whisper_live.server)</a>
</li>
</ul></td>
</tr></table>
<h2 id="U">U</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.server.ServeClient.update_segments">update_segments() (whisper_live.server.ServeClient method)</a>
</li>
</ul></td>
</tr></table>
<h2 id="W">W</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li>
whisper_live.client
<ul>
<li><a href="index.html#module-whisper_live.client">module</a>
</li>
</ul></li>
<li>
whisper_live.server
<ul>
<li><a href="index.html#module-whisper_live.server">module</a>
</li>
</ul></li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="index.html#whisper_live.client.Client.write_audio_frames_to_file">write_audio_frames_to_file() (whisper_live.client.Client method)</a>
</li>
<li><a href="index.html#whisper_live.client.Client.write_output_recording">write_output_recording() (whisper_live.client.Client method)</a>
</li>
</ul></td>
</tr></table>
</div>
</div>
</div>
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
<div class="sphinxsidebarwrapper">
<h1 class="logo"><a href="index.html">whisper_live</a></h1>
<h3>Navigation</h3>
<div class="relations">
<h3>Related Topics</h3>
<ul>
<li><a href="index.html">Documentation overview</a><ul>
</ul></li>
</ul>
</div>
<div id="searchbox" style="display: none" role="search">
<h3 id="searchlabel">Quick search</h3>
<div class="searchformwrapper">
<form class="search" action="search.html" method="get">
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
<input type="submit" value="Go" />
</form>
</div>
</div>
<script>document.getElementById('searchbox').style.display = "block"</script>
</div>
</div>
<div class="clearer"></div>
</div>
<div class="footer">
&copy;2023, Collabora.
|
Powered by <a href="http://sphinx-doc.org/">Sphinx 7.2.6</a>
&amp; <a href="https://github.com/bitprophet/alabaster">Alabaster 0.7.13</a>
</div>
</body>
</html>

468
docs/html/index.html Normal file
View File

@ -0,0 +1,468 @@
<!DOCTYPE html>
<html lang="en" data-content_root="./">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Welcome to Whisper Live documentation! &#8212; whisper_live documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=4f649999" />
<link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=039e1c02" />
<script src="_static/documentation_options.js?v=5929fcd5"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="stylesheet" href="_static/custom.css" type="text/css" />
<meta name="viewport" content="width=device-width, initial-scale=0.9, maximum-scale=0.9" />
</head><body>
<div class="document">
<div class="documentwrapper">
<div class="bodywrapper">
<div class="body" role="main">
<section id="welcome-to-whisper-live-documentation">
<h1>Welcome to Whisper Live documentation!<a class="headerlink" href="#welcome-to-whisper-live-documentation" title="Link to this heading"></a></h1>
<div class="toctree-wrapper compound">
</div>
<dl class="py class" id="module-whisper_live.server">
<dt class="sig sig-object py" id="whisper_live.server.ServeClient">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">whisper_live.server.</span></span><span class="sig-name descname"><span class="pre">ServeClient</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">websocket</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">task</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'transcribe'</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">device</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">multilingual</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">language</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">client_uid</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.server.ServeClient" title="Link to this definition"></a></dt>
<dd><dl class="simple">
<dt>Attributes:</dt><dd><p>RATE (int): The audio sampling rate (constant) set to 16000.
SERVER_READY (str): A constant message indicating that the server is ready.
DISCONNECT (str): A constant message indicating that the client should disconnect.
client_uid (str): A unique identifier for the client.
data (bytes): Accumulated audio data.
frames (bytes): Accumulated audio frames.
language (str): The language for transcription.
task (str): The task type, e.g., “transcribe.”
transcriber (WhisperModel): The Whisper model for speech-to-text.
timestamp_offset (float): The offset in audio timestamps.
frames_np (numpy.ndarray): NumPy array to store audio frames.
frames_offset (float): The offset in audio frames.
text (list): List of transcribed text segments.
current_out (str): The current incomplete transcription.
prev_out (str): The previous incomplete transcription.
t_start (float): Timestamp for the start of transcription.
exit (bool): A flag to exit the transcription thread.
same_output_threshold (int): Threshold for consecutive same output segments.
show_prev_out_thresh (int): Threshold for showing previous output segments.
add_pause_thresh (int): Threshold for adding a pause (blank) segment.
transcript (list): List of transcribed segments.
send_last_n_segments (int): Number of last segments to send to the client.
wrapper (textwrap.TextWrapper): Text wrapper for formatting text.
pick_previous_segments (int): Number of previous segments to include in the output.
websocket: The WebSocket connection for the client.</p>
</dd>
</dl>
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.server.ServeClient.add_frames">
<span class="sig-name descname"><span class="pre">add_frames</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">frame_np</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.server.ServeClient.add_frames" title="Link to this definition"></a></dt>
<dd><p>Add audio frames to the ongoing audio stream buffer.</p>
<p>This method is responsible for maintaining the audio stream buffer, allowing the continuous addition
of audio frames as they are received. It also ensures that the buffer does not exceed a specified size
to prevent excessive memory usage.</p>
<p>If the buffer size exceeds a threshold (45 seconds of audio data), it discards the oldest 30 seconds
of audio data to maintain a reasonable buffer size. If the buffer is empty, it initializes it with the provided
audio frame. The audio stream buffer is used for real-time processing of audio data for transcription.</p>
<dl class="simple">
<dt>Args:</dt><dd><p>frame_np (numpy.ndarray): The audio frame data as a NumPy array.</p>
</dd>
</dl>
</dd></dl>
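A minimal sketch of the buffer policy described above, assuming NumPy and the documented 16 kHz rate; the helper name and the tuple-style return are illustrative, not the actual method:
import numpy as np

RATE = 16000  # sampling rate documented for ServeClient

def add_frames_sketch(frames_np, frame_np, frames_offset):
    # Keep roughly 45 s of audio; when exceeded, drop the oldest 30 s and
    # advance the frame offset accordingly.
    if frames_np is not None and frames_np.shape[0] > 45 * RATE:
        frames_offset += 30.0
        frames_np = frames_np[int(30 * RATE):]
    if frames_np is None:
        frames_np = frame_np.copy()          # first frame initialises the buffer
    else:
        frames_np = np.concatenate((frames_np, frame_np))
    return frames_np, frames_offset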
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.server.ServeClient.cleanup">
<span class="sig-name descname"><span class="pre">cleanup</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.server.ServeClient.cleanup" title="Link to this definition"></a></dt>
<dd><p>Perform cleanup tasks before exiting the transcription service.</p>
<p>This method performs necessary cleanup tasks, including stopping the transcription thread, marking
the exit flag to indicate the transcription thread should exit gracefully, and destroying resources
associated with the transcription process.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.server.ServeClient.disconnect">
<span class="sig-name descname"><span class="pre">disconnect</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.server.ServeClient.disconnect" title="Link to this definition"></a></dt>
<dd><p>Notify the client of disconnection and send a disconnect message.</p>
<p>This method sends a disconnect message to the client via the WebSocket connection to notify them
that the transcription service is disconnecting gracefully.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.server.ServeClient.fill_output">
<span class="sig-name descname"><span class="pre">fill_output</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">output</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.server.ServeClient.fill_output" title="Link to this definition"></a></dt>
<dd><p>Format the current incomplete transcription output by combining it with previous complete segments.
The resulting transcription is wrapped into two lines, each containing a maximum of 50 characters.</p>
<p>It ensures that the combined transcription fits within two lines, with a maximum of 50 characters per line.
Segments are concatenated in the order they exist in the list of previous segments, with the most
recent complete segment first and older segments prepended as needed to maintain the character limit.
If a 3-second pause is detected in the previous segments, any text preceding it is discarded to ensure
the transcription starts with the most recent complete content. The resulting transcription is returned
as a single string.</p>
<dl class="simple">
<dt>Args:</dt><dd><p>output(str): The current incomplete transcription segment.</p>
</dd>
<dt>Returns:</dt><dd><p>str: A formatted transcription wrapped in two lines.</p>
</dd>
</dl>
</dd></dl>
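The two-line, 50-character wrapping described above can be approximated with textwrap; this is an illustrative sketch that omits the 3-second pause handling, not the method itself:
import textwrap

_wrapper = textwrap.TextWrapper(width=50)  # maximum of 50 characters per line

def fill_output_sketch(previous_segments, output):
    # Combine previous complete segments with the current partial output and
    # keep only the last two wrapped lines.
    text = " ".join(previous_segments + [output]).strip()
    lines = _wrapper.wrap(text=text)
    return "\n".join(lines[-2:])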
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.server.ServeClient.speech_to_text">
<span class="sig-name descname"><span class="pre">speech_to_text</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.server.ServeClient.speech_to_text" title="Link to this definition"></a></dt>
<dd><p>Process an audio stream in an infinite loop, continuously transcribing the speech.</p>
<p>This method continuously receives audio frames, performs real-time transcription, and sends
transcribed segments to the client via a WebSocket connection.</p>
<p>If the client's language is not detected, it waits for 30 seconds of audio input to make a language prediction.
It utilizes the Whisper ASR model to transcribe the audio, continuously processing and streaming results. Segments
are sent to the client in real-time, and a history of segments is maintained to provide context. Pauses in speech
(no output from Whisper) are handled by showing the previous output for a set duration. A blank segment is added if
there is no speech for a specified duration to indicate a pause.</p>
<dl class="simple">
<dt>Raises:</dt><dd><p>Exception: If there is an issue with audio processing or WebSocket communication.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.server.ServeClient.update_segments">
<span class="sig-name descname"><span class="pre">update_segments</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">segments</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">duration</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.server.ServeClient.update_segments" title="Link to this definition"></a></dt>
<dd><p>Processes the segments from Whisper. Appends all the segments to the list
except for the last one, which is assumed to be incomplete.</p>
<p>Updates the ongoing transcript with transcribed segments, including their start and end times.
Complete segments are appended to the transcript in chronological order. Incomplete segments
(assumed to be the last one) are processed to identify repeated content. If the same incomplete
segment is seen multiple times, it updates the offset and appends the segment to the transcript.
A threshold is used to detect repeated content and ensure it is only included once in the transcript.
The timestamp offset is updated based on the duration of processed segments. The method returns the
last processed segment, allowing it to be sent to the client for real-time updates.</p>
<dl class="simple">
<dt>Args:</dt><dd><p>segments(dict) : dictionary of segments as returned by whisper
duration(float): duration of the current chunk</p>
</dd>
<dt>Returns:</dt><dd><dl class="simple">
<dt>dict or None: The last processed segment with its start time, end time, and transcribed text.</dt><dd><p>Returns None if there are no valid segments to process.</p>
</dd>
</dl>
</dd>
</dl>
</dd></dl>
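As a rough illustration of the bookkeeping described above: the repetition threshold and the chunk duration are omitted, each segment is treated as a mapping with start, end and text keys (an assumption), and the offset update shown here is one plausible reading of "based on the duration of processed segments":
def update_segments_sketch(segments, timestamp_offset):
    # Append every segment except the last one, which is assumed incomplete,
    # then advance the running timestamp offset past the completed segments.
    transcript = []
    for s in segments[:-1]:
        transcript.append({
            "start": timestamp_offset + s["start"],
            "end": timestamp_offset + s["end"],
            "text": s["text"],
        })
    if len(segments) > 1:
        timestamp_offset += segments[-2]["end"]
    last_segment = segments[-1] if segments else None
    return transcript, last_segment, timestamp_offset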
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="whisper_live.server.TranscriptionServer">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">whisper_live.server.</span></span><span class="sig-name descname"><span class="pre">TranscriptionServer</span></span><a class="headerlink" href="#whisper_live.server.TranscriptionServer" title="Link to this definition"></a></dt>
<dd><p>Represents a transcription server that handles incoming audio from clients.</p>
<dl class="simple">
<dt>Attributes:</dt><dd><p>RATE (int): The audio sampling rate (constant) set to 16000.
vad_model (torch.Module): The voice activity detection model.
vad_threshold (float): The voice activity detection threshold.
clients (dict): A dictionary to store connected clients.
websockets (dict): A dictionary to store WebSocket connections.
clients_start_time (dict): A dictionary to track client start times.
max_clients (int): Maximum allowed connected clients.
max_connection_time (int): Maximum allowed connection time in seconds.</p>
</dd>
</dl>
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.server.TranscriptionServer.get_wait_time">
<span class="sig-name descname"><span class="pre">get_wait_time</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.server.TranscriptionServer.get_wait_time" title="Link to this definition"></a></dt>
<dd><p>Calculate and return the estimated wait time for clients.</p>
<dl class="simple">
<dt>Returns:</dt><dd><p>float: The estimated wait time in minutes.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.server.TranscriptionServer.recv_audio">
<span class="sig-name descname"><span class="pre">recv_audio</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">websocket</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.server.TranscriptionServer.recv_audio" title="Link to this definition"></a></dt>
<dd><p>Receive audio chunks from a client in an infinite loop.</p>
<p>Continuously receives audio frames from a connected client
over a WebSocket connection. It processes the audio frames using a
voice activity detection (VAD) model to determine if they contain speech
or not. If the audio frame contains speech, it is added to the client's
audio data for ASR.
If the maximum number of clients is reached, the method sends a
“WAIT” status to the client, indicating that they should wait
until a slot is available.
If a client's connection exceeds the maximum allowed time, it will
be disconnected, and the client's resources will be cleaned up.</p>
<dl class="simple">
<dt>Args:</dt><dd><p>websocket (WebSocket): The WebSocket connection for the client.</p>
</dd>
<dt>Raises:</dt><dd><p>Exception: If there is an error during the audio frame processing.</p>
</dd>
</dl>
</dd></dl>
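The back-pressure path described above ("WAIT" plus an estimated wait time from get_wait_time()) might be sketched as follows; the JSON field names are assumptions, not taken from the source:
import json

def maybe_defer_client_sketch(websocket, n_clients, max_clients, wait_minutes):
    # Hypothetical shape of the server's WAIT message when all slots are taken.
    if n_clients >= max_clients:
        websocket.send(json.dumps({"status": "WAIT", "message": wait_minutes}))
        return True   # caller should not start transcription for this connection yet
    return False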
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.server.TranscriptionServer.run">
<span class="sig-name descname"><span class="pre">run</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">host</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">port</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">9090</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.server.TranscriptionServer.run" title="Link to this definition"></a></dt>
<dd><p>Run the transcription server.</p>
<dl class="simple">
<dt>Args:</dt><dd><p>host (str): The host address to bind the server.
port (int): The port number to bind the server.</p>
</dd>
</dl>
</dd></dl>
</dd></dl>
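Based only on the signatures shown above, starting the server could look like the following; the host and port values are placeholders:
from whisper_live.server import TranscriptionServer

server = TranscriptionServer()
server.run("0.0.0.0", port=9090)   # 9090 is the documented default port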
<dl class="py class" id="module-whisper_live.client">
<dt class="sig sig-object py" id="whisper_live.client.Client">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">whisper_live.client.</span></span><span class="sig-name descname"><span class="pre">Client</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">host</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">port</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">is_multilingual</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">lang</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">translate</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.client.Client" title="Link to this definition"></a></dt>
<dd><p>Handles audio recording, streaming, and communication with a server using WebSocket.</p>
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.client.Client.bytes_to_float_array">
<em class="property"><span class="pre">static</span><span class="w"> </span></em><span class="sig-name descname"><span class="pre">bytes_to_float_array</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">audio_bytes</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.client.Client.bytes_to_float_array" title="Link to this definition"></a></dt>
<dd><p>Convert audio data from bytes to a NumPy float array.</p>
<p>It assumes that the audio data is in 16-bit PCM format. The audio data is normalized to
have values between -1 and 1.</p>
<dl class="simple">
<dt>Args:</dt><dd><p>audio_bytes (bytes): Audio data in bytes.</p>
</dd>
<dt>Returns:</dt><dd><p>np.ndarray: A NumPy array containing the audio data as float values normalized between -1 and 1.</p>
</dd>
</dl>
</dd></dl>
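A minimal NumPy equivalent of the conversion described above, assuming little-endian signed 16-bit PCM input:
import numpy as np

def bytes_to_float_array_sketch(audio_bytes):
    samples = np.frombuffer(audio_bytes, dtype=np.int16)
    return samples.astype(np.float32) / 32768.0   # scale into [-1.0, 1.0)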
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.client.Client.close_websocket">
<span class="sig-name descname"><span class="pre">close_websocket</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.client.Client.close_websocket" title="Link to this definition"></a></dt>
<dd><p>Close the WebSocket connection and join the WebSocket thread.</p>
<p>First attempts to close the WebSocket connection using <cite>self.client_socket.close()</cite>. After
closing the connection, it joins the WebSocket thread to ensure proper termination.</p>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.client.Client.get_client_socket">
<span class="sig-name descname"><span class="pre">get_client_socket</span></span><span class="sig-paren">(</span><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.client.Client.get_client_socket" title="Link to this definition"></a></dt>
<dd><p>Get the WebSocket client socket instance.</p>
<dl class="simple">
<dt>Returns:</dt><dd><p>WebSocketApp: The WebSocket client socket instance currently in use by the client.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.client.Client.on_message">
<span class="sig-name descname"><span class="pre">on_message</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">ws</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">message</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.client.Client.on_message" title="Link to this definition"></a></dt>
<dd><p>Callback function called when a message is received from the server.</p>
<p>It updates various attributes of the client based on the received message, including
recording status, language detection, and server messages. If a disconnect message
is received, it sets the recording status to False.</p>
<dl class="simple">
<dt>Args:</dt><dd><p>ws (websocket.WebSocketApp): The WebSocket client instance.
message (str): The received message from the server.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.client.Client.on_open">
<span class="sig-name descname"><span class="pre">on_open</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">ws</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.client.Client.on_open" title="Link to this definition"></a></dt>
<dd><p>Callback function called when the WebSocket connection is successfully opened.</p>
<p>Sends an initial configuration message to the server, including client UID, multilingual mode,
language selection, and task type.</p>
<dl class="simple">
<dt>Args:</dt><dd><p>ws (websocket.WebSocketApp): The WebSocket client instance.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.client.Client.play_file">
<span class="sig-name descname"><span class="pre">play_file</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">filename</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.client.Client.play_file" title="Link to this definition"></a></dt>
<dd><p>Play an audio file and send it to the server for processing.</p>
<p>Reads an audio file, plays it through the audio output, and simultaneously sends
the audio data to the server for processing. It uses PyAudio to create an audio
stream for playback. The audio data is read from the file in chunks, converted to
floating-point format, and sent to the server using WebSocket communication.
This method is typically used to stream pre-recorded audio
to the server in real time.</p>
<dl class="simple">
<dt>Args:</dt><dd><p>filename (str): The path to the audio file to be played and sent to the server.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.client.Client.record">
<span class="sig-name descname"><span class="pre">record</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">out_file</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">'output_recording.wav'</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.client.Client.record" title="Link to this definition"></a></dt>
<dd><p>Record audio data from the input stream and save it to a WAV file.</p>
<p>Continuously records audio data from the input stream, sends it to the server via a WebSocket
connection, and simultaneously saves it to multiple WAV files in chunks. It stops recording when
the <cite>RECORD_SECONDS</cite> duration is reached or when the <cite>RECORDING</cite> flag is set to <cite>False</cite>.</p>
<p>Audio data is saved in chunks to the “chunks” directory, with each chunk stored as a separate WAV file.
The recording can be interrupted with a KeyboardInterrupt (e.g., pressing Ctrl+C). After recording,
the method combines all the saved audio chunks into the specified <cite>out_file</cite>.</p>
<dl class="simple">
<dt>Args:</dt><dd><p>out_file (str, optional): The name of the output WAV file to save the entire recording. Default is “output_recording.wav”.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.client.Client.send_packet_to_server">
<span class="sig-name descname"><span class="pre">send_packet_to_server</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">message</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.client.Client.send_packet_to_server" title="Link to this definition"></a></dt>
<dd><p>Send an audio packet to the server using WebSocket.</p>
<dl class="simple">
<dt>Args:</dt><dd><p>message (bytes): The audio data packet in bytes to be sent to the server.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.client.Client.write_audio_frames_to_file">
<span class="sig-name descname"><span class="pre">write_audio_frames_to_file</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">frames</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">file_name</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.client.Client.write_audio_frames_to_file" title="Link to this definition"></a></dt>
<dd><p>Write audio frames to a WAV file.</p>
<p>The WAV file is created or overwritten with the specified name. The audio frames should be
in the correct format and match the specified channel, sample width, and sample rate.</p>
<dl class="simple">
<dt>Args:</dt><dd><p>frames (bytes): The audio frames to be written to the file.
file_name (str): The name of the WAV file to which the frames will be written.</p>
</dd>
</dl>
</dd></dl>
<dl class="py method">
<dt class="sig sig-object py" id="whisper_live.client.Client.write_output_recording">
<span class="sig-name descname"><span class="pre">write_output_recording</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">n_audio_file</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">out_file</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.client.Client.write_output_recording" title="Link to this definition"></a></dt>
<dd><p>Combine and save recorded audio chunks into a single WAV file.</p>
<p>The individual audio chunk files are expected to be located in the “chunks” directory. Reads each chunk
file, appends its audio data to the final recording, and then deletes the chunk file. After combining
and saving, the final recording is stored in the specified <cite>out_file</cite>.</p>
<dl class="simple">
<dt>Args:</dt><dd><p>n_audio_file (int): The number of audio chunk files to combine.
out_file (str): The name of the output WAV file to save the final recording.</p>
</dd>
</dl>
</dd></dl>
</dd></dl>
<dl class="py class">
<dt class="sig sig-object py" id="whisper_live.client.TranscriptionClient">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">whisper_live.client.</span></span><span class="sig-name descname"><span class="pre">TranscriptionClient</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">host</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">port</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">is_multilingual</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">lang</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">translate</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.client.TranscriptionClient" title="Link to this definition"></a></dt>
<dd><p>Client for handling audio transcription tasks via a WebSocket connection.</p>
<p>Acts as a high-level client for audio transcription tasks using a WebSocket connection. It can be used
to send audio data for transcription to a server and receive transcribed text segments.</p>
<dl class="simple">
<dt>Args:</dt><dd><p>host (str): The hostname or IP address of the server.
port (int): The port number to connect to on the server.
is_multilingual (bool, optional): Indicates whether the transcription should support multiple languages (default is False).
lang (str, optional): The primary language for transcription (used if <cite>is_multilingual</cite> is False). Defaults to None, in which case English (en) is used.
translate (bool, optional): Indicates whether translation tasks are required (default is False).</p>
</dd>
<dt>Attributes:</dt><dd><p>client (Client): An instance of the underlying Client class responsible for handling the WebSocket connection.</p>
</dd>
<dt>Example:</dt><dd><p>To create a TranscriptionClient and start transcription on microphone audio:</p>
<div class="highlight"><pre>transcription_client = TranscriptionClient(host="localhost", port=9090, is_multilingual=True)
transcription_client()</pre></div>
</dd>
</dl>
</dd></dl>
<dl class="py function">
<dt class="sig sig-object py" id="whisper_live.client.resample">
<span class="sig-prename descclassname"><span class="pre">whisper_live.client.</span></span><span class="sig-name descname"><span class="pre">resample</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">file</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">sr</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">int</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">16000</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#whisper_live.client.resample" title="Link to this definition"></a></dt>
<dd><p># <a class="reference external" href="https://github.com/openai/whisper/blob/7858aa9c08d98f75575035ecd6481f462d66ca27/whisper/audio.py#L22">https://github.com/openai/whisper/blob/7858aa9c08d98f75575035ecd6481f462d66ca27/whisper/audio.py#L22</a>
Open an audio file, read it as a mono waveform, resample it if necessary, and
save the resampled audio.</p>
<dl class="simple">
<dt>Args:</dt><dd><p>file (str): The audio file to open
sr (int): The sample rate to resample the audio if necessary</p>
</dd>
<dt>Returns:</dt><dd><p>resampled_file (str): The resampled audio file</p>
</dd>
</dl>
</dd></dl>
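<p>A hedged usage sketch (the input file name below is only an example):</p>
<div class="highlight"><pre>from whisper_live.client import resample

# Returns the path of a mono 16 kHz copy of the input audio.
resampled_path = resample("interview.mp3", sr=16000)</pre></div>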
</section>
<section id="indices-and-tables">
<h1>Indices and tables<a class="headerlink" href="#indices-and-tables" title="Link to this heading"></a></h1>
<ul class="simple">
<li><p><a class="reference internal" href="genindex.html"><span class="std std-ref">Index</span></a></p></li>
<li><p><a class="reference internal" href="py-modindex.html"><span class="std std-ref">Module Index</span></a></p></li>
<li><p><a class="reference internal" href="search.html"><span class="std std-ref">Search Page</span></a></p></li>
</ul>
</section>
</div>
</div>
</div>
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
<div class="sphinxsidebarwrapper">
<h1 class="logo"><a href="#">whisper_live</a></h1>
<h3>Navigation</h3>
<div class="relations">
<h3>Related Topics</h3>
<ul>
<li><a href="#">Documentation overview</a><ul>
</ul></li>
</ul>
</div>
<div id="searchbox" style="display: none" role="search">
<h3 id="searchlabel">Quick search</h3>
<div class="searchformwrapper">
<form class="search" action="search.html" method="get">
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
<input type="submit" value="Go" />
</form>
</div>
</div>
<script>document.getElementById('searchbox').style.display = "block"</script>
</div>
</div>
<div class="clearer"></div>
</div>
<div class="footer">
&copy;2023, Collabora.
|
Powered by <a href="http://sphinx-doc.org/">Sphinx 7.2.6</a>
&amp; <a href="https://github.com/bitprophet/alabaster">Alabaster 0.7.13</a>
|
<a href="_sources/index.rst.txt"
rel="nofollow">Page source</a>
</div>
</body>
</html>

5
docs/html/objects.inv Normal file
View File

@ -0,0 +1,5 @@
# Sphinx inventory version 2
# Project: whisper_live
# Version:
# The remainder of this file is compressed using zlib.
(remainder of file is zlib-compressed binary data, not shown)

123
docs/html/py-modindex.html Normal file
View File

@ -0,0 +1,123 @@
<!DOCTYPE html>
<html lang="en" data-content_root="./">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Python Module Index &#8212; whisper_live documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=4f649999" />
<link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=039e1c02" />
<script src="_static/documentation_options.js?v=5929fcd5"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="stylesheet" href="_static/custom.css" type="text/css" />
<meta name="viewport" content="width=device-width, initial-scale=0.9, maximum-scale=0.9" />
</head><body>
<div class="document">
<div class="documentwrapper">
<div class="bodywrapper">
<div class="body" role="main">
<h1>Python Module Index</h1>
<div class="modindex-jumpbox">
<a href="#cap-w"><strong>w</strong></a>
</div>
<table class="indextable modindextable">
<tr class="pcap"><td></td><td>&#160;</td><td></td></tr>
<tr class="cap" id="cap-w"><td></td><td>
<strong>w</strong></td><td></td></tr>
<tr>
<td><img src="_static/minus.png" class="toggler"
id="toggle-1" style="display: none" alt="-" /></td>
<td>
<code class="xref">whisper_live</code></td><td>
<em></em></td></tr>
<tr class="cg-1">
<td></td>
<td>&#160;&#160;&#160;
<a href="index.html#module-whisper_live.client"><code class="xref">whisper_live.client</code></a></td><td>
<em></em></td></tr>
<tr class="cg-1">
<td></td>
<td>&#160;&#160;&#160;
<a href="index.html#module-whisper_live.server"><code class="xref">whisper_live.server</code></a></td><td>
<em></em></td></tr>
</table>
</div>
</div>
</div>
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
<div class="sphinxsidebarwrapper">
<h1 class="logo"><a href="index.html">whisper_live</a></h1>
<h3>Navigation</h3>
<div class="relations">
<h3>Related Topics</h3>
<ul>
<li><a href="index.html">Documentation overview</a><ul>
</ul></li>
</ul>
</div>
<div id="searchbox" style="display: none" role="search">
<h3 id="searchlabel">Quick search</h3>
<div class="searchformwrapper">
<form class="search" action="search.html" method="get">
<input type="text" name="q" aria-labelledby="searchlabel" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
<input type="submit" value="Go" />
</form>
</div>
</div>
<script>document.getElementById('searchbox').style.display = "block"</script>
</div>
</div>
<div class="clearer"></div>
</div>
<div class="footer">
&copy;2023, Collabora.
|
Powered by <a href="http://sphinx-doc.org/">Sphinx 7.2.6</a>
&amp; <a href="https://github.com/bitprophet/alabaster">Alabaster 0.7.13</a>
</div>
</body>
</html>

117
docs/html/search.html Normal file
View File

@ -0,0 +1,117 @@
<!DOCTYPE html>
<html lang="en" data-content_root="./">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Search &#8212; whisper_live documentation</title>
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=4f649999" />
<link rel="stylesheet" type="text/css" href="_static/alabaster.css?v=039e1c02" />
<script src="_static/documentation_options.js?v=5929fcd5"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=dc90522c"></script>
<script src="_static/searchtools.js"></script>
<script src="_static/language_data.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="#" />
<script src="searchindex.js" defer></script>
<link rel="stylesheet" href="_static/custom.css" type="text/css" />
<meta name="viewport" content="width=device-width, initial-scale=0.9, maximum-scale=0.9" />
</head><body>
<div class="document">
<div class="documentwrapper">
<div class="bodywrapper">
<div class="body" role="main">
<h1 id="search-documentation">Search</h1>
<noscript>
<div class="admonition warning">
<p>
Please activate JavaScript to enable the search
functionality.
</p>
</div>
</noscript>
<p>
Searching for multiple words only shows matches that contain
all words.
</p>
<form action="" method="get">
<input type="text" name="q" aria-labelledby="search-documentation" value="" autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false"/>
<input type="submit" value="search" />
<span id="search-progress" style="padding-left: 10px"></span>
</form>
<div id="search-results">
</div>
</div>
</div>
</div>
<div class="sphinxsidebar" role="navigation" aria-label="main navigation">
<div class="sphinxsidebarwrapper">
<h1 class="logo"><a href="index.html">whisper_live</a></h1>
<h3>Navigation</h3>
<div class="relations">
<h3>Related Topics</h3>
<ul>
<li><a href="index.html">Documentation overview</a><ul>
</ul></li>
</ul>
</div>
</div>
</div>
<div class="clearer"></div>
</div>
<div class="footer">
&copy;2023, Collabora.
|
Powered by <a href="http://sphinx-doc.org/">Sphinx 7.2.6</a>
&amp; <a href="https://github.com/bitprophet/alabaster">Alabaster 0.7.13</a>
</div>
</body>
</html>

1
docs/html/searchindex.js Normal file

File diff suppressed because one or more lines are too long

1
docs/index.html Normal file
View File

@ -0,0 +1 @@
<meta http-equiv="refresh" content="0; url=./html/index.html" />

13
requirements/server.txt Normal file
View File

@ -0,0 +1,13 @@
faster-whisper==1.1.0
websockets
onnxruntime==1.17.0
numba
kaldialign
soundfile
scipy
av
jiwer
evaluate
numpy<2
openai-whisper==20240930
tokenizers==0.20.3

84
run_server.py Normal file
View File

@ -0,0 +1,84 @@
import argparse
import ssl
import os
import socket
def check_port_availability(port):
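    """Return True if nothing is currently accepting connections on the given port (best-effort check)."""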
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
result = sock.connect_ex(('0.0.0.0', port))
sock.close()
return result != 0
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--port', '-p',
type=int,
default=int(os.getenv('PORT_WHISPERLIVE', '9090')),
help="Websocket port to run the server on.")
parser.add_argument('--backend', '-b',
type=str,
default='faster_whisper',
help='Backends from ["tensorrt", "faster_whisper"]')
parser.add_argument('--faster_whisper_custom_model_path', '-fw',
type=str, default=None,
help="Custom Faster Whisper Model")
parser.add_argument('--trt_model_path', '-trt',
type=str,
default=None,
help='Whisper TensorRT model path')
parser.add_argument('--trt_multilingual', '-m',
action="store_true",
help='Boolean only for TensorRT model. True if multilingual.')
parser.add_argument('--ssl_cert_path', '-ssl',
type=str,
default=None,
help='Path to cert.pem and key.pem if ssl should be used.')
parser.add_argument('--omp_num_threads', '-omp',
type=int,
default=1,
help="Number of threads to use for OpenMP")
parser.add_argument('--no_single_model', '-nsm',
action='store_true',
help='Set this if every connection should instantiate its own model. Only relevant for custom model, passed using -trt or -fw.')
args = parser.parse_args()
if args.backend == "tensorrt":
if args.trt_model_path is None:
raise ValueError("Please Provide a valid tensorrt model path")
port = args.port
if not check_port_availability(port):
print(f"Warning: Port {port} might already be in use!")
ssl_context = None
if args.ssl_cert_path is not None:
try:
ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
ssl_context.load_cert_chain(
certfile=f"{args.ssl_cert_path}/cert.pem",
keyfile=f"{args.ssl_cert_path}/privkey.pem"
)
print("SSL context created successfully")
except Exception as e:
print(f"Failed to load SSL certificates: {str(e)}")
raise
if "OMP_NUM_THREADS" not in os.environ:
print(f"Setting OMP_NUM_THREADS to {args.omp_num_threads}")
os.environ["OMP_NUM_THREADS"] = str(args.omp_num_threads)
from whisper_live.server import TranscriptionServer
print(f"Running server with args: {args}")
server = TranscriptionServer()
print(f"Starting server on port {args.port} with backend {args.backend} using SSL: {args.ssl_cert_path is not None}")
server.run(
"0.0.0.0",
port=args.port,
backend=args.backend,
faster_whisper_custom_model_path=args.faster_whisper_custom_model_path,
whisper_tensorrt_path=args.trt_model_path,
trt_multilingual=args.trt_multilingual,
single_model=not args.no_single_model,
ssl_context=ssl_context
)
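For reference, a minimal sketch of starting the server programmatically, mirroring the call above with the default CLI arguments (host and port values are illustrative):
from whisper_live.server import TranscriptionServer

server = TranscriptionServer()
# Same call shape as above: faster_whisper backend, no custom model, no SSL.
server.run(
    "0.0.0.0",
    port=9090,
    backend="faster_whisper",
    faster_whisper_custom_model_path=None,
    whisper_tensorrt_path=None,
    trt_multilingual=False,
    single_model=True,
    ssl_context=None
)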

View File

@ -0,0 +1,77 @@
#!/bin/bash
download_and_build_model() {
local model_name="$1"
local model_url=""
case "$model_name" in
"tiny.en")
model_url="https://openaipublic.azureedge.net/main/whisper/models/d3dd57d32accea0b295c96e26691aa14d8822fac7d9d27d5dc00b4ca2826dd03/tiny.en.pt"
;;
"tiny")
model_url="https://openaipublic.azureedge.net/main/whisper/models/65147644a518d12f04e32d6f3b26facc3f8dd46e5390956a9424a650c0ce22b9/tiny.pt"
;;
"base.en")
model_url="https://openaipublic.azureedge.net/main/whisper/models/25a8566e1d0c1e2231d1c762132cd20e0f96a85d16145c3a00adf5d1ac670ead/base.en.pt"
;;
"base")
model_url="https://openaipublic.azureedge.net/main/whisper/models/ed3a0b6b1c0edf879ad9b11b1af5a0e6ab5db9205f891f668f8b0e6c6326e34e/base.pt"
;;
"small.en")
model_url="https://openaipublic.azureedge.net/main/whisper/models/f953ad0fd29cacd07d5a9eda5624af0f6bcf2258be67c92b79389873d91e0872/small.en.pt"
;;
"small")
model_url="https://openaipublic.azureedge.net/main/whisper/models/9ecf779972d90ba49c06d968637d720dd632c55bbf19d441fb42bf17a411e794/small.pt"
;;
"medium.en")
model_url="https://openaipublic.azureedge.net/main/whisper/models/d7440d1dc186f76616474e0ff0b3b6b879abc9d1a4926b7adfa41db2d497ab4f/medium.en.pt"
;;
"medium")
model_url="https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt"
;;
"large-v1")
model_url="https://openaipublic.azureedge.net/main/whisper/models/e4b87e7e0bf463eb8e6956e646f1e277e901512310def2c24bf0e11bd3c28e9a/large-v1.pt"
;;
"large-v2")
model_url="https://openaipublic.azureedge.net/main/whisper/models/81f7c96c852ee8fc832187b0132e569d6c3065a3252ed18e56effd0b6a73e524/large-v2.pt"
;;
"large-v3" | "large")
model_url="https://openaipublic.azureedge.net/main/whisper/models/e5b1a55b89c1367dacf97e3e19bfd829a01529dbfdeefa8caeb59b3f1b81dadb/large-v3.pt"
;;
*)
echo "Invalid model name: $model_name"
exit 1
;;
esac
echo "Downloading $model_name..."
# wget --directory-prefix=assets "$model_url"
# echo "Download completed: ${model_name}.pt"
if [ ! -f "assets/${model_name}.pt" ]; then
wget --directory-prefix=assets "$model_url"
echo "Download completed: ${model_name}.pt"
else
echo "${model_name}.pt already exists in assets directory."
fi
local output_dir="whisper_${model_name//./_}"
echo "$output_dir"
echo "Running build script for $model_name with output directory $output_dir"
python3 build.py --output_dir "$output_dir" --use_gpt_attention_plugin --use_gemm_plugin --use_bert_attention_plugin --model_name "$model_name"
echo "Whisper $model_name TensorRT engine built."
echo "========================================="
echo "Model is located at: $(pwd)/$output_dir"
}
if [ "$#" -lt 1 ]; then
echo "Usage: $0 <path-to-tensorrt-examples-dir> [model-name]"
exit 1
fi
tensorrt_examples_dir="$1"
model_name="${2:-small.en}"
cd "$tensorrt_examples_dir/whisper" || exit 1
pip install --no-deps -r requirements.txt
download_and_build_model "$model_name"

4
scripts/setup.sh Normal file
View File

@ -0,0 +1,4 @@
#!/bin/bash
apt-get update
apt-get install -y portaudio19-dev ffmpeg wget

60
setup.py Normal file
View File

@ -0,0 +1,60 @@
import pathlib
from setuptools import find_packages, setup
from whisper_live.__version__ import __version__
# The directory containing this file
HERE = pathlib.Path(__file__).parent
# The text of the README file
README = (HERE / "README.md").read_text()
# This call to setup() does all the work
setup(
name="whisper_live",
version=__version__,
description="A nearly-live implementation of OpenAI's Whisper.",
long_description=README,
long_description_content_type="text/markdown",
include_package_data=True,
url="https://github.com/collabora/WhisperLive",
author="Collabora Ltd",
author_email="vineet.suryan@collabora.com",
license="MIT",
classifiers=[
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
],
packages=find_packages(
exclude=(
"examples",
"Audio-Transcription-Chrome",
"Audio-Transcription-Firefox",
"requirements",
"whisper-finetuning"
)
),
install_requires=[
"PyAudio",
"faster-whisper==1.1.0",
"torch",
"torchaudio",
"websockets",
"onnxruntime==1.17.0",
"scipy",
"websocket-client",
"numba",
"openai-whisper==20240930", #TODO: understand this
"kaldialign",
"soundfile",
"tokenizers==0.20.3"
],
python_requires=">=3.8"
)

0
tests/__init__.py Normal file
View File

156
tests/test_client.py Normal file
View File

@ -0,0 +1,156 @@
import json
import os
import scipy.io.wavfile
import websocket
import copy
import unittest
from unittest.mock import patch, MagicMock
from whisper_live.client import Client, TranscriptionClient, TranscriptionTeeClient
from whisper_live.utils import resample
from pathlib import Path
class BaseTestCase(unittest.TestCase):
@patch('whisper_live.client.websocket.WebSocketApp')
@patch('whisper_live.client.pyaudio.PyAudio')
def setUp(self, mock_pyaudio, mock_websocket):
self.mock_pyaudio_instance = MagicMock()
mock_pyaudio.return_value = self.mock_pyaudio_instance
self.mock_stream = MagicMock()
self.mock_pyaudio_instance.open.return_value = self.mock_stream
self.mock_ws_app = mock_websocket.return_value
self.mock_ws_app.send = MagicMock()
self.client = TranscriptionClient(host='localhost', port=9090, lang="en").client
self.mock_pyaudio = mock_pyaudio
self.mock_websocket = mock_websocket
self.mock_audio_packet = b'\x00\x01\x02\x03'
def tearDown(self):
self.client.close_websocket()
self.mock_pyaudio.stop()
self.mock_websocket.stop()
del self.client
class TestClientWebSocketCommunication(BaseTestCase):
def test_websocket_communication(self):
expected_url = 'ws://localhost:9090'
self.mock_websocket.assert_called()
self.assertEqual(self.mock_websocket.call_args[0][0], expected_url)
class TestClientCallbacks(BaseTestCase):
def test_on_open(self):
expected_message = json.dumps({
"uid": self.client.uid,
"language": self.client.language,
"task": self.client.task,
"model": self.client.model,
"use_vad": True
})
self.client.on_open(self.mock_ws_app)
self.mock_ws_app.send.assert_called_with(expected_message)
def test_on_message(self):
message = json.dumps(
{
"uid": self.client.uid,
"message": "SERVER_READY",
"backend": "faster_whisper"
}
)
self.client.on_message(self.mock_ws_app, message)
message = json.dumps({
"uid": self.client.uid,
"segments": [
{"start": 0, "end": 1, "text": "Test transcript"},
{"start": 1, "end": 2, "text": "Test transcript 2"},
{"start": 2, "end": 3, "text": "Test transcript 3"}
]
})
self.client.on_message(self.mock_ws_app, message)
# Assert that the transcript was updated correctly
self.assertEqual(len(self.client.transcript), 2)
self.assertEqual(self.client.transcript[1]['text'], "Test transcript 2")
def test_on_close(self):
close_status_code = 1000
close_msg = "Normal closure"
self.client.on_close(self.mock_ws_app, close_status_code, close_msg)
self.assertFalse(self.client.recording)
self.assertFalse(self.client.server_error)
self.assertFalse(self.client.waiting)
def test_on_error(self):
error_message = "Test Error"
self.client.on_error(self.mock_ws_app, error_message)
self.assertTrue(self.client.server_error)
self.assertEqual(self.client.error_message, error_message)
class TestAudioResampling(unittest.TestCase):
def test_resample_audio(self):
original_audio = "assets/jfk.flac"
expected_sr = 16000
resampled_audio = resample(original_audio, expected_sr)
sr, _ = scipy.io.wavfile.read(resampled_audio)
self.assertEqual(sr, expected_sr)
os.remove(resampled_audio)
class TestSendingAudioPacket(BaseTestCase):
def test_send_packet(self):
self.client.send_packet_to_server(self.mock_audio_packet)
self.client.client_socket.send.assert_called_with(self.mock_audio_packet, websocket.ABNF.OPCODE_BINARY)
class TestTee(BaseTestCase):
@patch('whisper_live.client.websocket.WebSocketApp')
@patch('whisper_live.client.pyaudio.PyAudio')
def setUp(self, mock_audio, mock_websocket):
super().setUp()
self.client2 = Client(host='localhost', port=9090, lang="es", translate=False, srt_file_path="transcript.srt")
self.client3 = Client(host='localhost', port=9090, lang="es", translate=True, srt_file_path="translation.srt")
# need a separate mock for each websocket
self.client3.client_socket = copy.deepcopy(self.client3.client_socket)
self.tee = TranscriptionTeeClient([self.client2, self.client3])
def tearDown(self):
self.tee.close_all_clients()
del self.tee
super().tearDown()
def test_invalid_constructor(self):
with self.assertRaises(Exception) as context:
TranscriptionTeeClient([])
def test_multicast_unconditional(self):
self.tee.multicast_packet(self.mock_audio_packet, True)
for client in self.tee.clients:
client.client_socket.send.assert_called_with(self.mock_audio_packet, websocket.ABNF.OPCODE_BINARY)
def test_multicast_conditional(self):
self.client2.recording = False
self.client3.recording = True
self.tee.multicast_packet(self.mock_audio_packet, False)
self.client2.client_socket.send.assert_not_called()
self.client3.client_socket.send.assert_called_with(self.mock_audio_packet, websocket.ABNF.OPCODE_BINARY)
def test_close_all(self):
self.tee.close_all_clients()
for client in self.tee.clients:
client.client_socket.close.assert_called()
def test_write_all_srt(self):
for client in self.tee.clients:
client.server_backend = "faster_whisper"
self.tee.write_all_clients_srt()
self.assertTrue(Path("transcript.srt").is_file())
self.assertTrue(Path("translation.srt").is_file())

150
tests/test_server.py Normal file
View File

@ -0,0 +1,150 @@
import subprocess
import time
import json
import unittest
from unittest import mock
import numpy as np
import evaluate
from websockets.exceptions import ConnectionClosed
from whisper_live.server import TranscriptionServer
from whisper_live.client import Client, TranscriptionClient, TranscriptionTeeClient
from whisper.normalizers import EnglishTextNormalizer
class TestTranscriptionServerInitialization(unittest.TestCase):
def test_initialization(self):
server = TranscriptionServer()
self.assertEqual(server.client_manager.max_clients, 4)
self.assertEqual(server.client_manager.max_connection_time, 600)
self.assertDictEqual(server.client_manager.clients, {})
self.assertDictEqual(server.client_manager.start_times, {})
class TestGetWaitTime(unittest.TestCase):
def setUp(self):
self.server = TranscriptionServer()
self.server.client_manager.start_times = {
'client1': time.time() - 120,
'client2': time.time() - 300
}
self.server.client_manager.max_connection_time = 600
def test_get_wait_time(self):
expected_wait_time = (600 - (time.time() - self.server.client_manager.start_times['client2'])) / 60
print(self.server.client_manager.get_wait_time(), expected_wait_time)
self.assertAlmostEqual(self.server.client_manager.get_wait_time(), expected_wait_time, places=2)
class TestServerConnection(unittest.TestCase):
def setUp(self):
self.server = TranscriptionServer()
@mock.patch('websockets.WebSocketCommonProtocol')
def test_connection(self, mock_websocket):
mock_websocket.recv.return_value = json.dumps({
'uid': 'test_client',
'language': 'en',
'task': 'transcribe',
'model': 'tiny.en'
})
self.server.recv_audio(mock_websocket, "faster_whisper")
@mock.patch('websockets.WebSocketCommonProtocol')
def test_recv_audio_exception_handling(self, mock_websocket):
mock_websocket.recv.side_effect = [json.dumps({
'uid': 'test_client',
'language': 'en',
'task': 'transcribe',
'model': 'tiny.en'
}), np.array([1, 2, 3]).tobytes()]
with self.assertLogs(level="ERROR"):
self.server.recv_audio(mock_websocket, "faster_whisper")
self.assertNotIn(mock_websocket, self.server.client_manager.clients)
class TestServerInferenceAccuracy(unittest.TestCase):
@classmethod
def setUpClass(cls):
cls.mock_pyaudio_patch = mock.patch('pyaudio.PyAudio')
cls.mock_pyaudio = cls.mock_pyaudio_patch.start()
cls.mock_pyaudio.return_value.open.return_value = mock.MagicMock()
cls.server_process = subprocess.Popen(["python", "run_server.py"])
time.sleep(2)
@classmethod
def tearDownClass(cls):
cls.server_process.terminate()
cls.server_process.wait()
def setUp(self):
self.metric = evaluate.load("wer")
self.normalizer = EnglishTextNormalizer()
def check_prediction(self, srt_path):
gt = "And so my fellow Americans, ask not, what your country can do for you. Ask what you can do for your country!"
with open(srt_path, "r") as f:
lines = f.readlines()
prediction = " ".join([line.strip() for line in lines[2::4]])
prediction_normalized = self.normalizer(prediction)
gt_normalized = self.normalizer(gt)
# calculate WER
wer = self.metric.compute(
predictions=[prediction_normalized],
references=[gt_normalized]
)
self.assertLess(wer, 0.05)
def test_inference(self):
client = TranscriptionClient(
"localhost", "9090", model="base.en", lang="en",
)
client("assets/jfk.flac")
self.check_prediction("output.srt")
def test_simultaneous_inference(self):
client1 = Client(
"localhost", "9090", model="base.en", lang="en", srt_file_path="transcript1.srt")
client2 = Client(
"localhost", "9090", model="base.en", lang="en", srt_file_path="transcript2.srt")
tee = TranscriptionTeeClient([client1, client2])
tee("assets/jfk.flac")
self.check_prediction("transcript1.srt")
self.check_prediction("transcript2.srt")
class TestExceptionHandling(unittest.TestCase):
def setUp(self):
self.server = TranscriptionServer()
@mock.patch('websockets.WebSocketCommonProtocol')
def test_connection_closed_exception(self, mock_websocket):
mock_websocket.recv.side_effect = ConnectionClosed(1001, "testing connection closed")
with self.assertLogs(level="INFO") as log:
self.server.recv_audio(mock_websocket, "faster_whisper")
self.assertTrue(any("Connection closed by client" in message for message in log.output))
@mock.patch('websockets.WebSocketCommonProtocol')
def test_json_decode_exception(self, mock_websocket):
mock_websocket.recv.return_value = "invalid json"
with self.assertLogs(level="ERROR") as log:
self.server.recv_audio(mock_websocket, "faster_whisper")
self.assertTrue(any("Failed to decode JSON from client" in message for message in log.output))
@mock.patch('websockets.WebSocketCommonProtocol')
def test_unexpected_exception_handling(self, mock_websocket):
mock_websocket.recv.side_effect = RuntimeError("Unexpected error")
with self.assertLogs(level="ERROR") as log:
self.server.recv_audio(mock_websocket, "faster_whisper")
for message in log.output:
print(message)
print()
self.assertTrue(any("Unexpected error" in message for message in log.output))

26
tests/test_vad.py Normal file
View File

@ -0,0 +1,26 @@
import unittest
import numpy as np
from whisper_live.tensorrt_utils import load_audio
from whisper_live.vad import VoiceActivityDetector
class TestVoiceActivityDetection(unittest.TestCase):
def setUp(self):
self.vad = VoiceActivityDetector()
self.sample_rate = 16000
def generate_silence(self, duration_seconds):
return np.zeros(int(self.sample_rate * duration_seconds), dtype=np.float32)
def load_speech_segment(self, filepath):
return load_audio(filepath)
def test_vad_silence_detection(self):
silence = self.generate_silence(3)
is_speech_present = self.vad(silence.copy())
self.assertFalse(is_speech_present, "VAD incorrectly identified silence as speech.")
def test_vad_speech_detection(self):
audio_tensor = load_audio("assets/jfk.flac")
is_speech_present = self.vad(audio_tensor)
self.assertTrue(is_speech_present, "VAD failed to identify speech segment.")

0
whisper_live/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1 @@
__version__ = "0.4.1"

1139
whisper_live/server.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,365 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
from collections import defaultdict
from functools import lru_cache
from pathlib import Path
from subprocess import CalledProcessError, run
from typing import Dict, Iterable, List, Optional, TextIO, Tuple, Union
import kaldialign
import numpy as np
import soundfile
import torch
import torch.nn.functional as F
Pathlike = Union[str, Path]
SAMPLE_RATE = 16000
N_FFT = 400
HOP_LENGTH = 160
CHUNK_LENGTH = 30
N_SAMPLES = CHUNK_LENGTH * SAMPLE_RATE # 480000 samples in a 30-second chunk
def load_audio(file: str, sr: int = SAMPLE_RATE):
"""
Open an audio file and read as mono waveform, resampling as necessary
Parameters
----------
file: str
The audio file to open
sr: int
The sample rate to resample the audio if necessary
Returns
-------
A NumPy array containing the audio waveform, in float32 dtype.
"""
# This launches a subprocess to decode audio while down-mixing
# and resampling as necessary. Requires the ffmpeg CLI in PATH.
# fmt: off
cmd = [
"ffmpeg", "-nostdin", "-threads", "0", "-i", file, "-f", "s16le", "-ac",
"1", "-acodec", "pcm_s16le", "-ar",
str(sr), "-"
]
# fmt: on
try:
out = run(cmd, capture_output=True, check=True).stdout
except CalledProcessError as e:
raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
def load_audio_wav_format(wav_path):
# make sure audio in .wav format
assert wav_path.endswith(
'.wav'), f"Only support .wav format, but got {wav_path}"
waveform, sample_rate = soundfile.read(wav_path)
assert sample_rate == 16000, f"Only a 16 kHz sample rate is supported, but got {sample_rate}"
return waveform, sample_rate
def pad_or_trim(array, length: int = N_SAMPLES, *, axis: int = -1):
"""
Pad or trim the audio array to N_SAMPLES, as expected by the encoder.
"""
if torch.is_tensor(array):
if array.shape[axis] > length:
array = array.index_select(dim=axis,
index=torch.arange(length,
device=array.device))
if array.shape[axis] < length:
pad_widths = [(0, 0)] * array.ndim
pad_widths[axis] = (0, length - array.shape[axis])
array = F.pad(array,
[pad for sizes in pad_widths[::-1] for pad in sizes])
else:
if array.shape[axis] > length:
array = array.take(indices=range(length), axis=axis)
if array.shape[axis] < length:
pad_widths = [(0, 0)] * array.ndim
pad_widths[axis] = (0, length - array.shape[axis])
array = np.pad(array, pad_widths)
return array
@lru_cache(maxsize=None)
def mel_filters(device,
n_mels: int,
mel_filters_dir: str = None) -> torch.Tensor:
"""
load the mel filterbank matrix for projecting STFT into a Mel spectrogram.
Allows decoupling librosa dependency; saved using:
np.savez_compressed(
"mel_filters.npz",
mel_80=librosa.filters.mel(sr=16000, n_fft=400, n_mels=80),
)
"""
assert n_mels in {80, 128}, f"Unsupported n_mels: {n_mels}"
if mel_filters_dir is None:
mel_filters_path = os.path.join(os.path.dirname(__file__), "assets",
"mel_filters.npz")
else:
mel_filters_path = os.path.join(mel_filters_dir, "mel_filters.npz")
with np.load(mel_filters_path) as f:
return torch.from_numpy(f[f"mel_{n_mels}"]).to(device)
def log_mel_spectrogram(
audio: Union[str, np.ndarray, torch.Tensor],
n_mels: int,
padding: int = 0,
device: Optional[Union[str, torch.device]] = None,
return_duration: bool = False,
mel_filters_dir: str = None,
):
"""
Compute the log-Mel spectrogram of the given audio.
Parameters
----------
audio: Union[str, np.ndarray, torch.Tensor], shape = (*)
The path to audio or either a NumPy array or Tensor containing the audio waveform in 16 kHz
n_mels: int
The number of Mel-frequency filters, only 80 and 128 are supported
padding: int
Number of zero samples to pad to the right
device: Optional[Union[str, torch.device]]
If given, the audio tensor is moved to this device before STFT
Returns
-------
torch.Tensor, shape = (80 or 128, n_frames)
A Tensor that contains the Mel spectrogram
"""
if not torch.is_tensor(audio):
if isinstance(audio, str):
if audio.endswith('.wav'):
audio, _ = load_audio_wav_format(audio)
else:
audio = load_audio(audio)
assert isinstance(audio,
np.ndarray), f"Unsupported audio type: {type(audio)}"
duration = audio.shape[-1] / SAMPLE_RATE
audio = pad_or_trim(audio, N_SAMPLES)
audio = audio.astype(np.float32)
audio = torch.from_numpy(audio)
if device is not None:
audio = audio.to(device)
if padding > 0:
audio = F.pad(audio, (0, padding))
window = torch.hann_window(N_FFT).to(audio.device)
stft = torch.stft(audio,
N_FFT,
HOP_LENGTH,
window=window,
return_complex=True)
magnitudes = stft[..., :-1].abs()**2
filters = mel_filters(audio.device, n_mels, mel_filters_dir)
mel_spec = filters @ magnitudes
log_spec = torch.clamp(mel_spec, min=1e-10).log10()
log_spec = torch.maximum(log_spec, log_spec.max() - 8.0)
log_spec = (log_spec + 4.0) / 4.0
if return_duration:
return log_spec, duration
else:
return log_spec
def store_transcripts(filename: Pathlike, texts: Iterable[Tuple[str, str,
str]]) -> None:
"""Save predicted results and reference transcripts to a file.
https://github.com/k2-fsa/icefall/blob/master/icefall/utils.py
Args:
filename:
File to save the results to.
texts:
An iterable of tuples. The first element is the cur_id, the second is
the reference transcript and the third element is the predicted result.
Returns:
Return None.
"""
with open(filename, "w") as f:
for cut_id, ref, hyp in texts:
print(f"{cut_id}:\tref={ref}", file=f)
print(f"{cut_id}:\thyp={hyp}", file=f)
def write_error_stats( # noqa: C901
f: TextIO,
test_set_name: str,
results: List[Tuple[str, List[str], List[str]]],
enable_log: bool = True,
) -> float:
"""Write statistics based on predicted results and reference transcripts.
https://github.com/k2-fsa/icefall/blob/master/icefall/utils.py
It will write the following to the given file:
- WER
- number of insertions, deletions, substitutions, corrects and total
reference words. For example::
Errors: 23 insertions, 57 deletions, 212 substitutions, over 2606
reference words (2337 correct)
- The difference between the reference transcript and predicted result.
An instance is given below::
THE ASSOCIATION OF (EDISON->ADDISON) ILLUMINATING COMPANIES
The above example shows that the reference word is `EDISON`,
but it is predicted to `ADDISON` (a substitution error).
Another example is::
FOR THE FIRST DAY (SIR->*) I THINK
The reference word `SIR` is missing in the predicted
results (a deletion error).
results:
An iterable of tuples. The first element is the cur_id, the second is
the reference transcript and the third element is the predicted result.
enable_log:
If True, also print detailed WER to the console.
Otherwise, it is written only to the given file.
Returns:
The total error rate (WER) as a float, expressed as a percentage.
"""
subs: Dict[Tuple[str, str], int] = defaultdict(int)
ins: Dict[str, int] = defaultdict(int)
dels: Dict[str, int] = defaultdict(int)
# `words` stores counts per word, as follows:
# corr, ref_sub, hyp_sub, ins, dels
words: Dict[str, List[int]] = defaultdict(lambda: [0, 0, 0, 0, 0])
num_corr = 0
ERR = "*"
for cut_id, ref, hyp in results:
ali = kaldialign.align(ref, hyp, ERR)
for ref_word, hyp_word in ali:
if ref_word == ERR:
ins[hyp_word] += 1
words[hyp_word][3] += 1
elif hyp_word == ERR:
dels[ref_word] += 1
words[ref_word][4] += 1
elif hyp_word != ref_word:
subs[(ref_word, hyp_word)] += 1
words[ref_word][1] += 1
words[hyp_word][2] += 1
else:
words[ref_word][0] += 1
num_corr += 1
ref_len = sum([len(r) for _, r, _ in results])
sub_errs = sum(subs.values())
ins_errs = sum(ins.values())
del_errs = sum(dels.values())
tot_errs = sub_errs + ins_errs + del_errs
tot_err_rate = "%.2f" % (100.0 * tot_errs / ref_len)
if enable_log:
logging.info(f"[{test_set_name}] %WER {tot_errs / ref_len:.2%} "
f"[{tot_errs} / {ref_len}, {ins_errs} ins, "
f"{del_errs} del, {sub_errs} sub ]")
print(f"%WER = {tot_err_rate}", file=f)
print(
f"Errors: {ins_errs} insertions, {del_errs} deletions, "
f"{sub_errs} substitutions, over {ref_len} reference "
f"words ({num_corr} correct)",
file=f,
)
print(
"Search below for sections starting with PER-UTT DETAILS:, "
"SUBSTITUTIONS:, DELETIONS:, INSERTIONS:, PER-WORD STATS:",
file=f,
)
print("", file=f)
print("PER-UTT DETAILS: corr or (ref->hyp) ", file=f)
for cut_id, ref, hyp in results:
ali = kaldialign.align(ref, hyp, ERR)
combine_successive_errors = True
if combine_successive_errors:
ali = [[[x], [y]] for x, y in ali]
for i in range(len(ali) - 1):
if ali[i][0] != ali[i][1] and ali[i + 1][0] != ali[i + 1][1]:
ali[i + 1][0] = ali[i][0] + ali[i + 1][0]
ali[i + 1][1] = ali[i][1] + ali[i + 1][1]
ali[i] = [[], []]
ali = [[
list(filter(lambda a: a != ERR, x)),
list(filter(lambda a: a != ERR, y)),
] for x, y in ali]
ali = list(filter(lambda x: x != [[], []], ali))
ali = [[
ERR if x == [] else " ".join(x),
ERR if y == [] else " ".join(y),
] for x, y in ali]
print(
f"{cut_id}:\t" + " ".join((ref_word if ref_word == hyp_word else
f"({ref_word}->{hyp_word})"
for ref_word, hyp_word in ali)),
file=f,
)
print("", file=f)
print("SUBSTITUTIONS: count ref -> hyp", file=f)
for count, (ref, hyp) in sorted([(v, k) for k, v in subs.items()],
reverse=True):
print(f"{count} {ref} -> {hyp}", file=f)
print("", file=f)
print("DELETIONS: count ref", file=f)
for count, ref in sorted([(v, k) for k, v in dels.items()], reverse=True):
print(f"{count} {ref}", file=f)
print("", file=f)
print("INSERTIONS: count hyp", file=f)
for count, hyp in sorted([(v, k) for k, v in ins.items()], reverse=True):
print(f"{count} {hyp}", file=f)
print("", file=f)
print("PER-WORD STATS: word corr tot_errs count_in_ref count_in_hyp",
file=f)
for _, word, counts in sorted([(sum(v[1:]), k, v)
for k, v in words.items()],
reverse=True):
(corr, ref_sub, hyp_sub, ins, dels) = counts
tot_errs = ref_sub + hyp_sub + ins + dels
ref_count = corr + ref_sub + dels
hyp_count = corr + hyp_sub + ins
print(f"{word} {corr} {tot_errs} {ref_count} {hyp_count}", file=f)
return float(tot_err_rate)
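For illustration, a hedged sketch of how the two WER helpers above can be combined (utterance ids and transcripts are made up):
# Each entry is (utterance id, reference words, hypothesis words).
results = [
    ("utt-1", "the quick brown fox".split(), "the quick brown box".split()),
    ("utt-2", "hello world".split(), "hello world".split()),
]
store_transcripts("recogs.txt", results)
with open("errs.txt", "w") as f:
    wer = write_error_stats(f, "dev", results, enable_log=False)
print(f"WER: {wer}%")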

1889
whisper_live/transcriber.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,320 @@
import json
import re
from collections import OrderedDict
from pathlib import Path
from typing import Union
import torch
import numpy as np
import torch.nn.functional as F
from whisper.tokenizer import get_tokenizer
from whisper_live.tensorrt_utils import (mel_filters, load_audio_wav_format, pad_or_trim, load_audio)
import tensorrt_llm
import tensorrt_llm.logger as logger
from tensorrt_llm._utils import (str_dtype_to_torch, str_dtype_to_trt,
trt_dtype_to_torch)
from tensorrt_llm.runtime import ModelConfig, SamplingConfig
from tensorrt_llm.runtime.session import Session, TensorInfo
SAMPLE_RATE = 16000
N_FFT = 400
HOP_LENGTH = 160
CHUNK_LENGTH = 30
N_SAMPLES = CHUNK_LENGTH * SAMPLE_RATE # 480000 samples in a 30-second chunk
class WhisperEncoding:
def __init__(self, engine_dir):
self.session = self.get_session(engine_dir)
def get_session(self, engine_dir):
config_path = engine_dir / 'encoder_config.json'
with open(config_path, 'r') as f:
config = json.load(f)
dtype = config['builder_config']['precision']
n_mels = config['builder_config']['n_mels']
num_languages = config['builder_config']['num_languages']
self.dtype = dtype
self.n_mels = n_mels
self.num_languages = num_languages
serialize_path = engine_dir / f'whisper_encoder_{self.dtype}_tp1_rank0.engine'
with open(serialize_path, 'rb') as f:
session = Session.from_serialized_engine(f.read())
return session
def get_audio_features(self, mel):
inputs = OrderedDict()
output_list = []
inputs.update({'x': mel})
output_list.append(
TensorInfo('x', str_dtype_to_trt(self.dtype), mel.shape))
output_info = (self.session).infer_shapes(output_list)
logger.debug(f'output info {output_info}')
outputs = {
t.name: torch.empty(tuple(t.shape),
dtype=trt_dtype_to_torch(t.dtype),
device='cuda')
for t in output_info
}
stream = torch.cuda.current_stream()
ok = self.session.run(inputs=inputs,
outputs=outputs,
stream=stream.cuda_stream)
assert ok, 'Engine execution failed'
stream.synchronize()
audio_features = outputs['output']
return audio_features
class WhisperDecoding:
def __init__(self, engine_dir, runtime_mapping, debug_mode=False):
self.decoder_config = self.get_config(engine_dir)
self.decoder_generation_session = self.get_session(
engine_dir, runtime_mapping, debug_mode)
def get_config(self, engine_dir):
config_path = engine_dir / 'decoder_config.json'
with open(config_path, 'r') as f:
config = json.load(f)
decoder_config = OrderedDict()
decoder_config.update(config['plugin_config'])
decoder_config.update(config['builder_config'])
return decoder_config
def get_session(self, engine_dir, runtime_mapping, debug_mode=False):
dtype = self.decoder_config['precision']
serialize_path = engine_dir / f'whisper_decoder_{dtype}_tp1_rank0.engine'
with open(serialize_path, "rb") as f:
decoder_engine_buffer = f.read()
decoder_model_config = ModelConfig(
num_heads=self.decoder_config['num_heads'],
num_kv_heads=self.decoder_config['num_heads'],
hidden_size=self.decoder_config['hidden_size'],
vocab_size=self.decoder_config['vocab_size'],
num_layers=self.decoder_config['num_layers'],
gpt_attention_plugin=self.decoder_config['gpt_attention_plugin'],
remove_input_padding=self.decoder_config['remove_input_padding'],
cross_attention=self.decoder_config['cross_attention'],
has_position_embedding=self.
decoder_config['has_position_embedding'],
has_token_type_embedding=self.
decoder_config['has_token_type_embedding'],
)
decoder_generation_session = tensorrt_llm.runtime.GenerationSession(
decoder_model_config,
decoder_engine_buffer,
runtime_mapping,
debug_mode=debug_mode)
return decoder_generation_session
def generate(self,
decoder_input_ids,
encoder_outputs,
eot_id,
max_new_tokens=40,
num_beams=1):
encoder_input_lengths = torch.tensor(
[encoder_outputs.shape[1] for x in range(encoder_outputs.shape[0])],
dtype=torch.int32,
device='cuda')
decoder_input_lengths = torch.tensor([
decoder_input_ids.shape[-1]
for _ in range(decoder_input_ids.shape[0])
],
dtype=torch.int32,
device='cuda')
decoder_max_input_length = torch.max(decoder_input_lengths).item()
# generation config
sampling_config = SamplingConfig(end_id=eot_id,
pad_id=eot_id,
num_beams=num_beams)
self.decoder_generation_session.setup(
decoder_input_lengths.size(0),
decoder_max_input_length,
max_new_tokens,
beam_width=num_beams,
encoder_max_input_length=encoder_outputs.shape[1])
torch.cuda.synchronize()
decoder_input_ids = decoder_input_ids.type(torch.int32).cuda()
output_ids = self.decoder_generation_session.decode(
decoder_input_ids,
decoder_input_lengths,
sampling_config,
encoder_output=encoder_outputs,
encoder_input_lengths=encoder_input_lengths,
)
torch.cuda.synchronize()
# get the list of int from output_ids tensor
output_ids = output_ids.cpu().numpy().tolist()
return output_ids
class WhisperTRTLLM(object):
def __init__(self, engine_dir, assets_dir=None, device=None, is_multilingual=False,
language="en", task="transcribe"):
world_size = 1
runtime_rank = tensorrt_llm.mpi_rank()
runtime_mapping = tensorrt_llm.Mapping(world_size, runtime_rank)
torch.cuda.set_device(runtime_rank % runtime_mapping.gpus_per_node)
engine_dir = Path(engine_dir)
self.encoder = WhisperEncoding(engine_dir)
self.decoder = WhisperDecoding(engine_dir,
runtime_mapping,
debug_mode=False)
self.n_mels = self.encoder.n_mels
# self.tokenizer = get_tokenizer(num_languages=self.encoder.num_languages,
# tokenizer_dir=assets_dir)
self.device = device
self.tokenizer = get_tokenizer(
is_multilingual,
num_languages=self.encoder.num_languages,
language=language,
task=task,
)
self.filters = mel_filters(self.device, self.encoder.n_mels, assets_dir)
def log_mel_spectrogram(
self,
audio: Union[str, np.ndarray, torch.Tensor],
padding: int = 0,
return_duration=True
):
"""
Compute the log-Mel spectrogram of the given audio.
Parameters
----------
audio: Union[str, np.ndarray, torch.Tensor], shape = (*)
The path to audio or either a NumPy array or Tensor containing the audio waveform in 16 kHz
padding: int
Number of zero samples to pad to the right
return_duration: bool
If True, also return the duration of the input audio in seconds
Returns
-------
torch.Tensor, shape = (80 or 128, n_frames)
A Tensor that contains the Mel spectrogram (and the audio duration in seconds, if return_duration is True)
"""
if not torch.is_tensor(audio):
if isinstance(audio, str):
if audio.endswith('.wav'):
audio, _ = load_audio_wav_format(audio)
else:
audio = load_audio(audio)
assert isinstance(audio, np.ndarray), f"Unsupported audio type: {type(audio)}"
duration = audio.shape[-1] / SAMPLE_RATE
audio = pad_or_trim(audio, N_SAMPLES)
audio = audio.astype(np.float32)
audio = torch.from_numpy(audio)
if self.device is not None:
audio = audio.to(self.device)
if padding > 0:
audio = F.pad(audio, (0, padding))
window = torch.hann_window(N_FFT).to(audio.device)
stft = torch.stft(audio, N_FFT, HOP_LENGTH, window=window, return_complex=True)
magnitudes = stft[..., :-1].abs()**2
mel_spec = self.filters @ magnitudes
log_spec = torch.clamp(mel_spec, min=1e-10).log10()
log_spec = torch.maximum(log_spec, log_spec.max() - 8.0)
log_spec = (log_spec + 4.0) / 4.0
if return_duration:
return log_spec, duration
else:
return log_spec
def process_batch(
self,
mel,
text_prefix="<|startoftranscript|><|en|><|transcribe|><|notimestamps|>",
num_beams=1):
prompt_id = self.tokenizer.encode(
text_prefix, allowed_special=set(self.tokenizer.special_tokens.keys()))
prompt_id = torch.tensor(prompt_id)
batch_size = mel.shape[0]
decoder_input_ids = prompt_id.repeat(batch_size, 1)
encoder_output = self.encoder.get_audio_features(mel)
output_ids = self.decoder.generate(decoder_input_ids,
encoder_output,
self.tokenizer.eot,
max_new_tokens=96,
num_beams=num_beams)
texts = []
for i in range(len(output_ids)):
text = self.tokenizer.decode(output_ids[i][0]).strip()
texts.append(text)
return texts
def transcribe(
self,
mel,
text_prefix="<|startoftranscript|><|en|><|transcribe|><|notimestamps|>",
dtype='float16',
batch_size=1,
num_beams=1,
):
mel = mel.type(str_dtype_to_torch(dtype))
mel = mel.unsqueeze(0)
predictions = self.process_batch(mel, text_prefix, num_beams)
prediction = predictions[0]
# remove all special tokens in the prediction
prediction = re.sub(r'<\|.*?\|>', '', prediction)
return prediction.strip()
def decode_wav_file(
model,
mel,
text_prefix="<|startoftranscript|><|en|><|transcribe|><|notimestamps|>",
dtype='float16',
batch_size=1,
num_beams=1,
normalizer=None,
mel_filters_dir=None):
mel = mel.type(str_dtype_to_torch(dtype))
mel = mel.unsqueeze(0)
# repeat the mel spectrogram to match the batch size
mel = mel.repeat(batch_size, 1, 1)
predictions = model.process_batch(mel, text_prefix, num_beams)
prediction = predictions[0]
# remove all special tokens in the prediction
prediction = re.sub(r'<\|.*?\|>', '', prediction)
if normalizer:
prediction = normalizer(prediction)
return prediction.strip()
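A hedged usage sketch for the class above, assuming a prebuilt TensorRT-LLM engine directory and local assets (all paths are illustrative):
model = WhisperTRTLLM("engines/whisper_small_en", assets_dir="assets", device="cuda")
# log_mel_spectrogram returns (mel, duration) by default.
mel, duration = model.log_mel_spectrogram("assets/jfk.flac")
print(model.transcribe(mel))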

82
whisper_live/utils.py Normal file
View File

@ -0,0 +1,82 @@
import os
import textwrap
import scipy
import numpy as np
import av
from pathlib import Path
def clear_screen():
"""Clears the console screen."""
os.system("cls" if os.name == "nt" else "clear")
def print_transcript(text):
"""Prints formatted transcript text."""
wrapper = textwrap.TextWrapper(width=60)
for line in wrapper.wrap(text="".join(text)):
print(line)
def format_time(s):
"""Convert seconds (float) to SRT time format."""
hours = int(s // 3600)
minutes = int((s % 3600) // 60)
seconds = int(s % 60)
milliseconds = int((s - int(s)) * 1000)
return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"
def create_srt_file(segments, resampled_file):
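    """Write transcription segments (dicts with 'start', 'end' and 'text') to an SRT file at the given path."""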
with open(resampled_file, 'w', encoding='utf-8') as srt_file:
segment_number = 1
for segment in segments:
start_time = format_time(float(segment['start']))
end_time = format_time(float(segment['end']))
text = segment['text']
srt_file.write(f"{segment_number}\n")
srt_file.write(f"{start_time} --> {end_time}\n")
srt_file.write(f"{text}\n\n")
segment_number += 1


def resample(file: str, sr: int = 16000):
    """
    Resample the audio file to the target sample rate (16 kHz by default).

    Args:
        file (str): The audio file to open
        sr (int): The sample rate to resample the audio to, if necessary

    Returns:
        resampled_file (str): The path of the resampled audio file
    """
    container = av.open(file)
    stream = next(s for s in container.streams if s.type == 'audio')

    resampler = av.AudioResampler(
        format='s16',
        layout='mono',
        rate=sr,
    )

    resampled_file = Path(file).stem + "_resampled.wav"
    output_container = av.open(resampled_file, mode='w')
    output_stream = output_container.add_stream('pcm_s16le', rate=sr)
    output_stream.layout = 'mono'

    for frame in container.decode(audio=0):
        frame.pts = None
        resampled_frames = resampler.resample(frame)
        if resampled_frames is not None:
            for resampled_frame in resampled_frames:
                for packet in output_stream.encode(resampled_frame):
                    output_container.mux(packet)

    for packet in output_stream.encode(None):
        output_container.mux(packet)

    output_container.close()
    return resampled_file
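

# Illustrative usage (not part of the original file; file names are placeholders):
#   wav_path = resample("audio.mp3", sr=16000)   # writes "audio_resampled.wav"
#   create_srt_file([{"start": 0.0, "end": 1.2, "text": "hello"}], "audio.srt")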

155
whisper_live/vad.py Normal file
View File

@ -0,0 +1,155 @@
# original: https://github.com/snakers4/silero-vad/blob/master/utils_vad.py
import os
import subprocess
import torch
import numpy as np
import onnxruntime
import warnings


class VoiceActivityDetection():

    def __init__(self, force_onnx_cpu=True):
        path = self.download()

        opts = onnxruntime.SessionOptions()
        opts.log_severity_level = 3

        opts.inter_op_num_threads = 1
        opts.intra_op_num_threads = 1

        if force_onnx_cpu and 'CPUExecutionProvider' in onnxruntime.get_available_providers():
            self.session = onnxruntime.InferenceSession(path, providers=['CPUExecutionProvider'], sess_options=opts)
        else:
            self.session = onnxruntime.InferenceSession(path, providers=['CUDAExecutionProvider'], sess_options=opts)

        self.reset_states()
        self.sample_rates = [8000, 16000]

    def _validate_input(self, x, sr: int):
        if x.dim() == 1:
            x = x.unsqueeze(0)
        if x.dim() > 2:
            raise ValueError(f"Too many dimensions for input audio chunk {x.dim()}")

        if sr != 16000 and (sr % 16000 == 0):
            step = sr // 16000
            x = x[:, ::step]
            sr = 16000

        if sr not in self.sample_rates:
            raise ValueError(f"Supported sampling rates: {self.sample_rates} (or a multiple of 16000)")
        if sr / x.shape[1] > 31.25:
            raise ValueError("Input audio chunk is too short")

        return x, sr

    def reset_states(self, batch_size=1):
        self._state = torch.zeros((2, batch_size, 128)).float()
        self._context = torch.zeros(0)
        self._last_sr = 0
        self._last_batch_size = 0

    def __call__(self, x, sr: int):
        x, sr = self._validate_input(x, sr)
        num_samples = 512 if sr == 16000 else 256

        if x.shape[-1] != num_samples:
            raise ValueError(f"Provided number of samples is {x.shape[-1]} (Supported values: 256 for 8000 sample rate, 512 for 16000)")

        batch_size = x.shape[0]
        context_size = 64 if sr == 16000 else 32

        if not self._last_batch_size:
            self.reset_states(batch_size)
        if (self._last_sr) and (self._last_sr != sr):
            self.reset_states(batch_size)
        if (self._last_batch_size) and (self._last_batch_size != batch_size):
            self.reset_states(batch_size)

        if not len(self._context):
            self._context = torch.zeros(batch_size, context_size)

        x = torch.cat([self._context, x], dim=1)

        if sr in [8000, 16000]:
            ort_inputs = {'input': x.numpy(), 'state': self._state.numpy(), 'sr': np.array(sr, dtype='int64')}
            ort_outs = self.session.run(None, ort_inputs)
            out, state = ort_outs
            self._state = torch.from_numpy(state)
        else:
            raise ValueError()

        self._context = x[..., -context_size:]
        self._last_sr = sr
        self._last_batch_size = batch_size

        out = torch.from_numpy(out)
        return out
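
    # Example (illustrative, not part of the original file): one 512-sample
    # chunk at 16 kHz (plus the 64-sample context kept internally) produces a
    # single speech probability per batch item:
    #   vad = VoiceActivityDetection()
    #   prob = vad(torch.zeros(1, 512), 16000)   # -> tensor of shape (1, 1)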

    def audio_forward(self, x, sr: int):
        outs = []
        x, sr = self._validate_input(x, sr)
        self.reset_states()
        num_samples = 512 if sr == 16000 else 256

        if x.shape[1] % num_samples:
            pad_num = num_samples - (x.shape[1] % num_samples)
            x = torch.nn.functional.pad(x, (0, pad_num), 'constant', value=0.0)

        for i in range(0, x.shape[1], num_samples):
            wavs_batch = x[:, i:i+num_samples]
            out_chunk = self.__call__(wavs_batch, sr)
            outs.append(out_chunk)

        stacked = torch.cat(outs, dim=1)
        return stacked.cpu()

    @staticmethod
    def download(model_url="https://github.com/snakers4/silero-vad/raw/v5.0/files/silero_vad.onnx"):
        target_dir = os.path.expanduser("~/.cache/whisper-live/")

        # Ensure the target directory exists
        os.makedirs(target_dir, exist_ok=True)

        # Define the target file path
        model_filename = os.path.join(target_dir, "silero_vad.onnx")

        # Check if the model file already exists
        if not os.path.exists(model_filename):
            # If it doesn't exist, download the model using wget
            try:
                subprocess.run(["wget", "-O", model_filename, model_url], check=True)
            except subprocess.CalledProcessError:
                print("Failed to download the model using wget.")

        return model_filename


class VoiceActivityDetector:
    def __init__(self, threshold=0.5, frame_rate=16000):
        """
        Initializes the VoiceActivityDetector with a voice activity detection model and a threshold.

        Args:
            threshold (float, optional): The probability threshold for detecting voice activity. Defaults to 0.5.
            frame_rate (int, optional): The sample rate of the audio frames passed to the detector. Defaults to 16000.
        """
        self.model = VoiceActivityDetection()
        self.threshold = threshold
        self.frame_rate = frame_rate

    def __call__(self, audio_frame):
        """
        Determines if the given audio frame contains speech by comparing the detected speech probability against
        the threshold.

        Args:
            audio_frame (np.ndarray): The audio frame to be analyzed for voice activity. It is expected to be a
                NumPy array of audio samples.

        Returns:
            bool: True if the speech probability exceeds the threshold, indicating the presence of voice activity;
                False otherwise.
        """
        speech_probs = self.model.audio_forward(torch.from_numpy(audio_frame.copy()), self.frame_rate)[0]
        return torch.any(speech_probs > self.threshold).item()
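

# Illustrative usage (not part of the original file): one second of silent
# 16 kHz audio should yield no voice activity. The first call downloads the
# Silero VAD ONNX model to ~/.cache/whisper-live/ via wget.
if __name__ == "__main__":
    detector = VoiceActivityDetector(threshold=0.5, frame_rate=16000)
    silence = np.zeros(16000, dtype=np.float32)
    print("speech detected:", detector(silence))  # expected: False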

169
workflows/ci.yml Normal file
View File

@ -0,0 +1,169 @@
name: Test & Build CI/CD
on:
push:
branches:
- main
tags:
- v*
pull_request:
branches: [ main ]
types: [opened, synchronize, reopened]
jobs:
run-tests:
runs-on: ubuntu-22.04
strategy:
matrix:
python-version: [3.8, 3.9, '3.10', 3.11]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Cache Python dependencies
uses: actions/cache@v2
with:
path: |
~/.cache/pip
!~/.cache/pip/log
key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('requirements/server.txt', 'requirements/client.txt') }}
restore-keys: |
${{ runner.os }}-pip-${{ matrix.python-version }}-
- name: Install system dependencies
run: sudo apt-get update && sudo apt-get install -y ffmpeg portaudio19-dev
- name: Install Python dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements/server.txt --extra-index-url https://download.pytorch.org/whl/cpu
pip install -r requirements/client.txt
- name: Run tests
run: |
echo "Running tests with Python ${{ matrix.python-version }}"
python -m unittest discover -s tests
check-code-format:
runs-on: ubuntu-22.04
strategy:
matrix:
python-version: [3.8, 3.9, '3.10', 3.11]
steps:
- uses: actions/checkout@v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
build-and-push-docker-cpu:
needs: [run-tests, check-code-format]
runs-on: ubuntu-22.04
if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/'))
steps:
- uses: actions/checkout@v2
- name: Log in to GitHub Container Registry
uses: docker/login-action@v1
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GHCR_TOKEN }}
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Build and push Docker image
uses: docker/build-push-action@v2
with:
context: .
file: docker/Dockerfile.cpu
push: true
tags: ghcr.io/collabora/whisperlive-cpu:latest
build-and-push-docker-gpu:
needs: [run-tests, check-code-format, build-and-push-docker-cpu]
timeout-minutes: 20
runs-on: ubuntu-22.04
if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/'))
steps:
- uses: actions/checkout@v2
- name: Log in to GitHub Container Registry
uses: docker/login-action@v1
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GHCR_TOKEN }}
- name: Docker Prune
run: docker system prune -af
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Build and push Docker GPU image
uses: docker/build-push-action@v2
with:
context: .
file: docker/Dockerfile.gpu
push: true
tags: ghcr.io/collabora/whisperlive-gpu:latest
publish-to-pypi:
needs: [run-tests, check-code-format]
runs-on: ubuntu-22.04
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
steps:
- uses: actions/checkout@v2
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Cache Python dependencies
uses: actions/cache@v2
with:
path: |
~/.cache/pip
!~/.cache/pip/log
key: ubuntu-latest-pip-3.8-${{ hashFiles('requirements/server.txt', 'requirements/client.txt') }}
restore-keys: |
ubuntu-latest-pip-3.8-
- name: Install system dependencies
run: sudo apt-get update && sudo apt-get install -y ffmpeg portaudio19-dev
- name: Install Python dependencies
run: |
pip install -r requirements/server.txt
pip install -r requirements/client.txt
pip install wheel
- name: Build package
run: python setup.py sdist bdist_wheel
- name: Publish package to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}