This commit is contained in:
K Car 2025-07-10 09:25:19 +01:00
parent a7de5af021
commit 5bf9e53449
11 changed files with 457 additions and 76 deletions

7
.dockerignore Normal file
View File

@ -0,0 +1,7 @@
node_modules
.git
Dockerfile
docker-compose.yml
nginx/
*.log
.env

6
.env
View File

@ -1,5 +1 @@
VITE_WHISPERLIVE_URL=wss://whisperlive.classroomcopilot.ai
VITE_APP_URL=whisperlive.classroomcopilot.ai
VITE_APP_PROTOCOL=https
VITE_APP_NAME=ClassroomCopilotLive
VITE_DEV=false
VITE_WHISPERLIVE_URL=wss://whisperlive.kevlarai.com

View File

@ -9,16 +9,13 @@ RUN corepack prepare yarn@4.8.0 --activate
# Copy package files
COPY package.json yarn.lock ./
COPY whisperlive-frontend/package.json ./whisperlive-frontend/
# Now run yarn install
RUN yarn install
# Copy source files
COPY whisperlive-frontend ./whisperlive-frontend
COPY . ./
# Build the application
RUN yarn workspace whisperlive-frontend build
RUN yarn build
# Production stage
FROM nginx:alpine
@ -26,17 +23,11 @@ FROM nginx:alpine
# Create SSL directory
RUN mkdir -p /etc/nginx/ssl
# Create a win/macos switcher
ARG BUILD_OS
ENV BUILD_OS=${BUILD_OS}
ARG NGINX_MODE
ENV NGINX_MODE=${NGINX_MODE}
# Copy nginx configuration
COPY whisperlive-frontend/nginx/nginx-${BUILD_OS}-${NGINX_MODE:-dev}.conf /etc/nginx/conf.d/default.conf
COPY nginx/nginx.conf /etc/nginx/conf.d/default.conf
# Copy built files from builder
COPY --from=builder /app/whisperlive-frontend/dist /usr/share/nginx/html
COPY --from=builder /app/dist /usr/share/nginx/html
# Start nginx
CMD ["nginx", "-g", "daemon off;"]

8
docker-compose.yml Normal file
View File

@ -0,0 +1,8 @@
services:
whisperlive-frontend:
container_name: whisperlive-frontend
build:
context: .
dockerfile: ./Dockerfile
ports:
- "80:80"

View File

@ -1,55 +0,0 @@
server {
listen 5054;
server_name localhost;
return 301 https://$server_name$request_uri;
}
server {
listen 5055 ssl;
server_name localhost;
root /usr/share/nginx/html;
index index.html;
# SSL configuration
ssl_certificate /etc/nginx/ssl/fullchain.pem;
ssl_certificate_key /etc/nginx/ssl/privkey.pem;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers HIGH:!aNULL:!MD5;
ssl_prefer_server_ciphers on;
ssl_session_cache shared:SSL:10m;
ssl_session_timeout 10m;
# Enable gzip compression
gzip on;
gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;
# Security headers
add_header X-Frame-Options "SAMEORIGIN";
add_header X-XSS-Protection "1; mode=block";
add_header X-Content-Type-Options "nosniff";
add_header Referrer-Policy "strict-origin-when-cross-origin";
add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline'; img-src 'self' data: blob:; media-src 'self' blob:; connect-src 'self' ws: wss:;";
add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
location / {
try_files $uri $uri/ /index.html;
}
# Cache static assets
location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg)$ {
expires 1y;
add_header Cache-Control "public, no-transform";
}
# WebSocket proxy for WhisperLive server
location /ws {
proxy_pass https://whisperlive.classroomcopilot.ai;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
}

View File

@ -1,6 +1,6 @@
server {
listen 5054;
server_name localhost;
listen 80;
server_name whisperlive-frontend.kevlarai.com;
root /usr/share/nginx/html;
index index.html;
@ -29,7 +29,7 @@ server {
# WebSocket proxy for WhisperLive server
location /ws {
proxy_pass https://whisperlive-macos:5050;
proxy_pass https://whisperlive.kevlarai.com;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";

402
src/App.tsx Normal file
View File

@ -0,0 +1,402 @@
import { useState } from 'react'
import { Tab } from '@headlessui/react'
import { MicrophoneIcon, StopIcon, ArrowUpTrayIcon } from '@heroicons/react/24/solid'
// WhisperLive WebSocket endpoint, resolved at build time by Vite from
// VITE_WHISPERLIVE_URL (see src/env.d.ts).
// Fix: removed the leftover debug logs — one of them dumped the entire
// import.meta.env object to the browser console on every page load.
const wsUrl = import.meta.env.VITE_WHISPERLIVE_URL
/**
 * Options sent to the WhisperLive server in the initial handshake message
 * (spread into the first JSON payload after the socket opens).
 */
interface WhisperLiveOptions {
  // null = let the server auto-detect the spoken language.
  language: string | null
  // 'translate' renders the output in English (see the Options select UI).
  task: 'transcribe' | 'translate'
  // Whisper model size; the '.en' variants are English-only.
  model: 'tiny.en' | 'base.en' | 'small.en' | 'medium.en' | 'large'
  // Enable voice activity detection — presumably server-side; TODO confirm.
  useVad: boolean
}
/** Join the truthy class-name fragments into a single space-separated string. */
function classNames(...classes: string[]) {
  const kept: string[] = []
  for (const cls of classes) {
    if (cls) {
      kept.push(cls)
    }
  }
  return kept.join(' ')
}
/**
 * Generate an RFC 4122 version-4 UUID, used as the per-connection client id
 * in the WhisperLive handshake.
 *
 * Fix: the original called console.log for every generated hex digit
 * (~30 log lines per UUID) — leftover debug noise, now removed.
 * Note: Math.random() is not cryptographically strong; fine for a client id.
 */
function generateUUID(): string {
  return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function (c) {
    // Random nibble; for 'y' force the RFC 4122 variant bits (10xx → 8..b).
    const r = Math.random() * 16 | 0;
    const v = c === 'x' ? r : (r & 0x3 | 0x8);
    return v.toString(16);
  });
}
/**
 * WhisperLive transcription UI.
 *
 * Streams microphone audio (or a decoded uploaded audio file) to a
 * WhisperLive server over WebSocket and renders the running transcript plus
 * per-segment completion status.
 *
 * NOTE(review): the WebSocket / MediaStream / AudioContext created in
 * startRecording are local to that call and never stored, so stopRecording
 * cannot reach them — it only clears the isRecording flag. Confirm whether
 * teardown is handled elsewhere or is still TODO.
 */
export default function App() {
  const [isRecording, setIsRecording] = useState(false)
  // Concatenation of all completed segment texts.
  const [transcript, setTranscript] = useState('')
  // Segments the server has finalized.
  const [segments, setSegments] = useState<Array<{text: string, completed: boolean}>>([])
  // The trailing, still-changing segment (if any).
  const [currentSegment, setCurrentSegment] = useState<{text: string, completed: boolean} | null>(null)
  const [options, setOptions] = useState<WhisperLiveOptions>({
    language: null, // Auto-detect
    task: 'transcribe',
    model: 'base.en',
    useVad: true
  })

  // Open the microphone, connect to the server, and stream audio resampled
  // to 16 kHz through an AudioWorklet.
  const startRecording = async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
      const wsUrl = import.meta.env.VITE_WHISPERLIVE_URL
      const socket = new WebSocket(`${wsUrl}/ws`)
      console.log(socket)
      socket.onopen = () => {
        // Handshake: identify this client and send the decode options.
        socket.send(JSON.stringify({
          uid: generateUUID(),
          ...options
        }))
      }
      socket.onmessage = (event) => {
        console.log(event)
        const data = JSON.parse(event.data)
        // Server at capacity: it sends a human-readable wait message.
        if (data.status === 'WAIT') {
          console.log('data', data)
          alert(data.message)
          return
        }
        // Record the server's auto-detected language exactly once.
        if (data.language && options.language === null) {
          console.log('data.language', data.language)
          setOptions(prev => ({ ...prev, language: data.language }))
          return
        }
        if (data.message === 'DISCONNECT') {
          console.log('data.message', data.message)
          setIsRecording(false)
          return
        }
        // Handle transcription segments
        if (data.segments) {
          console.log('data.segments', data.segments)
          // Process completed segments (all but the last one)
          const completedSegments = data.segments.slice(0, -1).map((segment: { text: string }) => ({
            text: segment.text,
            completed: true
          }))
          // Process current (incomplete) segment
          const newCurrentSegment = data.segments.length > 0 ? {
            text: data.segments[data.segments.length - 1].text,
            completed: false
          } : null
          // Update segments state
          setSegments(completedSegments)
          // Update current segment state
          setCurrentSegment(newCurrentSegment)
          // Update transcript with all completed segments
          const fullTranscript = completedSegments.map((s: { text: string }) => s.text).join(' ')
          setTranscript(fullTranscript)
        }
      }
      console.log('audioContext')
      const audioContext = new AudioContext()
      console.log('audioContext', audioContext)
      const source = audioContext.createMediaStreamSource(stream)
      console.log('source', source)
      // Create and load the audio worklet
      await audioContext.audioWorklet.addModule('audioProcessor.js')
      const workletNode = new AudioWorkletNode(audioContext, 'audio-processor')
      workletNode.port.onmessage = (e) => {
        // Drop audio frames until the socket is actually open.
        if (!socket || socket.readyState !== WebSocket.OPEN) return
        const audioData16kHz = resampleTo16kHZ(e.data, audioContext.sampleRate)
        socket.send(audioData16kHz)
      }
      source.connect(workletNode)
      workletNode.connect(audioContext.destination)
      setIsRecording(true)
    } catch (err) {
      console.error('Error starting recording:', err)
      alert('Error starting recording. Please check your microphone permissions.')
    }
  }

  // NOTE(review): stub — only clears the flag; see the component-level note.
  const stopRecording = () => {
    setIsRecording(false)
    // Close WebSocket connection and stop audio recording
  }

  // Decode an uploaded audio file and replay its first channel to the server
  // in fixed-size chunks.
  const handleFileUpload = (event: React.ChangeEvent<HTMLInputElement>) => {
    const file = event.target.files?.[0]
    if (!file) return

    const reader = new FileReader()
    reader.onload = async (e) => {
      const arrayBuffer = e.target?.result as ArrayBuffer
      const audioContext = new AudioContext()
      const audioBuffer = await audioContext.decodeAudioData(arrayBuffer)

      // Process audio file in chunks
      // NOTE(review): hard-coded endpoint; startRecording builds its URL from
      // VITE_WHISPERLIVE_URL — these should probably agree.
      const socket = new WebSocket(`wss://whisperlive.classroomcopilot.ai/ws`)
      socket.onopen = () => {
        socket.send(JSON.stringify({
          uid: generateUUID(),
          ...options
        }))
      }
      socket.onmessage = (event) => {
        const data = JSON.parse(event.data)
        if (data.status === 'WAIT') {
          alert(data.message)
          return
        }
        if (data.language && options.language === null) {
          setOptions(prev => ({ ...prev, language: data.language }))
          return
        }
        if (data.message === 'DISCONNECT') {
          return
        }
        // Handle transcription segments
        if (data.segments) {
          // Process completed segments (all but the last one)
          const completedSegments = data.segments.slice(0, -1).map((segment: { text: string }) => ({
            text: segment.text,
            completed: true
          }))
          // Process current (incomplete) segment
          const newCurrentSegment = data.segments.length > 0 ? {
            text: data.segments[data.segments.length - 1].text,
            completed: false
          } : null
          // Update segments state
          setSegments(completedSegments)
          // Update current segment state
          setCurrentSegment(newCurrentSegment)
          // Update transcript with all completed segments
          // NOTE(review): joins with '' here but with ' ' in the mic path —
          // confirm which separator is intended.
          const fullTranscript = completedSegments.map((s: { text: string }) => s.text).join('')
          setTranscript(fullTranscript)
        }
      }

      // Process audio in chunks
      // NOTE(review): send() is called right after constructing the socket,
      // before the 'open' event necessarily fired — confirm this is safe for
      // the target runtime.
      const chunkSize = 4096
      const data = audioBuffer.getChannelData(0)
      for (let i = 0; i < data.length; i += chunkSize) {
        const chunk = data.slice(i, i + chunkSize)
        const audioData16kHz = resampleTo16kHZ(chunk, audioBuffer.sampleRate)
        socket.send(audioData16kHz)
      }
    }
    reader.readAsArrayBuffer(file)
  }

  // Resample a mono Float32Array to 16 kHz via linear interpolation.
  // The endpoints are copied directly; interior samples are interpolated.
  function resampleTo16kHZ(audioData: Float32Array, origSampleRate: number): Float32Array {
    const targetLength = Math.round(audioData.length * (16000 / origSampleRate))
    const resampledData = new Float32Array(targetLength)
    // Maps each output index back onto the source timeline.
    const springFactor = (audioData.length - 1) / (targetLength - 1)
    resampledData[0] = audioData[0]
    resampledData[targetLength - 1] = audioData[audioData.length - 1]
    for (let i = 1; i < targetLength - 1; i++) {
      const index = i * springFactor
      const leftIndex = Math.floor(index)
      const rightIndex = Math.ceil(index)
      const fraction = index - leftIndex
      // Linear interpolation between the two neighbouring source samples.
      resampledData[i] = audioData[leftIndex] + (audioData[rightIndex] - audioData[leftIndex]) * fraction
    }
    return resampledData
  }

  return (
    <div className="min-h-screen p-8">
      <div className="max-w-3xl mx-auto">
        <h1 className="text-3xl font-bold text-center mb-8">WhisperLive Transcription</h1>

        <Tab.Group>
          <Tab.List className="flex space-x-1 rounded-xl bg-blue-900/20 p-1">
            <Tab
              className={({ selected }: { selected: boolean }) =>
                classNames(
                  'w-full rounded-lg py-2.5 text-sm font-medium leading-5',
                  'ring-white ring-opacity-60 ring-offset-2 ring-offset-blue-400 focus:outline-none focus:ring-2',
                  selected
                    ? 'bg-white shadow text-blue-700'
                    : 'text-blue-100 hover:bg-white/[0.12] hover:text-white'
                )
              }
            >
              Record &amp; Transcribe
            </Tab>
            <Tab
              className={({ selected }: { selected: boolean }) =>
                classNames(
                  'w-full rounded-lg py-2.5 text-sm font-medium leading-5',
                  'ring-white ring-opacity-60 ring-offset-2 ring-offset-blue-400 focus:outline-none focus:ring-2',
                  selected
                    ? 'bg-white shadow text-blue-700'
                    : 'text-blue-100 hover:bg-white/[0.12] hover:text-white'
                )
              }
            >
              Upload Audio
            </Tab>
          </Tab.List>

          <Tab.Panels className="mt-4">
            <Tab.Panel>
              <div className="space-y-4">
                <div className="flex justify-center">
                  <button
                    onClick={isRecording ? stopRecording : startRecording}
                    className={classNames(
                      'flex items-center px-4 py-2 rounded-lg font-medium',
                      isRecording
                        ? 'bg-red-600 hover:bg-red-700 text-white'
                        : 'bg-blue-600 hover:bg-blue-700 text-white'
                    )}
                  >
                    {isRecording ? (
                      <>
                        <StopIcon className="h-5 w-5 mr-2" />
                        Stop Recording
                      </>
                    ) : (
                      <>
                        <MicrophoneIcon className="h-5 w-5 mr-2" />
                        Start Recording
                      </>
                    )}
                  </button>
                </div>
              </div>
            </Tab.Panel>

            <Tab.Panel>
              <div className="space-y-4">
                <div className="flex justify-center">
                  <label className="flex items-center px-4 py-2 bg-blue-600 hover:bg-blue-700 text-white rounded-lg cursor-pointer">
                    <ArrowUpTrayIcon className="h-5 w-5 mr-2" />
                    Upload Audio File
                    <input
                      type="file"
                      accept="audio/*"
                      className="hidden"
                      onChange={handleFileUpload}
                    />
                  </label>
                </div>
              </div>
            </Tab.Panel>
          </Tab.Panels>
        </Tab.Group>

        <div className="mt-8">
          <h2 className="text-xl font-semibold mb-4">Transcript</h2>
          <div className="bg-white rounded-lg p-4 min-h-[200px] shadow">
            {transcript || 'No transcript yet...'}
          </div>
        </div>

        <div className="mt-8">
          <h2 className="text-xl font-semibold mb-4">Segments</h2>
          <div className="bg-white rounded-lg p-4 min-h-[100px] shadow">
            {segments.length > 0 || currentSegment ? (
              <div className="space-y-2">
                {segments.map((segment, index) => (
                  <div key={index} className="p-2 bg-gray-50 rounded">
                    <span className="font-medium">{segment.text}</span>
                    <span className="ml-2 text-xs text-gray-500">
                      {segment.completed ? '✓' : '...'}
                    </span>
                  </div>
                ))}
                {currentSegment && (
                  <div className="p-2 bg-blue-50 rounded border border-blue-200">
                    <span className="font-medium">{currentSegment.text}</span>
                    <span className="ml-2 text-xs text-blue-500">
                      Currently transcribing...
                    </span>
                  </div>
                )}
              </div>
            ) : (
              'No segments yet...'
            )}
          </div>
        </div>

        <div className="mt-8">
          <h2 className="text-xl font-semibold mb-4">Options</h2>
          <div className="grid grid-cols-2 gap-4">
            <div>
              <label className="block text-sm font-medium text-gray-700">Language</label>
              <select
                className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
                value={options.language || ''}
                onChange={(e) => setOptions(prev => ({ ...prev, language: e.target.value || null }))}
              >
                <option value="">Auto-detect</option>
                <option value="en">English</option>
                <option value="es">Spanish</option>
                <option value="fr">French</option>
                {/* Add more languages as needed */}
              </select>
            </div>

            <div>
              <label className="block text-sm font-medium text-gray-700">Task</label>
              <select
                className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
                value={options.task}
                onChange={(e) => setOptions(prev => ({ ...prev, task: e.target.value as 'transcribe' | 'translate' }))}
              >
                <option value="transcribe">Transcribe</option>
                <option value="translate">Translate to English</option>
              </select>
            </div>

            <div>
              <label className="block text-sm font-medium text-gray-700">Model</label>
              <select
                className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
                value={options.model}
                onChange={(e) => setOptions(prev => ({ ...prev, model: e.target.value as WhisperLiveOptions['model'] }))}
              >
                <option value="tiny.en">Tiny (English)</option>
                <option value="base.en">Base (English)</option>
                <option value="small.en">Small (English)</option>
                <option value="medium.en">Medium (English)</option>
                <option value="large">Large (Multilingual)</option>
              </select>
            </div>

            <div>
              <label className="flex items-center space-x-2">
                <input
                  type="checkbox"
                  className="rounded border-gray-300 text-blue-600 shadow-sm focus:border-blue-500 focus:ring-blue-500"
                  checked={options.useVad}
                  onChange={(e) => setOptions(prev => ({ ...prev, useVad: e.target.checked }))}
                />
                <span className="text-sm font-medium text-gray-700">Use Voice Activity Detection</span>
              </label>
            </div>
          </div>
        </div>
      </div>
    </div>
  )
}

13
src/env.d.ts vendored Normal file
View File

@ -0,0 +1,13 @@
/// <reference types="vite/client" />

/**
 * Typed view of the Vite build-time environment (`import.meta.env`).
 * Values are injected at build time from the project's .env files.
 */
interface ImportMetaEnv {
  // WebSocket endpoint of the WhisperLive server (e.g. "wss://host").
  readonly VITE_WHISPERLIVE_URL: string
  readonly VITE_APP_URL: string
  readonly VITE_APP_PROTOCOL: string
  readonly VITE_APP_NAME: string
  // NOTE(review): string flag ("true"/"false"), not a boolean — consumers
  // must compare against the string; confirm usage.
  readonly VITE_DEV: string
}

// Augments ImportMeta so `import.meta.env` type-checks across the app.
interface ImportMeta {
  readonly env: ImportMetaEnv
}

9
src/index.css Normal file
View File

@ -0,0 +1,9 @@
@tailwind base;
@tailwind components;
@tailwind utilities;
@layer base {
body {
@apply bg-gray-50 text-gray-900;
}
}

10
src/main.tsx Normal file
View File

@ -0,0 +1,10 @@
import React from 'react'
import ReactDOM from 'react-dom/client'
import App from './App'
import './index.css'

// Application entry point: mount <App /> under the #root element, wrapped in
// StrictMode so React surfaces unsafe patterns during development.
const container = document.getElementById('root')!
const root = ReactDOM.createRoot(container)
root.render(
  <React.StrictMode>
    <App />
  </React.StrictMode>,
)