This commit is contained in:
K Car 2025-07-10 09:25:19 +01:00
parent a7de5af021
commit 5bf9e53449
11 changed files with 457 additions and 76 deletions

7
.dockerignore Normal file
View File

@ -0,0 +1,7 @@
node_modules
.git
Dockerfile
docker-compose.yml
nginx/
*.log
.env

6
.env
View File

@ -1,5 +1 @@
VITE_WHISPERLIVE_URL=wss://whisperlive.classroomcopilot.ai
VITE_APP_URL=whisperlive.classroomcopilot.ai
VITE_APP_PROTOCOL=https
VITE_APP_NAME=ClassroomCopilotLive
VITE_DEV=false
VITE_WHISPERLIVE_URL=wss://whisperlive.kevlarai.com

View File

@ -9,16 +9,13 @@ RUN corepack prepare yarn@4.8.0 --activate
# Copy package files
COPY package.json yarn.lock ./
COPY whisperlive-frontend/package.json ./whisperlive-frontend/
# Now run yarn install
RUN yarn install
# Copy source files
COPY whisperlive-frontend ./whisperlive-frontend
COPY . ./
# Build the application
RUN yarn workspace whisperlive-frontend build
RUN yarn build
# Production stage
FROM nginx:alpine
@ -26,17 +23,11 @@ FROM nginx:alpine
# Create SSL directory
RUN mkdir -p /etc/nginx/ssl
# Create a win/macos switcher
ARG BUILD_OS
ENV BUILD_OS=${BUILD_OS}
ARG NGINX_MODE
ENV NGINX_MODE=${NGINX_MODE}
# Copy nginx configuration
COPY whisperlive-frontend/nginx/nginx-${BUILD_OS}-${NGINX_MODE:-dev}.conf /etc/nginx/conf.d/default.conf
COPY nginx/nginx.conf /etc/nginx/conf.d/default.conf
# Copy built files from builder
COPY --from=builder /app/whisperlive-frontend/dist /usr/share/nginx/html
COPY --from=builder /app/dist /usr/share/nginx/html
# Start nginx
CMD ["nginx", "-g", "daemon off;"]

8
docker-compose.yml Normal file
View File

@ -0,0 +1,8 @@
services:
whisperlive-frontend:
container_name: whisperlive-frontend
build:
context: .
dockerfile: ./Dockerfile
ports:
- "80:80"

View File

@ -1,55 +0,0 @@
server {
listen 5054;
server_name localhost;
return 301 https://$server_name$request_uri;
}
server {
listen 5055 ssl;
server_name localhost;
root /usr/share/nginx/html;
index index.html;
# SSL configuration
ssl_certificate /etc/nginx/ssl/fullchain.pem;
ssl_certificate_key /etc/nginx/ssl/privkey.pem;
ssl_protocols TLSv1.2 TLSv1.3;
ssl_ciphers HIGH:!aNULL:!MD5;
ssl_prefer_server_ciphers on;
ssl_session_cache shared:SSL:10m;
ssl_session_timeout 10m;
# Enable gzip compression
gzip on;
gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;
# Security headers
add_header X-Frame-Options "SAMEORIGIN";
add_header X-XSS-Protection "1; mode=block";
add_header X-Content-Type-Options "nosniff";
add_header Referrer-Policy "strict-origin-when-cross-origin";
add_header Content-Security-Policy "default-src 'self'; script-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline'; img-src 'self' data: blob:; media-src 'self' blob:; connect-src 'self' ws: wss:;";
add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
location / {
try_files $uri $uri/ /index.html;
}
# Cache static assets
location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg)$ {
expires 1y;
add_header Cache-Control "public, no-transform";
}
# WebSocket proxy for WhisperLive server
location /ws {
proxy_pass https://whisperlive.classroomcopilot.ai;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
}
}

View File

@ -1,6 +1,6 @@
server {
listen 5054;
server_name localhost;
listen 80;
server_name whisperlive-frontend.kevlarai.com;
root /usr/share/nginx/html;
index index.html;
@ -29,7 +29,7 @@ server {
# WebSocket proxy for WhisperLive server
location /ws {
proxy_pass https://whisperlive-macos:5050;
proxy_pass https://whisperlive.kevlarai.com;
proxy_http_version 1.1;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";

402
src/App.tsx Normal file
View File

@ -0,0 +1,402 @@
import { useState } from 'react'
import { Tab } from '@headlessui/react'
import { MicrophoneIcon, StopIcon, ArrowUpTrayIcon } from '@heroicons/react/24/solid'
// WhisperLive WebSocket endpoint, resolved at build time by Vite from
// VITE_WHISPERLIVE_URL (see src/env.d.ts).
// Fix: removed the leftover debug logs — one of them dumped the entire
// import.meta.env object to the browser console on every page load.
const wsUrl = import.meta.env.VITE_WHISPERLIVE_URL
/**
 * Options sent to the WhisperLive server in the initial handshake message
 * (spread into the first JSON payload after the socket opens).
 */
interface WhisperLiveOptions {
  // null = let the server auto-detect the spoken language.
  language: string | null
  // 'translate' renders the output in English (see the Options select UI).
  task: 'transcribe' | 'translate'
  // Whisper model size; the '.en' variants are English-only.
  model: 'tiny.en' | 'base.en' | 'small.en' | 'medium.en' | 'large'
  // Enable voice activity detection — presumably server-side; TODO confirm.
  useVad: boolean
}
/** Join the truthy class-name fragments into a single space-separated string. */
function classNames(...classes: string[]) {
  const kept: string[] = []
  for (const cls of classes) {
    if (cls) {
      kept.push(cls)
    }
  }
  return kept.join(' ')
}
/**
 * Generate an RFC 4122 version-4 UUID, used as the per-connection client id
 * in the WhisperLive handshake.
 *
 * Fix: the original called console.log for every generated hex digit
 * (~30 log lines per UUID) — leftover debug noise, now removed.
 * Note: Math.random() is not cryptographically strong; fine for a client id.
 */
function generateUUID(): string {
  return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function (c) {
    // Random nibble; for 'y' force the RFC 4122 variant bits (10xx → 8..b).
    const r = Math.random() * 16 | 0;
    const v = c === 'x' ? r : (r & 0x3 | 0x8);
    return v.toString(16);
  });
}
/**
 * WhisperLive transcription UI.
 *
 * Streams microphone audio (or a decoded uploaded audio file) to a
 * WhisperLive server over WebSocket and renders the running transcript plus
 * per-segment completion status.
 *
 * NOTE(review): the WebSocket / MediaStream / AudioContext created in
 * startRecording are local to that call and never stored, so stopRecording
 * cannot reach them — it only clears the isRecording flag. Confirm whether
 * teardown is handled elsewhere or is still TODO.
 */
export default function App() {
  const [isRecording, setIsRecording] = useState(false)
  // Concatenation of all completed segment texts.
  const [transcript, setTranscript] = useState('')
  // Segments the server has finalized.
  const [segments, setSegments] = useState<Array<{text: string, completed: boolean}>>([])
  // The trailing, still-changing segment (if any).
  const [currentSegment, setCurrentSegment] = useState<{text: string, completed: boolean} | null>(null)
  const [options, setOptions] = useState<WhisperLiveOptions>({
    language: null, // Auto-detect
    task: 'transcribe',
    model: 'base.en',
    useVad: true
  })

  // Open the microphone, connect to the server, and stream audio resampled
  // to 16 kHz through an AudioWorklet.
  const startRecording = async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
      const wsUrl = import.meta.env.VITE_WHISPERLIVE_URL
      const socket = new WebSocket(`${wsUrl}/ws`)
      console.log(socket)
      socket.onopen = () => {
        // Handshake: identify this client and send the decode options.
        socket.send(JSON.stringify({
          uid: generateUUID(),
          ...options
        }))
      }
      socket.onmessage = (event) => {
        console.log(event)
        const data = JSON.parse(event.data)
        // Server at capacity: it sends a human-readable wait message.
        if (data.status === 'WAIT') {
          console.log('data', data)
          alert(data.message)
          return
        }
        // Record the server's auto-detected language exactly once.
        if (data.language && options.language === null) {
          console.log('data.language', data.language)
          setOptions(prev => ({ ...prev, language: data.language }))
          return
        }
        if (data.message === 'DISCONNECT') {
          console.log('data.message', data.message)
          setIsRecording(false)
          return
        }
        // Handle transcription segments
        if (data.segments) {
          console.log('data.segments', data.segments)
          // Process completed segments (all but the last one)
          const completedSegments = data.segments.slice(0, -1).map((segment: { text: string }) => ({
            text: segment.text,
            completed: true
          }))
          // Process current (incomplete) segment
          const newCurrentSegment = data.segments.length > 0 ? {
            text: data.segments[data.segments.length - 1].text,
            completed: false
          } : null
          // Update segments state
          setSegments(completedSegments)
          // Update current segment state
          setCurrentSegment(newCurrentSegment)
          // Update transcript with all completed segments
          const fullTranscript = completedSegments.map((s: { text: string }) => s.text).join(' ')
          setTranscript(fullTranscript)
        }
      }
      console.log('audioContext')
      const audioContext = new AudioContext()
      console.log('audioContext', audioContext)
      const source = audioContext.createMediaStreamSource(stream)
      console.log('source', source)
      // Create and load the audio worklet
      await audioContext.audioWorklet.addModule('audioProcessor.js')
      const workletNode = new AudioWorkletNode(audioContext, 'audio-processor')
      workletNode.port.onmessage = (e) => {
        // Drop audio frames until the socket is actually open.
        if (!socket || socket.readyState !== WebSocket.OPEN) return
        const audioData16kHz = resampleTo16kHZ(e.data, audioContext.sampleRate)
        socket.send(audioData16kHz)
      }
      source.connect(workletNode)
      workletNode.connect(audioContext.destination)
      setIsRecording(true)
    } catch (err) {
      console.error('Error starting recording:', err)
      alert('Error starting recording. Please check your microphone permissions.')
    }
  }

  // NOTE(review): stub — only clears the flag; see the component-level note.
  const stopRecording = () => {
    setIsRecording(false)
    // Close WebSocket connection and stop audio recording
  }

  // Decode an uploaded audio file and replay its first channel to the server
  // in fixed-size chunks.
  const handleFileUpload = (event: React.ChangeEvent<HTMLInputElement>) => {
    const file = event.target.files?.[0]
    if (!file) return

    const reader = new FileReader()
    reader.onload = async (e) => {
      const arrayBuffer = e.target?.result as ArrayBuffer
      const audioContext = new AudioContext()
      const audioBuffer = await audioContext.decodeAudioData(arrayBuffer)

      // Process audio file in chunks
      // NOTE(review): hard-coded endpoint; startRecording builds its URL from
      // VITE_WHISPERLIVE_URL — these should probably agree.
      const socket = new WebSocket(`wss://whisperlive.classroomcopilot.ai/ws`)
      socket.onopen = () => {
        socket.send(JSON.stringify({
          uid: generateUUID(),
          ...options
        }))
      }
      socket.onmessage = (event) => {
        const data = JSON.parse(event.data)
        if (data.status === 'WAIT') {
          alert(data.message)
          return
        }
        if (data.language && options.language === null) {
          setOptions(prev => ({ ...prev, language: data.language }))
          return
        }
        if (data.message === 'DISCONNECT') {
          return
        }
        // Handle transcription segments
        if (data.segments) {
          // Process completed segments (all but the last one)
          const completedSegments = data.segments.slice(0, -1).map((segment: { text: string }) => ({
            text: segment.text,
            completed: true
          }))
          // Process current (incomplete) segment
          const newCurrentSegment = data.segments.length > 0 ? {
            text: data.segments[data.segments.length - 1].text,
            completed: false
          } : null
          // Update segments state
          setSegments(completedSegments)
          // Update current segment state
          setCurrentSegment(newCurrentSegment)
          // Update transcript with all completed segments
          // NOTE(review): joins with '' here but with ' ' in the mic path —
          // confirm which separator is intended.
          const fullTranscript = completedSegments.map((s: { text: string }) => s.text).join('')
          setTranscript(fullTranscript)
        }
      }

      // Process audio in chunks
      // NOTE(review): send() is called right after constructing the socket,
      // before the 'open' event necessarily fired — confirm this is safe for
      // the target runtime.
      const chunkSize = 4096
      const data = audioBuffer.getChannelData(0)
      for (let i = 0; i < data.length; i += chunkSize) {
        const chunk = data.slice(i, i + chunkSize)
        const audioData16kHz = resampleTo16kHZ(chunk, audioBuffer.sampleRate)
        socket.send(audioData16kHz)
      }
    }
    reader.readAsArrayBuffer(file)
  }

  // Resample a mono Float32Array to 16 kHz via linear interpolation.
  // The endpoints are copied directly; interior samples are interpolated.
  function resampleTo16kHZ(audioData: Float32Array, origSampleRate: number): Float32Array {
    const targetLength = Math.round(audioData.length * (16000 / origSampleRate))
    const resampledData = new Float32Array(targetLength)
    // Maps each output index back onto the source timeline.
    const springFactor = (audioData.length - 1) / (targetLength - 1)
    resampledData[0] = audioData[0]
    resampledData[targetLength - 1] = audioData[audioData.length - 1]
    for (let i = 1; i < targetLength - 1; i++) {
      const index = i * springFactor
      const leftIndex = Math.floor(index)
      const rightIndex = Math.ceil(index)
      const fraction = index - leftIndex
      // Linear interpolation between the two neighbouring source samples.
      resampledData[i] = audioData[leftIndex] + (audioData[rightIndex] - audioData[leftIndex]) * fraction
    }
    return resampledData
  }

  return (
    <div className="min-h-screen p-8">
      <div className="max-w-3xl mx-auto">
        <h1 className="text-3xl font-bold text-center mb-8">WhisperLive Transcription</h1>

        <Tab.Group>
          <Tab.List className="flex space-x-1 rounded-xl bg-blue-900/20 p-1">
            <Tab
              className={({ selected }: { selected: boolean }) =>
                classNames(
                  'w-full rounded-lg py-2.5 text-sm font-medium leading-5',
                  'ring-white ring-opacity-60 ring-offset-2 ring-offset-blue-400 focus:outline-none focus:ring-2',
                  selected
                    ? 'bg-white shadow text-blue-700'
                    : 'text-blue-100 hover:bg-white/[0.12] hover:text-white'
                )
              }
            >
              Record &amp; Transcribe
            </Tab>
            <Tab
              className={({ selected }: { selected: boolean }) =>
                classNames(
                  'w-full rounded-lg py-2.5 text-sm font-medium leading-5',
                  'ring-white ring-opacity-60 ring-offset-2 ring-offset-blue-400 focus:outline-none focus:ring-2',
                  selected
                    ? 'bg-white shadow text-blue-700'
                    : 'text-blue-100 hover:bg-white/[0.12] hover:text-white'
                )
              }
            >
              Upload Audio
            </Tab>
          </Tab.List>

          <Tab.Panels className="mt-4">
            <Tab.Panel>
              <div className="space-y-4">
                <div className="flex justify-center">
                  <button
                    onClick={isRecording ? stopRecording : startRecording}
                    className={classNames(
                      'flex items-center px-4 py-2 rounded-lg font-medium',
                      isRecording
                        ? 'bg-red-600 hover:bg-red-700 text-white'
                        : 'bg-blue-600 hover:bg-blue-700 text-white'
                    )}
                  >
                    {isRecording ? (
                      <>
                        <StopIcon className="h-5 w-5 mr-2" />
                        Stop Recording
                      </>
                    ) : (
                      <>
                        <MicrophoneIcon className="h-5 w-5 mr-2" />
                        Start Recording
                      </>
                    )}
                  </button>
                </div>
              </div>
            </Tab.Panel>

            <Tab.Panel>
              <div className="space-y-4">
                <div className="flex justify-center">
                  <label className="flex items-center px-4 py-2 bg-blue-600 hover:bg-blue-700 text-white rounded-lg cursor-pointer">
                    <ArrowUpTrayIcon className="h-5 w-5 mr-2" />
                    Upload Audio File
                    <input
                      type="file"
                      accept="audio/*"
                      className="hidden"
                      onChange={handleFileUpload}
                    />
                  </label>
                </div>
              </div>
            </Tab.Panel>
          </Tab.Panels>
        </Tab.Group>

        <div className="mt-8">
          <h2 className="text-xl font-semibold mb-4">Transcript</h2>
          <div className="bg-white rounded-lg p-4 min-h-[200px] shadow">
            {transcript || 'No transcript yet...'}
          </div>
        </div>

        <div className="mt-8">
          <h2 className="text-xl font-semibold mb-4">Segments</h2>
          <div className="bg-white rounded-lg p-4 min-h-[100px] shadow">
            {segments.length > 0 || currentSegment ? (
              <div className="space-y-2">
                {segments.map((segment, index) => (
                  <div key={index} className="p-2 bg-gray-50 rounded">
                    <span className="font-medium">{segment.text}</span>
                    <span className="ml-2 text-xs text-gray-500">
                      {segment.completed ? '✓' : '...'}
                    </span>
                  </div>
                ))}
                {currentSegment && (
                  <div className="p-2 bg-blue-50 rounded border border-blue-200">
                    <span className="font-medium">{currentSegment.text}</span>
                    <span className="ml-2 text-xs text-blue-500">
                      Currently transcribing...
                    </span>
                  </div>
                )}
              </div>
            ) : (
              'No segments yet...'
            )}
          </div>
        </div>

        <div className="mt-8">
          <h2 className="text-xl font-semibold mb-4">Options</h2>
          <div className="grid grid-cols-2 gap-4">
            <div>
              <label className="block text-sm font-medium text-gray-700">Language</label>
              <select
                className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
                value={options.language || ''}
                onChange={(e) => setOptions(prev => ({ ...prev, language: e.target.value || null }))}
              >
                <option value="">Auto-detect</option>
                <option value="en">English</option>
                <option value="es">Spanish</option>
                <option value="fr">French</option>
                {/* Add more languages as needed */}
              </select>
            </div>

            <div>
              <label className="block text-sm font-medium text-gray-700">Task</label>
              <select
                className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
                value={options.task}
                onChange={(e) => setOptions(prev => ({ ...prev, task: e.target.value as 'transcribe' | 'translate' }))}
              >
                <option value="transcribe">Transcribe</option>
                <option value="translate">Translate to English</option>
              </select>
            </div>

            <div>
              <label className="block text-sm font-medium text-gray-700">Model</label>
              <select
                className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
                value={options.model}
                onChange={(e) => setOptions(prev => ({ ...prev, model: e.target.value as WhisperLiveOptions['model'] }))}
              >
                <option value="tiny.en">Tiny (English)</option>
                <option value="base.en">Base (English)</option>
                <option value="small.en">Small (English)</option>
                <option value="medium.en">Medium (English)</option>
                <option value="large">Large (Multilingual)</option>
              </select>
            </div>

            <div>
              <label className="flex items-center space-x-2">
                <input
                  type="checkbox"
                  className="rounded border-gray-300 text-blue-600 shadow-sm focus:border-blue-500 focus:ring-blue-500"
                  checked={options.useVad}
                  onChange={(e) => setOptions(prev => ({ ...prev, useVad: e.target.checked }))}
                />
                <span className="text-sm font-medium text-gray-700">Use Voice Activity Detection</span>
              </label>
            </div>
          </div>
        </div>
      </div>
    </div>
  )
}

13
src/env.d.ts vendored Normal file
View File

@ -0,0 +1,13 @@
/// <reference types="vite/client" />

/**
 * Typed view of the Vite build-time environment (`import.meta.env`).
 * Values are injected at build time from the project's .env files.
 */
interface ImportMetaEnv {
  // WebSocket endpoint of the WhisperLive server (e.g. "wss://host").
  readonly VITE_WHISPERLIVE_URL: string
  readonly VITE_APP_URL: string
  readonly VITE_APP_PROTOCOL: string
  readonly VITE_APP_NAME: string
  // NOTE(review): string flag ("true"/"false"), not a boolean — consumers
  // must compare against the string; confirm usage.
  readonly VITE_DEV: string
}

// Augments ImportMeta so `import.meta.env` type-checks across the app.
interface ImportMeta {
  readonly env: ImportMetaEnv
}

9
src/index.css Normal file
View File

@ -0,0 +1,9 @@
@tailwind base;
@tailwind components;
@tailwind utilities;
@layer base {
body {
@apply bg-gray-50 text-gray-900;
}
}

10
src/main.tsx Normal file
View File

@ -0,0 +1,10 @@
import React from 'react'
import ReactDOM from 'react-dom/client'
import App from './App'
import './index.css'

// Application entry point: mount <App /> under the #root element, wrapped in
// StrictMode so React surfaces unsafe patterns during development.
const container = document.getElementById('root')!
const root = ReactDOM.createRoot(container)
root.render(
  <React.StrictMode>
    <App />
  </React.StrictMode>,
)