#!/usr/bin/env python3
"""
LiveTalker Fixed Voice Server
Reliable voice input with proper browser support
"""

import base64
import logging
import time
import uuid
from typing import Any, Dict, Tuple

import numpy as np

from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.responses import HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
import uvicorn

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="LiveTalker Voice Server")

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
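# NOTE: wildcard CORS keeps this demo reachable from any origin; restrict
# allow_origins before exposing the server beyond local testing.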

active_connections: Dict[str, Dict] = {}

def simple_vad(audio_data: np.ndarray, threshold: float = 0.01) -> Tuple[bool, float]:
    """Energy-based voice activity detection.

    Returns (is_speech, confidence): is_speech is True when the RMS energy
    exceeds the threshold; confidence is the RMS-to-threshold ratio clipped
    to [0, 1]. Results are cast to plain Python types so they JSON-serialize
    cleanly in send_json (numpy's bool_ is not JSON-serializable).
    """
    if len(audio_data) == 0:
        return False, 0.0
    
    rms = np.sqrt(np.mean(audio_data ** 2))
    is_speech = bool(rms > threshold)
    confidence = float(min(rms / threshold, 1.0)) if threshold > 0 else 0.0
    
    return is_speech, confidence
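
# Illustrative check: digital silence stays below the default threshold, while
# a half-amplitude 440 Hz tone (0.5 s at 16 kHz) saturates the confidence:
#   simple_vad(np.zeros(8000, dtype=np.float32))                        # -> (False, 0.0)
#   simple_vad(0.5 * np.sin(2 * np.pi * 440 * np.arange(8000) / 16000)) # -> (True, 1.0)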

@app.get("/")
async def root():
    """Main voice interface with fixed compatibility"""
    html_content = """<!DOCTYPE html>
<html>
<head>
    <title>LiveTalker Voice Assistant</title>
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <style>
        body { 
            font-family: system-ui, sans-serif; 
            margin: 0; padding: 20px;
            background: linear-gradient(135deg, #4f46e5 0%, #7c3aed 100%);
            color: white; min-height: 100vh;
        }
        .container { 
            max-width: 900px; margin: 0 auto; 
            background: rgba(255,255,255,0.1);
            padding: 30px; border-radius: 20px;
            backdrop-filter: blur(10px);
        }
        h1 { text-align: center; margin-bottom: 30px; font-size: 2.5em; }
        .card { 
            padding: 20px; margin: 20px 0; border-radius: 12px; 
            background: rgba(255,255,255,0.15); border: 2px solid transparent;
        }
        .card.success { border-color: #10b981; background: rgba(16,185,129,0.2); }
        .card.warning { border-color: #f59e0b; background: rgba(245,158,11,0.2); }
        .card.error { border-color: #ef4444; background: rgba(239,68,68,0.2); }
        .card.active { border-color: #3b82f6; background: rgba(59,130,246,0.2); }
        
        .controls { 
            display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 15px; margin: 30px 0;
        }
        .btn { 
            padding: 15px 20px; border: none; border-radius: 8px; 
            background: #3b82f6; color: white; cursor: pointer; 
            font-size: 16px; transition: all 0.2s ease;
        }
        .btn:hover { background: #2563eb; transform: translateY(-1px); }
        .btn:disabled { background: #6b7280; cursor: not-allowed; }
        .btn.success { background: #10b981; }
        .btn.danger { background: #ef4444; }
        
        .mic-visual {
            text-align: center; margin: 30px 0;
        }
        .mic-btn {
            width: 100px; height: 100px; border-radius: 50%;
            background: #ef4444; border: none; cursor: pointer;
            font-size: 30px; color: white;
            transition: all 0.3s ease;
        }
        .mic-btn.active {
            background: #10b981;
            animation: pulse 1s infinite;
        }
        @keyframes pulse {
            0%, 100% { transform: scale(1); }
            50% { transform: scale(1.1); }
        }
        
        .vad-bar {
            width: 100%; height: 30px; margin: 15px 0;
            background: rgba(255,255,255,0.2); border-radius: 15px;
            overflow: hidden;
        }
        .vad-level {
            height: 100%; background: linear-gradient(90deg, #10b981, #f59e0b, #ef4444);
            width: 0%; transition: width 0.1s ease; border-radius: 15px;
        }
        
        .conversation {
            background: rgba(0,0,0,0.3); border-radius: 10px; 
            padding: 20px; margin: 20px 0; height: 300px; overflow-y: auto;
        }
        .message {
            margin: 10px 0; padding: 10px; border-radius: 8px; max-width: 80%;
        }
        .message.user { 
            background: rgba(59,130,246,0.4); margin-left: auto; text-align: right;
        }
        .message.assistant { 
            background: rgba(16,185,129,0.4); margin-right: auto;
        }
        .message.system { 
            background: rgba(107,114,128,0.4); text-align: center; 
            margin: 10px auto; font-style: italic; max-width: 90%;
        }
        
        .log {
            background: rgba(0,0,0,0.4); padding: 15px; border-radius: 8px;
            height: 150px; overflow-y: auto; font-family: monospace;
            font-size: 13px; white-space: pre-wrap;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>🎙️ LiveTalker</h1>
        
        <div class="card" id="status">
            <h3>Connection Status</h3>
            <div id="statusText">Ready to connect</div>
        </div>
        
        <div class="controls">
            <button class="btn" onclick="connect()">🔗 Connect</button>
            <button class="btn" id="micBtn" onclick="setupMic()" disabled>🎤 Setup Mic</button>
            <button class="btn success" id="startBtn" onclick="startVoice()" disabled>▶️ Start Voice</button>
            <button class="btn danger" id="stopBtn" onclick="stopVoice()" disabled>⏹️ Stop</button>
        </div>
        
        <div class="card" id="micStatus">
            <h3>🎤 Microphone</h3>
            <div id="micText">Not initialized</div>
        </div>
        
        <div class="mic-visual">
            <button class="mic-btn" id="micVisual" onclick="toggleVoice()">🎤</button>
            <div class="vad-bar">
                <div class="vad-level" id="vadBar"></div>
            </div>
            <div id="vadText">Click to start voice detection</div>
        </div>
        
        <div class="conversation" id="chat">
            <div class="message system">Welcome! Connect and setup microphone to start voice chat.</div>
        </div>
        
        <div class="card">
            <h3>Activity Log</h3>
            <div class="log" id="log">Waiting...</div>
        </div>
    </div>

    <script>
        let ws = null;
        let stream = null;
        let context = null;
        let processor = null;
        let recording = false;
        let connected = false;
        
        function log(msg) {
            const el = document.getElementById('log');
            el.textContent += `[${new Date().toLocaleTimeString()}] ${msg}\\n`;
            el.scrollTop = el.scrollHeight;
        }
        
        function updateStatus(text, type = '') {
            const el = document.getElementById('status');
            document.getElementById('statusText').textContent = text;
            el.className = `card ${type}`;
        }
        
        function updateMic(text, type = '') {
            const el = document.getElementById('micStatus');
            document.getElementById('micText').textContent = text;
            el.className = `card ${type}`;
        }
        
        function addChat(role, text) {
            const chat = document.getElementById('chat');
            const msg = document.createElement('div');
            msg.className = `message ${role}`;
            msg.textContent = text;
            chat.appendChild(msg);
            chat.scrollTop = chat.scrollHeight;
        }
        
        function updateVAD(level, active) {
            document.getElementById('vadBar').style.width = `${level * 100}%`;
            document.getElementById('vadText').textContent = active ? 
                `🎵 Voice: ${Math.round(level * 100)}%` : 
                `Silence: ${Math.round(level * 100)}%`;
            
            const visual = document.getElementById('micVisual');
            if (active) {
                visual.classList.add('active');
            } else {
                visual.classList.remove('active');
            }
        }
        
        async function connect() {
            try {
                const protocol = location.protocol === 'https:' ? 'wss:' : 'ws:';
                const url = `${protocol}//${location.host}/media-stream`;
                
                log('Connecting...');
                updateStatus('Connecting...', 'warning');
                
                ws = new WebSocket(url);
                
                ws.onopen = () => {
                    connected = true;
                    log('✅ Connected');
                    updateStatus('✅ Connected', 'success');
                    document.getElementById('micBtn').disabled = false;
                };
                
                ws.onmessage = (event) => {
                    const data = JSON.parse(event.data);
                    handleMessage(data);
                };
                
                ws.onclose = () => {
                    connected = false;
                    log('❌ Disconnected');
                    updateStatus('❌ Disconnected', 'error');
                };
                
                ws.onerror = () => {
                    log('❌ Connection error');
                    updateStatus('❌ Connection failed', 'error');
                };
                
            } catch (error) {
                log(`Connection error: ${error.message}`);
                updateStatus('Connection failed', 'error');
            }
        }
        
        function handleMessage(data) {
            log(`📨 ${data.type}`);
            
            switch(data.type) {
                case 'config':
                    addChat('system', 'Voice processing ready!');
                    break;
                    
                case 'vad_result':
                    updateVAD(data.confidence || 0, data.is_speech);
                    break;
                    
                case 'speech_to_text':
                    if (data.text) {
                        addChat('user', data.text);
                        log(`You: ${data.text}`);
                    }
                    break;
                    
                case 'ai_response':
                    if (data.text) {
                        addChat('assistant', data.text);
                        log('AI responded');
                    }
                    break;
                    
                case 'conversation_started':
                    addChat('system', 'Listening for your voice...');
                    updateStatus('🎧 Listening', 'active');
                    break;
                    
                case 'error':
                    log(`Error: ${data.error}`);
                    addChat('system', `Error: ${data.error}`);
                    break;
            }
        }
        
        async function setupMic() {
            try {
                log('🎤 Requesting microphone...');
                updateMic('Requesting permission...', 'warning');
                
                // getUserMedia requires a secure context (HTTPS or localhost)
                if (!navigator.mediaDevices?.getUserMedia) {
                    throw new Error('Microphone not supported (serve over HTTPS or localhost)');
                }
                
                stream = await navigator.mediaDevices.getUserMedia({
                    audio: {
                        sampleRate: 16000,
                        channelCount: 1,
                        echoCancellation: true,
                        noiseSuppression: true
                    }
                });
                
                log('✅ Microphone ready');
                updateMic('✅ Ready', 'success');
                
                // Set up audio processing. ScriptProcessorNode is deprecated
                // in favor of AudioWorklet, but it remains the simplest widely
                // supported capture path for a demo like this.
                const AudioContext = window.AudioContext || window.webkitAudioContext;
                context = new AudioContext({ sampleRate: 16000 });
                if (context.sampleRate !== 16000) {
                    log(`⚠️ Capturing at ${context.sampleRate} Hz (requested 16000); server assumes 16 kHz`);
                }
                
                const source = context.createMediaStreamSource(stream);
                processor = context.createScriptProcessor(1024, 1, 1);
                
                processor.onaudioprocess = (event) => {
                    if (recording && connected) {
                        const input = event.inputBuffer.getChannelData(0);
                        sendAudio(input);
                    }
                };
                
                source.connect(processor);
                processor.connect(context.destination);
                
                document.getElementById('startBtn').disabled = false;
                document.getElementById('micBtn').disabled = true;
                document.getElementById('micBtn').textContent = '✅ Ready';
                
            } catch (error) {
                log(`Microphone error: ${error.message}`);
                updateMic(`❌ ${error.message}`, 'error');
                
                if (error.name === 'NotAllowedError') {
                    alert('Please allow microphone access and try again');
                } else {
                    alert(`Microphone error: ${error.message}`);
                }
            }
        }
        
        function sendAudio(data) {
            if (!ws || ws.readyState !== WebSocket.OPEN) return;
            
            // Convert to 16-bit PCM
            const buffer = new Int16Array(data.length);
            for (let i = 0; i < data.length; i++) {
                buffer[i] = Math.max(-1, Math.min(1, data[i])) * 0x7FFF;
            }
            
            // Convert to base64
            const bytes = new Uint8Array(buffer.buffer);
            const base64 = btoa(String.fromCharCode(...bytes));
            
            ws.send(JSON.stringify({
                type: 'audio',
                data: base64,
                format: 'pcm_s16le',
                sample_rate: 16000
            }));
        }
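
        // Data-rate note: each onaudioprocess callback delivers 1024 samples,
        // i.e. 64 ms of audio at 16 kHz (2048 bytes of PCM, ~2.7 KB as base64),
        // or roughly 15-16 WebSocket messages per second while recording.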
        
        function startVoice() {
            if (!connected) {
                alert('Connect first');
                return;
            }
            if (!stream) {
                alert('Setup microphone first');
                return;
            }
            
            recording = true;
            log('🎧 Voice detection started');
            
            document.getElementById('startBtn').disabled = true;
            document.getElementById('stopBtn').disabled = false;
            
            if (context?.state === 'suspended') {
                context.resume();
            }
            
            ws?.send(JSON.stringify({
                type: 'start_conversation',
                config: { mode: 'voice' }
            }));
        }
        
        function stopVoice() {
            recording = false;
            log('🛑 Voice detection stopped');
            updateStatus('✅ Connected', 'success');
            updateVAD(0, false);
            
            document.getElementById('startBtn').disabled = false;
            document.getElementById('stopBtn').disabled = true;
            
            ws?.send(JSON.stringify({ type: 'stop_listening' }));
        }
        
        function toggleVoice() {
            if (recording) {
                stopVoice();
            } else {
                startVoice();
            }
        }
        
        // Initialize
        document.addEventListener('DOMContentLoaded', () => {
            log('LiveTalker loaded');
            log('Steps: Connect → Setup Mic → Start Voice');
        });
        
        // Cleanup
        window.addEventListener('beforeunload', () => {
            if (recording) stopVoice();
            ws?.close();
        });
    </script>
</body>
</html>"""
    return HTMLResponse(content=html_content)

@app.get("/health")
async def health_check():
    """Health check endpoint"""
    return {
        "status": "healthy",
        "timestamp": time.time(),
        "active_connections": len(active_connections),
        "features": {
            "voice_input": True,
            "simple_vad": True,
            "real_time": True
        }
    }
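
# Quick check (illustrative):
#   $ curl http://localhost:8000/health
#   {"status": "healthy", "timestamp": ..., "active_connections": 0, ...}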

@app.websocket("/media-stream")
async def websocket_endpoint(websocket: WebSocket):
    """WebSocket for voice processing"""
    await websocket.accept()
    session_id = f"session_{int(time.time() * 1000)}"
    
    session = {
        "id": session_id,
        "websocket": websocket,
        "audio_buffer": [],
        "is_listening": False,
        "last_speech": 0
    }
    active_connections[session_id] = session
    
    logger.info(f"Voice session started: {session_id}")
    
    try:
        await websocket.send_json({
            "type": "config",
            "session_id": session_id,
            "message": "Voice processing initialized"
        })
        
        async for message in websocket.iter_json():
            await handle_message(session, message)
            
    except WebSocketDisconnect:
        logger.info(f"Session ended: {session_id}")
    except Exception as e:
        logger.error(f"Session error: {e}")
        try:
            await websocket.send_json({
                "type": "error",
                "error": str(e)
            })
        except Exception:
            pass
    finally:
        if session_id in active_connections:
            del active_connections[session_id]

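# Client -> server messages (exactly what the browser script above sends):
#   {"type": "start_conversation", "config": {"mode": "voice"}}
#   {"type": "audio", "data": "<base64>", "format": "pcm_s16le", "sample_rate": 16000}
#   {"type": "stop_listening"}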
async def handle_message(session: Dict, message: Dict[str, Any]):
    """Handle WebSocket messages"""
    msg_type = message.get("type")
    
    if msg_type == "start_conversation":
        session["is_listening"] = True
        await session["websocket"].send_json({
            "type": "conversation_started",
            "message": "Voice conversation active!"
        })
        
    elif msg_type == "audio" and session["is_listening"]:
        try:
            # Decode audio
            audio_data = base64.b64decode(message["data"])
            audio_np = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0
            
            if len(audio_np) > 0:
                # Run VAD
                is_speech, confidence = simple_vad(audio_np, threshold=0.015)
                
                # Send VAD result
                await session["websocket"].send_json({
                    "type": "vad_result",
                    "is_speech": is_speech,
                    "confidence": confidence,
                    "timestamp": time.time()
                })
                
                # Accumulate speech while VAD fires
                if is_speech:
                    session["audio_buffer"].extend(audio_np.tolist())
                    session["last_speech"] = time.time()
                    
                    # Flush once ~1 s of speech has accumulated (16000 samples at 16 kHz)
                    if len(session["audio_buffer"]) > 16000:
                        await process_speech(session)
                        
                # Flush on >0.8 s of silence after at least ~0.5 s of buffered speech
                elif (len(session["audio_buffer"]) > 8000 and 
                      time.time() - session["last_speech"] > 0.8):
                    await process_speech(session)
                    
        except Exception as e:
            await session["websocket"].send_json({
                "type": "error",
                "error": f"Audio processing failed: {str(e)}"
            })
            
    elif msg_type == "stop_listening":
        session["is_listening"] = False
        if session["audio_buffer"]:
            await process_speech(session)

async def process_speech(session: Dict):
    """Process accumulated speech"""
    if len(session["audio_buffer"]) < 4000:  # Skip very short audio
        session["audio_buffer"] = []
        return
        
    try:
        duration = len(session["audio_buffer"]) / 16000  # samples -> seconds at 16 kHz
        text = f"Speech detected: {duration:.1f} second audio segment"
        
        await session["websocket"].send_json({
            "type": "speech_to_text",
            "text": text,
            "confidence": 0.85,
            "duration": duration
        })
        
        # Canned acknowledgement; a real assistant would generate this with an LLM
        response = f"I heard {duration:.1f} seconds of your voice! Voice detection is working perfectly."
        
        await session["websocket"].send_json({
            "type": "ai_response",
            "text": response,
            "processing_time": f"{duration:.1f}s audio processed"
        })
        
        session["audio_buffer"] = []
        logger.info(f"Processed {duration:.1f}s speech in session {session['id']}")
        
    except Exception as e:
        logger.error(f"Speech processing error: {e}")
        session["audio_buffer"] = []

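# End-to-end smoke test (sketch, assuming the third-party `websockets` client
# package; run it against a live server from a separate process):
#   import asyncio, base64, json
#   import numpy as np
#   import websockets
#
#   async def main():
#       async with websockets.connect("ws://localhost:8000/media-stream") as ws:
#           print(await ws.recv())  # "config" greeting
#           await ws.send(json.dumps({"type": "start_conversation", "config": {"mode": "voice"}}))
#           t = np.arange(32000) / 16000  # 2 s at 16 kHz
#           pcm = (0.3 * np.sin(2 * np.pi * 440 * t) * 32767).astype(np.int16).tobytes()
#           await ws.send(json.dumps({"type": "audio", "data": base64.b64encode(pcm).decode(),
#                                     "format": "pcm_s16le", "sample_rate": 16000}))
#           for _ in range(4):  # conversation_started, vad_result, speech_to_text, ai_response
#               print(await ws.recv())
#   asyncio.run(main())
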
if __name__ == "__main__":
    print("🎙️ LiveTalker Voice Server")
    print("✅ Real microphone input")
    print("✅ Voice activity detection") 
    print("✅ Browser compatibility")
    print("✅ Real-time processing")
    print("")
    print("📱 Local: http://localhost:8000")
    print("🌐 Network: http://100.118.75.128:8000")
    print("")
    print("🔧 Steps: Connect → Setup Mic → Start Voice")
    
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
        log_level="info"
    )