#!/usr/bin/env python3
"""
Stable GPU Voice Chat Server - Simplified Version
"""
import asyncio
import logging
import json
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.responses import HTMLResponse
import uvicorn

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global variables for GPU model
gpu_model = None
gpu_tokenizer = None
device = None

async def initialize_gpu_model():
    """Initialize GPU model once at startup"""
    global gpu_model, gpu_tokenizer, device
    
    logger.info("🔥 Initializing GPU model...")
    
    try:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        logger.info(f"Using device: {device}")
        
        if torch.cuda.is_available():
            logger.info(f"GPU: {torch.cuda.get_device_name(0)}")
        
        # Load tokenizer
        gpu_tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
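        # DialoGPT's tokenizer ships without a pad token, so reuse EOS for padding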
        if gpu_tokenizer.pad_token is None:
            gpu_tokenizer.pad_token = gpu_tokenizer.eos_token
        
        # Load model (half precision on GPU, full precision on CPU)
        gpu_model = AutoModelForCausalLM.from_pretrained(
            "microsoft/DialoGPT-small",
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto" if torch.cuda.is_available() else None
        )
        
        gpu_model.eval()
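        # Note: device_map="auto" above requires the accelerate package; without it, drop
        # that argument and move the model to the target device with .to(device) instead.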
        logger.info("✅ GPU model initialized successfully!")
        
    except Exception as e:
        logger.error(f"❌ Failed to initialize GPU model: {e}")
        gpu_model = None
        gpu_tokenizer = None

async def generate_gpu_response(user_input: str) -> str:
    """Generate response using GPU model"""
    global gpu_model, gpu_tokenizer, device
    
    if not gpu_model or not gpu_tokenizer:
        return "Sorry, GPU model not available. Please try again."
    
    try:
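        # Note: generate() below runs synchronously and blocks the event loop while it
        # works; heavier deployments may want to offload it to a thread (e.g. asyncio.to_thread).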
        # Encode the user input and append the end-of-sequence token
        input_ids = gpu_tokenizer.encode(
            user_input + gpu_tokenizer.eos_token,
            return_tensors="pt"
        )

        if device and device.type == "cuda":
            input_ids = input_ids.to(device)

        # Generate a response (beam sampling, at most 30 new tokens)
        with torch.no_grad():
            output_ids = gpu_model.generate(
                input_ids,
                max_length=input_ids.shape[1] + 30,
                num_beams=3,
                do_sample=True,
                temperature=0.8,
                pad_token_id=gpu_tokenizer.eos_token_id
            )

        # Decode only the newly generated tokens; slicing against the original prompt
        # length (not the output length) keeps the reply from coming back empty
        response = gpu_tokenizer.decode(
            output_ids[0, input_ids.shape[1]:],
            skip_special_tokens=True
        )
        
        # Clean up response
        response = response.strip()
        if not response:
            response = "I understand. Could you tell me more about that?"
        
        return response
        
    except Exception as e:
        logger.error(f"GPU generation error: {e}")
        return f"I'm having trouble processing that. Could you rephrase? (GPU Error: {str(e)[:50]})"

# Create FastAPI app
app = FastAPI(title="Stable GPU LiveTalker", version="1.0.0")

# Single-page voice chat client: WebSocket transport, Web Speech API for speech input, speechSynthesis for spoken replies
VOICE_CHAT_HTML = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>LiveTalker GPU Voice Chat</title>
    <style>
        * { margin: 0; padding: 0; box-sizing: border-box; }
        body {
            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            height: 100vh; display: flex; justify-content: center; align-items: center;
            color: white;
        }
        .chat-container {
            background: rgba(255,255,255,0.95); border-radius: 20px;
            box-shadow: 0 20px 60px rgba(0,0,0,0.3);
            width: 90%; max-width: 600px; height: 80vh;
            display: flex; flex-direction: column; overflow: hidden; color: #333;
        }
        .header {
            background: linear-gradient(135deg, #667eea, #764ba2);
            color: white; padding: 30px; text-align: center;
        }
        .header h1 { font-size: 2em; margin-bottom: 10px; }
        .status { padding: 8px 16px; border-radius: 20px; font-weight: bold;
            margin-top: 15px; display: inline-block; }
        .connected { background: rgba(76,175,80,0.9); }
        .disconnected { background: rgba(244,67,54,0.9); }
        .connecting { background: rgba(255,152,0,0.9); }
        .gpu-info {
            background: linear-gradient(135deg, #4CAF50, #45a049);
            border: none; border-radius: 10px; padding: 15px;
            margin: 20px; text-align: center; color: white; font-weight: bold;
        }
        .messages { flex: 1; overflow-y: auto; padding: 20px; background: #f8f9fa; }
        .message { margin-bottom: 20px; animation: fadeIn 0.4s ease; }
        @keyframes fadeIn { from { opacity: 0; transform: translateY(20px); } to { opacity: 1; transform: translateY(0); } }
        .user-msg { text-align: right; }
        .user-msg .bubble { background: linear-gradient(135deg, #667eea, #764ba2); color: white; }
        .assistant-msg .bubble { background: white; border: 2px solid #e1e8ed; color: #333; }
        .bubble {
            display: inline-block; max-width: 80%; padding: 15px 20px;
            border-radius: 18px; font-size: 16px; line-height: 1.4;
        }
        .input-area { padding: 25px; background: white; border-top: 1px solid #e1e8ed; }
        .input-container { display: flex; gap: 15px; align-items: center; }
        .message-input {
            flex: 1; padding: 15px 20px; border: 2px solid #e1e8ed;
            border-radius: 25px; font-size: 16px; outline: none; transition: all 0.3s;
        }
        .message-input:focus { border-color: #667eea; box-shadow: 0 0 0 3px rgba(102,126,234,0.1); }
        .send-btn, .voice-btn {
            padding: 15px; border: none; border-radius: 50%; cursor: pointer;
            font-size: 20px; transition: all 0.3s; width: 50px; height: 50px;
            display: flex; align-items: center; justify-content: center;
        }
        .send-btn { background: linear-gradient(135deg, #667eea, #764ba2); color: white; }
        .voice-btn { background: #4CAF50; color: white; }
        .voice-btn.recording { background: #f44336; animation: pulse 1s infinite; }
        @keyframes pulse { 0%, 100% { transform: scale(1); } 50% { transform: scale(1.1); } }
        .send-btn:hover, .voice-btn:hover { transform: translateY(-2px); box-shadow: 0 5px 15px rgba(0,0,0,0.2); }
        .send-btn:disabled, .voice-btn:disabled { background: #ccc; cursor: not-allowed; transform: none; }
    </style>
</head>
<body>
    <div class="chat-container">
        <div class="header">
            <h1>🎙️ LiveTalker GPU</h1>
            <p>RTX 3090 Powered AI Voice Assistant</p>
            <div id="status" class="status disconnected">Disconnected</div>
        </div>
        
        <div class="gpu-info">
            <strong>🚀 GPU ACCELERATED:</strong> Ultra-fast DialoGPT responses with CUDA 11.8
        </div>
        
        <div class="messages" id="messages">
            <div style="text-align: center; padding: 40px 20px; color: #666;">
                <h2 style="margin-bottom: 15px; color: #333;">GPU Voice Chat Ready!</h2>
                <p>Click the microphone to speak, or type your message below.</p>
                <p><small>Powered by RTX 3090 • Real-time GPU inference</small></p>
            </div>
        </div>
        
        <div class="input-area">
            <div class="input-container">
                <button id="voiceBtn" class="voice-btn" disabled title="Voice Input">🎤</button>
                <input type="text" id="messageInput" class="message-input" 
                       placeholder="Speak or type your message..." disabled />
                <button id="sendBtn" class="send-btn" disabled title="Send Message">➤</button>
            </div>
        </div>
    </div>

    <script>
        class VoiceChatInterface {
            constructor() {
                this.websocket = null;
                this.isConnected = false;
                this.isRecording = false;
                
                this.messagesContainer = document.getElementById('messages');
                this.messageInput = document.getElementById('messageInput');
                this.sendBtn = document.getElementById('sendBtn');
                this.voiceBtn = document.getElementById('voiceBtn');
                this.status = document.getElementById('status');
                
                this.setupEventListeners();
                this.connect();
                this.setupVoiceRecognition();
            }
            
            setupEventListeners() {
                this.sendBtn.addEventListener('click', () => this.sendMessage());
                this.voiceBtn.addEventListener('click', () => this.toggleVoiceInput());
                this.messageInput.addEventListener('keypress', (e) => {
                    if (e.key === 'Enter') {
                        e.preventDefault();
                        this.sendMessage();
                    }
                });
            }
            
            async setupVoiceRecognition() {
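                // Speech-to-text uses the Web Speech API, which not every browser exposes;
                // when it is unavailable, the mic button stays disabled.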
                if ('webkitSpeechRecognition' in window || 'SpeechRecognition' in window) {
                    const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
                    this.recognition = new SpeechRecognition();
                    this.recognition.continuous = false;
                    this.recognition.interimResults = false;
                    this.recognition.lang = 'en-US';
                    
                    this.recognition.onstart = () => {
                        this.isRecording = true;
                        this.voiceBtn.classList.add('recording');
                        this.voiceBtn.innerHTML = '⏹️';
                        this.messageInput.placeholder = 'Listening...';
                    };
                    
                    this.recognition.onend = () => {
                        this.isRecording = false;
                        this.voiceBtn.classList.remove('recording');
                        this.voiceBtn.innerHTML = '🎤';
                        this.messageInput.placeholder = 'Speak or type your message...';
                    };
                    
                    this.recognition.onresult = (event) => {
                        const transcript = event.results[0][0].transcript;
                        this.messageInput.value = transcript;
                        this.sendMessage();
                    };
                    
                    this.recognition.onerror = (event) => {
                        console.error('Speech recognition error:', event.error);
                        this.isRecording = false;
                        this.voiceBtn.classList.remove('recording');
                        this.voiceBtn.innerHTML = '🎤';
                    };
                }
            }
            
            toggleVoiceInput() {
                if (!this.recognition) return;
                
                if (this.isRecording) {
                    this.recognition.stop();
                } else {
                    this.recognition.start();
                }
            }
            
            connect() {
                this.updateStatus('Connecting...', 'connecting');
                
                const wsProtocol = location.protocol === 'https:' ? 'wss:' : 'ws:';
                const wsUrl = wsProtocol + '//' + location.host + '/ws';
                
                console.log('Connecting to WebSocket:', wsUrl);
                
                try {
                    this.websocket = new WebSocket(wsUrl);
                    
                    this.websocket.onopen = () => {
                        this.isConnected = true;
                        this.updateStatus('Connected', 'connected');
                        this.enableInterface();
                        this.clearWelcome();
                        this.addMessage('LiveTalker GPU', 'Hello! GPU acceleration is active. How can I help you today?', false);
                    };
                    
                    this.websocket.onmessage = (event) => {
                        const data = JSON.parse(event.data);
                        this.handleMessage(data);
                    };
                    
                    this.websocket.onclose = () => {
                        this.isConnected = false;
                        this.updateStatus('Disconnected', 'disconnected');
                        this.disableInterface();
                        console.log('WebSocket disconnected, attempting reconnect in 3s...');
                        setTimeout(() => this.connect(), 3000);
                    };
                    
                    this.websocket.onerror = (error) => {
                        console.error('WebSocket error:', error);
                        this.updateStatus('Error', 'disconnected');
                    };
                    
                } catch (error) {
                    console.error('Failed to create WebSocket:', error);
                    this.updateStatus('Failed', 'disconnected');
                    setTimeout(() => this.connect(), 5000);
                }
            }
            
            enableInterface() {
                this.messageInput.disabled = false;
                this.sendBtn.disabled = false;
                // Only enable the mic button when speech recognition is actually available
                this.voiceBtn.disabled = !this.recognition;
            }
            
            disableInterface() {
                this.messageInput.disabled = true;
                this.sendBtn.disabled = true;
                this.voiceBtn.disabled = true;
            }
            
            updateStatus(message, className) {
                this.status.textContent = message;
                this.status.className = 'status ' + className;
            }
            
            clearWelcome() {
                const welcome = this.messagesContainer.querySelector('div[style]');
                if (welcome) {
                    welcome.style.display = 'none';
                }
            }
            
            sendMessage() {
                const message = this.messageInput.value.trim();
                if (!message || !this.isConnected) return;
                
                this.addMessage('You', message, true);
                
                const data = {
                    type: 'text',
                    text: message
                };
                
                this.websocket.send(JSON.stringify(data));
                this.messageInput.value = '';
            }
            
            handleMessage(data) {
                console.log('Received message:', data);
                
                if (data.type === 'text') {
                    this.addMessage('LiveTalker GPU', data.text, false);
                    
                    // Speak the response using browser TTS
                    if ('speechSynthesis' in window) {
                        const utterance = new SpeechSynthesisUtterance(data.text);
                        utterance.rate = 0.9;
                        utterance.pitch = 1.0;
                        speechSynthesis.speak(utterance);
                    }
                } else if (data.type === 'error') {
                    this.addMessage('System', 'Error: ' + data.error, false);
                }
            }
            
            addMessage(sender, text, isUser) {
                const messageDiv = document.createElement('div');
                messageDiv.className = 'message ' + (isUser ? 'user-msg' : 'assistant-msg');

                // Build the bubble with textContent so model output can't inject HTML
                const bubble = document.createElement('div');
                bubble.className = 'bubble';
                const label = document.createElement('strong');
                label.textContent = sender + ': ';
                bubble.appendChild(label);
                bubble.appendChild(document.createTextNode(text));
                messageDiv.appendChild(bubble);

                this.messagesContainer.appendChild(messageDiv);
                this.messagesContainer.scrollTop = this.messagesContainer.scrollHeight;
            }
        }
        
        document.addEventListener('DOMContentLoaded', () => {
            new VoiceChatInterface();
        });
    </script>
</body>
</html>
"""

@app.on_event("startup")
async def startup_event():
    """Initialize GPU model on startup"""
    await initialize_gpu_model()

@app.get("/", response_class=HTMLResponse)
async def voice_chat():
    """Main voice chat interface"""
    return VOICE_CHAT_HTML

@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """WebSocket endpoint for real-time voice chat"""
    await websocket.accept()
    logger.info('GPU Voice chat WebSocket connected')
    
    try:
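        # Message protocol: the client sends {"type": "text", "text": ...}; the server
        # replies with {"type": "text", "text": ..., "speaker": "assistant"}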
        while True:
            data = await websocket.receive_text()
            message = json.loads(data)
            logger.info(f'Received message: {message}')
            
            if message.get('type') == 'text':
                user_text = message.get('text', '')
                
                # Generate GPU response
                response = await generate_gpu_response(user_text)
                
                await websocket.send_text(json.dumps({
                    'type': 'text',
                    'text': response,
                    'speaker': 'assistant'
                }))
                
    except WebSocketDisconnect:
        logger.info('GPU Voice chat WebSocket disconnected')
    except Exception as e:
        logger.error(f'WebSocket error: {e}')

@app.get("/health")
async def health():
    """Health check endpoint"""
    global gpu_model, device
    
    return {
        'status': 'ok',
        'mode': 'stable_gpu',
        'model_loaded': gpu_model is not None,
        'gpu_available': torch.cuda.is_available(),
        'device': str(device) if device else 'unknown',
        'features': ['voice_input', 'voice_output', 'gpu_inference']
    }

def main():
    """Start the stable GPU voice chat server"""
    logger.info("🚀 Starting Stable GPU LiveTalker Server")
    uvicorn.run(app, host="0.0.0.0", port=8002, log_level="info")

if __name__ == "__main__":
    main()