#!/usr/bin/env python3
"""
Real LiveTalker Voice Chat Server
With actual speech recognition and intelligent AI responses
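
Runs on port 8000; serves over HTTPS when livetalker.crt / livetalker.key are
present next to this file, otherwise falls back to HTTP. Set OPENAI_API_KEY to
use the Whisper API for transcription and GPT for replies; without it the
server falls back to Google's free recognizer and canned mock responses.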
"""

import uvicorn
import asyncio
import logging
import time
import base64
import struct
import wave
import tempfile
import os
from typing import Dict
from pathlib import Path

from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.responses import HTMLResponse
from fastapi.middleware.cors import CORSMiddleware

import speech_recognition as sr
import openai

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = FastAPI(title="LiveTalker Real Voice Chat")

# Enable CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

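# Active voice-chat sessions keyed by session id; each entry stores the
# websocket plus a per-session audio buffer and conversation history.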
active_sessions: Dict[str, Dict] = {}

# Initialize speech recognizer
recognizer = sr.Recognizer()

# Set OpenAI API key if available
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
if OPENAI_API_KEY:
    openai.api_key = OPENAI_API_KEY
    logger.info("OpenAI API key found - using GPT for responses")
else:
    logger.warning("No OpenAI API key found - using mock responses")

def simple_vad(audio_data, threshold: float = 0.01) -> tuple:
    """Simple energy-based voice activity detection.

    Expects an iterable of float samples in [-1.0, 1.0] and returns
    (is_speech, confidence).
    """
    if not audio_data:
        return False, 0.0

    # RMS energy of the frame
    rms = (sum(x * x for x in audio_data) / len(audio_data)) ** 0.5
    is_speech = rms > threshold
    confidence = min(rms / threshold, 1.0) if threshold > 0 else 0.0

    return is_speech, confidence

async def transcribe_audio(audio_data: bytes) -> str:
    """Transcribe raw 16-bit, 16 kHz mono PCM audio using speech recognition."""
    tmp_path = None
    try:
        # Wrap the raw PCM bytes in a WAV container for the recognizer.
        # Create the temp file first, then write it via the wave module so the
        # file is not held open twice (matters on Windows).
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
            tmp_path = tmp_file.name
        with wave.open(tmp_path, 'wb') as wav_file:
            wav_file.setnchannels(1)      # Mono
            wav_file.setsampwidth(2)      # 2 bytes per sample (16-bit)
            wav_file.setframerate(16000)  # 16 kHz
            wav_file.writeframes(audio_data)

        # Note: the recognizer calls below are blocking network requests; this is
        # acceptable for a demo but will stall the event loop under load.
        with sr.AudioFile(tmp_path) as source:
            audio = recognizer.record(source)
            try:
                if OPENAI_API_KEY:
                    # Prefer the OpenAI Whisper API when a key is available
                    text = recognizer.recognize_whisper_api(audio, api_key=OPENAI_API_KEY)
                    logger.info(f"Whisper API transcription: {text}")
                    return text
                else:
                    # Fall back to the free Google Web Speech API
                    text = recognizer.recognize_google(audio)
                    logger.info(f"Google transcription: {text}")
                    return text
            except sr.RequestError as e:
                logger.error(f"Speech recognition error: {e}")
                return ""
            except sr.UnknownValueError:
                logger.debug("Could not understand audio")
                return ""

    except Exception as e:
        logger.error(f"Transcription error: {e}")
        return ""
    finally:
        # Clean up the temp file
        if tmp_path and os.path.exists(tmp_path):
            os.unlink(tmp_path)

async def get_ai_response(text: str) -> str:
    """Get intelligent AI response"""
    try:
        if OPENAI_API_KEY:
            # Call the legacy (pre-1.0) openai.ChatCompletion API in a worker thread
            response = await asyncio.get_running_loop().run_in_executor(
                None,
                lambda: openai.ChatCompletion.create(
                    model="gpt-3.5-turbo",
                    messages=[
                        {"role": "system", "content": "You are a helpful voice assistant. Keep responses conversational and under 2 sentences."},
                        {"role": "user", "content": text}
                    ],
                    max_tokens=100,
                    temperature=0.7
                )
            )
            return response.choices[0].message.content.strip()
        else:
            # Simple mock responses
            responses = {
                "hello": "Hello! How are you doing today?",
                "how are you": "I'm doing great, thanks for asking! How about you?",
                "what time": f"The current time is {time.strftime('%I:%M %p')}",
                "weather": "I don't have access to weather data, but I hope it's nice where you are!",
                "thank you": "You're very welcome! Is there anything else I can help you with?",
                "bye": "Goodbye! Have a wonderful day!",
            }
            
            text_lower = text.lower()
            for key, response in responses.items():
                if key in text_lower:
                    return response
            
            return f"I heard you say '{text}'. That's interesting! Tell me more about that."
    
    except Exception as e:
        logger.error(f"AI response error: {e}")
        return "I'm sorry, I'm having trouble processing your request right now."

@app.get("/")
async def serve_main_page():
    """Serve main voice chat page"""
    html_content = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>LiveTalker Real Voice Chat</title>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            margin: 0; padding: 40px;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white; min-height: 100vh;
        }
        .container {
            max-width: 800px; margin: 0 auto;
            background: rgba(255,255,255,0.1); padding: 40px;
            border-radius: 20px; backdrop-filter: blur(15px);
            box-shadow: 0 8px 32px rgba(0,0,0,0.3);
        }
        h1 { text-align: center; font-size: 3em; margin-bottom: 30px; }
        .status {
            background: rgba(76,175,80,0.2); border: 2px solid #4CAF50;
            padding: 15px; border-radius: 10px; margin: 20px 0; text-align: center;
        }
        .controls {
            display: grid; grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
            gap: 15px; margin: 30px 0;
        }
        .btn {
            padding: 15px 25px; border: none; border-radius: 10px;
            background: #4CAF50; color: white; cursor: pointer;
            font-size: 16px; font-weight: 600; transition: all 0.3s ease;
        }
        .btn:hover { background: #45a049; transform: translateY(-2px); }
        .btn:disabled { background: #666; cursor: not-allowed; }
        .btn.danger { background: #f44336; }
        .vad-display {
            margin: 30px 0; padding: 20px;
            background: rgba(0,0,0,0.3); border-radius: 12px;
        }
        .vad-bar {
            width: 100%; height: 40px;
            background: rgba(255,255,255,0.2); border-radius: 20px;
            overflow: hidden; margin: 15px 0;
        }
        .vad-level {
            height: 100%; background: linear-gradient(90deg, #4CAF50, #8BC34A, #FFC107, #FF5722);
            width: 0%; transition: width 0.1s ease; border-radius: 20px;
        }
        .conversation {
            background: rgba(0,0,0,0.4); border-radius: 15px;
            padding: 20px; margin: 20px 0; max-height: 400px;
            overflow-y: auto; min-height: 200px;
        }
        .message {
            margin: 10px 0; padding: 10px 15px; border-radius: 8px;
            max-width: 85%; word-wrap: break-word;
        }
        .message.user {
            background: rgba(33,150,243,0.4); margin-left: auto; text-align: right;
        }
        .message.assistant {
            background: rgba(76,175,80,0.4); margin-right: auto;
        }
        .message.system {
            background: rgba(158,158,158,0.3); margin: 10px auto;
            text-align: center; font-style: italic; max-width: 90%;
        }
        .transcription-status {
            background: rgba(255,193,7,0.2); border: 1px solid #FFC107;
            padding: 10px; border-radius: 8px; margin: 10px 0;
            text-align: center; font-style: italic;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>🎙️ LiveTalker Real Voice Chat</h1>
        
        <div class="status">
            ✅ <strong>Real Speech Recognition & AI Responses</strong>
        </div>
        
        <div class="controls">
            <button class="btn" onclick="connectWebSocket()">🔗 Connect</button>
            <button class="btn" onclick="requestMicrophone()" id="micBtn">🎤 Enable Mic</button>
            <button class="btn" onclick="startListening()" id="startBtn" disabled>🎧 Start Chat</button>
            <button class="btn danger" onclick="stopListening()" id="stopBtn" disabled>🛑 Stop</button>
        </div>
        
        <div class="vad-display">
            <h3>🎵 Voice Activity Detection</h3>
            <div class="vad-bar">
                <div class="vad-level" id="vadLevel"></div>
            </div>
            <div style="text-align: center;" id="vadStatus">Ready to connect</div>
        </div>
        
        <div class="conversation" id="conversation">
            <div class="message system">🎙️ Real voice chat with speech recognition and AI responses!</div>
            <div class="message system">Click Connect → Enable Mic → Start Chat, then say something!</div>
        </div>
        
        <div class="transcription-status" id="transcriptionStatus" style="display:none;">
            🎤 Processing speech...
        </div>
    </div>

    <script>
        let ws = null; let mediaStream = null; let audioContext = null;
        let processor = null; let isRecording = false; let connected = false;
        let audioBuffer = []; let silenceStart = null;
        
        function addMessage(type, content) {
            const conversation = document.getElementById('conversation');
            const message = document.createElement('div');
            message.className = `message ${type}`;
            message.textContent = content;
            conversation.appendChild(message);
            conversation.scrollTop = conversation.scrollHeight;
        }
        
        function updateVAD(level, isActive) {
            const vadLevel = document.getElementById('vadLevel');
            const vadStatus = document.getElementById('vadStatus');
            
            vadLevel.style.width = `${level * 100}%`;
            vadStatus.textContent = isActive ? 
                `🎵 Speaking: ${(level * 100).toFixed(0)}%` : 
                `🔇 Silence: ${(level * 100).toFixed(0)}%`;
        }
        
        function showTranscriptionStatus(show) {
            const status = document.getElementById('transcriptionStatus');
            status.style.display = show ? 'block' : 'none';
        }
        
        async function connectWebSocket() {
            const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
            const wsUrl = `${protocol}//${window.location.host}/voice`;
            
            try {
                ws = new WebSocket(wsUrl);
                
                ws.onopen = function() {
                    connected = true;
                    addMessage('system', '✅ Connected! Enable microphone to start.');
                };
                
                ws.onmessage = function(event) {
                    const data = JSON.parse(event.data);
                    handleServerMessage(data);
                };
                
                ws.onclose = function() {
                    connected = false;
                    addMessage('system', '❌ Disconnected');
                };
                
            } catch (error) {
                addMessage('system', `❌ Connection failed: ${error}`);
            }
        }
        
        function handleServerMessage(data) {
            switch(data.type) {
                case 'vad_result':
                    updateVAD(data.confidence || 0, data.is_speech || false);
                    break;
                    
                case 'transcription':
                    if (data.text && data.text.trim()) {
                        addMessage('user', data.text);
                        showTranscriptionStatus(false);
                    }
                    break;
                    
                case 'ai_response':
                    if (data.text) {
                        addMessage('assistant', data.text);
                    }
                    break;
                    
                case 'processing':
                    showTranscriptionStatus(true);
                    break;
                    
                case 'status':
                    addMessage('system', data.message);
                    break;
            }
        }
        
        async function requestMicrophone() {
            try {
                mediaStream = await navigator.mediaDevices.getUserMedia({
                    audio: {
                        sampleRate: 16000, channelCount: 1,
                        echoCancellation: true, noiseSuppression: true
                    }
                });
                
                addMessage('system', '✅ Microphone enabled');
                
                const AudioContextClass = window.AudioContext || window.webkitAudioContext;
                audioContext = new AudioContextClass({ sampleRate: 16000 });
                
                const source = audioContext.createMediaStreamSource(mediaStream);
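                // ScriptProcessorNode is deprecated in favor of AudioWorklet but is
                // still widely supported; 4096-sample buffers at 16 kHz give ~256 ms per callback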
                processor = audioContext.createScriptProcessor(4096, 1, 1);
                
                processor.onaudioprocess = function(event) {
                    if (isRecording && connected) {
                        const inputData = event.inputBuffer.getChannelData(0);
                        processAudioData(inputData);
                    }
                };
                
                source.connect(processor);
                processor.connect(audioContext.destination);
                
                document.getElementById('startBtn').disabled = false;
                document.getElementById('micBtn').disabled = true;
                document.getElementById('micBtn').textContent = '✅ Mic Ready';
                
            } catch (error) {
                addMessage('system', `❌ Microphone error: ${error.message}`);
            }
        }
        
        function processAudioData(inputData) {
            // Add to buffer
            const int16Array = new Int16Array(inputData.length);
            for (let i = 0; i < inputData.length; i++) {
                int16Array[i] = Math.max(-1, Math.min(1, inputData[i])) * 0x7FFF;
            }
            audioBuffer.push(...int16Array);
            
            // Send VAD data
            const uint8Array = new Uint8Array(int16Array.buffer);
            const base64String = btoa(String.fromCharCode.apply(null, uint8Array));
            
            ws?.send(JSON.stringify({
                type: 'audio_vad',
                data: base64String
            }));
            
            // Check for silence (send buffer when speech ends)
            const energy = inputData.reduce((sum, val) => sum + val * val, 0) / inputData.length;
            const isSpeaking = energy > 0.001;
            
            if (isSpeaking) {
                silenceStart = null;
            } else if (!silenceStart) {
                silenceStart = Date.now();
            } else if (Date.now() - silenceStart > 1000 && audioBuffer.length > 16000) {
                // 1 second of silence and we have audio - process it
                processAccumulatedAudio();
            }
        }
        
        function processAccumulatedAudio() {
            if (audioBuffer.length < 8000) return; // Need at least 0.5 seconds
            
            const audioData = new Uint8Array(audioBuffer.length * 2);
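            // Write each int16 sample as little-endian bytes to match the server's 16-bit PCM WAV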
            for (let i = 0; i < audioBuffer.length; i++) {
                audioData[i * 2] = audioBuffer[i] & 0xFF;
                audioData[i * 2 + 1] = (audioBuffer[i] >> 8) & 0xFF;
            }
            
            // Build the binary string in chunks to avoid call-stack limits on long recordings
            let binary = '';
            for (let i = 0; i < audioData.length; i += 8192) {
                binary += String.fromCharCode.apply(null, audioData.subarray(i, i + 8192));
            }
            const base64String = btoa(binary);
            
            ws?.send(JSON.stringify({
                type: 'transcribe',
                data: base64String
            }));
            
            audioBuffer = [];
            silenceStart = null;
        }
        
        function startListening() {
            if (!connected || !mediaStream) return;
            
            isRecording = true;
            audioBuffer = [];
            addMessage('system', '🎧 Listening... Start speaking!');
            
            document.getElementById('startBtn').disabled = true;
            document.getElementById('stopBtn').disabled = false;
            
            if (audioContext?.state === 'suspended') {
                audioContext.resume();
            }
        }
        
        function stopListening() {
            isRecording = false;
            
            // Process any remaining audio
            if (audioBuffer.length > 8000) {
                processAccumulatedAudio();
            }
            
            addMessage('system', '🛑 Stopped listening');
            showTranscriptionStatus(false);
            
            document.getElementById('startBtn').disabled = false;
            document.getElementById('stopBtn').disabled = true;
        }
        
        window.addEventListener('load', function() {
            addMessage('system', 'Ready! Click Connect to start voice chat.');
        });
    </script>
</body>
</html>
    """
    return HTMLResponse(content=html_content)

@app.get("/health")
async def health_check():
    return {
        "status": "healthy",
        "timestamp": time.time(),
        "features": {
            "speech_recognition": True,
            "openai_api": bool(OPENAI_API_KEY),
            "https_enabled": True
        }
    }

@app.websocket("/voice")
async def voice_websocket(websocket: WebSocket):
    """WebSocket for real voice chat"""
    await websocket.accept()
    session_id = f"session_{int(time.time() * 1000)}"
    
    session = {
        "id": session_id,
        "websocket": websocket,
        "audio_buffer": [],
        "conversation": []
    }
    active_sessions[session_id] = session
    
    try:
        await websocket.send_json({
            "type": "status",
            "message": "🎙️ Real voice chat ready! Enable microphone to start talking."
        })
        
        async for message in websocket.iter_json():
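            # Two client message types: "audio_vad" (short PCM chunks for live
            # VAD feedback) and "transcribe" (a buffered utterance to answer)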
            if message.get("type") == "audio_vad":
                # Handle voice activity detection
                try:
                    audio_data = base64.b64decode(message["data"])
                    # Unpack little-endian 16-bit samples and normalize to [-1.0, 1.0]
                    sample_count = len(audio_data) // 2
                    samples = struct.unpack(f"<{sample_count}h", audio_data[: sample_count * 2])
                    audio_float = [s / 32768.0 for s in samples]

                    is_speech, confidence = simple_vad(audio_float)
                    
                    await websocket.send_json({
                        "type": "vad_result",
                        "is_speech": is_speech,
                        "confidence": confidence
                    })
                    
                except Exception as e:
                    logger.error(f"VAD error: {e}")
            
            elif message.get("type") == "transcribe":
                # Handle speech transcription
                try:
                    await websocket.send_json({"type": "processing"})
                    
                    audio_data = base64.b64decode(message["data"])
                    
                    # Transcribe speech
                    transcript = await transcribe_audio(audio_data)
                    
                    if transcript.strip():
                        # Send transcription
                        await websocket.send_json({
                            "type": "transcription",
                            "text": transcript
                        })
                        
                        # Get AI response
                        ai_response = await get_ai_response(transcript)
                        
                        # Send AI response
                        await websocket.send_json({
                            "type": "ai_response",
                            "text": ai_response
                        })
                        
                        # Store in conversation
                        session["conversation"].extend([
                            {"role": "user", "text": transcript, "timestamp": time.time()},
                            {"role": "assistant", "text": ai_response, "timestamp": time.time()}
                        ])
                        
                except Exception as e:
                    logger.error(f"Transcription error: {e}")
                    await websocket.send_json({
                        "type": "status",
                        "message": f"Error processing speech: {str(e)}"
                    })
                
    except WebSocketDisconnect:
        logger.info(f"Session {session_id} disconnected")
    finally:
        if session_id in active_sessions:
            del active_sessions[session_id]

def main():
    print("🎙️ Starting LiveTalker Real Voice Chat...")
    
    # Check for certificates
    cert_file = "livetalker.crt"
    key_file = "livetalker.key"
    
    if not (Path(cert_file).exists() and Path(key_file).exists()):
        print("❌ HTTPS certificates not found - falling back to HTTP")
        print("⚠️  Note: browsers only allow microphone access over HTTPS or on localhost")
        uvicorn.run(app, host="0.0.0.0", port=8000)
    else:
        print("✅ HTTPS certificates found")
        print("🎙️ Starting with real speech recognition and AI responses")
        print("")
        print("📍 Access URLs:")
        print("   Local HTTPS: https://localhost:8000")
        if OPENAI_API_KEY:
            print("   🤖 OpenAI GPT responses enabled")
        else:
            print("   🤖 Using mock responses (set OPENAI_API_KEY for GPT)")
        print("")
        
        uvicorn.run(
            app,
            host="0.0.0.0",
            port=8000,
            ssl_certfile=cert_file,
            ssl_keyfile=key_file
        )

if __name__ == "__main__":
    main()