import os
import re
import subprocess
import yt_dlp # type: ignore
import epub2txt # type: ignore
from flask import Flask, request, jsonify # type: ignore
from faster_whisper import WhisperModel # type: ignore
# from filmot import Filmot # type: ignore

app = Flask(__name__)

# Whisper model loaded once at import time and reused by the /transcribe
# endpoint. Use "tiny", "base", etc. and `int8` for CPU.
model = WhisperModel("base", compute_type="int8")

# Directory that contains the ffmpeg/ffprobe executables. Set the FFMPEG_BIN
# environment variable to override it; the default is the original Windows
# location so existing setups keep working unchanged.
FFMPEG_BIN = os.environ.get("FFMPEG_BIN") or r"C:\ffmpeg\bin"

# Output directory for transcripts; created on import if it does not exist.
TRANSCRIPT_DIR = "transcripts"
os.makedirs(TRANSCRIPT_DIR, exist_ok=True)

def extract_video_id(url):
    """Return the 11-character YouTube video ID found in *url*, or None.

    Recognises ``v=<id>`` query parameters and ``youtu.be/<id>`` short links,
    and falls back to the ``/shorts/``, ``/embed/`` and ``/live/`` path forms
    on youtube.com / youtube-nocookie.com.

    Args:
        url: Candidate YouTube URL. Non-string values (for example a JSON
            number or ``null``) are treated as containing no ID.

    Returns:
        The video ID string, or ``None`` when no ID can be extracted.
    """
    if not isinstance(url, str):
        return None

    # The original pattern is tried first so every URL that was accepted
    # before still resolves to the same ID; path-style forms are a fallback.
    patterns = (
        r'(?:v=|youtu\.be/)([\w-]{11})',
        r'youtube(?:-nocookie)?\.com/(?:shorts|embed|live)/([\w-]{11})',
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None

def get_audio_duration(filename):
    """Return the duration of *filename* in seconds as reported by ffprobe.

    Runs the ``ffprobe`` executable located in ``FFMPEG_BIN`` and parses the
    single number it prints for ``format=duration``.

    Args:
        filename: Path of the media file to inspect.

    Returns:
        The duration as a float, or ``0.0`` when ffprobe cannot be started,
        exits with a non-zero status, or prints something that is not a
        number. Callers treat ``0.0`` as "invalid file".
    """
    try:
        result = subprocess.run(
            [os.path.join(FFMPEG_BIN, 'ffprobe'), '-v', 'error', '-show_entries',
             'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', filename],
            stdout=subprocess.PIPE,
            # Keep diagnostics on their own stream: merged into stdout they
            # would make the float() parse fail even for a valid file.
            stderr=subprocess.PIPE,
            text=True,
        )
        if result.returncode != 0:
            print(f"[ffprobe error] {result.stderr.strip()}")
            return 0.0
        return float(result.stdout.strip())
    except (OSError, subprocess.SubprocessError, ValueError) as e:
        # OSError: ffprobe missing or not executable; ValueError: output was
        # not a number (e.g. empty or "N/A").
        print(f"[ffprobe error] {e}")
        return 0.0

def download_audio(video_url, video_id):
    """Download the audio of *video_url* with yt-dlp and convert it to MP3.

    The file is written as ``<video_id>.mp3`` relative to the current working
    directory and validated before being handed back to the caller.

    Args:
        video_url: URL passed to yt-dlp.
        video_id: Identifier used as the output file's base name.

    Returns:
        The name of the downloaded MP3 file.

    Raises:
        FileNotFoundError: yt-dlp finished but the expected MP3 is missing.
        RuntimeError: The MP3 is empty or ffprobe reports no duration. The
            unusable file is deleted before the error is raised.
        Exception: Errors raised by yt-dlp itself propagate unchanged.
    """
    output_filename = f"{video_id}.mp3"
    print(f"[INFO] Downloading audio as {output_filename}...")

    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': f'{video_id}.%(ext)s',
        'quiet': True,
        'no_warnings': True,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        'ffmpeg_location': FFMPEG_BIN,
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([video_url])

    if not os.path.exists(output_filename):
        raise FileNotFoundError(f"Audio file {output_filename} not found.")

    try:
        if os.path.getsize(output_filename) == 0:
            raise RuntimeError(f"Downloaded audio file {output_filename} is empty.")

        duration = get_audio_duration(output_filename)
        if duration == 0.0:
            raise RuntimeError(f"Audio file {output_filename} has 0 duration or is invalid.")
    except Exception:
        # The caller never receives the filename on failure, so nobody else
        # would clean up the rejected file; remove it here.
        try:
            os.remove(output_filename)
        except OSError:
            # Best-effort cleanup: the validation error is the one to report.
            pass
        raise

    return output_filename

@app.route('/transcribe', methods=['POST'])
def transcribe():
    """Transcribe the audio of a YouTube video.

    Expects a JSON object body with a ``url`` key. The audio is downloaded,
    transcribed with the module-level Whisper model, and the transcript is
    saved to ``TRANSCRIPT_DIR/<video_id>.txt``.

    Returns:
        200 with ``{'video_id', 'transcript'}`` on success;
        400 with ``{'error'}`` when the body or URL is missing/invalid;
        500 with ``{'error'}`` when downloading, transcribing or saving fails.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # aborting with an HTML error page, so every failure is reported as JSON.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'url' not in data:
        return jsonify({'error': 'Missing YouTube URL'}), 400

    video_url = data['url']
    # A non-string value (number, list, null) can never be a valid URL.
    video_id = extract_video_id(video_url) if isinstance(video_url, str) else None
    if not video_id:
        return jsonify({'error': 'Invalid YouTube URL'}), 400

    try:
        audio_file = download_audio(video_url, video_id)

        try:
            print(f"[INFO] Transcribing {audio_file}...")
            segments, _ = model.transcribe(audio_file)
            # faster-whisper yields segments lazily, so this join must finish
            # before the audio file is deleted.
            transcript = " ".join(segment.text for segment in segments)

            # Save to file
            transcript_path = os.path.join(TRANSCRIPT_DIR, f"{video_id}.txt")
            with open(transcript_path, "w", encoding="utf-8") as f:
                f.write(transcript)
        finally:
            # Delete the audio file even when transcription or saving fails.
            os.remove(audio_file)

        print("[INFO] Transcription complete. Audio file removed.")

        return jsonify({
            'video_id': video_id,
            'transcript': transcript
        })

    except Exception as e:
        # Request boundary: report any failure to the client as JSON.
        return jsonify({'error': str(e)}), 500

@app.route('/extract-epub', methods=['POST'])
def extract_epub():
    """Extract the text of an uploaded EPUB file.

    Expects a multipart upload with a ``file`` part whose name ends in
    ``.epub``. The extracted text is saved to
    ``TRANSCRIPT_DIR/<base name>.txt``.

    Returns:
        200 with ``{'filename', 'extracted_text'}`` (text truncated to the
        first 2000 characters) on success;
        400 with ``{'error'}`` for a missing, unnamed, non-EPUB or oversized
        upload;
        500 with ``{'error'}`` when saving or extraction fails.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part in request'}), 400

    file = request.files['file']
    # Covers both an empty name and a part sent without any filename.
    if not file.filename:
        return jsonify({'error': 'No selected file'}), 400

    if not file.filename.lower().endswith('.epub'):
        return jsonify({'error': 'Invalid file type. Only .epub allowed'}), 400

    # The filename is client-controlled. Keep only its last path component
    # (treating both "/" and "\" as separators) so names such as
    # "../../app.epub" cannot write outside TRANSCRIPT_DIR.
    safe_name = os.path.basename(file.filename.replace('\\', '/'))

    # Save temporarily
    temp_path = os.path.join(TRANSCRIPT_DIR, safe_name)

    # Optional: Check file size limit (e.g., 10MB)
    MAX_SIZE_MB = 10

    try:
        file.save(temp_path)

        if os.path.getsize(temp_path) > MAX_SIZE_MB * 1024 * 1024:
            return jsonify({'error': f'File too large (>{MAX_SIZE_MB}MB)'}), 400

        # Extract text from EPUB
        print(f"[INFO] Extracting text from {file.filename}")
        extracted_text = epub2txt.epub2txt(temp_path)

        # Save transcript
        base_filename = os.path.splitext(safe_name)[0]
        transcript_path = os.path.join(TRANSCRIPT_DIR, f"{base_filename}.txt")
        with open(transcript_path, "w", encoding="utf-8") as f:
            f.write(extracted_text)

        return jsonify({
            'filename': file.filename,
            'extracted_text': extracted_text[:2000]  # Limit response size
        })

    except Exception as e:
        # Request boundary: report any failure to the client as JSON.
        return jsonify({'error': str(e)}), 500

    finally:
        # Clean up the uploaded EPUB on every path, including failures.
        try:
            if os.path.exists(temp_path):
                os.remove(temp_path)
        except OSError as cleanup_error:
            # Best-effort: a leftover upload must not replace the response.
            print(f"[WARN] Could not remove {temp_path}: {cleanup_error}")


# def search_youtube_archive(query, limit=3):
#     """
#     Search YouTube archive using Filmot and return results.
#     """
#     Filmot.set_rapidapi_key(os.environ["RAPIDAPI_KEY"])  # Load the RapidAPI key from the environment; never commit the real key
#     filmot = Filmot()
#     response = filmot.search(query, limit=limit)
#     return response

# @app.route('/youtubesearch', methods=['GET'])
# def youtubesearch():
#     query = request.args.get('q')
#     limit = int(request.args.get('limit', 3))
#     if not query:
#         return jsonify({'error': 'Missing search query'}), 400
#     try:
#         results = search_youtube_archive(query, limit)
#         return jsonify({'results': results})
#     except Exception as e:
#         return jsonify({'error': str(e)}), 500
    
    
    
    
if __name__ == '__main__':
    # Debug mode turns on the reloader and the interactive debugger, which can
    # execute arbitrary code; it is meant for local development only. It stays
    # on by default as before; set FLASK_DEBUG=0 (or "false"/"no") to disable
    # it without editing this file.
    debug_enabled = os.environ.get("FLASK_DEBUG", "1").strip().lower() not in ("0", "false", "no")
    app.run(debug=debug_enabled)
