import gradio as gr
import pdfplumber
import asyncio
import edge_tts
import os
from datetime import datetime

# Directory (relative to the current working directory) where generated MP3
# files are written and from which the audio list is read. It is created at
# import time so later listing/writing code can assume it exists.
AUDIO_FOLDER = "./audios"
os.makedirs(AUDIO_FOLDER, exist_ok=True)

def read_pdf_text(pdf_path):
    """Extract the text of a PDF and re-flow hard-wrapped lines.

    Each page's text is split into lines; blank lines are dropped and the
    remaining ones are stripped. A line that does not end with '.', '!' or
    '?' is joined to the following line with a single space, otherwise a
    newline is emitted. The last line of a page always ends with a newline,
    and pages are separated by one extra newline.

    Args:
        pdf_path: Value passed straight to ``pdfplumber.open``.

    Returns:
        The combined text of all pages with leading/trailing whitespace
        removed; an empty string when no page yields any text.
    """
    sentence_endings = ('.', '!', '?')
    page_chunks = []
    with pdfplumber.open(pdf_path) as document:
        for page in document.pages:
            raw = page.extract_text()
            if not raw:
                # Pages without extractable text contribute nothing.
                continue
            stripped_rows = (candidate.strip() for candidate in raw.split('\n'))
            rows = [row for row in stripped_rows if row]
            final_index = len(rows) - 1
            pieces = []
            for index, row in enumerate(rows):
                # Only a non-final line lacking end punctuation is treated
                # as continuing onto the next line.
                continues = index < final_index and not row.endswith(sentence_endings)
                pieces.append(row + (" " if continues else "\n"))
            page_chunks.append("".join(pieces) + "\n")
    return "".join(page_chunks).strip()

async def convert_text_to_speech(text, output_path, voice="vi-VN-HoaiMyNeural"):
    """Synthesize ``text`` with Edge TTS and save the audio to ``output_path``.

    Args:
        text: The text to be spoken.
        output_path: Destination path handed to ``Communicate.save``.
        voice: Edge TTS voice name. Defaults to the Vietnamese voice
            "vi-VN-HoaiMyNeural", which is what this function always used
            before the parameter existed, so existing callers are unaffected.

    Raises:
        Whatever ``edge_tts`` raises when synthesis or saving fails; nothing
        is caught here.
    """
    communicate = edge_tts.Communicate(text, voice=voice)
    await communicate.save(output_path)

def process(pdf_file):
    """Convert an uploaded PDF into an MP3 file and report the outcome.

    Args:
        pdf_file: The value supplied by the PDF upload component; it is
            passed unchanged to ``read_pdf_text``. A falsy value means
            nothing was uploaded.

    Returns:
        A ``(audio_path, status_message)`` tuple. ``audio_path`` is the path
        of the generated MP3 inside ``AUDIO_FOLDER`` on success and ``None``
        on any failure; ``status_message`` is the user-facing status text.
        Failures while reading the PDF or synthesizing speech are reported
        through the status message instead of being raised.
    """
    import logging

    logger = logging.getLogger(__name__)

    if not pdf_file:
        return None, "❌ Vui lòng tải lên tệp PDF."

    # This is the UI boundary: surface the problem in the status box rather
    # than letting a raw exception escape, but keep the traceback in the log.
    try:
        text = read_pdf_text(pdf_file)
    except Exception as exc:
        logger.exception("Failed to read PDF %r", pdf_file)
        return None, f"❌ Không thể đọc tệp PDF: {exc}"

    if not text.strip():
        return None, "❌ Không tìm thấy nội dung văn bản trong PDF."

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_path = os.path.join(AUDIO_FOLDER, f"audio_{timestamp}.mp3")

    try:
        asyncio.run(convert_text_to_speech(text, output_path))
    except Exception as exc:
        logger.exception("Text-to-speech conversion failed for %r", output_path)
        # A failed synthesis may leave a partial/empty file behind; remove it
        # so it does not show up later among the available audio files.
        if os.path.exists(output_path):
            os.remove(output_path)
        return None, f"❌ Chuyển đổi sang giọng nói thất bại: {exc}"

    return output_path, f"✅ Hoàn tất chuyển đổi. File: audio_{timestamp}.mp3"

def list_audio_files():
    """Return the sorted names of the MP3 files found in ``AUDIO_FOLDER``.

    The ".mp3" extension check is case-insensitive. Only file names (not
    full paths) are returned.
    """
    return sorted(
        entry
        for entry in os.listdir(AUDIO_FOLDER)
        if entry.lower().endswith(".mp3")
    )

def play_audio(file_name):
    """Resolve a name from the audio list to a path inside ``AUDIO_FOLDER``.

    Args:
        file_name: Name of the selected audio file; may be ``None`` or empty
            when nothing is selected.

    Returns:
        The joined path when ``file_name`` is non-empty and the path exists,
        otherwise ``None``.
    """
    if not file_name:
        return None
    candidate = os.path.join(AUDIO_FOLDER, file_name)
    return candidate if os.path.exists(candidate) else None

def refresh_audio_list():
    """Re-scan the audio folder and pick a default selection.

    Returns:
        A ``(files, first)`` tuple: the list from ``list_audio_files()`` and
        its first entry, or ``([], None)`` when no audio files exist.
    """
    available = list_audio_files()
    if not available:
        return [], None
    return available, available[0]

# Build the Gradio UI: a two-column layout with the PDF-to-audio converter on
# the left and a picker for previously generated audio files on the right.
with gr.Blocks() as demo:
    gr.Markdown("## 📚🔊 Chuyển PDF sang Giọng nói tiếng Việt (Edge TTS) và danh sách audio")

    with gr.Row():
        # Left column (wider, scale=2): upload a PDF, trigger the conversion,
        # and show the resulting status and audio.
        with gr.Column(scale=2):
            pdf_input = gr.File(label="Tải tệp PDF", file_types=[".pdf"])
            convert_button = gr.Button("Chuyển đổi PDF sang Audio")
            status_convert = gr.Textbox(label="Trạng thái chuyển đổi", interactive=False)

            output_audio = gr.Audio(label="Phát audio mới chuyển đổi", type="filepath")

        # Right column (scale=1): choose and play an existing audio file.
        with gr.Column(scale=1):
            gr.Markdown("### Danh sách các file audio có sẵn")
            # The choices are computed once, when the UI is built. Nothing in
            # this block updates them afterwards, so files converted later are
            # not offered here until the UI is rebuilt.
            audio_dropdown = gr.Dropdown(label="Chọn file audio để phát", choices=list_audio_files())
            # Disabled refresh button (see the matching disabled handler below).
            # refresh_button = gr.Button("Cập nhật danh sách audio")

            # NOTE(review): status_list is displayed but is not an output of
            # any event handler in this block, so it stays empty.
            status_list = gr.Textbox(label="Trạng thái danh sách audio", interactive=False)
            output_audio_from_list = gr.Audio(label="Phát audio từ danh sách", type="filepath")

    # Clicking the convert button runs process() on the uploaded file and
    # routes its two return values to the audio player and the status box.
    convert_button.click(
        fn=process,
        inputs=pdf_input,
        outputs=[output_audio, status_convert]
    )

    # Disabled wiring for the refresh button. It lists audio_dropdown twice as
    # an output, matching the two values refresh_audio_list() returns.
    # refresh_button.click(
    #     fn=refresh_audio_list,
    #     inputs=None,
    #     outputs=[audio_dropdown, audio_dropdown]
    # )

    # Selecting a file in the dropdown resolves it through play_audio() and
    # loads the resulting path into the right-hand audio player.
    audio_dropdown.change(
        fn=play_audio,
        inputs=audio_dropdown,
        outputs=output_audio_from_list
    )

# Start the app. There is no ``__main__`` guard, so this runs whenever the
# module is executed or imported.
demo.launch()
