import os
import tempfile
from moviepy import VideoFileClip
from openai import OpenAI
from pydub import AudioSegment
import pypandoc
from dotenv import load_dotenv

# Load environment variables
load_dotenv()
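# Expects OPENAI_API_KEY to be provided via a .env file or the environment; see setup_api_keys().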

# Configuration
MAX_FILE_SIZE = 26214400  # 25 MB in bytes (OpenAI's upload limit for audio transcription)
CHUNK_SIZE = MAX_FILE_SIZE // 2  # ~12.5 MB; in practice audio is split by duration, see split_audio_by_duration
OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'outputs')
os.makedirs(OUTPUT_DIR, exist_ok=True)

def setup_api_keys():
    """Setup API keys and configurations."""
    api_key = os.getenv('OPENAI_API_KEY')
    if not api_key:
        raise ValueError("OpenAI API key not found in environment variables")
    
    client = OpenAI(api_key=api_key)
    return client

def extract_audio(video_path):
    """Extract audio from video file and return path to audio file."""
    print("Extracting audio from video file...")
    temp_audio_path = os.path.join(OUTPUT_DIR, "temp_audio.mp3")
    video = VideoFileClip(video_path)
    video.audio.write_audiofile(temp_audio_path)
    video.close()
    return temp_audio_path

def split_audio_by_duration(file_path, duration_seconds=600):
    """Split an audio file into chunks of a specified duration (default 10 minutes)."""
    chunks_dir = os.path.join(OUTPUT_DIR, 'chunks')
    os.makedirs(chunks_dir, exist_ok=True)
    
    try:
        audio = AudioSegment.from_file(file_path)
    except Exception as e:
        print(f"Error loading audio file: {e}")
        return []
    
    chunks = []
    chunk_length_ms = duration_seconds * 1000  # Convert seconds to milliseconds
    
    for i, chunk_start in enumerate(range(0, len(audio), chunk_length_ms)):
        chunk_end = chunk_start + chunk_length_ms
        chunk = audio[chunk_start:chunk_end]
        
        chunk_filename = os.path.join(chunks_dir, f"chunk_{i}.mp3")
        try:
            chunk.export(chunk_filename, format="mp3")
            chunks.append(chunk_filename)
            print(f"Created chunk: {chunk_filename}")
        except Exception as e:
            print(f"Error exporting chunk: {e}")
    
    return chunks

def transcribe_audio(client, audio_chunks):
    """Transcribe audio chunks using OpenAI's Whisper API."""
    transcripts = []
    for chunk in audio_chunks:
        print(f"Transcribing chunk: {chunk}")
        try:
            # Open the chunk in a context manager so the file handle is always closed
            with open(chunk, "rb") as audio_file:
                transcript = client.audio.transcriptions.create(
                    file=audio_file,
                    model="whisper-1",
                    response_format="text",
                    language="it",
                    prompt="L'audio è il pezzo di una riunione registrata; potrebbe essere la continuazione di un precedente pezzo di audio"
                )
            transcripts.append(transcript)
        except Exception as e:
            print(f"Error in transcription: {e}")
    
    return ' '.join(transcripts)

def summarize_transcription(client, transcript):
    """Use GPT-4 to summarize the transcription."""
    system_message = """You will find next a transcript of a Teams Meeting.
    [PAY ATTENTION: The text may not be perfect as it was transcribed from video]
    
    Your task is to create detailed notes from this meeting transcript,
    including as much information as possible. At the end, include a summary
    of open points and decisions made during the meeting.
    
    The result should be in business Italian language.
    """
    
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": transcript}
        ],
        temperature=0.1,
        max_tokens=16000
    )
    
    return response.choices[0].message.content

def clean_summary(client, summary):
    """Clean and format the summary using GPT."""
    system_message = """Following is an automated generated text from a video transcript.
    Due to audio quality and lack of knowledge about the context, the text may not be complete or accurate.
    
    [ORIGINAL TEXT]
    {0}
    
    Please clean this text, fixing any inconsistencies, formatting issues, or errors in transcription.
    The result should be well-structured and in business Italian language.
    """.format(summary)
    
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": "Please clean and format this text."}
        ],
        temperature=0.5,
        max_tokens=16000
    )
    
    return response.choices[0].message.content

def generate_verbale(client, summary_clean):
    """Generate a formal meeting report (verbale) from the cleaned summary."""
    system_message = """You are tasked with creating a formal meeting report (verbale) from the following meeting summary.
    
    [SUMMARY]
    {0}
    
    Please structure the report in a professional format typical of consulting firms (like Deloitte).
    Focus particularly on decisions made and action items for future meetings.
    
    The report should be in business Italian language.
    """.format(summary_clean)
    
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": "Generate a formal meeting report (verbale)."}
        ],
        temperature=0.5,
        max_tokens=16000
    )
    
    return response.choices[0].message.content

def convert_markdown_to_word(markdown_text, output_file):
    """Convert markdown text to MS Word document format using pypandoc."""
    try:
        # Write markdown to a temporary file
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.md', encoding='utf-8') as temp_file:
            temp_file.write(markdown_text)
            temp_md_file = temp_file.name
        
        try:
            # Ensure output directory exists
            os.makedirs(os.path.dirname(output_file), exist_ok=True)
            
            # Convert to docx using pypandoc (requires a local pandoc installation)
            pypandoc.convert_file(temp_md_file, 'docx', outputfile=output_file)
        finally:
            # Clean up the temp file even if conversion fails
            os.unlink(temp_md_file)
        
        print(f"Successfully converted to Word: {output_file}")
        return True
    except Exception as e:
        print(f"Error converting to Word: {e}")
        return False

def process_video(file_path):
    """Process a video file and generate transcription, summaries, and documents."""
    base_filename = os.path.splitext(os.path.basename(file_path))[0]
    client = setup_api_keys()
    
    # Output file paths
    transcription_path = os.path.join(OUTPUT_DIR, f"{base_filename}_transcription.txt")
    summary_path = os.path.join(OUTPUT_DIR, f"{base_filename}_summary.md")
    summary_clean_path = os.path.join(OUTPUT_DIR, f"{base_filename}_summary_clean.md")
    verbale_path = os.path.join(OUTPUT_DIR, f"{base_filename}_verbale.md")
    docx_path = os.path.join(OUTPUT_DIR, f"{base_filename}_summary.docx")
    docx_verbale_path = os.path.join(OUTPUT_DIR, f"{base_filename}_verbale.docx")
    
    try:
        # Step 1: Extract audio from video
        audio_path = extract_audio(file_path)
        
        # Step 2: Split audio into chunks
        audio_chunks = split_audio_by_duration(audio_path)
        
        # Step 3: Transcribe audio
        transcription = transcribe_audio(client, audio_chunks)
        with open(transcription_path, "w", encoding='utf-8') as f:
            f.write(transcription)
        print(f"Saved transcription to: {transcription_path}")
        
        # Step 4: Generate summary
        summary = summarize_transcription(client, transcription)
        with open(summary_path, "w", encoding='utf-8') as f:
            f.write(summary)
        print(f"Saved summary to: {summary_path}")
        
        # Step 5: Clean and format summary
        summary_clean = clean_summary(client, summary)
        with open(summary_clean_path, "w", encoding='utf-8') as f:
            f.write(summary_clean)
        print(f"Saved cleaned summary to: {summary_clean_path}")
        
        # Step 6: Generate formal report (verbale)
        verbale = generate_verbale(client, summary_clean)
        with open(verbale_path, "w", encoding='utf-8') as f:
            f.write(verbale)
        print(f"Saved verbale to: {verbale_path}")
        
        # Step 7: Convert markdown to Word documents
        convert_markdown_to_word(summary_clean, docx_path)
        convert_markdown_to_word(verbale, docx_verbale_path)
        
        # Step 8: Clean up temporary files
        for chunk in audio_chunks:
            if os.path.exists(chunk):
                os.remove(chunk)
        if os.path.exists(audio_path):
            os.remove(audio_path)
        
        # Return paths to all generated files
        return {
            'transcription_path': transcription_path,
            'summary_path': summary_path,
            'summary_clean_path': summary_clean_path,
            'verbale_path': verbale_path,
            'docx_path': docx_path,
            'docx_verbale_path': docx_verbale_path
        }
    
    except Exception as e:
        print(f"Error processing video: {e}")
        raise
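

# Minimal command-line entry point, added as a usage sketch: it assumes this module
# is run directly as a script and that the path to a local video file is passed as
# the first argument (both assumptions, not part of the original pipeline code).
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print(f"Usage: python {os.path.basename(sys.argv[0])} <path_to_video>")
        sys.exit(1)

    results = process_video(sys.argv[1])
    print("Generated files:")
    for name, path in results.items():
        print(f"  {name}: {path}")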