252 lines
9.2 KiB
Python
252 lines
9.2 KiB
Python
import os
|
|
import tempfile
|
|
from moviepy import VideoFileClip
|
|
from openai import OpenAI
|
|
import math
|
|
from pydub import AudioSegment
|
|
from docx import Document
|
|
from docx.shared import Pt, RGBColor
|
|
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
|
|
import pypandoc
|
|
from dotenv import load_dotenv
|
|
|
|
# Pull environment variables (e.g. OPENAI_API_KEY) from a local .env file.
load_dotenv()

# Configuration
MAX_FILE_SIZE = 26214400  # Whisper API upload limit: 25 MB, in bytes
CHUNK_SIZE = MAX_FILE_SIZE // 2  # target chunk size (~12 MB)
# All artifacts (audio, transcripts, summaries, docx) land next to this script.
OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'outputs')
os.makedirs(OUTPUT_DIR, exist_ok=True)
def setup_api_keys():
    """Build an OpenAI client from the OPENAI_API_KEY environment variable.

    Returns:
        OpenAI: a client configured with the key from the environment.

    Raises:
        ValueError: when OPENAI_API_KEY is unset or empty.
    """
    api_key = os.getenv('OPENAI_API_KEY')
    # Guard clause: fail fast when no key is available.
    if not api_key:
        raise ValueError("OpenAI API key not found in environment variables")
    return OpenAI(api_key=api_key)
def extract_audio(video_path):
    """Extract the audio track of a video file to an MP3 in OUTPUT_DIR.

    Args:
        video_path: path to the source video file.

    Returns:
        str: path to the extracted MP3 file ("temp_audio.mp3" in OUTPUT_DIR;
        note the fixed name means concurrent runs would overwrite each other).
    """
    print("Extracting audio from video file...")
    temp_audio_path = os.path.join(OUTPUT_DIR, "temp_audio.mp3")
    video = VideoFileClip(video_path)
    try:
        # Raises if the video has no audio stream (video.audio is None).
        video.audio.write_audiofile(temp_audio_path)
    finally:
        # Always release the clip's file handles, even when the write fails
        # (the original leaked them on error).
        video.close()
    return temp_audio_path
def split_audio_by_duration(file_path, duration_seconds=600):
    """Split an audio file into fixed-duration chunks (default 10 minutes).

    Args:
        file_path: path to the audio file to split.
        duration_seconds: length of each chunk in seconds.

    Returns:
        list[str]: paths of the chunk files that were successfully exported;
        empty when the source file cannot be loaded. Errors are printed, not
        raised (best-effort).
    """
    chunks_dir = os.path.join(OUTPUT_DIR, 'chunks')
    os.makedirs(chunks_dir, exist_ok=True)

    try:
        audio = AudioSegment.from_file(file_path)
    except Exception as e:
        print(f"Error loading audio file: {e}")
        return []

    step_ms = duration_seconds * 1000  # pydub slices by milliseconds
    exported = []

    for index, start_ms in enumerate(range(0, len(audio), step_ms)):
        segment = audio[start_ms:start_ms + step_ms]
        chunk_filename = os.path.join(chunks_dir, f"chunk_{index}.mp3")
        try:
            segment.export(chunk_filename, format="mp3")
            exported.append(chunk_filename)
            print(f"Created chunk: {chunk_filename}")
        except Exception as e:
            # Skip chunks that fail to export and keep going.
            print(f"Error exporting chunk: {e}")

    return exported
def transcribe_audio(client, audio_chunks):
    """Transcribe audio chunks with OpenAI's Whisper API and join the texts.

    Args:
        client: OpenAI client (must expose audio.transcriptions.create).
        audio_chunks: iterable of audio file paths.

    Returns:
        str: the per-chunk transcripts joined with single spaces. Chunks
        whose transcription fails are skipped (error printed, best-effort).
    """
    transcripts = []
    for chunk in audio_chunks:
        print(f"Transcribing chunk: {chunk}")
        try:
            # Context manager closes the handle even when the API call
            # raises — the original opened the file and never closed it.
            with open(chunk, "rb") as audio_file:
                transcript = client.audio.transcriptions.create(
                    file=audio_file,
                    model="whisper-1",
                    response_format="text",
                    language="it",
                    prompt="L'audio è il pezzo di una riunione registrata; potrebbe essere la continuazione di un precedente pezzo di audio"
                )
            transcripts.append(transcript)
        except Exception as e:
            print(f"Error in transcription: {e}")

    return ' '.join(transcripts)
def summarize_transcription(client, transcript):
    """Produce detailed Italian meeting notes from a raw transcript (GPT-4o).

    Args:
        client: OpenAI client (must expose chat.completions.create).
        transcript: the full meeting transcript text.

    Returns:
        str: the model's notes, in business Italian.
    """
    system_message = """You will find next a transcript of a Teams Meeting.
[PAY ATTENTION: The text may not be perfect as it was transcribed from video]

Your task is to create detailed notes from this meeting transcript,
including as much information as possible. At the end, include a summary
of open points and decisions made during the meeting.

The result should be in business Italian language.
"""

    chat_messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": transcript},
    ]
    # Low temperature keeps the notes close to the source material.
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=chat_messages,
        temperature=0.1,
        max_tokens=16000,
    )
    return response.choices[0].message.content
def clean_summary(client, summary):
    """Ask GPT-4o to clean and reformat a generated summary.

    The summary text is embedded directly into the system prompt via
    str.format; the user turn is a fixed instruction.

    Args:
        client: OpenAI client (must expose chat.completions.create).
        summary: the summary text to clean.

    Returns:
        str: the cleaned summary, in business Italian.
    """
    prompt_template = """Following is an automated generated text from a video transcript.
Due to audio quality and lack of knowledge about the context, the text may not be complete or accurate.

[ORIGINAL TEXT]
{0}

Please clean this text, fixing any inconsistencies, formatting issues, or errors in transcription.
The result should be well-structured and in business Italian language.
"""

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": prompt_template.format(summary)},
            {"role": "user", "content": "Please clean and format this text."},
        ],
        temperature=0.5,
        max_tokens=16000,
    )
    return response.choices[0].message.content
def generate_verbale(client, summary_clean):
    """Turn a cleaned summary into a formal Italian meeting report (verbale).

    Args:
        client: OpenAI client (must expose chat.completions.create).
        summary_clean: the cleaned meeting summary.

    Returns:
        str: the formal report text, in business Italian.
    """
    system_message = """You are tasked with creating a formal meeting report (verbale) from the following meeting summary.

[SUMMARY]
{0}

Please structure the report in a professional format typical of consulting firms (like Deloitte).
Focus particularly on decisions made and action items for future meetings.

The report should be in business Italian language.
""".format(summary_clean)

    # Assemble the request as a dict, then expand it into the API call.
    request = dict(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": "Generate a formal meeting report (verbale)."},
        ],
        temperature=0.5,
        max_tokens=16000,
    )
    completion = client.chat.completions.create(**request)
    return completion.choices[0].message.content
def convert_markdown_to_word(markdown_text, output_file):
    """Convert markdown text to an MS Word (.docx) document using pypandoc.

    Args:
        markdown_text: the markdown source to convert.
        output_file: destination .docx path.

    Returns:
        bool: True on success, False on any error (best-effort; errors are
        printed rather than raised).
    """
    temp_md_file = None
    try:
        # Pandoc operates on files, so stage the markdown in a temp file.
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.md', encoding='utf-8') as temp_file:
            temp_file.write(markdown_text)
            temp_md_file = temp_file.name

        # Ensure the output directory exists. Guard against a bare filename:
        # os.makedirs('') raises FileNotFoundError (bug in the original).
        out_dir = os.path.dirname(output_file)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        pypandoc.convert_file(temp_md_file, 'docx', outputfile=output_file)

        print(f"Successfully converted to Word: {output_file}")
        return True
    except Exception as e:
        print(f"Error converting to Word: {e}")
        return False
    finally:
        # Remove the staged markdown file even when conversion fails
        # (the original leaked it on the error path).
        if temp_md_file and os.path.exists(temp_md_file):
            os.unlink(temp_md_file)
def process_video(file_path):
    """Run the full pipeline for one video: transcription, summaries, documents.

    Pipeline: extract audio -> split into chunks -> transcribe -> summarize ->
    clean summary -> formal report (verbale) -> Word exports. All artifacts are
    written into OUTPUT_DIR, named after the video's base filename.

    Args:
        file_path: path to the input video file.

    Returns:
        dict: paths of the generated transcription, summaries, verbale and
        .docx files.

    Raises:
        Exception: any pipeline failure is printed and re-raised.
    """
    base_filename = os.path.splitext(os.path.basename(file_path))[0]
    client = setup_api_keys()

    # Output file paths
    transcription_path = os.path.join(OUTPUT_DIR, f"{base_filename}_transcription.txt")
    summary_path = os.path.join(OUTPUT_DIR, f"{base_filename}_summary.md")
    summary_clean_path = os.path.join(OUTPUT_DIR, f"{base_filename}_summary_clean.md")
    verbale_path = os.path.join(OUTPUT_DIR, f"{base_filename}_verbale.md")
    docx_path = os.path.join(OUTPUT_DIR, f"{base_filename}_summary.docx")
    docx_verbale_path = os.path.join(OUTPUT_DIR, f"{base_filename}_verbale.docx")

    audio_path = None
    audio_chunks = []
    try:
        # Step 1: Extract audio from video
        audio_path = extract_audio(file_path)

        # Step 2: Split audio into chunks small enough for the Whisper API
        audio_chunks = split_audio_by_duration(audio_path)

        # Step 3: Transcribe audio
        transcription = transcribe_audio(client, audio_chunks)
        with open(transcription_path, "w", encoding='utf-8') as f:
            f.write(transcription)
        print(f"Saved transcription to: {transcription_path}")

        # Step 4: Generate summary
        summary = summarize_transcription(client, transcription)
        with open(summary_path, "w", encoding='utf-8') as f:
            f.write(summary)
        print(f"Saved summary to: {summary_path}")

        # Step 5: Clean and format summary
        summary_clean = clean_summary(client, summary)
        with open(summary_clean_path, "w", encoding='utf-8') as f:
            f.write(summary_clean)
        print(f"Saved cleaned summary to: {summary_clean_path}")

        # Step 6: Generate formal report (verbale)
        verbale = generate_verbale(client, summary_clean)
        with open(verbale_path, "w", encoding='utf-8') as f:
            f.write(verbale)
        print(f"Saved verbale to: {verbale_path}")

        # Step 7: Convert markdown to Word documents (best-effort; failures
        # are reported by convert_markdown_to_word itself)
        convert_markdown_to_word(summary_clean, docx_path)
        convert_markdown_to_word(verbale, docx_verbale_path)

        # Return paths to all generated files
        return {
            'transcription_path': transcription_path,
            'summary_path': summary_path,
            'summary_clean_path': summary_clean_path,
            'verbale_path': verbale_path,
            'docx_path': docx_path,
            'docx_verbale_path': docx_verbale_path
        }

    except Exception as e:
        print(f"Error processing video: {e}")
        raise
    finally:
        # Step 8: Clean up temporary audio files. Runs on failure too — the
        # original only removed them on the success path, leaving temp audio
        # and chunk files behind whenever a later step raised.
        for chunk in audio_chunks:
            if os.path.exists(chunk):
                os.remove(chunk)
        if audio_path and os.path.exists(audio_path):
            os.remove(audio_path)