Versione con array di folder.
This commit is contained in:
parent
3c4904521f
commit
014b45bbe2
3
.gitignore
vendored
3
.gitignore
vendored
@ -5,3 +5,6 @@
|
||||
venv
|
||||
__pycache__
|
||||
videos/*.*
|
||||
elaborati
|
||||
node_modules
|
||||
|
||||
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
36
assembly.py
36
assembly.py
@ -387,32 +387,16 @@ def processo_verbale():
|
||||
convert_markdown_to_word(verbaleGen, docx_full_verbale_file_name)
|
||||
|
||||
if __name__ == "__main__":
    # Meeting-recording folders to process in batch: every .mp4 found in
    # each folder is run through the full pipeline via main().
    folders = [
        "C:\\Users\\rosat\\ARMUNDIA GROUP SRL\\Trasformazione PdC - Documents\\03 - Analisi e Disegno\\meetings\\18 mar 2025 TAVOLO 1 - CONSOLIDAMENTO MACRO ANALISI DI SESSIONE UTENTE E PIANIFICAZIONE CON IMPATTI CONV. FONDI",
        "C:\\Users\\rosat\\ARMUNDIA GROUP SRL\\Trasformazione PdC - Documents\\03 - Analisi e Disegno\\meetings\\20 Mar 2025 PDC Armundia DB Schemi Cat Prodotti e Consulenza Unica + Mappatura - Configurazione dei Prodotti e Relativi Servizi",
        "C:\\Users\\rosat\\ARMUNDIA GROUP SRL\\Trasformazione PdC - Documents\\03 - Analisi e Disegno\\meetings\\20 Mar 2025 PDC Armundia Accesso Sorgenti Batch + Giro NEO4J + Microservizi",
        "C:\\Users\\rosat\\ARMUNDIA GROUP SRL\\Trasformazione PdC - Documents\\03 - Analisi e Disegno\\meetings\\20 Mar 2025 FFIT Replatforming PDC PO Analisi Funzionale",
    ]

    # One-off single-file run (leftover ad-hoc invocation, kept for now).
    # TODO(review): confirm this single call is still wanted alongside the
    # batch loop below, or remove it.
    main("E:\\obs\\PdC Review Piano Parte1 - 2025-03-05 09-11-44.mp4")

    # Batch-process every .mp4 inside each configured folder.
    for folder in folders:
        if not os.path.isdir(folder):
            # Robustness: report and skip a missing folder instead of
            # letting one bad path abort the whole batch run.
            print(f"Skipping missing folder: {folder}")
            continue
        for file in os.listdir(folder):
            if file.endswith(".mp4"):
                main(os.path.join(folder, file))

    # processo_verbale()  # regenerate only the formal report when needed
|
3
react-flask-app/client/.npmrc
Normal file
3
react-flask-app/client/.npmrc
Normal file
@ -0,0 +1,3 @@
|
||||
registry=https://registry.npmjs.org/
|
||||
//nexus.armundia.com/repository/:_authToken=${NPM_TOKEN}
|
||||
strict-ssl=false
|
13881
react-flask-app/client/package-lock.json
generated
Normal file
13881
react-flask-app/client/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
46
react-flask-app/client/package.json
Normal file
46
react-flask-app/client/package.json
Normal file
@ -0,0 +1,46 @@
|
||||
{
|
||||
"name": "video-ai-client",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"dependencies": {
|
||||
"@emotion/react": "^11.11.1",
|
||||
"@emotion/styled": "^11.11.0",
|
||||
"@mui/icons-material": "^5.14.15",
|
||||
"@mui/material": "^5.14.15",
|
||||
"@mui/x-data-grid": "^6.16.3",
|
||||
"@testing-library/jest-dom": "^5.17.0",
|
||||
"@testing-library/react": "^13.4.0",
|
||||
"@testing-library/user-event": "^13.5.0",
|
||||
"axios": "^1.5.1",
|
||||
"react": "^18.2.0",
|
||||
"react-dom": "^18.2.0",
|
||||
"react-router-dom": "^6.17.0",
|
||||
"react-scripts": "5.0.1",
|
||||
"web-vitals": "^2.1.4"
|
||||
},
|
||||
"scripts": {
|
||||
"start": "react-scripts start",
|
||||
"build": "react-scripts build",
|
||||
"test": "react-scripts test",
|
||||
"eject": "react-scripts eject"
|
||||
},
|
||||
"eslintConfig": {
|
||||
"extends": [
|
||||
"react-app",
|
||||
"react-app/jest"
|
||||
]
|
||||
},
|
||||
"browserslist": {
|
||||
"production": [
|
||||
">0.2%",
|
||||
"not dead",
|
||||
"not op_mini all"
|
||||
],
|
||||
"development": [
|
||||
"last 1 chrome version",
|
||||
"last 1 firefox version",
|
||||
"last 1 safari version"
|
||||
]
|
||||
},
|
||||
"proxy": "http://localhost:5000"
|
||||
}
|
24
react-flask-app/client/public/index.html
Normal file
24
react-flask-app/client/public/index.html
Normal file
@ -0,0 +1,24 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="utf-8" />
|
||||
<link rel="icon" href="%PUBLIC_URL%/favicon.ico" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||
<meta name="theme-color" content="#000000" />
|
||||
<meta
|
||||
name="description"
|
||||
content="Video AI Processing Application"
|
||||
/>
|
||||
<link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" />
|
||||
<link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
|
||||
<link
|
||||
rel="stylesheet"
|
||||
href="https://fonts.googleapis.com/css?family=Roboto:300,400,500,700&display=swap"
|
||||
/>
|
||||
<title>Video AI Processor</title>
|
||||
</head>
|
||||
<body>
|
||||
<noscript>You need to enable JavaScript to run this app.</noscript>
|
||||
<div id="root"></div>
|
||||
</body>
|
||||
</html>
|
25
react-flask-app/client/public/manifest.json
Normal file
25
react-flask-app/client/public/manifest.json
Normal file
@ -0,0 +1,25 @@
|
||||
{
|
||||
"short_name": "Video AI",
|
||||
"name": "Video AI Processing Application",
|
||||
"icons": [
|
||||
{
|
||||
"src": "favicon.ico",
|
||||
"sizes": "64x64 32x32 24x24 16x16",
|
||||
"type": "image/x-icon"
|
||||
},
|
||||
{
|
||||
"src": "logo192.png",
|
||||
"type": "image/png",
|
||||
"sizes": "192x192"
|
||||
},
|
||||
{
|
||||
"src": "logo512.png",
|
||||
"type": "image/png",
|
||||
"sizes": "512x512"
|
||||
}
|
||||
],
|
||||
"start_url": ".",
|
||||
"display": "standalone",
|
||||
"theme_color": "#000000",
|
||||
"background_color": "#ffffff"
|
||||
}
|
2
react-flask-app/server/.env.example
Normal file
2
react-flask-app/server/.env.example
Normal file
@ -0,0 +1,2 @@
|
||||
OPENAI_API_KEY=your_openai_api_key_here
|
||||
MONGO_URI=mongodb://localhost:27017/video_ai
|
192
react-flask-app/server/app.py
Normal file
192
react-flask-app/server/app.py
Normal file
@ -0,0 +1,192 @@
|
||||
import os
|
||||
import uuid
|
||||
import tempfile
|
||||
from flask import Flask, request, jsonify, send_file
|
||||
from flask_cors import CORS
|
||||
from werkzeug.utils import secure_filename
|
||||
from pymongo import MongoClient
|
||||
from datetime import datetime
|
||||
from bson.objectid import ObjectId
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
from processing import process_video
|
||||
|
||||
# Load environment variables from a local .env file (see .env.example).
load_dotenv()

app = Flask(__name__)
CORS(app)

# MongoDB configuration.
# SECURITY FIX: the previous fallback embedded live Atlas credentials in
# source control. The connection string must now come from the MONGO_URI
# environment variable; the default is a credential-free local instance,
# matching .env.example. Rotate the leaked credentials.
MONGO_URI = os.getenv('MONGO_URI', 'mongodb://localhost:27017/video_ai')
client = MongoClient(MONGO_URI)
db = client.get_database()
documents_collection = db.documents

# Folders for uploaded videos and generated documents, created at startup
# next to this module so the app works regardless of the working directory.
UPLOAD_FOLDER = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'uploads')
DOCUMENT_FOLDER = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'documents')
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
os.makedirs(DOCUMENT_FOLDER, exist_ok=True)
|
||||
|
||||
# File extensions accepted for upload (lowercase, without the dot).
ALLOWED_EXTENSIONS = {'mp4'}

def allowed_file(filename):
    """Return True if *filename* has an extension in ALLOWED_EXTENSIONS.

    The comparison is case-insensitive and only the text after the last
    dot is considered; names without any dot are rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension.lower() in ALLOWED_EXTENSIONS
|
||||
|
||||
@app.route('/api/upload', methods=['POST'])
def upload_file():
    """Accept an MP4 upload and register it in MongoDB.

    Expects a multipart form with a 'file' part. Rejects missing/empty
    parts and non-mp4 extensions with 400. On success the file is stored
    under a collision-free name and a 201 with the new document id is
    returned.
    """
    if 'file' not in request.files:
        return jsonify({'error': 'No file part'}), 400

    file = request.files['file']

    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400

    if not allowed_file(file.filename):
        return jsonify({'error': 'File type not allowed'}), 400

    # Sanitize the client-supplied name, then prefix it with a UUID so two
    # uploads of the same file never collide while the original name stays
    # visible on disk. (Fix: the stored name previously dropped the
    # original filename entirely.)
    filename = secure_filename(file.filename)
    unique_filename = f"{uuid.uuid4()}_{filename}"
    file_path = os.path.join(UPLOAD_FOLDER, unique_filename)

    # Save the file
    file.save(file_path)

    # Create the tracking record; 'status' drives the processing lifecycle
    # (uploaded -> processing -> completed/failed).
    document_id = documents_collection.insert_one({
        'original_filename': filename,
        'stored_filename': unique_filename,
        'upload_date': datetime.now(),
        'status': 'uploaded',
        'file_path': file_path
    }).inserted_id

    # Return the document ID to the client
    return jsonify({
        'message': 'File uploaded successfully',
        'document_id': str(document_id)
    }), 201
|
||||
|
||||
@app.route('/api/process/<document_id>', methods=['POST'])
def process_document(document_id):
    """Run the video pipeline for an uploaded document.

    Looks the document up by id (404 if absent), marks it 'processing',
    runs process_video() on its stored file, then records either the
    generated artifact paths ('completed') or the error ('failed').
    """
    doc_filter = {'_id': ObjectId(document_id)}
    doc = documents_collection.find_one(doc_filter)

    if not doc:
        return jsonify({'error': 'Document not found'}), 404

    # Flag the document as in-flight before kicking off the pipeline.
    documents_collection.update_one(doc_filter, {'$set': {'status': 'processing'}})

    try:
        result = process_video(doc['file_path'])

        # Persist the path of every generated artifact.
        completed_fields = {
            'status': 'completed',
            'transcription_path': result['transcription_path'],
            'summary_path': result['summary_path'],
            'summary_clean_path': result['summary_clean_path'],
            'verbale_path': result['verbale_path'],
            'docx_path': result['docx_path'],
            'docx_verbale_path': result['docx_verbale_path'],
            'completion_date': datetime.now(),
        }
        documents_collection.update_one(doc_filter, {'$set': completed_fields})

        return jsonify({
            'message': 'Document processed successfully',
            'document_id': document_id,
        }), 200
    except Exception as e:
        # Record the failure so the client can inspect what went wrong.
        documents_collection.update_one(
            doc_filter,
            {'$set': {'status': 'failed', 'error': str(e)}},
        )
        return jsonify({'error': f'Processing failed: {str(e)}'}), 500
|
||||
|
||||
@app.route('/api/documents', methods=['GET'])
def get_documents():
    """List every document with a summary projection, JSON-serializable."""
    projection = {
        'original_filename': 1,
        'upload_date': 1,
        'status': 1,
        'completion_date': 1,
    }
    docs = list(documents_collection.find({}, projection))

    # ObjectId and datetime values are not JSON serializable; convert
    # them in place before handing the list to jsonify.
    for entry in docs:
        entry['_id'] = str(entry['_id'])
        for date_key in ('upload_date', 'completion_date'):
            if date_key in entry:
                entry[date_key] = entry[date_key].isoformat()

    return jsonify(docs), 200
|
||||
|
||||
@app.route('/api/documents/<document_id>', methods=['GET'])
def get_document(document_id):
    """Return the full record of a single document, JSON-serializable."""
    record = documents_collection.find_one({'_id': ObjectId(document_id)})

    if not record:
        return jsonify({'error': 'Document not found'}), 404

    # ObjectId and datetime values are not JSON serializable.
    record['_id'] = str(record['_id'])
    for date_key in ('upload_date', 'completion_date'):
        if date_key in record:
            record[date_key] = record[date_key].isoformat()

    return jsonify(record), 200
|
||||
|
||||
@app.route('/api/download/<document_id>/<file_type>', methods=['GET'])
def download_file(document_id, file_type):
    """Serve one generated artifact of a document as an attachment.

    *file_type* selects which stored path is sent: docx, verbale_docx,
    summary, summary_clean, verbale or transcription. Unknown types give
    400; a missing document or missing artifact gives 404.
    """
    record = documents_collection.find_one({'_id': ObjectId(document_id)})

    if not record:
        return jsonify({'error': 'Document not found'}), 404

    # Map the public file_type name onto the record field holding the path.
    file_type_mapping = {
        'docx': 'docx_path',
        'verbale_docx': 'docx_verbale_path',
        'summary': 'summary_path',
        'summary_clean': 'summary_clean_path',
        'verbale': 'verbale_path',
        'transcription': 'transcription_path'
    }

    path_field = file_type_mapping.get(file_type)
    if path_field is None:
        return jsonify({'error': 'Invalid file type'}), 400

    # Absent key and empty/None value are both "not available".
    if not record.get(path_field):
        return jsonify({'error': f'{file_type} not available for this document'}), 404

    try:
        return send_file(record[path_field], as_attachment=True)
    except Exception as e:
        return jsonify({'error': f'Error downloading file: {str(e)}'}), 500
|
||||
|
||||
# Development entry point only; run behind a real WSGI server in production.
if __name__ == '__main__':
    app.run(port=5000, debug=True)
|
251
react-flask-app/server/processing.py
Normal file
251
react-flask-app/server/processing.py
Normal file
@ -0,0 +1,251 @@
|
||||
import os
|
||||
import tempfile
|
||||
from moviepy import VideoFileClip
|
||||
from openai import OpenAI
|
||||
import math
|
||||
from pydub import AudioSegment
|
||||
from docx import Document
|
||||
from docx.shared import Pt, RGBColor
|
||||
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
|
||||
import pypandoc
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load environment variables from .env (OPENAI_API_KEY, etc.).
load_dotenv()

# Configuration.
MAX_FILE_SIZE = 25 * 1024 * 1024   # 26214400 bytes: Whisper's 25 MB upload cap
CHUNK_SIZE = MAX_FILE_SIZE // 2    # target chunk size, about 12.5 MB
# All generated artifacts land in an 'outputs' folder next to this module.
OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'outputs')
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
def setup_api_keys():
    """Build an OpenAI client from the OPENAI_API_KEY environment variable.

    Returns:
        A configured OpenAI client.

    Raises:
        ValueError: if the key is not present in the environment.
    """
    api_key = os.getenv('OPENAI_API_KEY')
    if api_key:
        return OpenAI(api_key=api_key)
    raise ValueError("OpenAI API key not found in environment variables")
|
||||
|
||||
def extract_audio(video_path):
    """Extract the audio track of *video_path* into an MP3 under OUTPUT_DIR.

    Args:
        video_path: path to the source video file.

    Returns:
        Path of the written audio file (always OUTPUT_DIR/temp_audio.mp3,
        so concurrent calls would overwrite each other — single-use only).
    """
    print("Extracting audio from video file...")
    temp_audio_path = os.path.join(OUTPUT_DIR, "temp_audio.mp3")
    video = VideoFileClip(video_path)
    try:
        video.audio.write_audiofile(temp_audio_path)
    finally:
        # Fix: release the clip's file handles/reader processes even when
        # the audio write raises (previously leaked on failure).
        video.close()
    return temp_audio_path
|
||||
|
||||
def split_audio_by_duration(file_path, duration_seconds=600):
    """Split an audio file into fixed-duration chunks (default 10 minutes).

    Chunks are exported as MP3 files into OUTPUT_DIR/chunks.

    Args:
        file_path: path of the audio file to split.
        duration_seconds: length of each chunk in seconds.

    Returns:
        List of paths of the successfully exported chunks; empty if the
        source file cannot be loaded.
    """
    chunks_dir = os.path.join(OUTPUT_DIR, 'chunks')
    os.makedirs(chunks_dir, exist_ok=True)

    try:
        audio = AudioSegment.from_file(file_path)
    except Exception as e:
        print(f"Error loading audio file: {e}")
        return []

    chunk_length_ms = duration_seconds * 1000  # pydub slices in milliseconds
    chunk_paths = []

    for index, start_ms in enumerate(range(0, len(audio), chunk_length_ms)):
        segment = audio[start_ms:start_ms + chunk_length_ms]
        target = os.path.join(chunks_dir, f"chunk_{index}.mp3")
        try:
            segment.export(target, format="mp3")
            chunk_paths.append(target)
            print(f"Created chunk: {target}")
        except Exception as e:
            # A failed chunk is reported and skipped; the rest still export.
            print(f"Error exporting chunk: {e}")

    return chunk_paths
|
||||
|
||||
def transcribe_audio(client, audio_chunks):
    """Transcribe audio chunks with OpenAI's Whisper API.

    Args:
        client: an OpenAI client (see setup_api_keys()).
        audio_chunks: iterable of audio file paths.

    Returns:
        The chunk transcriptions joined with spaces; chunks whose
        transcription fails are logged and skipped.
    """
    transcripts = []
    for chunk in audio_chunks:
        print(f"Transcribing chunk: {chunk}")
        # Fix: open each chunk with a context manager so the file handle
        # is closed even when the API call raises (previously leaked).
        with open(chunk, "rb") as audio_file:
            try:
                transcript = client.audio.transcriptions.create(
                    file=audio_file,
                    model="whisper-1",
                    response_format="text",
                    language="it",
                    prompt="L'audio è il pezzo di una riunione registrata; potrebbe essere la continuazione di un precedente pezzo di audio"
                )
                transcripts.append(transcript)
            except Exception as e:
                print(f"Error in transcription: {e}")

    return ' '.join(transcripts)
|
||||
|
||||
def summarize_transcription(client, transcript):
    """Produce detailed Italian meeting notes from *transcript* via GPT-4o."""
    system_message = """You will find next a transcript of a Teams Meeting.
[PAY ATTENTION: The text may not be perfect as it was transcribed from video]

Your task is to create detailed notes from this meeting transcript,
including as much information as possible. At the end, include a summary
of open points and decisions made during the meeting.

The result should be in business Italian language.
"""

    chat_messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": transcript},
    ]
    # Low temperature: notes should track the transcript, not improvise.
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=chat_messages,
        temperature=0.1,
        max_tokens=16000,
    )
    return response.choices[0].message.content
|
||||
|
||||
def clean_summary(client, summary):
    """Have GPT-4o clean and reformat an auto-generated *summary*.

    The raw summary is embedded in the system prompt; the user turn only
    triggers the cleanup. Returns the cleaned Italian text.
    """
    system_message = """Following is an automated generated text from a video transcript.
Due to audio quality and lack of knowledge about the context, the text may not be complete or accurate.

[ORIGINAL TEXT]
{0}

Please clean this text, fixing any inconsistencies, formatting issues, or errors in transcription.
The result should be well-structured and in business Italian language.
""".format(summary)

    chat_messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": "Please clean and format this text."},
    ]
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=chat_messages,
        temperature=0.5,
        max_tokens=16000,
    )
    return response.choices[0].message.content
|
||||
|
||||
def generate_verbale(client, summary_clean):
    """Generate a formal meeting report (verbale) from a cleaned summary.

    The cleaned summary is embedded in the system prompt; the user turn
    only triggers the generation. Returns the report in Italian.
    """
    system_message = """You are tasked with creating a formal meeting report (verbale) from the following meeting summary.

[SUMMARY]
{0}

Please structure the report in a professional format typical of consulting firms (like Deloitte).
Focus particularly on decisions made and action items for future meetings.

The report should be in business Italian language.
""".format(summary_clean)

    chat_messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": "Generate a formal meeting report (verbale)."},
    ]
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=chat_messages,
        temperature=0.5,
        max_tokens=16000,
    )
    return response.choices[0].message.content
|
||||
|
||||
def convert_markdown_to_word(markdown_text, output_file):
    """Convert markdown text to an MS Word (.docx) document via pypandoc.

    Args:
        markdown_text: the markdown source to convert.
        output_file: destination .docx path; its parent directory is
            created if missing.

    Returns:
        True on success, False if staging or conversion failed.
    """
    try:
        # Pandoc reads from disk, so stage the markdown in a temp file.
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.md', encoding='utf-8') as temp_file:
            temp_file.write(markdown_text)
            temp_md_file = temp_file.name

        try:
            # Ensure output directory exists
            os.makedirs(os.path.dirname(output_file), exist_ok=True)

            # Convert to docx using pypandoc
            pypandoc.convert_file(temp_md_file, 'docx', outputfile=output_file)
        finally:
            # Fix: always remove the staged temp file; previously it was
            # only unlinked on the success path and leaked when pandoc
            # raised.
            os.unlink(temp_md_file)

        print(f"Successfully converted to Word: {output_file}")
        return True
    except Exception as e:
        print(f"Error converting to Word: {e}")
        return False
|
||||
|
||||
def process_video(file_path):
    """Run the full pipeline for one video: transcription, summary,
    cleaned summary, formal report (verbale) and Word exports.

    Args:
        file_path: path to the source video (.mp4).

    Returns:
        Dict mapping artifact names ('transcription_path', 'summary_path',
        'summary_clean_path', 'verbale_path', 'docx_path',
        'docx_verbale_path') to the generated file paths in OUTPUT_DIR.

    Raises:
        Re-raises any pipeline error after logging it. Temporary audio
        files are removed in all cases.
    """
    base_filename = os.path.splitext(os.path.basename(file_path))[0]
    client = setup_api_keys()

    # Output file paths, all derived from the video's base name.
    transcription_path = os.path.join(OUTPUT_DIR, f"{base_filename}_transcription.txt")
    summary_path = os.path.join(OUTPUT_DIR, f"{base_filename}_summary.md")
    summary_clean_path = os.path.join(OUTPUT_DIR, f"{base_filename}_summary_clean.md")
    verbale_path = os.path.join(OUTPUT_DIR, f"{base_filename}_verbale.md")
    docx_path = os.path.join(OUTPUT_DIR, f"{base_filename}_summary.docx")
    docx_verbale_path = os.path.join(OUTPUT_DIR, f"{base_filename}_verbale.docx")

    # Pre-bind so the finally-block cleanup is safe even if an early step fails.
    audio_path = None
    audio_chunks = []
    try:
        # Step 1: Extract audio from video
        audio_path = extract_audio(file_path)

        # Step 2: Split audio into chunks
        audio_chunks = split_audio_by_duration(audio_path)

        # Step 3: Transcribe audio
        transcription = transcribe_audio(client, audio_chunks)
        with open(transcription_path, "w", encoding='utf-8') as f:
            f.write(transcription)
        print(f"Saved transcription to: {transcription_path}")

        # Step 4: Generate summary
        summary = summarize_transcription(client, transcription)
        with open(summary_path, "w", encoding='utf-8') as f:
            f.write(summary)
        print(f"Saved summary to: {summary_path}")

        # Step 5: Clean and format summary
        summary_clean = clean_summary(client, summary)
        with open(summary_clean_path, "w", encoding='utf-8') as f:
            f.write(summary_clean)
        print(f"Saved cleaned summary to: {summary_clean_path}")

        # Step 6: Generate formal report (verbale)
        verbale = generate_verbale(client, summary_clean)
        with open(verbale_path, "w", encoding='utf-8') as f:
            f.write(verbale)
        print(f"Saved verbale to: {verbale_path}")

        # Step 7: Convert markdown to Word documents
        convert_markdown_to_word(summary_clean, docx_path)
        convert_markdown_to_word(verbale, docx_verbale_path)

        # Return paths to all generated files
        return {
            'transcription_path': transcription_path,
            'summary_path': summary_path,
            'summary_clean_path': summary_clean_path,
            'verbale_path': verbale_path,
            'docx_path': docx_path,
            'docx_verbale_path': docx_verbale_path
        }
    except Exception as e:
        print(f"Error processing video: {e}")
        raise
    finally:
        # Fix: delete temporary audio artifacts even when a step fails;
        # cleanup previously ran only on the success path, leaking the
        # extracted audio and its chunks on every failure.
        for chunk in audio_chunks:
            if os.path.exists(chunk):
                os.remove(chunk)
        if audio_path and os.path.exists(audio_path):
            os.remove(audio_path)
|
11
react-flask-app/server/requirements.txt
Normal file
11
react-flask-app/server/requirements.txt
Normal file
@ -0,0 +1,11 @@
|
||||
flask==2.3.3
|
||||
flask-cors==4.0.0
|
||||
pymongo==4.5.0
|
||||
python-dotenv==1.0.0
|
||||
pydub==0.25.1
|
||||
moviepy==1.0.3
|
||||
python-docx==0.8.11
|
||||
pypandoc==1.11
|
||||
openpyxl==3.1.2
|
||||
openai==1.3.7
|
||||
werkzeug==2.3.7
|
File diff suppressed because one or more lines are too long
Loading…
x
Reference in New Issue
Block a user