diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..afd1d22 --- /dev/null +++ b/.gitignore @@ -0,0 +1,42 @@ +# Arquivos temporários +*.pyc +__pycache__/ +*.py[cod] +*$py.class + +# Jupyter Notebook +.ipynb_checkpoints +*/.ipynb_checkpoints/* + +# Ambientes virtuais +.venv/ +venv/ +ENV/ +env/ + +# IDEs +.vscode/ +.idea/ +*.swp +*.swo + +# OS +.DS_Store +Thumbs.db +desktop.ini + +# Credenciais (NUNCA fazer commit) +*secret*.json +*credentials*.json +*cookie*.txt +*api*.txt +*.pem + +# Arquivos de processamento +VIRALS/ +*.mp4 +*.mp3 +*.wav + +# Scripts temporários +add_docs.py diff --git a/LICENSE b/LICENSE index f288702..8c5bfeb 100644 --- a/LICENSE +++ b/LICENSE @@ -671,4 +671,4 @@ into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read -. +.) diff --git a/README.md b/README.md index f2d23d2..e29e4f6 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,172 @@ +# 🎬 ViralCutter - Smooth Face Tracking Edition + +[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/masterface77/ViralCutter/blob/smooth-zoom/ViralCutter-SmoothZoom.ipynb) +[![Open in Kaggle](https://kaggle.com/static/images/open-in-kaggle.svg)](https://www.kaggle.com/code/levireis77/viralcutter-cyclic-smooth-zoom-edition-kag) +[![Discord](https://dcbadge.limes.pink/api/server/tAdPHFAbud)](https://discord.gg/tAdPHFAbud) + +> **🎯 Branch `smooth-zoom`** - Versão com **YOLO Smooth Tracking** + face tracking configurável! + +Fork do [ViralCutter](https://github.com/RafaelGodoyEbert/ViralCutter) com **Smooth Face Tracking** - a câmera segue o rosto de forma suave e cinematográfica. 
+ +--- + +## ✨ Novidades v0.9 - Smooth Face Tracking + +![Smooth Tracking](https://img.shields.io/badge/🎥-Smooth_Tracking-blueviolet?style=for-the-badge) + +### 🎥 Tracking Suave Configurável +A suavidade do tracking agora pode ser ajustada no Gradio! + +| Alpha | Efeito | +|-------|--------| +| 0.02 | Ultra Suave (câmera bem lenta) | +| 0.05 | Normal (padrão recomendado) | +| 0.10 | Rápido (acompanha mais de perto) | + +### ⚙️ Configuração via Interface +- **"Advanced Face Settings"** no Gradio +- **Slider "Tracking Smoothness"** para ajustar alpha +- Aplica-se apenas ao modo YOLO + +**Tecnologias:** +- 🔍 **YOLOv8** - Detecção e tracking de pessoas em tempo real +- 📊 **ByteTrack** - IDs persistentes para cada pessoa +- 📈 **EMA (Exponential Moving Average)** - Suavização configurável + +### ⚡ Otimização T4 (WhisperX) +Transcrição otimizada para GPUs T4 do Colab/Kaggle (16GB VRAM): + +| Configuração | Valor | Benefício | +|--------------|-------|-----------| +| `compute_type` | int8_float16 | 50% menos VRAM | +| `model` | large-v2 | Mais estável que v3 | +| `batch_size` | 8 | Evita OOM em vídeos longos | +| `language` | pt (padrão) | Pula detecção automática | +| `chunk_size` | 15 | Chunks maiores = mais eficiente | + +--- + +## 🚀 Notebooks Disponíveis + +### 🔵 Colab - Smooth Zoom +**Características:** +- ✅ YOLO Smooth Zoom **ATIVADO POR PADRÃO** +- ✅ Upload automático para Google Drive +- ✅ Otimizado para GPUs T4 +- ✅ Instalação rápida (3-5 min) + +**Como usar:** +1. Abra o notebook [ViralCutter-SmoothZoom.ipynb](https://colab.research.google.com/github/masterface77/ViralCutter/blob/smooth-zoom/ViralCutter-SmoothZoom.ipynb) +2. Execute a célula principal +3. Aguarde a instalação (inclui `ultralytics`) +4. Clique no link `gradio.live` gerado +5. 
Face Model já vem selecionado como **yolo** 🎯 + +### 🟠 Kaggle - Smooth Zoom +**Características:** +- ✅ 30h/semana de GPU grátis +- ✅ Upload OAuth para sua conta Drive +- ✅ YOLO Smooth Zoom incluído +- ✅ Suporte a cookies e datasets + +--- + +## 📦 Configurando Datasets no Kaggle + +O notebook do Kaggle requer algumas credenciais. Siga o guia completo: + +### 1️⃣ client_secret.json (Obrigatório) +Credenciais OAuth do Google Cloud para upload no Drive. + +**Passos:** +1. Acesse [Google Cloud Console](https://console.cloud.google.com/) +2. Crie um projeto ou use um existente +3. Vá em **APIs & Services** → **Credentials** +4. **Create Credentials** → **OAuth 2.0 Client ID** +5. Escolha **Desktop App** +6. Baixe o JSON +7. No Kaggle: **Add Data** → **Upload** → Faça upload +8. Nomeie o dataset como `client-secret-json` + +### 2️⃣ cookie (Opcional) +Cookies para download de vídeos privados/restritos. + +**Passos:** +1. Instale [Get cookies.txt LOCAL](https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc) +2. Acesse www.youtube.com (logado) +3. Clique na extensão → **Export** +4. Salve como `www.youtube.com_cookies.txt` +5. No Kaggle: **Add Data** → **Upload** +6. Nomeie o dataset como `cookie` + +### 3️⃣ credenciais-google (Obrigatório) +API Key do Gemini para análise com IA. + +**Passos:** +1. Acesse [Google AI Studio](https://makersuite.google.com/app/apikey) +2. **Create API Key** +3. Copie a chave +4. Crie arquivo `gemini_api.txt` com a chave +5. No Kaggle: **Add Data** → **Upload** +6. Nomeie o dataset como `credenciais-google` + +### 4️⃣ google-drive-credentials (Opcional) +Token OAuth reutilizável (gerado na primeira execução). + +**Como reutilizar:** +1. Execute o notebook uma vez +2. Após autenticação, baixe o arquivo `.json` gerado em `/kaggle/working/` +3. Crie dataset no Kaggle com este arquivo +4. 
Nomeie como `google-drive-credentials` + +--- + +## ✨ Diferenças entre Colab e Kaggle + +| Característica | Colab | Kaggle | +|----------------|-------|--------| +| GPU Grátis | ✅ 12h/dia | ✅ 30h/semana | +| Configuração | Mais simples | Requer datasets | +| Upload Drive | Nativo | OAuth manual | +| Zoom IA | ❌ Removido | ✅ Disponível | +| Persistência | ❌ Nenhuma | ✅ Datasets | + +--- + +## 🎯 Recursos + +- **Detecção automática** de momentos virais +- **Transcrição com IA** (WhisperX) +- **Corte inteligente** com análise semântica +- **Legendas automáticas** +- **Processamento em batch** + +--- + +## 🔗 Links Úteis + +- **Licença (GPL v3):** [LICENSE](LICENSE) +- **Repositório Original:** [RafaelGodoyEbert/ViralCutter](https://github.com/RafaelGodoyEbert/ViralCutter) +- **Discord (Suporte):** [discord.gg/tAdPHFAbud](https://discord.gg/tAdPHFAbud) + +--- + +## 📝 Créditos + +Desenvolvido por **Rafa.Godoy** +- [GitHub](https://github.com/rafaelGodoyEbert) +- [Twitter](https://twitter.com/GodoyEbert) +- [Instagram](https://www.instagram.com/rafael.godoy.ebert/) + +Fork customizado para facilitar uso em Kaggle e Colab. + +--- + +## 📄 Licença + +Este projeto é licenciado sob a **GNU General Public License v3**, permitindo que você copie, distribua e modifique o software livremente, desde que mantenha a mesma licença. [Leia a licença completa aqui](LICENSE). + + # ViralCutter [![Discord](https://dcbadge.limes.pink/api/server/tAdPHFAbud)](https://discord.gg/tAdPHFAbud)
diff --git a/README_en.md b/README_en.md index 8d092ab..8ebd015 100644 --- a/README_en.md +++ b/README_en.md @@ -102,5 +102,9 @@ ViralCutter is community-maintained. Join us to democratize AI content creation! - **Discord**: [AI Hub Brasil](https://discord.gg/aihubbrasil) - **Github**: Give us a ⭐ star if this project helped you! +## 📄 License + +This project is licensed under the **GNU General Public License v3**. [Read the full license here](LICENSE). + **Current Version**: 0.8v Alpha *ViralCutter: Because viral clips shouldn't cost a fortune.* 🚀 diff --git a/ViralCutter-Colab.ipynb b/ViralCutter-Colab.ipynb new file mode 100644 index 0000000..868eb50 --- /dev/null +++ b/ViralCutter-Colab.ipynb @@ -0,0 +1,216 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "pa36OeArowme" + }, + "source": [ + "# ViralCutter\n", + "Uma alternativa gratuita ao `opus.pro` e ao `vidyo.ai`" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6Q-ljfsw1unE" + }, + "source": [ + "# Suporte em:\n", + "[![](https://dcbadge.limes.pink/api/server/tAdPHFAbud)](https://discord.gg/tAdPHFAbud)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "e76jiRnjONmj", + "outputId": "a2a5eeb8-04de-42f2-834e-8ad1c4d04393" + }, + "outputs": [], + "source": [ + "#@title 🎬 VIRALCUTTER COLAB (VRAM Fix + Drive Sync)\n", + "#@markdown Este script roda o ViralCutter com otimização de memória e sincroniza os vídeos processados com seu Google Drive.\n", + "\n", + "import os\n", + "import shutil\n", + "import subprocess\n", + "import threading\n", + "import time\n", + "import sys\n", + "from IPython.display import clear_output\n", + "from google.colab import drive\n", + "\n", + "# ==========================================\n", + "# ⚙️ CONFIGURAÇÕES (EDITE AQUI)\n", + "# ==========================================\n", + "PASTA_DRIVE_FINAL = \"/content/drive/MyDrive/ViralCutter_Prontos\" 
# Onde salvar\n", + "\n", + "# ==========================================\n", + "# 1. MONTAGEM DO DRIVE (NATIVO COLAB)\n", + "# ==========================================\n", + "print(\"1️⃣ Conectando ao Google Drive...\")\n", + "if not os.path.exists('/content/drive'):\n", + " drive.mount('/content/drive')\n", + "\n", + "os.makedirs(PASTA_DRIVE_FINAL, exist_ok=True)\n", + "print(f\"✅ Drive conectado! Vídeos irão para: {PASTA_DRIVE_FINAL}\")\n", + "\n", + "# ==========================================\n", + "# 2. INSTALAÇÃO OTIMIZADA (VRAM FIX)\n", + "# ==========================================\n", + "# Verifica se já está instalado para economizar tempo\n", + "if not os.path.exists(\"/content/ViralCutter\"):\n", + " print(\"\\n📦 Instalando ViralCutter Otimizado (3-5 min)...\")\n", + "\n", + " # Clone\n", + " subprocess.run(\"git clone https://github.com/RafaelGodoyEbert/ViralCutter.git /content/ViralCutter\", shell=True)\n", + " os.chdir(\"/content/ViralCutter\")\n", + "\n", + " # Instalação do gerenciador rápido (UV)\n", + " subprocess.run(\"pip install uv -q\", shell=True)\n", + "\n", + " # Drivers de Sistema (FFmpeg e CUDA basics)\n", + " subprocess.run(\"apt-get update -y -qq && apt-get install -y libcudnn8 ffmpeg xvfb -qq\", shell=True)\n", + "\n", + " # Cria ambiente\n", + " subprocess.run([\"uv\", \"venv\", \".venv\"], check=True)\n", + "\n", + " # --- OTIMIZAÇÃO DE VRAM AQUI ---\n", + " # Instalamos o faster-whisper e forçamos versões específicas do Torch\n", + " # para evitar que o Colab carregue CUDA kernels duplicados.\n", + " print(\" 🚀 Otimizando dependências de Vídeo e IA...\")\n", + "\n", + " cmds = [\n", + " # Motor de transcrição leve\n", + " \"uv pip install --python .venv faster-whisper\",\n", + " # Bibliotecas base\n", + " \"uv pip install --python .venv git+https://github.com/m-bain/whisperx.git\",\n", + " \"uv pip install --python .venv -r requirements.txt\",\n", + " \"uv pip install --python .venv -U --pre 'yt-dlp[default]'\",\n", + " # 
Gemini e Processamento\n", + " \"uv pip install --python .venv google-generativeai pandas onnxruntime-gpu\",\n", + " # Correção de versão do Transformers (Evita erro de alinhamento)\n", + " \"uv pip install --python .venv transformers==4.46.3 accelerate>=0.26.0\",\n", + " # Torch Otimizado para T4 (Colab)\n", + " \"uv pip install --python .venv torch==2.3.1+cu121 torchvision==0.18.1+cu121 torchaudio==2.3.1+cu121 --index-url https://download.pytorch.org/whl/cu121\",\n", + " # Visão Computacional (MediaPipe leve)\n", + " \"uv pip install --python .venv insightface\",\n", + " \"uv pip uninstall --python .venv mediapipe protobuf flatbuffers\",\n", + " \"uv pip install --python .venv 'mediapipe>=0.10.0' 'protobuf>=3.20,<5.0' 'flatbuffers>=2.0'\"\n", + " ]\n", + "\n", + " for cmd in cmds:\n", + " subprocess.run(cmd, shell=True, check=True)\n", + "\n", + " print(\"✅ Instalação Concluída!\")\n", + "else:\n", + " os.chdir(\"/content/ViralCutter\")\n", + " print(\"\\n✅ Sistema já instalado. Pulando etapa.\")\n", + "\n", + "# ==========================================\n", + "# 3. 
MONITOR AUTOMÁTICO (ENGINE)\n", + "# ==========================================\n", + "def engine_loop():\n", + " print(\"👀 Engine: Monitorando pasta VIRALS...\")\n", + " processed = set()\n", + " WATCH_DIR = \"/content/ViralCutter/VIRALS\"\n", + "\n", + " while True:\n", + " if os.path.exists(WATCH_DIR):\n", + " for r, d, f in os.walk(WATCH_DIR):\n", + " for file in f:\n", + " if not file.endswith(\".mp4\"): continue\n", + " # Ignora arquivos temporários e originais\n", + " if any(x in file for x in [\"input\", \"temp\", \"original\"]): continue\n", + "\n", + " full_path = os.path.join(r, file)\n", + "\n", + " # Verifica se o arquivo terminou de ser salvo\n", + " if full_path not in processed and os.path.getsize(full_path) > 1e6:\n", + " s1 = os.path.getsize(full_path)\n", + " time.sleep(5)\n", + " if os.path.getsize(full_path) != s1: continue\n", + "\n", + " print(f\"\\n💎 NOVO VÍDEO DETECTADO: {file}\")\n", + "\n", + " # Envia para o Drive\n", + " print(f\" ☁️ Salvando no Drive: {PASTA_DRIVE_FINAL}...\")\n", + " shutil.copy(full_path, os.path.join(PASTA_DRIVE_FINAL, os.path.basename(full_path)))\n", + " print(\" ✅ Concluído!\")\n", + "\n", + " processed.add(full_path)\n", + "\n", + " time.sleep(5)\n", + "\n", + "# Inicia o monitor em segundo plano\n", + "threading.Thread(target=engine_loop, daemon=True).start()\n", + "\n", + "# ==========================================\n", + "# 4. 
INICIAR INTERFACE\n", + "# ==========================================\n", + "print(\"\\n🚀 INICIANDO INTERFACE...\")\n", + "print(\"⚠️ CLIQUE NO LINK PÚBLICO (gradio.live) ABAIXO:\")\n", + "print(\"=\"*60)\n", + "\n", + "# Configuração de Display Virtual (Necessário para Colab)\n", + "subprocess.Popen(['Xvfb', ':1', '-screen', '0', '2560x1440x8'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)\n", + "time.sleep(2)\n", + "os.environ.update({'DISPLAY':':1.0', 'MPLBACKEND':'Agg', 'CUDA_VISIBLE_DEVICES':'0'})\n", + "\n", + "# Roda o App\n", + "!/content/ViralCutter/.venv/bin/python webui/app.py --colab" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nEIsMZLwJ5kD" + }, + "source": [ + "# Créditos\n", + "\n", + "Inspirado no [reels clips automator](https://github.com/eddieoz/reels-clips-automator) e no [YoutubeVideoToAIPoweredShorts](https://github.com/Fitsbit/YoutubeVideoToAIPoweredShorts)
\n", + "\n", + "---\n", + "![Rafa.png](https://i.imgur.com/cGknQpU.png;base64)\n", + "\n", + "Desenvolvido por **Rafa.Godoy**
\n", + "[ ![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white) ](https://github.com/rafaelGodoyEbert)
\n", + "[ ![X](https://img.shields.io/twitter/url?url=https%3A%2F%2Ftwitter.com%2FGodoyEbert) ](https://twitter.com/GodoyEbert)
\n", + "[Instagram](https://www.instagram.com/rafael.godoy.ebert/)
\n", + "[ ![](https://dcbadge.vercel.app/api/server/aihubbrasil) ](https://discord.gg/aihubbrasil)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tpLJmPqGT5_u" + }, + "source": [ + "`0.8v Alpha`
\n", + "\n", + "Apenas uma alternativa gratuita ao `opus.pro` e ao `vidyo.ai`
\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/ViralCutter-SmoothZoom.ipynb b/ViralCutter-SmoothZoom.ipynb new file mode 100644 index 0000000..36bc6cb --- /dev/null +++ b/ViralCutter-SmoothZoom.ipynb @@ -0,0 +1,239 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "header_smooth_zoom" + }, + "source": [ + "# 🎬 ViralCutter - Cyclic Smooth Zoom Edition\n", + "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/masterface77/ViralCutter/blob/smooth-zoom/ViralCutter-SmoothZoom.ipynb)\n", + "\n", + "Versão especial com **YOLO Tracking + Cyclic Smooth Zoom** automático!\n", + "\n", + "### ✨ Novidades v0.9\n", + "- 🔄 **Zoom Cíclico**: zoom in (3s) → hold (2s) → **SNAP BACK** (instantâneo) → repete\n", + "- 🎥 **Tracking Ultra Suave**: alpha 0.02 (câmera segue o rosto bem devagar)\n", + "- 🎯 **Efeito Dinâmico**: aproximação lenta e retorno imediato\n", + "- ⚡ **T4 Otimizado**: WhisperX com int8 quantization (50% menos VRAM)\n", + "\n", + "### 🚀 Recursos\n", + "- ✅ Download de vídeos do YouTube\n", + "- ✅ Cortes automáticos com IA (Gemini)\n", + "- ✅ Legendas dinâmicas\n", + "- ✅ Upload automático para Google Drive\n", + "- ✅ Transcrição otimizada para GPUs T4\n", + "\n", + "---\n", + "[![Discord](https://dcbadge.limes.pink/api/server/tAdPHFAbud)](https://discord.gg/tAdPHFAbud)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "main_cell_smooth_zoom" + }, + "outputs": [], + "source": [ + "#@title 🚀 INICIAR VIRALCUTTER (CYCLIC SMOOTH ZOOM + T4 OTIMIZADO)\n", + "#@markdown Execute esta célula para instalar e iniciar a interface.\n", + "#@markdown\n", + "#@markdown ### ⚙️ Configurações 
do Zoom Cíclico\n", + "#@markdown - **Zoom In**: 3 segundos (aproxima suavemente no rosto)\n", + "#@markdown - **Hold**: 2 segundos (mantém close-up)\n", + "#@markdown - **SNAP BACK**: (instantâneo para visão ampla)\n", + "#@markdown - **Hold**: 2 segundos | **Repete** até o fim\n", + "#@markdown\n", + "#@markdown ### ⚡ Otimização T4 (WhisperX)\n", + "#@markdown - Modelo: `large-v2` (quantizado)\n", + "#@markdown - Compute type: `int8_float16` (50% menos VRAM)\n", + "#@markdown - Batch size: 8 (evita OOM em vídeos longos)\n", + "#@markdown - Idioma padrão: Português\n", + "\n", + "import os\n", + "import shutil\n", + "import subprocess\n", + "import threading\n", + "import time\n", + "import sys\n", + "from IPython.display import clear_output\n", + "from google.colab import drive\n", + "\n", + "# ==========================================\n", + "# ⚙️ CONFIGURAÇÕES\n", + "# ==========================================\n", + "PASTA_DRIVE_FINAL = \"/content/drive/MyDrive/ViralCutter_SmoothZoom\"\n", + "REPO_URL = \"https://github.com/masterface77/ViralCutter.git\"\n", + "BRANCH = \"smooth-zoom\" # Branch com Cyclic Smooth Zoom + T4 Optimization\n", + "\n", + "# ==========================================\n", + "# 1. MONTAGEM DO DRIVE\n", + "# ==========================================\n", + "print(\"1️⃣ Conectando ao Google Drive...\")\n", + "if not os.path.exists('/content/drive'):\n", + " drive.mount('/content/drive')\n", + "\n", + "os.makedirs(PASTA_DRIVE_FINAL, exist_ok=True)\n", + "print(f\"✅ Drive conectado! Vídeos irão para: {PASTA_DRIVE_FINAL}\")\n", + "\n", + "# ==========================================\n", + "# 2. 
INSTALAÇÃO (BRANCH SMOOTH-ZOOM)\n", + "# ==========================================\n", + "if not os.path.exists(\"/content/ViralCutter\"):\n", + " print(f\"\\n📦 Clonando ViralCutter (branch: {BRANCH})...\")\n", + "\n", + " # Clone da branch smooth-zoom diretamente\n", + " subprocess.run(f\"git clone -b {BRANCH} {REPO_URL} /content/ViralCutter\", shell=True)\n", + " os.chdir(\"/content/ViralCutter\")\n", + "\n", + " # UV (gerenciador rápido)\n", + " subprocess.run(\"pip install uv -q\", shell=True)\n", + "\n", + " # Drivers de Sistema\n", + " subprocess.run(\"apt-get update -y -qq && apt-get install -y libcudnn8 ffmpeg xvfb -qq\", shell=True)\n", + "\n", + " # Cria ambiente virtual\n", + " subprocess.run([\"uv\", \"venv\", \".venv\"], check=True)\n", + "\n", + " # Instalação das dependências (T4 Optimized)\n", + " print(\" 🚀 Instalando dependências + Ultralytics (T4 Optimizado)...\")\n", + " cmds = [\n", + " \"uv pip install --python .venv faster-whisper\",\n", + " \"uv pip install --python .venv git+https://github.com/m-bain/whisperx.git\",\n", + " \"uv pip install --python .venv -r requirements.txt\",\n", + " \"uv pip install --python .venv -U --pre 'yt-dlp[default]'\",\n", + " \"uv pip install --python .venv google-generativeai pandas onnxruntime-gpu\",\n", + " \"uv pip install --python .venv transformers==4.46.3 accelerate>=0.26.0\",\n", + " \"uv pip install --python .venv torch==2.3.1+cu121 torchvision==0.18.1+cu121 torchaudio==2.3.1+cu121 --index-url https://download.pytorch.org/whl/cu121\",\n", + " \"uv pip install --python .venv insightface\",\n", + " # YOLO Cyclic Smooth Zoom\n", + " \"uv pip install --python .venv ultralytics\",\n", + " \"uv pip uninstall --python .venv mediapipe protobuf flatbuffers\",\n", + " \"uv pip install --python .venv 'mediapipe>=0.10.0' 'protobuf>=3.20,<5.0' 'flatbuffers>=2.0'\"\n", + " ]\n", + "\n", + " for cmd in cmds:\n", + " subprocess.run(cmd, shell=True, check=True)\n", + "\n", + " print(\"✅ Instalação Concluída!\")\n", + " 
print(\"🔄 Cyclic Smooth Zoom + T4 Optimization ativados!\")\n", + "else:\n", + " os.chdir(\"/content/ViralCutter\")\n", + " print(\"\\n✅ Sistema já instalado.\")\n", + " # Atualizar para última versão\n", + " subprocess.run(\"git pull origin smooth-zoom\", shell=True)\n", + " print(\"🔄 Atualizado para última versão!\")\n", + "\n", + "# ==========================================\n", + "# 3. MONITOR DE ARQUIVOS (DRIVE SYNC)\n", + "# APENAS arquivos _subtitled (COM LEGENDA)\n", + "# ==========================================\n", + "def engine_loop():\n", + " print(\"👀 Engine: Monitorando pasta VIRALS...\")\n", + " print(\"📌 APENAS arquivos COM LEGENDA (_subtitled) serão salvos!\")\n", + " processed = set()\n", + " WATCH_DIR = \"/content/ViralCutter/VIRALS\"\n", + "\n", + " while True:\n", + " try:\n", + " if os.path.exists(WATCH_DIR):\n", + " for r, d, f in os.walk(WATCH_DIR):\n", + " for file in f:\n", + " if not file.endswith(\".mp4\"): continue\n", + " \n", + " # Ignora arquivos temporários\n", + " if any(x in file.lower() for x in [\"input\", \"temp\", \"original\", \".part\", \"processing\"]): continue\n", + " \n", + " # ✅ APENAS faz upload de arquivos COM LEGENDA (_subtitled)\n", + " if \"_subtitled\" not in file.lower(): continue\n", + "\n", + " full_path = os.path.join(r, file)\n", + "\n", + " if full_path not in processed:\n", + " try:\n", + " if os.path.getsize(full_path) > 1e6:\n", + " s1 = os.path.getsize(full_path)\n", + " time.sleep(5)\n", + " if not os.path.exists(full_path): continue\n", + " if os.path.getsize(full_path) != s1: continue\n", + "\n", + " print(f\"\\n💎 Corte COM LEGENDA: {file}\")\n", + " print(f\" ☁️ Enviando para Drive...\")\n", + " shutil.copy(full_path, os.path.join(PASTA_DRIVE_FINAL, os.path.basename(full_path)))\n", + " print(\" ✅ Salvo no Drive!\")\n", + "\n", + " processed.add(full_path)\n", + " except FileNotFoundError:\n", + " continue\n", + " except Exception as e:\n", + " pass\n", + "\n", + " time.sleep(5)\n", + "\n", + 
"threading.Thread(target=engine_loop, daemon=True).start()\n", + "\n", + "# ==========================================\n", + "# 4. INICIAR INTERFACE GRADIO\n", + "# ==========================================\n", + "print(\"\\n🚀 INICIANDO VIRALCUTTER...\")\n", + "print(\"⚡ T4 Otimizado: int8_float16, batch_size=8, large-v2\")\n", + "print(\"⚠️ CLIQUE NO LINK gradio.live ABAIXO:\")\n", + "print(\"=\"*60)\n", + "\n", + "# Display Virtual\n", + "subprocess.Popen(['Xvfb', ':1', '-screen', '0', '2560x1440x8'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)\n", + "time.sleep(2)\n", + "os.environ.update({'DISPLAY':':1.0', 'MPLBACKEND':'Agg', 'CUDA_VISIBLE_DEVICES':'0', 'VIRALCUTTER_FACE_MODEL': 'yolo'})\n", + "\n", + "# Roda o App com YOLO como padrão\n", + "!/content/ViralCutter/.venv/bin/python webui/app.py --colab --face-model yolo" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "credits_smooth_zoom" + }, + "source": [ + "## 📝 Créditos\n", + "\n", + "**ViralCutter** por [Rafa.Godoy](https://github.com/rafaelGodoyEbert)\n", + "\n", + "**Cyclic Smooth Zoom** implementado com:\n", + "- [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics)\n", + "- ByteTrack para tracking persistente\n", + "- EMA (Exponential Moving Average) com alpha=0.02\n", + "\n", + "**Otimização T4:**\n", + "- WhisperX com int8 quantization\n", + "- Modelo large-v2 (mais estável)\n", + "- Batch size otimizado para 16GB VRAM\n", + "\n", + "### 🔄 Zoom Cíclico | ⚡ T4 Otimizado\n", + "```\n", + "Zoom: 1.0x ──(3s)──► 1.4x ──(2s hold)──► SNAP ──(2s hold)──► repete\n", + "```\n", + "\n", + "---\n", + "`v0.9 Alpha + Cyclic Smooth Zoom + T4 Optimization`" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git 
a/main_improved.py b/main_improved.py index 17451a2..7eb983f 100644 --- a/main_improved.py +++ b/main_improved.py @@ -118,15 +118,16 @@ def main(): parser.add_argument("--project-path", help="Path to existing project folder (overrides URL/Latest)") parser.add_argument("--workflow", choices=["1", "2", "3"], default="1", help="Workflow choice: 1=Full, 2=Cut Only, 3=Subtitles Only") - parser.add_argument("--face-model", choices=["insightface", "mediapipe"], default="insightface", help="Face detection model") + parser.add_argument("--face-model", choices=["yolo", "insightface", "mediapipe"], default="insightface", help="Face detection model: 'yolo' (Smooth Zoom), 'insightface' (default), 'mediapipe'") parser.add_argument("--face-mode", choices=["auto", "1", "2"], default="auto", help="Face tracking mode: auto, 1, 2") parser.add_argument("--subtitle-config", help="Path to subtitle configuration JSON file") - parser.add_argument("--no-face-mode", choices=["padding", "zoom"], default="padding", help="Method to handle segments with no face detected: 'padding' (9:16 frame with black bars) or 'zoom' (Center Crop Zoom)") + parser.add_argument("--no-face-mode", choices=["padding", "zoom", "blur"], default="padding", help="Method to handle segments with no face detected: 'padding' (9:16 frame with black bars), 'zoom' (Center Crop Zoom), or 'blur' (Blur Background - original centered with blurred fill)") parser.add_argument("--face-detect-interval", type=str, default="0.17,1.0", help="Face detection interval in seconds. 
Single value or 'interval_1face,interval_2face'") parser.add_argument("--face-filter-threshold", type=float, default=0.35, help="Relative area threshold to ignore background faces (default: 0.35)") parser.add_argument("--face-two-threshold", type=float, default=0.60, help="Relative area threshold to trigger 2-face mode (default: 0.60)") parser.add_argument("--face-confidence-threshold", type=float, default=0.30, help="Face detection confidence threshold (0.0 - 1.0) (default: 0.30)") parser.add_argument("--face-dead-zone", type=str, default="40", help="Camera movement dead zone in pixels (default: 40)") # str to support future "auto" + parser.add_argument("--tracking-alpha", type=float, default=0.05, help="Camera tracking smoothness (0.02=Ultra Smooth, 0.05=Normal, 0.10=Fast)") parser.add_argument("--focus-active-speaker", action="store_true", help="Enable experimental active speaker focus (InsightFace only)") parser.add_argument("--active-speaker-mar", type=float, default=0.03, help="Mouth Aspect Ratio threshold for active speaker (0.0 - 1.0) (default: 0.03)") parser.add_argument("--active-speaker-score-diff", type=float, default=1.5, help="Score difference to focus on active speaker (default: 1.5)") @@ -135,7 +136,7 @@ def main(): parser.add_argument("--active-speaker-motion-sensitivity", type=float, default=0.05, help="Motion sensitivity multiplier (default: 0.05)") parser.add_argument("--active-speaker-decay", type=float, default=2.0, help="Activity score decay rate (default: 2.0)") parser.add_argument("--skip-prompts", action="store_true", help="Skip interactive prompts and use defaults/existing files") - parser.add_argument("--video-quality", choices=["best", "1080p", "720p", "480p"], default="best", help="Video download quality") + parser.add_argument("--video-quality", choices=["best", "4k", "1440p", "1080p", "720p", "480p"], default="best", help="Video download quality") parser.add_argument("--skip-youtube-subs", action="store_true", help="Skip downloading 
YouTube subtitles") parser.add_argument("--translate-target", help="Target language code for subtitle translation (e.g. 'pt', 'en').") @@ -569,6 +570,7 @@ def main(): two_face_threshold=args.face_two_threshold, confidence_threshold=args.face_confidence_threshold, dead_zone=dead_zone_val, + tracking_alpha=args.tracking_alpha, focus_active_speaker=args.focus_active_speaker, active_speaker_mar=args.active_speaker_mar, active_speaker_score_diff=args.active_speaker_score_diff, diff --git a/requirements.txt b/requirements.txt index 0e221bd..ee34a9d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ whisperx mediapipe google-genai insightface +ultralytics onnxruntime-gpu gradio opencv-python diff --git a/run.bat b/run.bat index ab571eb..cb6e05e 100644 --- a/run.bat +++ b/run.bat @@ -1,9 +1,21 @@ @echo off setlocal -title ViralCutter +title ViralCutter - Visual Opus Quality +echo ========================================== +echo ViralCutter - Visual Opus Quality (CLI) +echo ========================================== +echo. +echo Pipeline: Denoise - Auto Illumination - Color Grading - Unsharp +echo Use --no-face-mode blur para Blur Background! +echo. +echo ========================================== cd /d "%~dp0" call .venv\Scripts\activate.bat -python main_improved.py + +echo Iniciando ViralCutter... +echo. +python main_improved.py %* + echo. pause \ No newline at end of file diff --git a/run_webui.bat b/run_webui.bat index 4024052..049bfa9 100644 --- a/run_webui.bat +++ b/run_webui.bat @@ -1,4 +1,21 @@ @echo off +setlocal +title ViralCutter - Visual Opus Quality +echo ========================================== +echo ViralCutter - Visual Opus Quality WebUI +echo ========================================== +echo. +echo Pipeline: Denoise - Auto Illumination - Color Grading - Unsharp +echo Blur Background + YOLO Talking-Head disponiveis! +echo. 
+echo ========================================== + +cd /d "%~dp0" call .venv\Scripts\activate.bat + +echo Iniciando WebUI... +echo. python webui\app.py + +echo. pause diff --git a/scripts/download_video.py b/scripts/download_video.py index c0d6096..869a6f4 100644 --- a/scripts/download_video.py +++ b/scripts/download_video.py @@ -5,6 +5,43 @@ from i18n.i18n import I18nAuto i18n = I18nAuto() +def _get_cookie_opts(): + """ + Detecta automaticamente a melhor forma de passar cookies para o yt-dlp. + + Prioridade: + 1. Arquivo cookies.txt (ideal para Kaggle/Colab/Docker) + - Procura em: ./cookies.txt, ../cookies.txt, /kaggle/working/cookies.txt + 2. Cookies do navegador Chrome (ideal para uso local no Windows/Mac/Linux) + 3. Nenhum cookie (fallback final) + """ + # Locais comuns para cookies.txt + cookie_search_paths = [ + os.path.join(os.getcwd(), 'cookies.txt'), + os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'cookies.txt'), + '/kaggle/working/cookies.txt', + '/content/cookies.txt', # Google Colab + ] + + for cookie_path in cookie_search_paths: + if os.path.exists(cookie_path) and os.path.getsize(cookie_path) > 0: + print(f"🍪 Usando cookies de arquivo: {cookie_path}") + return {'cookiefile': cookie_path} + + # Fallback: tentar cookies do navegador (só funciona em ambiente local) + try: + # Teste rápido para ver se o Chrome está acessível + import sqlite3 + return {'cookiesfrombrowser': ('chrome',)} + except Exception: + pass + + print("⚠️ Nenhum cookie encontrado. 
Downloads podem falhar com 'Sign in to confirm you're not a bot'.") + print(" → Para Kaggle/Colab: faça upload de um arquivo cookies.txt na raiz do projeto.") + print(" → Para uso local: certifique-se de ter o Chrome instalado.") + return {} + + def sanitize_filename(name): """Remove caracteres inválidos e emojis para evitar erro de encoding no Windows.""" # Remove caracteres reservados do sistema de arquivos @@ -32,19 +69,18 @@ def progress_hook(d): print(f"[download] Download concluído: {d['filename']}", flush=True) def download(url, base_root="VIRALS", download_subs=True, quality="best"): - # 1. Extrair informações do vídeo para pegar o título + # Detectar cookies automaticamente (cookies.txt para Kaggle/Colab, Chrome para local) + cookie_opts = _get_cookie_opts() + # 1. Extrair informações do vídeo para pegar o título print(i18n("Extracting video information...")) title = None - # ... (Keep existing title extraction logic) ... - # Instead of repeating it effectively, I will rely on the diff to keep it or re-write it if I have to replace the whole block. - # Since replace_file_content works on line ranges, I should be careful. - # Let's assume I'm replacing the whole function body or significant parts. 
- # Tentativa 1: Com cookies try: - with yt_dlp.YoutubeDL({'quiet': True, 'no_warnings': True, 'cookiesfrombrowser': ('chrome',)}) as ydl: + info_opts = {'quiet': True, 'no_warnings': True} + info_opts.update(cookie_opts) + with yt_dlp.YoutubeDL(info_opts) as ydl: info = ydl.extract_info(url, download=False) title = info.get('title') except Exception as e: @@ -114,6 +150,8 @@ def download(url, base_root="VIRALS", download_subs=True, quality="best"): # Mapeamento de Qualidade quality_map = { "best": 'bestvideo+bestaudio/best', + "4k": 'bestvideo[height<=2160]+bestaudio/best', + "1440p": 'bestvideo[height<=1440]+bestaudio/best', "1080p": 'bestvideo[height<=1080]+bestaudio/best[height<=1080]', "720p": 'bestvideo[height<=720]+bestaudio/best[height<=720]', "480p": 'bestvideo[height<=480]+bestaudio/best[height<=480]' @@ -143,7 +181,8 @@ def download(url, base_root="VIRALS", download_subs=True, quality="best"): 'force_ipv4': True, } - + # Injetar configuração de cookies detectada automaticamente + ydl_opts.update(cookie_opts) if download_subs: ydl_opts['postprocessors'] = [{ diff --git a/scripts/edit_video.py b/scripts/edit_video.py index ebefb2b..4365278 100644 --- a/scripts/edit_video.py +++ b/scripts/edit_video.py @@ -3,7 +3,7 @@ import os import subprocess import mediapipe as mp -from scripts.one_face import crop_and_resize_single_face, resize_with_padding, detect_face_or_body, crop_center_zoom +from scripts.one_face import crop_and_resize_single_face, resize_with_padding, resize_with_blur_background, detect_face_or_body, crop_center_zoom from scripts.two_face import crop_and_resize_two_faces, detect_face_or_body_two_faces try: from scripts.face_detection_insightface import init_insightface, detect_faces_insightface, crop_and_resize_insightface @@ -12,6 +12,15 @@ INSIGHTFACE_AVAILABLE = False print("InsightFace not found or error importing. 
Install with: pip install insightface onnxruntime-gpu") +# YOLO Tracking (Smooth Zoom) +try: + from scripts.face_tracking_yolo import init_yolo, generate_short_yolo, is_yolo_available + YOLO_TRACKING_AVAILABLE = True +except ImportError: + YOLO_TRACKING_AVAILABLE = False + print("YOLO Tracking not available. Install with: pip install ultralytics") + + # Global cache for encoder CACHED_ENCODER = None @@ -141,6 +150,8 @@ def generate_short_fallback(input_file, output_file, index, project_folder, fina if no_face_mode == "zoom": result = crop_center_zoom(frame) + elif no_face_mode == "blur": + result = resize_with_blur_background(frame) else: result = resize_with_padding(frame) @@ -172,8 +183,11 @@ def finalize_video(input_file, output_file, index, fps, project_folder, final_fo "ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-stats", "-i", output_file, "-i", audio_file, - "-c:v", encoder_name, "-preset", encoder_preset, "-b:v", "5M", + "-c:v", encoder_name, "-preset", encoder_preset, + "-crf", "18", # Visually lossless quality + "-b:v", "25M", # 4K quality bitrate "-c:a", "aac", "-b:a", "192k", + "-pix_fmt", "yuv420p", # YouTube/TikTok compatibility "-r", str(fps), final_output ] @@ -336,6 +350,8 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_ else: if no_face_mode == "zoom": result = crop_center_zoom(frame) + elif no_face_mode == "blur": + result = resize_with_blur_background(frame) else: result = resize_with_padding(frame) coordinate_log.append({"frame": frame_index, "faces": []}) @@ -355,6 +371,8 @@ def generate_short_mediapipe(input_file, output_file, index, face_mode, project_ else: if no_face_mode == "zoom": result = crop_center_zoom(frame) + elif no_face_mode == "blur": + result = resize_with_blur_background(frame) else: result = resize_with_padding(frame) @@ -449,6 +467,8 @@ def generate_short_haar(input_file, output_file, index, project_folder, final_fo # No face detected for a while -> Center/Padding fallback if 
no_face_mode == "zoom": result = crop_center_zoom(frame) + elif no_face_mode == "blur": + result = resize_with_blur_background(frame) else: result = resize_with_padding(frame) out.write(result) @@ -963,6 +983,8 @@ def sort_score(f): # Fallback for this frame if no_face_mode == "zoom": result = crop_center_zoom(frame) + elif no_face_mode == "blur": + result = resize_with_blur_background(frame) else: result = resize_with_padding(frame) out.write(result) @@ -1084,7 +1106,7 @@ def sort_score(f): return "1" -def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detection_period=None, filter_threshold=0.35, two_face_threshold=0.60, confidence_threshold=0.30, dead_zone=40, focus_active_speaker=False, active_speaker_mar=0.03, active_speaker_score_diff=1.5, include_motion=False, active_speaker_motion_deadzone=3.0, active_speaker_motion_sensitivity=0.05, active_speaker_decay=2.0, segments_data=None, no_face_mode="padding"): +def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detection_period=None, filter_threshold=0.35, two_face_threshold=0.60, confidence_threshold=0.30, dead_zone=40, tracking_alpha=0.05, focus_active_speaker=False, active_speaker_mar=0.03, active_speaker_score_diff=1.5, include_motion=False, active_speaker_motion_deadzone=3.0, active_speaker_motion_sensitivity=0.05, active_speaker_decay=2.0, segments_data=None, no_face_mode="padding"): # Lazy init solutions only when needed to avoid AttributeError if import failed partially mp_face_detection = None mp_face_mesh = None @@ -1099,10 +1121,24 @@ def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detec # Priority: User Choice -> Fallbacks + # NEW: YOLO Tracking (Smooth Zoom) - highest priority if selected + yolo_working = False + if YOLO_TRACKING_AVAILABLE and face_model == "yolo": + try: + print("Initializing YOLO Tracking (Smooth Zoom)...") + if init_yolo(): + yolo_working = True + print("YOLO Tracking Initialized Successfully!") + else: + 
print("WARNING: YOLO init returned False. Will try InsightFace.") + except Exception as e: + print(f"WARNING: YOLO Initialization Failed ({e}). Will try InsightFace.") + yolo_working = False + insightface_working = False - # Only init InsightFace if selected or default - if INSIGHTFACE_AVAILABLE and (face_model == "insightface"): + # Only init InsightFace if selected or default (and YOLO not working) + if INSIGHTFACE_AVAILABLE and (face_model == "insightface" or (face_model == "yolo" and not yolo_working)): try: print("Initializing InsightFace...") init_insightface() @@ -1112,6 +1148,7 @@ def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detec print(f"WARNING: InsightFace Initialization Failed ({e}). Will try MediaPipe.") insightface_working = False + mediapipe_working = False use_haar = False @@ -1176,8 +1213,25 @@ def edit(project_folder="tmp", face_model="insightface", face_mode="auto", detec success = False detected_mode = "1" # Default if detection fails or fallback + # 0. Try YOLO (Smooth Zoom) - NEW + if yolo_working and not success: + try: + print(f"[YOLO Smooth Zoom] Processing: {input_filename}") + res = generate_short_yolo(input_file, output_file, index, + project_folder, final_folder, + face_mode=face_mode, + no_face_mode=no_face_mode, + alpha=tracking_alpha) + if res: detected_mode = res + success = True + except Exception as e: + import traceback + traceback.print_exc() + print(f"YOLO processing failed for {input_filename}: {e}") + print("Falling back to InsightFace...") + # 1. 
def crop_and_resize_insightface(frame, face_bbox, target_width=1080, target_height=1920):
    """
    Build a portrait frame horizontally centred on a detected face.

    The full source height is always used. The crop width is the wider of a
    tight target-aspect crop and 42% of the source width. When that crop is
    wider than the target aspect ratio, it is scaled to fit and pasted over a
    blurred, fill-cropped copy of the frame.

    Args:
        frame: Source image as an array whose first two dimensions are (height, width).
        face_bbox: Face box as ``[x1, y1, x2, y2]``; only the x coordinates are used.
        target_width: Output width in pixels.
        target_height: Output height in pixels.

    Returns:
        The composed frame of size ``target_width`` x ``target_height`` after the
        enhancement step (``enhance_frame`` when available, otherwise an unsharp mask).
    """
    h, w = frame.shape[:2]
    x1, _y1, x2, _y2 = face_bbox

    # Coerce to a plain int so the value is always usable as a slice index
    # (crop_to_vertical in face_tracking_yolo.py does the same).
    # NOTE(review): presumably bboxes may arrive as floats — confirm against the detector.
    face_center_x = int((x1 + x2) // 2)

    target_ar = target_width / target_height  # 0.5625 for the default 1080x1920

    # Tight target-aspect crop (minimum width), limited by the source width
    tight_w = min(int(h * target_ar), w)

    # Wider crop: show ~42% of source width (less zoom, more context)
    wide_w = int(w * 0.42)

    # Pick the wider option for less zoom
    source_w = min(max(tight_w, wide_w), w)
    source_h = h  # Always use full height

    # Centre the crop on the face horizontally, clamped to the frame
    crop_x = max(0, min(face_center_x - source_w // 2, w - source_w))
    cropped = frame[0:source_h, crop_x:crop_x + source_w]

    crop_ar = source_w / source_h

    if abs(crop_ar - target_ar) < 0.03:
        # Crop is already close to the target aspect ratio -> direct resize
        result = cv2.resize(cropped, (target_width, target_height), interpolation=cv2.INTER_LANCZOS4)
    else:
        # 1. Blurred background: centre fill-crop to the target aspect ratio,
        #    blurred at half resolution and scaled back up
        bg_crop_w = min(int(h * target_ar), w)
        bg_x = (w - bg_crop_w) // 2
        bg_slice = frame[0:h, bg_x:bg_x + bg_crop_w]
        bg_small = cv2.resize(bg_slice, (target_width // 2, target_height // 2), interpolation=cv2.INTER_AREA)
        bg_small = cv2.GaussianBlur(bg_small, (51, 51), 0)
        result = cv2.resize(bg_small, (target_width, target_height), interpolation=cv2.INTER_LINEAR)

        # 2. Scale the foreground to fit the target width
        scale = target_width / source_w
        fg_w = target_width
        fg_h = int(source_h * scale)

        if fg_h > target_height:
            # Crop is narrower than the target: fit by height instead
            fg_h = target_height
            fg_w = int(source_w * (target_height / source_h))

        foreground = cv2.resize(cropped, (fg_w, fg_h), interpolation=cv2.INTER_LANCZOS4)

        # 3. Centre the foreground on the canvas
        pad_top = (target_height - fg_h) // 2
        pad_left = (target_width - fg_w) // 2
        result[pad_top:pad_top + fg_h, pad_left:pad_left + fg_w] = foreground

    # Enhancement step: project pipeline when importable, otherwise an unsharp mask
    if QUALITY_AVAILABLE:
        result = enhance_frame(result, preset_name="high")
    else:
        gaussian = cv2.GaussianBlur(result, (0, 0), 3.0)
        result = cv2.addWeighted(result, 1.8, gaussian, -0.8, 0)

    return result
# Module-level YOLO state, populated by init_yolo()
YOLO_AVAILABLE = False
YOLO_MODEL = None


def init_yolo(model_name="yolov8n.pt"):
    """
    Load a YOLO model for tracking and publish it in the module globals.

    A face-specific checkpoint named ``yolov8n-face.pt`` inside the ``models``
    directory one level above this module's directory is preferred when that
    file exists; otherwise ``model_name`` is loaded.

    Args:
        model_name: Checkpoint to load when the face model is not found.

    Returns:
        bool: True when the model was loaded and moved to the selected device,
        False when ultralytics/torch cannot be imported or loading fails.
    """
    global YOLO_AVAILABLE, YOLO_MODEL

    try:
        # Lazy imports so this module can be imported without ultralytics installed
        from ultralytics import YOLO
        import torch

        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(f"[YOLO] Using device: {device}")

        models_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "models")
        face_model_path = os.path.join(models_dir, "yolov8n-face.pt")

        if os.path.exists(face_model_path):
            print(f"[YOLO] Loading face model: {face_model_path}")
            model = YOLO(face_model_path)
        else:
            print(f"[YOLO] Face model not found. Using default: {model_name}")
            model = YOLO(model_name)

        model.to(device)

        # Publish only a fully initialised model, so a failure above never
        # leaves a half-configured model in the global.
        YOLO_MODEL = model
        YOLO_AVAILABLE = True
        print("[YOLO] Initialization successful!")
        return True

    except ImportError as e:
        print(f"[YOLO] ultralytics not installed: {e}")
        YOLO_AVAILABLE = False
        return False
    except Exception as e:
        print(f"[YOLO] Initialization failed: {e}")
        YOLO_AVAILABLE = False
        return False


class SmoothBBox:
    """
    Exponential Moving Average (EMA) smoothing for bounding boxes.

    Keeps the last smoothed box while detections are briefly missing and
    reports "no box" once the gap exceeds ``max_frames_hold`` updates.
    """

    def __init__(self, alpha=0.05, max_frames_hold=90):
        """
        Args:
            alpha: Smoothing factor (0.0 = no movement, 1.0 = instant snap).
            max_frames_hold: Number of consecutive updates without a detection
                during which the last smoothed box is still returned.
        """
        self.alpha = alpha
        self.smooth_bbox = None
        self.target_bbox = None
        self.frames_without_detection = 0
        self.max_frames_hold = max_frames_hold

    def update(self, detected_bbox):
        """
        Feed one frame's detection and get the smoothed box back.

        Args:
            detected_bbox: ``(x1, y1, x2, y2)`` or None if nothing was detected.

        Returns:
            Tuple ``(smoothed_bbox, zoom)``. ``smoothed_bbox`` is a 4-tuple of
            integers, or None when there is no box to report. ``zoom`` is always 1.0.
        """
        if detected_bbox is None:
            # No detection: hold the previous position for a limited time
            self.frames_without_detection += 1
            if self.frames_without_detection > self.max_frames_hold:
                return None, 1.0
        else:
            self.target_bbox = np.array(detected_bbox, dtype=float)
            self.frames_without_detection = 0

            if self.smooth_bbox is None:
                # First detection: snap to it
                self.smooth_bbox = self.target_bbox.copy()
            else:
                self.smooth_bbox = (
                    self.alpha * self.target_bbox
                    + (1 - self.alpha) * self.smooth_bbox
                )

        if self.smooth_bbox is None:
            return None, 1.0
        return tuple(self.smooth_bbox.astype(int)), 1.0

    def reset(self):
        """Forget the smoothed box, the last target and the missed-frame counter."""
        self.smooth_bbox = None
        self.target_bbox = None
        self.frames_without_detection = 0


def get_best_encoder():
    """
    Pick a video encoder from the list printed by ``ffmpeg -encoders``.

    Returns:
        Tuple ``(encoder_name, preset)``. The first H.264 hardware encoder named
        in ffmpeg's output wins; ``("libx264", "ultrafast")`` is returned when
        none is listed or ffmpeg cannot be run.
    """
    # Checked in priority order
    hardware_encoders = (
        ("h264_nvenc", "fast"),
        ("h264_amf", "speed"),
        ("h264_qsv", "veryfast"),
        ("h264_videotoolbox", "default"),
    )

    try:
        result = subprocess.run(['ffmpeg', '-hide_banner', '-encoders'],
                                capture_output=True, text=True)
        output = result.stdout
    except (OSError, ValueError, subprocess.SubprocessError):
        # ffmpeg missing/unrunnable or undecodable output: use the software encoder
        return ("libx264", "ultrafast")

    for encoder in hardware_encoders:
        if encoder[0] in output:
            return encoder

    return ("libx264", "ultrafast")
def crop_to_vertical(frame, center_x, center_y, frame_width, frame_height, zoom=1.0):
    """
    Compose a 1080x1920 frame horizontally centred on ``center_x``.

    The full source height is always used. The crop width is the wider of a
    tight 9:16 crop and 42% of the source width. When that crop is wider than
    9:16, it is scaled to fit and pasted over a blurred, fill-cropped copy of
    the frame.

    Args:
        frame: Source image.
        center_x: Horizontal centre of the subject, in source pixels.
        center_y: Accepted for interface compatibility; not used (full height is kept).
        frame_width: Source width in pixels.
        frame_height: Source height in pixels.
        zoom: Accepted for interface compatibility; not used.

    Returns:
        The composed 1080x1920 frame after the enhancement step
        (``enhance_frame`` when available, otherwise an unsharp mask).
    """
    target_w, target_h = 1080, 1920
    target_ar = target_w / target_h  # 0.5625

    # Tight 9:16 crop, limited by the source width
    tight_w = min(int(frame_height * target_ar), frame_width)

    # Wider crop: show ~42% of source width (less zoom, more context)
    wide_w = int(frame_width * 0.42)

    # Pick the wider option for less zoom
    source_w = min(max(tight_w, wide_w), frame_width)
    source_h = frame_height

    # Centre on the subject horizontally, clamped to the frame
    crop_x = int(center_x - source_w // 2)
    crop_x = max(0, min(crop_x, frame_width - source_w))

    cropped = frame[0:source_h, crop_x:crop_x + source_w]

    crop_ar = source_w / source_h

    if abs(crop_ar - target_ar) < 0.03:
        # Already ~9:16 -> direct resize
        resized = cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4)
    else:
        # Wider -> compose over a blurred background built at half resolution
        bg_crop_w = min(int(frame_height * target_ar), frame_width)
        bg_x = (frame_width - bg_crop_w) // 2
        bg_slice = frame[0:frame_height, bg_x:bg_x + bg_crop_w]
        bg_small = cv2.resize(bg_slice, (target_w // 2, target_h // 2), interpolation=cv2.INTER_AREA)
        bg_small = cv2.GaussianBlur(bg_small, (51, 51), 0)
        resized = cv2.resize(bg_small, (target_w, target_h), interpolation=cv2.INTER_LINEAR)

        scale = target_w / source_w
        fg_w = target_w
        fg_h = int(source_h * scale)

        if fg_h > target_h:
            # Crop is narrower than 9:16: fit by height instead
            fg_h = target_h
            fg_w = int(source_w * (target_h / source_h))

        foreground = cv2.resize(cropped, (fg_w, fg_h), interpolation=cv2.INTER_LANCZOS4)

        pad_top = (target_h - fg_h) // 2
        pad_left = (target_w - fg_w) // 2
        resized[pad_top:pad_top + fg_h, pad_left:pad_left + fg_w] = foreground

    # Enhancement step: project pipeline when importable, otherwise an unsharp mask
    if QUALITY_AVAILABLE:
        return enhance_frame(resized, preset_name="high")
    else:
        gaussian = cv2.GaussianBlur(resized, (0, 0), 3.0)
        return cv2.addWeighted(resized, 1.8, gaussian, -0.8, 0)


def generate_short_yolo(input_file, output_file, index, project_folder, final_folder,
                        face_mode="auto", no_face_mode="zoom", alpha=0.05):
    """
    Process a video with YOLO tracking and smooth subject following.

    Args:
        input_file: Path to input video.
        output_file: Path for the temporary (silent) output video.
        index: Segment index, used to name the audio and final files.
        project_folder: Project folder path.
        final_folder: Final output folder.
        face_mode: Accepted for interface compatibility; not used here.
        no_face_mode: Rendering when there is no box to follow: "zoom" (centre
            crop), "blur" (blurred background); any other value uses padding.
        alpha: EMA smoothing factor (0.02=Ultra Smooth, 0.05=Normal, 0.10=Fast).

    Returns:
        str: Always "1" (single-subject mode), for compatibility with callers.

    Raises:
        RuntimeError: If init_yolo() has not succeeded.
        IOError: If the input cannot be opened or the output writer cannot be created.
    """
    if not YOLO_AVAILABLE or YOLO_MODEL is None:
        raise RuntimeError("YOLO not initialized. Call init_yolo() first.")

    print(f"[YOLO] Processing with smooth tracking (alpha={alpha}): {input_file}")

    # Resolve the no-detection renderer once instead of importing on every frame
    fallback_renderer = None
    if no_face_mode == "blur":
        from scripts.one_face import resize_with_blur_background as fallback_renderer
    elif no_face_mode != "zoom":
        from scripts.one_face import resize_with_padding as fallback_renderer

    cap = cv2.VideoCapture(input_file)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Cannot open video: {input_file}")

    out = None
    frame_idx = 0
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            # Some containers report 0/NaN; a writer cannot be created with that
            fps = 30.0
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # Initialize smoother with alpha from UI
        smoother = SmoothBBox(alpha=alpha)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_file, fourcc, fps, (1080, 1920))
        if not out.isOpened():
            raise IOError(f"Cannot open video writer: {output_file}")

        # ID of the person currently being followed
        tracked_id = None

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # persist=True keeps tracker IDs across frames. The first frame of
            # each clip uses persist=False so tracker state left over from a
            # previously processed clip is discarded.
            results = YOLO_MODEL.track(frame, persist=frame_idx > 0, conf=0.3, iou=0.5,
                                       verbose=False, classes=[0])  # class 0 = person

            best_bbox = None

            if results and len(results) > 0 and results[0].boxes is not None:
                boxes = results[0].boxes

                if len(boxes) > 0:
                    xyxy = boxes.xyxy.cpu().numpy()  # (x1, y1, x2, y2)
                    ids = boxes.id.cpu().numpy() if boxes.id is not None else None

                    # Strategy: keep following the same person if possible
                    if tracked_id is not None and ids is not None:
                        match_idx = np.where(ids == tracked_id)[0]
                        if len(match_idx) > 0:
                            best_bbox = xyxy[match_idx[0]]

                    if best_bbox is None:
                        # Otherwise pick the largest box (by area)
                        areas = (xyxy[:, 2] - xyxy[:, 0]) * (xyxy[:, 3] - xyxy[:, 1])
                        best_idx = np.argmax(areas)
                        best_bbox = xyxy[best_idx]

                        # Remember this person's ID for the following frames
                        if ids is not None:
                            tracked_id = ids[best_idx]

            # EMA smoothing (the returned zoom is always 1.0)
            smoothed, current_zoom = smoother.update(best_bbox)

            if smoothed is not None:
                x1, y1, x2, y2 = smoothed
                center_x = (x1 + x2) / 2
                # 30% from the top of the box = head/shoulders area (not torso centre)
                center_y = y1 + (y2 - y1) * 0.30

                result = crop_to_vertical(frame, center_x, center_y,
                                          frame_width, frame_height, zoom=current_zoom)
            elif fallback_renderer is None:
                # no_face_mode == "zoom": centre crop
                result = crop_to_vertical(frame, frame_width/2, frame_height/2,
                                          frame_width, frame_height, zoom=current_zoom)
            else:
                result = fallback_renderer(frame)

            out.write(result)
            frame_idx += 1

            # Progress indicator every 100 frames
            if frame_idx % 100 == 0:
                print(f"[YOLO] Progress: {frame_idx}/{total_frames} frames")
    finally:
        # Release handles even when tracking or rendering raises
        cap.release()
        if out is not None:
            out.release()

    print(f"[YOLO] Processing complete: {frame_idx} frames")

    # Finalize (mux audio)
    _finalize_video(input_file, output_file, index, fps, project_folder, final_folder)

    return "1"  # Return face mode for compatibility


def _finalize_video(input_file, output_file, index, fps, project_folder, final_folder):
    """
    Extract the audio of ``input_file`` and mux it with the processed video.

    The audio is stream-copied to ``<project_folder>/cuts/output-audio-<index>.aac``.
    When that file is produced and non-empty, ``output_file`` is re-encoded together
    with it into ``<final_folder>/final-output<index:03>_processed.mp4`` and both
    temporary files are removed. When no audio is extracted, only a warning is
    printed and no final file is written.
    """
    audio_file = os.path.join(project_folder, "cuts", f"output-audio-{index}.aac")

    # Extract audio (best effort: failure is detected below via the missing file)
    subprocess.run([
        "ffmpeg", "-y", "-hide_banner", "-loglevel", "error",
        "-i", input_file, "-vn", "-acodec", "copy", audio_file
    ], check=False, capture_output=True)

    if os.path.exists(audio_file) and os.path.getsize(audio_file) > 0:
        final_output = os.path.join(final_folder, f"final-output{str(index).zfill(3)}_processed.mp4")
        encoder_name, encoder_preset = get_best_encoder()

        # NOTE(review): -crf and -b:v are both passed; which one takes effect
        # depends on the selected encoder — confirm the intended rate control.
        command = [
            "ffmpeg", "-y", "-hide_banner", "-loglevel", "error", "-stats",
            "-i", output_file,
            "-i", audio_file,
            "-c:v", encoder_name, "-preset", encoder_preset,
            "-crf", "18",
            "-b:v", "25M",
            "-c:a", "aac", "-b:a", "192k",
            "-pix_fmt", "yuv420p",  # YouTube/TikTok compatibility
            "-r", str(fps),
            final_output
        ]

        try:
            subprocess.run(command, check=True)
            print(f"[YOLO] Final output: {final_output}")

            # Cleanup temp files; a file that cannot be removed is left behind
            for temp_path in (audio_file, output_file):
                try:
                    os.remove(temp_path)
                except OSError:
                    pass

        except subprocess.CalledProcessError as e:
            print(f"[YOLO] Muxing error: {e}")
    else:
        print(f"[YOLO] Warning: No audio extracted for {input_file}")


# Convenience function to check if YOLO is ready
def is_yolo_available():
    """Return True when init_yolo() has succeeded and a model is loaded."""
    return YOLO_AVAILABLE and YOLO_MODEL is not None
# Output geometry shared by the 9:16 helpers below
_TARGET_W, _TARGET_H = 1080, 1920


def _vertical_crop_width(frame_width, frame_height):
    """Return the source crop width: the wider of a tight 9:16 crop and 42% of the frame."""
    target_ar = _TARGET_W / _TARGET_H  # 0.5625
    tight_w = min(int(frame_height * target_ar), frame_width)
    wide_w = int(frame_width * 0.42)  # less zoom, more context
    return min(max(tight_w, wide_w), frame_width)


def _compose_vertical(frame, crop_x, source_w):
    """Crop full-height columns [crop_x, crop_x + source_w) and fit them into 1080x1920."""
    frame_height, frame_width = frame.shape[:2]
    target_ar = _TARGET_W / _TARGET_H
    source_h = frame_height

    cropped = frame[0:source_h, crop_x:crop_x + source_w]
    crop_ar = source_w / source_h

    if abs(crop_ar - target_ar) < 0.03:
        # Already ~9:16 -> direct resize
        return cv2.resize(cropped, (_TARGET_W, _TARGET_H), interpolation=cv2.INTER_LANCZOS4)

    # Wider -> compose over a blurred background built at half resolution
    bg_crop_w = min(int(frame_height * target_ar), frame_width)
    bg_x = (frame_width - bg_crop_w) // 2
    bg_slice = frame[0:frame_height, bg_x:bg_x + bg_crop_w]
    bg_small = cv2.resize(bg_slice, (_TARGET_W // 2, _TARGET_H // 2), interpolation=cv2.INTER_AREA)
    bg_small = cv2.GaussianBlur(bg_small, (51, 51), 0)
    canvas = cv2.resize(bg_small, (_TARGET_W, _TARGET_H), interpolation=cv2.INTER_LINEAR)

    scale = _TARGET_W / source_w
    fg_w = _TARGET_W
    fg_h = int(source_h * scale)

    if fg_h > _TARGET_H:
        # Crop is narrower than 9:16: fit by height instead
        fg_h = _TARGET_H
        fg_w = int(source_w * (_TARGET_H / source_h))

    foreground = cv2.resize(cropped, (fg_w, fg_h), interpolation=cv2.INTER_LANCZOS4)

    pad_top = (_TARGET_H - fg_h) // 2
    pad_left = (_TARGET_W - fg_w) // 2
    canvas[pad_top:pad_top + fg_h, pad_left:pad_left + fg_w] = foreground
    return canvas


def _enhance_or_sharpen(image):
    """Run enhance_frame when available, otherwise apply an unsharp mask."""
    if QUALITY_AVAILABLE:
        return enhance_frame(image, preset_name="high")
    gaussian = cv2.GaussianBlur(image, (0, 0), 3.0)
    return cv2.addWeighted(image, 1.8, gaussian, -0.8, 0)


def crop_and_resize_single_face(frame, face):
    """
    Build a 1080x1920 frame horizontally centred on one face.

    Args:
        frame: Source image.
        face: Face box as ``(x, y, w, h)``; only ``x`` and ``w`` are used
            because the full source height is always kept.

    Returns:
        The composed 1080x1920 frame after the enhancement step.
    """
    frame_height, frame_width = frame.shape[:2]
    x, _y, w, _h = face
    # Plain int so the value is always usable as a slice index
    face_center_x = int(x + w // 2)

    source_w = _vertical_crop_width(frame_width, frame_height)

    # Centre the crop on the face horizontally, clamped to the frame
    crop_x = max(0, min(face_center_x - source_w // 2, frame_width - source_w))

    return _enhance_or_sharpen(_compose_vertical(frame, crop_x, source_w))


def resize_with_blur_background(frame):
    """
    Layered composition: the sharp original video in the centre over a blurred
    copy of itself that fills the 1080x1920 canvas.

    The background is downscaled to 540x960 before blurring to keep the blur
    cheap, then upscaled. The foreground keeps the source aspect ratio.
    ``enhance_frame`` is applied to the result when it is available.
    """
    frame_height, frame_width = frame.shape[:2]
    target_w, target_h = _TARGET_W, _TARGET_H
    target_ar = target_w / target_h  # 9/16 = 0.5625

    # === Background layer (blurred, fills the whole canvas) ===
    small_w, small_h = target_w // 2, target_h // 2  # 540x960

    # Fill-crop to the 9:16 aspect ratio
    src_ar = frame_width / frame_height
    if src_ar > target_ar:
        # Wider than the target -> excess is cropped from the sides
        new_h = small_h
        new_w = int(small_h * src_ar)
    else:
        # Taller than the target -> excess is cropped from top/bottom
        new_w = small_w
        new_h = int(small_w / src_ar)

    bg_small = cv2.resize(frame, (new_w, new_h), interpolation=cv2.INTER_AREA)

    # Centre crop to exactly 540x960
    cx, cy = new_w // 2, new_h // 2
    x1 = max(0, cx - small_w // 2)
    y1 = max(0, cy - small_h // 2)
    bg_small = bg_small[y1:y1 + small_h, x1:x1 + small_w]

    # Gaussian blur on the small frame (large kernel for a strong blur)
    bg_small = cv2.GaussianBlur(bg_small, (51, 51), 0)

    # Upscale to the final resolution
    background = cv2.resize(bg_small, (target_w, target_h), interpolation=cv2.INTER_LINEAR)

    # === Foreground layer (sharp, original aspect ratio kept) ===
    # max(1, ...) keeps cv2.resize valid for extremely wide or tall sources
    if src_ar > target_ar:
        # Fit by width
        fg_w = target_w
        fg_h = max(1, int(target_w / src_ar))
    else:
        # Fit by height
        fg_h = target_h
        fg_w = max(1, int(target_h * src_ar))

    foreground = cv2.resize(frame, (fg_w, fg_h), interpolation=cv2.INTER_LANCZOS4)

    # === Composition: foreground centred over the background ===
    pad_top = (target_h - fg_h) // 2
    pad_left = (target_w - fg_w) // 2
    background[pad_top:pad_top + fg_h, pad_left:pad_left + fg_w] = foreground

    # Apply full enhancement pipeline
    if QUALITY_AVAILABLE:
        background = enhance_frame(background, preset_name="high")

    return background


def crop_center_zoom(frame):
    """
    Centre crop for 9:16 output, composed over a blurred background when the
    crop is wider than 9:16.

    Uses the same crop width rule as the face-centred crop, so the result is
    less zoomed than a tight 9:16 fill.

    Returns:
        The composed 1080x1920 frame after the enhancement step.
    """
    frame_height, frame_width = frame.shape[:2]
    source_w = _vertical_crop_width(frame_width, frame_height)

    # Centre crop
    crop_x = (frame_width - source_w) // 2

    return _enhance_or_sharpen(_compose_vertical(frame, crop_x, source_w))
Pulando a transcrição.") return srt_file, tsv_file - # Device Setup + # Device Setup - Optimized for T4 GPUs (Colab/Kaggle) device = "cuda" if torch.cuda.is_available() else "cpu" print(f"DEBUG: Usando dispositivo: {device}") - compute_type = "float16" if device == "cuda" else "float32" + + # Use int8_float16 for T4 GPUs (16GB VRAM) - better memory efficiency + # int8 quantization reduces VRAM usage by ~50% with minimal quality loss + compute_type = "int8_float16" if device == "cuda" else "float32" + print(f"DEBUG: Compute type: {compute_type} (T4 optimizado)") try: apply_safe_globals_hack() @@ -216,8 +220,10 @@ def transcribe(input_file, model_name='large-v3', project_folder='tmp'): start_segments = None alignment_only = False - # Default blind guess if we have no info - detected_language = "en" + # Use forced language if provided, otherwise fall back to the Portuguese default + detected_language = language if language else "pt" # Default to Portuguese + language_forced = language is not None + print(f"DEBUG: Idioma {'forçado' if language_forced else 'padrão'}: {detected_language}") if potential_subs: sub_path = potential_subs[0] @@ -249,20 +255,26 @@ def transcribe(input_file, model_name='large-v3', project_folder='tmp'): # Mas o align recebe segments como lista. pass else: - # 3. Transcrever (Caminho Normal) + # 3. Transcrever (Caminho Normal) - Optimized for T4 GPUs print("Nenhuma legenda válida encontrada. 
Realizando transcrição completa (WhisperX)...") - print(f"Carregando modelo {model_name}...") + print(f"Carregando modelo {model_name} (compute_type={compute_type})...") + + # Batch size 8 for T4 (16GB VRAM) - prevents OOM on long podcasts + # For higher VRAM GPUs, can increase to 16 or 24 + t4_batch_size = 8 + model = whisperx.load_model( model_name, device, compute_type=compute_type, + language=detected_language if not language_forced else language, asr_options={"hotwords": None} ) result = model.transcribe( audio, - batch_size=16, - chunk_size=10 + batch_size=t4_batch_size, + chunk_size=15 # Slightly larger chunks for efficiency ) detected_language = result["language"] diff --git a/scripts/two_face.py b/scripts/two_face.py index 4761d5d..ccc3c82 100644 --- a/scripts/two_face.py +++ b/scripts/two_face.py @@ -2,6 +2,13 @@ import mediapipe as mp import numpy as np +# Import quality enhancement functions +try: + from scripts.video_quality import enhance_frame + QUALITY_AVAILABLE = True +except ImportError: + QUALITY_AVAILABLE = False + def crop_and_maintain_ar(frame, face_box, target_w, target_h, zoom_out_factor=2.2): """ Recorta uma região baseada no rosto mantendo o aspect ratio do target. @@ -75,7 +82,7 @@ def crop_and_maintain_ar(frame, face_box, target_w, target_h, zoom_out_factor=2. 
# Redimensionar para o tamanho alvo final (1080x960) # Como garantimos o AR, o resize mantém a proporção correta - resized = cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LINEAR) + resized = cv2.resize(cropped, (target_w, target_h), interpolation=cv2.INTER_LANCZOS4) return resized def crop_and_resize_two_faces(frame, face_positions, zoom_out_factor=2.2): @@ -100,6 +107,14 @@ def crop_and_resize_two_faces(frame, face_positions, zoom_out_factor=2.2): # Compor imagem final (Stack Vertical) result_frame = np.vstack((face1_img, face2_img)) + # Apply full enhancement pipeline: Denoise -> Color Grading -> Unsharp + if QUALITY_AVAILABLE: + result_frame = enhance_frame(result_frame, preset_name="high") + else: + # Fallback to basic unsharp if module not available + gaussian = cv2.GaussianBlur(result_frame, (0, 0), 3.0) + result_frame = cv2.addWeighted(result_frame, 1.8, gaussian, -0.8, 0) + return result_frame diff --git a/scripts/video_quality.py b/scripts/video_quality.py new file mode 100644 index 0000000..b5da553 --- /dev/null +++ b/scripts/video_quality.py @@ -0,0 +1,269 @@ +# Video Quality Enhancement Module +""" +Provides quality enhancement functions for ViralCutter. +Uses Lanczos interpolation, Denoise, Color Grading and Unsharp Mask for better upscaling quality. 
+""" + +import cv2 +import numpy as np + +# Quality presets for video enhancement +QUALITY_PRESETS = { + "standard": { + "interpolation": cv2.INTER_LANCZOS4, + "denoise_strength": 0.0, # No denoise + "auto_illumination": False, # No auto-light + "unsharp_kernel": (0, 0), # Auto kernel + "unsharp_sigma": 3.0, + "unsharp_strength": 0.8, + "contrast": 1.0, + "saturation": 1.0, + "crf": 23, + "max_bitrate": "5M", + }, + "high": { + "interpolation": cv2.INTER_LANCZOS4, + "denoise_strength": 1.5, # Light denoise + "auto_illumination": True, # Opus-style auto brightness/contrast + "auto_illumination_clip_limit": 2.0, # CLAHE clip limit (subtle) + "unsharp_kernel": (5, 5), # Fixed kernel — Visual Opus (FFmpeg unsharp=5:5:1.0) + "unsharp_sigma": 1.0, + "unsharp_strength": 1.0, # Stronger sharpening for upscale recovery + "contrast": 1.05, + "saturation": 1.1, + "crf": 18, + "max_bitrate": "25M", + }, + "max": { + "interpolation": cv2.INTER_LANCZOS4, + "denoise_strength": 2.0, # Stronger denoise + "auto_illumination": True, # Opus-style auto brightness/contrast + "auto_illumination_clip_limit": 3.0, # Higher clip = more correction + "unsharp_kernel": (5, 5), # Fixed kernel — Visual Opus + "unsharp_sigma": 1.0, + "unsharp_strength": 1.2, # Aggressive sharpening + "contrast": 1.08, + "saturation": 1.15, + "crf": 15, + "max_bitrate": "35M", + } +} + +# Default preset +DEFAULT_PRESET = "high" + + +def get_quality_preset(name=None): + """Get quality preset by name. Returns 'high' preset by default.""" + if name is None: + name = DEFAULT_PRESET + return QUALITY_PRESETS.get(name, QUALITY_PRESETS[DEFAULT_PRESET]) + + +def apply_denoise(frame, strength=1.5): + """ + Apply fast denoise filter to remove compression artifacts BEFORE upscaling. + Uses bilateral filter which preserves edges while smoothing. 
+ + Args: + frame: Input BGR frame (numpy array) + strength: Denoise strength (0.0-3.0, default 1.5) + 0.0 = off, 1.5 = light, 2.0+ = aggressive + + Returns: + Denoised frame + """ + if strength <= 0: + return frame + + # Bilateral filter: preserves edges while removing noise + # d=5: diameter of pixel neighborhood + # sigmaColor: higher = more colors are mixed + # sigmaSpace: higher = pixels farther away influence each other + d = 5 + sigma_color = int(strength * 50) # 75 for strength 1.5 + sigma_space = int(strength * 50) + + denoised = cv2.bilateralFilter(frame, d, sigma_color, sigma_space) + return denoised + + +def apply_auto_illumination(frame, clip_limit=2.0): + """ + Opus-style automatic illumination adjustment. + Uses CLAHE (Contrast Limited Adaptive Histogram Equalization) on the + luminance channel to normalize brightness and contrast automatically. + Preserves colors while fixing dark/overexposed areas. + + Args: + frame: Input BGR frame (numpy array) + clip_limit: CLAHE clip limit (1.0-4.0, default 2.0) + 2.0 = subtle/natural, 3.0 = stronger correction + + Returns: + Illumination-corrected frame + """ + # Convert to LAB color space (L = luminance, A/B = color) + lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB) + l_channel, a_channel, b_channel = cv2.split(lab) + + # Apply CLAHE only to luminance — preserves original colors + clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=(8, 8)) + l_corrected = clahe.apply(l_channel) + + # Merge back and convert to BGR + lab_corrected = cv2.merge([l_corrected, a_channel, b_channel]) + result = cv2.cvtColor(lab_corrected, cv2.COLOR_LAB2BGR) + + return result + + +def apply_color_grading(frame, contrast=1.05, saturation=1.1): + """ + Apply color grading to make video look more "expensive". + Higher contrast = perceived sharpness. Higher saturation = more vibrant. 
+ + Args: + frame: Input BGR frame (numpy array) + contrast: Contrast multiplier (1.0 = no change, 1.05 = 5% increase) + saturation: Saturation multiplier (1.0 = no change, 1.1 = 10% increase) + + Returns: + Color graded frame + """ + if contrast == 1.0 and saturation == 1.0: + return frame + + # Apply contrast (centered around 127) + if contrast != 1.0: + frame = cv2.convertScaleAbs(frame, alpha=contrast, beta=127 * (1 - contrast)) + + # Apply saturation in HSV space + if saturation != 1.0: + hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV).astype(np.float32) + hsv[:, :, 1] = np.clip(hsv[:, :, 1] * saturation, 0, 255) + frame = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR) + + return frame + + +def apply_unsharp_mask(frame, strength=1.0, kernel=(5, 5), sigma=1.0): + """ + Apply unsharp mask to recover sharpness lost during upscaling. + Calibrated to match FFmpeg unsharp=5:5:1.0 (Visual Opus quality). + + Args: + frame: Input BGR frame (numpy array) + strength: Sharpening strength (0.0-2.0, default 1.0) + 0.8 = subtle, 1.0 = Visual Opus, 1.2 = aggressive + kernel: Gaussian kernel size, e.g. (5,5). Use (0,0) for auto. + sigma: Gaussian sigma (1.0 = Visual Opus standard) + + Returns: + Sharpened frame + """ + if strength <= 0: + return frame + + # Create Gaussian blur with configurable kernel and sigma + gaussian = cv2.GaussianBlur(frame, kernel, sigma) + + # Apply unsharp mask: original + strength * (original - blurred) + sharpened = cv2.addWeighted(frame, 1.0 + strength, gaussian, -strength, 0) + + return sharpened + + +def enhance_frame(frame, preset_name=None): + """ + Apply full Opus-style enhancement pipeline to a frame. + Order: Denoise → Auto Illumination → Color Grading → Unsharp (sharpness last!) + + Pipeline matches Opus Clip quality filters: + 1. Denoising — removes grain and compression artifacts + 2. Auto Illumination — normalizes brightness/contrast automatically (CLAHE) + 3. Color Grading — contrast + saturation boost for "polished" look + 4. 
Sharpening — reinforces edges and fine details (unsharp mask) + + Args: + frame: Input BGR frame (already resized to target size) + preset_name: Quality preset name ('standard', 'high', 'max') + + Returns: + Enhanced frame + """ + preset = get_quality_preset(preset_name) + + # 1. Denoise (clean compression artifacts and grain) + frame = apply_denoise(frame, preset["denoise_strength"]) + + # 2. Auto Illumination (normalize brightness/contrast — Opus-style) + if preset.get("auto_illumination", False): + clip_limit = preset.get("auto_illumination_clip_limit", 2.0) + frame = apply_auto_illumination(frame, clip_limit=clip_limit) + + # 3. Color grading (contrast + saturation for polished look) + frame = apply_color_grading(frame, preset["contrast"], preset["saturation"]) + + # 4. Unsharp mask (sharpness - reinforces edges, ALWAYS LAST) + frame = apply_unsharp_mask( + frame, + strength=preset["unsharp_strength"], + kernel=preset.get("unsharp_kernel", (5, 5)), + sigma=preset.get("unsharp_sigma", 1.0) + ) + + return frame + + +def resize_with_quality(frame, target_size, apply_enhancement=True, preset_name=None): + """ + Resize frame with high quality (Lanczos) and full enhancement pipeline. + + Args: + frame: Input BGR frame + target_size: Tuple (width, height) + apply_enhancement: Whether to apply full enhancement pipeline + preset_name: Quality preset name ('standard', 'high', 'max') + + Returns: + Resized and optionally enhanced frame + """ + preset = get_quality_preset(preset_name) + + # Use Lanczos interpolation for best upscaling quality + resized = cv2.resize(frame, target_size, interpolation=preset["interpolation"]) + + # Apply full enhancement pipeline + if apply_enhancement: + resized = enhance_frame(resized, preset_name) + + return resized + + +def get_ffmpeg_quality_args(preset_name=None, encoder_name="libx264"): + """ + Get FFmpeg arguments for quality encoding. 
+ + Args: + preset_name: Quality preset name + encoder_name: Video encoder name + + Returns: + List of FFmpeg arguments + """ + preset = get_quality_preset(preset_name) + + args = [] + + # CRF for quality (only for libx264 and similar) + if "264" in encoder_name or encoder_name == "libx264": + args.extend(["-crf", str(preset["crf"])]) + + # Max bitrate + args.extend(["-b:v", preset["max_bitrate"]]) + + # Pixel format for compatibility with YouTube/TikTok + args.extend(["-pix_fmt", "yuv420p"]) + + return args diff --git a/viralcutter-Colab-Quality.ipynb b/viralcutter-Colab-Quality.ipynb new file mode 100644 index 0000000..95cce0f --- /dev/null +++ b/viralcutter-Colab-Quality.ipynb @@ -0,0 +1,130 @@ +{ + "metadata": { + "kernelspec": { + "language": "python", + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.10.12", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "accelerator": "GPU" + }, + "nbformat_minor": 0, + "nbformat": 4, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# \ud83c\udfac ViralCutter - VISUAL OPUS QUALITY (Google Colab)\n", + "\n", + "Vers\u00e3o com **qualidade Visual Opus** para Google Colab com GPU T4!\n", + "\n", + "### \ud83c\udfac Visual Opus Pipeline (4 Filtros)\n", + "\n", + "| Filtro | T\u00e9cnica | Efeito |\n", + "|--------|---------|--------|\n", + "| **1. Denoise** | Bilateral Filter | Remove gra\u00e3o e artefatos de compress\u00e3o |\n", + "| **2. Auto Illumination** | CLAHE (LAB) | Normaliza brilho/contraste automaticamente |\n", + "| **3. Color Grading** | Contrast 1.05 + Sat 1.1 | Apar\u00eancia polida e profissional |\n", + "| **4. 
Sharpening** | Unsharp 5:5:1.0 | Refor\u00e7a bordas e detalhes finos |\n", + "\n", + "### \ud83c\udd95 Recursos\n", + "- \u2705 **Blur Background** \u2014 fundo desfocado em vez de barras pretas\n", + "- \u2705 **YOLO Talking-Head** \u2014 enquadramento calibrado cabe\u00e7a/ombros\n", + "- \u2705 **Auto Illumination** \u2014 ajuste autom\u00e1tico de brilho (CLAHE)\n", + "- \u2705 **Visual Opus Sharpening** \u2014 unsharp=5:5:1.0\n", + "- \u2705 Upload autom\u00e1tico para Google Drive\n", + "\n", + "---\n", + "\n", + "## \u2699\ufe0f Pr\u00e9-requisitos\n", + "\n", + "1. Ativar GPU: `Runtime > Change runtime type > T4 GPU`\n", + "2. Ter `client_secret.json` do Google Cloud (para upload no Drive)\n", + "3. (Opcional) API Key do Gemini para cortes com IA\n", + "\n", + "---" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": [ + "#@title \ud83d\udce6 1. Upload de Credenciais\n", + "#@markdown Fa\u00e7a upload do `client_secret.json` e (opcionalmente) do arquivo de cookies do YouTube.\n", + "\n", + "from google.colab import files\n", + "import os\n", + "\n", + "print('\ud83d\udd11 Fa\u00e7a upload do client_secret.json...')\n", + "print(' (Se n\u00e3o tiver, o upload para o Drive ser\u00e1 desativado)\\n')\n", + "\n", + "try:\n", + " uploaded = files.upload()\n", + " for fn in uploaded.keys():\n", + " print(f'\u2705 Arquivo recebido: {fn}')\n", + " os.makedirs('/content/credentials', exist_ok=True)\n", + " with open(f'/content/credentials/{fn}', 'wb') as f:\n", + " f.write(uploaded[fn])\n", + "except Exception as e:\n", + " print(f'\u26a0\ufe0f Upload cancelado ou erro: {e}')\n", + " print(' Continuando sem credenciais...')" + ], + "metadata": { + "cellView": "form" + }, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "source": "#@title \ud83d\ude80 2. 
INSTALAR E INICIAR VIRALCUTTER\n#@markdown ### \ud83c\udfac Visual Opus Quality + Colab T4\n#@markdown **Pipeline (4 filtros):**\n#@markdown - 1\ufe0f\u20e3 Denoise (bilateral filter)\n#@markdown - 2\ufe0f\u20e3 Auto Illumination (CLAHE)\n#@markdown - 3\ufe0f\u20e3 Color Grading (contrast=1.05, saturation=1.1)\n#@markdown - 4\ufe0f\u20e3 Unsharp Mask (5:5:1.0) + Lanczos Scale\n#@markdown\n#@markdown **Novidades:**\n#@markdown - \ud83c\udd95 Blur Background (fundo desfocado 9:16)\n#@markdown - \ud83c\udd95 YOLO Talking-Head (enquadramento calibrado)\n#@markdown - \ud83c\udd95 Auto Illumination (ajuste autom\u00e1tico de brilho)\n\nimport os\nimport time\nimport threading\nimport json\nimport shutil\nimport subprocess\nimport sys\n\n# ==============================================================================\n# 1. SETUP DE BIBLIOTECAS (FIX AUTH)\n# ==============================================================================\nprint(\"1\ufe0f\u20e3 Ajustando bibliotecas de autentica\u00e7\u00e3o...\")\ntry:\n subprocess.run(\n [sys.executable, \"-m\", \"pip\", \"install\", \"google-auth-oauthlib==0.4.6\", \"google-api-python-client\", \"--force-reinstall\", \"--no-deps\", \"-q\"],\n check=True\n )\n subprocess.run(\n [sys.executable, \"-m\", \"pip\", \"install\", \"google-auth>=1.0.0\", \"requests-oauthlib>=0.7.0\", \"-q\"], \n check=True\n )\nexcept Exception as e:\n print(f\"\u26a0\ufe0f Aviso na instala\u00e7\u00e3o de libs: {e}\")\n\nfrom google_auth_oauthlib.flow import InstalledAppFlow\nfrom googleapiclient.discovery import build\nfrom googleapiclient.http import MediaFileUpload\n\n# ==============================================================================\n# 2. 
INSTALA\u00c7\u00c3O DO SISTEMA\n# ==============================================================================\nVENV_PYTHON = '/content/ViralCutter/.venv/bin/python'\nBRANCH = \"video-quality-enhancement\"\n\nif not os.path.exists(VENV_PYTHON):\n print(f\"\\n\ud83d\udce6 Instalando ViralCutter + Visual Opus Quality (branch: {BRANCH})...\")\n \n if os.path.exists(\"/content/ViralCutter\"):\n shutil.rmtree(\"/content/ViralCutter\")\n \n subprocess.run(f\"git clone -b {BRANCH} https://github.com/masterface77/ViralCutter.git /content/ViralCutter\", shell=True, check=True)\n os.chdir(\"/content/ViralCutter\")\n \n print(\" - Instalando drivers...\")\n subprocess.run(\"pip install uv -q\", shell=True)\n subprocess.run(\"apt-get update -y -qq && apt-get install -y ffmpeg xvfb -qq\", shell=True)\n \n print(\" - Criando ambiente virtual...\")\n subprocess.run([\"uv\", \"venv\", \".venv\"], check=True)\n \n print(\" - Baixando pacotes de IA + Ultralytics...\")\n cmds = [\n \"uv pip install --python .venv faster-whisper\",\n \"uv pip install --python .venv git+https://github.com/m-bain/whisperx.git\",\n \"uv pip install --python .venv -r requirements.txt\",\n \"uv pip install --python .venv -U --pre 'yt-dlp[default]'\",\n \"uv pip install --python .venv pytubefix google-generativeai pandas onnxruntime-gpu\",\n \"uv pip install --python .venv transformers==4.46.3 accelerate>=0.26.0\",\n \"uv pip install --python .venv torch==2.3.1+cu121 torchvision==0.18.1+cu121 torchaudio==2.3.1+cu121 --index-url https://download.pytorch.org/whl/cu121\",\n \"uv pip install --python .venv 'numpy<2.0' setuptools==69.5.1\",\n \"uv pip install --python .venv insightface onnxruntime-gpu\",\n \"uv pip install --python .venv ultralytics\",\n \"uv pip uninstall --python .venv mediapipe protobuf flatbuffers\",\n \"uv pip install --python .venv 'mediapipe>=0.10.0' 'protobuf>=3.20,<5.0' 'flatbuffers>=2.0'\"\n ]\n \n for cmd in cmds:\n subprocess.run(cmd, shell=True, check=True)\n \n print(\"\u2705 
Instala\u00e7\u00e3o Conclu\u00edda!\")\n print(\"\ud83c\udfac Visual Opus Quality ativado!\")\n print(\" Pipeline: Denoise \u2192 Auto Illumination \u2192 Color Grading \u2192 Unsharp\")\nelse:\n os.chdir(\"/content/ViralCutter\")\n print(\"\\n\u2705 Sistema j\u00e1 instalado.\")\n subprocess.run(f\"git pull origin {BRANCH}\", shell=True)\n print(\"\ud83c\udfac Atualizado para \u00faltima vers\u00e3o!\")\n\n# ==============================================================================\n# 3. PATCH NO DOWNLOADER\n# ==============================================================================\nprint(\"\\n\ud83d\udd27 Aplicando patch no sistema de download...\")\ndownload_script_content = r'''\nimport yt_dlp\nimport os\nimport glob\nimport shutil\n\ndef download(url, download_subs=False, quality=\"best\", output_folder=\"VIRALS\"):\n print(f\"\ud83d\ude80 Iniciando download: {url}\")\n title_temp = \"video_temp\"\n try:\n with yt_dlp.YoutubeDL({'quiet': True}) as ydl:\n info = ydl.extract_info(url, download=False)\n if info:\n t = info.get('title', 'video_temp')\n title_temp = \"\".join([c for c in t if c.isalnum() or c in \" .-_\"]).strip()\n except: pass\n\n final_path = os.path.join(output_folder, title_temp)\n os.makedirs(final_path, exist_ok=True)\n output_template = f\"{final_path}/input.%(ext)s\"\n\n ydl_opts = {\n 'format': \"bestvideo+bestaudio/best\",\n 'outtmpl': output_template,\n 'noplaylist': True,\n 'writesubtitles': False,\n 'writeautomaticsub': False,\n 'extractor_args': {'youtube': {'player_client': ['android', 'web']}}, \n 'quiet': False,\n 'no_warnings': True,\n 'merge_output_format': 'mp4'\n }\n\n try:\n with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n ydl.download([url])\n \n mp4_files = glob.glob(os.path.join(final_path, \"*.mp4\"))\n if mp4_files:\n found_file = mp4_files[0]\n print(f\"\u2705 Arquivo baixado: {found_file}\")\n expected_input = os.path.join(final_path, \"input.mp4\")\n if found_file != expected_input:\n shutil.move(found_file, 
expected_input)\n return expected_input\n return found_file\n raise FileNotFoundError(\"Arquivo MP4 n\u00e3o gerado\")\n except Exception as e:\n print(f\"\u274c Falha cr\u00edtica no Download: {e}\")\n raise e\n'''\nwith open('/content/ViralCutter/scripts/download_video.py', 'w') as f:\n f.write(download_script_content)\n\n# ==============================================================================\n# 4. AUTENTICA\u00c7\u00c3O OAUTH\n# ==============================================================================\nCLIENT_SECRET_FILE = None\nSCOPES = ['https://www.googleapis.com/auth/drive.file']\nAPI_SERVICE_NAME = 'drive'\nAPI_VERSION = 'v3'\n\nprint(\"\\n\ud83d\udd11 Procurando 'client_secret.json'...\")\nfor root, dirs, files_list in os.walk('/content/credentials'):\n for file in files_list:\n if 'client_secret' in file and file.endswith('.json'):\n CLIENT_SECRET_FILE = os.path.join(root, file)\n print(f\"\u2705 Credencial encontrada: {CLIENT_SECRET_FILE}\")\n break\n if CLIENT_SECRET_FILE: break\n\nif not CLIENT_SECRET_FILE:\n for root, dirs, files_list in os.walk('/content'):\n if 'ViralCutter' in root or '.venv' in root:\n continue\n for file in files_list:\n if 'client_secret' in file and file.endswith('.json'):\n CLIENT_SECRET_FILE = os.path.join(root, file)\n print(f\"\u2705 Credencial encontrada: {CLIENT_SECRET_FILE}\")\n break\n if CLIENT_SECRET_FILE: break\n\nservice = None\nfolder_id = None\n\nif CLIENT_SECRET_FILE:\n print(\"\\n\ud83c\udf10 INICIANDO LOGIN GOOGLE...\")\n print(\" \u26a0\ufe0f Siga os passos abaixo com aten\u00e7\u00e3o:\")\n print(\" 1. Clique no link que aparecer\u00e1 abaixo.\")\n print(\" 2. Fa\u00e7a login e autorize o acesso.\")\n print(\" 3. Copie o c\u00f3digo gerado pelo Google.\")\n print(\" 4. 
COLE O C\u00d3DIGO na caixa de entrada e aperte Enter.\\n\")\n \n try:\n flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRET_FILE, SCOPES)\n \n if hasattr(flow, 'run_console'):\n creds = flow.run_console()\n else:\n flow.redirect_uri = 'urn:ietf:wg:oauth:2.0:oob'\n auth_url, _ = flow.authorization_url(prompt='consent')\n print(f\" Acesse este URL: {auth_url}\")\n code = input(\" Cole o c\u00f3digo de autoriza\u00e7\u00e3o aqui: \")\n flow.fetch_token(code=code)\n creds = flow.credentials\n\n service = build(API_SERVICE_NAME, API_VERSION, credentials=creds)\n print(\"\\n\u2705 Autentica\u00e7\u00e3o realizada com SUCESSO!\")\n \n try:\n results = service.files().list(q=\"name='ViralCutter_VisualOpus' and mimeType='application/vnd.google-apps.folder' and trashed=false\", fields=\"files(id)\").execute()\n items = results.get('files', [])\n if items:\n folder_id = items[0]['id']\n print(f\"\u2705 Pasta 'ViralCutter_VisualOpus' encontrada (ID: {folder_id})\")\n else:\n file_metadata = {'name': 'ViralCutter_VisualOpus', 'mimeType': 'application/vnd.google-apps.folder'}\n folder = service.files().create(body=file_metadata, fields='id').execute()\n folder_id = folder.get('id')\n print(f\"\u2705 Pasta criada (ID: {folder_id})\")\n except Exception as e:\n print(f\"\u26a0\ufe0f Erro ao criar pasta: {e}\")\n except Exception as e:\n print(f\"\u274c Erro no Login: {e}\")\nelse:\n print(\"\u274c 'client_secret.json' n\u00e3o encontrado. Upload desativado.\")\n print(\" Dica: Execute a c\u00e9lula 1 para fazer upload das credenciais.\")\n\n# ==============================================================================\n# 5. 
MONITOR DE UPLOAD\n# ==============================================================================\ndef monitor_oauth():\n if not service or not folder_id: \n return\n \n print(\"\\n\ud83d\udc40 Monitor OAuth Ativo\")\n print(\"\ud83d\udccc APENAS arquivos COM LEGENDA (_subtitled) ser\u00e3o enviados!\")\n uploaded = set()\n \n while True:\n try:\n watch_path = '/content/ViralCutter/VIRALS'\n if os.path.exists(watch_path):\n for r, d, f in os.walk(watch_path):\n for file in f:\n if not file.endswith('.mp4'):\n continue\n ignore_patterns = ['input.mp4', '_original_scale', '_temp', 'temp_', '.part', 'processing']\n if any(p in file.lower() for p in ignore_patterns):\n continue\n if '_subtitled' not in file.lower():\n continue\n path = os.path.join(r, file)\n if path not in uploaded:\n try:\n if os.path.getsize(path) > 1e6:\n s1 = os.path.getsize(path)\n time.sleep(5)\n if not os.path.exists(path) or os.path.getsize(path) != s1:\n continue\n print(f\"\\n\ud83d\udc8e Corte COM LEGENDA: {file}\")\n print(f\" \u2601\ufe0f Enviando para Google Drive...\")\n try:\n meta = {'name': file, 'parents': [folder_id]}\n media = MediaFileUpload(path, resumable=True)\n service.files().create(body=meta, media_body=media).execute()\n uploaded.add(path)\n print(\" \u2705 Upload Completo!\")\n except Exception as e_up:\n print(f\" \u274c Erro Upload: {e_up}\")\n except FileNotFoundError:\n continue\n time.sleep(10)\n except Exception as e:\n print(f\"\u26a0\ufe0f Erro no monitor: {e}\")\n time.sleep(10)\n\nthreading.Thread(target=monitor_oauth, daemon=True).start()\n\n# ==============================================================================\n# 6. 
INICIAR INTERFACE\n# ==============================================================================\nprint(\"\\n\ud83d\ude80 INICIANDO VIRALCUTTER COM VISUAL OPUS QUALITY...\")\nprint(\"\ud83c\udfac Pipeline: Denoise \u2192 Auto Illumination \u2192 Color Grading \u2192 Unsharp\")\nprint(\"\ud83d\udcca Codec: h264_nvenc CRF 18 + 25M bitrate\")\nprint(\"\ud83c\udd95 Blur Background + YOLO Talking-Head dispon\u00edveis!\")\nprint(\"\u26a0\ufe0f CLIQUE NO LINK P\u00daBLICO (gradio.live) ABAIXO:\")\nprint(\"=\"*60)\n\nsubprocess.Popen(\n ['Xvfb', ':1', '-screen', '0', '2560x1440x8'],\n stdout=subprocess.DEVNULL,\n stderr=subprocess.DEVNULL\n)\ntime.sleep(2)\n\nos.environ['DISPLAY'] = ':1.0'\nos.environ['MPLBACKEND'] = 'Agg'\nos.environ['CUDA_VISIBLE_DEVICES'] = '0'\n\n!/content/ViralCutter/.venv/bin/python webui/app.py --colab --face-model yolo", + "metadata": { + "cellView": "form" + }, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "source": [ + "## \ud83d\udcdd Cr\u00e9ditos\n", + "\n", + "**ViralCutter** por [Rafa.Godoy](https://github.com/rafaelGodoyEbert)\n", + "\n", + "---\n", + "\n", + "## \ud83c\udfac Visual Opus Pipeline\n", + "\n", + "```\n", + "Frame \u2192 Denoise \u2192 Auto Illumination \u2192 Color Grading \u2192 Unsharp \u2192 Output\n", + "```\n", + "\n", + "| Filtro | O que faz |\n", + "|--------|-----------|\n", + "| **Denoise** | Remove gra\u00e3o e artefatos de compress\u00e3o |\n", + "| **Auto Illumination** | Ajusta brilho/contraste automaticamente |\n", + "| **Color Grading** | Apar\u00eancia polida: contrast + saturation |\n", + "| **Unsharp** | Refor\u00e7a bordas e detalhes finos |\n", + "\n", + "---\n", + "`v1.0 Visual Opus + Blur Background + YOLO Talking-Head`" + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/viralcutter-Kaggle-Quality.ipynb b/viralcutter-Kaggle-Quality.ipynb new file mode 100644 index 0000000..4f9e843 --- /dev/null +++ b/viralcutter-Kaggle-Quality.ipynb @@ -0,0 
+1,146 @@ +{ + "metadata": { + "kernelspec": { + "language": "python", + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.12.12", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kaggle": { + "accelerator": "nvidiaTeslaT4", + "dataSources": [ + { + "sourceId": 14644893, + "sourceType": "datasetVersion", + "datasetId": 9355102 + }, + { + "sourceId": 14669723, + "sourceType": "datasetVersion", + "datasetId": 9371811 + }, + { + "sourceId": 14678475, + "sourceType": "datasetVersion", + "datasetId": 9377534 + }, + { + "sourceId": 14683135, + "sourceType": "datasetVersion", + "datasetId": 9380125 + }, + { + "sourceId": 14683138, + "sourceType": "datasetVersion", + "datasetId": 9380127 + } + ], + "dockerImageVersionId": 31260, + "isInternetEnabled": true, + "language": "python", + "sourceType": "notebook", + "isGpuEnabled": true + } + }, + "nbformat_minor": 4, + "nbformat": 4, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# \ud83c\udfac ViralCutter - VISUAL OPUS QUALITY (Kaggle)\n", + "\n", + "Vers\u00e3o com **qualidade Visual Opus** - o mesmo padr\u00e3o de nitidez do Opus Clip!\n", + "\n", + "### \ud83c\udfac Visual Opus Pipeline (4 Filtros)\n", + "\n", + "| Filtro | T\u00e9cnica | Efeito |\n", + "|--------|---------|--------|\n", + "| **1. Denoise** | Bilateral Filter | Remove gra\u00e3o e artefatos de compress\u00e3o |\n", + "| **2. Auto Illumination** | CLAHE (LAB) | Normaliza brilho/contraste automaticamente |\n", + "| **3. Color Grading** | Contrast 1.05 + Sat 1.1 | Apar\u00eancia \"polida\" e profissional |\n", + "| **4. 
Sharpening** | Unsharp 5:5:1.0 | Refor\u00e7a bordas e detalhes finos |\n", + "\n", + "### \ud83c\udd95 Novidades nesta Vers\u00e3o\n", + "- \u2705 **Blur Background** \u2014 fundo desfocado em vez de barras pretas ou crop\n", + "- \u2705 **YOLO Talking-Head** \u2014 enquadramento calibrado para cabe\u00e7a/ombros\n", + "- \u2705 **Auto Illumination** \u2014 ajuste autom\u00e1tico de brilho (CLAHE)\n", + "- \u2705 **Visual Opus Sharpening** \u2014 unsharp=5:5:1.0 calibrado\n", + "- \u2705 Upload autom\u00e1tico para Google Drive\n", + "\n", + "---\n", + "\n", + "## \ud83d\udce6 Datasets Necess\u00e1rios\n", + "\n", + "| Dataset | Descri\u00e7\u00e3o |\n", + "|---------|----------|\n", + "| `client-secret-json` | OAuth do Google Cloud |\n", + "| `cookie` | Cookies YouTube (opcional) |\n", + "| `credenciais-google` | API Key Gemini |\n", + "\n", + "---" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": "#@title \ud83d\ude80 VIRALCUTTER VISUAL OPUS QUALITY (Kaggle + T4 Otimizado)\n#@markdown ### \ud83c\udfac Qualidade Visual Opus + T4 Optimization\n#@markdown **Visual Opus Pipeline (4 filtros):**\n#@markdown - 1\ufe0f\u20e3 Denoise (bilateral filter)\n#@markdown - 2\ufe0f\u20e3 Auto Illumination (CLAHE)\n#@markdown - 3\ufe0f\u20e3 Color Grading (contrast=1.05, saturation=1.1)\n#@markdown - 4\ufe0f\u20e3 Unsharp Mask (5:5:1.0) + Lanczos Scale\n#@markdown\n#@markdown **Novidades:**\n#@markdown - \ud83c\udd95 Blur Background (fundo desfocado 9:16)\n#@markdown - \ud83c\udd95 YOLO Talking-Head (enquadramento calibrado)\n#@markdown - \ud83c\udd95 Auto Illumination (ajuste autom\u00e1tico de brilho)\n\nimport os\nimport time\nimport threading\nimport json\nimport shutil\nimport subprocess\nimport sys\nfrom IPython.display import clear_output\n\n# ==============================================================================\n# 1. 
SETUP DE BIBLIOTECAS (FIX AUTH)\n# ==============================================================================\nprint(\"1\ufe0f\u20e3 Ajustando bibliotecas de autentica\u00e7\u00e3o...\")\ntry:\n subprocess.run(\n [sys.executable, \"-m\", \"pip\", \"install\", \"google-auth-oauthlib==0.4.6\", \"google-api-python-client\", \"--force-reinstall\", \"--no-deps\", \"-q\"],\n check=True\n )\n subprocess.run(\n [sys.executable, \"-m\", \"pip\", \"install\", \"google-auth>=1.0.0\", \"requests-oauthlib>=0.7.0\", \"-q\"], \n check=True\n )\nexcept Exception as e:\n print(f\"\u26a0\ufe0f Aviso na instala\u00e7\u00e3o de libs: {e}\")\n\nfrom google_auth_oauthlib.flow import InstalledAppFlow\nfrom googleapiclient.discovery import build\nfrom googleapiclient.http import MediaFileUpload\n\n# ==============================================================================\n# 2. INSTALA\u00c7\u00c3O DO SISTEMA + VISUAL OPUS QUALITY + T4 OPTIMIZATION\n# ==============================================================================\nVENV_PYTHON = '/kaggle/working/ViralCutter/.venv/bin/python'\nBRANCH = \"video-quality-enhancement\" # Branch com Visual Opus Quality\n\nif not os.path.exists(VENV_PYTHON):\n print(f\"\\n\ud83d\udce6 Instalando ViralCutter + Visual Opus Quality (branch: {BRANCH})...\")\n \n if os.path.exists(\"/kaggle/working/ViralCutter\"):\n shutil.rmtree(\"/kaggle/working/ViralCutter\")\n \n # Clone da branch video-quality-enhancement\n subprocess.run(f\"git clone -b {BRANCH} https://github.com/masterface77/ViralCutter.git /kaggle/working/ViralCutter\", shell=True, check=True)\n os.chdir(\"/kaggle/working/ViralCutter\")\n \n print(\" - Instalando drivers...\")\n subprocess.run(\"pip install uv -q\", shell=True)\n subprocess.run(\"apt-get update -y -qq && apt-get install -y libcudnn8 ffmpeg xvfb -qq\", shell=True)\n \n print(\" - Criando ambiente virtual...\")\n subprocess.run([\"uv\", \"venv\", \".venv\"], check=True)\n \n print(\" - Baixando pacotes de IA + 
Ultralytics (T4 Otimizado)...\")\n cmds = [\n \"uv pip install --python .venv faster-whisper\",\n \"uv pip install --python .venv git+https://github.com/m-bain/whisperx.git\",\n \"uv pip install --python .venv -r requirements.txt\",\n \"uv pip install --python .venv -U --pre 'yt-dlp[default]'\",\n \"uv pip install --python .venv pytubefix google-generativeai pandas onnxruntime-gpu\",\n \"uv pip install --python .venv transformers==4.46.3 accelerate>=0.26.0\",\n \"uv pip install --python .venv torch==2.3.1+cu121 torchvision==0.18.1+cu121 torchaudio==2.3.1+cu121 --index-url https://download.pytorch.org/whl/cu121\",\n \"uv pip install --python .venv 'numpy<2.0' setuptools==69.5.1\",\n \"uv pip install --python .venv insightface onnxruntime-gpu\",\n # YOLO Tracking\n \"uv pip install --python .venv ultralytics\",\n \"uv pip uninstall --python .venv mediapipe protobuf flatbuffers\",\n \"uv pip install --python .venv 'mediapipe>=0.10.0' 'protobuf>=3.20,<5.0' 'flatbuffers>=2.0'\"\n ]\n \n for cmd in cmds:\n subprocess.run(cmd, shell=True, check=True)\n \n print(\"\u2705 Instala\u00e7\u00e3o Conclu\u00edda!\")\n print(\"\ud83c\udfac Visual Opus Quality ativado!\")\n print(\" Pipeline: Denoise \u2192 Auto Illumination \u2192 Color Grading \u2192 Unsharp\")\n print(\" Codec: h264_nvenc CRF 18 + 25M bitrate\")\n print(\"\u26a1 WhisperX: int8_float16, batch_size=8, large-v2\")\nelse:\n os.chdir(\"/kaggle/working/ViralCutter\")\n print(\"\\n\u2705 Sistema j\u00e1 instalado.\")\n # Atualizar para \u00faltima vers\u00e3o\n subprocess.run(\"git pull origin video-quality-enhancement\", shell=True)\n print(\"\ud83c\udfac Atualizado para \u00faltima vers\u00e3o com Visual Opus Quality!\")\n\n# ==============================================================================\n# 3. 
PATCH NO DOWNLOADER\n# ==============================================================================\nprint(\"\\n\ud83d\udd27 Aplicando patch no sistema de download...\")\ndownload_script_content = r'''\nimport yt_dlp\nimport os\nimport glob\nimport shutil\n\ndef download(url, download_subs=False, quality=\"best\", output_folder=\"VIRALS\"):\n print(f\"\ud83d\ude80 Iniciando download: {url}\")\n title_temp = \"video_temp\"\n try:\n with yt_dlp.YoutubeDL({'quiet': True}) as ydl:\n info = ydl.extract_info(url, download=False)\n if info:\n t = info.get('title', 'video_temp')\n title_temp = \"\".join([c for c in t if c.isalnum() or c in \" .-_\"]).strip()\n except: pass\n\n final_path = os.path.join(output_folder, title_temp)\n os.makedirs(final_path, exist_ok=True)\n output_template = f\"{final_path}/input.%(ext)s\"\n\n ydl_opts = {\n 'format': \"bestvideo+bestaudio/best\",\n 'outtmpl': output_template,\n 'noplaylist': True,\n 'writesubtitles': False,\n 'writeautomaticsub': False,\n 'extractor_args': {'youtube': {'player_client': ['android', 'web']}}, \n 'quiet': False,\n 'no_warnings': True,\n 'merge_output_format': 'mp4'\n }\n\n try:\n with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n ydl.download([url])\n \n mp4_files = glob.glob(os.path.join(final_path, \"*.mp4\"))\n if mp4_files:\n found_file = mp4_files[0]\n print(f\"\u2705 Arquivo baixado: {found_file}\")\n expected_input = os.path.join(final_path, \"input.mp4\")\n if found_file != expected_input:\n shutil.move(found_file, expected_input)\n return expected_input\n return found_file\n raise FileNotFoundError(\"Arquivo MP4 n\u00e3o gerado\")\n except Exception as e:\n print(f\"\u274c Falha cr\u00edtica no Download: {e}\")\n raise e\n'''\nwith open('/kaggle/working/ViralCutter/scripts/download_video.py', 'w') as f:\n f.write(download_script_content)\n\n# ==============================================================================\n# 4. 
AUTENTICA\u00c7\u00c3O OAUTH\n# ==============================================================================\nCLIENT_SECRET_FILE = None\nSCOPES = ['https://www.googleapis.com/auth/drive.file']\nAPI_SERVICE_NAME = 'drive'\nAPI_VERSION = 'v3'\n\nprint(\"\\n\ud83d\udd11 Procurando 'client_secret.json'...\")\nfor root, dirs, files in os.walk('/kaggle/input'):\n for file in files:\n if 'client_secret' in file and file.endswith('.json'):\n CLIENT_SECRET_FILE = os.path.join(root, file)\n print(f\"\u2705 Credencial encontrada: {CLIENT_SECRET_FILE}\")\n break\n if CLIENT_SECRET_FILE: break\n\nservice = None\nfolder_id = None\n\nif CLIENT_SECRET_FILE:\n print(\"\\n\ud83c\udf10 INICIANDO LOGIN GOOGLE...\")\n print(\" \u26a0\ufe0f Siga os passos abaixo com aten\u00e7\u00e3o:\")\n print(\" 1. Clique no link que aparecer\u00e1 abaixo.\")\n print(\" 2. Fa\u00e7a login e autorize o acesso.\")\n print(\" 3. Copie o c\u00f3digo gerado pelo Google.\")\n print(\" 4. COLE O C\u00d3DIGO na caixa de entrada aqui no Kaggle e aperte Enter.\\n\")\n \n try:\n flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRET_FILE, SCOPES)\n \n if hasattr(flow, 'run_console'):\n creds = flow.run_console()\n else:\n print(\"\u274c AVISO CR\u00cdTICO: O downgrade da biblioteca falhou.\")\n print(\" Tentando m\u00e9todo alternativo via OOB...\")\n flow.redirect_uri = 'urn:ietf:wg:oauth:2.0:oob'\n auth_url, _ = flow.authorization_url(prompt='consent')\n print(f\" Acesse este URL: {auth_url}\")\n code = input(\" Cole o c\u00f3digo de autoriza\u00e7\u00e3o aqui: \")\n flow.fetch_token(code=code)\n creds = flow.credentials\n\n service = build(API_SERVICE_NAME, API_VERSION, credentials=creds)\n print(\"\\n\u2705 Autentica\u00e7\u00e3o realizada com SUCESSO!\")\n \n try:\n results = service.files().list(q=\"name='ViralCutter_VisualOpus' and mimeType='application/vnd.google-apps.folder' and trashed=false\", fields=\"files(id)\").execute()\n items = results.get('files', [])\n if items:\n folder_id = 
items[0]['id']\n print(f\"\u2705 Pasta 'ViralCutter_VisualOpus' encontrada (ID: {folder_id})\")\n else:\n file_metadata = {'name': 'ViralCutter_VisualOpus', 'mimeType': 'application/vnd.google-apps.folder'}\n folder = service.files().create(body=file_metadata, fields='id').execute()\n folder_id = folder.get('id')\n print(f\"\u2705 Pasta criada (ID: {folder_id})\")\n except Exception as e:\n print(f\"\u26a0\ufe0f Erro ao criar pasta: {e}\")\n except Exception as e:\n print(f\"\u274c Erro no Login: {e}\")\n print(\" Nota: Se aparecer 'invalid_grant' ou 'redirect_uri_mismatch', suas credenciais podem n\u00e3o ser do tipo 'Desktop App'.\")\nelse:\n print(\"\u274c 'client_secret.json' n\u00e3o encontrado. O upload ser\u00e1 desativado.\")\n\n# ==============================================================================\n# 5. MONITOR DE UPLOAD OAUTH - APENAS ARQUIVOS COM LEGENDA (_subtitled)\n# ==============================================================================\ndef monitor_oauth():\n if not service or not folder_id: \n return\n \n print(\"\\n\ud83d\udc40 Monitor OAuth Ativo: Uploads usar\u00e3o SEU espa\u00e7o.\")\n print(\"\ud83d\udccc APENAS arquivos COM LEGENDA (_subtitled) ser\u00e3o enviados!\")\n uploaded = set()\n \n while True:\n try:\n watch_path = '/kaggle/working/ViralCutter/VIRALS'\n if os.path.exists(watch_path):\n for r, d, f in os.walk(watch_path):\n for file in f:\n if not file.endswith('.mp4'):\n continue\n \n # Ignora arquivos tempor\u00e1rios\n ignore_patterns = [\n 'input.mp4', '_original_scale', '_temp', \n 'temp_', '.part', 'processing'\n ]\n \n if any(pattern in file.lower() for pattern in ignore_patterns):\n continue\n \n # \u2705 APENAS faz upload de arquivos COM LEGENDA (_subtitled)\n if '_subtitled' not in file.lower():\n continue\n \n path = os.path.join(r, file)\n \n if path not in uploaded:\n try:\n if os.path.getsize(path) > 1e6:\n s1 = os.path.getsize(path)\n time.sleep(5)\n if not os.path.exists(path):\n continue\n if 
os.path.getsize(path) != s1:\n continue\n \n print(f\"\\n\ud83d\udc8e Corte COM LEGENDA Detectado: {file}\")\n print(f\" \ud83d\udcc1 Tamanho: {os.path.getsize(path) / 1e6:.2f} MB\")\n print(f\" \u2601\ufe0f Enviando para Google Drive (Conta Pessoal)...\")\n \n try:\n meta = {'name': file, 'parents': [folder_id]}\n media = MediaFileUpload(path, resumable=True)\n service.files().create(body=meta, media_body=media).execute()\n uploaded.add(path)\n print(\" \u2705 Upload Completo!\")\n except Exception as e_up:\n print(f\" \u274c Erro Upload: {e_up}\")\n time.sleep(5)\n except FileNotFoundError:\n continue\n \n time.sleep(10)\n except Exception as e:\n print(f\"\u26a0\ufe0f Erro no monitor: {e}\")\n time.sleep(10)\n\nthreading.Thread(target=monitor_oauth, daemon=True).start()\n\n# ==============================================================================\n# 6. INICIAR INTERFACE\n# ==============================================================================\nprint(\"\\n\ud83d\ude80 INICIANDO VIRALCUTTER COM VISUAL OPUS QUALITY...\")\nprint(\"\ud83c\udfac Pipeline: Denoise \u2192 Auto Illumination \u2192 Color Grading \u2192 Unsharp\")\nprint(\"\ud83d\udcca Codec: h264_nvenc CRF 18 + 25M bitrate (4K)\")\nprint(\"\u26a1 T4 Otimizado: int8_float16, batch_size=8, large-v2\")\nprint(\"\ud83c\udd95 Blur Background + YOLO Talking-Head dispon\u00edveis!\")\nprint(\"\u26a0\ufe0f CLIQUE NO LINK P\u00daBLICO (gradio.live) ABAIXO:\")\nprint(\"=\"*60)\n\nsubprocess.Popen(\n ['Xvfb', ':1', '-screen', '0', '2560x1440x8'],\n stdout=subprocess.DEVNULL,\n stderr=subprocess.DEVNULL\n)\ntime.sleep(2)\n\nos.environ['DISPLAY'] = ':1.0'\nos.environ['MPLBACKEND'] = 'Agg'\nos.environ['CUDA_VISIBLE_DEVICES'] = '0,1'\n\n!/kaggle/working/ViralCutter/.venv/bin/python webui/app.py --colab --face-model yolo", + "metadata": { + "_uuid": "visual-opus-quality-kaggle-t4", + "_cell_guid": "visual-opus-quality-kaggle-cell-t4", + "trusted": true, + "collapsed": false, + "jupyter": { + 
"outputs_hidden": false + } + }, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "source": [ + "## \ud83d\udcdd Cr\u00e9ditos\n", + "\n", + "**ViralCutter** por [Rafa.Godoy](https://github.com/rafaelGodoyEbert)\n", + "\n", + "---\n", + "\n", + "## \ud83c\udfac Visual Opus Pipeline \u2014 Os 4 Filtros\n", + "\n", + "```\n", + "Frame \u2192 Denoise \u2192 Auto Illumination \u2192 Color Grading \u2192 Unsharp \u2192 Output\n", + "```\n", + "\n", + "| Filtro | O que faz |\n", + "|--------|-----------|\n", + "| **Denoise** | Remove gra\u00e3o visual e artefatos de compress\u00e3o (bilateral filter) |\n", + "| **Auto Illumination** | Ajusta brilho/contraste automaticamente (CLAHE no canal L) |\n", + "| **Color Grading** | Apar\u00eancia polida: contrast=1.05, saturation=1.1 |\n", + "| **Unsharp** | Refor\u00e7a bordas e detalhes finos (5:5:1.0 calibrado) |\n", + "\n", + "### Modos de Enquadramento\n", + "| Modo | Descri\u00e7\u00e3o |\n", + "|------|-----------|\n", + "| **Blur Background** | V\u00eddeo n\u00edtido no centro + fundo desfocado (sem barras pretas) |\n", + "| **Zoom (Center)** | Crop agressivo do centro |\n", + "| **Padding (9:16)** | Barras pretas nas laterais |\n", + "\n", + "---\n", + "`v1.0 Visual Opus + Blur Background + YOLO Talking-Head`" + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/viralcutter-Kaggle-SmoothZoom.ipynb b/viralcutter-Kaggle-SmoothZoom.ipynb new file mode 100644 index 0000000..1a9a0dd --- /dev/null +++ b/viralcutter-Kaggle-SmoothZoom.ipynb @@ -0,0 +1,132 @@ +{ + "metadata": { + "kernelspec": { + "language": "python", + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.12.12", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kaggle": { + "accelerator": "nvidiaTeslaT4", + 
"dataSources": [ + { + "sourceId": 14644893, + "sourceType": "datasetVersion", + "datasetId": 9355102 + }, + { + "sourceId": 14669723, + "sourceType": "datasetVersion", + "datasetId": 9371811 + }, + { + "sourceId": 14678475, + "sourceType": "datasetVersion", + "datasetId": 9377534 + }, + { + "sourceId": 14683135, + "sourceType": "datasetVersion", + "datasetId": 9380125 + }, + { + "sourceId": 14683138, + "sourceType": "datasetVersion", + "datasetId": 9380127 + } + ], + "dockerImageVersionId": 31260, + "isInternetEnabled": true, + "language": "python", + "sourceType": "notebook", + "isGpuEnabled": true + } + }, + "nbformat_minor": 4, + "nbformat": 4, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# 🎬 ViralCutter - Cyclic Smooth Zoom Edition (Kaggle)\n", + "\n", + "Versão especial com **YOLO Tracking + Cyclic Smooth Zoom** automático!\n", + "\n", + "### ✨ Novidades v0.9\n", + "- 🔄 **Zoom Cíclico**: zoom in (3s) → hold (2s) → **SNAP BACK** (instantâneo) → repete\n", + "- 🎥 **Tracking Ultra Suave**: alpha 0.02 (câmera segue o rosto bem devagar)\n", + "- ⚡ **T4 Otimizado**: WhisperX com int8 quantization (50% menos VRAM)\n", + "\n", + "### 🚀 Recursos\n", + "- ✅ Download de vídeos do YouTube\n", + "- ✅ Cortes automáticos com IA (Gemini)\n", + "- ✅ Legendas dinâmicas\n", + "- ✅ Upload automático para Google Drive\n", + "- ✅ Transcrição otimizada para GPUs T4\n", + "\n", + "---\n", + "\n", + "## 📦 Datasets Necessários\n", + "\n", + "| Dataset | Descrição |\n", + "|---------|----------|\n", + "| `client-secret-json` | OAuth do Google Cloud |\n", + "| `cookie` | Cookies YouTube (opcional) |\n", + "| `credenciais-google` | API Key Gemini |\n", + "| `google-drive-credentials` | Credenciais salvas (gerado após 1ª autenticação) |\n", + "\n", + "---" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": "#@title 🚀 VIRALCUTTER CYCLIC SMOOTH ZOOM (Kaggle + T4 Otimizado)\n#@markdown ### ⚙️ Zoom Cíclico + T4 Optimization\n#@markdown **Zoom:**\n#@markdown - 
Zoom In (3s) → Hold (2s) → SNAP BACK → Hold (2s) → Repete\n#@markdown\n#@markdown **WhisperX T4 Otimizado:**\n#@markdown - Modelo: `large-v2` (quantizado)\n#@markdown - Compute: `int8_float16` (50% menos VRAM)\n#@markdown - Batch: 8 (evita OOM)\n#@markdown - Idioma: Português (padrão)\n\nimport os\nimport time\nimport threading\nimport json\nimport shutil\nimport subprocess\nimport sys\nfrom IPython.display import clear_output\n\n# ==============================================================================\n# 1. SETUP DE BIBLIOTECAS (FIX AUTH)\n# ==============================================================================\nprint(\"1️⃣ Ajustando bibliotecas de autenticação...\")\ntry:\n subprocess.run(\n [sys.executable, \"-m\", \"pip\", \"install\", \"google-auth-oauthlib==0.4.6\", \"google-api-python-client\", \"--force-reinstall\", \"--no-deps\", \"-q\"],\n check=True\n )\n subprocess.run(\n [sys.executable, \"-m\", \"pip\", \"install\", \"google-auth>=1.0.0\", \"requests-oauthlib>=0.7.0\", \"-q\"], \n check=True\n )\nexcept Exception as e:\n print(f\"⚠️ Aviso na instalação de libs: {e}\")\n\nfrom google_auth_oauthlib.flow import InstalledAppFlow\nfrom googleapiclient.discovery import build\nfrom googleapiclient.http import MediaFileUpload\n\n# ==============================================================================\n# 2. 
INSTALAÇÃO DO SISTEMA + YOLO CYCLIC SMOOTH ZOOM + T4 OPTIMIZATION\n# ==============================================================================\nVENV_PYTHON = '/kaggle/working/ViralCutter/.venv/bin/python'\nBRANCH = \"smooth-zoom\" # Branch com Cyclic Smooth Zoom + T4 Optimization\n\nif not os.path.exists(VENV_PYTHON):\n print(f\"\\n📦 Instalando ViralCutter + Cyclic Smooth Zoom + T4 Opt (branch: {BRANCH})...\")\n \n if os.path.exists(\"/kaggle/working/ViralCutter\"):\n shutil.rmtree(\"/kaggle/working/ViralCutter\")\n \n # Clone da branch smooth-zoom\n subprocess.run(f\"git clone -b {BRANCH} https://github.com/masterface77/ViralCutter.git /kaggle/working/ViralCutter\", shell=True, check=True)\n os.chdir(\"/kaggle/working/ViralCutter\")\n \n print(\" - Instalando drivers...\")\n subprocess.run(\"pip install uv -q\", shell=True)\n subprocess.run(\"apt-get update -y -qq && apt-get install -y libcudnn8 ffmpeg xvfb -qq\", shell=True)\n \n print(\" - Criando ambiente virtual...\")\n subprocess.run([\"uv\", \"venv\", \".venv\"], check=True)\n \n print(\" - Baixando pacotes de IA + Ultralytics (T4 Otimizado)...\")\n cmds = [\n \"uv pip install --python .venv faster-whisper\",\n \"uv pip install --python .venv git+https://github.com/m-bain/whisperx.git\",\n \"uv pip install --python .venv -r requirements.txt\",\n \"uv pip install --python .venv -U --pre 'yt-dlp[default]'\",\n \"uv pip install --python .venv pytubefix google-generativeai pandas onnxruntime-gpu\",\n \"uv pip install --python .venv transformers==4.46.3 accelerate>=0.26.0\",\n \"uv pip install --python .venv torch==2.3.1+cu121 torchvision==0.18.1+cu121 torchaudio==2.3.1+cu121 --index-url https://download.pytorch.org/whl/cu121\",\n \"uv pip install --python .venv 'numpy<2.0' setuptools==69.5.1\",\n \"uv pip install --python .venv insightface onnxruntime-gpu\",\n # YOLO Cyclic Smooth Zoom\n \"uv pip install --python .venv ultralytics\",\n \"uv pip uninstall --python .venv mediapipe protobuf flatbuffers\",\n 
\"uv pip install --python .venv 'mediapipe>=0.10.0' 'protobuf>=3.20,<5.0' 'flatbuffers>=2.0'\"\n ]\n \n for cmd in cmds:\n subprocess.run(cmd, shell=True, check=True)\n \n print(\"✅ Instalação Concluída!\")\n print(\"🔄 Cyclic Smooth Zoom + T4 Optimization ativados!\")\n print(\"⚡ WhisperX: int8_float16, batch_size=8, large-v2\")\nelse:\n os.chdir(\"/kaggle/working/ViralCutter\")\n print(\"\\n✅ Sistema já instalado.\")\n # Atualizar para última versão\n subprocess.run(\"git pull origin smooth-zoom\", shell=True)\n print(\"🔄 Atualizado para última versão!\")\n\n# ==============================================================================\n# 3. PATCH NO DOWNLOADER\n# ==============================================================================\nprint(\"\\n🔧 Aplicando patch no sistema de download...\")\ndownload_script_content = r'''\nimport yt_dlp\nimport os\nimport glob\nimport shutil\n\ndef download(url, download_subs=False, quality=\"best\", output_folder=\"VIRALS\"):\n print(f\"🚀 Iniciando download: {url}\")\n title_temp = \"video_temp\"\n try:\n with yt_dlp.YoutubeDL({'quiet': True}) as ydl:\n info = ydl.extract_info(url, download=False)\n if info:\n t = info.get('title', 'video_temp')\n title_temp = \"\".join([c for c in t if c.isalnum() or c in \" .-_\"]).strip()\n except: pass\n\n final_path = os.path.join(output_folder, title_temp)\n os.makedirs(final_path, exist_ok=True)\n output_template = f\"{final_path}/input.%(ext)s\"\n\n ydl_opts = {\n 'format': \"bestvideo+bestaudio/best\",\n 'outtmpl': output_template,\n 'noplaylist': True,\n 'writesubtitles': False,\n 'writeautomaticsub': False,\n 'extractor_args': {'youtube': {'player_client': ['android', 'web']}}, \n 'quiet': False,\n 'no_warnings': True,\n 'merge_output_format': 'mp4'\n }\n\n try:\n with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n ydl.download([url])\n \n mp4_files = glob.glob(os.path.join(final_path, \"*.mp4\"))\n if mp4_files:\n found_file = mp4_files[0]\n print(f\"✅ Arquivo baixado: 
{found_file}\")\n expected_input = os.path.join(final_path, \"input.mp4\")\n if found_file != expected_input:\n shutil.move(found_file, expected_input)\n return expected_input\n return found_file\n raise FileNotFoundError(\"Arquivo MP4 não gerado\")\n except Exception as e:\n print(f\"❌ Falha crítica no Download: {e}\")\n raise e\n'''\nwith open('/kaggle/working/ViralCutter/scripts/download_video.py', 'w') as f:\n f.write(download_script_content)\n\n# ==============================================================================\n# 4. AUTENTICAÇÃO OAUTH\n# ==============================================================================\nCLIENT_SECRET_FILE = None\nSCOPES = ['https://www.googleapis.com/auth/drive.file']\nAPI_SERVICE_NAME = 'drive'\nAPI_VERSION = 'v3'\n\nprint(\"\\n🔑 Procurando 'client_secret.json'...\")\nfor root, dirs, files in os.walk('/kaggle/input'):\n for file in files:\n if 'client_secret' in file and file.endswith('.json'):\n CLIENT_SECRET_FILE = os.path.join(root, file)\n print(f\"✅ Credencial encontrada: {CLIENT_SECRET_FILE}\")\n break\n if CLIENT_SECRET_FILE: break\n\nservice = None\nfolder_id = None\n\nif CLIENT_SECRET_FILE:\n print(\"\\n🌐 INICIANDO LOGIN GOOGLE...\")\n print(\" ⚠️ Siga os passos abaixo com atenção:\")\n print(\" 1. Clique no link que aparecerá abaixo.\")\n print(\" 2. Faça login e autorize o acesso.\")\n print(\" 3. Copie o código gerado pelo Google.\")\n print(\" 4. 
COLE O CÓDIGO na caixa de entrada aqui no Kaggle e aperte Enter.\\n\")\n \n try:\n flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRET_FILE, SCOPES)\n \n if hasattr(flow, 'run_console'):\n creds = flow.run_console()\n else:\n print(\"❌ AVISO CRÍTICO: O downgrade da biblioteca falhou.\")\n print(\" Tentando método alternativo via OOB...\")\n flow.redirect_uri = 'urn:ietf:wg:oauth:2.0:oob'\n auth_url, _ = flow.authorization_url(prompt='consent')\n print(f\" Acesse este URL: {auth_url}\")\n code = input(\" Cole o código de autorização aqui: \")\n flow.fetch_token(code=code)\n creds = flow.credentials\n\n service = build(API_SERVICE_NAME, API_VERSION, credentials=creds)\n print(\"\\n✅ Autenticação realizada com SUCESSO!\")\n \n try:\n results = service.files().list(q=\"name='ViralCutter_SmoothZoom' and mimeType='application/vnd.google-apps.folder' and trashed=false\", fields=\"files(id)\").execute()\n items = results.get('files', [])\n if items:\n folder_id = items[0]['id']\n print(f\"✅ Pasta 'ViralCutter_SmoothZoom' encontrada (ID: {folder_id})\")\n else:\n file_metadata = {'name': 'ViralCutter_SmoothZoom', 'mimeType': 'application/vnd.google-apps.folder'}\n folder = service.files().create(body=file_metadata, fields='id').execute()\n folder_id = folder.get('id')\n print(f\"✅ Pasta criada (ID: {folder_id})\")\n except Exception as e:\n print(f\"⚠️ Erro ao criar pasta: {e}\")\n except Exception as e:\n print(f\"❌ Erro no Login: {e}\")\n print(\" Nota: Se aparecer 'invalid_grant' ou 'redirect_uri_mismatch', suas credenciais podem não ser do tipo 'Desktop App'.\")\nelse:\n print(\"❌ 'client_secret.json' não encontrado. O upload será desativado.\")\n\n# ==============================================================================\n# 5. 
MONITOR DE UPLOAD OAUTH - APENAS ARQUIVOS COM LEGENDA (_subtitled)\n# ==============================================================================\ndef monitor_oauth():\n if not service or not folder_id: \n return\n \n print(\"\\n👀 Monitor OAuth Ativo: Uploads usarão SEU espaço.\")\n print(\"📌 APENAS arquivos COM LEGENDA (_subtitled) serão enviados!\")\n uploaded = set()\n \n while True:\n try:\n watch_path = '/kaggle/working/ViralCutter/VIRALS'\n if os.path.exists(watch_path):\n for r, d, f in os.walk(watch_path):\n for file in f:\n if not file.endswith('.mp4'):\n continue\n \n # Ignora arquivos temporários e de processamento\n ignore_patterns = [\n 'input.mp4', '_original_scale', '_temp', \n 'temp_', '.part', 'processing'\n ]\n \n if any(pattern in file.lower() for pattern in ignore_patterns):\n continue\n \n # ✅ APENAS faz upload de arquivos COM LEGENDA (_subtitled)\n if '_subtitled' not in file.lower():\n continue\n \n path = os.path.join(r, file)\n \n if path not in uploaded:\n try:\n if os.path.getsize(path) > 1e6:\n s1 = os.path.getsize(path)\n time.sleep(5)\n if not os.path.exists(path):\n continue\n if os.path.getsize(path) != s1:\n continue\n \n print(f\"\\n💎 Corte COM LEGENDA Detectado: {file}\")\n print(f\" 📁 Tamanho: {os.path.getsize(path) / 1e6:.2f} MB\")\n print(f\" ☁️ Enviando para Google Drive (Conta Pessoal)...\")\n \n try:\n meta = {'name': file, 'parents': [folder_id]}\n media = MediaFileUpload(path, resumable=True)\n service.files().create(body=meta, media_body=media).execute()\n uploaded.add(path)\n print(\" ✅ Upload Completo!\")\n except Exception as e_up:\n print(f\" ❌ Erro Upload: {e_up}\")\n time.sleep(5)\n except FileNotFoundError:\n continue\n \n time.sleep(10)\n except Exception as e:\n print(f\"⚠️ Erro no monitor: {e}\")\n time.sleep(10)\n\nthreading.Thread(target=monitor_oauth, daemon=True).start()\n\n# ==============================================================================\n# 6. 
INICIAR INTERFACE\n# ==============================================================================\nprint(\"\\n🚀 INICIANDO VIRALCUTTER...\")\nprint(\"⚡ T4 Otimizado: int8_float16, batch_size=8, large-v2\")\nprint(\"⚠️ CLIQUE NO LINK PÚBLICO (gradio.live) ABAIXO:\")\nprint(\"=\"*60)\n\nsubprocess.Popen(\n ['Xvfb', ':1', '-screen', '0', '2560x1440x8'],\n stdout=subprocess.DEVNULL,\n stderr=subprocess.DEVNULL\n)\ntime.sleep(2)\n\nos.environ['DISPLAY'] = ':1.0'\nos.environ['MPLBACKEND'] = 'Agg'\nos.environ['CUDA_VISIBLE_DEVICES'] = '0,1'\n\n!/kaggle/working/ViralCutter/.venv/bin/python webui/app.py --colab --face-model yolo", + "metadata": { + "_uuid": "cyclic-smooth-zoom-kaggle-t4", + "_cell_guid": "cyclic-smooth-zoom-kaggle-cell-t4", + "trusted": true, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + } + }, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "source": [ + "## 📝 Créditos\n", + "\n", + "**ViralCutter** por [Rafa.Godoy](https://github.com/rafaelGodoyEbert)\n", + "\n", + "**Cyclic Smooth Zoom + T4 Optimization:**\n", + "- [Ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) - Face tracking\n", + "- WhisperX com int8 quantization - Transcrição otimizada\n", + "- EMA alpha=0.02 - Camera suave\n", + "\n", + "### ⚡ Configuração T4\n", + "```\n", + "WhisperX: large-v2 | int8_float16 | batch=8 | lang=pt\n", + "Zoom: 1.0x ──(3s)──► 1.4x ──(2s)──► SNAP ──(2s)──► repete\n", + "```\n", + "\n", + "---\n", + "`v0.9 Alpha + Cyclic Smooth Zoom + T4 Optimization`" + ], + "metadata": {} + } + ] +} \ No newline at end of file diff --git a/viralcutter-Kaggle.ipynb b/viralcutter-Kaggle.ipynb new file mode 100644 index 0000000..c470b6c --- /dev/null +++ b/viralcutter-Kaggle.ipynb @@ -0,0 +1,206 @@ +{ + "metadata": { + "kernelspec": { + "language": "python", + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.12.12", + "mimetype": "text/x-python", + 
"codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kaggle": { + "accelerator": "nvidiaTeslaT4", + "dataSources": [ + { + "sourceId": 14644893, + "sourceType": "datasetVersion", + "datasetId": 9355102 + }, + { + "sourceId": 14669723, + "sourceType": "datasetVersion", + "datasetId": 9371811 + }, + { + "sourceId": 14678475, + "sourceType": "datasetVersion", + "datasetId": 9377534 + }, + { + "sourceId": 14683135, + "sourceType": "datasetVersion", + "datasetId": 9380125 + }, + { + "sourceId": 14683138, + "sourceType": "datasetVersion", + "datasetId": 9380127 + } + ], + "dockerImageVersionId": 31260, + "isInternetEnabled": true, + "language": "python", + "sourceType": "notebook", + "isGpuEnabled": true + } + }, + "nbformat_minor": 4, + "nbformat": 4, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# 📦 Como Configurar os Datasets no Kaggle\n", + "\n", + "Este notebook requer alguns datasets com credenciais para funcionar corretamente. Siga os passos abaixo:\n", + "\n", + "## 1️⃣ client_secret.json (Obrigatório)\n", + "Credenciais OAuth do Google Cloud para autenticação e upload no Drive.\n", + "\n", + "**Como obter:**\n", + "1. Acesse [Google Cloud Console](https://console.cloud.google.com/)\n", + "2. Crie um novo projeto ou selecione um existente\n", + "3. Vá em **APIs & Services** → **Credentials**\n", + "4. Clique em **Create Credentials** → **OAuth 2.0 Client ID**\n", + "5. Escolha **Desktop App** como tipo de aplicação\n", + "6. Baixe o arquivo JSON\n", + "7. No Kaggle: **Add Data** → **Upload** → Faça upload do `client_secret.json`\n", + "8. Nomeie o dataset como `client-secret-json`\n", + "\n", + "## 2️⃣ cookie (Opcional)\n", + "Cookies do YouTube/Kaggle para download de vídeos privados/restritos.\n", + "\n", + "**Como obter:**\n", + "1. 
Instale a extensão **Get cookies.txt LOCAL** no Chrome/Edge\n", + " - [Chrome Web Store](https://chrome.google.com/webstore/detail/get-cookiestxt-locally/cclelndahbckbenkjhflpdbgdldlbecc)\n", + "2. Acesse [www.youtube.com](https://www.youtube.com) (logado)\n", + "3. Clique na extensão e em **Export** ou **Export As**\n", + "4. Salve o arquivo como `www.youtube.com_cookies.txt`\n", + "5. No Kaggle: **Add Data** → **Upload** → Faça upload do arquivo\n", + "6. Nomeie o dataset como `cookie`\n", + "\n", + "## 3️⃣ credenciais-google (Obrigatório)\n", + "API Key do Google Gemini para análise de vídeos com IA.\n", + "\n", + "**Como obter:**\n", + "1. Acesse [Google AI Studio](https://makersuite.google.com/app/apikey)\n", + "2. Clique em **Create API Key**\n", + "3. Copie a chave gerada\n", + "4. Crie um arquivo de texto `gemini_api.txt` e cole a chave\n", + "5. No Kaggle: **Add Data** → **Upload** → Faça upload do arquivo\n", + "6. Nomeie o dataset como `credenciais-google`\n", + "\n", + "## 4️⃣ google-drive-credentials (Gerado Automaticamente)\n", + "Este arquivo é gerado durante a autenticação OAuth na primeira execução.\n", + "\n", + "**Como usar:**\n", + "1. Na primeira execução, siga as instruções de autenticação\n", + "2. **Após a autenticação bem-sucedida**, o notebook gerará o arquivo `kaggle-viral-cutte-*.json`\n", + "3. 
Para reutilizar nas próximas execuções:\n", + " - Baixe o arquivo gerado em `/kaggle/working/`\n", + " - Crie um novo dataset no Kaggle com este arquivo\n", + " - Nomeie como `google-drive-credentials`\n", + "\n", + "---\n", + "\n", + "## ✅ Checklist Final\n", + "Antes de executar, verifique se você adicionou aos datasets:\n", + "- ✅ `client_secret.json` → dataset `client-secret-json`\n", + "- ⚠️ `www.youtube.com_cookies.txt` → dataset `cookie` (opcional)\n", + "- ✅ `gemini_api.txt` → dataset `credenciais-google`\n", + "- ℹ️ `kaggle-viral-cutte-*.json` → dataset `google-drive-credentials` (opcional, para reuso)\n", + "\n", + "## 🎬 Pronto para Começar!\n", + "Após configurar os datasets, execute a célula abaixo para iniciar o ViralCutter.\n", + "\n", + "---" + ], + "metadata": {} + }, + { + "cell_type": "code", + "source": "#@title 🚀 VIRALCUTTER: Instalação + Upload OAuth (Versão Final v3 + Faster-Whisper)\nimport os\nimport time\nimport threading\nimport json\nimport shutil\nimport subprocess\nimport sys\nfrom IPython.display import clear_output\n\n# ==============================================================================\n# 1. 
SETUP DE BIBLIOTECAS (FIX AUTH - DOWNGRADE FORCE)\n# ==============================================================================\nprint(\"1️⃣ Ajustando bibliotecas de autenticação...\")\ntry:\n subprocess.run(\n [sys.executable, \"-m\", \"pip\", \"install\", \"google-auth-oauthlib==0.4.6\", \"google-api-python-client\", \"--force-reinstall\", \"--no-deps\", \"-q\"],\n check=True\n )\n subprocess.run(\n [sys.executable, \"-m\", \"pip\", \"install\", \"google-auth>=1.0.0\", \"requests-oauthlib>=0.7.0\", \"-q\"], \n check=True\n )\nexcept Exception as e:\n print(f\"⚠️ Aviso na instalação de libs: {e}\")\n\nfrom google_auth_oauthlib.flow import InstalledAppFlow\nfrom googleapiclient.discovery import build\nfrom googleapiclient.http import MediaFileUpload\n\n# ==============================================================================\n# 2. INSTALAÇÃO DO SISTEMA (AUTO-REPAIR)\n# ==============================================================================\nVENV_PYTHON = '/kaggle/working/ViralCutter/.venv/bin/python'\n\nif not os.path.exists(VENV_PYTHON):\n print(\"\\n📦 Sistema não detectado. 
Instalando ViralCutter (3-5 min)...\")\n \n if os.path.exists(\"/kaggle/working/ViralCutter\"):\n shutil.rmtree(\"/kaggle/working/ViralCutter\")\n \n subprocess.run(\"git clone https://github.com/RafaelGodoyEbert/ViralCutter.git /kaggle/working/ViralCutter\", shell=True, check=True)\n os.chdir(\"/kaggle/working/ViralCutter\")\n \n print(\" - Instalando drivers...\")\n subprocess.run(\"pip install uv -q\", shell=True)\n subprocess.run(\"apt-get update -y -qq && apt-get install -y libcudnn8 ffmpeg xvfb -qq\", shell=True)\n \n print(\" - Criando ambiente virtual...\")\n subprocess.run([\"uv\", \"venv\", \".venv\"], check=True)\n \n print(\" - Baixando pacotes de IA...\")\n cmds = [\n \"uv pip install --python .venv faster-whisper\", # <--- ÚNICA MUDANÇA: Adiciona motor otimizado\n \"uv pip install --python .venv git+https://github.com/m-bain/whisperx.git\",\n \"uv pip install --python .venv -r requirements.txt\",\n \"uv pip install --python .venv -U --pre 'yt-dlp[default]'\",\n \"uv pip install --python .venv pytubefix google-generativeai pandas onnxruntime-gpu\",\n \"uv pip install --python .venv transformers==4.46.3 accelerate>=0.26.0\",\n \"uv pip install --python .venv torch==2.3.1+cu121 torchvision==0.18.1+cu121 torchaudio==2.3.1+cu121 --index-url https://download.pytorch.org/whl/cu121\",\n \"uv pip install --python .venv 'numpy<2.0' setuptools==69.5.1\",\n \"uv pip install --python .venv insightface onnxruntime-gpu\",\n \"uv pip uninstall --python .venv mediapipe protobuf flatbuffers\",\n \"uv pip install --python .venv 'mediapipe>=0.10.0' 'protobuf>=3.20,<5.0' 'flatbuffers>=2.0'\"\n ]\n \n for cmd in cmds:\n subprocess.run(cmd, shell=True, check=True)\n \n print(\"✅ Instalação Concluída!\")\n print(\" 💡 Faster-Whisper instalado: Agora você pode usar Large sem estourar memória!\")\nelse:\n print(\"\\n✅ Sistema já instalado. Pulando etapa.\")\n\n# ==============================================================================\n# 3. 
PATCH NO DOWNLOADER\n# ==============================================================================\nprint(\"\\n🔧 Aplicando patch no sistema de download...\")\ndownload_script_content = r'''\nimport yt_dlp\nimport os\nimport glob\nimport shutil\n\ndef download(url, download_subs=False, quality=\"best\", output_folder=\"VIRALS\"):\n print(f\"🚀 Iniciando download: {url}\")\n title_temp = \"video_temp\"\n try:\n with yt_dlp.YoutubeDL({'quiet': True}) as ydl:\n info = ydl.extract_info(url, download=False)\n if info:\n t = info.get('title', 'video_temp')\n title_temp = \"\".join([c for c in t if c.isalnum() or c in \" .-_\"]).strip()\n except: pass\n\n final_path = os.path.join(output_folder, title_temp)\n os.makedirs(final_path, exist_ok=True)\n output_template = f\"{final_path}/input.%(ext)s\"\n\n ydl_opts = {\n 'format': \"bestvideo+bestaudio/best\",\n 'outtmpl': output_template,\n 'noplaylist': True,\n 'writesubtitles': False,\n 'writeautomaticsub': False,\n 'extractor_args': {'youtube': {'player_client': ['android', 'web']}}, \n 'quiet': False,\n 'no_warnings': True,\n 'merge_output_format': 'mp4'\n }\n\n try:\n with yt_dlp.YoutubeDL(ydl_opts) as ydl:\n ydl.download([url])\n \n mp4_files = glob.glob(os.path.join(final_path, \"*.mp4\"))\n if mp4_files:\n found_file = mp4_files[0]\n print(f\"✅ Arquivo baixado: {found_file}\")\n expected_input = os.path.join(final_path, \"input.mp4\")\n if found_file != expected_input:\n shutil.move(found_file, expected_input)\n return expected_input\n return found_file\n raise FileNotFoundError(\"Arquivo MP4 não gerado\")\n except Exception as e:\n print(f\"❌ Falha crítica no Download: {e}\")\n raise e\n'''\nwith open('/kaggle/working/ViralCutter/scripts/download_video.py', 'w') as f:\n f.write(download_script_content)\n\n# ==============================================================================\n# 4. 
AUTENTICAÇÃO OAUTH (MANTIDO 100% ORIGINAL)\n# ==============================================================================\nCLIENT_SECRET_FILE = None\nSCOPES = ['https://www.googleapis.com/auth/drive.file']\nAPI_SERVICE_NAME = 'drive'\nAPI_VERSION = 'v3'\n\nprint(\"\\n🔑 Procurando 'client_secret.json'...\")\nfor root, dirs, files in os.walk('/kaggle/input'):\n for file in files:\n if 'client_secret' in file and file.endswith('.json'):\n CLIENT_SECRET_FILE = os.path.join(root, file)\n print(f\"✅ Credencial encontrada: {CLIENT_SECRET_FILE}\")\n break\n if CLIENT_SECRET_FILE: break\n\nservice = None\nfolder_id = None\n\nif CLIENT_SECRET_FILE:\n print(\"\\n🌐 INICIANDO LOGIN GOOGLE...\")\n print(\" ⚠️ Siga os passos abaixo com atenção:\")\n print(\" 1. Clique no link que aparecerá abaixo.\")\n print(\" 2. Faça login e autorize o acesso.\")\n print(\" 3. Copie o código gerado pelo Google.\")\n print(\" 4. COLE O CÓDIGO na caixa de entrada aqui no Kaggle e aperte Enter.\\n\")\n \n try:\n flow = InstalledAppFlow.from_client_secrets_file(CLIENT_SECRET_FILE, SCOPES)\n \n if hasattr(flow, 'run_console'):\n creds = flow.run_console()\n else:\n print(\"❌ AVISO CRÍTICO: O downgrade da biblioteca falhou.\")\n print(\" Tentando método alternativo via OOB...\")\n flow.redirect_uri = 'urn:ietf:wg:oauth:2.0:oob'\n auth_url, _ = flow.authorization_url(prompt='consent')\n print(f\" Acesse este URL: {auth_url}\")\n code = input(\" Cole o código de autorização aqui: \")\n flow.fetch_token(code=code)\n creds = flow.credentials\n\n service = build(API_SERVICE_NAME, API_VERSION, credentials=creds)\n print(\"\\n✅ Autenticação realizada com SUCESSO!\")\n \n try:\n results = service.files().list(q=\"name='ViralCutter_Videos' and mimeType='application/vnd.google-apps.folder' and trashed=false\", fields=\"files(id)\").execute()\n items = results.get('files', [])\n if items:\n folder_id = items[0]['id']\n print(f\"✅ Pasta 'ViralCutter_Videos' encontrada (ID: {folder_id})\")\n else:\n 
file_metadata = {'name': 'ViralCutter_Videos', 'mimeType': 'application/vnd.google-apps.folder'}\n folder = service.files().create(body=file_metadata, fields='id').execute()\n folder_id = folder.get('id')\n print(f\"✅ Pasta criada (ID: {folder_id})\")\n except Exception as e:\n print(f\"⚠️ Erro ao criar pasta: {e}\")\n except Exception as e:\n print(f\"❌ Erro no Login: {e}\")\n print(\" Nota: Se aparecer 'invalid_grant' ou 'redirect_uri_mismatch', suas credenciais podem não ser do tipo 'Desktop App'.\")\nelse:\n print(\"❌ 'client_secret.json' não encontrado. O upload será desativado.\")\n\n# ==============================================================================\n# 5. MONITOR DE UPLOAD OAUTH (MANTIDO 100% ORIGINAL)\n# ==============================================================================\ndef monitor_oauth():\n if not service or not folder_id: \n return\n \n print(\"\\n👀 Monitor OAuth Ativo: Uploads usarão SEU espaço.\")\n uploaded = set()\n \n while True:\n try:\n watch_path = '/kaggle/working/ViralCutter/VIRALS'\n if os.path.exists(watch_path):\n for r, d, f in os.walk(watch_path):\n for file in f:\n if not file.endswith('.mp4'):\n continue\n \n ignore_patterns = [\n 'input.mp4', '_original_scale', '_subtitled', '_temp', \n 'temp_', 'final_', '.part', 'processing'\n ]\n \n if any(pattern in file.lower() for pattern in ignore_patterns):\n continue\n \n path = os.path.join(r, file)\n \n if path not in uploaded and os.path.getsize(path) > 1e6:\n s1 = os.path.getsize(path)\n time.sleep(5)\n if os.path.getsize(path) != s1:\n continue\n \n print(f\"\\n💎 Novo Corte FINAL Detectado: {file}\")\n print(f\" 📁 Tamanho: {os.path.getsize(path) / 1e6:.2f} MB\")\n print(f\" ☁️ Enviando para Google Drive (Conta Pessoal)...\")\n \n try:\n meta = {'name': file, 'parents': [folder_id]}\n media = MediaFileUpload(path, resumable=True)\n service.files().create(body=meta, media_body=media).execute()\n uploaded.add(path)\n print(\" ✅ Upload Completo!\")\n except Exception 
as e_up:\n print(f\" ❌ Erro Upload: {e_up}\")\n time.sleep(5)\n \n time.sleep(10)\n except Exception as e:\n print(f\"⚠️ Erro no monitor: {e}\")\n time.sleep(10)\n\nthreading.Thread(target=monitor_oauth, daemon=True).start()\n\n# ==============================================================================\n# 6. INICIAR (MANTIDO 100% ORIGINAL)\n# ==============================================================================\nprint(\"\\n🚀 INICIANDO VIRALCUTTER...\")\nprint(\"⚠️ CLIQUE NO LINK PÚBLICO (gradio.live) ABAIXO:\")\nprint(\"=\"*60)\n\nsubprocess.Popen(\n ['Xvfb', ':1', '-screen', '0', '2560x1440x8'],\n stdout=subprocess.DEVNULL,\n stderr=subprocess.DEVNULL\n)\ntime.sleep(2)\n\nos.environ['DISPLAY'] = ':1.0'\nos.environ['MPLBACKEND'] = 'Agg'\nos.environ['CUDA_VISIBLE_DEVICES'] = '0,1'\n\n!/kaggle/working/ViralCutter/.venv/bin/python webui/app.py --colab", + "metadata": { + "_uuid": "14ab286a-9624-45d5-9bc5-e005704d6d30", + "_cell_guid": "1478c514-9c52-4c9b-98b3-b58856322d1c", + "trusted": true, + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "execution": { + "iopub.status.busy": "2026-02-05T12:40:17.810752Z", + "iopub.execute_input": "2026-02-05T12:40:17.811470Z", + "execution_failed": "2026-02-05T13:01:51.614Z" + } + }, + "outputs": [ + { + "name": "stdout", + "text": "1️⃣ Ajustando bibliotecas de autenticação...\n ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 14.5/14.5 MB 108.7 MB/s eta 0:00:00\n\n📦 Sistema não detectado. 
Instalando ViralCutter (3-5 min)...\n", + "output_type": "stream" + }, + { + "name": "stderr", + "text": "Cloning into '/kaggle/working/ViralCutter'...\n", + "output_type": "stream" + }, + { + "name": "stdout", + "text": " - Instalando drivers...\n ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 22.8/22.8 MB 84.6 MB/s eta 0:00:00\n", + "output_type": "stream" + }, + { + "name": "stderr", + "text": "W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)\n", + "output_type": "stream" + }, + { + "name": "stdout", + "text": "Selecting previously unselected package libcudnn8.\n(Reading database ... 129073 files and directories currently installed.)\nPreparing to unpack .../libcudnn8_8.9.7.29-1+cuda12.2_amd64.deb ...\nUnpacking libcudnn8 (8.9.7.29-1+cuda12.2) ...\nSetting up libcudnn8 (8.9.7.29-1+cuda12.2) ...\n - Criando ambiente virtual...\n", + "output_type": "stream" + }, + { + "name": "stderr", + "text": "warning: The `--system` flag has no effect, `uv venv` always ignores virtual environments when finding a Python interpreter; did you mean `--no-managed-python`?\nUsing CPython 3.12.12 interpreter at: /usr/bin/python3\nCreating virtual environment at: .venv\nActivate with: source .venv/bin/activate\n", + "output_type": "stream" + }, + { + "name": "stdout", + "text": " - Baixando pacotes de IA...\n", + "output_type": "stream" + }, + { + "name": "stderr", + "text": "Resolved 30 packages in 261ms\nPrepared 18 packages in 1.58s\nwarning: Failed to hardlink files; falling back to full copy. 
This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 30 packages in 213ms\n + anyio==4.12.1\n + av==16.1.0\n + certifi==2026.1.4\n + click==8.3.1\n + coloredlogs==15.0.1\n + ctranslate2==4.7.1\n + faster-whisper==1.2.1\n + filelock==3.20.3\n + flatbuffers==25.12.19\n + fsspec==2026.1.0\n + h11==0.16.0\n + hf-xet==1.2.0\n + httpcore==1.0.9\n + httpx==0.28.1\n + huggingface-hub==1.4.0\n + humanfriendly==10.0\n + idna==3.11\n + mpmath==1.3.0\n + numpy==2.4.2\n + onnxruntime==1.23.2\n + packaging==26.0\n + protobuf==6.33.5\n + pyyaml==6.0.3\n + setuptools==80.10.2\n + shellingham==1.5.4\n + sympy==1.14.0\n + tokenizers==0.22.2\n + tqdm==4.67.3\n + typer-slim==0.21.1\n + typing-extensions==4.15.0\nResolved 117 packages in 4.83s\nPrepared 72 packages in 46.44s\nUninstalled 1 package in 4ms\nwarning: Failed to hardlink files; falling back to full copy. 
This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 93 packages in 10.65s\n + aiohappyeyeballs==2.6.1\n + aiohttp==3.13.3\n + aiosignal==1.4.0\n + alembic==1.18.3\n + antlr4-python3-runtime==4.9.3\n + asteroid-filterbanks==0.4.0\n + attrs==25.4.0\n + cffi==2.0.0\n + charset-normalizer==3.4.4\n + colorlog==6.10.1\n + contourpy==1.3.3\n + cycler==0.12.1\n + docopt==0.6.2\n + einops==0.8.2\n + fonttools==4.61.1\n + frozenlist==1.8.0\n + greenlet==3.3.1\n - huggingface-hub==1.4.0\n + huggingface-hub==0.36.1\n + hyperpyyaml==1.2.3\n + jinja2==3.1.6\n + joblib==1.5.3\n + julius==0.2.7\n + kiwisolver==1.4.9\n + lightning==2.6.1\n + lightning-utilities==0.15.2\n + mako==1.3.10\n + markdown-it-py==4.0.0\n + markupsafe==3.0.3\n + matplotlib==3.10.8\n + mdurl==0.1.2\n + multidict==6.7.1\n + networkx==3.6.1\n + nltk==3.9.2\n + nvidia-cublas-cu12==12.8.4.1\n + nvidia-cuda-cupti-cu12==12.8.90\n + nvidia-cuda-nvrtc-cu12==12.8.93\n + nvidia-cuda-runtime-cu12==12.8.90\n + nvidia-cudnn-cu12==9.10.2.21\n + nvidia-cufft-cu12==11.3.3.83\n + nvidia-cufile-cu12==1.13.1.3\n + nvidia-curand-cu12==10.3.9.90\n + nvidia-cusolver-cu12==11.7.3.90\n + nvidia-cusparse-cu12==12.5.8.93\n + nvidia-cusparselt-cu12==0.7.1\n + nvidia-nccl-cu12==2.27.3\n + nvidia-nvjitlink-cu12==12.8.93\n + nvidia-nvtx-cu12==12.8.90\n + omegaconf==2.3.0\n + optuna==4.7.0\n + pandas==3.0.0\n + pillow==12.1.0\n + primepy==1.3\n + propcache==0.4.1\n + pyannote-audio==3.4.0\n + pyannote-core==5.0.0\n + pyannote-database==5.1.3\n + pyannote-metrics==3.2.1\n + pyannote-pipeline==3.0.1\n + pycparser==3.0\n + pygments==2.19.2\n + pyparsing==3.3.2\n + python-dateutil==2.9.0.post0\n + pytorch-lightning==2.6.1\n + pytorch-metric-learning==2.9.0\n + regex==2026.1.15\n + requests==2.32.5\n + rich==14.3.2\n + 
ruamel-yaml==0.18.17\n + ruamel-yaml-clib==0.2.15\n + safetensors==0.7.0\n + scikit-learn==1.8.0\n + scipy==1.17.0\n + semver==3.0.4\n + sentencepiece==0.2.1\n + six==1.17.0\n + sortedcontainers==2.4.0\n + soundfile==0.13.1\n + speechbrain==1.0.3\n + sqlalchemy==2.0.46\n + tabulate==0.9.0\n + tensorboardx==2.6.4\n + threadpoolctl==3.6.0\n + torch==2.8.0+cu128\n + torch-audiomentations==0.12.0\n + torch-pitch-shift==1.2.5\n + torchaudio==2.8.0+cu128\n + torchmetrics==1.8.2\n + transformers==4.57.6\n + triton==3.4.0\n + typer==0.21.1\n + urllib3==2.6.3\n + whisperx==3.7.6 (from git+https://github.com/m-bain/whisperx.git@6ec4a020489d904c4f2cd1ed097674232d2692d4)\n + yarl==1.22.0\nResolved 243 packages in 29.96s\nPrepared 107 packages in 17.67s\nwarning: Failed to hardlink files; falling back to full copy. This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 122 packages in 6.51s\n + a2wsgi==1.10.10\n + absl-py==2.4.0\n + aiofile==3.9.0\n + aiofiles==24.1.0\n + aiohttp-socks==0.11.0\n + albucore==0.0.24\n + albumentations==2.0.8\n + annotated-doc==0.0.4\n + annotated-types==0.7.0\n + azure-ai-documentintelligence==1.0.2\n + azure-core==1.38.0\n + azure-identity==1.25.1\n + beautifulsoup4==4.14.3\n + blinker==1.9.0\n + brotli==1.2.0\n + browser-cookie3==0.20.1\n + caio==0.9.25\n + cairocffi==1.7.1\n + cairosvg==2.8.2\n + cloudscraper==1.2.71\n + cobble==0.1.4\n + cryptography==46.0.4\n + cssselect2==0.8.0\n + curl-cffi==0.14.0\n + cython==3.2.4\n + ddgs==9.10.0\n + deep-translator==1.11.4\n + defusedxml==0.7.1\n + deprecated==1.3.1\n + diskcache==5.6.3\n + distro==1.9.0\n + easydict==1.13\n + et-xmlfile==2.0.0\n + fake-useragent==2.2.0\n + fastapi==0.128.1\n + ffmpeg-python==0.2.0\n + ffmpy==1.0.0\n + flask==3.1.2\n + future==1.0.0\n + g4f==7.0.0\n + 
google-auth==2.48.0\n + google-genai==1.62.0\n + gradio==6.5.1\n + gradio-client==2.0.3\n + groovy==0.1.2\n + h2==4.3.0\n + hpack==4.1.0\n + hyperframe==6.1.0\n + imageio==2.37.2\n + insightface==0.7.3\n + isodate==0.7.2\n + itsdangerous==2.2.0\n + jeepney==0.9.0\n + lazy-loader==0.4\n + llama-cpp-python==0.3.16\n + lxml==6.0.2\n + lz4==4.4.5\n + magika==0.6.3\n + mammoth==1.11.0\n + markdownify==1.2.2\n + markitdown==0.1.4\n + mediapipe==0.10.32\n + ml-dtypes==0.5.4\n + msal==1.34.0\n + msal-extensions==1.3.1\n + mss==10.1.0\n + nest-asyncio2==1.7.1\n + nodriver==0.48.1\n + olefile==0.47\n + onnx==1.20.1\n + onnxruntime-gpu==1.23.2\n + opencv-contrib-python==4.13.0.92\n + opencv-python==4.13.0.92\n + opencv-python-headless==4.13.0.92\n + openpyxl==3.1.5\n + orjson==3.11.7\n + pdfminer-six==20260107\n + platformdirs==4.5.1\n + prettytable==3.17.0\n + primp==0.15.0\n + psutil==7.2.2\n + pyasn1==0.6.2\n + pyasn1-modules==0.4.2\n + pycryptodome==3.23.0\n + pycryptodomex==3.23.0\n + pydantic==2.12.5\n + pydantic-core==2.41.5\n + pydub==0.25.1\n + pyjwt==2.11.0\n + python-dotenv==1.2.1\n + python-multipart==0.0.22\n + python-pptx==1.0.2\n + python-socks==2.8.0\n + pytz==2025.2\n + requests-toolbelt==1.0.0\n + rsa==4.9.1\n + safehttpx==0.1.7\n + scikit-image==0.26.0\n + semantic-version==2.10.0\n + simsimd==6.5.12\n + sniffio==1.3.1\n + socksio==1.0.0\n + sounddevice==0.5.5\n + soupsieve==2.8.3\n + speechrecognition==3.14.5\n + starlette==0.50.0\n + stringzilla==4.6.0\n + tenacity==9.1.3\n + tifffile==2026.1.28\n + tinycss2==1.5.1\n + tomlkit==0.13.3\n + typing-inspection==0.4.2\n + uvicorn==0.40.0\n + wcwidth==0.5.3\n + webencodings==0.5.1\n + websockets==15.0.1\n + werkzeug==3.1.5\n + wrapt==2.1.1\n + xlrd==2.0.2\n + xlsxwriter==3.2.9\n + youtube-transcript-api==1.0.3\n + yt-dlp==2026.2.4\nResolved 11 packages in 106ms\nPrepared 4 packages in 211ms\nUninstalled 2 packages in 26ms\nwarning: Failed to hardlink files; falling back to full copy. 
This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 4 packages in 62ms\n + mutagen==1.47.0\n - websockets==15.0.1\n + websockets==16.0\n - yt-dlp==2026.2.4\n + yt-dlp==2026.2.4.233607.dev0\n + yt-dlp-ejs==0.4.0\nResolved 52 packages in 249ms\nPrepared 14 packages in 1.56s\nUninstalled 1 package in 2ms\nwarning: Failed to hardlink files; falling back to full copy. This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 14 packages in 430ms\n + google-ai-generativelanguage==0.6.15\n + google-api-core==2.29.0\n + google-api-python-client==2.189.0\n + google-auth-httplib2==0.3.0\n + google-generativeai==0.8.6\n + googleapis-common-protos==1.72.0\n + grpcio==1.76.0\n + grpcio-status==1.71.2\n + httplib2==0.31.2\n + nodejs-wheel-binaries==24.13.0\n + proto-plus==1.27.1\n - protobuf==6.33.5\n + protobuf==5.29.6\n + pytubefix==10.3.6\n + uritemplate==4.2.0\nResolved 42 packages in 75ms\nPrepared 3 packages in 435ms\nUninstalled 2 packages in 92ms\nwarning: Failed to hardlink files; falling back to full copy. This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 3 packages in 127ms\n + accelerate==1.12.0\n - tokenizers==0.22.2\n + tokenizers==0.20.3\n - transformers==4.57.6\n + transformers==4.46.3\nResolved 25 packages in 1.02s\nPrepared 14 packages in 24.71s\nUninstalled 13 packages in 609ms\nwarning: Failed to hardlink files; falling back to full copy. 
This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 14 packages in 4.13s\n - nvidia-cublas-cu12==12.8.4.1\n + nvidia-cublas-cu12==12.1.3.1\n - nvidia-cuda-cupti-cu12==12.8.90\n + nvidia-cuda-cupti-cu12==12.1.105\n - nvidia-cuda-nvrtc-cu12==12.8.93\n + nvidia-cuda-nvrtc-cu12==12.1.105\n - nvidia-cuda-runtime-cu12==12.8.90\n + nvidia-cuda-runtime-cu12==12.1.105\n - nvidia-cudnn-cu12==9.10.2.21\n + nvidia-cudnn-cu12==8.9.2.26\n - nvidia-cufft-cu12==11.3.3.83\n + nvidia-cufft-cu12==11.0.2.54\n - nvidia-curand-cu12==10.3.9.90\n + nvidia-curand-cu12==10.3.2.106\n - nvidia-cusolver-cu12==11.7.3.90\n + nvidia-cusolver-cu12==11.4.5.107\n - nvidia-cusparse-cu12==12.5.8.93\n + nvidia-cusparse-cu12==12.1.0.106\n - nvidia-nccl-cu12==2.27.3\n + nvidia-nccl-cu12==2.20.5\n - nvidia-nvtx-cu12==12.8.90\n + nvidia-nvtx-cu12==12.1.105\n - torch==2.8.0+cu128\n + torch==2.3.1+cu121\n - torchaudio==2.8.0+cu128\n + torchaudio==2.3.1+cu121\n + torchvision==0.18.1+cu121\nResolved 2 packages in 637ms\nPrepared 2 packages in 16.18s\nUninstalled 2 packages in 43ms\nwarning: Failed to hardlink files; falling back to full copy. This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 2 packages in 92ms\n - numpy==2.4.2\n + numpy==1.26.4\n - setuptools==80.10.2\n + setuptools==69.5.1\nResolved 51 packages in 262ms\nPrepared 1 package in 616ms\nUninstalled 1 package in 31ms\nwarning: Failed to hardlink files; falling back to full copy. 
This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 1 package in 95ms\n - opencv-python-headless==4.13.0.92\n + opencv-python-headless==4.11.0.86\nUninstalled 3 packages in 15ms\n - flatbuffers==25.12.19\n - mediapipe==0.10.32\n - protobuf==5.29.6\nResolved 19 packages in 83ms\nPrepared 2 packages in 917ms\nUninstalled 1 package in 36ms\nwarning: Failed to hardlink files; falling back to full copy. This may lead to degraded performance.\n If the cache and target directories are on different filesystems, hardlinking may not be supported.\n If this is intentional, set `export UV_LINK_MODE=copy` or use `--link-mode=copy` to suppress this warning.\nInstalled 4 packages in 302ms\n + flatbuffers==25.12.19\n + mediapipe==0.10.32\n - opencv-contrib-python==4.13.0.92\n + opencv-contrib-python==4.11.0.86\n + protobuf==4.25.8\n", + "output_type": "stream" + }, + { + "name": "stdout", + "text": "✅ Instalação Concluída!\n 💡 Faster-Whisper instalado: Agora você pode usar Large sem estourar memória!\n\n🔧 Aplicando patch no sistema de download...\n\n🔑 Procurando 'client_secret.json'...\n✅ Credencial encontrada: /kaggle/input/client-secret-json/client_secret.json\n\n🌐 INICIANDO LOGIN GOOGLE...\n ⚠️ Siga os passos abaixo com atenção:\n 1. Clique no link que aparecerá abaixo.\n 2. Faça login e autorize o acesso.\n 3. Copie o código gerado pelo Google.\n 4. 
COLE O CÓDIGO na caixa de entrada aqui no Kaggle e aperte Enter.\n\nPlease visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=6470460908-46mrv6m55s52s5cuj7nioqm8t57s1n4l.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.file&state=Oqb7A4mqoRs7NyrOH0jH12QqsNFujR&prompt=consent&access_type=offline\n", + "output_type": "stream" + }, + { + "output_type": "stream", + "name": "stdin", + "text": "Enter the authorization code: 4/1ASc3gC1FY0jcZpMngnkjlzX6xvInHKlu_BgLYqgYMP0nb_nMOIdu6WVg2Ho\n" + }, + { + "name": "stdout", + "text": "\n✅ Autenticação realizada com SUCESSO!\n✅ Pasta 'ViralCutter_Videos' encontrada (ID: 1dkq555mhQkzDRe7DAN3v1mcZ6YqLt0cG)\n\n👀 Monitor OAuth Ativo: Uploads usarão SEU espaço.\n\n🚀 INICIANDO VIRALCUTTER...\n⚠️ CLIQUE NO LINK PÚBLICO (gradio.live) ABAIXO:\n============================================================\nRunning in Colab mode. Generating public link with Static Mounts...\nDEBUG: Registered static paths: ['/kaggle/working/ViralCutter/VIRALS', '/kaggle/working/ViralCutter', '/kaggle/working/ViralCutter', '.']\nDEBUG: Allowed paths for Gradio: ['/kaggle/working/ViralCutter/VIRALS', '/kaggle/working/ViralCutter', '/kaggle/working/ViralCutter', '.']\n* Running on local URL: http://127.0.0.1:7860\n* Running on public URL: https://e6e28ea40b0b9652dd.gradio.live\n\nThis share link expires in 1 week. 
For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\nMounted /virals to /kaggle/working/ViralCutter/VIRALS\n\n💎 Novo Corte FINAL Detectado: 000_Da_Loja_Física_ao_Mercado_Livre_O_Segredo_para_Faturar_Dormi.mp4\n 📁 Tamanho: 44.38 MB\n ☁️ Enviando para Google Drive (Conta Pessoal)...\n ✅ Upload Completo!\n⚠️ Erro no monitor: [Errno 2] No such file or directory: '/kaggle/working/ViralCutter/VIRALS/Marco Guedes revela como faturou milhões com mercado livre/final/final-output001_processed.mp4'\n\n💎 Novo Corte FINAL Detectado: 001_O_Poder_ABSURDO_do_Vídeo_nas_Vendas_e_o_Futuro_do_Mercado_Li.mp4\n 📁 Tamanho: 44.77 MB\n ☁️ Enviando para Google Drive (Conta Pessoal)...\n ✅ Upload Completo!\n\n💎 Novo Corte FINAL Detectado: 002_Sem_Capital_para_Começar_no_Mercado_Livre_A_Solução_é_Mais_S.mp4\n 📁 Tamanho: 55.86 MB\n ☁️ Enviando para Google Drive (Conta Pessoal)...\n ✅ Upload Completo!\n", + "output_type": "stream" + } + ], + "execution_count": null + } + ] +} \ No newline at end of file diff --git a/webui/app.py b/webui/app.py index 6dd882c..9569f24 100644 --- a/webui/app.py +++ b/webui/app.py @@ -179,7 +179,7 @@ def apply_experimental_preset(preset_name): def run_viral_cutter(input_source, project_name, url, video_file, segments, viral, themes, min_duration, max_duration, model, ai_backend, api_key, ai_model_name, chunk_size, workflow, face_model, face_mode, face_detect_interval, no_face_mode, - face_filter_thresh, face_two_thresh, face_conf_thresh, face_dead_zone, focus_active_speaker, active_speaker_mar, active_speaker_score_diff, include_motion, active_speaker_motion_threshold, active_speaker_motion_sensitivity, active_speaker_decay, + tracking_alpha, face_filter_thresh, face_two_thresh, face_conf_thresh, face_dead_zone, focus_active_speaker, active_speaker_mar, active_speaker_score_diff, include_motion, active_speaker_motion_threshold, 
active_speaker_motion_sensitivity, active_speaker_decay, use_custom_subs, font_name, font_size, font_color, highlight_color, outline_color, outline_thickness, shadow_color, shadow_size, is_bold, is_italic, is_uppercase, vertical_pos, alignment, h_size, w_block, gap, mode, under, strike, border_s, remove_punc, video_quality, use_youtube_subs, translate_target): @@ -248,6 +248,7 @@ def run_viral_cutter(input_source, project_name, url, video_file, segments, vira workflow_map = {"Full": "1", "Cut Only": "2", "Subtitles Only": "3"} cmd.extend(["--workflow", workflow_map.get(workflow, "1")]) + print(f"[DEBUG] Using face_model: {face_model}") cmd.extend(["--face-model", face_model]) cmd.extend(["--face-mode", face_mode]) if face_detect_interval: cmd.extend(["--face-detect-interval", str(face_detect_interval)]) @@ -258,6 +259,7 @@ def run_viral_cutter(input_source, project_name, url, video_file, segments, vira if face_two_thresh is not None: cmd.extend(["--face-two-threshold", str(face_two_thresh)]) if face_conf_thresh is not None: cmd.extend(["--face-confidence-threshold", str(face_conf_thresh)]) if face_dead_zone is not None: cmd.extend(["--face-dead-zone", str(face_dead_zone)]) + if tracking_alpha is not None: cmd.extend(["--tracking-alpha", str(tracking_alpha)]) @@ -399,7 +401,7 @@ def run_viral_cutter(input_source, project_name, url, video_file, segments, vira video_upload = gr.File(label=i18n("Upload Video"), file_count="single", file_types=["video"], visible=False) with gr.Row(): - video_quality_input = gr.Dropdown(choices=["best", "1080p", "720p", "480p"], label=i18n("Video Quality"), value="best") + video_quality_input = gr.Dropdown(choices=["best", "4k", "1440p", "1080p", "720p", "480p"], label=i18n("Video Quality"), value="4k") translate_input = gr.Dropdown(choices=["None", "pt", "en", "es", "fr", "de", "it", "ru", "ja", "ko", "zh-CN"], label=i18n("Translate Subtitles To"), value="None") use_youtube_subs_input = gr.Checkbox(label=i18n("Use YouTube Subs"), 
value=True, info=i18n("Download and use official subtitles if available. (Recommended, it speeds up the process)")) @@ -479,17 +481,26 @@ def refresh_local_models(): model_input = gr.Dropdown(["tiny", "small", "medium", "large", "large-v1", "large-v2", "large-v3", "turbo", "large-v3-turbo", "distil-large-v2", "distil-medium.en", "distil-small.en", "distil-large-v3"], label=i18n("Whisper Model"), value="large-v3-turbo") with gr.Row(): workflow_input = gr.Dropdown(choices=[(i18n("Full"), "Full"), (i18n("Cut Only"), "Cut Only"), (i18n("Subtitles Only"), "Subtitles Only")], label=i18n("Workflow"), value="Full") - face_model_input = gr.Dropdown(["insightface", "mediapipe"], label=i18n("Face Model"), value="insightface") + face_model_input = gr.Dropdown(["yolo", "insightface", "mediapipe"], label=i18n("Face Model"), value="yolo", info="YOLO = Smooth Zoom") with gr.Row(): face_mode_input = gr.Dropdown(choices=[(i18n("Auto"), "auto"), ("1", "1"), ("2", "2")], label=i18n("Face Mode"), value="auto") face_detect_interval_input = gr.Textbox(label=i18n("Face Det. 
Interval"), value="0.17,1.0") - no_face_mode_input = gr.Dropdown(choices=[(i18n("Padding (9:16)"), "padding"), (i18n("Zoom (Center)"), "zoom")], label=i18n("No Face Fallback"), value="zoom") + no_face_mode_input = gr.Dropdown(choices=[(i18n("Padding (9:16)"), "padding"), (i18n("Zoom (Center)"), "zoom"), (i18n("Blur Background"), "blur")], label=i18n("No Face Fallback"), value="zoom") # Update listeners now that all components are defined input_source.change(on_source_change, inputs=input_source, outputs=[url_input, project_selector, video_upload, workflow_input]) with gr.Accordion(i18n("Advanced Face Settings"), open=False): + # Tracking Smoothness Slider (YOLO only) + gr.Markdown(f"### {i18n('Camera Tracking')}") + tracking_alpha_input = gr.Slider( + label=i18n("Tracking Smoothness"), + minimum=0.01, maximum=0.15, value=0.05, step=0.01, + info=i18n("0.02 = Ultra Suave (lento) | 0.05 = Normal | 0.10 = Rápido") + ) + + gr.Markdown(f"### {i18n('Face Detection')}") face_preset_input = gr.Dropdown(choices=[(i18n(k), k) for k in FACE_PRESETS.keys()], label=i18n("Configuration Presets"), value="Default (Balanced)", interactive=True) with gr.Row(): face_filter_thresh_input = gr.Slider(label=i18n("Ignore Small Faces (0.0 - 1.0)"), minimum=0.0, maximum=1.0, value=0.35, step=0.05, info=i18n("Relative size to ignore background.")) @@ -629,7 +640,7 @@ def refresh_local_models(): input_source, project_selector, url_input, video_upload, segments_input, viral_input, themes_input, min_dur_input, max_dur_input, model_input, ai_backend_input, api_key_input, ai_model_input, chunk_size_input, workflow_input, face_model_input, face_mode_input, face_detect_interval_input, no_face_mode_input, - face_filter_thresh_input, face_two_thresh_input, face_conf_thresh_input, face_dead_zone_input, focus_active_speaker_input, + tracking_alpha_input, face_filter_thresh_input, face_two_thresh_input, face_conf_thresh_input, face_dead_zone_input, focus_active_speaker_input, active_speaker_mar_input, 
active_speaker_score_diff_input, include_motion_input, active_speaker_motion_threshold_input, active_speaker_motion_sensitivity_input, active_speaker_decay_input, use_custom_subs, # Expanded Manual Inputs mapping @@ -838,6 +849,7 @@ def on_select_project(proj_name): return library.generate_project_gallery(proj_n parser = argparse.ArgumentParser() parser.add_argument("--colab", action="store_true", help="Run in Google Colab mode") + parser.add_argument("--face-model", default="insightface", help="Default face model (env var VIRALCUTTER_FACE_MODEL takes precedence for UI default)") args = parser.parse_args() if args.colab: