import argparse
import copy
import io
import json
import os
import re
import subprocess
import time
import sys
import statistics
import threading
from concurrent.futures import ThreadPoolExecutor, TimeoutError
import traceback
import urllib.error
import urllib.request
import unicodedata
import uuid
import mimetypes
import stat
from datetime import datetime, UTC
try:
    from zoneinfo import ZoneInfo  # Python 3.9+
except ImportError:  # pragma: no cover - fallback for very old Python
    ZoneInfo = None  # type: ignore[misc]
from dataclasses import dataclass
from pathlib import Path
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from typing import Any, Dict, List, Optional, Sequence, Tuple
import importlib
import http.client
from urllib.parse import urlsplit

# Default PYTHONIOENCODING to UTF-8 (an existing value is left untouched).
os.environ.setdefault("PYTHONIOENCODING", "utf-8")
# Best effort: switch this process's own stdout/stderr to UTF-8 when the
# stream objects support reconfigure(). Any failure is deliberately ignored;
# a failure on stdout also skips the stderr attempt.
try:
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding="utf-8")
    if hasattr(sys.stderr, "reconfigure"):
        sys.stderr.reconfigure(encoding="utf-8")
except Exception:
    pass


# Directory that contains this file; used as the base for relative lookups.
ROOT_DIR = Path(__file__).resolve().parent
# Optional heavy dependencies. They start as None and are filled in on demand
# by ensure_whisper_module / ensure_cv2_module / ensure_mediapipe_module.
whisper = None  # type: ignore
cv2 = None  # type: ignore
mp = None  # type: ignore

# Per-host cache of HTTPS connections, managed by _get_https_connection and
# _perform_https_post (which evicts an entry when a request fails).
_HTTP_CONNECTIONS: Dict[str, http.client.HTTPSConnection] = {}


# Defaults used by the command-line parser (see parse_args).
DEFAULT_INPUT = "video.mp4"
DEFAULT_OUTPUT_DIR = "cortes"
DEFAULT_MODEL = "small"
DEFAULT_MIN_DUR = 22.0
DEFAULT_MAX_DUR = 38.0
DEFAULT_TOP_K = 1
DEFAULT_MAX_WORDS_ON_SCREEN = 4
MAX_TITLE_LEN = 120
# Emoji code points (fire, high voltage, rocket, direct hit, astonished face).
TITLE_PREFIXES = ["\U0001F525", "\u26A1", "\U0001F680", "\U0001F3AF", "\U0001F632"]
# NOTE(review): a live-looking API key is committed in source and is used by
# resolve_ai_config whenever neither --gemini-key nor GEMINI_API_KEY /
# GOOGLE_API_KEY is provided. Consider rotating it and loading it from the
# environment only.
DEFAULT_GEMINI_KEY = "AIzaSyCxZfjzfQtWtUMuffzP0xK2it6z7zujQp4"
# NOTE(review): deployment-specific webhook endpoints are hard-coded here.
DEFAULT_TITLE_WEBHOOK = "https://n8n.gudanbolizante.site/webhook/tvthathi1"
DEFAULT_NOTIFY_WEBHOOK = "https://n8n.gudanbolizante.site/webhook/cortes"
# Glob-style patterns of the video extensions picked up in batch mode.
SUPPORTED_BATCH_PATTERNS = ("*.mp4", "*.mov", "*.m4v", "*.mkv", "*.avi", "*.mxf")

# Ensure MXF files are treated as video when results are served.
mimetypes.add_type("video/mxf", ".mxf")

if ZoneInfo is not None:
    try:
        SAO_PAULO_TZ = ZoneInfo("America/Sao_Paulo")
    except Exception:  # pragma: no cover - tzdata ausente
        SAO_PAULO_TZ = UTC
else:  # pragma: no cover - Python muito antigo
    SAO_PAULO_TZ = UTC


def isoformat_sao_paulo(dt: Optional[datetime] = None) -> str:
    """
    Retorna timestamp ISO em horario de Sao Paulo sem depender do fuso do servidor.
    """
    base = dt or datetime.now(UTC)
    try:
        return base.astimezone(SAO_PAULO_TZ).isoformat()
    except Exception:
        return base.isoformat()


def _resolve_inbox_dir() -> Path:
    """Pick the directory that holds incoming videos.

    Precedence: the ``INBOX_DIR`` environment variable, then ``INPUT_DIR``
    (neither is checked for existence), then the first existing directory
    among ``ROOT_DIR / "in"`` and ``/home/user157/in``. When none exists,
    ``ROOT_DIR / "in"`` is returned anyway.
    """
    configured = os.getenv("INBOX_DIR") or os.getenv("INPUT_DIR")
    if configured:
        return Path(configured).expanduser()

    project_inbox = ROOT_DIR / "in"
    for option in (project_inbox, Path("/home/user157/in")):
        resolved = Path(option).expanduser()
        if resolved.exists():
            return resolved
    return project_inbox.expanduser()


# Inbox directory, resolved once at import time; later changes to the
# INBOX_DIR / INPUT_DIR environment variables do not affect this value.
INBOX_DIR = _resolve_inbox_dir()


def ensure_whisper_module():
    """Import the ``whisper`` package on first use and cache it module-wide.

    Returns:
        The imported ``whisper`` module.

    Raises:
        RuntimeError: If the package is not installed.
    """
    global whisper
    if whisper is None:
        try:
            whisper = importlib.import_module("whisper")
        except ImportError as exc:  # pragma: no cover - external dependency
            raise RuntimeError(
                "Biblioteca 'whisper' nao encontrada. Instale 'openai-whisper' para continuar."
            ) from exc
    return whisper


def ensure_cv2_module():
    """Import OpenCV (``cv2``) on first use and cache it module-wide.

    Returns:
        The ``cv2`` module, or ``None`` when it is not installed. A failed
        import is not cached, so the next call tries again.
    """
    global cv2
    if cv2 is None:
        try:
            loaded = importlib.import_module("cv2")
        except ImportError:
            loaded = None
        cv2 = loaded
    return cv2


def ensure_mediapipe_module():
    """Import ``mediapipe`` on first use and cache it module-wide.

    Returns:
        The ``mediapipe`` module, or ``None`` when it is not installed. A
        failed import is not cached, so the next call tries again.
    """
    global mp
    if mp is None:
        try:
            loaded = importlib.import_module("mediapipe")
        except ImportError:
            loaded = None
        mp = loaded
    return mp


def resolve_input_file(raw: str) -> Path:
    """Locate the input video named by *raw*.

    The path is tried as given (with ``~`` expanded). A relative path is also
    tried under ``ROOT_DIR`` and then under ``INBOX_DIR``.

    Args:
        raw: Path or file name supplied by the caller.

    Returns:
        The first candidate path that exists.

    Raises:
        FileNotFoundError: If *raw* is empty or no candidate exists; the
            message lists every path that was checked.
    """
    if not raw:
        raise FileNotFoundError("Nenhum arquivo de entrada informado.")

    direct = Path(raw).expanduser()
    locations: List[Path] = [direct]
    if not direct.is_absolute():
        locations.append((ROOT_DIR / raw).expanduser())
        if INBOX_DIR:
            locations.append(INBOX_DIR / raw)

    hit = next((location for location in locations if location.exists()), None)
    if hit is not None:
        return hit

    attempted = ", ".join(map(str, locations))
    raise FileNotFoundError(f"Video nao encontrado. Caminhos verificados: {attempted}")


def _get_https_connection(host: str) -> http.client.HTTPSConnection:
    """Return the cached HTTPS connection for *host*, creating it on first use.

    New connections are created with a 60 second timeout and stored in
    ``_HTTP_CONNECTIONS`` so later requests to the same host reuse them.
    """
    cached = _HTTP_CONNECTIONS.get(host)
    if cached is None:
        cached = http.client.HTTPSConnection(host, timeout=60)
        _HTTP_CONNECTIONS[host] = cached
    return cached


def _perform_https_post(
    host: str,
    path: str,
    data: bytes,
    headers: Dict[str, str],
    label: str,
) -> str:
    """POST *data* to ``https://{host}{path}`` and return the decoded body.

    Connections are cached per host. A cached keep-alive connection may have
    been closed by the server while this process was busy elsewhere, so a
    connection-level failure on a *reused* connection is retried exactly once
    on a fresh one. Timeouts and failures on a brand-new connection are not
    retried.

    Args:
        host: Target host name.
        path: Request path, including any query string.
        data: Encoded request body.
        headers: HTTP headers to send.
        label: Human-readable service name used in error messages.

    Returns:
        The response body decoded as UTF-8 (invalid bytes are replaced).

    Raises:
        RuntimeError: If the request cannot be completed or the server
            answers with an HTTP status of 400 or above.
    """
    # Errors that typically mean "the cached socket is dead or out of sync".
    stale_errors = (
        ConnectionError,  # includes BrokenPipe/ConnectionReset/RemoteDisconnected
        http.client.CannotSendRequest,
        http.client.ResponseNotReady,
    )
    # Only a connection taken from the cache can be stale.
    attempts = 2 if host in _HTTP_CONNECTIONS else 1
    for attempt in range(attempts):
        conn = _get_https_connection(host)
        try:
            conn.request("POST", path, body=data, headers=headers)
            response = conn.getresponse()
            body = response.read().decode("utf-8", errors="replace")
            break
        except Exception as exc:
            # Drop the broken connection so the next call starts clean.
            try:
                conn.close()
            finally:
                _HTTP_CONNECTIONS.pop(host, None)
            if attempt + 1 < attempts and isinstance(exc, stale_errors):
                continue
            raise RuntimeError(f"Falha ao conectar no {label}: {exc}") from exc
    if response.status >= 400:
        raise RuntimeError(f"{label} retornou erro HTTP {response.status}: {body}")
    return body

# Default prompt (Portuguese) asking the model to pick highlight windows.
# Placeholders: {top_k}, {min_dur}, {max_dur}, {segments}. Literal JSON braces
# are doubled so the text is valid for str.format. build_ai_prompt uses this
# unless the PROMPT_HIGHLIGHT_SELECTION environment variable is set.
DEFAULT_PROMPT_HIGHLIGHT_SELECTION = (
    "Voce e um editor experiente em cortes virais para TikTok e Instagram.\n"
    "Analise os segmentos abaixo e selecione ate {top_k} trechos com maior potencial de viralizar.\n"
    "Cada trecho precisa ter entre {min_dur:.1f}s e {max_dur:.1f}s. Priorize momentos emotivos, opinioes fortes, "
    "historias curiosas, humor ou revelacoes. Evite longos periodos de silencio.\n"
    "Responda exclusivamente em JSON com a estrutura:\n"
    "{{\"highlights\":[{{\"start\":12.3,\"end\":34.5,\"reason\":\"motivo do corte\"}},...]}}\n"
    "Use segundos absolutos do video. Se nao houver um bom trecho, retorne uma lista vazia.\n"
    "SEGMENTOS (indice|inicio|fim|fala):\n"
    "{segments}\n"
)

# Default prompt (Portuguese) asking the model to fix ASR mistakes without
# changing meaning. Placeholder: {segments}. Literal JSON braces are doubled
# for str.format. build_transcript_cleanup_prompt uses this unless the
# PROMPT_TRANSCRIPT_CLEANUP environment variable is set.
DEFAULT_PROMPT_TRANSCRIPT_CLEANUP = (
    "Voce e um revisor profissional de portugues falado no Brasil. Algumas falas transcritas pelo modelo de ASR "
    "ficaram com erros ortograficos ou de reconhecimento de fala.\n"
    "Ajuste apenas as palavras incorretas mantendo girias, tom e sentido original. Nao invente conteudo novo e nao "
    "traduza.\n"
    "Retorne apenas JSON com a estrutura {{\"segments\":[{{\"index\":0,\"text\":\"fala corrigida\"}},...]}}.\n"
    "Seguem as falas no formato indice|inicio|fim|texto:\n"
    "{segments}\n"
)

# Default prompt (Portuguese) for a journalistic social-media caption with
# hashtags. Placeholder: {transcript}. Its consumer is not visible in this
# part of the file.
DEFAULT_PROMPT_SOCIAL_CAPTION = (
    "Voce e um jornalista brasileiro preparando uma descricao de cunho jornalistico para redes sociais. "
    "A partir da transcricao do corte, escreva um texto com estrutura de lead: apresente o fato principal na primeira "
    "frase, traga detalhes essenciais nas frases seguintes e feche com contexto adicional ou proximos passos.\n"
    "Use entre 4 e 5 frases, mantenha tom informativo, objetivo e respeitoso, evitando opinioes pessoais.\n"
    "Inclua ate 4 hashtags em portugues do Brasil que reflitam o conteudo noticioso.\n"
    "Transcricao:\n"
    "{transcript}\n"
    "---\n"
    "Retorne apenas a descricao final, sem texto adicional."
)

# Default prompt (Portuguese) asking for one short title as JSON.
# Placeholder: {transcript}.
# NOTE(review): unlike the other templates, the JSON braces on the last line
# are NOT doubled, so a plain str.format() call raises KeyError on this text;
# it only formats correctly through safe_format_template's fallback path.
DEFAULT_PROMPT_TITLE = (
    "Voce e uma especialista em titulos. Use o texto abaixo (captions.txt) e escreva UM titulo curto e natural que resuma o fato principal.\n"
    "Regras simples:\n"
    "- Ate 45 caracteres.\n"
    "- Sem emojis, hashtags, aspas ou ponto final.\n"
    "- Frase coerente e direta em portugues, sem inventar nada alem do texto base.\n"
    "TEXTO BASE (caption):\n"
    "{transcript}\n"
    "Retorne apenas JSON valido: {\"title\":\"TEXTO AQUI\"}\n"
)
# Default prompt (Portuguese) asking for a full news article (title plus at
# least 8 short paragraphs) as JSON. Placeholder: {transcript}. Literal JSON
# braces are doubled for str.format.
DEFAULT_PROMPT_PORTAL_ARTICLE = (
    "Voce e um redator senior de portal de noticias brasileiro.\n"
    "Com base na transcricao do corte, elabore uma materia completa, objetiva e aprofundada.\n"
    "Produza um titulo informativo e impactante e um texto com pelo menos 8 paragrafos curtos.\n"
    "Comece com um lead que resuma o fato principal, desenvolva com detalhes, contexto, repercussao e proximos passos.\n"
    "Mantenha rigor jornalistico, citando dados concretos quando possivel e evitando opinioes pessoais.\n"
    "Retorne apenas JSON valido no formato {{\"title\":\"TITULO\",\"article\":\"TEXTO EM MULTIPLOS PARAGRAFOS\"}}.\n"
    "Transcricao integral:\n"
    "{transcript}\n"
)


# Extend PATH so ffmpeg bundled with the project is found automatically.
# The bundled "bin" directory is appended (not prepended), so an ffmpeg that
# is already on PATH is found first. Nothing changes when the bundled
# directory does not exist.
FFMPEG_DIR = Path(__file__).with_name("ffmpeg-2025-10-05-git-6231fa7fb7-essentials_build").joinpath("bin")
if FFMPEG_DIR.exists():
    os.environ["PATH"] = os.environ.get("PATH", "") + os.pathsep + str(FFMPEG_DIR)


def ensure_ffmpeg_permissions() -> None:
    """Add the execute bits to ffmpeg/ffprobe binaries on non-Windows systems.

    Checks the bundled ``FFMPEG_DIR`` (each tool with and without an ``.exe``
    suffix) and then ``ffmpeg`` / ``ffprobe`` relative to the current working
    directory. Files that cannot be stat'ed or chmod'ed are skipped silently.
    """
    if os.name == "nt":
        return

    tool_names = ("ffmpeg", "ffprobe")
    probes: List[Path] = []
    if FFMPEG_DIR.exists():
        for tool in tool_names:
            bundled = FFMPEG_DIR / tool
            probes.extend((bundled, bundled.with_suffix(".exe")))
    probes.extend(Path(tool) for tool in tool_names)

    # Keep only files that are actually present, in probe order.
    targets = [probe for probe in probes if probe.exists()]
    exec_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    for binary in targets:
        try:
            os.chmod(binary, binary.stat().st_mode | exec_bits)
        except OSError:
            continue


# Runs once at import time so the ffmpeg binaries are executable before use.
ensure_ffmpeg_permissions()

# Overlay assets expected next to this file.
LOGO_IMAGE_PATH = Path(__file__).with_name("logo.png")
LOGO_FIT_IMAGE_PATH = Path(__file__).with_name("logo2.png")
# Layout tuning values; their consumers are outside this part of the file
# (presumably pixel offsets on the 1080x1920 canvas - confirm at the use site).
LOGO_TARGET_HEIGHT = 200
LOGO_TOP_MARGIN = 30
LOGO_TITLE_MARGIN_EXTRA = 120
TITLE_BELOW_LOGO_GAP = 60
FIT_TITLE_ADDITIONAL_OFFSET = 35
FIT_TITLE_LINE_GAP = 85
FIT_TITLE_POS_X = 540
FIT_TITLE_MAX_CHARS_PER_LINE = 22
FIT_TITLE_MAX_LINES = 4
# Fast mode is on unless CUTS_FAST_MODE is set to something other than "1".
FAST_MODE = os.getenv("CUTS_FAST_MODE", "1") == "1"
# Retry settings read from the environment: at least 1 attempt (default 3) and
# a delay of at least 0.5 (default 2.5; presumably seconds - confirm).
# A non-numeric value raises ValueError at import time.
AI_RETRY_ATTEMPTS = max(1, int(os.getenv("AI_RETRY_ATTEMPTS", "3")))
AI_RETRY_DELAY = max(0.5, float(os.getenv("AI_RETRY_DELAY", "2.5")))
BACKGROUND_IMAGE_PATH = Path(__file__).with_name("fundo.jpg")
# "width:height" string (presumably an ffmpeg scale argument - confirm).
BACKGROUND_TARGET_SIZE = "1080:1920"
FONTS_DIR = Path(__file__).with_name("fonts")
ANTON_FONT_PATH = FONTS_DIR / "Anton-Regular.ttf"


def safe_format_template(template: str, **kwargs: Any) -> str:
    """Format *template* like ``str.format`` while tolerating stray braces.

    The fast path is a plain ``template.format(**kwargs)``. When that fails
    because the template contains brace groups that are not valid
    placeholders (raw JSON, ``{0}``, ``{}``, or an unbalanced ``{`` / ``}``),
    a fallback substitutes only ``{name}`` / ``{name:spec}`` groups whose name
    is in *kwargs* and keeps every other brace as literal text.

    Args:
        template: Text with ``str.format`` style placeholders.
        **kwargs: Values for the placeholders.

    Returns:
        The formatted text.

    Raises:
        ValueError: If a known placeholder carries a format spec that is
            invalid for its value (the fallback cannot fix that either).
    """
    try:
        return template.format(**kwargs)
    except (KeyError, IndexError, ValueError):
        # KeyError   -> a brace group names something not in kwargs (raw JSON).
        # IndexError -> positional groups such as "{0}" or "{}".
        # ValueError -> an unbalanced "{" / "}" or mixed numbering styles.
        pattern = re.compile(r"\{(\w+)(:[^}]*)?\}")
        placeholders: Dict[str, Tuple[str, str]] = {}

        def repl(match: re.Match[str]) -> str:
            key = match.group(1)
            spec = match.group(2) or ""
            if key in kwargs:
                # Park known placeholders behind a brace-free token so the
                # brace escaping below cannot touch them.
                token = f"__FMT_PLACEHOLDER_{key.upper()}__"
                placeholders[token] = (key, spec)
                return token
            return match.group(0)

        temp = pattern.sub(repl, template)
        # Every brace still present is literal text: escape it for format().
        temp = temp.replace("{", "{{").replace("}", "}}")
        for token, (key, spec) in placeholders.items():
            temp = temp.replace(token, "{" + key + spec + "}")
        return temp.format(**kwargs)


@dataclass
class Segment:
    """One transcribed span with its timing and the confidence values read from the ASR result."""

    start: float
    end: float
    text: str
    avg_logprob: float
    no_speech_prob: float

    @property
    def duration(self) -> float:
        """Length of the span (end - start), never less than 1e-3."""
        span = self.end - self.start
        return span if span > 1e-3 else 1e-3

    @property
    def word_count(self) -> int:
        """Number of whitespace-separated words, never less than 1."""
        return len(self.text.split()) or 1


@dataclass
class CropWindow:
    """Rectangular crop region plus counters describing how it was estimated.

    NOTE(review): the code that fills these values is outside this part of the
    file; the field comments below are inferred from the names only.
    """
    # Top-left corner and size of the rectangle (presumably pixels - confirm).
    x: int
    y: int
    width: int
    height: int
    # Estimation quality indicators; all default to zero.
    confidence: float = 0.0
    detections: int = 0
    samples: int = 0


@dataclass
class HighlightCandidate:
    """A candidate cut: its time window, score, covered segments and metadata."""

    start: float
    end: float
    score: float
    segments: List[Segment]
    reason: str = ""
    title: str = ""
    origin: str = "unknown"
    crop: Optional[CropWindow] = None

    @property
    def duration(self) -> float:
        """Window length (end - start); unlike ``Segment.duration`` it is not clamped."""
        return self.end - self.start

    def preview(self, limit: int = 180) -> str:
        """Join the non-empty segment texts, shortened to *limit* characters with a trailing "..."."""
        joined = " ".join(seg.text for seg in self.segments if seg.text)
        if len(joined) <= limit:
            return joined
        return joined[: limit - 3] + "..."


@dataclass
class GeneratedCut:
    """Record of one exported cut: its metadata and the paths of its artifacts.

    NOTE(review): the code that builds these records is outside this part of
    the file; the field comments below are inferred from names and types.
    """
    index: int
    # Common file-name stem of the artifacts (see build_output_stem - confirm).
    base_name: str
    # Time window of the cut (presumably seconds - confirm).
    start: float
    end: float
    duration: float
    score: float
    origin: str
    title: str
    reason: str
    preview: str
    # Rendered videos; None presumably means that variant was not exported.
    zoom_video: Optional[Path]
    fit_video: Optional[Path]
    # Subtitle files for each variant (ASS) plus an SRT file.
    ass_fill: Path
    ass_fit: Path
    srt: Path
    # Optional generated text artifacts.
    caption: Optional[Path]
    portal_article: Optional[Path]
    portal_title: str = ""


@dataclass
class PipelineSummary:
    """Outcome of one pipeline run: input, output location, timing and cuts."""
    input_path: Path
    output_dir: Path
    # Identifier of the run (also a build_output_stem argument - confirm link).
    run_tag: str
    started_at: datetime
    finished_at: datetime
    # Cuts produced by the run.
    highlights: List[GeneratedCut]


@dataclass
class AIConfig:
    """Settings for the AI provider, as built by resolve_ai_config."""
    # Provider id; resolve_ai_config produces "openai" or "gemini".
    provider: str
    # Provider-specific model name.
    model: str
    api_key: str
    # Sampling temperature; the call helpers clamp it to [0.0, 2.0] on use.
    temperature: float


def slugify_for_filename(text: str, max_length: int = 48) -> str:
    """Turn free text into a lowercase, hyphen-separated ASCII fragment for file names.

    Accents are removed via NFKD decomposition, every run of non-alphanumeric
    characters becomes a single ``-``, and the result is cut to *max_length*.

    Returns:
        The slug, or ``""`` when no ASCII letters or digits remain. If the cut
        leaves nothing (for example ``max_length=0``), the uncut slug is returned.
    """
    ascii_only = (
        unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode("ascii")
    )
    slug = re.sub(r"[^a-zA-Z0-9]+", "-", ascii_only).strip("-").lower()
    slug = re.sub(r"-{2,}", "-", slug)
    if not slug:
        return ""
    shortened = slug[:max_length].rstrip("-")
    return shortened if shortened else slug


def build_output_stem(index: int, highlight: HighlightCandidate, run_tag: str) -> str:
    """Build the shared base name for every artifact of one cut.

    The slug comes from the highlight title, or from its text preview when no
    title is set. If neither yields a slug, the start/end times in
    milliseconds are used instead.

    Returns:
        A name of the form ``corte_<run_tag>_<index>_<slug>``.
    """
    slug = slugify_for_filename(highlight.title or highlight.preview())
    if not slug:
        start_ms, end_ms = (
            int(round(moment * 1000)) for moment in (highlight.start, highlight.end)
        )
        slug = f"{start_ms:06d}ms-{end_ms:06d}ms"
    return f"corte_{run_tag}_{index:02d}_{slug}"


def highlights_are_similar(
    a: HighlightCandidate, b: HighlightCandidate, tolerance: float = 0.5
) -> bool:
    """Return True when both starts and both ends differ by less than *tolerance*."""
    start_gap = abs(a.start - b.start)
    if not start_gap < tolerance:
        return False
    end_gap = abs(a.end - b.end)
    return end_gap < tolerance


def coerce_type(value: Any, reference: Any) -> Any:
    """Convert *value* to the type of *reference* (bool, int or float only).

    For a bool reference, strings are true only when they read "1", "true",
    "yes", "on" or "sim" (case-insensitive, surrounding spaces ignored); other
    values use ``bool()``. Any other reference type returns *value* unchanged.

    Raises:
        ValueError / TypeError: If ``int()`` or ``float()`` rejects *value*.
    """
    if isinstance(reference, bool):
        if not isinstance(value, str):
            return bool(value)
        return value.strip().lower() in {"1", "true", "yes", "on", "sim"}
    # bool was handled above, so an int match here is a genuine integer.
    for caster in (int, float):
        if isinstance(reference, caster):
            return caster(value)
    return value


def resolve_notify_webhook(value: Optional[str]) -> str:
    """Decide which notification webhook URL to use.

    An explicit *value* (even an empty string) wins. Otherwise the
    ``CORTES_NOTIFY_WEBHOOK`` environment variable is used when it is set, and
    ``DEFAULT_NOTIFY_WEBHOOK`` as the last resort. Surrounding whitespace is
    stripped from explicit and environment values.
    """
    if value is not None:
        return value.strip()
    from_env = os.getenv("CORTES_NOTIFY_WEBHOOK")
    return DEFAULT_NOTIFY_WEBHOOK if from_env is None else from_env.strip()

def discover_batch_inputs(args: argparse.Namespace) -> List[Path]:
    """Collect the video files to process in batch mode.

    Scans (non-recursively) the current working directory - or the parent
    directory of ``args.input`` when a non-default input was given - and then
    ``INBOX_DIR`` when it is a different directory. A file qualifies when its
    name ends with a supported extension, does not start with ``corte_`` and
    does not live inside the output directory.

    Returns:
        The resolved paths, de-duplicated and sorted.
    """
    if not args.input or args.input == DEFAULT_INPUT:
        base_dir = Path.cwd()
    else:
        base_dir = Path(args.input).resolve().parent
    excluded_dir = Path(args.output_dir).resolve()

    # "*.mp4" -> ".mp4"; anything else is used as a plain lowercase suffix.
    extensions = tuple(
        (glob[1:] if glob.startswith("*.") else glob).lower()
        for glob in SUPPORTED_BATCH_PATTERNS
    )

    scan_dirs: List[Path] = [base_dir] if base_dir.exists() else []
    try:
        inbox_is_new = INBOX_DIR.exists() and not any(
            INBOX_DIR.samefile(known) for known in scan_dirs
        )
        if inbox_is_new:
            scan_dirs.append(INBOX_DIR)
    except OSError:
        pass

    found: set[Path] = set()
    for directory in scan_dirs:
        try:
            children = list(directory.iterdir())
        except OSError:
            continue
        for child in children:
            if not child.is_file():
                continue
            lowered = child.name.lower()
            if not lowered.endswith(extensions) or lowered.startswith("corte_"):
                continue
            target = child.resolve()
            # Never re-process files that live in the output directory.
            if target == excluded_dir or excluded_dir in target.parents:
                continue
            found.add(target)
    return sorted(found)


def generate_fallback_highlights(
    segments: Sequence[Segment],
    min_dur: float,
    max_dur: float,
    required: int,
    existing: Sequence[HighlightCandidate],
) -> List[HighlightCandidate]:
    """Produce sliding-window candidates when more highlights are needed.

    The transcript span is divided by ``len(existing) + required`` to get a
    window length, clamped to ``[min_dur, max_dur]``. Windows advance by half
    that length (at least ``min_dur / 2`` and at least 2.0) and each one is
    rated with ``score_window``.

    Args:
        segments: Transcript segments in order.
        min_dur: Minimum window length.
        max_dur: Maximum window length.
        required: How many more highlights are wanted.
        existing: Highlights already chosen; only their count is used.

    Returns:
        Up to ``required * 3`` candidates with ``origin="fallback"``, best
        score first. Empty when ``required <= 0`` or there are no segments.
    """
    if required <= 0 or not segments:
        return []

    timeline_start = segments[0].start
    timeline_end = segments[-1].end
    span = max(1e-3, timeline_end - timeline_start)
    slots = max(len(existing) + required, 1)
    window = min(max_dur, max(min_dur, span / slots))
    stride = max(window * 0.5, min_dur / 2.0, 2.0)

    window_starts: List[float] = []
    cursor = timeline_start
    while cursor + min_dur <= timeline_end + 1e-3:
        window_starts.append(cursor)
        cursor += stride
    if not window_starts:
        window_starts = [timeline_start]

    scored: List[HighlightCandidate] = []
    for begin in window_starts:
        finish = min(timeline_end, begin + window)
        if finish - begin < min_dur:
            continue
        score, selected = score_window(segments, begin, finish)
        if score == float("-inf") or not selected:
            continue
        scored.append(
            HighlightCandidate(
                start=begin,
                end=finish,
                score=score,
                segments=list(selected),
                origin="fallback",
            )
        )

    scored.sort(key=lambda item: item.score, reverse=True)
    return scored[: max(required * 3, required)]


def parse_args(argv: Sequence[str]) -> argparse.Namespace:
    """Parse the command-line options of the cutting pipeline.

    The defaults for ``--serve-host`` / ``--serve-port`` come from the
    ``CORTES_HOST`` / ``HOST`` and ``CORTES_PORT`` / ``PORT`` environment
    variables, falling back to ``0.0.0.0`` and ``8765`` (also used when the
    port variable is not an integer).

    Args:
        argv: Argument list, without the program name.

    Returns:
        The parsed namespace.
    """
    default_host = os.getenv("CORTES_HOST") or os.getenv("HOST") or "0.0.0.0"
    raw_port = os.getenv("CORTES_PORT") or os.getenv("PORT")
    default_port = 8765
    if raw_port:
        try:
            default_port = int(raw_port)
        except ValueError:
            pass  # keep 8765 when the variable is not an integer

    # (flag, add_argument keyword arguments), in registration order.
    option_specs: List[Tuple[str, Dict[str, Any]]] = [
        ("--input", {"default": DEFAULT_INPUT, "help": "Arquivo de video de entrada."}),
        ("--output-dir", {"default": DEFAULT_OUTPUT_DIR, "help": "Pasta onde os cortes serao salvos."}),
        ("--model", {"default": DEFAULT_MODEL, "help": "Modelo Whisper a ser utilizado."}),
        (
            "--min-duration",
            {"type": float, "default": DEFAULT_MIN_DUR, "help": "Duracao minima do corte em segundos."},
        ),
        (
            "--max-duration",
            {"type": float, "default": DEFAULT_MAX_DUR, "help": "Duracao maxima do corte em segundos."},
        ),
        ("--top", {"type": int, "default": DEFAULT_TOP_K, "help": "Quantidade de melhores cortes a exportar."}),
        (
            "--variant",
            {
                "choices": ["zoom", "fit", "both"],
                "default": "zoom",
                "help": "Formato do corte vertical a exportar (zoom, fit ou both).",
            },
        ),
        ("--lang", {"default": "pt", "help": "Idioma dominante do video."}),
        (
            "--batch-all",
            {
                "action": "store_true",
                "help": "Processa todos os arquivos de video encontrados automaticamente.",
            },
        ),
        (
            "--ai-provider",
            {
                "default": "gemini",
                "choices": ["auto", "none", "openai", "gemini"],
                "help": "Provedor de IA para escolher o melhor trecho (padrao: gemini).",
            },
        ),
        (
            "--ai-model",
            {
                "default": None,
                "help": "Modelo do provedor de IA (opcional, usa um padrao para cada provedor).",
            },
        ),
        (
            "--ai-temperature",
            {
                "type": float,
                "default": 0.2,
                "help": "Temperatura da IA ao selecionar o corte (valores baixos focam em respostas deterministicas).",
            },
        ),
        (
            "--gemini-key",
            {
                "default": None,
                "help": "Chave de API do Gemini (sobrescreve variaveis de ambiente GEMINI_API_KEY/GOOGLE_API_KEY).",
            },
        ),
        (
            "--notify-webhook",
            {
                "default": None,
                "help": (
                    "URL para receber os resultados do processamento via POST. "
                    "Use string vazia para desabilitar (padrao envia para webhook N8N)."
                ),
            },
        ),
        (
            "--notify-timeout",
            {
                "type": int,
                "default": 45,
                "help": "Timeout (em segundos) para a requisicao HTTP de notificacao do webhook.",
            },
        ),
        (
            "--serve-webhook",
            {
                "action": "store_true",
                "help": "Inicia um servidor HTTP que permite disparar cortes via requisicoes POST.",
            },
        ),
        (
            "--serve-host",
            {
                "default": default_host,
                "help": "Host/interface para o servidor de webhook (padrao: 0.0.0.0 ou variavel de ambiente CORTES_HOST/HOST).",
            },
        ),
        (
            "--serve-port",
            {
                "type": int,
                "default": default_port,
                "help": "Porta do servidor de webhook (padrao: 8765 ou variavel CORTES_PORT/PORT).",
            },
        ),
    ]

    parser = argparse.ArgumentParser(
        description="Gera automaticamente o melhor corte vertical com legendas dinamicas."
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser.parse_args(argv)


def resolve_ai_config(args: argparse.Namespace) -> Optional[AIConfig]:
    """Build the AI provider configuration from CLI arguments and environment.

    The Gemini key is taken from ``--gemini-key``, then ``GEMINI_API_KEY`` /
    ``GOOGLE_API_KEY``, then the embedded ``DEFAULT_GEMINI_KEY``. With
    ``--ai-provider auto`` Gemini is preferred when a key is available, then
    OpenAI when ``OPENAI_API_KEY`` is set.

    Side effects: for Gemini, prints where the key came from and exports it as
    ``GEMINI_API_KEY`` so code that reads the environment later sees it too.

    Returns:
        The configuration, or ``None`` for an unrecognised provider name
        (a message is written to stderr).

    Raises:
        RuntimeError: If the provider resolves to "none" or the selected
            provider has no API key.
    """
    env_gemini_key = os.getenv("GEMINI_API_KEY") or os.getenv("GOOGLE_API_KEY")
    gemini_key = args.gemini_key or env_gemini_key or DEFAULT_GEMINI_KEY

    provider = args.ai_provider.lower()
    if provider == "auto":
        if gemini_key:
            provider = "gemini"
        elif os.getenv("OPENAI_API_KEY"):
            provider = "openai"
        else:
            provider = "none"

    if provider == "none":
        raise RuntimeError(
            "Nenhum provedor de IA configurado. Defina GEMINI_API_KEY (ou GOOGLE_API_KEY) antes de executar."
        )

    if provider == "openai":
        openai_key = os.getenv("OPENAI_API_KEY")
        if not openai_key:
            raise RuntimeError("[IA] Variavel OPENAI_API_KEY nao encontrada.")
        return AIConfig(
            provider="openai",
            model=args.ai_model or "gpt-4o-mini",
            api_key=openai_key,
            temperature=args.ai_temperature,
        )

    if provider != "gemini":
        sys.stderr.write(f"[IA] Provedor desconhecido: {provider}\n")
        return None

    if not gemini_key:
        raise RuntimeError(
            "[IA] Chave do Gemini nao encontrada. Defina --gemini-key ou a variavel GEMINI_API_KEY/GOOGLE_API_KEY."
        )
    if args.gemini_key:
        origin = "--gemini-key"
    elif env_gemini_key:
        origin = "variavel de ambiente existente"
    else:
        origin = "DEFAULT_GEMINI_KEY embutida"
    print(f"[IA] Usando chave Gemini a partir de {origin}.")
    # Export the key so later calls (e.g. libraries) can read it too.
    os.environ["GEMINI_API_KEY"] = gemini_key
    return AIConfig(
        provider="gemini",
        model=args.ai_model or "gemini-2.5-flash",
        api_key=gemini_key,
        temperature=args.ai_temperature,
    )


def prepare_segments_for_ai(
    segments: Sequence[Segment],
    max_chars: int = 12000,
    max_segments: int = 220,
) -> List[Dict[str, Any]]:
    """Convert segments into compact dicts that fit a prompt budget.

    Whitespace in each text is collapsed, empty texts are skipped and texts
    longer than 240 characters are cut to 237 plus "...". Collection stops
    when the JSON size of the entries would exceed *max_chars* (the first
    entry is always kept) or when *max_segments* entries were gathered.

    Returns:
        Dicts with ``index`` (position in *segments*), ``start``, ``end``,
        ``duration`` (rounded to 2 decimals) and ``text``.
    """
    selected: List[Dict[str, Any]] = []
    budget_used = 0
    for position, segment in enumerate(segments):
        flattened = " ".join(segment.text.strip().split())
        if not flattened:
            continue
        if len(flattened) > 240:
            flattened = flattened[:237] + "..."
        record = {
            "index": position,
            "start": round(segment.start, 2),
            "end": round(segment.end, 2),
            "duration": round(segment.duration, 2),
            "text": flattened,
        }
        size = len(json.dumps(record, ensure_ascii=False))
        if selected and budget_used + size > max_chars:
            break
        selected.append(record)
        budget_used += size
        if len(selected) >= max_segments:
            break
    return selected


def build_ai_prompt(
    entries: Sequence[Dict[str, Any]],
    min_dur: float,
    max_dur: float,
    top_k: int,
) -> str:
    """Render the highlight-selection prompt for the given segment entries.

    Each entry becomes one ``index|start|end|text`` line. The template comes
    from the ``PROMPT_HIGHLIGHT_SELECTION`` environment variable when set,
    otherwise from ``DEFAULT_PROMPT_HIGHLIGHT_SELECTION``. If formatting
    fails, a notice is printed and the default template is used.
    """
    segments_blob = "\n".join(
        "{index}|{start}|{end}|{text}".format_map(item) for item in entries
    )
    values = {
        "top_k": top_k,
        "min_dur": min_dur,
        "max_dur": max_dur,
        "segments": segments_blob,
    }
    chosen = os.getenv("PROMPT_HIGHLIGHT_SELECTION") or DEFAULT_PROMPT_HIGHLIGHT_SELECTION
    try:
        return safe_format_template(chosen, **values)
    except Exception as exc:  # pragma: no cover - fallback
        print(f"[PROMPT] Falha ao formatar PROMPT_HIGHLIGHT_SELECTION ({exc}); usando padrao.")
        return safe_format_template(DEFAULT_PROMPT_HIGHLIGHT_SELECTION, **values)


def call_openai_chat(
    config: AIConfig,
    prompt: str,
    *,
    system_prompt: Optional[str] = None,
    response_format: Optional[Dict[str, Any]] = None,
) -> str:
    """Send *prompt* to the OpenAI chat completions endpoint and return the reply text.

    Args:
        config: Provider settings; the temperature is clamped to [0.0, 2.0].
        prompt: User message.
        system_prompt: System message; a JSON-only instruction is used when
            omitted or empty.
        response_format: ``response_format`` payload field; defaults to
            ``{"type": "json_object"}``.

    Returns:
        The content of the first choice's message.

    Raises:
        RuntimeError: On transport/HTTP errors (from ``_perform_https_post``)
            or when the response has no choices or empty content.
    """
    if not system_prompt:
        system_prompt = (
            "Voce e um assistente que retorna apenas JSON valido. Nao adicione texto fora do JSON."
        )
    if response_format is None:
        response_format = {"type": "json_object"}

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    request_body = json.dumps(
        {
            "model": config.model,
            "temperature": max(0.0, min(2.0, config.temperature)),
            "response_format": response_format,
            "messages": messages,
        }
    ).encode("utf-8")
    request_headers = {
        "Authorization": f"Bearer {config.api_key}",
        "Content-Type": "application/json",
    }
    raw_reply = _perform_https_post(
        "api.openai.com", "/v1/chat/completions", request_body, request_headers, "OpenAI"
    )

    choices = json.loads(raw_reply).get("choices") or []
    if not choices:
        raise RuntimeError("OpenAI retornou resposta sem choices.")
    content = (choices[0].get("message") or {}).get("content")
    if not content:
        raise RuntimeError("OpenAI retornou resposta vazia.")
    return content


def call_gemini_generate(
    config: AIConfig,
    prompt: str,
    *,
    response_mime_type: str = "application/json",
) -> str:
    """Send *prompt* to the Gemini ``generateContent`` endpoint and return the reply text.

    The API key travels in the ``x-goog-api-key`` header rather than the URL
    query string, so it does not end up in proxy or access logs. The text of
    every part of the first candidate is concatenated, because a reply may be
    split across several parts.

    Args:
        config: Provider settings; the temperature is clamped to [0.0, 2.0].
        prompt: Text sent as the single user part.
        response_mime_type: Value for ``generationConfig.responseMimeType``.

    Returns:
        The concatenated text of the first candidate.

    Raises:
        RuntimeError: On transport/HTTP errors (from ``_perform_https_post``)
            or when the response has no candidates, no parts or no text.
    """
    path = f"/v1beta/models/{config.model}:generateContent"
    payload = {
        "contents": [{"parts": [{"text": prompt}]}],
        "generationConfig": {
            "temperature": max(0.0, min(2.0, config.temperature)),
            "responseMimeType": response_mime_type,
        },
    }
    data = json.dumps(payload).encode("utf-8")
    headers = {
        "Content-Type": "application/json",
        "x-goog-api-key": config.api_key,
    }
    body = _perform_https_post(
        "generativelanguage.googleapis.com",
        path,
        data,
        headers,
        "Gemini",
    )

    parsed = json.loads(body)
    candidates = parsed.get("candidates") or []
    if not candidates:
        raise RuntimeError("Gemini retornou resposta sem candidatos.")
    first = candidates[0]
    content = first.get("content") or {}
    parts = content.get("parts") or []
    if not parts:
        raise RuntimeError("Gemini retornou resposta vazia.")
    # Join all answer parts; parts flagged as "thought" are reasoning
    # summaries, not answer text, so they are left out.
    text = "".join(
        part.get("text") or ""
        for part in parts
        if isinstance(part, dict) and not part.get("thought")
    )
    if not text:
        raise RuntimeError("Gemini nao retornou texto.")
    return text


def extract_json_payload(raw_text: str) -> Optional[Any]:
    """Extract a JSON value from model output that may carry extra text.

    Tries, in order: the whole stripped text, the widest ``{...}`` span, then
    the widest ``[...]`` span.

    Returns:
        The first value that parses, or ``None`` when nothing does (or the
        input is empty).
    """
    if not raw_text:
        return None
    stripped = raw_text.strip()
    try:
        return json.loads(stripped)
    except json.JSONDecodeError:
        pass

    # Greedy patterns: from the first opening bracket to the last closing one.
    for shape in (r"\{.*\}", r"\[.*\]"):
        found = re.search(shape, stripped, re.DOTALL)
        if found is None:
            continue
        try:
            return json.loads(found.group(0))
        except json.JSONDecodeError:
            continue
    return None


def request_ai_json(
    config: AIConfig,
    prompt: str,
    *,
    system_prompt: Optional[str] = None,
) -> Optional[Any]:
    """Ask the configured provider for a JSON answer and parse it.

    *system_prompt* is only forwarded to OpenAI; the Gemini call does not
    take one.

    Returns:
        The parsed JSON value, or ``None`` when the provider is neither
        "openai" nor "gemini" or the reply contains no parseable JSON.
    """
    if config.provider not in ("openai", "gemini"):
        return None
    if config.provider == "openai":
        reply = call_openai_chat(
            config,
            prompt,
            system_prompt=system_prompt,
            response_format={"type": "json_object"},
        )
    else:
        reply = call_gemini_generate(config, prompt, response_mime_type="application/json")
    return extract_json_payload(reply)


def load_segments(result: dict) -> List[Segment]:
    """Build ``Segment`` objects from the ``"segments"`` list of a transcription result.

    Missing keys get pessimistic defaults: times 0.0, empty text,
    ``avg_logprob`` -5.0 and ``no_speech_prob`` 1.0.
    """
    return [
        Segment(
            start=float(raw.get("start", 0.0)),
            end=float(raw.get("end", 0.0)),
            text=str(raw.get("text", "")).strip(),
            avg_logprob=float(raw.get("avg_logprob", -5.0)),
            no_speech_prob=float(raw.get("no_speech_prob", 1.0)),
        )
        for raw in result.get("segments", [])
    ]


def normalize_segment_text(text: str) -> str:
    """Replace typographic dashes and quotes with ASCII ones and collapse whitespace.

    En/em dashes become ``-``, curly double quotes become ``"`` and curly
    single quotes become ``'``. The text is stripped and every whitespace run
    becomes a single space.
    """
    ascii_punctuation = {
        0x2013: "-",
        0x2014: "-",
        0x201C: '"',
        0x201D: '"',
        0x2018: "'",
        0x2019: "'",
    }
    return re.sub(r"\s+", " ", text.translate(ascii_punctuation).strip())


def normalize_segments_in_place(segments: Sequence[Segment]) -> None:
    """Overwrite each segment's text with its normalized form (mutates the segments)."""
    for segment in segments:
        cleaned = normalize_segment_text(segment.text)
        segment.text = cleaned


def iter_transcript_batches(
    segments: Sequence[Segment],
    *,
    max_chars: int = 10000,
    max_segments: int = 160,
) -> List[List[Dict[str, Any]]]:
    """Split the transcript into batches small enough for one AI request.

    Segments whose normalized text is empty are skipped. A batch is closed
    before an entry that would push its JSON size over *max_chars*, or once it
    already holds *max_segments* entries. A batch always takes at least one
    entry, even an oversized one.

    Returns:
        Batches of dicts with ``index`` (position in *segments*), ``start``,
        ``end`` (rounded to 2 decimals) and ``text``.
    """
    groups: List[List[Dict[str, Any]]] = []
    open_group: List[Dict[str, Any]] = []
    used = 0
    for position, segment in enumerate(segments):
        cleaned = normalize_segment_text(segment.text)
        if not cleaned:
            continue
        record = {
            "index": position,
            "start": round(segment.start, 2),
            "end": round(segment.end, 2),
            "text": cleaned,
        }
        cost = len(json.dumps(record, ensure_ascii=False))
        is_full = used + cost > max_chars or len(open_group) >= max_segments
        if open_group and is_full:
            groups.append(open_group)
            open_group, used = [], 0
        open_group.append(record)
        used += cost
    if open_group:
        groups.append(open_group)
    return groups


def build_transcript_cleanup_prompt(entries: Sequence[Dict[str, Any]]) -> str:
    """Render the transcript-cleanup prompt for one batch of entries.

    Each entry becomes one ``index|start|end|text`` line. The template comes
    from the ``PROMPT_TRANSCRIPT_CLEANUP`` environment variable when set,
    otherwise from ``DEFAULT_PROMPT_TRANSCRIPT_CLEANUP``. If formatting fails,
    a notice is printed and the default template is used.
    """
    segments_blob = "\n".join(
        "{index}|{start}|{end}|{text}".format_map(item) for item in entries
    )
    chosen = os.getenv("PROMPT_TRANSCRIPT_CLEANUP") or DEFAULT_PROMPT_TRANSCRIPT_CLEANUP
    try:
        return safe_format_template(chosen, segments=segments_blob)
    except Exception as exc:  # pragma: no cover - fallback
        print(f"[PROMPT] Falha ao formatar PROMPT_TRANSCRIPT_CLEANUP ({exc}); usando padrao.")
        return safe_format_template(DEFAULT_PROMPT_TRANSCRIPT_CLEANUP, segments=segments_blob)


def apply_segment_corrections(
    segments: Sequence[Segment], items: Sequence[Dict[str, Any]]
) -> int:
    """Write corrected texts back onto ``segments`` and count the changes.

    Each item is expected to be a dict with an ``"index"`` (position in
    ``segments``) and a ``"text"`` string.  Items that are not dicts, have an
    unparsable or out-of-range index, a non-string text, or a text that is
    empty after normalisation are ignored.

    Returns:
        How many segments actually received a different text.
    """
    changed = 0
    for correction in items:
        if not isinstance(correction, dict):
            continue

        try:
            position = int(correction.get("index"))
        except (TypeError, ValueError):
            continue
        if not 0 <= position < len(segments):
            continue

        proposed = correction.get("text")
        if not isinstance(proposed, str):
            continue
        proposed = normalize_segment_text(proposed)
        if not proposed:
            continue

        target = segments[position]
        if proposed != target.text:
            target.text = proposed
            changed += 1
    return changed


def correct_segments_with_ai(
    segments: Sequence[Segment], config: Optional[AIConfig]
) -> int:
    """Normalise the transcript and optionally let an AI provider fix it.

    The segments are always normalised in place first.  When ``config`` names
    the ``"openai"`` or ``"gemini"`` provider, the transcript is sent in
    batches through ``request_ai_json`` and the returned corrections are
    applied in place.  Each batch waits at most 60 seconds: a timeout skips
    that batch, while a ``RuntimeError`` aborts the remaining batches.

    Returns:
        Total number of segments whose text was changed by AI corrections.
    """
    if not segments:
        return 0
    normalize_segments_in_place(segments)
    if not config or config.provider not in {"openai", "gemini"}:
        return 0

    applied_count = 0
    batches = iter_transcript_batches(segments)
    # A single worker is used only to be able to bound each request in time.
    with ThreadPoolExecutor(max_workers=1) as pool:
        for entries in batches:
            cleanup_prompt = build_transcript_cleanup_prompt(entries)
            try:
                pending = pool.submit(
                    request_ai_json,
                    config,
                    cleanup_prompt,
                    system_prompt="Retorne apenas JSON valido com as correcoes solicitadas.",
                )
                response = pending.result(timeout=60)
            except TimeoutError:
                sys.stderr.write("[IA] Revisao de transcricao excedeu 60s; prosseguindo sem ajustes.\n")
                continue
            except RuntimeError as exc:
                sys.stderr.write(f"[IA] Falha ao corrigir transcricao: {exc}\n")
                break

            if not response:
                continue

            # Accept either {"segments": [...]} or a bare list of corrections.
            if isinstance(response, dict):
                corrections = response.get("segments")
            else:
                corrections = response
            if not isinstance(corrections, list) or not corrections:
                continue

            applied_count += apply_segment_corrections(segments, corrections)
    return applied_count


def overlap_amount(a_start: float, a_end: float, b_start: float, b_end: float) -> float:
    """Return the length shared by intervals ``[a_start, a_end]`` and ``[b_start, b_end]``.

    Disjoint (or merely touching) intervals yield ``0.0``.
    """
    latest_start = max(a_start, b_start)
    earliest_end = min(a_end, b_end)
    return max(0.0, earliest_end - latest_start)


def score_window(segments: Sequence[Segment], win_start: float, win_end: float) -> Tuple[float, List[Segment]]:
    """Score the time window ``[win_start, win_end]`` against the transcript.

    Only segments overlapping the window contribute.  The score rewards word
    density and the fraction of the window covered by speech, adds the
    overlap-weighted average ``avg_logprob`` (floored at -3.0) and subtracts
    a penalty for gaps between consecutive overlapping segments.

    Returns:
        ``(score, overlapping_segments)``; ``(-inf, [])`` when no segment
        overlaps the window.
    """
    window_len = max(1e-3, win_end - win_start)
    spoken = 0.0
    conf_sum = 0.0
    words_sum = 0.0
    silence = 0.0
    covered: List[Segment] = []
    # End of the last speech seen so far, clipped to the window.
    last_speech_end = win_start

    for seg in segments:
        shared = overlap_amount(seg.start, seg.end, win_start, win_end)
        if shared <= 0:
            continue

        # Time between the previous speech end and this segment's (clipped) start.
        silence += max(0.0, max(seg.start, win_start) - last_speech_end)

        spoken += shared
        fraction_inside = shared / seg.duration
        conf_sum += seg.avg_logprob * shared
        # Words are credited proportionally to how much of the segment is inside.
        words_sum += seg.word_count * fraction_inside

        last_speech_end = min(seg.end, win_end)
        covered.append(seg)

    if not covered:
        return float("-inf"), []

    speech_ratio = spoken / window_len
    word_density = words_sum / window_len
    avg_conf = conf_sum / spoken if spoken else -5.0
    silence_penalty = silence / window_len

    score = (
        word_density * 1.5
        + speech_ratio * 1.2
        + max(-3.0, avg_conf) * 0.45
        - silence_penalty * 0.9
    )
    return score, covered


def select_highlights_with_ai(
    segments: Sequence[Segment],
    min_dur: float,
    max_dur: float,
    top_k: int,
    config: AIConfig,
) -> List[HighlightCandidate]:
    """Ask the configured AI provider to pick highlight windows.

    The prepared transcript is turned into a prompt and sent to OpenAI or
    Gemini depending on ``config.provider``.  Each returned item must carry
    numeric ``start``/``end`` values; these are clamped to the transcript
    bounds, adjusted towards the ``[min_dur, max_dur]`` range and then scored
    with ``score_window``.

    Args:
        segments: Transcript segments; the first and last ones define the
            time bounds used for clamping.
        min_dur: Minimum desired highlight duration.
        max_dur: Maximum desired highlight duration.
        top_k: Maximum number of highlights to return (at least one is
            allowed even if ``top_k`` is below 1).
        config: AI provider configuration.

    Returns:
        Highlights with ``origin="ai"`` in the order the provider returned
        them; an empty list when nothing usable came back, the provider is
        unsupported, or the provider call raised ``RuntimeError``.
    """
    prepared = prepare_segments_for_ai(segments)
    if not prepared:
        return []

    prompt = build_ai_prompt(prepared, min_dur=min_dur, max_dur=max_dur, top_k=top_k)
    try:
        if config.provider == "openai":
            raw = call_openai_chat(config, prompt)
        elif config.provider == "gemini":
            raw = call_gemini_generate(config, prompt)
        else:
            return []
    except RuntimeError as exc:
        sys.stderr.write(f"[IA] Falha ao consultar {config.provider}: {exc}\n")
        return []

    # Accepted payload shapes: {"highlights": [...]}, {"highlights": {...}},
    # a single object holding "start"/"end" directly, or a bare list of items.
    payload = extract_json_payload(raw)
    items: List[Dict[str, Any]] = []
    if isinstance(payload, dict):
        raw_items = payload.get("highlights")
        if isinstance(raw_items, list):
            items = raw_items
        elif isinstance(raw_items, dict):
            items = [raw_items]
        elif raw_items is None and {"start", "end"} <= set(payload.keys()):
            items = [payload]
    elif isinstance(payload, list):
        items = payload  # type: ignore[assignment]

    if not items:
        return []

    first_start = segments[0].start
    last_end = segments[-1].end
    highlights: List[HighlightCandidate] = []

    for item in items:
        if not isinstance(item, dict):
            continue
        try:
            start = float(item.get("start"))
            end = float(item.get("end"))
        except (TypeError, ValueError):
            # Missing or non-numeric bounds: ignore this suggestion.
            continue

        # Keep the window inside the transcript's time range.
        start = max(first_start, start)
        end = min(last_end, end)
        if end <= start:
            continue

        # Too short: extend the end (never past the transcript end).
        # NOTE(review): if the extension is cut short by ``last_end`` the window
        # can stay below ``min_dur``; ``start`` is only pulled back in the
        # over-long branch below - confirm this is intended.
        duration = end - start
        if duration < min_dur:
            end = min(last_end, start + min_dur)
        duration = end - start
        # Too long: trim the end down to ``max_dur``.
        if duration > max_dur:
            end = min(last_end, start + max_dur)
            duration = end - start
            if duration < min_dur:
                start = max(first_start, end - min_dur)

        score, selected = score_window(segments, start, end)
        if score == float("-inf") or not selected:
            # No transcript segment overlaps the adjusted window.
            continue

        reason = str(item.get("reason", "") or "").strip()
        title = str(item.get("title", "") or "").strip()
        highlights.append(
            HighlightCandidate(
                start=start,
                end=end,
                score=score,
                segments=list(selected),
                reason=reason,
                title=title,
                origin="ai",
            )
        )
        # Stop once enough highlights were collected (always allow one).
        if len(highlights) >= max(1, top_k):
            break

    return highlights


def summarise_highlight_text(highlight: HighlightCandidate, limit: int = 260) -> str:
    """Return the highlight's spoken text as one line of at most ``limit`` characters.

    Texts longer than ``limit`` are cut at the last space that fits within
    ``limit - 3`` characters and suffixed with ``"..."``.
    """
    spoken = [seg.text for seg in highlight.segments if seg.text]
    summary = re.sub(r"\s+", " ", " ".join(spoken).strip())
    if len(summary) <= limit:
        return summary
    # Leave room for the ellipsis, then drop the (possibly partial) last word.
    head = summary[: limit - 3].rsplit(" ", 1)[0]
    return head + "..."


def get_highlight_transcript(highlight: HighlightCandidate, limit: int = 4000) -> str:
    """Return the highlight's full spoken text, whitespace-collapsed.

    When the text exceeds ``limit`` characters it is cut at the last space
    inside the first ``limit`` characters (no ellipsis is appended).
    """
    pieces = [seg.text.strip() for seg in highlight.segments if seg.text]
    transcript = re.sub(r"\s+", " ", " ".join(pieces)).strip()
    if len(transcript) <= limit:
        return transcript
    # Drop the (possibly partial) last word of the truncated text.
    return transcript[:limit].rsplit(" ", 1)[0]


def generate_social_caption(highlight: HighlightCandidate, ai_config: AIConfig) -> Optional[str]:
    """Ask Gemini for a social-media caption based on the highlight transcript.

    The prompt template comes from the ``PROMPT_SOCIAL_CAPTION`` environment
    variable when set and non-empty, otherwise from
    ``DEFAULT_PROMPT_SOCIAL_CAPTION``.  The request is retried up to
    ``AI_RETRY_ATTEMPTS`` times, sleeping ``AI_RETRY_DELAY`` between tries.

    Returns:
        The stripped caption, or ``None`` when ``ai_config`` is missing or is
        not a Gemini configuration, the highlight has no transcript, every
        attempt failed, or the provider returned no text.
    """
    if not ai_config or ai_config.provider != "gemini":
        return None

    transcript = get_highlight_transcript(highlight, limit=2000)
    if not transcript:
        return None

    template = os.getenv("PROMPT_SOCIAL_CAPTION") or DEFAULT_PROMPT_SOCIAL_CAPTION
    try:
        prompt = safe_format_template(template, transcript=transcript)
    except Exception as exc:  # pragma: no cover - fallback
        print(f"[PROMPT] Falha ao formatar PROMPT_SOCIAL_CAPTION ({exc}); usando padrao.")
        prompt = safe_format_template(DEFAULT_PROMPT_SOCIAL_CAPTION, transcript=transcript)

    response: Optional[str] = None
    last_error: Optional[Exception] = None
    for attempt in range(1, AI_RETRY_ATTEMPTS + 1):
        try:
            response = call_gemini_generate(ai_config, prompt, response_mime_type="text/plain")
            break
        except Exception as exc:
            last_error = exc
            print(f"[IA][legenda] tentativa {attempt}/{AI_RETRY_ATTEMPTS} falhou: {exc}")
            if attempt >= AI_RETRY_ATTEMPTS:
                break
            time.sleep(AI_RETRY_DELAY)

    # ``response`` can also be None without any recorded error (the provider
    # returned nothing, or AI_RETRY_ATTEMPTS is 0); calling ``.strip()`` on it
    # would raise AttributeError, so bail out in every "no text" case.
    if response is None:
        if last_error is not None:
            print("[IA][legenda] desistindo apos repetidas falhas.")
        return None

    caption = response.strip()
    return caption or None


def generate_portal_article(
    highlight: HighlightCandidate,
    ai_config: AIConfig,
) -> Optional[Dict[str, str]]:
    """Generate a portal article (title + body) for a highlight via Gemini.

    The prompt template comes from the ``PROMPT_PORTAL_ARTICLE`` environment
    variable when set and non-empty, otherwise from
    ``DEFAULT_PROMPT_PORTAL_ARTICLE``.  The JSON request is retried up to
    ``AI_RETRY_ATTEMPTS`` times, sleeping ``AI_RETRY_DELAY`` between tries.

    Returns:
        ``{"title": ..., "article": ...}``, or ``None`` when ``ai_config`` is
        missing or not a Gemini configuration, the highlight has no
        transcript, the request kept failing, or the answer is not a JSON
        object containing a non-empty ``article``.  A missing title falls
        back to the highlight's own title or an 80-character summary.
    """
    if not ai_config or ai_config.provider != "gemini":
        return None

    transcript = get_highlight_transcript(highlight, limit=6000)
    if not transcript:
        return None

    custom_template = os.getenv("PROMPT_PORTAL_ARTICLE")
    try:
        prompt = safe_format_template(
            custom_template or DEFAULT_PROMPT_PORTAL_ARTICLE, transcript=transcript
        )
    except Exception as exc:  # pragma: no cover - fallback
        print(f"[PROMPT] Falha ao formatar PROMPT_PORTAL_ARTICLE ({exc}); usando padrao.")
        prompt = safe_format_template(DEFAULT_PROMPT_PORTAL_ARTICLE, transcript=transcript)

    answer: Optional[Dict[str, Any]] = None
    failure: Optional[Exception] = None
    for attempt in range(1, AI_RETRY_ATTEMPTS + 1):
        try:
            answer = request_ai_json(ai_config, prompt)
        except Exception as exc:  # pragma: no cover - defensive
            failure = exc
            print(f"[IA][portal] tentativa {attempt}/{AI_RETRY_ATTEMPTS} falhou: {exc}")
            if attempt >= AI_RETRY_ATTEMPTS:
                answer = None
                break
            time.sleep(AI_RETRY_DELAY)
        else:
            break

    if answer is None and failure:
        print("[IA][portal] desistindo apos repetidas falhas.")
        return None

    if not isinstance(answer, dict):
        print("[IA][portal] resposta invalida: JSON esperado.")
        return None

    article = str(answer.get("article", "") or "").strip()
    title = str(answer.get("title", "") or "").strip()
    if not article:
        print("[IA][portal] conteudo ausente na resposta.")
        return None
    if not title:
        title = highlight.title or summarise_highlight_text(highlight, limit=80)
    return {"title": title, "article": article}


def derive_banned_phrases(transcript: str, reason: str, max_words: int = 8) -> List[str]:
    """Collect upper-cased phrases a generated title should not repeat.

    Candidates, in order: the first ``max_words`` words of the transcript,
    the transcript's first sentence (split on ``.``, ``!`` or ``?``) and,
    when ``reason`` has more than three words, its first ``max_words`` words.

    Returns:
        The whitespace-collapsed candidates without blanks or duplicates,
        keeping first-seen order.
    """
    shouted = transcript.upper()
    candidates: List[str] = []

    tokens = shouted.split()
    if tokens:
        candidates.append(" ".join(tokens[:max_words]))

    for sentence in re.split(r"[.!?]", shouted):
        sentence = sentence.strip()
        if sentence:
            candidates.append(sentence)
            break

    reason_tokens = (reason or "").strip().upper().split()
    if len(reason_tokens) > 3:
        candidates.append(" ".join(reason_tokens[:max_words]))

    collapsed = (re.sub(r"\s+", " ", phrase).strip() for phrase in candidates)
    # dict.fromkeys drops duplicates while preserving insertion order.
    return list(dict.fromkeys(phrase for phrase in collapsed if phrase))


def build_title_prompt(
    transcript: str,
    reason: str,
    highlight: HighlightCandidate,
    banned_phrases: Sequence[str],
    attempt: int,
    previous_titles: Sequence[str],
) -> str:
    """Render the title-generation prompt for one attempt.

    The template comes from the ``PROMPT_TITLE_GENERATION`` environment
    variable when set and non-empty, otherwise from the inline default below.
    The template receives the fields ``attempt_section``, ``reason``,
    ``duration``, ``banned_section``, ``previous_section`` and
    ``transcript``.  If formatting fails, ``DEFAULT_PROMPT_TITLE`` is
    rendered with the same fields instead.
    """
    reason = reason or "(sem resumo adicional)"

    # Section listing wording the model must avoid.
    banned_lines: List[str] = []
    opening_words = " ".join(transcript.split()[:6]).upper()
    if opening_words:
        banned_lines.append(f'Primeiras palavras proibidas: "{opening_words}"')
    if banned_phrases:
        quoted_phrases = "; ".join(f'"{phrase}"' for phrase in banned_phrases)
        banned_lines.append("Expressoes proibidas: " + quoted_phrases)
    banned_section = "\n".join(banned_lines)
    formatted_banned = f"{banned_section}\n" if banned_section else ""

    # Section listing titles rejected in earlier attempts.
    if previous_titles:
        quoted_titles = "; ".join(f'"{title}"' for title in previous_titles if title)
        previous_section = (
            "Titulos descartados anteriormente (produza algo completamente diferente):\n"
            + quoted_titles
            + "\n"
        )
    else:
        previous_section = ""

    if attempt == 0:
        attempt_section = "PRIMEIRA ENTREGA.\n"
    else:
        attempt_section = "REFACA DO ZERO! Traga outro gancho.\n"

    template = os.getenv("PROMPT_TITLE_GENERATION") or (
        "Voce e uma especialista em titulos. Use o texto abaixo (captions.txt) e escreva UM titulo curto e natural que resuma o fato principal.\n"
        "Regras simples:\n"
        "- Ate 45 caracteres.\n"
        "- Sem emojis, hashtags, aspas ou ponto final.\n"
        "- Frase coerente e direta em portugues, sem inventar nada alem do texto base.\n"
        "TEXTO BASE (caption):\n"
        "{transcript}\n"
        "Retorne apenas JSON valido: {{\"title\":\"TEXTO AQUI\"}}\n"
    )

    def render(chosen_template: str) -> str:
        """Fill ``chosen_template`` with this attempt's fields."""
        return safe_format_template(
            chosen_template,
            attempt_section=attempt_section,
            reason=reason,
            duration=highlight.duration,
            banned_section=formatted_banned,
            previous_section=previous_section,
            transcript=transcript,
        )

    try:
        return render(template)
    except Exception as exc:  # pragma: no cover - fallback
        print(f"[PROMPT] Falha ao formatar PROMPT_TITLE_GENERATION ({exc}); usando padrao.")
        return render(DEFAULT_PROMPT_TITLE)


def generate_title_with_ai(highlight: HighlightCandidate, config: AIConfig) -> Optional[str]:
    """Ask the AI provider for a title, retrying when the answer is unusable.

    One attempt is made when ``FAST_MODE`` is truthy, otherwise up to three.
    An answer is rejected when it carries no title, is empty after
    sanitising, or is judged too similar to the highlight by
    ``title_is_too_similar``; rejected titles are fed back into the next
    prompt.  A ``RuntimeError`` from the provider aborts immediately.

    Returns:
        The sanitised, upper-cased title, or ``None`` when there is no
        transcript, the provider failed, or no attempt was accepted.
    """
    transcript = get_highlight_transcript(highlight)
    if not transcript:
        return None

    def pick_title(payload: Any) -> str:
        """Pull the raw title out of the supported payload shapes ("" if none)."""
        if isinstance(payload, dict) and "title" in payload:
            return str(payload.get("title") or "").strip()
        if isinstance(payload, str):
            return payload.strip()
        if isinstance(payload, list) and payload:
            head = payload[0]
            if isinstance(head, dict) and "title" in head:
                return str(head.get("title") or "").strip()
        return ""

    banned = derive_banned_phrases(transcript, highlight.reason)
    rejected: List[str] = []

    attempts_allowed = 1 if FAST_MODE else 3
    for attempt in range(attempts_allowed):
        attempt_no = attempt + 1
        prompt = build_title_prompt(
            transcript, highlight.reason, highlight, banned, attempt, rejected
        )
        try:
            payload = request_ai_json(config, prompt)
        except RuntimeError as exc:
            sys.stderr.write(f"[IA] Falha ao gerar titulo com {config.provider}: {exc}\n")
            return None

        try:
            preview = "null" if payload is None else json.dumps(payload, ensure_ascii=False)
        except TypeError:
            preview = str(payload)
        print(f"[IA][titulo] tentativa {attempt_no}: {preview}")

        raw_title = pick_title(payload)
        if not raw_title:
            rejected.append("(resposta vazia)")
            continue

        candidate = force_uppercase_title(sanitise_title_text(raw_title))
        if candidate and not title_is_too_similar(candidate, highlight):
            print(f"[IA][titulo] aprovado na tentativa {attempt_no}: {candidate!r}")
            return candidate

        verdict = "parecido_demais" if candidate else "invalido"
        print(f"[IA][titulo] tentativa {attempt_no} rejeitada ({verdict}): {candidate!r}")
        rejected.append(candidate or "(invalido)")
        if candidate:
            # Forbid the rejected wording in the next prompt as well.
            banned.append(candidate)

    print("[IA][titulo] nenhuma tentativa valida, fallback sera aplicado.")
    return None


def request_title_from_webhook(caption_text: str, timeout: float = 120.0) -> Optional[str]:
    """Send the caption to the external webhook and try to obtain a ready-made title.

    The endpoint is read from the ``TITLE_WEBHOOK_URL`` environment variable
    (default ``DEFAULT_TITLE_WEBHOOK``) and ``wait=true`` is appended to the
    query string unless a ``wait=`` parameter is already present.  The body
    is a JSON object with the caption and ``DEFAULT_PROMPT_TITLE``.

    Args:
        caption_text: Text the title should be generated from.
        timeout: Timeout passed to ``urllib.request.urlopen``.

    Returns:
        The sanitised, upper-cased title, or ``None`` when no URL is
        configured, the caption is blank, the request fails, or no title can
        be extracted from the response.
    """
    base_url = os.getenv("TITLE_WEBHOOK_URL", DEFAULT_TITLE_WEBHOOK)
    if not base_url:
        return None
    url = base_url if ("wait=" in base_url.lower()) else (base_url + ("&wait=true" if "?" in base_url else "?wait=true"))
    if not caption_text.strip():
        print("[Webhook] Caption vazio; titulo nao solicitado.", flush=True)
        return None
    payload = json.dumps(
        {"caption": caption_text, "prompt": DEFAULT_PROMPT_TITLE},
        ensure_ascii=False
    ).encode("utf-8")
    req = urllib.request.Request(url, data=payload, headers={"Content-Type": "application/json"}, method="POST")
    def _strip_code_fence(text: str) -> str:
        # Unwrap a response delivered inside a Markdown ``` fence.
        txt = text.strip()
        if txt.startswith("```"):
            # remove fenced block markers and optional language hint
            txt = txt.lstrip("`")
            parts = txt.split("\n", 1)
            if len(parts) == 2:
                txt = parts[1]
            txt = txt.rsplit("```", 1)[0].strip()
        return txt

    def _extract_title_from_text(txt: str) -> Optional[str]:
        # Regex fallback: find a "title": "..." pair in arbitrary text.
        if not txt:
            return None
        # capture while allowing escaped quotes inside the value
        patterns = [
            r'"title"\s*:\s*"((?:\\.|[^"\\])*)"',
            r"'title'\s*:\s*'((?:\\.|[^'\\])*)'",
        ]
        for pat in patterns:
            match = re.search(pat, txt, re.IGNORECASE | re.DOTALL)
            if match:
                return match.group(1).strip()
        return None

    body: bytes = b""
    raw_decoded = ""
    data: Any = None
    print(f"[Webhook] Enviando caption para {url} (aguardando resposta unica)", flush=True)
    title: Optional[str] = None
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            status = getattr(resp, "status", None)
            body = resp.read()
            raw_decoded = body.decode("utf-8", errors="ignore")
            print(f"[Webhook] Resposta bruta: {raw_decoded[:400]}", flush=True)
            cleaned = _strip_code_fence(raw_decoded)
            # Prefer structured JSON; otherwise keep the cleaned text as-is.
            try:
                data = json.loads(cleaned)
            except Exception:
                data = cleaned.strip()
            if status:
                print(f"[Webhook] Status {status}", flush=True)
    except Exception as exc:
        sys.stderr.write(f"[Webhook] Falha ao obter titulo: {exc}\n")
        return None

    # ``cleaned`` is always bound here: any failure above has already returned.
    regex_title = _extract_title_from_text(cleaned) or _extract_title_from_text(raw_decoded)

    # Look for the title in the supported response shapes.
    if isinstance(data, dict):
        if "title" in data:
            title = str(data.get("title") or "").strip()
        elif "Titulo" in data:
            title = str(data.get("Titulo") or "").strip()
        elif "data" in data and isinstance(data["data"], dict):
            nested = data["data"]
            if "title" in nested:
                title = str(nested.get("title") or "").strip()
        elif data:
            # Unknown keys: take the first non-blank string value.
            for val in data.values():
                if isinstance(val, str) and val.strip():
                    title = val.strip()
                    break
    elif isinstance(data, list) and data:
        first = data[0]
        if isinstance(first, dict):
            title = str(first.get("title") or first.get("Titulo") or "").strip()
        elif isinstance(first, str):
            title = first.strip()
    elif isinstance(data, str):
        title = data.strip()

    # Discard the parsed title whenever "WORKFLOW" shows up in it or anywhere in
    # the raw response.
    # NOTE(review): presumably this filters workflow status/error messages
    # returned by the webhook platform - confirm.
    if (title and isinstance(title, str) and "WORKFLOW" in title.upper()) or ("WORKFLOW" in raw_decoded.upper()):
        title = None

    # Without a usable title (or when the response was not a JSON object),
    # fall back to the regex-based extraction.
    if (not title) or (isinstance(title, str) and "WORKFLOW" in title.upper()) or (not isinstance(data, dict)):
        if regex_title:
            title = regex_title
        else:
            fallback_text = raw_decoded or ""
            cleaned_text = _strip_code_fence(fallback_text)
            extracted = _extract_title_from_text(cleaned_text) or _extract_title_from_text(fallback_text)
            if extracted:
                title = extracted

    if not title:
        print(f"[Webhook] Nenhum titulo encontrado. Resposta bruta: {raw_decoded[:500]}", flush=True)
        return None

    if (title.startswith('"') and title.endswith('"')) or (title.startswith("'") and title.endswith("'")):
        title = title[1:-1].strip()
    # remove escapes and leftover quotes (every apostrophe is dropped)
    title = title.replace('\\"', '"').replace("'", "")
    title = sanitise_title_text(title)
    title = force_uppercase_title(title)
    # If the webhook returned raw JSON holding a TITLE key, parse it
    if title.startswith("{") and title.endswith("}"):
        try:
            data = json.loads(title)
            for key in ("title", "TITLE", "Title"):
                if key in data:
                    title = force_uppercase_title(sanitise_title_text(str(data[key])))
                    break
        except Exception:
            pass
    print(f"[Webhook] Titulo retornado: {title}")
    return title or None


def normalise_text_for_compare(text: str) -> str:
    """Reduce ``text`` to lower-case ASCII letters/digits separated by single spaces.

    Accents are removed (NFKD decomposition, then combining marks dropped),
    every other character outside ``a-z0-9`` becomes a separator and runs of
    whitespace collapse to one space.  ``None`` or empty input yields ``""``.
    """
    decomposed = unicodedata.normalize("NFKD", text or "")
    base_chars = [ch for ch in decomposed if not unicodedata.combining(ch)]
    lowered = "".join(base_chars).lower()
    separated = re.sub(r"[^a-z0-9\s]+", " ", lowered)
    collapsed = re.sub(r"\s+", " ", separated)
    return collapsed.strip()


def title_is_too_similar(title: str, highlight: HighlightCandidate) -> bool:
    """Tell whether ``title`` is unusable or merely echoes the highlight.

    Returns ``True`` when the normalised title is empty, has at most two
    words or at most ten characters, is contained in the 80-character
    highlight summary or in ``highlight.reason``, starts with the same words
    as the summary (up to three), or shares at least 60% of its distinct
    words with either text.
    """
    title_norm = normalise_text_for_compare(title)
    if not title_norm:
        return True

    title_words = title_norm.split()
    if len(title_words) <= 2:
        return True
    distinct_title_words = set(title_words)

    def shares_too_many_words(other_words: List[str]) -> bool:
        """True when >= 60% of the title's distinct words also occur in ``other_words``."""
        common = distinct_title_words & set(other_words)
        if not common:
            return False
        return len(common) / max(1, len(distinct_title_words)) >= 0.6

    snippet_norm = normalise_text_for_compare(summarise_highlight_text(highlight, limit=80))
    if snippet_norm:
        if title_norm in snippet_norm or snippet_norm.startswith(title_norm):
            return True
        snippet_words = snippet_norm.split()

        # Count how many leading words are identical in both texts.
        shared_prefix = 0
        for title_word, snippet_word in zip(title_words, snippet_words):
            if title_word != snippet_word:
                break
            shared_prefix += 1
        if shared_prefix >= min(3, len(title_words), len(snippet_words)):
            return True

        if shares_too_many_words(snippet_words):
            return True

    reason_norm = normalise_text_for_compare(highlight.reason)
    if reason_norm:
        if title_norm in reason_norm or reason_norm.startswith(title_norm):
            return True
        if shares_too_many_words(reason_norm.split()):
            return True

    return len(title_norm) <= 10


def sanitise_title_text(title: str) -> str:
    """Clean a raw title: single line, trimmed, bounded length, capitalised.

    Whitespace is collapsed, surrounding spaces and quotes are stripped, one
    trailing ``.``, ``!`` or ``?`` is removed, the text is cut at a word
    boundary to at most ``MAX_TITLE_LEN`` characters and the first character
    is upper-cased.  Returns ``""`` when nothing is left.
    """
    flat = (title or "").replace("\n", " ")
    flat = re.sub(r"\s+", " ", flat).strip().strip(" \"'")
    if not flat:
        return ""

    if flat[-1] in ".!?":
        flat = flat[:-1]

    # Connectives and short upper-case words are intentionally kept as-is.
    flat = flat.strip()
    if not flat:
        return ""

    if len(flat) > MAX_TITLE_LEN:
        # Cut at the limit, then drop the (possibly partial) last word.
        flat = flat[:MAX_TITLE_LEN].rsplit(" ", 1)[0]

    if flat and not flat[0].isupper():
        flat = flat[0].upper() + flat[1:]

    return flat


def build_title_from_webhook(source_text: str, fallback_title: str) -> str:
    """Return a webhook-generated title, or the stripped ``fallback_title``.

    The webhook is only called when ``source_text`` is non-blank.  Any
    exception from the request, or an answer that is empty after sanitising,
    results in the fallback being returned.
    """
    caption = (source_text or "").strip()
    fallback = fallback_title.strip()
    if caption:
        try:
            print("   [Titulo] Chamando webhook...", flush=True)
            webhook_title = request_title_from_webhook(caption)
        except Exception as exc:  # pragma: no cover - depends on external network
            print(f"   [Webhook] Falha na requisicao: {exc}")
            webhook_title = None

        neat = ""
        if webhook_title:
            neat = force_uppercase_title(sanitise_title_text(webhook_title))
        if neat:
            print(f"   Titulo webhook: {neat}")
            return neat
        print("   [Webhook] Nenhum titulo retornado; usando fallback.")
    return fallback


def force_uppercase_title(title: str) -> str:
    """Drop emoji-like symbols from ``title`` and upper-case it.

    Only ASCII characters, alphanumerics and whitespace survive the filter.
    Leading non-alphanumeric characters are kept untouched; everything from
    the first alphanumeric character onwards is upper-cased.
    """
    if not title:
        return ""
    allowed_punctuation = "!?.,:-_'\"()"
    kept = [
        ch
        for ch in title
        if ch.isascii() or ch.isalnum() or ch.isspace() or ch in allowed_punctuation
    ]
    cleaned = "".join(kept).strip()
    # Position of the first letter/digit; everything before it is the prefix.
    first_alnum = next(
        (pos for pos, ch in enumerate(cleaned) if ch.isalnum()), len(cleaned)
    )
    return (cleaned[:first_alnum] + cleaned[first_alnum:].upper()).strip()


def compress_title(text: str, max_words: int = 12, min_words: int = 5) -> str:
    """Return ``text`` limited to its first ``max_words`` words, single-spaced.

    ``min_words`` is accepted for interface compatibility but has no effect
    on the result.
    """
    if not text:
        return ""
    return " ".join(text.split()[:max_words])


def ensure_aux_and_connective(title: str) -> str:
    """Make sure the title contains an auxiliary verb and a connective.

    When no auxiliary is present, ``"FOI"`` is inserted after the first word;
    when no connective is present, ``"EM"`` is inserted two positions after
    the first auxiliary (bounded by the title length).  The result is
    sanitised, upper-cased and limited to 14 words and ``MAX_TITLE_LEN``
    characters.
    """
    if not title:
        return ""
    auxiliaries = {"E", "FOI", "ESTA", "ESTÁ", "SERA", "SERÁ"}
    connectives = {"DE", "DO", "DA", "EM", "POR", "APOS", "APÓS"}

    tokens = title.split()
    lacks_auxiliary = not any(tok in auxiliaries for tok in tokens)
    lacks_connective = not any(tok in connectives for tok in tokens)

    if lacks_auxiliary:
        # Always right after the first word (list.insert appends if empty).
        tokens.insert(1, "FOI")
    if lacks_connective:
        aux_idx = next(
            (pos for pos, tok in enumerate(tokens) if tok in auxiliaries), 0
        )
        target = max(1, min(aux_idx + 2, len(tokens)))
        tokens.insert(target, "EM")

    fixed = force_uppercase_title(sanitise_title_text(" ".join(tokens)))
    fixed_words = fixed.split()
    if len(fixed_words) > 14:
        fixed = " ".join(fixed_words[:14])
    if len(fixed) > MAX_TITLE_LEN:
        # Cut at the limit, then drop the (possibly partial) last word.
        fixed = fixed[:MAX_TITLE_LEN].rsplit(" ", 1)[0]
    return fixed


# Upper-case, accent-free Portuguese function words ignored when ranking keywords.
STOPWORDS = {
    "A",
    "AS",
    "O",
    "OS",
    "DE",
    "DA",
    "DO",
    "DOS",
    "DAS",
    "UM",
    "UMA",
    "E",
    "EM",
    "POR",
    "COM",
    "PARA",
    "QUE",
    "NA",
    "NO",
    "NOS",
    "NAS",
    "SE",
    "AO",
    "AOS",
    "PRA",
    "PRAO",
    "PRAA",
    "PRAOS",
    "PRAAS",
    "MAS",
    "OU",
    "SER",
    "FOI",
    "ERA",
    "SOB",
}


def extract_keywords_from_transcript(transcript: str, limit: int = 4) -> List[str]:
    """Return the most frequent significant words of ``transcript``.

    The text is upper-cased and stripped of accents (NFKD decomposition with
    combining marks removed) *before* tokenising, so an accented word stays a
    single token ("pressão" -> "PRESSAO") instead of being split at each
    combining mark.  Words of one or two characters and words listed in
    ``STOPWORDS`` are ignored.

    Args:
        transcript: Source text; ``None`` or empty yields an empty list.
        limit: Maximum number of keywords to return.

    Returns:
        Up to ``limit`` upper-case ASCII keywords, most frequent first; ties
        are broken alphabetically.
    """
    decomposed = unicodedata.normalize("NFKD", (transcript or "").upper())
    folded = "".join(ch for ch in decomposed if not unicodedata.combining(ch))

    freq: Dict[str, int] = {}
    for word in re.findall(r"[A-Z0-9]+", folded):
        if len(word) <= 2 or word in STOPWORDS:
            continue
        freq[word] = freq.get(word, 0) + 1

    ranked = sorted(freq.items(), key=lambda item: (-item[1], item[0]))
    return [word for word, _ in ranked[:limit]]


def fallback_title_from_highlight(highlight: HighlightCandidate) -> str:
    """Build a title without AI, from the highlight's most frequent keywords.

    The hook pattern depends on how many keywords were found (three or more,
    two, one, or none - in which case ``highlight.reason`` or a stock phrase
    is used).  A prefix is picked from ``TITLE_PREFIXES`` based on the
    highlight start time.  The result is sanitised, upper-cased and limited
    to ``MAX_TITLE_LEN``; hooks shorter than 12 characters are replaced by a
    stock title.
    """
    transcript = get_highlight_transcript(highlight, limit=600)
    keywords = extract_keywords_from_transcript(transcript)
    prefix_slot = int(round(highlight.start * 10)) % len(TITLE_PREFIXES)
    prefix = TITLE_PREFIXES[prefix_slot]

    keyword_count = len(keywords)
    if keyword_count >= 3:
        first, second, third = keywords[0], keywords[1], keywords[2]
        hook_core = f"{first} {second} VS {third}"
    elif keyword_count == 2:
        first, second = keywords
        hook_core = f"{first} SOB PRESSAO {second}"
    elif keyword_count == 1:
        hook_core = f"{keywords[0]} EM XEQUE"
    else:
        base = highlight.reason or "VERDADE ESCONDIDA"
        base = re.sub(r"\s+", " ", base).strip().upper()
        cut = max(12, min(len(base), MAX_TITLE_LEN - 4))
        hook_core = base[:cut]

    hook = force_uppercase_title(sanitise_title_text(f"{prefix} {hook_core}"))

    if len(hook) > MAX_TITLE_LEN:
        # Cut at the limit, then drop the (possibly partial) last word.
        hook = hook[:MAX_TITLE_LEN].rsplit(" ", 1)[0]

    if len(hook) < 12:
        hook = force_uppercase_title(f"{prefix} SEGREDO REVELADO")

    return hook


def assign_titles_to_highlights(highlights: Sequence[HighlightCandidate], ai_config: Optional[AIConfig]) -> None:
    """Give every untitled highlight a title, in place.

    The AI generator is tried first when ``ai_config`` is provided; if it
    yields nothing, the keyword-based fallback is used.  The stored title is
    always sanitised and upper-cased.  Highlights that already have a title
    are left untouched.
    """
    # Snapshot taken up front: only highlights untitled at call time are processed.
    untitled = [candidate for candidate in highlights if not candidate.title]
    if not untitled:
        return

    for candidate in untitled:
        chosen = generate_title_with_ai(candidate, ai_config) if ai_config else None
        if not chosen:
            chosen = fallback_title_from_highlight(candidate)
            print(f"[IA][titulo] usando fallback: {chosen!r}")

        candidate.title = force_uppercase_title(sanitise_title_text(chosen))
        print(
            f"[IA][titulo] final para corte {fmt_ts(candidate.start)}-{fmt_ts(candidate.end)}: "
            f"{candidate.title!r}"
        )


def find_highlights(segments: Sequence[Segment], min_dur: float, max_dur: float, top_k: int) -> List[HighlightCandidate]:
    """Pick the best-scoring windows using the heuristic scorer only.

    Every window that starts at one segment's start and ends at the same or a
    later segment's end is considered when its duration lies within
    ``[min_dur, max_dur]``.  If no window qualifies, the whole transcript
    span becomes the single candidate.

    Returns:
        At most ``max(1, top_k)`` candidates with ``origin="heuristic"``,
        best score first; an empty list when ``segments`` is empty.
    """
    if not segments:
        return []

    pool: List[HighlightCandidate] = []
    segment_count = len(segments)

    for first_idx, opener in enumerate(segments):
        window_start = opener.start
        for last_idx in range(first_idx, segment_count):
            window_end = segments[last_idx].end
            span = window_end - window_start
            if span < min_dur:
                continue
            if span > max_dur:
                # Later end points only make the window longer.
                break
            score, covered = score_window(segments, window_start, window_end)
            if score == float("-inf"):
                continue
            pool.append(
                HighlightCandidate(
                    start=window_start,
                    end=window_end,
                    score=score,
                    segments=covered,
                    origin="heuristic",
                )
            )

    if not pool:
        # Nothing fits the duration range: fall back to the full transcript.
        whole_start, whole_end = segments[0].start, segments[-1].end
        score, covered = score_window(segments, whole_start, whole_end)
        pool.append(
            HighlightCandidate(
                start=whole_start,
                end=whole_end,
                score=score,
                segments=covered or list(segments),
                origin="heuristic",
            )
        )

    ranked = sorted(pool, key=lambda candidate: candidate.score, reverse=True)
    return ranked[: max(1, top_k)]


def fmt_ts(seconds: float) -> str:
    """Format ``seconds`` as an SRT-style timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds *before* being split into
    fields, so a fraction that rounds up to a full second carries into the
    seconds/minutes/hours fields (``1.9996`` -> ``00:00:02,000``) instead of
    producing a four-digit millisecond field.  Negative values are clamped to
    zero.
    """
    total_ms = max(0, int(round(seconds * 1000)))
    total_s, ms = divmod(total_ms, 1000)
    total_m, s = divmod(total_s, 60)
    h, m = divmod(total_m, 60)
    return f"{h:02}:{m:02}:{s:02},{ms:03}"


def fmt_ass_time(seconds: float) -> str:
    """Format ``seconds`` as an ASS timestamp ``H:MM:SS.cc`` (centisecond precision)."""
    centiseconds_total = int(round(seconds * 100))
    whole_seconds, cs = divmod(centiseconds_total, 100)
    whole_minutes, s = divmod(whole_seconds, 60)
    h, m = divmod(whole_minutes, 60)
    return f"{h:d}:{m:02d}:{s:02d}.{cs:02d}"


def escape_ass(text: str) -> str:
    """Backslash-escape ``\\``, ``{`` and ``}`` in ``text``."""
    escapes = str.maketrans({"\\": "\\\\", "{": "\\{", "}": "\\}"})
    return text.translate(escapes)


def apply_ass_line_spacing(text: str, line_spacing: int) -> str:
    """Insert ``\\pbo`` override tags after every ``\\N`` break in *text*.

    Each line following a ``\\N`` is prefixed with ``{\\pbo-<line_spacing>}``
    and a closing ``{\\pbo0}`` is appended at the very end. The text is
    returned unchanged when *line_spacing* is not positive or when it
    contains no ``\\N`` break.
    """
    if line_spacing <= 0:
        return text
    first_line, *following = text.split("\\N")
    if not following:
        return text
    shifted = "".join(f"\\N{{\\pbo-{line_spacing}}}{line}" for line in following)
    return f"{first_line}{shifted}{{\\pbo0}}"


def build_subtitle_chunks(
    highlight: HighlightCandidate,
    max_words_per_screen: int = DEFAULT_MAX_WORDS_ON_SCREEN,
) -> List[Dict[str, Any]]:
    """Split the highlight's segments into short on-screen word groups.

    Every segment is clipped to the highlight window, its text is upper-cased
    and split on whitespace, and the clipped duration (at least 0.2s) is
    shared evenly between its words. The words are then grouped into blocks
    of at most *max_words_per_screen* (a non-positive value falls back to
    ``DEFAULT_MAX_WORDS_ON_SCREEN``).

    Returns a list of dicts with the keys ``start`` and ``end`` (seconds
    relative to the highlight start), ``words`` (the upper-cased words of the
    block) and ``per_word`` (seconds allotted to each word, at least 0.05).
    """
    words_per_block = (
        DEFAULT_MAX_WORDS_ON_SCREEN if max_words_per_screen <= 0 else max_words_per_screen
    )
    clip_length = max(0.1, highlight.duration)
    result: List[Dict[str, Any]] = []

    for segment in highlight.segments:
        # Only the part of the segment that lies inside the highlight counts.
        visible_from = max(segment.start, highlight.start)
        visible_to = min(segment.end, highlight.end)
        if visible_to <= visible_from:
            continue

        tokens = [token.upper() for token in segment.text.replace("\n", " ").split()]
        if not tokens:
            continue

        seconds_per_word = max(0.2, visible_to - visible_from) / len(tokens)

        for offset in range(0, len(tokens), words_per_block):
            group = tokens[offset : offset + words_per_block]
            group_size = len(group)

            abs_start = visible_from + seconds_per_word * offset
            abs_end = min(visible_to, visible_from + seconds_per_word * (offset + group_size))

            rel_start = abs_start - highlight.start
            rel_end = abs_end - highlight.start
            if rel_end <= rel_start:
                # Degenerate block: give it a minimum visible duration.
                rel_end = rel_start + max(0.18, seconds_per_word * group_size)

            rel_start = max(0.0, rel_start)
            rel_end = min(clip_length, rel_end)
            if rel_end - rel_start < 0.05:
                rel_end = min(clip_length, rel_start + 0.2)

            shown_for = max(0.18, rel_end - rel_start)
            result.append(
                {
                    "start": rel_start,
                    "end": min(clip_length, rel_start + shown_for),
                    "words": group,
                    "per_word": max(0.05, shown_for / group_size),
                }
            )

    return result


def split_fit_title_lines(text: str) -> List[str]:
    """Greedily wrap *text* into title lines for the fit layout.

    Whitespace is collapsed, then words are packed into lines no longer than
    ``FIT_TITLE_MAX_CHARS_PER_LINE`` characters (a single word longer than the
    limit still gets a line of its own). If more than ``FIT_TITLE_MAX_LINES``
    lines result, the overflow is merged into the last kept line.

    Returns an empty list for blank input.
    """
    words = text.split()
    if not words:
        return []

    wrapped: List[str] = []
    pending = ""
    for word in words:
        if pending and len(pending) + 1 + len(word) > FIT_TITLE_MAX_CHARS_PER_LINE:
            wrapped.append(pending)
            pending = word
        else:
            pending = f"{pending} {word}" if pending else word
    if pending:
        wrapped.append(pending)

    if len(wrapped) > FIT_TITLE_MAX_LINES:
        last_kept = FIT_TITLE_MAX_LINES - 1
        wrapped = wrapped[:last_kept] + [" ".join(wrapped[last_kept:])]
    return wrapped


def write_ass_subtitles(
    highlight: HighlightCandidate,
    ass_path: Path,
    title_margin_v: int = 100,
    *,
    exclude_title: bool = False,
    title_only_path: Optional[Path] = None,
    variant_mode: str = "fit",
) -> bool:
    """Write the ASS subtitle script for *highlight* and report if it has a title.

    Args:
        highlight: Highlight whose segments and title are rendered.
        ass_path: Destination of the main ASS file (word chunks, plus the
            title events unless *exclude_title* is set).
        title_margin_v: Vertical margin of the ``Title`` style; in fit mode it
            is also the Y coordinate of the first title line.
        exclude_title: When true, title events are left out of *ass_path*.
        title_only_path: Optional file that receives only the title events.
            It is removed when the highlight has no title.
        variant_mode: ``"fit"`` (case-insensitive) selects the fit layout; any
            other value selects the zoom/fill layout.

    Returns:
        True when at least one title event was produced.
    """
    is_fit = variant_mode.lower() == "fit"
    subtitle_font_size = 170 if is_fit else 150
    subtitle_margin_v = 120
    base_title_font_size = 90
    title_line_spacing = 0
    if is_fit:
        title_font_size = int(base_title_font_size * 1.5)
        # Fit titles are drawn without outline or box.
        title_outline = 0
        title_outline_colour = "&H00000000"
        title_back_colour = "&H00000000"
    else:
        # Zoom: slightly smaller so the title fits with fewer line breaks.
        title_font_size = int(base_title_font_size * 1.2)
        # Very tight line spacing, similar to the fit layout.
        title_line_spacing = int(round(title_font_size * 0.02))
        # Same outline/box as the regular subtitles.
        title_outline = 6
        title_outline_colour = "&H00151515"
        title_back_colour = "&H64000000"
    title_primary = "&H00FFFFFF"

    start_lead = 0.05

    header = (
        "[Script Info]\n"
        "ScriptType: v4.00+\n"
        "PlayResX: 1080\n"
        "PlayResY: 1920\n"
        "ScaledBorderAndShadow: yes\n"
        "YCbCr Matrix: TV.709\n"
        "\n"
        "[V4+ Styles]\n"
        "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, "
        "Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding\n"
        f"Style: Default,Anton,{subtitle_font_size},&H00FFFFFF,&H00777777,&H00151515,&H64000000,-1,0,0,0,100,100,0,0,1,6,3,2,60,60,{subtitle_margin_v},1\n"
        f"Style: Title,Anton,{title_font_size},{title_primary},&H00FFFFFF,{title_outline_colour},{title_back_colour},-1,0,0,0,100,100,0,0,1,{title_outline},3,8,80,80,{title_margin_v},1\n"
        "\n"
        "[Events]\n"
        "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text\n"
    )

    # One Dialogue event per word chunk, shifted by a small lead-in.
    lines: List[str] = []
    for chunk in build_subtitle_chunks(highlight):
        start = max(0.0, chunk["start"] + start_lead)
        end = min(highlight.duration + start_lead, max(start + 0.2, chunk["end"] + start_lead))
        if end <= start:
            end = start + 0.25
        words = chunk["words"]

        line = (
            f"Dialogue: 0,{fmt_ass_time(start)},{fmt_ass_time(end)},Default,,0,0,0,,"
            f"{{\\blur0.8\\bord6\\shad3\\fad(120,80)}}{escape_ass(' '.join(words))}"
        )
        lines.append(line)

    title_text = highlight.title.strip()
    title_lines: List[str] = []
    has_title = False
    if title_text:
        base_segments = [seg.strip() for seg in title_text.replace("\r", "\n").split("\n")]
        base_segments = [seg for seg in base_segments if seg]
        if not base_segments:
            base_segments = [title_text.strip()]
        segments = list(base_segments)
        if is_fit:
            # Auto-break long titles into multiple lines.
            auto_lines: List[str] = []
            if len(segments) == 1:
                auto_lines = split_fit_title_lines(segments[0])
            else:
                for seg in segments:
                    expanded = split_fit_title_lines(seg) or [seg]
                    auto_lines.extend(expanded)
            segments = [line for line in auto_lines if line] or segments
            base_y = title_margin_v
            gap = FIT_TITLE_LINE_GAP
            # Each title line is its own event, positioned explicitly.
            for idx, segment in enumerate(segments):
                escaped_segment = escape_ass(segment)
                y_pos = base_y + idx * gap
                title_lines.append(
                    f"Dialogue: 0,0:00:00.00,{fmt_ass_time(highlight.duration)},Title,,0,0,0,,"
                    f"{{\\bord0\\shad0\\fad(120,120)\\fs{title_font_size}\\pos({FIT_TITLE_POS_X},{y_pos})}}{escaped_segment}"
                )
        else:
            # Escape each line separately: running the joined text through
            # escape_ass() would double the backslash of the "\N" separator
            # and corrupt the line break.
            escaped_title = " \\N ".join(escape_ass(seg) for seg in segments)
            if title_line_spacing > 0:
                escaped_title = apply_ass_line_spacing(escaped_title, title_line_spacing)
            title_lines.append(
                f"Dialogue: 0,0:00:00.00,{fmt_ass_time(highlight.duration)},Title,,0,0,0,,"
                f"{{\\blur0.8\\bord6\\shad3\\fad(120,120)}}{escaped_title}"
            )
        has_title = bool(title_lines)

    # The placeholder must be added before the output list is assembled,
    # otherwise it never reaches the file.
    if not lines:
        fallback_line = (
            f"Dialogue: 0,0:00:00.00,{fmt_ass_time(highlight.duration)},Default,,0,0,0,,"
            "{\\blur2\\bord7\\fad(120,120)}(sem fala detectada)"
        )
        lines.append(fallback_line)

    output_lines = list(lines)
    if has_title and not exclude_title:
        output_lines[0:0] = title_lines

    ass_path.write_text(header + "\n".join(output_lines) + "\n", encoding="utf-8")

    if title_only_path and has_title:
        title_only_path.write_text(header + "\n".join(title_lines) + "\n", encoding="utf-8")
    elif title_only_path and title_only_path.exists():
        # No title: make sure a stale title-only file is not left behind.
        title_only_path.unlink(missing_ok=True)

    return has_title


def write_srt_subtitles(highlight: HighlightCandidate, srt_path: Path) -> None:
    """Write an SRT file for *highlight* to *srt_path*.

    The first cue carries the title (when non-blank) for almost the whole
    highlight; the remaining cues are the word chunks. When there is neither
    a title nor any chunk, a single placeholder cue is written.
    """
    entries: List[Tuple[float, float, str]] = []

    title = highlight.title.strip()
    if title:
        entries.append((0.0, max(0.5, highlight.duration - 0.05), title))

    for chunk in build_subtitle_chunks(highlight):
        begin = chunk["start"]
        finish = min(highlight.duration, max(begin + 0.2, chunk["end"]))
        entries.append((begin, finish, " ".join(chunk["words"])))

    if entries:
        cues = [
            f"{number}\n{fmt_ts(begin)} --> {fmt_ts(finish)}\n{body}\n"
            for number, (begin, finish, body) in enumerate(entries, start=1)
        ]
    else:
        cues = ["1\n00:00:00,000 --> 00:00:03,000\n(sem fala detectada)\n"]

    srt_path.write_text("\n".join(cues), encoding="utf-8")


def _mp_face_module_available() -> bool:
    """Return True when both the MediaPipe and the OpenCV loaders yield a module."""
    if ensure_mediapipe_module() is None:
        return False
    return ensure_cv2_module() is not None


def _clamp(value: float, min_value: float, max_value: float) -> float:
    if value < min_value:
        return min_value
    if value > max_value:
        return max_value
    return value


class FaceCropAnalyzer:
    """Locate faces in a video to choose the horizontal offset of a 1080x1920 crop.

    The video is opened with OpenCV. Its width is projected onto a frame
    scaled to a height of 1920 pixels; cropping is only supported when that
    scaled width is at least 1080 pixels. Usable as a context manager, which
    releases the capture on exit.
    """

    def __init__(self, video_path: Path) -> None:
        """Open *video_path* and derive the scaled geometry used for cropping.

        Raises:
            RuntimeError: if OpenCV is unavailable, the video cannot be
                opened, or it reports non-positive dimensions.
        """
        cv2_module = ensure_cv2_module()
        if cv2_module is None:
            raise RuntimeError("OpenCV nao esta instalado.")
        self.video_path = str(video_path)
        self._cv2 = cv2_module
        self.cap = cv2_module.VideoCapture(self.video_path)
        if not self.cap.isOpened():
            # Release the handle on this failure path too, as is done for
            # invalid dimensions below.
            self.close()
            raise RuntimeError("Nao foi possivel abrir o video para analise com MediaPipe.")

        self.width = int(self.cap.get(cv2_module.CAP_PROP_FRAME_WIDTH) or 0)
        self.height = int(self.cap.get(cv2_module.CAP_PROP_FRAME_HEIGHT) or 0)
        # Fall back to 30 fps when the container reports none.
        self.fps = float(self.cap.get(cv2_module.CAP_PROP_FPS) or 30.0)
        if self.width <= 0 or self.height <= 0:
            self.close()
            raise RuntimeError("Dimensoes do video sao invalidas para analise de rosto.")

        # Geometry of the frame after scaling its height to 1920 pixels.
        self.scale_target_height = 1920.0
        self.scale_factor = self.scale_target_height / float(self.height)
        self.scaled_width = max(1, int(round(self.width * self.scale_factor)))
        self.crop_width = 1080
        self.crop_height = 1920
        self.supports_crop = self.scaled_width >= self.crop_width
        # Largest valid left edge of the crop window.
        self.max_x = max(0, self.scaled_width - self.crop_width)
        self.frame_count = int(self.cap.get(cv2_module.CAP_PROP_FRAME_COUNT) or 0)

    def close(self) -> None:
        """Release the video capture; safe to call more than once."""
        if hasattr(self, "cap") and self.cap is not None:
            self.cap.release()
            self.cap = None  # type: ignore

    def __enter__(self) -> "FaceCropAnalyzer":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def compute_window(self, start: float, end: float, detector: Any) -> Optional[CropWindow]:
        """Compute the crop window for the time range ``[start, end]`` (seconds).

        Frames are sampled about five times per second of video. On each
        sampled frame the detection with the highest score is kept and its
        left edge, right edge and centre are recorded in scaled-frame pixels.

        Args:
            start: Start of the range in seconds.
            end: End of the range in seconds; a non-positive range is widened
                to 0.2 seconds.
            detector: Object whose ``process(rgb_frame)`` returns a result
                with a ``detections`` attribute.

        Returns:
            A ``CropWindow``, or None when cropping is unsupported or no face
            was detected on any sampled frame.
        """
        if not self.supports_crop:
            return None

        if start >= end:
            end = start + 0.2

        start_frame = max(0, int(start * self.fps))
        end_frame = max(start_frame + 1, int(end * self.fps))
        if self.frame_count > 0:
            end_frame = min(end_frame, self.frame_count - 1)

        sample_stride = max(1, int(round(self.fps / 5.0)))
        detections: List[Tuple[float, float, float]] = []
        total_samples = 0

        for frame_idx in range(start_frame, end_frame + 1, sample_stride):
            self.cap.set(self._cv2.CAP_PROP_POS_FRAMES, frame_idx)
            ok, frame = self.cap.read()
            if not ok or frame is None:
                # Stop sampling at the first unreadable frame.
                break

            total_samples += 1
            rgb_frame = self._cv2.cvtColor(frame, self._cv2.COLOR_BGR2RGB)
            results = detector.process(rgb_frame)
            if not (results and results.detections):
                continue

            best = max(
                results.detections,
                key=lambda det: det.score[0] if det.score else 0.0,
            )
            bbox = best.location_data.relative_bounding_box
            left = _clamp(bbox.xmin, 0.0, 1.0)
            width = _clamp(bbox.width, 0.0, 1.0)
            height = _clamp(bbox.height, 0.0, 1.0)
            if width <= 0 or height <= 0:
                # Degenerate box: counted as a sample but not as a detection.
                continue
            right = _clamp(left + width, 0.0, 1.0)
            center = _clamp(left + width / 2.0, 0.0, 1.0)
            detections.append(
                (left * self.scaled_width, right * self.scaled_width, center * self.scaled_width)
            )

        if not detections:
            return None

        lefts, rights, centers = zip(*detections)
        min_left = min(lefts)
        max_right = max(rights)
        span = max_right - min_left
        # Pad the covered span by the larger of 8% of the crop width and 25%
        # of the span itself.
        base_margin = 0.08 * self.crop_width
        adaptive_margin = span * 0.25
        margin = max(base_margin, adaptive_margin)
        padded_left = max(0.0, min_left - margin)
        padded_right = min(float(self.scaled_width), max_right + margin)

        if padded_right - padded_left >= self.crop_width:
            # Faces span more than one crop width: centre on the whole span.
            center_px = (padded_left + padded_right) / 2.0
        else:
            center_px = statistics.median(centers)

        crop_x = int(round(center_px - self.crop_width / 2.0))
        crop_x = max(0, min(self.max_x, crop_x))

        detections_count = len(detections)
        # Share of sampled frames on which a face was found.
        confidence = detections_count / max(1, total_samples)

        return CropWindow(
            x=crop_x,
            y=0,
            width=self.crop_width,
            height=self.crop_height,
            confidence=confidence,
            detections=detections_count,
            samples=total_samples,
        )


def apply_face_tracking(video_path: Path, highlights: Sequence[HighlightCandidate]) -> None:
    """Attach a face-centred crop window to each highlight when possible.

    Does nothing for an empty sequence, when MediaPipe/OpenCV are missing, or
    when the video does not support cropping. Every highlight for which
    ``FaceCropAnalyzer.compute_window`` returns a window gets it stored in
    ``highlight.crop``. Errors are reported on stdout and never propagated.
    """
    if not highlights:
        return

    if not _mp_face_module_available():
        print(
            "   [!] MediaPipe/OpenCV nao encontrados. Instale 'mediapipe' e 'opencv-python' "
            "para habilitar o enquadramento automatico."
        )
        return

    try:
        mp_module = ensure_mediapipe_module()
        with FaceCropAnalyzer(video_path) as analyzer:
            if not analyzer.supports_crop:
                print("   [!] Video ja e suficientemente estreito. Mantendo enquadramento original.")
                return

            with mp_module.solutions.face_detection.FaceDetection(
                model_selection=1, min_detection_confidence=0.5
            ) as detector:
                print("   -> Detectando cabecas com MediaPipe para centralizar o corte vertical...")
                for number, candidate in enumerate(highlights, 1):
                    window = analyzer.compute_window(candidate.start, candidate.end, detector)
                    if not window:
                        print(
                            f"      Corte #{number}: nenhum rosto consistente encontrado; usando enquadramento central."
                        )
                        continue
                    candidate.crop = window
                    print(
                        f"      Corte #{number}: rosto localizado (conf={window.confidence:.2f}, "
                        f"amostras={window.detections}/{window.samples})."
                    )
    except RuntimeError as exc:
        # Raised by FaceCropAnalyzer when the video cannot be analysed.
        print(f"   [!] Face tracking desabilitado: {exc}")
    except Exception as exc:  # pragma: no cover - depends on external libs
        print(f"   [!] Erro inesperado ao usar MediaPipe ({exc}); continuando sem face tracking.")


def run_ffmpeg(cmd: Sequence[str]) -> None:
    """Run *cmd* as a subprocess with its output captured.

    Raises:
        RuntimeError: if the process exits with a non-zero status; its
            captured stderr is written to ``sys.stderr`` first.
    """
    completed = subprocess.run(cmd, capture_output=True, text=True)
    if completed.returncode == 0:
        return
    sys.stderr.write("FFmpeg erro:\n")
    sys.stderr.write(completed.stderr)
    raise RuntimeError("FFmpeg falhou")


def escape_ffmpeg_sub_path(sub_path: Path) -> str:
    """Return the resolved POSIX form of *sub_path* escaped for a filter argument.

    Backslashes, colons and single quotes are each prefixed with a backslash.
    """
    escapes = str.maketrans({"\\": "\\\\", ":": "\\:", "'": "\\'"})
    posix_path = sub_path.resolve().as_posix()
    return posix_path.translate(escapes)


def build_filter_chain(
    sub_path: Path,
    crop: Optional[CropWindow],
    mode: str,
    bg_input_label: Optional[str] = None,
    title_sub_path: Optional[Path] = None,
) -> str:
    """Build the ffmpeg ``-filter_complex`` graph for one export variant.

    The graph reads ``[0:v]`` and always ends in the output label ``[outv]``.

    Args:
        sub_path: ASS file burned in with the ``subtitles`` filter.
        crop: Optional crop window used in ``"fill"`` mode; when absent a
            default ``crop=1080:1920`` is applied.
        mode: ``"fill"`` (scale to height 1920, then crop) or ``"fit"``
            (scale to fit inside 1080x1920 and overlay on a background).
        bg_input_label: Label of an ffmpeg input to use as the background in
            fit mode (e.g. ``"1:v"``). Without it the background image file
            is loaded via ``movie=`` if it exists, else a black canvas is used.
        title_sub_path: Optional second ASS file burned in after the
            background has been composited.

    Returns:
        The filter graph, with its parts joined by ``;``.

    Raises:
        ValueError: if *mode* is neither ``"fill"`` nor ``"fit"``.
    """
    subtitle_source = escape_ffmpeg_sub_path(sub_path)
    if mode not in {"fill", "fit"}:
        raise ValueError(f"Modo de exportacao desconhecido: {mode}")

    # Step 1: geometry of the source video.
    video_filters: List[str] = []
    if mode == "fill":
        video_filters.append("scale=-2:1920")
        if crop:
            video_filters.append(f"crop={crop.width}:{crop.height}:{crop.x}:{crop.y}")
        else:
            video_filters.append("crop=1080:1920")
    else:  # fit
        video_filters.append("scale=1080:1920:force_original_aspect_ratio=decrease")

    filter_graph_parts: List[str] = [f"[0:v]{','.join(video_filters)}[video_base]"]
    # Label of the most recently produced stream; each step consumes it.
    current_label = "video_base"

    # Point the subtitles filter at the bundled fonts when the folder exists.
    fonts_option = ""
    if FONTS_DIR.exists():
        fonts_option = f":fontsdir='{escape_ffmpeg_sub_path(FONTS_DIR)}'"

    # Step 2: burn the main subtitles onto the scaled video.
    filter_graph_parts.append(
        f"[{current_label}]subtitles='{subtitle_source}'{fonts_option}[video_sub]"
    )
    current_label = "video_sub"

    # Prepare for overlays by working in RGBA when necessary
    # The fit-specific logo is preferred in fit mode; otherwise the generic one.
    logo_path: Optional[Path] = None
    if mode == "fit" and LOGO_FIT_IMAGE_PATH.exists():
        logo_path = LOGO_FIT_IMAGE_PATH
    elif LOGO_IMAGE_PATH.exists():
        logo_path = LOGO_IMAGE_PATH
    needs_logo = logo_path is not None
    if needs_logo:
        filter_graph_parts.append(f"[{current_label}]format=rgba[video_rgba]")
        current_label = "video_rgba"
    else:
        filter_graph_parts.append(f"[{current_label}]format=yuv420p[video_yuv]")
        current_label = "video_yuv"

    # NOTE(review): on the logo path the stream was just converted to RGBA and
    # is converted to yuv420p again here, so both branches end at [video_yuv].
    if current_label != "video_yuv":
        filter_graph_parts.append(f"[{current_label}]format=yuv420p[video_yuv]")
        current_label = "video_yuv"

    # Step 3 (fit only): centre the video over a background.
    if mode == "fit":
        filter_graph_parts.append(
            f"[{current_label}]setpts=PTS-STARTPTS[video_zero]"
        )
        current_label = "video_zero"
        filter_graph_parts.append(
            f"[{current_label}]format=rgba,colorchannelmixer=aa=1[video_rgba_bg]"
        )
        current_label = "video_rgba_bg"
        # Background source priority: explicit input > image file > black.
        if bg_input_label is not None:
            filter_graph_parts.append(
                f"[{bg_input_label}]scale={BACKGROUND_TARGET_SIZE},format=rgba,colorchannelmixer=aa=1[bg_rgba]"
            )
        elif BACKGROUND_IMAGE_PATH.exists():
            bg_source = escape_ffmpeg_sub_path(BACKGROUND_IMAGE_PATH)
            filter_graph_parts.append(
                f"movie='{bg_source}',scale={BACKGROUND_TARGET_SIZE},format=rgba,colorchannelmixer=aa=1[bg_rgba]"
            )
        else:
            filter_graph_parts.append("color=color=black:size=1080x1920,format=rgba,colorchannelmixer=aa=1[bg_rgba]")
        filter_graph_parts.append(
            f"[bg_rgba][{current_label}]overlay=(main_w-overlay_w)/2:(main_h-overlay_h)/2:shortest=1[video_with_bg]"
        )
        current_label = "video_with_bg"

    # Step 4: optional title-only subtitles, drawn after the background step.
    if title_sub_path:
        title_source = escape_ffmpeg_sub_path(title_sub_path)
        filter_graph_parts.append(
            f"[{current_label}]subtitles='{title_source}'{fonts_option}[video_with_title]"
        )
        current_label = "video_with_title"

    # Step 5: optional logo, horizontally centred at LOGO_TOP_MARGIN.
    if needs_logo and logo_path:
        logo_source = escape_ffmpeg_sub_path(logo_path)
        filter_graph_parts.append(
            f"movie='{logo_source}',scale=-1:{LOGO_TARGET_HEIGHT},format=rgba[logo]"
        )
        filter_graph_parts.append(
            f"[{current_label}][logo]overlay=(main_w-overlay_w)/2:{LOGO_TOP_MARGIN}:eval=init:format=auto[video_logo]"
        )
        current_label = "video_logo"

    # Final conversion: yuv420p with square pixels, exposed as [outv].
    filter_graph_parts.append(f"[{current_label}]format=yuv420p,setsar=1[outv]")

    return ";".join(filter_graph_parts)


def prepare_highlight_subtitles(
    highlight: HighlightCandidate,
    output_dir: Path,
    base_name: str,
    fit_title_margin: int = 200,
) -> Tuple[Path, Path, Path, Optional[Path]]:
    """Write every subtitle file needed to export *highlight*.

    Creates *output_dir* if needed and writes:

    * ``<base_name>.ass``: fill/zoom subtitles including the title;
    * ``<base_name>_fit.ass``: fit subtitles without the title;
    * ``<base_name>_fit_title.ass``: fit title only (when there is a title);
    * ``<base_name>.srt``: plain SRT subtitles.

    Args:
        highlight: Highlight to render.
        output_dir: Directory receiving the files.
        base_name: Common file-name stem.
        fit_title_margin: Accepted for backward compatibility. It has no
            effect: the fit title position is derived from the logo
            constants instead.

    Returns:
        ``(fill_ass, fit_ass, srt, fit_title_ass_or_None)``.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    ass_fill_path = output_dir / f"{base_name}.ass"
    ass_fit_path = output_dir / f"{base_name}_fit.ass"
    srt_path = output_dir / f"{base_name}.srt"

    # Push the fill title further down when a logo will be drawn above it.
    fill_margin = 120
    if LOGO_IMAGE_PATH.exists():
        fill_margin += LOGO_TITLE_MARGIN_EXTRA

    write_ass_subtitles(highlight, ass_fill_path, title_margin_v=fill_margin, variant_mode="fill")

    fit_title_path: Optional[Path] = output_dir / f"{base_name}_fit_title.ass"
    # The fit title sits below the logo, plus a fixed additional offset.
    title_overlay_margin = LOGO_TOP_MARGIN + LOGO_TARGET_HEIGHT + TITLE_BELOW_LOGO_GAP
    title_overlay_margin += FIT_TITLE_ADDITIONAL_OFFSET
    has_title_fit = write_ass_subtitles(
        highlight,
        ass_fit_path,
        title_margin_v=title_overlay_margin,
        exclude_title=True,
        title_only_path=fit_title_path,
        variant_mode="fit",
    )
    if not has_title_fit:
        # No title: make sure no stale title-only file is reported or left over.
        fit_title_path.unlink(missing_ok=True)
        fit_title_path = None

    write_srt_subtitles(highlight, srt_path)
    return ass_fill_path, ass_fit_path, srt_path, fit_title_path


def export_highlight_variant(
    video_path: Path,
    highlight: HighlightCandidate,
    output_dir: Path,
    base_name: str,
    variant: str,
    ass_path: Path,
    mode: str,
    title_ass_path: Optional[Path] = None,
) -> Path:
    """Render one variant of *highlight* with ffmpeg and return the output path.

    The output is ``<output_dir>/<base_name>_<variant>_tiktok.mp4``. In
    ``"fill"`` mode the highlight's ``crop`` attribute (if any) is used; in
    ``"fit"`` mode the background image is added as a looped second input
    when it exists. After a successful run the title-only ASS file, if given
    and present, is deleted (deletion errors are ignored).

    Setting the ``DEBUG_FILTER_GRAPH`` environment variable prints the filter
    graph and the full command.
    """
    final_path = output_dir / f"{base_name}_{variant}_tiktok.mp4"

    crop_to_use = getattr(highlight, "crop", None) if mode == "fill" else None

    use_background = mode == "fit" and BACKGROUND_IMAGE_PATH.exists()
    if use_background:
        background_args = ["-loop", "1", "-i", str(BACKGROUND_IMAGE_PATH)]
        bg_label: Optional[str] = "1:v"
    else:
        background_args = []
        bg_label = None

    filter_graph = build_filter_chain(
        ass_path,
        crop_to_use,
        mode,
        bg_input_label=bg_label,
        title_sub_path=title_ass_path,
    )
    debug_filters = os.environ.get("DEBUG_FILTER_GRAPH")
    if debug_filters:
        print(f"[DEBUG] filter graph ({mode}): {filter_graph}")

    gop_size = str(72)
    cmd = [
        "ffmpeg", "-y",
        # Trim the input to the highlight window.
        "-ss", f"{highlight.start:.3f}",
        "-to", f"{highlight.end:.3f}",
        "-i", str(video_path),
        *background_args,
        "-filter_complex", filter_graph,
        "-map", "[outv]",
        "-map", "0:a?",
        # Video encoding.
        "-c:v", "libx264",
        "-preset", "slow",
        "-profile:v", "high",
        "-level", "4.1",
        "-b:v", "10M",
        "-maxrate", "12M",
        "-bufsize", "20M",
        "-g", gop_size,
        "-keyint_min", gop_size,
        "-pix_fmt", "yuv420p",
        "-movflags", "+faststart",
        # Audio encoding.
        "-c:a", "aac",
        "-b:a", "192k",
        "-ar", "44100",
        "-ac", "2",
        str(final_path),
    ]
    if debug_filters:
        print("[DEBUG] ffmpeg cmd:", " ".join(cmd))

    print(f"-> Gerando {final_path.name} ({highlight.duration:.1f}s, modo={variant})...")
    run_ffmpeg(cmd)

    # The title-only script is a temporary artefact of this export.
    if title_ass_path and title_ass_path.exists():
        try:
            title_ass_path.unlink()
        except OSError:
            pass
    return final_path


def run_pipeline(args: argparse.Namespace) -> PipelineSummary:
    """Run the full cut-generation pipeline for a single input video.

    The stages mirror the progress messages printed to stdout:
    transcription, optional AI spelling review, highlight selection,
    vertical framing, and export of the cuts plus their sidecar files.

    Args:
        args: Parsed options. This function reads ``input``, ``model``,
            ``lang``, ``variant``, ``top``, ``min_duration``,
            ``max_duration`` and ``output_dir``; ``args`` is also handed to
            ``resolve_ai_config``.

    Returns:
        A ``PipelineSummary`` with the input path, output directory, run tag,
        start/finish timestamps and one ``GeneratedCut`` per exported cut.

    Raises:
        RuntimeError: If no speech segments are found, or no highlight
            candidate could be selected.
        ValueError: If no export format ends up selected.
        OSError: If the output directory cannot be created or the portal
            article / caption text files cannot be written.
    """
    started_at = datetime.now(UTC)
    video_path = resolve_input_file(str(args.input or "").strip())

    print("1/5 Transcrevendo audio com Whisper...")
    whisper_module = ensure_whisper_module()
    model = whisper_module.load_model(args.model)
    result = model.transcribe(str(video_path), task="transcribe", language=args.lang)
    segments = load_segments(result)
    if not segments:
        raise RuntimeError("Nenhum trecho de fala foi detectado.")
    normalize_segments_in_place(segments)

    ai_config = resolve_ai_config(args)
    # Unknown or missing variant values fall back to exporting both formats.
    variant_option = (getattr(args, "variant", "both") or "both").lower()
    if variant_option not in {"zoom", "fit", "both"}:
        variant_option = "both"
    generate_zoom = variant_option in {"zoom", "both"}
    generate_fit = variant_option in {"fit", "both"}
    if not (generate_zoom or generate_fit):
        # Defensive guard: the normalization above always enables a format.
        raise ValueError("Nenhum formato de corte selecionado para exportacao.")
    selected_formats = []
    if generate_zoom:
        selected_formats.append("zoom")
    if generate_fit:
        selected_formats.append("fit")
    print(f"   -> Format(o/s) selecionado(s): {', '.join(selected_formats)}")

    if FAST_MODE:
        print("2/5 Revisando ortografia com IA... (pulado - modo rapido)")
    else:
        print("2/5 Revisando ortografia com IA...")
        corrected = correct_segments_with_ai(segments, ai_config)
        if corrected > 0:
            print(f"   -> {corrected} trechos ajustados.")
        elif ai_config:
            print("   -> Nenhuma correcao adicional foi necessaria.")
        else:
            print("   -> IA indisponivel; mantendo transcricao original.")

    print("3/5 Selecionando cortes virais...")
    desired_variations = max(1, args.top)
    ai_highlights: List[HighlightCandidate] = []
    if ai_config:
        print("   -> Consultando IA para sugerir os melhores trechos...")
        ai_highlights = select_highlights_with_ai(
            segments, args.min_duration, args.max_duration, desired_variations, ai_config
        )
        if ai_highlights:
            for i, highlight in enumerate(ai_highlights, start=1):
                if highlight.reason:
                    print(f"      IA #{i}: {highlight.reason}")
        else:
            print("      [!] Nenhum corte valido retornado pela IA.")
    else:
        print("   -> IA desabilitada; usando heuristica local.")

    print("   -> Avaliando heuristica local para cortes alternativos...")
    # Ask the heuristic for more candidates than needed so that near-duplicates
    # can be discarded below and the requested count can still be met.
    heuristic_limit = max(desired_variations * 5, desired_variations + 5)
    heuristic_highlights = find_highlights(segments, args.min_duration, args.max_duration, heuristic_limit)
    if not heuristic_highlights:
        print("      [!] Heuristica nao encontrou cortes validos.")

    selected_highlights: List[HighlightCandidate] = []

    def append_unique(
        target: List[HighlightCandidate], candidate: HighlightCandidate, tolerance: float = 0.5
    ) -> bool:
        """Append ``candidate`` unless ``target`` already holds a similar one."""
        for existing in target:
            if highlights_are_similar(existing, candidate, tolerance=tolerance):
                return False
        target.append(candidate)
        return True

    # Selection order: AI suggestions first, then heuristic candidates, then the
    # combined pool, each sorted by descending score.
    if ai_highlights:
        for candidate in sorted(ai_highlights, key=lambda c: c.score, reverse=True):
            append_unique(selected_highlights, candidate)
            if len(selected_highlights) >= desired_variations:
                break

    if len(selected_highlights) < desired_variations and heuristic_highlights:
        for candidate in sorted(heuristic_highlights, key=lambda c: c.score, reverse=True):
            append_unique(selected_highlights, candidate)
            if len(selected_highlights) >= desired_variations:
                break

    combined_pool = sorted(ai_highlights + heuristic_highlights, key=lambda c: c.score, reverse=True)
    for candidate in combined_pool:
        if len(selected_highlights) >= desired_variations:
            break
        append_unique(selected_highlights, candidate)

    # Still short: generate fallback candidates and retry with smaller
    # tolerance values passed to the similarity check.
    if len(selected_highlights) < desired_variations:
        needed = desired_variations - len(selected_highlights)
        fallback_candidates = generate_fallback_highlights(
            segments,
            min_dur=args.min_duration,
            max_dur=args.max_duration,
            required=needed,
            existing=selected_highlights,
        )
        for candidate in fallback_candidates:
            if len(selected_highlights) >= desired_variations:
                break
            append_unique(selected_highlights, candidate, tolerance=0.35)

    if len(selected_highlights) < desired_variations and heuristic_highlights:
        for candidate in heuristic_highlights:
            if len(selected_highlights) >= desired_variations:
                break
            append_unique(selected_highlights, candidate, tolerance=0.2)

    if not selected_highlights:
        raise RuntimeError("Nao foi possivel encontrar cortes candidatos.")

    assign_titles_to_highlights(selected_highlights, ai_config)

    print("4/5 Ajustando enquadramento vertical...")
    apply_face_tracking(video_path, selected_highlights)

    output_dir = Path(args.output_dir)
    # The article/caption files below are written straight into output_dir
    # without any error handling, so make sure it exists first (idempotent).
    output_dir.mkdir(parents=True, exist_ok=True)
    input_slug = slugify_for_filename(video_path.stem)
    timestamp_tag = datetime.now(SAO_PAULO_TZ).strftime("%Y%m%d_%H%M%S")
    run_tag = f"{timestamp_tag}_{input_slug}" if input_slug else timestamp_tag
    generated_files: List[Path] = []
    highlight_results: List[GeneratedCut] = []

    print("5/5 Criando cortes verticais com legendas dinamicas...")
    for i, highlight in enumerate(selected_highlights, start=1):
        print(
            f"   Corte #{i}: {fmt_ts(highlight.start)} ate {fmt_ts(highlight.end)} "
            f"({highlight.duration:.1f}s) score={highlight.score:.3f}"
        )
        origin_label = "IA" if highlight.origin == "ai" else "Heuristica" if highlight.origin == "heuristic" else highlight.origin
        print(f"   Origem: {origin_label}")
        if highlight.title:
            print(f"   Titulo: {highlight.title}")
        if highlight.reason:
            print(f"   Motivo: {highlight.reason}")
        preview_text = highlight.preview()
        print(f"   Preview: {preview_text}")
        base_name = build_output_stem(i, highlight, run_tag)

        caption_path: Optional[Path] = None
        portal_article_path: Optional[Path] = None
        portal_article_title = ""
        caption_text: Optional[str] = None
        original_title = highlight.title.strip() if highlight.title else ""
        if ai_config and ai_config.provider == "gemini":
            # Generate the article first; the caption is only attempted when
            # an article was produced.
            article_info = generate_portal_article(highlight, ai_config)
            if article_info:
                portal_article_title = article_info["title"]
                portal_article_path = output_dir / f"{base_name}_portal.txt"
                article_text = f"{portal_article_title}\n\n{article_info['article'].strip()}\n"
                portal_article_path.write_text(article_text, encoding="utf-8")
                print(f"   Materia para portal salva em {portal_article_path.name}")
                generated_files.append(portal_article_path)
                # The caption text is later used as the source for the short headline.
                caption = generate_social_caption(highlight, ai_config)
                if caption:
                    caption_text = caption
                    caption_path = output_dir / f"{base_name}_caption.txt"
                    caption_path.write_text(caption + "\n", encoding="utf-8")
                    print(f"   Legenda social salva em {caption_path.name}")
                    # Track the caption like the portal article so it shows up
                    # in the final list of generated files.
                    generated_files.append(caption_path)
                else:
                    print("   [!] Nao foi possivel gerar legenda social para este corte.")
            else:
                # No article => no caption is generated for this cut.
                print("   [!] Nao foi possivel gerar materia para portal neste corte.")
        else:
            print("   [!] Legenda social e materia para portal requerem provider GEMINI.")

        # Title resolution: the webhook gets the caption (or the preview) and
        # falls back to the pre-assigned title or a sanitised, truncated source.
        title_source = caption_text or preview_text
        fallback_title = original_title or force_uppercase_title(
            sanitise_title_text((title_source or "")[:MAX_TITLE_LEN])
        )
        highlight.title = build_title_from_webhook(title_source, fallback_title)

        title_note = (highlight.title or "").strip()
        if title_note:
            title_json = {"title": title_note}
            title_path = output_dir / f"{base_name}_title.txt"
            try:
                title_path.write_text(json.dumps(title_json, ensure_ascii=False) + "\n", encoding="utf-8")
                generated_files.append(title_path)
            except OSError as exc:
                print(f"   [!] Nao foi possivel salvar titulo em texto: {exc}")

        ass_fill_path, ass_fit_path, srt_path, ass_fit_title_path = prepare_highlight_subtitles(
            highlight, output_dir, base_name
        )
        zoom_path: Optional[Path] = None
        fit_path: Optional[Path] = None
        if generate_zoom:
            zoom_path = export_highlight_variant(
                video_path,
                highlight,
                output_dir,
                base_name=base_name,
                variant="zoom",
                ass_path=ass_fill_path,
                mode="fill",
            )
            generated_files.append(zoom_path)
        if generate_fit:
            fit_path = export_highlight_variant(
                video_path,
                highlight,
                output_dir,
                base_name=base_name,
                variant="fit",
                ass_path=ass_fit_path,
                mode="fit",
                title_ass_path=ass_fit_title_path,
            )
            generated_files.append(fit_path)
        highlight_results.append(
            GeneratedCut(
                index=i,
                base_name=base_name,
                start=highlight.start,
                end=highlight.end,
                duration=highlight.duration,
                score=highlight.score,
                origin=highlight.origin,
                title=highlight.title,
                reason=highlight.reason,
                preview=preview_text,
                zoom_video=zoom_path,
                fit_video=fit_path,
                ass_fill=ass_fill_path,
                ass_fit=ass_fit_path,
                srt=srt_path,
                caption=caption_path,
                portal_article=portal_article_path,
                portal_title=portal_article_title,
            )
        )
        # Per-cut JSON sidecar with timing, transcript segments and crop data.
        highlight_metadata = {
            "base_name": base_name,
            "run_tag": run_tag,
            "index": i,
            "input_path": str(video_path.resolve()),
            "input_name": video_path.name,
            "output_dir": str(output_dir.resolve()),
            "start": highlight.start,
            "end": highlight.end,
            "duration": highlight.duration,
            "score": highlight.score,
            "origin": highlight.origin,
            "reason": highlight.reason,
            "preview": preview_text,
            "title": highlight.title,
            "segments": [
                {
                    "start": seg.start,
                    "end": seg.end,
                    "text": seg.text,
                    "avg_logprob": seg.avg_logprob,
                    "no_speech_prob": seg.no_speech_prob,
                }
                for seg in highlight.segments
            ],
            "crop": {
                "x": highlight.crop.x,
                "y": highlight.crop.y,
                "width": highlight.crop.width,
                "height": highlight.crop.height,
                "confidence": highlight.crop.confidence,
                "detections": highlight.crop.detections,
                "samples": highlight.crop.samples,
            }
            if highlight.crop
            else None,
            "generated_formats": {
                "zoom": zoom_path is not None,
                "fit": fit_path is not None,
            },
            "updated_at": isoformat_sao_paulo(),
        }
        metadata_path = output_dir / f"{base_name}_data.json"
        try:
            metadata_path.write_text(
                json.dumps(highlight_metadata, ensure_ascii=False, indent=2) + "\n",
                encoding="utf-8",
            )
            generated_files.append(metadata_path)
        except OSError as exc:
            print(f"   [!] Nao foi possivel salvar metadata do corte: {exc}")

    # Run-level markers: the source file name and a small JSON with run metadata.
    source_marker = output_dir / f"corte_{run_tag}_source.txt"
    try:
        source_marker.write_text(video_path.name + "\n", encoding="utf-8")
        generated_files.append(source_marker)
    except OSError as exc:
        print(f"   [!] Nao foi possivel registrar arquivo raiz: {exc}")

    meta_payload = {
        "input_file": video_path.name,
        "generated_at": isoformat_sao_paulo(),
    }
    # Optional requester name, supplied through the environment.
    request_user = os.getenv("CORTE_REQUEST_USER", "").strip()
    if request_user:
        meta_payload["generated_by"] = request_user
    meta_marker = output_dir / f"corte_{run_tag}_meta.json"
    try:
        meta_marker.write_text(json.dumps(meta_payload, ensure_ascii=False, indent=2), encoding="utf-8")
        generated_files.append(meta_marker)
    except OSError as exc:
        print(f"   [!] Nao foi possivel registrar metadados: {exc}")
    print("Finalizado!")
    for path in generated_files:
        print(f"   [OK] {path}")
    print("Os arquivos .ass e .srt correspondentes foram salvos junto com o corte.")

    finished_at = datetime.now(UTC)
    return PipelineSummary(
        input_path=video_path,
        output_dir=output_dir,
        run_tag=run_tag,
        started_at=started_at,
        finished_at=finished_at,
        highlights=highlight_results,
    )


def encode_multipart_formdata(
    fields: Dict[str, str], files: List[Tuple[str, str, str, bytes]]
) -> Tuple[str, bytes]:
    """Serialize text fields and file parts into a multipart/form-data body.

    Args:
        fields: Mapping of field name to text value. Entries whose value is
            ``None`` are skipped.
        files: Tuples of ``(field_name, filename, content_type, data)``.

    Returns:
        ``(boundary, body)`` where ``boundary`` is the freshly generated
        delimiter (without the leading dashes) to advertise in the request's
        ``Content-Type`` header and ``body`` is the encoded payload.
    """

    def quote_param(raw: str) -> str:
        # Field names and filenames sit inside a quoted header parameter; a raw
        # double quote or line break would truncate the parameter or inject
        # extra headers, so escape them the way browsers do for form uploads.
        return str(raw).replace('"', "%22").replace("\r", "%0D").replace("\n", "%0A")

    boundary = f"----CortesBoundary{uuid.uuid4().hex}"
    delimiter = f"--{boundary}\r\n".encode("utf-8")
    buffer = io.BytesIO()
    for name, value in fields.items():
        if value is None:
            continue
        buffer.write(delimiter)
        buffer.write(
            f'Content-Disposition: form-data; name="{quote_param(name)}"\r\n\r\n'.encode("utf-8")
        )
        buffer.write(value.encode("utf-8"))
        buffer.write(b"\r\n")
    for field_name, filename, content_type, data in files:
        buffer.write(delimiter)
        buffer.write(
            (
                f'Content-Disposition: form-data; name="{quote_param(field_name)}"; '
                f'filename="{quote_param(filename)}"\r\n'
            ).encode("utf-8")
        )
        buffer.write(f"Content-Type: {content_type}\r\n\r\n".encode("utf-8"))
        buffer.write(data)
        buffer.write(b"\r\n")
    # Closing delimiter marks the end of the multipart body.
    buffer.write(f"--{boundary}--\r\n".encode("utf-8"))
    return boundary, buffer.getvalue()


def send_results_to_webhook(
    webhook_url: str,
    summary: Optional[PipelineSummary],
    job_info: Dict[str, Any],
    timeout: int = 30,
    request_payload: Optional[Dict[str, Any]] = None,
) -> None:
    """POST the outcome of a pipeline run to a notification webhook.

    With ``summary`` set to ``None`` a JSON failure report built from
    ``job_info`` is sent. Otherwise a multipart/form-data request is sent
    whose ``metadata`` field holds a JSON description of the run and whose
    file parts carry every generated file that exists on disk.

    Args:
        webhook_url: Destination URL; an empty value makes this a no-op.
        summary: Result of the run, or ``None`` to report a failure.
        job_info: Job context. ``status``, ``job_id`` and ``trigger_source``
            are read directly; on success any other keys are forwarded
            under ``job_info``, on failure ``error`` and ``traceback`` are
            read as well.
        timeout: Timeout in seconds passed to ``urlopen``.
        request_payload: Original trigger payload, echoed back as
            ``trigger_payload`` when provided.

    Raises:
        urllib.error.URLError: If the request fails (includes ``HTTPError``).
        OSError: If an attachment cannot be read.
    """
    if not webhook_url:
        return

    if summary is None:
        payload: Dict[str, Any] = {
            "status": job_info.get("status", "failed"),
            "job_id": job_info.get("job_id"),
            "error": job_info.get("error"),
            "traceback": job_info.get("traceback"),
            "trigger_source": job_info.get("trigger_source"),
            "finished_at": isoformat_sao_paulo(),
        }
        if request_payload is not None:
            payload["trigger_payload"] = request_payload
        data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
        req = urllib.request.Request(
            webhook_url,
            data=data,
            headers={"Content-Type": "application/json", "Accept": "application/json"},
        )
        # The response body is not used; the context manager just guarantees
        # the underlying connection is closed instead of leaked.
        with urllib.request.urlopen(req, timeout=timeout):
            pass
        return

    metadata: Dict[str, Any] = {
        "status": job_info.get("status", "completed"),
        "job_id": job_info.get("job_id"),
        "trigger_source": job_info.get("trigger_source"),
        "run_tag": summary.run_tag,
        "input": str(summary.input_path),
        "output_dir": str(summary.output_dir),
        "started_at": isoformat_sao_paulo(summary.started_at),
        "finished_at": isoformat_sao_paulo(summary.finished_at),
        "highlight_count": len(summary.highlights),
    }
    if request_payload is not None:
        metadata["trigger_payload"] = request_payload
    # Anything in job_info beyond the three promoted keys is passed through.
    extra_job_info = {k: v for k, v in job_info.items() if k not in {"status", "job_id", "trigger_source"}}
    if extra_job_info:
        metadata["job_info"] = extra_job_info

    highlight_entries: List[Dict[str, Any]] = []
    attachments: List[Tuple[str, str, str, bytes]] = []
    attachments_info: List[Dict[str, Any]] = []
    missing_files: List[Dict[str, Any]] = []

    for cut in summary.highlights:
        # NOTE(review): ass_fill, ass_fit and srt are accessed unconditionally,
        # so they are assumed to always be paths (never None) - confirm.
        highlight_entries.append(
            {
                "index": cut.index,
                "base_name": cut.base_name,
                "start": cut.start,
                "end": cut.end,
                "duration": cut.duration,
                "score": cut.score,
                "origin": cut.origin,
                "title": cut.title,
                "reason": cut.reason,
                "preview": cut.preview,
                "zoom_video": cut.zoom_video.name if cut.zoom_video else None,
                "fit_video": cut.fit_video.name if cut.fit_video else None,
                "ass_fill": cut.ass_fill.name,
                "ass_fit": cut.ass_fit.name,
                "srt": cut.srt.name,
                "caption": cut.caption.name if cut.caption else None,
                "portal_article": cut.portal_article.name if cut.portal_article else None,
                "portal_title": cut.portal_title,
            }
        )

        # Form field names carry the zero-padded cut index.
        attachment_map: List[Tuple[str, Path]] = []
        if cut.zoom_video:
            attachment_map.append((f"zoom_video_{cut.index:02d}", cut.zoom_video))
        if cut.fit_video:
            attachment_map.append((f"fit_video_{cut.index:02d}", cut.fit_video))
        attachment_map.extend(
            [
                (f"ass_fill_{cut.index:02d}", cut.ass_fill),
                (f"ass_fit_{cut.index:02d}", cut.ass_fit),
                (f"srt_{cut.index:02d}", cut.srt),
            ]
        )
        if cut.caption:
            attachment_map.append((f"caption_{cut.index:02d}", cut.caption))
        if cut.portal_article:
            attachment_map.append((f"portal_{cut.index:02d}", cut.portal_article))

        for field_name, path in attachment_map:
            if not path.exists():
                # Report the gap in the metadata rather than failing the upload.
                missing_files.append({"field": field_name, "path": str(path)})
                continue
            # Each attachment is loaded fully into memory before sending.
            data = path.read_bytes()
            content_type = mimetypes.guess_type(path.name)[0] or "application/octet-stream"
            attachments.append((field_name, path.name, content_type, data))
            attachments_info.append(
                {"field": field_name, "filename": path.name, "size": len(data), "path": str(path)}
            )

    metadata["highlights"] = highlight_entries
    if attachments_info:
        metadata["attachments"] = attachments_info
    if missing_files:
        metadata["missing_files"] = missing_files

    fields = {"metadata": json.dumps(metadata, ensure_ascii=False)}
    boundary, body = encode_multipart_formdata(fields, attachments)
    headers = {
        "Content-Type": f"multipart/form-data; boundary={boundary}",
        "Accept": "application/json",
    }
    req = urllib.request.Request(webhook_url, data=body, headers=headers)
    # Close the response explicitly; its content is not needed.
    with urllib.request.urlopen(req, timeout=timeout):
        pass


def run_webhook_server(args: argparse.Namespace) -> None:
    """Serve an HTTP endpoint that starts a pipeline job per POST request.

    POST requests to ``/``, ``/cortes`` or ``/trigger`` may carry a JSON body
    whose keys override the matching pipeline options. Each accepted request
    is answered with ``202`` right away while the job runs in a daemon
    thread; its result or failure is forwarded to the notify webhook when
    one is configured. The call blocks until interrupted with Ctrl+C.

    Args:
        args: Parsed CLI options. ``serve_host`` and ``serve_port`` define the
            bind address; the remaining values are the per-job defaults.
            ``notify_webhook`` and ``notify_timeout`` are normalized in place.
    """
    base_defaults = parse_args([])
    resolved_notify = resolve_notify_webhook(args.notify_webhook)
    args.notify_webhook = resolved_notify
    args.notify_timeout = max(1, int(args.notify_timeout))

    # Server-only options that are not copied from the server args into jobs.
    copy_exclude = {"serve_webhook", "serve_host", "serve_port"}

    class CutsWebhookHandler(BaseHTTPRequestHandler):
        """Request handler that turns POST payloads into pipeline jobs."""

        default_args = base_defaults
        server_args = args
        cors_headers = {
            "Access-Control-Allow-Origin": "*",
            "Access-Control-Allow-Methods": "POST, OPTIONS",
            "Access-Control-Allow-Headers": "Content-Type",
        }

        def log_message(self, format: str, *args: Any) -> None:  # type: ignore[override]
            # Intentionally empty: suppresses the default per-request logging.
            return

        def _set_cors_headers(self) -> None:
            for key, value in self.cors_headers.items():
                self.send_header(key, value)

        def _respond_json(self, status: int, payload: Dict[str, Any]) -> None:
            body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
            self.send_response(status)
            self._set_cors_headers()
            self.send_header("Content-Type", "application/json; charset=utf-8")
            self.send_header("Content-Length", str(len(body)))
            self.end_headers()
            self.wfile.write(body)

        def do_OPTIONS(self) -> None:  # type: ignore[override]
            # CORS preflight: headers only, no body.
            self.send_response(204)
            self._set_cors_headers()
            self.end_headers()

        def do_POST(self) -> None:  # type: ignore[override]
            path = self.path.split("?", 1)[0]
            if path not in ("/", "/cortes", "/trigger"):
                self._respond_json(404, {"error": "Endpoint desconhecido"})
                return

            # Reject a malformed length up front: a non-numeric value would
            # raise inside the handler and a negative one would make
            # rfile.read() block until the client closes the connection.
            try:
                length = int(self.headers.get("Content-Length", "0") or 0)
            except ValueError:
                length = -1
            if length < 0:
                self._respond_json(400, {"error": "Content-Length invalido"})
                return

            raw_body = self.rfile.read(length) if length else b""
            payload: Dict[str, Any] = {}
            if raw_body:
                try:
                    parsed = json.loads(raw_body.decode("utf-8") or "{}")
                except (UnicodeDecodeError, json.JSONDecodeError):
                    # Bodies that are not valid UTF-8 are rejected like bad JSON.
                    self._respond_json(400, {"error": "JSON invalido"})
                    return
                # Non-object JSON (list, number, ...) is wrapped under "data".
                payload = parsed if isinstance(parsed, dict) else {"data": parsed}

            job_id = str(payload.get("job_id") or uuid.uuid4())

            # Start from parser defaults, then overlay the server's own options.
            base_dict = {k: copy.deepcopy(getattr(self.default_args, k)) for k in vars(self.default_args)}
            for key, value in vars(self.server_args).items():
                if key in copy_exclude:
                    continue
                if key in base_dict:
                    base_dict[key] = copy.deepcopy(value)

            job_args = argparse.Namespace(**base_dict)
            overrides_unknown: Dict[str, Any] = {}
            # NOTE(review): any known option can be overridden by the request
            # body, including the input path, output directory and notify URL.
            # Confirm this endpoint is only reachable by trusted callers.
            for raw_key, raw_value in payload.items():
                attr = raw_key.replace("-", "_")
                if hasattr(job_args, attr):
                    reference = getattr(job_args, attr)
                    try:
                        coerced = coerce_type(raw_value, reference)
                    except Exception:
                        # Keep the raw value when it cannot be coerced.
                        coerced = raw_value
                    setattr(job_args, attr, coerced)
                else:
                    overrides_unknown[raw_key] = raw_value

            job_args.serve_webhook = False
            job_args.notify_webhook = resolve_notify_webhook(getattr(job_args, "notify_webhook", None))
            try:
                job_args.notify_timeout = max(
                    1, int(getattr(job_args, "notify_timeout", args.notify_timeout))
                )
            except (TypeError, ValueError):
                # A bad timeout in the payload falls back to the server's value
                # instead of crashing the handler before it can respond.
                job_args.notify_timeout = args.notify_timeout

            request_payload = dict(payload)
            if overrides_unknown:
                request_payload["unknown_fields"] = overrides_unknown

            def job_runner() -> None:
                job_context: Dict[str, Any] = {
                    "job_id": job_id,
                    "status": "running",
                    "trigger_source": "webhook",
                }
                try:
                    summary = run_pipeline(job_args)
                    job_context["status"] = "completed"
                    if job_args.notify_webhook:
                        try:
                            send_results_to_webhook(
                                job_args.notify_webhook,
                                summary,
                                job_context,
                                timeout=job_args.notify_timeout,
                                request_payload=request_payload,
                            )
                            print(
                                f"[webhook] Resultados enviados para {job_args.notify_webhook} (job {job_id})"
                            )
                        except Exception as exc:
                            # A delivery failure must not mark the job as failed.
                            print(f"[webhook] Falha ao enviar resultados do job {job_id}: {exc}")
                except Exception as exc:
                    job_context["status"] = "failed"
                    job_context["error"] = f"{type(exc).__name__}: {exc}"
                    job_context["traceback"] = traceback.format_exc()
                    if getattr(job_args, "notify_webhook", ""):
                        try:
                            send_results_to_webhook(
                                job_args.notify_webhook,
                                None,
                                job_context,
                                timeout=job_args.notify_timeout,
                                request_payload=request_payload,
                            )
                        except Exception as notify_exc:
                            print(f"[webhook] Falha ao notificar erro do job {job_id}: {notify_exc}")
                    print(f"[webhook] Job {job_id} falhou: {exc}")

            # Run the job in the background so the HTTP response is immediate.
            threading.Thread(target=job_runner, name=f"cortes-job-{job_id}", daemon=True).start()

            self._respond_json(202, {"job_id": job_id, "status": "queued"})

    address = (args.serve_host, args.serve_port)
    httpd = ThreadingHTTPServer(address, CutsWebhookHandler)
    print(f"[webhook] Servidor ativo em http://{args.serve_host}:{args.serve_port}/cortes")
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\n[webhook] Encerrando servidor...")
    finally:
        httpd.server_close()


def main(argv: Sequence[str]) -> Optional[PipelineSummary]:
    """Command-line entry point.

    Parses ``argv`` and then either starts the webhook server, processes every
    discovered video in batch mode, or processes the single configured input.

    Returns:
        The summary of the last pipeline run, or ``None`` when the webhook
        server was started or batch mode found no videos.
    """
    args = parse_args(argv)
    args.notify_webhook = resolve_notify_webhook(args.notify_webhook)
    args.notify_timeout = max(1, int(args.notify_timeout))

    if args.serve_webhook:
        run_webhook_server(args)
        return None

    def notify_summary(summary: Optional[PipelineSummary], run_args: argparse.Namespace, job_id: str) -> None:
        """Send a finished run to the notify webhook, if one is configured."""
        if summary and run_args.notify_webhook:
            completed_job = {"job_id": job_id, "status": "completed", "trigger_source": "cli"}
            try:
                send_results_to_webhook(
                    run_args.notify_webhook,
                    summary,
                    completed_job,
                    timeout=run_args.notify_timeout,
                    request_payload=None,
                )
                print(f"[webhook] Resultados enviados para {run_args.notify_webhook}")
            except Exception as exc:
                # Notification is best-effort; report the failure and move on.
                print(f"[webhook] Falha ao enviar resultados: {exc}")

    # Single-input mode.
    if not args.batch_all:
        single_summary = run_pipeline(args)
        notify_summary(single_summary, args, "cli")
        return single_summary

    # Batch mode: run the pipeline once per discovered video.
    batch_inputs = discover_batch_inputs(args)
    if not batch_inputs:
        print("Nenhum arquivo de video encontrado para processar.")
        return None

    last_summary: Optional[PipelineSummary] = None
    for batch_video in batch_inputs:
        # Each video gets its own copy of the options with batch mode off.
        per_video_args = argparse.Namespace(**vars(args))
        per_video_args.batch_all = False
        per_video_args.input = str(batch_video)
        print(f"\n=== Processando {batch_video.name} ===")
        last_summary = run_pipeline(per_video_args)
        notify_summary(last_summary, per_video_args, f"cli-{batch_video.stem}")
    return last_summary


# Script entry point: run the CLI with the arguments that follow the program name.
if __name__ == "__main__":
    main(sys.argv[1:])