# goon/app/api/scenes.py

"""GET /scenes — lista i szczegóły scen z bazy kanonicznej."""
from __future__ import annotations

import logging
import re
import uuid
from typing import Annotated

from fastapi import APIRouter, Depends, HTTPException, Query, status
from pydantic import BaseModel
from sqlalchemy import distinct, exists, func, select
from sqlalchemy.exc import IntegrityError
from sqlalchemy.orm import Session

from app.auth import require_api_key

from app.api.schemas import (
    ExternalRefOut,
    PerformerOut,
    PlaybackSourceOut,
    SceneListOut,
    SceneOut,
    StudioOut,
    TagOut,
)
from app.db import get_session
from app.models.favorite_scene import FavoriteScene
from app.models.performer import Performer
from app.models.play_progress import ScenePlayProgress
from app.models.playback_source import PlaybackSource
from app.models.scene import Scene, SceneExternalRef, ScenePerformer, SceneTag
from app.models.source import Source, SourceKind
from app.models.studio import Studio
from app.models.tag import Tag

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# All routes in this module live under /scenes and depend on require_api_key.
router = APIRouter(prefix="/scenes", tags=["scenes"], dependencies=[Depends(require_api_key)])


# Values accepted by the `sort` query parameter of GET /scenes.
_VALID_SORTS = {"created_at", "release_date", "title", "studio"}

# TTL cache for the count of scenes with a live playback source (the default,
# unfiltered list). A full scan of 1.69M scenes + EXISTS takes ~950ms; the number
# changes slowly and is only approximate (pagination header), so a 10-minute cache
# in the API process memory is an acceptable trade-off.
# "ts" holds the time.monotonic() reading of the last refresh, "val" the cached count.
_DEFAULT_COUNT_CACHE: dict = {"ts": 0.0, "val": 0}
_DEFAULT_COUNT_TTL = 600.0

# Bounded count for FILTERED lists (origin/tag/q/studio/performer/...). An exhaustive
# count with the per-row stub-filter EXISTS takes ~3-5s at 1.7M scenes (measured). We
# count only up to CAP+1 — `LIMIT` stops after CAP+1 matches are found, so the cost is
# O(min(matches, CAP)) instead of O(all). >CAP → the UI shows "{CAP}+". Pagination
# relies on has_more (fetch per_page+1), so the bounded total does NOT break
# infinite scroll.
_COUNT_CAP = 1000


def _default_scene_count(session: Session) -> int:
    """Return the number of scenes that have at least one live playback source.

    The result is kept in ``_DEFAULT_COUNT_CACHE`` and reused while it is younger
    than ``_DEFAULT_COUNT_TTL`` seconds on the monotonic clock. A cached value of
    0 is never reused, so the query is executed again on the next call.
    """
    import time as _time

    started = _time.monotonic()
    cached = _DEFAULT_COUNT_CACHE["val"]
    age = started - _DEFAULT_COUNT_CACHE["ts"]
    if cached and age < _DEFAULT_COUNT_TTL:
        return cached

    # A playback source counts as "live" when its dead_at is NULL.
    live_playback = exists(
        select(1).where(
            PlaybackSource.scene_id == Scene.id,
            PlaybackSource.dead_at.is_(None),
        )
    )
    scenes_with_playback = select(Scene.id).where(live_playback).subquery()
    total = session.execute(
        select(func.count()).select_from(scenes_with_playback)
    ).scalar_one()

    # Stamp the cache with the time taken *before* the query ran.
    _DEFAULT_COUNT_CACHE.update(ts=started, val=total)
    return total


def _split_csv(raw: str | None) -> list[str]:
    if not raw:
        return []
    return [s.strip() for s in raw.split(",") if s.strip()]


@router.get("", response_model=SceneListOut)
def list_scenes(
    session: Annotated[Session, Depends(get_session)],
    q: str | None = Query(default=None, description="Wyszukiwanie po title_normalized (trgm)"),
    studio_slug: str | None = Query(default=None, description="DEPRECATED — użyj studio_slugs"),
    studio_slugs: str | None = Query(
        default=None, description="Comma-separated studio slugs (OR)"
    ),
    tags: str | None = Query(
        default=None,
        description="Comma-separated tag slugs (AND — scena musi mieć wszystkie wybrane tagi)",
    ),
    performer_ids: str | None = Query(
        default=None,
        description="Comma-separated performer UUIDs (AND — scena musi mieć wszystkich wybranych performerów)",
    ),
    has_playback: bool | None = Query(
        default=None, description="True: tylko sceny z ≥1 playback_source"
    ),
    has_animated_thumbnail: bool | None = Query(
        default=None,
        description="True: tylko sceny z ≥1 playback_source z animated_thumbnail_url (hold-to-preview)",
    ),
    min_duration_sec: int | None = Query(default=None, ge=0),
    max_duration_sec: int | None = Query(default=None, ge=0),
    released_within_days: int | None = Query(
        default=None, ge=1,
        description="Tylko sceny released w ostatnich N dniach",
    ),
    min_quality_p: int | None = Query(
        default=None, ge=1,
        description=(
            "Minimum quality (pixele wysokości — 2160 = 4K, 1080 = FullHD). Filtruje "
            "po PlaybackSource.quality (string typu '720p' / '1080p Full HD')."
        ),
    ),
    origin: str | None = Query(
        default=None,
        description=(
            "Filtruj po playback origin (np. 'tube:hqpornercom'). Substring match — "
            "'hqporner' złapie tube:hqpornercom. Diagnostyka per-hoster."
        ),
    ),
    include_stubs: bool = Query(
        default=False,
        description=(
            "False (default): ukrywa sceny-szkielety bez release_date, < 10min, "
            "z jedynym playback z hqporner (~7-min Brazzers trailer clipy zalewają katalog)."
        ),
    ),
    sort: str = Query(default="created_at", description="created_at|release_date|title|studio"),
    page: int = Query(default=1, ge=1),
    per_page: int = Query(default=50, ge=1, le=200),
) -> SceneListOut:
    """List scenes with optional filters, sorting and offset pagination.

    All supplied filters are AND-ed together (each query parameter carries its
    own description). Scenes linked to a blacklisted performer, studio or tag
    are always excluded.

    Returns:
        A ``SceneListOut`` page. ``total`` is the cached catalogue-wide count
        when no filter is active, otherwise a count bounded by ``_COUNT_CAP``
        (``total_capped`` is True once the cap is exceeded). ``has_more`` tells
        whether a further page exists and is independent of ``total``.

    Raises:
        HTTPException: 400 when ``sort`` is not in ``_VALID_SORTS`` or when
            ``performer_ids`` contains a value that is not a valid UUID.
    """
    if sort not in _VALID_SORTS:
        raise HTTPException(status_code=400, detail=f"sort must be one of {sorted(_VALID_SORTS)}")

    base = select(Scene)

    if q:
        base = base.where(Scene.title_normalized.ilike(f"%{q.lower()}%"))

    # The deprecated single-slug parameter is merged into the multi-slug list (OR).
    studio_slug_list = _split_csv(studio_slugs)
    if studio_slug:
        studio_slug_list.append(studio_slug)
    if studio_slug_list:
        base = base.where(
            Scene.studio_id.in_(
                select(Studio.id).where(Studio.slug.in_(studio_slug_list))
            )
        )

    tag_slug_list = _split_csv(tags)
    # AND across tags: the scene must carry ALL selected tags. Each slug gets its own
    # exists() so that ticking more filters narrows the result, as users expect.
    for slug in tag_slug_list:
        base = base.where(
            exists(
                select(1)
                .select_from(SceneTag)
                .join(Tag, Tag.id == SceneTag.tag_id)
                .where(SceneTag.scene_id == Scene.id, Tag.slug == slug)
            )
        )

    perf_id_strings = _split_csv(performer_ids)
    if perf_id_strings:
        try:
            perf_ids = [uuid.UUID(s) for s in perf_id_strings]
        except ValueError as e:
            raise HTTPException(status_code=400, detail=f"invalid performer UUID: {e}") from e
        # AND across performers (same approach as for tags).
        for pid in perf_ids:
            base = base.where(
                exists(
                    select(1)
                    .select_from(ScenePerformer)
                    .where(
                        ScenePerformer.scene_id == Scene.id,
                        ScenePerformer.performer_id == pid,
                    )
                )
            )

    if has_playback is True:
        # Only scenes with at least one LIVE playback_source (dead_at IS NULL).
        base = base.where(
            exists(
                select(1).where(
                    PlaybackSource.scene_id == Scene.id,
                    PlaybackSource.dead_at.is_(None),
                )
            )
        )
    elif has_playback is False:
        # Only scenes with no live playback_source at all.
        base = base.where(
            ~exists(
                select(1).where(
                    PlaybackSource.scene_id == Scene.id,
                    PlaybackSource.dead_at.is_(None),
                )
            )
        )

    if origin:
        # Substring match on origin — 'hqporner' matches 'tube:hqpornercom'.
        base = base.where(
            exists(
                select(1).where(
                    PlaybackSource.scene_id == Scene.id,
                    PlaybackSource.dead_at.is_(None),
                    PlaybackSource.origin.ilike(f"%{origin}%"),
                )
            )
        )

    # Blacklists — global exclusions. A scene is dropped when it has ANY blacklisted
    # performer, belongs to a blacklisted studio, or carries ANY blacklisted tag.
    from app.models.blacklist import (
        BlacklistedPerformer,
        BlacklistedStudio,
        BlacklistedTag,
    )
    base = base.where(
        ~exists(
            select(1)
            .select_from(ScenePerformer)
            .join(BlacklistedPerformer, BlacklistedPerformer.performer_id == ScenePerformer.performer_id)
            .where(ScenePerformer.scene_id == Scene.id)
        )
    )
    base = base.where(
        ~Scene.studio_id.in_(select(BlacklistedStudio.studio_id))
    )
    base = base.where(
        ~exists(
            select(1)
            .select_from(SceneTag)
            .join(BlacklistedTag, BlacklistedTag.tag_id == SceneTag.tag_id)
            .where(SceneTag.scene_id == Scene.id)
        )
    )

    if has_animated_thumbnail:
        base = base.where(
            exists(
                select(1).where(
                    PlaybackSource.scene_id == Scene.id,
                    PlaybackSource.dead_at.is_(None),
                    PlaybackSource.animated_thumbnail_url.isnot(None),
                )
            )
        )

    if min_duration_sec is not None:
        base = base.where(Scene.duration_sec >= min_duration_sec)
    if max_duration_sec is not None:
        base = base.where(Scene.duration_sec <= max_duration_sec)

    if released_within_days is not None:
        from datetime import date, timedelta
        cutoff = date.today() - timedelta(days=released_within_days)
        base = base.where(Scene.release_date >= cutoff)

    if min_quality_p is not None:
        # PlaybackSource.quality is a free-form string — we take the first run of
        # digits ('1080p', '1080p Full HD', '2160p'). Heuristic: it is enough for the
        # scene to have ONE live playback whose numeric quality is >= min.
        # '4K'/'UHD' are treated as aliases for 2160.
        from sqlalchemy import Integer, cast, or_
        numeric_q = cast(
            func.coalesce(func.substring(PlaybackSource.quality, r"\d+"), "0"),
            Integer,
        )
        conds = [numeric_q >= min_quality_p]
        if min_quality_p <= 2160:
            conds.append(PlaybackSource.quality.ilike("%4k%"))
            conds.append(PlaybackSource.quality.ilike("%uhd%"))
        base = base.where(
            exists(
                select(1).where(
                    PlaybackSource.scene_id == Scene.id,
                    PlaybackSource.dead_at.is_(None),
                    PlaybackSource.quality.isnot(None),
                    or_(*conds),
                )
            )
        )

    if not include_stubs:
        # Stub-scene heuristic: a scene WITHOUT release_date AND WITHOUT a canonical
        # (TPDB/StashDB) external ref AND WITHOUT any ScenePerformer link.
        # Per the original author's note, ScenePerformer links are added by the
        # continuous worker (search-by-name forces a link), so per-performer search
        # results are never stubs; this only hides anonymous tube-only scenes from
        # the newUrl/categories ingest that were never matched to a performer.
        canonical_exists = exists(
            select(1)
            .select_from(SceneExternalRef)
            .join(Source, Source.id == SceneExternalRef.source_id)
            .where(SceneExternalRef.scene_id == Scene.id)
            .where(Source.kind.in_([SourceKind.tpdb, SourceKind.stashdb]))
        )
        has_performer = exists(
            select(1).where(ScenePerformer.scene_id == Scene.id)
        )
        # NOT a stub when: has a canonical ref OR has a release_date OR has a performer.
        base = base.where(
            Scene.release_date.is_not(None) | canonical_exists | has_performer
        )

    # Count strategy:
    #  - PURE default (no filter at all): cached catalogue-wide count
    #    (full scan + EXISTS ~950ms, TTL 10 min — see _default_scene_count).
    #  - FILTERED (origin/tag/q/studio/performer/quality/duration/...): count bounded
    #    by _COUNT_CAP. An exhaustive count with the per-row stub EXISTS takes ~3-5s;
    #    the bounded one stops after CAP+1 hits. Clients paginate on has_more (below),
    #    not on total, so the cap does not break infinite scroll.
    # Every test here mirrors the truthiness test used when the filter is applied
    # above, so an empty `origin` (which applies no filter) still counts as default.
    _is_pure_default = (
        not include_stubs and not q and not studio_slug_list and not tag_slug_list
        and not perf_id_strings and not origin and has_playback is None
        and not has_animated_thumbnail and min_duration_sec is None
        and max_duration_sec is None and released_within_days is None
        and min_quality_p is None
    )
    total_capped = False
    if _is_pure_default:
        total = _default_scene_count(session)
    else:
        cnt = session.execute(
            select(func.count()).select_from(base.limit(_COUNT_CAP + 1).subquery())
        ).scalar_one()
        if cnt > _COUNT_CAP:
            total, total_capped = _COUNT_CAP, True
        else:
            total = cnt

    # Sort: every ordering ends with the unique Scene.id. created_at alone is not
    # unique (bulk-ingested rows can share a timestamp), and OFFSET pagination over
    # a non-unique ordering may repeat or skip rows between pages.
    tie_break = Scene.id.asc()
    if sort == "release_date":
        ordered = base.order_by(
            Scene.release_date.desc().nullslast(), Scene.created_at.desc(), tie_break
        )
    elif sort == "title":
        ordered = base.order_by(
            Scene.title_normalized.asc(), Scene.created_at.desc(), tie_break
        )
    elif sort == "studio":
        # Scenes without a studio go last; within a studio — newest first.
        ordered = (
            base.outerjoin(Studio, Studio.id == Scene.studio_id)
            .order_by(
                Studio.name_normalized.asc().nullslast(),
                Scene.release_date.desc().nullslast(),
                Scene.created_at.desc(),
                tie_break,
            )
        )
    else:  # created_at
        ordered = base.order_by(
            Scene.created_at.desc(), Scene.release_date.desc().nullslast(), tie_break
        )

    # Fetch per_page+1 — the presence of the (per_page+1)-th row means another page
    # exists. This is the source of truth for pagination, independent of the bounded
    # `total`. The surplus row is dropped before serialization.
    rows = (
        session.execute(ordered.offset((page - 1) * per_page).limit(per_page + 1))
        .scalars()
        .all()
    )
    has_more = len(rows) > per_page
    rows = rows[:per_page]

    items = _build_scenes_out_batch(session, list(rows))

    return SceneListOut(
        items=items,
        total=total,
        page=page,
        per_page=per_page,
        has_more=has_more,
        total_capped=total_capped,
    )


@router.get("/{scene_id}", response_model=SceneOut)
def get_scene(
    scene_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> SceneOut:
    """Return a single scene with its relations; respond 404 if the id is unknown."""
    found = session.get(Scene, scene_id)
    if found is not None:
        return _build_scene_out(session, found)
    raise HTTPException(status_code=404, detail="scene not found")


def _needs_proxy(url: str) -> bool:
    """Wszystkie thumbnaile z playback_sources są proxowane przez backend.
    Większość CDN-ów porn-tube'ów wymaga Refera (hqporner, mypornerleak/58img,
    inne sxyprn/eporner CDN-y) — expo-image nie wysyła Referera.
    Self-hosted lub backend-internal URL-e (zaczynające się od `/`) skipujemy."""
    return url.startswith("http") and not url.startswith("/proxy/")


def _wrap_image_proxy(url: str, referer: str) -> str:
    """Build a ``/proxy/img/{token}/img.{ext}`` path for a thumbnail URL.

    The token is produced by ``make_token`` from the URL and the Referer, so the
    client never has to supply the Referer itself. The TTL is long (30 days)
    because thumbnails are stable.
    """
    from app.api.stream_proxy import make_token

    thirty_days_sec = 30 * 24 * 3600
    token = make_token(url, referer, ttl_sec=thirty_days_sec)

    import os as _os

    # Keep the original file extension (query string removed, "jpg" when absent)
    # so the proxied path still hints at the content type.
    without_query = url.partition("?")[0]
    suffix = _os.path.splitext(without_query)[1]
    ext = suffix.lstrip(".") or "jpg"
    return f"/proxy/img/{token}/img.{ext}"


def _build_scenes_out_batch(session: Session, scenes: list[Scene]) -> list[SceneOut]:
    """Batch-fetch wszystkich relacji dla N scen w 7 zapytaniach (zamiast 7×N).

    Eliminuje N+1 z `_build_scene_out` w listach scen — `/scenes?per_page=24` szło
    z ~9.6s do <500ms. Pojedyncza scena (`/scenes/{id}`) nadal używa `_build_scene_out`
    bo overhead na batch nie ma sensu dla N=1.
    """
    from collections import defaultdict
    if not scenes:
        return []

    scene_ids = [s.id for s in scenes]
    studio_ids = list({s.studio_id for s in scenes if s.studio_id is not None})

    # 1) Studios
    studios_by_id: dict = {}
    if studio_ids:
        for st in session.execute(
            select(Studio).where(Studio.id.in_(studio_ids))
        ).scalars():
            studios_by_id[st.id] = st

    # 2) Performers
    perf_rows = session.execute(
        select(ScenePerformer, Performer)
        .join(Performer, Performer.id == ScenePerformer.performer_id)
        .where(ScenePerformer.scene_id.in_(scene_ids))
        .order_by(ScenePerformer.position.asc().nullslast())
    ).all()
    performers_by_scene: dict = defaultdict(list)
    for sp, p in perf_rows:
        performers_by_scene[sp.scene_id].append(
            PerformerOut(
                id=p.id,
                canonical_name=p.canonical_name,
                slug=p.slug,
                gender=p.gender.value if p.gender else None,
                as_alias=sp.as_alias,
            )
        )

    # 3) Tags
    tag_rows = session.execute(
        select(SceneTag.scene_id, Tag)
        .join(Tag, Tag.id == SceneTag.tag_id)
        .where(SceneTag.scene_id.in_(scene_ids))
    ).all()
    tags_by_scene: dict = defaultdict(list)
    for sid, t in tag_rows:
        tags_by_scene[sid].append(TagOut.model_validate(t))

    # 4) External refs + sources
    ref_rows = session.execute(
        select(SceneExternalRef, Source)
        .join(Source, Source.id == SceneExternalRef.source_id)
        .where(SceneExternalRef.scene_id.in_(scene_ids))
    ).all()
    refs_by_scene: dict = defaultdict(list)
    for ref, src in ref_rows:
        refs_by_scene[ref.scene_id].append(
            ExternalRefOut(
                source=src.name,
                external_id=ref.external_id,
                url=ref.url,
                last_seen=ref.last_seen,
            )
        )

    # 5) Playback sources
    pb_rows = session.execute(
        select(PlaybackSource)
        .where(
            PlaybackSource.scene_id.in_(scene_ids),
            PlaybackSource.dead_at.is_(None),
        )
        .order_by(PlaybackSource.origin.asc())
    ).scalars().all()
    pb_by_scene: dict = defaultdict(list)
    for p in pb_rows:
        out = PlaybackSourceOut.model_validate(p)
        if out.thumbnail_url and _needs_proxy(out.thumbnail_url):
            out.thumbnail_url = _wrap_image_proxy(out.thumbnail_url, p.page_url)
        if out.animated_thumbnail_url and _needs_proxy(out.animated_thumbnail_url):
            out.animated_thumbnail_url = _wrap_image_proxy(out.animated_thumbnail_url, p.page_url)
        pb_by_scene[p.scene_id].append(out)

    # 6) Progress
    progress_by_scene: dict = {}
    for prog in session.execute(
        select(ScenePlayProgress).where(ScenePlayProgress.scene_id.in_(scene_ids))
    ).scalars():
        progress_by_scene[prog.scene_id] = prog

    # 7) Favorites
    fav_scene_ids: set = set(
        session.execute(
            select(FavoriteScene.scene_id).where(
                FavoriteScene.scene_id.in_(scene_ids)
            )
        ).scalars()
    )

    out: list[SceneOut] = []
    for scene in scenes:
        studio_out = None
        if scene.studio_id is not None and scene.studio_id in studios_by_id:
            studio_out = StudioOut.model_validate(studios_by_id[scene.studio_id])
        progress = progress_by_scene.get(scene.id)
        out.append(
            SceneOut(
                id=scene.id,
                title=scene.title,
                slug=scene.slug,
                release_date=scene.release_date,
                duration_sec=scene.duration_sec,
                description=scene.description,
                code=scene.code,
                director=scene.director,
                studio=studio_out,
                performers=performers_by_scene.get(scene.id, []),
                tags=tags_by_scene.get(scene.id, []),
                external_refs=refs_by_scene.get(scene.id, []),
                playback_sources=pb_by_scene.get(scene.id, []),
                created_at=scene.created_at,
                last_played_at=progress.last_played_at if progress else None,
                finished=progress.finished if progress else False,
                position_sec=progress.position_sec if progress else 0,
                is_favorite=scene.id in fav_scene_ids,
            )
        )
    return out


def _build_scene_out(session: Session, scene: Scene) -> SceneOut:
    """Serialize one scene, loading each of its relations with its own query.

    Loads the studio, performers (ordered by position, NULLs last), tags, external
    refs, live playback sources (thumbnails rewritten to the image proxy), play
    progress and favorite flag.
    """
    # Studio: stays None when the scene has no studio or the row is missing.
    studio_row = (
        session.get(Studio, scene.studio_id) if scene.studio_id is not None else None
    )
    studio_out: StudioOut | None = (
        StudioOut.model_validate(studio_row) if studio_row is not None else None
    )

    performer_query = (
        select(ScenePerformer, Performer)
        .join(Performer, Performer.id == ScenePerformer.performer_id)
        .where(ScenePerformer.scene_id == scene.id)
        .order_by(ScenePerformer.position.asc().nullslast())
    )
    performers_out: list[PerformerOut] = [
        PerformerOut(
            id=person.id,
            canonical_name=person.canonical_name,
            slug=person.slug,
            gender=person.gender.value if person.gender else None,
            as_alias=link.as_alias,
        )
        for link, person in session.execute(performer_query).all()
    ]

    tag_query = (
        select(Tag)
        .join(SceneTag, SceneTag.tag_id == Tag.id)
        .where(SceneTag.scene_id == scene.id)
    )
    tags_out = [
        TagOut.model_validate(tag_row)
        for tag_row in session.execute(tag_query).scalars().all()
    ]

    ref_query = (
        select(SceneExternalRef, Source)
        .join(Source, Source.id == SceneExternalRef.source_id)
        .where(SceneExternalRef.scene_id == scene.id)
    )
    refs_out = []
    for ext_ref, source_row in session.execute(ref_query).all():
        refs_out.append(
            ExternalRefOut(
                source=source_row.name,
                external_id=ext_ref.external_id,
                url=ext_ref.url,
                last_seen=ext_ref.last_seen,
            )
        )

    playback_query = (
        select(PlaybackSource)
        .where(
            PlaybackSource.scene_id == scene.id,
            PlaybackSource.dead_at.is_(None),  # hide dead links
        )
        .order_by(PlaybackSource.origin.asc())
    )
    playback_out: list[PlaybackSourceOut] = []
    for playback in session.execute(playback_query).scalars().all():
        item = PlaybackSourceOut.model_validate(playback)
        # Route thumbnail URLs through the backend image proxy when needed; the
        # playback's page_url is passed as the Referer for the proxy token.
        if item.thumbnail_url and _needs_proxy(item.thumbnail_url):
            item.thumbnail_url = _wrap_image_proxy(item.thumbnail_url, playback.page_url)
        if item.animated_thumbnail_url and _needs_proxy(item.animated_thumbnail_url):
            item.animated_thumbnail_url = _wrap_image_proxy(
                item.animated_thumbnail_url, playback.page_url
            )
        playback_out.append(item)

    progress = session.get(ScenePlayProgress, scene.id)
    favorite_row = session.get(FavoriteScene, scene.id)

    return SceneOut(
        id=scene.id,
        title=scene.title,
        slug=scene.slug,
        release_date=scene.release_date,
        duration_sec=scene.duration_sec,
        description=scene.description,
        code=scene.code,
        director=scene.director,
        studio=studio_out,
        performers=performers_out,
        tags=tags_out,
        external_refs=refs_out,
        playback_sources=playback_out,
        created_at=scene.created_at,
        last_played_at=progress.last_played_at if progress else None,
        finished=progress.finished if progress else False,
        position_sec=progress.position_sec if progress else 0,
        is_favorite=favorite_row is not None,
    )


@router.delete("/{scene_id}/tags/{tag_id}", status_code=status.HTTP_204_NO_CONTENT)
def remove_tag_from_scene(
    scene_id: uuid.UUID,
    tag_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> None:
    """Remove the scene↔tag link (e.g. the user decided the tag is wrong here).

    Idempotent: a missing link is still a success. The Tag row itself is never
    deleted, since other scenes may use it.
    """
    link_query = select(SceneTag).where(
        SceneTag.scene_id == scene_id, SceneTag.tag_id == tag_id
    )
    link = session.execute(link_query).scalar_one_or_none()
    if link is not None:
        session.delete(link)
        session.commit()


@router.delete(
    "/{scene_id}/performers/{performer_id}", status_code=status.HTTP_204_NO_CONTENT
)
def remove_performer_from_scene(
    scene_id: uuid.UUID,
    performer_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> None:
    """Remove the scene↔performer link (a false dedup match attached the wrong person).

    Idempotent: a missing link is still a success. The Performer row itself is
    kept. Per the original note this is meant for cases where a fuzzy alias
    match pulled scenes under the wrong performer.
    """
    from app.models.scene import ScenePerformer

    link_query = select(ScenePerformer).where(
        ScenePerformer.scene_id == scene_id,
        ScenePerformer.performer_id == performer_id,
    )
    link = session.execute(link_query).scalar_one_or_none()
    if link is not None:
        session.delete(link)
        session.commit()


# Response body of POST /scenes/{scene_id}/enrich-tags.
class EnrichTagsOut(BaseModel):
    # Scene the enrichment was requested for.
    scene_id: uuid.UUID
    # Number of scene↔tag links actually inserted by this call.
    added: int
    # Site tag of the tube source that was fetched; None when no usable source exists.
    tube_used: str | None
    # Tag names extracted from the tube page (empty when nothing was extracted).
    tags: list[str]


@router.post("/{scene_id}/enrich-tags", response_model=EnrichTagsOut)
def enrich_tags_from_tube(
    scene_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> EnrichTagsOut:
    """Scrape tags from one of the scene's tube pages and link the missing ones.

    Fetches the page HTML of one live tube playback_source, extracts tag names
    and inserts the missing scene_tags rows. The source is chosen by a fixed
    priority list (mainstream tubes first); if none matches, the first tube
    source with a registered extractor is used.

    Per the original note, the mobile client calls this when opening SceneDetail
    for a scene with 0 tags that has a supported tube source.

    Idempotent: inserts use ON CONFLICT DO NOTHING, so repeating the call with the
    same tags adds nothing. A fetch failure is logged and reported as
    ``added=0`` rather than raised.

    Raises:
        HTTPException: 404 when the scene does not exist.
    """
    from app.extractors._fetch import browser_get
    from app.extractors._models import TubePageError
    from app.extractors.tag_extract import EXTRACTORS, extract_tags
    from app.models.playback_source import PlaybackSource
    from app.models.tag import Tag
    from app.normalize.scenes import NormalizedTag
    from app.normalize.text import slugify
    from app.resolve.tag_resolver import resolve_tag

    if session.get(Scene, scene_id) is None:
        raise HTTPException(status_code=404, detail="scene not found")

    # Priority: mainstream tubes (rich metadata) before niche ones (fewer tags or
    # garbage).
    priority_sites = (
        "xhamstercom", "porntrexcom", "epornercom", "youporncom",
        "xvideoscom", "xnxxcom", "redtubecom", "pornhatcom",
    )
    live_sources = session.execute(
        select(PlaybackSource).where(
            PlaybackSource.scene_id == scene_id,
            PlaybackSource.dead_at.is_(None),
        )
    ).scalars().all()

    # First source matching the highest-priority site.
    # NOTE(review): unlike the fallback below, this step does not check that the
    # site is present in EXTRACTORS — confirm every priority site has an extractor.
    chosen: PlaybackSource | None = next(
        (
            candidate
            for site in priority_sites
            for candidate in live_sources
            if candidate.origin == f"tube:{site}"
        ),
        None,
    )
    if chosen is None:
        # Fallback: any tube source whose site has a registered extractor.
        chosen = next(
            (
                candidate
                for candidate in live_sources
                if candidate.origin.startswith("tube:")
                and candidate.origin.split(":", 1)[1] in EXTRACTORS
            ),
            None,
        )

    if chosen is None:
        return EnrichTagsOut(scene_id=scene_id, added=0, tube_used=None, tags=[])

    sitetag = chosen.origin.split(":", 1)[1]
    try:
        response = browser_get(chosen.page_url, timeout=15.0, follow_redirects=True)
        response.raise_for_status()
    except (TubePageError, Exception) as e:
        log.warning("enrich-tags fetch failed for %s: %s", chosen.page_url, e)
        return EnrichTagsOut(scene_id=scene_id, added=0, tube_used=sitetag, tags=[])

    tag_names = extract_tags(sitetag, response.text)
    if not tag_names:
        return EnrichTagsOut(scene_id=scene_id, added=0, tube_used=sitetag, tags=[])

    # Upsert: resolve (find or create) each Tag, then add the SceneTag link
    # idempotently. PostgreSQL INSERT ... ON CONFLICT DO NOTHING is used instead of
    # ORM session.add() because, per the original note, `resolve_tag` flushes the
    # session inside the loop, emitting pending SceneTag INSERTs from earlier
    # iterations — when two concurrent enrich-tags calls collide on the same
    # (scene_id, tag_id), the second flush hit a UniqueViolation (issue GOON-H).
    # ON CONFLICT skips such rows silently.
    from sqlalchemy.dialects.postgresql import insert as pg_insert

    added = 0
    seen_tag_ids: set = set()
    for raw_name in tag_names:
        normalized = NormalizedTag(name=raw_name, slug=slugify(raw_name), external_id=None)
        resolved = resolve_tag(session, norm=normalized)
        if resolved is None or resolved.id in seen_tag_ids:
            continue
        seen_tag_ids.add(resolved.id)
        link_insert = (
            pg_insert(SceneTag.__table__)
            .values(scene_id=scene_id, tag_id=resolved.id, source_id=None)
            .on_conflict_do_nothing(index_elements=["scene_id", "tag_id"])
        )
        outcome = session.execute(link_insert)
        # rowcount is 1 when the row was inserted, 0 when ON CONFLICT skipped it.
        if (outcome.rowcount or 0) > 0:
            added += 1
    session.commit()
    return EnrichTagsOut(scene_id=scene_id, added=added, tube_used=sitetag, tags=tag_names)


# Response body of POST /scenes/{scene_id}/enrich-duration.
class EnrichDurationOut(BaseModel):
    # Scene the enrichment was requested for.
    scene_id: uuid.UUID
    # Scene duration in seconds after the call; None when none could be extracted.
    duration_sec: int | None
    # Site tag of the tube page the duration came from; None when nothing was
    # fetched successfully or the duration was already set.
    tube_used: str | None


@router.post("/{scene_id}/enrich-duration", response_model=EnrichDurationOut)
def enrich_duration_from_tube(
    scene_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> EnrichDurationOut:
    """Fill in a missing scene duration from one of its tube pages.

    Live ``tube:*`` playback sources are tried in turn; the first page that
    yields a positive duration sets ``scene.duration_sec`` (and the source's own
    ``duration_sec`` when that is still empty). Pages that fail to load are
    skipped.

    Per the original note, the mobile client calls this when opening SceneDetail
    for a scene without a duration, because duration is the strongest single
    signal for dedup.

    Idempotent: when the duration is already set it is returned unchanged.

    Raises:
        HTTPException: 404 when the scene does not exist.
    """
    from app.extractors._fetch import browser_get
    from app.extractors._models import TubePageError
    from app.extractors.duration_extract import extract_duration_sec
    from app.models.playback_source import PlaybackSource

    scene = session.get(Scene, scene_id)
    if scene is None:
        raise HTTPException(status_code=404, detail="scene not found")

    already_known = scene.duration_sec
    if already_known is not None:
        return EnrichDurationOut(
            scene_id=scene_id, duration_sec=already_known, tube_used=None
        )

    tube_query = select(PlaybackSource).where(
        PlaybackSource.scene_id == scene_id,
        PlaybackSource.dead_at.is_(None),
        PlaybackSource.origin.like("tube:%"),
    )
    for src in session.execute(tube_query).scalars().all():
        try:
            response = browser_get(src.page_url, timeout=15.0, follow_redirects=True)
            response.raise_for_status()
        except (TubePageError, Exception) as e:
            log.debug("enrich-duration fetch failed for %s: %s", src.page_url, e)
            continue

        seconds = extract_duration_sec(response.text)
        if seconds is None or not seconds > 0:
            continue

        scene.duration_sec = seconds
        # Also store it on the playback source for parity (useful if per-source
        # duration mismatch detection is added later).
        if src.duration_sec is None:
            src.duration_sec = seconds
        session.commit()

        _prefix, colon, site = src.origin.partition(":")
        return EnrichDurationOut(
            scene_id=scene_id,
            duration_sec=seconds,
            tube_used=site if colon else None,
        )

    return EnrichDurationOut(scene_id=scene_id, duration_sec=None, tube_used=None)


# Response body of POST /scenes/{scene_id}/enrich-studio.
class EnrichStudioOut(BaseModel):
    # ID of the scene the request was made for (echoed back unchanged).
    scene_id: uuid.UUID
    # Studio linked to the scene after the call; None when no studio could be determined.
    studio_id: uuid.UUID | None
    # Name of that studio; None when no studio could be determined or the
    # referenced studio row could not be loaded.
    studio_name: str | None
    # "pornhatcom" when a pornhat playback source was selected for scraping
    # (even if the scrape yielded nothing); None when the scene already had a
    # studio or has no live pornhat source.
    tube_used: str | None


def _extract_pornhat_studio(page_html: str) -> tuple[str, str | None] | None:
    """Parse the studio name and optional slug out of a pornhat scene page.

    Looks for the ``info-video js-ajax-dvd`` element and decodes the JSON held
    in its single-quoted ``data-setup`` attribute.

    Returns:
        ``(name, slug)`` where ``slug`` is the page's ``dir`` value or ``None``
        when it is absent/blank; ``None`` when the element is missing, the
        payload is not a JSON object, or it carries no usable ``title``.
    """
    import html as _html
    import json as _json

    m = re.search(
        r"class=\"info-video js-ajax-dvd[^\"]*\"[^>]*data-setup='([^']+)'",
        page_html, re.IGNORECASE,
    )
    if m is None:
        return None
    try:
        # The value is an HTML attribute, so character references (&amp;,
        # &#039;, ...) must be decoded before the text is treated as JSON —
        # otherwise names such as "Devil's Film" are stored entity-encoded.
        data = _json.loads(_html.unescape(m.group(1)))
    except _json.JSONDecodeError:
        return None
    # Valid JSON is not necessarily an object; a list/scalar has no `.get`.
    if not isinstance(data, dict):
        return None

    title = data.get("title")
    name = title.strip() if isinstance(title, str) else ""
    if not name:
        return None
    raw_dir = data.get("dir")
    slug = raw_dir.strip() if isinstance(raw_dir, str) else ""
    return name, slug or None


@router.post("/{scene_id}/enrich-studio", response_model=EnrichStudioOut)
def enrich_studio_from_tube(
    scene_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> EnrichStudioOut:
    """Extract the studio (DVD/series) from the scene's pornhat page and link it.

    Pornhat exposes the studio as
    `class="info-video js-ajax-dvd" data-setup='{"title": "Adult Time", ...}'`.
    Other tubes will be supported once their pattern is known — for now only
    pornhat (the cleanest studio metadata among free tubes).

    Idempotent: when the scene already has a studio, that studio is returned
    and nothing is fetched. Otherwise the studio is looked up by slug, then by
    name, and created if neither exists.

    Raises:
        HTTPException: 404 when the scene does not exist.
    """
    from app.extractors._fetch import browser_get
    from app.extractors._models import TubePageError
    from app.models.playback_source import PlaybackSource
    from app.models.studio import Studio
    from app.normalize.text import slugify

    scene = session.get(Scene, scene_id)
    if scene is None:
        raise HTTPException(status_code=404, detail="scene not found")

    if scene.studio_id is not None:
        existing = session.get(Studio, scene.studio_id)
        return EnrichStudioOut(
            scene_id=scene_id,
            studio_id=scene.studio_id,
            studio_name=existing.name if existing else None,
            tube_used=None,
        )

    chosen = session.execute(
        select(PlaybackSource).where(
            PlaybackSource.scene_id == scene_id,
            PlaybackSource.dead_at.is_(None),
            PlaybackSource.origin == "tube:pornhatcom",
        )
    ).scalars().first()
    if chosen is None:
        return EnrichStudioOut(scene_id=scene_id, studio_id=None, studio_name=None, tube_used=None)

    # Shared "pornhat was consulted but yielded no studio" response.
    no_studio = EnrichStudioOut(
        scene_id=scene_id, studio_id=None, studio_name=None, tube_used="pornhatcom"
    )

    try:
        r = browser_get(chosen.page_url, timeout=15.0, follow_redirects=True)
        r.raise_for_status()
    except (TubePageError, Exception) as e:
        # Best-effort enrichment: a failed fetch is reported as "no studio", not as an error.
        log.warning("enrich-studio fetch failed for %s: %s", chosen.page_url, e)
        return no_studio

    parsed = _extract_pornhat_studio(r.text)
    if parsed is None:
        return no_studio
    name, page_slug = parsed
    slug = page_slug or slugify(name)

    # Prefer the slug match; fall back to an exact name match before creating.
    studio = session.execute(
        select(Studio).where(Studio.slug == slug)
    ).scalar_one_or_none()
    if studio is None:
        studio = session.execute(
            select(Studio).where(Studio.name == name)
        ).scalar_one_or_none()
    if studio is None:
        studio = Studio(name=name, slug=slug)
        session.add(studio)
        # Flush so the INSERT is emitted and studio.id is available below.
        session.flush()
    scene.studio_id = studio.id
    session.commit()
    return EnrichStudioOut(
        scene_id=scene_id, studio_id=studio.id, studio_name=studio.name, tube_used="pornhatcom"
    )


# Response body of POST /scenes/{scene_id}/enrich-thumbnail.
class EnrichThumbOut(BaseModel):
    # ID of the scene the request was made for (echoed back unchanged).
    scene_id: uuid.UUID
    # Thumbnail URL already stored on a source or freshly extracted; None when none was found.
    thumbnail_url: str | None
    # Tube name (the part of the source origin after "tube:") whose page
    # supplied the thumbnail in this call; None when nothing was extracted.
    tube_used: str | None
    # Number of playback sources that had the thumbnail written in this call.
    sources_updated: int


@router.post("/{scene_id}/enrich-thumbnail", response_model=EnrichThumbOut)
def enrich_thumbnail_from_tube(
    scene_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> EnrichThumbOut:
    """Backfill a scene thumbnail by scraping one of its live tube pages.

    Fetches the detail page of the scene's tube playback sources one by one
    and extracts a thumbnail (og:image / twitter:image / LD-JSON thumbnailUrl /
    KVS html5player) from the first page that yields one.

    The result is written to EVERY playback source of the scene that lacks a
    thumbnail, so later list views show it regardless of which source is
    picked. Mobile auto-calls this when SceneDetail opens without a thumbnail
    (same as for duration).

    Idempotent: if any live tube source already has a thumbnail, it is
    returned and nothing is fetched.

    Raises:
        HTTPException: 404 when the scene does not exist.
    """
    from app.extractors._fetch import browser_get
    from app.extractors._models import TubePageError
    from app.extractors.thumb_extract import extract_thumbnail_url
    from app.models.playback_source import PlaybackSource

    if session.get(Scene, scene_id) is None:
        raise HTTPException(status_code=404, detail="scene not found")

    tube_query = select(PlaybackSource).where(
        PlaybackSource.scene_id == scene_id,
        PlaybackSource.dead_at.is_(None),
        PlaybackSource.origin.like("tube:%"),
    )
    live_tube_sources = session.execute(tube_query).scalars().all()

    # Already have one — idempotent return without touching the network.
    known_thumb = next(
        (ps.thumbnail_url for ps in live_tube_sources if ps.thumbnail_url),
        None,
    )
    if known_thumb is not None:
        return EnrichThumbOut(
            scene_id=scene_id,
            thumbnail_url=known_thumb,
            tube_used=None,
            sources_updated=0,
        )

    for candidate in live_tube_sources:
        try:
            resp = browser_get(candidate.page_url, timeout=15.0, follow_redirects=True)
            resp.raise_for_status()
        except (TubePageError, Exception) as exc:
            log.debug("enrich-thumbnail fetch failed for %s: %s", candidate.page_url, exc)
            continue

        found = extract_thumbnail_url(resp.text)
        if not found:
            continue

        # Store on every source still lacking a thumbnail (saves a duplicate fetch).
        lacking = [ps for ps in live_tube_sources if not ps.thumbnail_url]
        for ps in lacking:
            ps.thumbnail_url = found
        session.commit()

        _, colon, tube_name = candidate.origin.partition(":")
        return EnrichThumbOut(
            scene_id=scene_id,
            thumbnail_url=found,
            tube_used=tube_name if colon else None,
            sources_updated=len(lacking),
        )

    # No source produced a thumbnail (or there were no live tube sources).
    return EnrichThumbOut(
        scene_id=scene_id, thumbnail_url=None, tube_used=None, sources_updated=0
    )