"""GET /scenes — lista i szczegóły scen z bazy kanonicznej.""" from __future__ import annotations import logging import re import uuid from typing import Annotated from fastapi import APIRouter, Depends, HTTPException, Query, status from pydantic import BaseModel from sqlalchemy import distinct, exists, false, func, literal_column, select from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session from app.auth import require_api_key from app.api.schemas import ( ExternalRefOut, PerformerOut, PlaybackSourceOut, SceneListOut, SceneOut, StudioOut, TagOut, ) from app.db import get_session from app.api.device import LEGACY_DEVICE, get_device_id from app.models.favorite_scene import FavoriteScene from app.models.performer import Performer from app.models.play_progress import ScenePlayProgress from app.models.playback_source import PlaybackSource from app.models.scene import Scene, SceneExternalRef, ScenePerformer, SceneTag from app.models.source import Source, SourceKind from app.models.studio import Studio from app.models.tag import Tag log = logging.getLogger(__name__) router = APIRouter(prefix="/scenes", tags=["scenes"], dependencies=[Depends(require_api_key)]) _VALID_SORTS = {"created_at", "release_date", "title", "studio"} # TTL-cache dla count'u scen-z-żywym-playback (default lista bez filtra). Full-scan # 1.69M scen + EXISTS ~950ms; liczba zmienia się wolno i jest przybliżona (header # paginacji), więc 10-min cache w pamięci procesu API jest akceptowalny trade-off. _DEFAULT_COUNT_CACHE: dict = {"ts": 0.0, "val": 0} _DEFAULT_COUNT_TTL = 600.0 def _default_scene_count(session: Session) -> int: import time as _time now = _time.monotonic() if _DEFAULT_COUNT_CACHE["val"] and (now - _DEFAULT_COUNT_CACHE["ts"]) < _DEFAULT_COUNT_TTL: return _DEFAULT_COUNT_CACHE["val"] count_query = select(func.count()).select_from( select(Scene.id).where( exists( select(1).where( PlaybackSource.scene_id == Scene.id, PlaybackSource.dead_at.is_(None), ) ) ).subquery() ) total = session.execute(count_query).scalar_one() _DEFAULT_COUNT_CACHE["ts"] = now _DEFAULT_COUNT_CACHE["val"] = total return total # Blacklisty (performer/studio/tag) są zwykle PUSTE (self-hosted, single-user). Mimo to # 3 NOT EXISTS klauzule doklejały się do KAŻDEJ filtrowanej listy scen i były ewaluowane # per-row — przy filtrze typu duży-tag/has_playback planer chodzi po ~176k scen, więc te # puste-zawsze klauzule kosztowały ~3.4s (mega-tag „anal": 6.7s→3.3s po pominięciu). # Cache'ujemy emptiness (TTL 5 min); gdy ktoś doda blacklist-wpis, w ciągu 5 min klauzule # wracają. Patrz reference_scenes_list_perf / task #22. # Cache per device_id (blacklisty są teraz device-scoped — bug 2026-06-08). _BLACKLIST_EMPTY_CACHE: dict[str, tuple[float, bool]] = {} _BLACKLIST_EMPTY_TTL = 300.0 def _blacklists_empty(session: Session, device_id: str) -> bool: """True gdy WSZYSTKIE 3 blacklisty TEGO device puste → pomiń NOT EXISTS klauzule.""" import time as _time from app.models.blacklist import ( BlacklistedPerformer, BlacklistedStudio, BlacklistedTag, ) now = _time.monotonic() cached = _BLACKLIST_EMPTY_CACHE.get(device_id) if cached and (now - cached[0]) < _BLACKLIST_EMPTY_TTL: return cached[1] has_any = session.execute( select( exists(select(1).select_from(BlacklistedPerformer).where(BlacklistedPerformer.device_id == device_id)) | exists(select(1).select_from(BlacklistedStudio).where(BlacklistedStudio.device_id == device_id)) | exists(select(1).select_from(BlacklistedTag).where(BlacklistedTag.device_id == device_id)) ) ).scalar_one() _BLACKLIST_EMPTY_CACHE[device_id] = (now, not has_any) return not has_any def _split_csv(raw: str | None) -> list[str]: if not raw: return [] return [s.strip() for s in raw.split(",") if s.strip()] @router.get("", response_model=SceneListOut) def list_scenes( session: Annotated[Session, Depends(get_session)], device_id: Annotated[str, Depends(get_device_id)], q: str | None = Query(default=None, description="Wyszukiwanie po title_normalized (trgm)"), studio_slug: str | None = Query(default=None, description="DEPRECATED — użyj studio_slugs"), studio_slugs: str | None = Query( default=None, description="Comma-separated studio slugs (OR)" ), tags: str | None = Query( default=None, description="Comma-separated tag slugs (AND — scena musi mieć wszystkie wybrane tagi)", ), performer_ids: str | None = Query( default=None, description="Comma-separated performer UUIDs (AND — scena musi mieć wszystkich wybranych performerów)", ), has_playback: bool | None = Query( default=None, description="True: tylko sceny z ≥1 playback_source" ), min_duration_sec: int | None = Query(default=None, ge=0), max_duration_sec: int | None = Query(default=None, ge=0), released_within_days: int | None = Query( default=None, ge=1, description="Tylko sceny released w ostatnich N dniach", ), min_quality_p: int | None = Query( default=None, ge=1, description=( "Minimum quality (pixele wysokości — 2160 = 4K, 1080 = FullHD). Filtruje " "po PlaybackSource.quality (string typu '720p' / '1080p Full HD')." ), ), origin: str | None = Query( default=None, description=( "Filtruj po playback origin (np. 'tube:hqpornercom'). Substring match — " "'hqporner' złapie tube:hqpornercom. Diagnostyka per-hoster." ), ), include_stubs: bool = Query( default=False, description=( "False (default): ukrywa sceny-szkielety bez release_date, < 10min, " "z jedynym playback z hqporner (~7-min Brazzers trailer clipy zalewają katalog)." ), ), sort: str = Query(default="created_at", description="created_at|release_date|title|studio"), page: int = Query(default=1, ge=1), per_page: int = Query(default=50, ge=1, le=200), ) -> SceneListOut: if sort not in _VALID_SORTS: raise HTTPException(status_code=400, detail=f"sort must be one of {sorted(_VALID_SORTS)}") base = select(Scene) if q: base = base.where(Scene.title_normalized.ilike(f"%{q.lower()}%")) studio_slug_list = _split_csv(studio_slugs) if studio_slug: studio_slug_list.append(studio_slug) if studio_slug_list: base = base.where( Scene.studio_id.in_( select(Studio.id).where(Studio.slug.in_(studio_slug_list)) ) ) tag_slug_list = _split_csv(tags) # AND między tagami: scena musi mieć WSZYSTKIE zaznaczone tagi. Każdy slug → osobny # exists() — zaznaczanie kolejnych filtrów zawęża wyniki, jak intuicja użytkownika. # # PERF (2026-06-07): resolvujemy slug→tag_id w aplikacji i filtrujemy po LITERALNYM # tag_id (NIE JOIN po Tag.slug). Z literałem planner zna kardynalność tagu ze # statystyk (MCV) → dla popularnych tagów (blowjob ~273k scen) wybiera index-walk po # ix_scenes_created_at_desc zamiast materializować wszystkie scene_tags. Slug-JOIN # ukrywał tag_id przed plannerem → używał średniej (8.4M/11541≈726) → zły plan # (4-12s). Z literałem: ~20ms. Zob. też _build... light mode. if tag_slug_list: id_by_slug = dict( session.execute( select(Tag.slug, Tag.id).where(Tag.slug.in_(tag_slug_list)) ).all() ) for slug in tag_slug_list: tag_id = id_by_slug.get(slug) if tag_id is None: base = base.where(false()) # nieznany slug → brak wyników break base = base.where( exists( select(1) .select_from(SceneTag) .where(SceneTag.scene_id == Scene.id, SceneTag.tag_id == tag_id) ) ) perf_id_strings = _split_csv(performer_ids) if perf_id_strings: try: perf_ids = [uuid.UUID(s) for s in perf_id_strings] except ValueError as e: raise HTTPException(status_code=400, detail=f"invalid performer UUID: {e}") from e # AND między performerami (analogicznie do tagów). for pid in perf_ids: base = base.where( exists( select(1) .select_from(ScenePerformer) .where( ScenePerformer.scene_id == Scene.id, ScenePerformer.performer_id == pid, ) ) ) if has_playback is True: # Tylko sceny z choć jednym ŻYWYM playback_source. base = base.where( exists( select(1).where( PlaybackSource.scene_id == Scene.id, PlaybackSource.dead_at.is_(None), ) ) ) elif has_playback is False: base = base.where( ~exists( select(1).where( PlaybackSource.scene_id == Scene.id, PlaybackSource.dead_at.is_(None), ) ) ) if origin: # Substring match na origin — 'hqporner' złapie 'tube:hqpornercom'. base = base.where( exists( select(1).where( PlaybackSource.scene_id == Scene.id, PlaybackSource.dead_at.is_(None), PlaybackSource.origin.ilike(f"%{origin}%"), ) ) ) # Blacklisty — globalne wykluczenia. Jeśli scena ma JAKIEGOKOLWIEK blacklisted # performera, jest na blacklisted studio, lub ma JAKIKOLWIEK blacklisted tag → out. # Pomijamy gdy wszystkie 3 blacklisty puste (typowy stan single-user) — te NOT EXISTS # ewaluują się per-row na ~176k scen przy mega-tagu i kosztowały ~3.4s za nic. if not _blacklists_empty(session, device_id): from app.models.blacklist import ( BlacklistedPerformer, BlacklistedStudio, BlacklistedTag, ) base = base.where( ~exists( select(1) .select_from(ScenePerformer) .join( BlacklistedPerformer, (BlacklistedPerformer.performer_id == ScenePerformer.performer_id) & (BlacklistedPerformer.device_id == device_id), ) .where(ScenePerformer.scene_id == Scene.id) ) ) base = base.where( ~Scene.studio_id.in_( select(BlacklistedStudio.studio_id).where(BlacklistedStudio.device_id == device_id) ) ) base = base.where( ~exists( select(1) .select_from(SceneTag) .join( BlacklistedTag, (BlacklistedTag.tag_id == SceneTag.tag_id) & (BlacklistedTag.device_id == device_id), ) .where(SceneTag.scene_id == Scene.id) ) ) if min_duration_sec is not None: base = base.where(Scene.duration_sec >= min_duration_sec) if max_duration_sec is not None: base = base.where(Scene.duration_sec <= max_duration_sec) if released_within_days is not None: from datetime import date, timedelta cutoff = date.today() - timedelta(days=released_within_days) base = base.where(Scene.release_date >= cutoff) if min_quality_p is not None: # PlaybackSource.quality to wolny string — szukamy liczb w prefixie ('1080p', # '1080p Full HD', '2160p'). Heurystyka: wystarczy że scena ma JEDEN żywy # playback z quality liczbą >= min. '4K'/'UHD' aliasujemy na 2160. from sqlalchemy import Integer, cast, or_ numeric_q = cast( func.coalesce(func.substring(PlaybackSource.quality, r"\d+"), "0"), Integer, ) conds = [numeric_q >= min_quality_p] if min_quality_p <= 2160: conds.append(PlaybackSource.quality.ilike("%4k%")) conds.append(PlaybackSource.quality.ilike("%uhd%")) base = base.where( exists( select(1).where( PlaybackSource.scene_id == Scene.id, PlaybackSource.dead_at.is_(None), PlaybackSource.quality.isnot(None), or_(*conds), ) ) ) if not include_stubs: # Stub scene heuristic: tube-only scena BEZ release_date AND BEZ canonical # (TPDB/StashDB) ref AND BEZ żadnego ScenePerformer linka. ScenePerformer # dodaje continuous worker (search-by-name → wymusza link), więc per-performer # search-result NIGDY nie jest stub. To filtruje tylko anonymous tube-only # sceny z newUrl/categories ingestu które nie zostały zsyntowane z performerem. canonical_exists = exists( select(1) .select_from(SceneExternalRef) .join(Source, Source.id == SceneExternalRef.source_id) .where(SceneExternalRef.scene_id == Scene.id) .where(Source.kind.in_([SourceKind.tpdb, SourceKind.stashdb])) ) has_performer = exists( select(1).where(ScenePerformer.scene_id == Scene.id) ) # NOT stub gdy: ma canonical_ref OR ma release_date OR ma performera base = base.where( Scene.release_date.is_not(None) | canonical_exists | has_performer ) _is_pure_default = ( not include_stubs and not q and not studio_slug_list and not tag_slug_list and not perf_id_strings and origin is None and has_playback is None and min_duration_sec is None and max_duration_sec is None and released_within_days is None and min_quality_p is None ) # Count strategy: # - PURE default: cached pełny licznik katalogu (TTL 10 min). # - FILTROWANE: NIE liczymy dokładnie. Bounded-count nad EXISTS-filtrami był # dominującym kosztem (~4s na has_playback / min_duration / duży tag) i plan # był NIESTABILNY (literal LIMIT + count-nad-PK pomogły w części przypadków, # ale planer i tak czasem skanuje cały zbiór zamiast urwać). Mobile paginuje # po `has_more` (per_page+1 fetch), NIE po `total` — `total` to tylko licznik # "N+" w UI. Wyprowadzamy go z has_more PO fetchu (patrz niżej): dolna granica # + flaga "jest więcej". Eliminuje cały koszt count z każdej filtrowanej listy. total_capped = False total: int | None = _default_scene_count(session) if _is_pure_default else None # Sort: zawsze tie-break po created_at desc dla determinizmu paginacji. if sort == "release_date": ordered = base.order_by( Scene.release_date.desc().nullslast(), Scene.created_at.desc() ) elif sort == "title": ordered = base.order_by(Scene.title_normalized.asc(), Scene.created_at.desc()) elif sort == "studio": # Sceny bez studio na końcu; w obrębie studio — najświeższe pierwsze. ordered = ( base.outerjoin(Studio, Studio.id == Scene.studio_id) .order_by( Studio.name_normalized.asc().nullslast(), Scene.release_date.desc().nullslast(), Scene.created_at.desc(), ) ) else: # created_at ordered = base.order_by( Scene.created_at.desc(), Scene.release_date.desc().nullslast() ) # Fetch per_page+1 — obecność (per_page+1)-szego wiersza = jest kolejna strona. # To źródło prawdy dla paginacji (mobile getNextPageParam), niezależne od bounded # `total`. Nadmiarowy wiersz odcinamy przed serializacją. # LIMIT/OFFSET literalne (NIE bound-param) — patrz wyżej: sparametryzowany LIMIT # psuje early-termination i przy filtrach EXISTS planer robi gather-all+sort (sekundy) # zamiast limit-aware index-walk po `ix_scenes_created_at_desc`. page/per_page to # walidowane inty (Query ge=1, le=200), więc literal_column jest bezpieczne. _off = (page - 1) * per_page rows = ( session.execute( ordered.offset(literal_column(str(_off))).limit(literal_column(str(per_page + 1))) ) .scalars() .all() ) has_more = len(rows) > per_page rows = rows[:per_page] # Filtrowane listy: total = dolna granica z dotychczas-widzianych wierszy, a # total_capped=has_more daje UI "N+" (jest kolejna strona). Bez osobnego count query. if total is None: total = (page - 1) * per_page + len(rows) total_capped = has_more items = _build_scenes_out_batch(session, list(rows), light=True, device_id=device_id) return SceneListOut( items=items, total=total, page=page, per_page=per_page, has_more=has_more, total_capped=total_capped, ) @router.get("/{scene_id}", response_model=SceneOut) def get_scene( scene_id: uuid.UUID, session: Annotated[Session, Depends(get_session)], device_id: Annotated[str, Depends(get_device_id)], ) -> SceneOut: scene = session.get(Scene, scene_id) if scene is None: raise HTTPException(status_code=404, detail="scene not found") return _build_scene_out(session, scene, device_id=device_id) _SXYPRN_POST_RE = re.compile(r"sxyprn\.com/post/([0-9a-f]{6,40})", re.IGNORECASE) def _sxyprn_thumb_url(page_url: str | None) -> str | None: """Dla źródła sxyprn zwraca STABILNY endpoint on-demand resolvera (`/proxy/sxyprn-thumb/`) zamiast martwego trafficdeposit URL — token żyje ~1h, więc poster resolvujemy przy serwowaniu (bug 2026-06-10).""" if not page_url: return None m = _SXYPRN_POST_RE.search(page_url) return f"/proxy/sxyprn-thumb/{m.group(1)}" if m else None def _is_rotting_thumb(url: str) -> bool: """sxyprn/trafficdeposit miniaturki są czasowo podpisane i rotują (asset 404 po ~tygodniach, nie odświeżalne server-side; bug 2026-06-10). De-prioritize je w wyborze slim-thumbnaila — używamy tylko gdy scena nie ma żadnej innej miniaturki.""" return "trafficdeposit.com" in url def _needs_proxy(url: str) -> bool: """Wszystkie thumbnaile z playback_sources są proxowane przez backend. Większość CDN-ów porn-tube'ów wymaga Refera (hqporner, mypornerleak/58img, inne sxyprn/eporner CDN-y) — expo-image nie wysyła Referera. Self-hosted lub backend-internal URL-e (zaczynające się od `/`) skipujemy.""" return url.startswith("http") and not url.startswith("/proxy/") def _wrap_image_proxy(url: str, referer: str) -> str: """Wraps a thumbnail URL through /proxy/img/{token}/img.jpg. Klient nie musi znać sekretu Referer — backend wstawi sam. Long TTL (30d) bo thumby są stabilne, krótkie ttl by tylko niepotrzebnie zaśmiecało cache.""" from app.api.stream_proxy import make_token # stable_bucket_sec=7d → proxied URL identyczny przez tydzień → expo-image # disk-cache hit zamiast re-download miniatur przy każdym fetchu listy / starcie apki. token = make_token(url, referer, ttl_sec=30 * 24 * 3600, stable_bucket_sec=7 * 24 * 3600) # Path zachowuje rozszerzenie żeby HTTP Content-Type był rozpoznany. import os as _os ext = _os.path.splitext(url.split("?")[0])[1].lstrip(".") or "jpg" return f"/proxy/img/{token}/img.{ext}" def _build_scenes_out_batch( session: Session, scenes: list[Scene], *, light: bool = False, device_id: str = LEGACY_DEVICE ) -> list[SceneOut]: """Batch-fetch wszystkich relacji dla N scen w 7 zapytaniach (zamiast 7×N). Eliminuje N+1 z `_build_scene_out` w listach scen — `/scenes?per_page=24` szło z ~9.6s do <500ms. Pojedyncza scena (`/scenes/{id}`) nadal używa `_build_scene_out` bo overhead na batch nie ma sensu dla N=1. `light=True` (listy/grid): pomija `tags` i `external_refs` (kafelek SceneTile ich NIE używa, a SceneDetail re-fetchuje pełną scenę osobno) i ślimaczy `playback_sources` do 1 wpisu z samą miniaturką (kafelek czyta tylko thumbnail_url/animated_thumbnail_url). Mniej DB + mniej payloadu + szybszy parse na kliencie (perf 2026-06-07). """ from collections import defaultdict if not scenes: return [] scene_ids = [s.id for s in scenes] studio_ids = list({s.studio_id for s in scenes if s.studio_id is not None}) # 1) Studios studios_by_id: dict = {} if studio_ids: for st in session.execute( select(Studio).where(Studio.id.in_(studio_ids)) ).scalars(): studios_by_id[st.id] = st # 2) Performers perf_rows = session.execute( select(ScenePerformer, Performer) .join(Performer, Performer.id == ScenePerformer.performer_id) .where(ScenePerformer.scene_id.in_(scene_ids)) .order_by(ScenePerformer.position.asc().nullslast()) ).all() performers_by_scene: dict = defaultdict(list) for sp, p in perf_rows: performers_by_scene[sp.scene_id].append( PerformerOut( id=p.id, canonical_name=p.canonical_name, slug=p.slug, gender=p.gender.value if p.gender else None, as_alias=sp.as_alias, ) ) # 3) Tags + 4) External refs — kafelek listy ich nie używa; w light mode pomijamy # (SceneDetail re-fetchuje pełną scenę przez /scenes/{id}). tags_by_scene: dict = defaultdict(list) refs_by_scene: dict = defaultdict(list) if not light: tag_rows = session.execute( select(SceneTag.scene_id, Tag) .join(Tag, Tag.id == SceneTag.tag_id) .where(SceneTag.scene_id.in_(scene_ids)) ).all() for sid, t in tag_rows: tags_by_scene[sid].append(TagOut.model_validate(t)) ref_rows = session.execute( select(SceneExternalRef, Source) .join(Source, Source.id == SceneExternalRef.source_id) .where(SceneExternalRef.scene_id.in_(scene_ids)) ).all() for ref, src in ref_rows: refs_by_scene[ref.scene_id].append( ExternalRefOut( source=src.name, external_id=ref.external_id, url=ref.url, last_seen=ref.last_seen, ) ) # 5) Playback sources. Light mode: tylko miniaturka (jedna na scenę) — kafelek # czyta wyłącznie playback_sources[].thumbnail_url / animated_thumbnail_url. pb_by_scene: dict = defaultdict(list) if light: pb_light = session.execute( select( PlaybackSource.scene_id, PlaybackSource.thumbnail_url, PlaybackSource.animated_thumbnail_url, PlaybackSource.page_url, ) .where( PlaybackSource.scene_id.in_(scene_ids), PlaybackSource.dead_at.is_(None), ) .order_by(PlaybackSource.origin.asc()) ).all() # Pierwsza miniaturka + pierwszy animated per scena (1 slim wpis). De-prioritize # sxyprn/trafficdeposit thumbnaile — są podpisane czasowo i ROTUJĄ (asset 404 po # ~tygodniach, nie da się re-signować; bug 2026-06-10). Wolimy miniaturkę z innego # źródła gdy istnieje; sxyprn bierzemy tylko gdy nic innego nie ma (świeże jeszcze # działają, martwe → mobile pokazuje placeholder zamiast broken-image). thumb_by_scene: dict = {} thumb_fallback: dict = {} anim_by_scene: dict = {} for sid, thumb, anim, page_url in pb_light: sxy = _sxyprn_thumb_url(page_url) if sxy: # sxyprn → żywy on-demand resolver (martwy stored URL ignorujemy), # tier fallback: użyty tylko gdy scena nie ma stabilniejszej miniatury. thumb_fallback.setdefault(sid, (sxy, page_url)) elif thumb: if _is_rotting_thumb(thumb): thumb_fallback.setdefault(sid, (thumb, page_url)) elif sid not in thumb_by_scene: thumb_by_scene[sid] = (thumb, page_url) if sid not in anim_by_scene and anim: anim_by_scene[sid] = (anim, page_url) # Uzupełnij scenami które mają TYLKO rotting thumbnail (sxyprn-only). for sid, val in thumb_fallback.items(): thumb_by_scene.setdefault(sid, val) for sid in scene_ids: t = thumb_by_scene.get(sid) a = anim_by_scene.get(sid) if not t and not a: continue t_url = t[0] if t else None a_url = a[0] if a else None ref = (t or a)[1] if t_url and _needs_proxy(t_url): t_url = _wrap_image_proxy(t_url, ref) if a_url and _needs_proxy(a_url): a_url = _wrap_image_proxy(a_url, ref) # id/origin/page_url wymagane przez schemat ale nieużywane przez kafelek # (SceneDetail re-fetchuje pełne źródła) — dummy sentinel. pb_by_scene[sid].append( PlaybackSourceOut( id=uuid.UUID(int=0), origin="", page_url="", thumbnail_url=t_url, animated_thumbnail_url=a_url, ) ) else: pb_rows = session.execute( select(PlaybackSource) .where( PlaybackSource.scene_id.in_(scene_ids), PlaybackSource.dead_at.is_(None), ) .order_by(PlaybackSource.origin.asc()) ).scalars().all() for p in pb_rows: out = PlaybackSourceOut.model_validate(p) if out.thumbnail_url and _needs_proxy(out.thumbnail_url): out.thumbnail_url = _wrap_image_proxy(out.thumbnail_url, p.page_url) if out.animated_thumbnail_url and _needs_proxy(out.animated_thumbnail_url): out.animated_thumbnail_url = _wrap_image_proxy(out.animated_thumbnail_url, p.page_url) pb_by_scene[p.scene_id].append(out) # 6) Progress (device-scoped) progress_by_scene: dict = {} for prog in session.execute( select(ScenePlayProgress).where( ScenePlayProgress.scene_id.in_(scene_ids), ScenePlayProgress.device_id == device_id, ) ).scalars(): progress_by_scene[prog.scene_id] = prog # 7) Favorites (device-scoped) fav_scene_ids: set = set( session.execute( select(FavoriteScene.scene_id).where( FavoriteScene.scene_id.in_(scene_ids), FavoriteScene.device_id == device_id, ) ).scalars() ) out: list[SceneOut] = [] for scene in scenes: studio_out = None if scene.studio_id is not None and scene.studio_id in studios_by_id: studio_out = StudioOut.model_validate(studios_by_id[scene.studio_id]) progress = progress_by_scene.get(scene.id) out.append( SceneOut( id=scene.id, title=scene.title, slug=scene.slug, release_date=scene.release_date, duration_sec=scene.duration_sec, description=scene.description, code=scene.code, director=scene.director, studio=studio_out, performers=performers_by_scene.get(scene.id, []), tags=tags_by_scene.get(scene.id, []), external_refs=refs_by_scene.get(scene.id, []), playback_sources=pb_by_scene.get(scene.id, []), created_at=scene.created_at, last_played_at=progress.last_played_at if progress else None, finished=progress.finished if progress else False, position_sec=progress.position_sec if progress else 0, is_favorite=scene.id in fav_scene_ids, ) ) return out def _build_scene_out(session: Session, scene: Scene, *, device_id: str = LEGACY_DEVICE) -> SceneOut: studio_out: StudioOut | None = None if scene.studio_id is not None: st = session.get(Studio, scene.studio_id) if st is not None: studio_out = StudioOut.model_validate(st) performer_rows = session.execute( select(ScenePerformer, Performer) .join(Performer, Performer.id == ScenePerformer.performer_id) .where(ScenePerformer.scene_id == scene.id) .order_by(ScenePerformer.position.asc().nullslast()) ).all() performers_out: list[PerformerOut] = [] for sp, performer in performer_rows: performers_out.append( PerformerOut( id=performer.id, canonical_name=performer.canonical_name, slug=performer.slug, gender=performer.gender.value if performer.gender else None, as_alias=sp.as_alias, ) ) tag_rows = ( session.execute( select(Tag).join(SceneTag, SceneTag.tag_id == Tag.id).where(SceneTag.scene_id == scene.id) ) .scalars() .all() ) tags_out = [TagOut.model_validate(t) for t in tag_rows] ref_rows = session.execute( select(SceneExternalRef, Source) .join(Source, Source.id == SceneExternalRef.source_id) .where(SceneExternalRef.scene_id == scene.id) ).all() refs_out = [ ExternalRefOut( source=src.name, external_id=ref.external_id, url=ref.url, last_seen=ref.last_seen, ) for ref, src in ref_rows ] playback_rows = ( session.execute( select(PlaybackSource) .where( PlaybackSource.scene_id == scene.id, PlaybackSource.dead_at.is_(None), # ukryj martwe linki ) .order_by(PlaybackSource.origin.asc()) ) .scalars() .all() ) # Collapse źródła dzielące ten sam origin (hoster). Zmergowana scena często agreguje # kilka uploadów z JEDNEGO tube'a (re-enkody / wersje 4K: bug-report aa79a995 "2 linki, # oba do porntrex" = ta sama scena std+4K) — w UI to nierozróżnialne linki do tego # samego hostera (resolvują tym samym extractorem). Zostawiamy jeden najlepszy per # origin: preferuj długość zgodną ze sceną (realny match) → jakąkolwiek długość → # pierwszy (stabilnie, query jest origin-asc). Martwe już odfiltrowane (dead_at). def _origin_pick_key(p: PlaybackSource) -> tuple[int, int]: dur_match = ( 0 if (scene.duration_sec and p.duration_sec and abs(p.duration_sec - scene.duration_sec) <= 5) else 1 ) return (dur_match, 0 if p.duration_sec else 1) _best_by_origin: dict[str, PlaybackSource] = {} for p in playback_rows: key = p.origin or "" cur = _best_by_origin.get(key) if cur is None or _origin_pick_key(p) < _origin_pick_key(cur): _best_by_origin[key] = p playback_rows = list(_best_by_origin.values()) playback_out: list[PlaybackSourceOut] = [] for p in playback_rows: out = PlaybackSourceOut.model_validate(p) # Wrap thumbnail URL-e przez backend image proxy gdy CDN wymaga Refera # (hqporner — fastporndelivery zwraca 403 bez Referer headera, expo-image # nie wysyła go domyślnie). Token ma 30-dniowy TTL bo thumby są stabilne. if out.thumbnail_url and _needs_proxy(out.thumbnail_url): out.thumbnail_url = _wrap_image_proxy(out.thumbnail_url, p.page_url) if out.animated_thumbnail_url and _needs_proxy(out.animated_thumbnail_url): out.animated_thumbnail_url = _wrap_image_proxy(out.animated_thumbnail_url, p.page_url) playback_out.append(out) # Rank natywne-resolve źródła PRZED WebView-fallback (IP-bound/ad-heavy: fpoxxx, # pornxpph, pornhub...). Query był alfabetyczny po origin, więc np. fpoxxx-WebView # pokazywał się przed działającym freshporno (bug-report 2026-06-07). Stabilny sort: # natywne (0) → fallback (1), tie-break po origin. from app.extractors import is_vps_blocked_fallback def _resolve_rank(origin: str | None) -> int: if not origin: return 1 sitetag = origin.split(":", 1)[1] if ":" in origin else origin return 1 if is_vps_blocked_fallback(sitetag) else 0 playback_out.sort(key=lambda o: (_resolve_rank(o.origin), o.origin or "")) progress = session.get(ScenePlayProgress, (device_id, scene.id)) is_fav = session.get(FavoriteScene, (device_id, scene.id)) is not None return SceneOut( id=scene.id, title=scene.title, slug=scene.slug, release_date=scene.release_date, duration_sec=scene.duration_sec, description=scene.description, code=scene.code, director=scene.director, studio=studio_out, performers=performers_out, tags=tags_out, external_refs=refs_out, playback_sources=playback_out, created_at=scene.created_at, last_played_at=progress.last_played_at if progress else None, finished=progress.finished if progress else False, position_sec=progress.position_sec if progress else 0, is_favorite=is_fav, ) @router.delete("/{scene_id}/tags/{tag_id}", status_code=status.HTTP_204_NO_CONTENT) def remove_tag_from_scene( scene_id: uuid.UUID, tag_id: uuid.UUID, session: Annotated[Session, Depends(get_session)], ) -> None: """Usuwa relację scene↔tag (np. user uznał że tag jest błędny dla tej sceny). Idempotent: brak relacji = success. Nie kasuje samego Tag-a — inne sceny mogą z niego korzystać. Sam tag zostaje w słowniku tagów. """ rel = session.execute( select(SceneTag).where(SceneTag.scene_id == scene_id, SceneTag.tag_id == tag_id) ).scalar_one_or_none() if rel is None: return session.delete(rel) session.commit() @router.delete( "/{scene_id}/performers/{performer_id}", status_code=status.HTTP_204_NO_CONTENT ) def remove_performer_from_scene( scene_id: uuid.UUID, performer_id: uuid.UUID, session: Annotated[Session, Depends(get_session)], ) -> None: """Usuwa relację scene↔performer (false-match dedup zostawił nie tą osobę). Idempotent. Sama Performer zostaje. Użyteczne np. gdy fuzzy match aliasu "Bella" wciągnął Anna Bella sceny pod Bad Bella, lub Miss Teela na xnxx została przypisana do scen w których jej nie ma (zgłoszenia 2026-05-10). """ from app.models.scene import ScenePerformer rel = session.execute( select(ScenePerformer).where( ScenePerformer.scene_id == scene_id, ScenePerformer.performer_id == performer_id, ) ).scalar_one_or_none() if rel is None: return session.delete(rel) session.commit() class SceneHideOut(BaseModel): scene_id: uuid.UUID playback_marked_dead: int @router.post("/{scene_id}/hide", response_model=SceneHideOut) def hide_scene( scene_id: uuid.UUID, session: Annotated[Session, Depends(get_session)], ) -> SceneHideOut: """Ukryj scenę (user long-press → „usuń"). Oznacza wszystkie playback_sources jako dead → scena wypada z list (has_playback=false). Odwracalne w DB (dead_at). Nie kasujemy wiersza sceny — zachowujemy refs/dedup, tylko znika z UI.""" from datetime import UTC, datetime from app.models.playback_source import PlaybackSource if session.get(Scene, scene_id) is None: raise HTTPException(status_code=404, detail="scene not found") rows = session.execute( select(PlaybackSource).where( PlaybackSource.scene_id == scene_id, PlaybackSource.dead_at.is_(None), ) ).scalars().all() now = datetime.now(UTC) for p in rows: p.dead_at = now p.dead_reason = "user hid scene (long-press)" session.commit() return SceneHideOut(scene_id=scene_id, playback_marked_dead=len(rows)) class SceneMergeOut(BaseModel): keep_id: uuid.UUID dropped_id: uuid.UUID @router.post("/{keep_id}/merge/{drop_id}", response_model=SceneMergeOut) def merge_duplicate_scene( keep_id: uuid.UUID, drop_id: uuid.UUID, session: Annotated[Session, Depends(get_session)], ) -> SceneMergeOut: """Scal `drop_id` w `keep_id` (user long-press → „oznacz duplikat" → wybór drugiej sceny). Przenosi refs/performers/tags/fingerprints/playback (scene_merge), kasuje `drop`. keep = scena na której user trzyma (zostaje), drop = wskazany duplikat.""" from app.resolve.scene_merge import MergeError, merge_scenes if keep_id == drop_id: raise HTTPException(status_code=400, detail="keep_id == drop_id") if session.get(Scene, keep_id) is None or session.get(Scene, drop_id) is None: raise HTTPException(status_code=404, detail="scene not found") try: merge_scenes(session, keep_id=keep_id, drop_id=drop_id, resolved_by="user_long_press_duplicate") except MergeError as e: raise HTTPException(status_code=400, detail=str(e)) from e session.commit() return SceneMergeOut(keep_id=keep_id, dropped_id=drop_id) class EnrichTagsOut(BaseModel): scene_id: uuid.UUID added: int tube_used: str | None tags: list[str] @router.post("/{scene_id}/enrich-tags", response_model=EnrichTagsOut) def enrich_tags_from_tube( scene_id: uuid.UUID, session: Annotated[Session, Depends(get_session)], ) -> EnrichTagsOut: """Pobiera page HTML z dowolnego tube playback_source dla tej sceny i scrape'uje tagi (categories/tags). Dodaje brakujące do scene_tags. Mobile wywołuje to przy otwarciu SceneDetail jeśli scena ma 0 tagów AND ma tube source z obsługiwanym extractorem (porntrex/youporn/xvideos/xnxx/redtube/ xhamster/eporner). Idempotent: ponowne wywołanie z tymi samymi tagami nic nie robi (UNIQUE PK scene_tags). Konkretne tube źródło wybierane wg priority listy (mainstream bardziej rzetelne niż aggregator). """ from app.extractors._fetch import browser_get from app.extractors._models import TubePageError from app.extractors.tag_extract import EXTRACTORS, extract_tags from app.models.playback_source import PlaybackSource from app.models.tag import Tag from app.normalize.scenes import NormalizedTag from app.normalize.text import slugify from app.resolve.tag_resolver import resolve_tag scene = session.get(Scene, scene_id) if scene is None: raise HTTPException(status_code=404, detail="scene not found") # Priority: mainstream tubes (bogate metadane) > niche (mniej tagów albo garbage). PRIORITY = ["xhamstercom", "porntrexcom", "epornercom", "youporncom", "xvideoscom", "xnxxcom"] sources = session.execute( select(PlaybackSource).where( PlaybackSource.scene_id == scene_id, PlaybackSource.dead_at.is_(None), ) ).scalars().all() # Wybierz pierwsze źródło wg priority listy które ma supported extractor chosen: PlaybackSource | None = None for tag in PRIORITY: for src in sources: if src.origin == f"tube:{tag}": chosen = src break if chosen: break if chosen is None: # Fallback: dowolne źródło z extractorem for src in sources: if src.origin.startswith("tube:"): sitetag = src.origin.split(":", 1)[1] if sitetag in EXTRACTORS: chosen = src break if chosen is None: return EnrichTagsOut(scene_id=scene_id, added=0, tube_used=None, tags=[]) sitetag = chosen.origin.split(":", 1)[1] try: r = browser_get(chosen.page_url, timeout=15.0, follow_redirects=True) r.raise_for_status() except (TubePageError, Exception) as e: log.warning("enrich-tags fetch failed for %s: %s", chosen.page_url, e) return EnrichTagsOut(scene_id=scene_id, added=0, tube_used=sitetag, tags=[]) tag_names = extract_tags(sitetag, r.text) if not tag_names: return EnrichTagsOut(scene_id=scene_id, added=0, tube_used=sitetag, tags=[]) # Upsert: dla każdego taga utwórz/znajdź Tag, dorzuć SceneTag idempotentnie. # Używamy PostgreSQL INSERT ... ON CONFLICT DO NOTHING zamiast ORM session.add() # bo `resolve_tag` robi session.flush() w pętli, emitując pending SceneTag INSERT # z poprzednich iteracji — gdy 2 concurrent enrich-tags collide na tym samym # (scene_id, tag_id), drugi flush dostaje UniqueViolation (GOON-H, 4 events # w 10h mimo wcześniejszego seen_tag_ids fix). ON CONFLICT skip'uje silently. from sqlalchemy.dialects.postgresql import insert as pg_insert added = 0 seen_tag_ids: set = set() for name in tag_names: norm = NormalizedTag(name=name, slug=slugify(name), external_id=None) tag = resolve_tag(session, norm=norm) if tag is None or tag.id in seen_tag_ids: continue seen_tag_ids.add(tag.id) stmt = ( pg_insert(SceneTag.__table__) .values(scene_id=scene_id, tag_id=tag.id, source_id=None) .on_conflict_do_nothing(index_elements=["scene_id", "tag_id"]) ) result = session.execute(stmt) # rowcount == 1 gdy faktycznie wstawiony, 0 gdy ON CONFLICT skip if result.rowcount and result.rowcount > 0: added += 1 session.commit() return EnrichTagsOut(scene_id=scene_id, added=added, tube_used=sitetag, tags=tag_names) class EnrichDurationOut(BaseModel): scene_id: uuid.UUID duration_sec: int | None tube_used: str | None @router.post("/{scene_id}/enrich-duration", response_model=EnrichDurationOut) def enrich_duration_from_tube( scene_id: uuid.UUID, session: Annotated[Session, Depends(get_session)], ) -> EnrichDurationOut: """Wyciąga duration z dowolnego tube playback_source — wszystkie znane tube'y udostępniają duration na detail page (og:video:duration lub LD-JSON ISO 8601). Mobile wywołuje to przy otwarciu SceneDetail gdy scene.duration_sec jest null AND ma tube source. Dla dedupu duration to najsilniejszy single signal — bez niego sceny z weak title-only score są capowane na 0.85 (review queue). Idempotent: zwraca aktualne duration_sec jeśli już ustawione. """ from app.extractors._fetch import browser_get from app.extractors._models import TubePageError from app.extractors.duration_extract import extract_duration_sec from app.models.playback_source import PlaybackSource scene = session.get(Scene, scene_id) if scene is None: raise HTTPException(status_code=404, detail="scene not found") if scene.duration_sec is not None: return EnrichDurationOut( scene_id=scene_id, duration_sec=scene.duration_sec, tube_used=None ) sources = session.execute( select(PlaybackSource).where( PlaybackSource.scene_id == scene_id, PlaybackSource.dead_at.is_(None), PlaybackSource.origin.like("tube:%"), ) ).scalars().all() for src in sources: try: r = browser_get(src.page_url, timeout=15.0, follow_redirects=True) r.raise_for_status() except (TubePageError, Exception) as e: log.debug("enrich-duration fetch failed for %s: %s", src.page_url, e) continue d = extract_duration_sec(r.text) if d is not None and d > 0: scene.duration_sec = d # Zapisz też na poziomie playback_source dla parity (przyda się jeśli # potem dorobimy per-source duration mismatch detection). if src.duration_sec is None: src.duration_sec = d session.commit() return EnrichDurationOut( scene_id=scene_id, duration_sec=d, tube_used=src.origin.split(":", 1)[1] if ":" in src.origin else None, ) return EnrichDurationOut(scene_id=scene_id, duration_sec=None, tube_used=None) class EnrichStudioOut(BaseModel): scene_id: uuid.UUID studio_id: uuid.UUID | None studio_name: str | None tube_used: str | None @router.post("/{scene_id}/enrich-studio", response_model=EnrichStudioOut) def enrich_studio_from_tube( scene_id: uuid.UUID, session: Annotated[Session, Depends(get_session)], ) -> EnrichStudioOut: """Wyciąga studio (DVD/series) z pornhat scene page'a. Pornhat ma `class="info-video js-ajax-dvd" data-setup='{"title": "Adult Time", ...}'` dla studio. Inne tube'y obsługiwane będą gdy znajdziemy ich pattern — na razie tylko pornhat (najczystsze studio metadata wśród free tubes). """ import json as _json from app.extractors._fetch import browser_get from app.extractors._models import TubePageError from app.models.playback_source import PlaybackSource from app.models.studio import Studio from app.normalize.text import slugify scene = session.get(Scene, scene_id) if scene is None: raise HTTPException(status_code=404, detail="scene not found") if scene.studio_id is not None: existing = session.get(Studio, scene.studio_id) return EnrichStudioOut( scene_id=scene_id, studio_id=scene.studio_id, studio_name=existing.name if existing else None, tube_used=None, ) chosen = session.execute( select(PlaybackSource).where( PlaybackSource.scene_id == scene_id, PlaybackSource.dead_at.is_(None), PlaybackSource.origin == "tube:pornhatcom", ) ).scalars().first() if chosen is None: return EnrichStudioOut(scene_id=scene_id, studio_id=None, studio_name=None, tube_used=None) try: r = browser_get(chosen.page_url, timeout=15.0, follow_redirects=True) r.raise_for_status() except (TubePageError, Exception) as e: log.warning("enrich-studio fetch failed for %s: %s", chosen.page_url, e) return EnrichStudioOut(scene_id=scene_id, studio_id=None, studio_name=None, tube_used="pornhatcom") m = re.search( r"class=\"info-video js-ajax-dvd[^\"]*\"[^>]*data-setup='([^']+)'", r.text, re.IGNORECASE, ) if m is None: return EnrichStudioOut(scene_id=scene_id, studio_id=None, studio_name=None, tube_used="pornhatcom") try: data = _json.loads(m.group(1)) except _json.JSONDecodeError: return EnrichStudioOut(scene_id=scene_id, studio_id=None, studio_name=None, tube_used="pornhatcom") name = (data.get("title") or "").strip() if not name: return EnrichStudioOut(scene_id=scene_id, studio_id=None, studio_name=None, tube_used="pornhatcom") slug = (data.get("dir") or "").strip() or slugify(name) studio = session.execute( select(Studio).where(Studio.slug == slug) ).scalar_one_or_none() if studio is None: studio = session.execute( select(Studio).where(Studio.name == name) ).scalar_one_or_none() if studio is None: studio = Studio(name=name, slug=slug) session.add(studio) session.flush() scene.studio_id = studio.id session.commit() return EnrichStudioOut( scene_id=scene_id, studio_id=studio.id, studio_name=studio.name, tube_used="pornhatcom" ) class EnrichThumbOut(BaseModel): scene_id: uuid.UUID thumbnail_url: str | None tube_used: str | None sources_updated: int @router.post("/{scene_id}/enrich-thumbnail", response_model=EnrichThumbOut) def enrich_thumbnail_from_tube( scene_id: uuid.UUID, session: Annotated[Session, Depends(get_session)], force: bool = False, ) -> EnrichThumbOut: """Pobiera detail page z dowolnego tube playback_source bez thumbnail_url i wyciąga miniaturkę (og:image / twitter:image / LD-JSON thumbnailUrl / KVS html5player). Update'uje WSZYSTKIE PlaybackSource'y dla tej sceny które nie mają thumb, żeby kolejne otwarcia listy widziały miniaturę niezależnie od source pick. Mobile auto-wywoła to przy otwarciu SceneDetail bez thumb (jak duration). `force=true` (przycisk "Refresh thumbnail" na SceneDetail, zgłoszenie d3376a71): NADPISUJE istniejącą miniaturę świeżą ze strony tube'a — dla zepsutych/stałych (rotting sxyprn/trafficdeposit, błędna grafika).""" from app.extractors._fetch import browser_get from app.extractors._models import TubePageError from app.extractors.thumb_extract import extract_thumbnail_url from app.models.playback_source import PlaybackSource scene = session.get(Scene, scene_id) if scene is None: raise HTTPException(status_code=404, detail="scene not found") sources = session.execute( select(PlaybackSource).where( PlaybackSource.scene_id == scene_id, PlaybackSource.dead_at.is_(None), PlaybackSource.origin.like("tube:%"), ) ).scalars().all() sources_with_thumb = [s for s in sources if s.thumbnail_url] if sources_with_thumb and not force: # już mamy — idempotent return (force=true pomija, żeby odświeżyć). return EnrichThumbOut( scene_id=scene_id, thumbnail_url=sources_with_thumb[0].thumbnail_url, tube_used=None, sources_updated=0, ) for src in sources: try: r = browser_get(src.page_url, timeout=15.0, follow_redirects=True) r.raise_for_status() except (TubePageError, Exception) as e: log.debug("enrich-thumbnail fetch failed for %s: %s", src.page_url, e) continue thumb = extract_thumbnail_url(r.text) if thumb: # Zapisz na wszystkich źródłach bez thumb (force → też nadpisz istniejące). updated = 0 for s in sources: if force or not s.thumbnail_url: s.thumbnail_url = thumb updated += 1 session.commit() return EnrichThumbOut( scene_id=scene_id, thumbnail_url=thumb, tube_used=src.origin.split(":", 1)[1] if ":" in src.origin else None, sources_updated=updated, ) return EnrichThumbOut( scene_id=scene_id, thumbnail_url=None, tube_used=None, sources_updated=0 )