Filtered /scenes (tag/origin/q/studio/performer) ran exhaustive COUNT with
stub-filter EXISTS over 1.7M rows: TAG 5.1s, ORIGIN 4.9s, SEARCH 3.1s.
Mobile relied on `loaded < total` for infinite-scroll, making exact count
mandatory and ruling out approximate shortcuts.
Backend:
- SceneListOut gains has_more (bool) and total_capped (bool), both optional
for backward compat with old mobile
- Filtered count uses a LIMIT _COUNT_CAP+1 subquery (_COUNT_CAP = 1000) — cost is
O(min(matches, cap)) instead of O(all). Measured: TAG 5.1s→664ms,
SEARCH 3.1s→138ms, ORIGIN 4.9s→1.07s (also fixes SiteScenes showing
global count ~1M instead of per-site count)
- has_more from fetching per_page+1 rows (essentially free); extra row
stripped before serialisation
- Pure-default list (no filters at all) keeps TTL-cached full count
Mobile:
- getNextPageParam uses has_more, falling back to loaded<total when the field is absent
- Display shows "{total}+" when total_capped=true (5 screens)
Verified on emulator: tag "Big Tits" → "1000 scenes" loaded, no 500s,
backward compat confirmed (old APK works against new backend).
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1029 lines
38 KiB
Python
1029 lines
38 KiB
Python
"""GET /scenes — lista i szczegóły scen z bazy kanonicznej."""
|
||
from __future__ import annotations
|
||
|
||
import logging
|
||
import re
|
||
import uuid
|
||
from typing import Annotated
|
||
|
||
from fastapi import APIRouter, Depends, HTTPException, Query, status
|
||
from pydantic import BaseModel
|
||
from sqlalchemy import distinct, exists, func, select
|
||
from sqlalchemy.exc import IntegrityError
|
||
from sqlalchemy.orm import Session
|
||
|
||
from app.auth import require_api_key
|
||
|
||
from app.api.schemas import (
|
||
ExternalRefOut,
|
||
PerformerOut,
|
||
PlaybackSourceOut,
|
||
SceneListOut,
|
||
SceneOut,
|
||
StudioOut,
|
||
TagOut,
|
||
)
|
||
from app.db import get_session
|
||
from app.models.favorite_scene import FavoriteScene
|
||
from app.models.performer import Performer
|
||
from app.models.play_progress import ScenePlayProgress
|
||
from app.models.playback_source import PlaybackSource
|
||
from app.models.scene import Scene, SceneExternalRef, ScenePerformer, SceneTag
|
||
from app.models.source import Source, SourceKind
|
||
from app.models.studio import Studio
|
||
from app.models.tag import Tag
|
||
|
||
log = logging.getLogger(__name__)
|
||
|
||
router = APIRouter(prefix="/scenes", tags=["scenes"], dependencies=[Depends(require_api_key)])
|
||
|
||
|
||
# Sort keys accepted by GET /scenes; list_scenes rejects anything else with HTTP 400.
_VALID_SORTS = {"title", "studio", "release_date", "created_at"}

# In-process TTL cache for the number of scenes that have a live playback source
# (used by the default, unfiltered list). Per the original measurements, a full scan
# of ~1.69M scenes plus the EXISTS probe takes ~950 ms; the number changes slowly and
# is only an approximate pagination header, so a 10-minute cache held in the memory
# of the API process is an acceptable trade-off.
_DEFAULT_COUNT_CACHE: dict = dict(ts=0.0, val=0)
_DEFAULT_COUNT_TTL = 10 * 60.0

# Bounded count for FILTERED lists (origin/tag/q/studio/performer/...). Per the
# original measurements, an exhaustive count with the per-row stub-filter EXISTS takes
# ~3-5 s at 1.7M scenes. The count is therefore taken only up to CAP+1: `LIMIT` stops
# once CAP+1 matching rows are found, so the cost is O(min(matches, CAP)) rather than
# O(all). Above CAP the UI shows "{CAP}+". Pagination is driven by has_more
# (fetching per_page+1 rows), so the bounded total does NOT break infinite scroll.
_COUNT_CAP = 1000
|
||
|
||
|
||
def _default_scene_count(session: Session) -> int:
    """Return the number of scenes with at least one live playback source, cached.

    The result is stored in the module-level ``_DEFAULT_COUNT_CACHE`` and reused
    while it is younger than ``_DEFAULT_COUNT_TTL`` seconds on the monotonic clock.
    A cached value of 0 counts as "not populated" and is always recomputed.

    Args:
        session: SQLAlchemy session used to run the count when the cache is stale.

    Returns:
        The cached or freshly computed scene count.
    """
    from time import monotonic

    now = monotonic()
    cached = _DEFAULT_COUNT_CACHE["val"]
    age = now - _DEFAULT_COUNT_CACHE["ts"]
    if cached and age < _DEFAULT_COUNT_TTL:
        return cached

    # "Live" means a playback source whose dead_at is NULL.
    live_playback = exists(
        select(1).where(
            PlaybackSource.scene_id == Scene.id,
            PlaybackSource.dead_at.is_(None),
        )
    )
    scenes_with_playback = select(Scene.id).where(live_playback).subquery()
    total = session.execute(
        select(func.count()).select_from(scenes_with_playback)
    ).scalar_one()

    _DEFAULT_COUNT_CACHE["ts"] = now
    _DEFAULT_COUNT_CACHE["val"] = total
    return total
|
||
|
||
|
||
def _split_csv(raw: str | None) -> list[str]:
    """Split a comma-separated string into trimmed, non-empty items.

    Args:
        raw: The raw query-parameter value; ``None`` or ``""`` yields ``[]``.

    Returns:
        Items in their original order, whitespace-stripped, with blanks dropped.
    """
    if raw is None or raw == "":
        return []
    trimmed = (piece.strip() for piece in raw.split(","))
    return [piece for piece in trimmed if piece]
|
||
|
||
|
||
@router.get("", response_model=SceneListOut)
def list_scenes(
    session: Annotated[Session, Depends(get_session)],
    q: str | None = Query(default=None, description="Wyszukiwanie po title_normalized (trgm)"),
    studio_slug: str | None = Query(default=None, description="DEPRECATED — użyj studio_slugs"),
    studio_slugs: str | None = Query(
        default=None, description="Comma-separated studio slugs (OR)"
    ),
    tags: str | None = Query(
        default=None,
        description="Comma-separated tag slugs (AND — scena musi mieć wszystkie wybrane tagi)",
    ),
    performer_ids: str | None = Query(
        default=None,
        description="Comma-separated performer UUIDs (AND — scena musi mieć wszystkich wybranych performerów)",
    ),
    has_playback: bool | None = Query(
        default=None, description="True: tylko sceny z ≥1 playback_source"
    ),
    has_animated_thumbnail: bool | None = Query(
        default=None,
        description="True: tylko sceny z ≥1 playback_source z animated_thumbnail_url (hold-to-preview)",
    ),
    min_duration_sec: int | None = Query(default=None, ge=0),
    max_duration_sec: int | None = Query(default=None, ge=0),
    released_within_days: int | None = Query(
        default=None, ge=1,
        description="Tylko sceny released w ostatnich N dniach",
    ),
    min_quality_p: int | None = Query(
        default=None, ge=1,
        description=(
            "Minimum quality (pixele wysokości — 2160 = 4K, 1080 = FullHD). Filtruje "
            "po PlaybackSource.quality (string typu '720p' / '1080p Full HD')."
        ),
    ),
    origin: str | None = Query(
        default=None,
        description=(
            "Filtruj po playback origin (np. 'tube:hqpornercom'). Substring match — "
            "'hqporner' złapie tube:hqpornercom. Diagnostyka per-hoster."
        ),
    ),
    include_stubs: bool = Query(
        default=False,
        description=(
            "False (default): ukrywa sceny-szkielety bez release_date, < 10min, "
            "z jedynym playback z hqporner (~7-min Brazzers trailer clipy zalewają katalog)."
        ),
    ),
    sort: str = Query(default="created_at", description="created_at|release_date|title|studio"),
    page: int = Query(default=1, ge=1),
    per_page: int = Query(default=50, ge=1, le=200),
) -> SceneListOut:
    """List scenes with optional filters, sorting and page-based pagination.

    All filters are combined with AND. Blacklisted performers, studios and tags
    are always excluded. ``has_animated_thumbnail=False`` is treated the same as
    "not set" (only a truthy value adds a filter).

    Count strategy:
      * No filter at all: ``total`` comes from ``_default_scene_count`` (TTL-cached
        count of scenes with a live playback source, so it is approximate with
        respect to the blacklist/stub conditions applied to the rows themselves).
      * Any filter: ``total`` is counted only up to ``_COUNT_CAP``; when more rows
        match, ``total == _COUNT_CAP`` and ``total_capped`` is True.

    ``has_more`` is derived from fetching ``per_page + 1`` rows and is the source
    of truth for pagination. Every sort order ends with ``Scene.id`` so that rows
    sharing the same timestamps keep a stable position between page requests.

    Raises:
        HTTPException: 400 when ``sort`` is not in ``_VALID_SORTS`` or when
            ``performer_ids`` contains a value that is not a UUID.
    """
    if sort not in _VALID_SORTS:
        raise HTTPException(status_code=400, detail=f"sort must be one of {sorted(_VALID_SORTS)}")

    base = select(Scene)

    if q:
        base = base.where(Scene.title_normalized.ilike(f"%{q.lower()}%"))

    # studio_slugs (CSV) and the deprecated single studio_slug are merged; OR semantics.
    studio_slug_list = _split_csv(studio_slugs)
    if studio_slug:
        studio_slug_list.append(studio_slug)
    if studio_slug_list:
        base = base.where(
            Scene.studio_id.in_(
                select(Studio.id).where(Studio.slug.in_(studio_slug_list))
            )
        )

    tag_slug_list = _split_csv(tags)
    # AND between tags: a scene must carry ALL selected tags. Each slug gets its own
    # exists(), so ticking more filters narrows the result, matching user intuition.
    for slug in tag_slug_list:
        base = base.where(
            exists(
                select(1)
                .select_from(SceneTag)
                .join(Tag, Tag.id == SceneTag.tag_id)
                .where(SceneTag.scene_id == Scene.id, Tag.slug == slug)
            )
        )

    perf_id_strings = _split_csv(performer_ids)
    if perf_id_strings:
        try:
            perf_ids = [uuid.UUID(s) for s in perf_id_strings]
        except ValueError as e:
            raise HTTPException(status_code=400, detail=f"invalid performer UUID: {e}") from e
        # AND between performers (same approach as tags).
        for pid in perf_ids:
            base = base.where(
                exists(
                    select(1)
                    .select_from(ScenePerformer)
                    .where(
                        ScenePerformer.scene_id == Scene.id,
                        ScenePerformer.performer_id == pid,
                    )
                )
            )

    if has_playback is True:
        # Only scenes with at least one LIVE playback source (dead_at IS NULL).
        base = base.where(
            exists(
                select(1).where(
                    PlaybackSource.scene_id == Scene.id,
                    PlaybackSource.dead_at.is_(None),
                )
            )
        )
    elif has_playback is False:
        base = base.where(
            ~exists(
                select(1).where(
                    PlaybackSource.scene_id == Scene.id,
                    PlaybackSource.dead_at.is_(None),
                )
            )
        )

    if origin:
        # Substring match on origin — 'hqporner' matches 'tube:hqpornercom'.
        base = base.where(
            exists(
                select(1).where(
                    PlaybackSource.scene_id == Scene.id,
                    PlaybackSource.dead_at.is_(None),
                    PlaybackSource.origin.ilike(f"%{origin}%"),
                )
            )
        )

    # Blacklists — global exclusions. A scene is dropped when it has ANY blacklisted
    # performer, belongs to a blacklisted studio, or has ANY blacklisted tag.
    from app.models.blacklist import (
        BlacklistedPerformer,
        BlacklistedStudio,
        BlacklistedTag,
    )
    base = base.where(
        ~exists(
            select(1)
            .select_from(ScenePerformer)
            .join(BlacklistedPerformer, BlacklistedPerformer.performer_id == ScenePerformer.performer_id)
            .where(ScenePerformer.scene_id == Scene.id)
        )
    )
    base = base.where(
        ~Scene.studio_id.in_(select(BlacklistedStudio.studio_id))
    )
    base = base.where(
        ~exists(
            select(1)
            .select_from(SceneTag)
            .join(BlacklistedTag, BlacklistedTag.tag_id == SceneTag.tag_id)
            .where(SceneTag.scene_id == Scene.id)
        )
    )

    if has_animated_thumbnail:
        base = base.where(
            exists(
                select(1).where(
                    PlaybackSource.scene_id == Scene.id,
                    PlaybackSource.dead_at.is_(None),
                    PlaybackSource.animated_thumbnail_url.isnot(None),
                )
            )
        )

    if min_duration_sec is not None:
        base = base.where(Scene.duration_sec >= min_duration_sec)
    if max_duration_sec is not None:
        base = base.where(Scene.duration_sec <= max_duration_sec)

    if released_within_days is not None:
        from datetime import date, timedelta
        cutoff = date.today() - timedelta(days=released_within_days)
        base = base.where(Scene.release_date >= cutoff)

    if min_quality_p is not None:
        # PlaybackSource.quality is a free-form string — the first run of digits is
        # used as the height ('1080p', '1080p Full HD', '2160p'). Heuristic: one live
        # playback source with a number >= min is enough. '4K'/'UHD' alias to 2160.
        from sqlalchemy import Integer, cast, or_
        numeric_q = cast(
            func.coalesce(func.substring(PlaybackSource.quality, r"\d+"), "0"),
            Integer,
        )
        conds = [numeric_q >= min_quality_p]
        if min_quality_p <= 2160:
            conds.append(PlaybackSource.quality.ilike("%4k%"))
            conds.append(PlaybackSource.quality.ilike("%uhd%"))
        base = base.where(
            exists(
                select(1).where(
                    PlaybackSource.scene_id == Scene.id,
                    PlaybackSource.dead_at.is_(None),
                    PlaybackSource.quality.isnot(None),
                    or_(*conds),
                )
            )
        )

    if not include_stubs:
        # Stub heuristic: a scene WITHOUT release_date AND WITHOUT a canonical
        # (TPDB/StashDB) external ref AND WITHOUT any ScenePerformer link.
        # Per the original author's note, the continuous worker adds ScenePerformer
        # links (search-by-name forces the link), so per-performer search results are
        # never stubs; this only hides anonymous tube-only scenes from ingest that
        # were never matched with a performer.
        # NOTE(review): the include_stubs parameter description mentions duration and
        # hqporner-only playback, which this condition does not check — confirm which
        # of the two is the intended definition.
        canonical_exists = exists(
            select(1)
            .select_from(SceneExternalRef)
            .join(Source, Source.id == SceneExternalRef.source_id)
            .where(SceneExternalRef.scene_id == Scene.id)
            .where(Source.kind.in_([SourceKind.tpdb, SourceKind.stashdb]))
        )
        has_performer = exists(
            select(1).where(ScenePerformer.scene_id == Scene.id)
        )
        # NOT a stub when it has a canonical ref OR a release_date OR a performer.
        base = base.where(
            Scene.release_date.is_not(None) | canonical_exists | has_performer
        )

    # Count strategy:
    # - PURE default (no filter whatsoever): cached count of the whole catalogue
    #   (see _default_scene_count).
    # - FILTERED (origin/tag/q/studio/performer/quality/duration/...): count bounded
    #   by _COUNT_CAP; the LIMIT stops the scan after CAP+1 hits. Clients paginate on
    #   has_more (below), not on total, so the cap does not break infinite scroll.
    _is_pure_default = (
        not include_stubs and not q and not studio_slug_list and not tag_slug_list
        and not perf_id_strings and origin is None and has_playback is None
        and not has_animated_thumbnail and min_duration_sec is None
        and max_duration_sec is None and released_within_days is None
        and min_quality_p is None
    )
    total_capped = False
    if _is_pure_default:
        total = _default_scene_count(session)
    else:
        cnt = session.execute(
            select(func.count()).select_from(base.limit(_COUNT_CAP + 1).subquery())
        ).scalar_one()
        if cnt > _COUNT_CAP:
            total, total_capped = _COUNT_CAP, True
        else:
            total = cnt

    # Sort: every order ends with Scene.id. created_at / release_date are not unique
    # (rows can share a timestamp), and without a unique final key the database may
    # return tied rows in a different order on each request, which makes OFFSET
    # pagination repeat or skip scenes.
    if sort == "release_date":
        ordered = base.order_by(
            Scene.release_date.desc().nullslast(),
            Scene.created_at.desc(),
            Scene.id.desc(),
        )
    elif sort == "title":
        ordered = base.order_by(
            Scene.title_normalized.asc(),
            Scene.created_at.desc(),
            Scene.id.desc(),
        )
    elif sort == "studio":
        # Scenes without a studio go last; within a studio, newest first.
        ordered = (
            base.outerjoin(Studio, Studio.id == Scene.studio_id)
            .order_by(
                Studio.name_normalized.asc().nullslast(),
                Scene.release_date.desc().nullslast(),
                Scene.created_at.desc(),
                Scene.id.desc(),
            )
        )
    else:  # created_at
        ordered = base.order_by(
            Scene.created_at.desc(),
            Scene.release_date.desc().nullslast(),
            Scene.id.desc(),
        )

    # Fetch per_page+1 rows — the presence of the extra row means there is another
    # page. This is the source of truth for pagination, independent of the bounded
    # `total`. The surplus row is cut off before serialisation.
    rows = (
        session.execute(ordered.offset((page - 1) * per_page).limit(per_page + 1))
        .scalars()
        .all()
    )
    has_more = len(rows) > per_page
    rows = rows[:per_page]

    items = _build_scenes_out_batch(session, list(rows))

    return SceneListOut(
        items=items,
        total=total,
        page=page,
        per_page=per_page,
        has_more=has_more,
        total_capped=total_capped,
    )
|
||
|
||
|
||
@router.get("/{scene_id}", response_model=SceneOut)
def get_scene(
    scene_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> SceneOut:
    """Return a single scene with all of its relations.

    Raises:
        HTTPException: 404 when no scene with ``scene_id`` exists.
    """
    found = session.get(Scene, scene_id)
    if found is not None:
        return _build_scene_out(session, found)
    raise HTTPException(status_code=404, detail="scene not found")
|
||
|
||
|
||
def _needs_proxy(url: str) -> bool:
    """Tell whether a thumbnail URL has to be routed through the backend image proxy.

    All thumbnails from playback sources are proxied: per the original author's
    note, most tube CDNs require a Referer header and the mobile image component
    (expo-image) does not send one. Backend-internal paths (anything not starting
    with ``http``, e.g. ``/proxy/...``) are left untouched.
    """
    if url.startswith("/proxy/"):
        return False
    return url.startswith("http")
|
||
|
||
|
||
def _wrap_image_proxy(url: str, referer: str) -> str:
    """Wrap a thumbnail URL as a backend path ``/proxy/img/{token}/img.{ext}``.

    The client never needs to know the Referer — it is packed into the token
    together with the URL. The token gets a long TTL (30 days); per the original
    author's note thumbnails are stable and short TTLs would only churn caches.

    Args:
        url: Absolute thumbnail URL to proxy.
        referer: Value passed to ``make_token`` as the referer for the upstream fetch.

    Returns:
        A backend-relative proxy path. The extension is copied from the URL *path*
        so the response content type can be recognised; it defaults to ``jpg``.
    """
    import posixpath
    from urllib.parse import urlsplit

    from app.api.stream_proxy import make_token

    token = make_token(url, referer, ttl_sec=30 * 24 * 3600)
    # Use only the path component: splitting the raw URL would treat the host's
    # TLD as an extension for path-less URLs ("https://cdn.example.com" -> "com")
    # and would keep a "#fragment" glued to the extension.
    try:
        path = urlsplit(url).path
    except ValueError:
        # urlsplit rejects some malformed hosts; fall back to the default extension.
        path = ""
    ext = posixpath.splitext(path)[1].lstrip(".") or "jpg"
    return f"/proxy/img/{token}/img.{ext}"
|
||
|
||
|
||
def _build_scenes_out_batch(session: Session, scenes: list[Scene]) -> list[SceneOut]:
    """Serialise many scenes at once, loading each relation with one query.

    Issues up to seven queries in total (studios, performers, tags, external
    refs, live playback sources, play progress, favourites) instead of that many
    per scene, which removes the N+1 pattern of ``_build_scene_out`` on list
    endpoints. The original author measured ``/scenes?per_page=24`` dropping from
    ~9.6 s to under 500 ms. The single-scene endpoint keeps using
    ``_build_scene_out``.

    Args:
        session: SQLAlchemy session used for all lookups.
        scenes: Scenes to serialise; the output keeps this order.

    Returns:
        One ``SceneOut`` per input scene; ``[]`` for empty input (no queries run).
    """
    from collections import defaultdict

    if not scenes:
        return []

    scene_ids = [scene.id for scene in scenes]
    wanted_studio_ids = list(
        {scene.studio_id for scene in scenes if scene.studio_id is not None}
    )

    # 1) Studios — skipped entirely when no scene references a studio.
    studio_lookup: dict = {}
    if wanted_studio_ids:
        studio_rows = session.execute(
            select(Studio).where(Studio.id.in_(wanted_studio_ids))
        ).scalars()
        for studio in studio_rows:
            studio_lookup[studio.id] = studio

    # 2) Performers, ordered by their position (NULL positions last).
    performer_query = (
        select(ScenePerformer, Performer)
        .join(Performer, Performer.id == ScenePerformer.performer_id)
        .where(ScenePerformer.scene_id.in_(scene_ids))
        .order_by(ScenePerformer.position.asc().nullslast())
    )
    performers_of: dict = defaultdict(list)
    for link, person in session.execute(performer_query).all():
        performers_of[link.scene_id].append(
            PerformerOut(
                id=person.id,
                canonical_name=person.canonical_name,
                slug=person.slug,
                gender=person.gender.value if person.gender else None,
                as_alias=link.as_alias,
            )
        )

    # 3) Tags
    tag_query = (
        select(SceneTag.scene_id, Tag)
        .join(Tag, Tag.id == SceneTag.tag_id)
        .where(SceneTag.scene_id.in_(scene_ids))
    )
    tags_of: dict = defaultdict(list)
    for owner_id, tag_row in session.execute(tag_query).all():
        tags_of[owner_id].append(TagOut.model_validate(tag_row))

    # 4) External refs together with their source
    ref_query = (
        select(SceneExternalRef, Source)
        .join(Source, Source.id == SceneExternalRef.source_id)
        .where(SceneExternalRef.scene_id.in_(scene_ids))
    )
    refs_of: dict = defaultdict(list)
    for ext_ref, ref_source in session.execute(ref_query).all():
        refs_of[ext_ref.scene_id].append(
            ExternalRefOut(
                source=ref_source.name,
                external_id=ext_ref.external_id,
                url=ext_ref.url,
                last_seen=ext_ref.last_seen,
            )
        )

    # 5) Live playback sources (dead_at IS NULL), ordered by origin.
    playback_query = (
        select(PlaybackSource)
        .where(
            PlaybackSource.scene_id.in_(scene_ids),
            PlaybackSource.dead_at.is_(None),
        )
        .order_by(PlaybackSource.origin.asc())
    )
    playback_of: dict = defaultdict(list)
    for pb_row in session.execute(playback_query).scalars().all():
        pb_out = PlaybackSourceOut.model_validate(pb_row)
        # Route remote thumbnails through the image proxy, using the source's
        # page URL as the referer.
        for field in ("thumbnail_url", "animated_thumbnail_url"):
            raw_url = getattr(pb_out, field)
            if raw_url and _needs_proxy(raw_url):
                setattr(pb_out, field, _wrap_image_proxy(raw_url, pb_row.page_url))
        playback_of[pb_row.scene_id].append(pb_out)

    # 6) Play progress
    progress_query = select(ScenePlayProgress).where(
        ScenePlayProgress.scene_id.in_(scene_ids)
    )
    progress_lookup: dict = {}
    for progress_row in session.execute(progress_query).scalars():
        progress_lookup[progress_row.scene_id] = progress_row

    # 7) Favourites — only membership matters, so a set of ids is enough.
    favourite_query = select(FavoriteScene.scene_id).where(
        FavoriteScene.scene_id.in_(scene_ids)
    )
    favourite_ids: set = set(session.execute(favourite_query).scalars())

    result: list[SceneOut] = []
    for scene in scenes:
        studio_row = None
        if scene.studio_id is not None:
            studio_row = studio_lookup.get(scene.studio_id)
        studio_out = (
            StudioOut.model_validate(studio_row) if studio_row is not None else None
        )
        progress = progress_lookup.get(scene.id)
        result.append(
            SceneOut(
                id=scene.id,
                title=scene.title,
                slug=scene.slug,
                release_date=scene.release_date,
                duration_sec=scene.duration_sec,
                description=scene.description,
                code=scene.code,
                director=scene.director,
                studio=studio_out,
                performers=performers_of.get(scene.id, []),
                tags=tags_of.get(scene.id, []),
                external_refs=refs_of.get(scene.id, []),
                playback_sources=playback_of.get(scene.id, []),
                created_at=scene.created_at,
                last_played_at=progress.last_played_at if progress else None,
                finished=progress.finished if progress else False,
                position_sec=progress.position_sec if progress else 0,
                is_favorite=scene.id in favourite_ids,
            )
        )
    return result
|
||
|
||
|
||
def _build_scene_out(session: Session, scene: Scene) -> SceneOut:
    """Serialise one scene together with all of its relations.

    Loads the studio, performers (ordered by position, NULLs last), tags,
    external refs, live playback sources (ordered by origin), play progress and
    the favourite flag with separate lookups.

    Args:
        session: SQLAlchemy session used for all lookups.
        scene: The scene row to serialise.

    Returns:
        The fully populated ``SceneOut``.
    """
    studio_out: StudioOut | None = None
    if scene.studio_id is not None:
        studio_row = session.get(Studio, scene.studio_id)
        if studio_row is not None:
            studio_out = StudioOut.model_validate(studio_row)

    performer_query = (
        select(ScenePerformer, Performer)
        .join(Performer, Performer.id == ScenePerformer.performer_id)
        .where(ScenePerformer.scene_id == scene.id)
        .order_by(ScenePerformer.position.asc().nullslast())
    )
    performers_out: list[PerformerOut] = [
        PerformerOut(
            id=person.id,
            canonical_name=person.canonical_name,
            slug=person.slug,
            gender=person.gender.value if person.gender else None,
            as_alias=link.as_alias,
        )
        for link, person in session.execute(performer_query).all()
    ]

    tag_query = (
        select(Tag)
        .join(SceneTag, SceneTag.tag_id == Tag.id)
        .where(SceneTag.scene_id == scene.id)
    )
    tags_out = [
        TagOut.model_validate(tag_row)
        for tag_row in session.execute(tag_query).scalars().all()
    ]

    ref_query = (
        select(SceneExternalRef, Source)
        .join(Source, Source.id == SceneExternalRef.source_id)
        .where(SceneExternalRef.scene_id == scene.id)
    )
    refs_out = []
    for ext_ref, ref_source in session.execute(ref_query).all():
        refs_out.append(
            ExternalRefOut(
                source=ref_source.name,
                external_id=ext_ref.external_id,
                url=ext_ref.url,
                last_seen=ext_ref.last_seen,
            )
        )

    playback_query = (
        select(PlaybackSource)
        .where(
            PlaybackSource.scene_id == scene.id,
            PlaybackSource.dead_at.is_(None),  # hide dead links
        )
        .order_by(PlaybackSource.origin.asc())
    )
    playback_out: list[PlaybackSourceOut] = []
    for pb_row in session.execute(playback_query).scalars().all():
        pb_out = PlaybackSourceOut.model_validate(pb_row)
        # Thumbnail URLs go through the backend image proxy when the CDN needs a
        # Referer (the original note cites hqporner's CDN answering 403 without
        # one, and expo-image not sending it by default). The proxy token has a
        # 30-day TTL because thumbnails are stable.
        for field in ("thumbnail_url", "animated_thumbnail_url"):
            raw_url = getattr(pb_out, field)
            if raw_url and _needs_proxy(raw_url):
                setattr(pb_out, field, _wrap_image_proxy(raw_url, pb_row.page_url))
        playback_out.append(pb_out)

    progress = session.get(ScenePlayProgress, scene.id)
    favourite_row = session.get(FavoriteScene, scene.id)
    is_fav = favourite_row is not None

    return SceneOut(
        id=scene.id,
        title=scene.title,
        slug=scene.slug,
        release_date=scene.release_date,
        duration_sec=scene.duration_sec,
        description=scene.description,
        code=scene.code,
        director=scene.director,
        studio=studio_out,
        performers=performers_out,
        tags=tags_out,
        external_refs=refs_out,
        playback_sources=playback_out,
        created_at=scene.created_at,
        last_played_at=progress.last_played_at if progress else None,
        finished=progress.finished if progress else False,
        position_sec=progress.position_sec if progress else 0,
        is_favorite=is_fav,
    )
|
||
|
||
|
||
@router.delete("/{scene_id}/tags/{tag_id}", status_code=status.HTTP_204_NO_CONTENT)
def remove_tag_from_scene(
    scene_id: uuid.UUID,
    tag_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> None:
    """Remove the scene↔tag link (e.g. the user decided the tag is wrong for this scene).

    Idempotent: a missing link counts as success. The Tag itself is not deleted —
    other scenes may still use it, and it stays in the tag dictionary.
    """
    lookup = select(SceneTag).where(
        SceneTag.scene_id == scene_id, SceneTag.tag_id == tag_id
    )
    link = session.execute(lookup).scalar_one_or_none()
    if link is not None:
        session.delete(link)
        session.commit()
|
||
|
||
|
||
@router.delete(
    "/{scene_id}/performers/{performer_id}", status_code=status.HTTP_204_NO_CONTENT
)
def remove_performer_from_scene(
    scene_id: uuid.UUID,
    performer_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> None:
    """Remove the scene↔performer link (a false dedup match attached the wrong person).

    Idempotent. The Performer itself is kept. Useful e.g. when a fuzzy alias match
    on "Bella" pulled Anna Bella's scenes under Bad Bella, or when Miss Teela on
    xnxx was attached to scenes she is not in (reports from 2026-05-10).
    """
    from app.models.scene import ScenePerformer

    lookup = select(ScenePerformer).where(
        ScenePerformer.scene_id == scene_id,
        ScenePerformer.performer_id == performer_id,
    )
    link = session.execute(lookup).scalar_one_or_none()
    if link is not None:
        session.delete(link)
        session.commit()
|
||
|
||
|
||
# Response body of POST /scenes/{scene_id}/enrich-tags.
class EnrichTagsOut(BaseModel):
    # Scene the request was made for.
    scene_id: uuid.UUID
    # Number of scene↔tag links actually inserted by this call.
    added: int
    # Site tag of the tube source that was fetched; None when no usable source existed.
    tube_used: str | None
    # All tag names scraped from the page (not only the newly added ones).
    tags: list[str]
|
||
|
||
|
||
@router.post("/{scene_id}/enrich-tags", response_model=EnrichTagsOut)
def enrich_tags_from_tube(
    scene_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> EnrichTagsOut:
    """Fetch the page HTML of one tube playback source of the scene and scrape tags.

    Scraped tags (categories/tags) that the scene does not have yet are added to
    scene_tags.

    The mobile app calls this when opening SceneDetail if the scene has 0 tags AND
    has a tube source with a supported extractor (porntrex/youporn/xvideos/xnxx/
    redtube/xhamster/eporner).

    Idempotent: calling again with the same tags changes nothing (UNIQUE PK on
    scene_tags). The tube source is picked by a priority list (mainstream sites
    are more reliable than aggregators).

    Raises:
        HTTPException: 404 when the scene does not exist.
    """
    from app.extractors._fetch import browser_get
    from app.extractors._models import TubePageError
    from app.extractors.tag_extract import EXTRACTORS, extract_tags
    from app.models.playback_source import PlaybackSource
    from app.models.tag import Tag
    from app.normalize.scenes import NormalizedTag
    from app.normalize.text import slugify
    from app.resolve.tag_resolver import resolve_tag

    if session.get(Scene, scene_id) is None:
        raise HTTPException(status_code=404, detail="scene not found")

    # Priority: mainstream tubes (rich metadata) before niche ones (fewer tags or garbage).
    site_priority = ["xhamstercom", "porntrexcom", "epornercom", "youporncom",
                     "xvideoscom", "xnxxcom", "redtubecom", "pornhatcom"]
    live_sources = session.execute(
        select(PlaybackSource).where(
            PlaybackSource.scene_id == scene_id,
            PlaybackSource.dead_at.is_(None),
        )
    ).scalars().all()

    # First pass: the highest-priority site that has a live source for this scene.
    chosen: PlaybackSource | None = next(
        (
            candidate
            for site in site_priority
            for candidate in live_sources
            if candidate.origin == f"tube:{site}"
        ),
        None,
    )
    if chosen is None:
        # Fallback: any tube source whose site tag has a registered extractor.
        chosen = next(
            (
                candidate
                for candidate in live_sources
                if candidate.origin.startswith("tube:")
                and candidate.origin.split(":", 1)[1] in EXTRACTORS
            ),
            None,
        )

    if chosen is None:
        return EnrichTagsOut(scene_id=scene_id, added=0, tube_used=None, tags=[])

    sitetag = chosen.origin.split(":", 1)[1]
    try:
        response = browser_get(chosen.page_url, timeout=15.0, follow_redirects=True)
        response.raise_for_status()
    # NOTE(review): if TubePageError subclasses Exception the tuple is redundant — confirm.
    except (TubePageError, Exception) as e:
        log.warning("enrich-tags fetch failed for %s: %s", chosen.page_url, e)
        return EnrichTagsOut(scene_id=scene_id, added=0, tube_used=sitetag, tags=[])

    tag_names = extract_tags(sitetag, response.text)
    if not tag_names:
        return EnrichTagsOut(scene_id=scene_id, added=0, tube_used=sitetag, tags=[])

    # Upsert: for every tag find/create the Tag and add the SceneTag idempotently.
    # PostgreSQL INSERT ... ON CONFLICT DO NOTHING is used instead of ORM
    # session.add() because, per the original author's note, `resolve_tag` flushes
    # the session inside the loop, emitting pending SceneTag INSERTs from earlier
    # iterations — when two concurrent enrich-tags calls collide on the same
    # (scene_id, tag_id), the second flush hit UniqueViolation (GOON-H, 4 events
    # in 10h despite the earlier seen_tag_ids fix). ON CONFLICT skips silently.
    from sqlalchemy.dialects.postgresql import insert as pg_insert

    added = 0
    handled_tag_ids: set = set()
    for name in tag_names:
        resolved = resolve_tag(
            session,
            norm=NormalizedTag(name=name, slug=slugify(name), external_id=None),
        )
        if resolved is None or resolved.id in handled_tag_ids:
            continue
        handled_tag_ids.add(resolved.id)
        link_insert = (
            pg_insert(SceneTag.__table__)
            .values(scene_id=scene_id, tag_id=resolved.id, source_id=None)
            .on_conflict_do_nothing(index_elements=["scene_id", "tag_id"])
        )
        outcome = session.execute(link_insert)
        # rowcount is 1 when the row was really inserted, 0 when ON CONFLICT skipped it.
        if outcome.rowcount and outcome.rowcount > 0:
            added += 1
    session.commit()
    return EnrichTagsOut(scene_id=scene_id, added=added, tube_used=sitetag, tags=tag_names)
|
||
|
||
|
||
# Response body of POST /scenes/{scene_id}/enrich-duration.
class EnrichDurationOut(BaseModel):
    # Scene the request was made for.
    scene_id: uuid.UUID
    # Duration already stored on the scene or freshly extracted; None when nothing was found.
    duration_sec: int | None
    # Site tag of the tube source that supplied the duration; None when the value
    # was already set or no source yielded one.
    tube_used: str | None
|
||
|
||
|
||
@router.post("/{scene_id}/enrich-duration", response_model=EnrichDurationOut)
def enrich_duration_from_tube(
    scene_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> EnrichDurationOut:
    """Extract the scene duration from any of its tube playback sources.

    Per the original author's note, all known tubes expose the duration on the
    detail page (og:video:duration or LD-JSON ISO 8601).

    The mobile app calls this when opening SceneDetail if scene.duration_sec is
    null AND the scene has a tube source. For dedup, duration is the strongest
    single signal — without it, scenes with a weak title-only score are capped at
    0.85 (review queue).

    Idempotent: returns the current duration_sec when it is already set.

    Raises:
        HTTPException: 404 when the scene does not exist.
    """
    from app.extractors._fetch import browser_get
    from app.extractors._models import TubePageError
    from app.extractors.duration_extract import extract_duration_sec
    from app.models.playback_source import PlaybackSource

    scene = session.get(Scene, scene_id)
    if scene is None:
        raise HTTPException(status_code=404, detail="scene not found")

    if scene.duration_sec is not None:
        return EnrichDurationOut(
            scene_id=scene_id, duration_sec=scene.duration_sec, tube_used=None
        )

    tube_sources = session.execute(
        select(PlaybackSource).where(
            PlaybackSource.scene_id == scene_id,
            PlaybackSource.dead_at.is_(None),
            PlaybackSource.origin.like("tube:%"),
        )
    ).scalars().all()

    # Try each live tube source in turn; the first usable duration wins.
    for candidate in tube_sources:
        try:
            response = browser_get(candidate.page_url, timeout=15.0, follow_redirects=True)
            response.raise_for_status()
        # NOTE(review): if TubePageError subclasses Exception the tuple is redundant — confirm.
        except (TubePageError, Exception) as e:
            log.debug("enrich-duration fetch failed for %s: %s", candidate.page_url, e)
            continue

        seconds = extract_duration_sec(response.text)
        if seconds is None or not seconds > 0:
            continue

        scene.duration_sec = seconds
        # Also store it on the playback source for parity (useful if per-source
        # duration mismatch detection is added later).
        if candidate.duration_sec is None:
            candidate.duration_sec = seconds
        session.commit()

        _, colon, site = candidate.origin.partition(":")
        return EnrichDurationOut(
            scene_id=scene_id,
            duration_sec=seconds,
            tube_used=site if colon else None,
        )

    return EnrichDurationOut(scene_id=scene_id, duration_sec=None, tube_used=None)
|
||
|
||
|
||
class EnrichStudioOut(BaseModel):
    """Response body of ``POST /{scene_id}/enrich-studio``."""

    # Scene the request was made for (echoed back from the path parameter).
    scene_id: uuid.UUID
    # Studio linked to the scene, or None when no studio could be resolved.
    studio_id: uuid.UUID | None
    # Display name of that studio, or None when it is unknown.
    studio_name: str | None
    # Tube identifier of the page consulted during this call, or None when
    # no tube page was consulted.
    tube_used: str | None
|
||
|
||
|
||
# Matches pornhat's DVD/series info element and captures the raw (still
# HTML-escaped) value of its single-quoted ``data-setup`` attribute.
_PORNHAT_DVD_SETUP_RE = re.compile(
    r"class=\"info-video js-ajax-dvd[^\"]*\"[^>]*data-setup='([^']+)'",
    re.IGNORECASE,
)


def _parse_pornhat_studio(page_html: str) -> tuple[str, str | None] | None:
    """Extract ``(studio_name, slug_hint)`` from a pornhat scene page.

    ``studio_name`` comes from the ``title`` key of the ``data-setup`` JSON
    and ``slug_hint`` from its ``dir`` key (``None`` when absent or blank).

    Returns ``None`` when the element is missing, the attribute is not valid
    JSON, the JSON is not an object, or the title is missing/blank/not a
    string.
    """
    import html as _html
    import json as _json

    found = _PORNHAT_DVD_SETUP_RE.search(page_html)
    if found is None:
        return None

    raw_attr = found.group(1)
    payload = None
    # Attribute values are HTML-escaped in page source, so decode character
    # references (e.g. ``&#39;``, ``&amp;``) before parsing. The raw text is
    # tried second so that anything that parsed before this decoding step
    # was introduced still parses.
    for candidate in (_html.unescape(raw_attr), raw_attr):
        try:
            payload = _json.loads(candidate)
        except _json.JSONDecodeError:
            continue
        break

    # Valid JSON can still be a list/number/string; only objects carry keys.
    if not isinstance(payload, dict):
        return None

    title = payload.get("title")
    name = title.strip() if isinstance(title, str) else ""
    if not name:
        return None

    directory = payload.get("dir")
    slug_hint = directory.strip() if isinstance(directory, str) else ""
    return name, (slug_hint or None)


@router.post("/{scene_id}/enrich-studio", response_model=EnrichStudioOut)
def enrich_studio_from_tube(
    scene_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> EnrichStudioOut:
    """Resolve the scene's studio (DVD/series) from its pornhat scene page.

    Pornhat exposes the studio as
    `class="info-video js-ajax-dvd" data-setup='{"title": "Adult Time", ...}'`.
    Other tubes will be supported once their markup pattern is known; for now
    only pornhat is handled (the cleanest studio metadata among free tubes).

    Behaviour:
    - If the scene already has ``studio_id``, that studio is returned without
      any fetch (``tube_used`` is ``None``).
    - If the scene has no live ``tube:pornhatcom`` playback source, an empty
      result with ``tube_used=None`` is returned.
    - If the fetch fails or the page yields no usable studio, an empty result
      with ``tube_used="pornhatcom"`` is returned.
    - Otherwise the studio is looked up by slug, then by name, created when
      neither exists, linked to the scene and committed.

    Raises:
        HTTPException: 404 when the scene does not exist.
    """
    from app.extractors._fetch import browser_get
    from app.extractors._models import TubePageError
    from app.models.playback_source import PlaybackSource
    from app.models.studio import Studio
    from app.normalize.text import slugify

    def _no_studio(tube_used: str | None) -> EnrichStudioOut:
        # Single place that builds the "nothing resolved" response.
        return EnrichStudioOut(
            scene_id=scene_id, studio_id=None, studio_name=None, tube_used=tube_used
        )

    scene = session.get(Scene, scene_id)
    if scene is None:
        raise HTTPException(status_code=404, detail="scene not found")

    if scene.studio_id is not None:
        existing = session.get(Studio, scene.studio_id)
        return EnrichStudioOut(
            scene_id=scene_id,
            studio_id=scene.studio_id,
            studio_name=existing.name if existing else None,
            tube_used=None,
        )

    chosen = session.execute(
        select(PlaybackSource).where(
            PlaybackSource.scene_id == scene_id,
            PlaybackSource.dead_at.is_(None),
            PlaybackSource.origin == "tube:pornhatcom",
        )
    ).scalars().first()
    if chosen is None:
        return _no_studio(None)

    try:
        r = browser_get(chosen.page_url, timeout=15.0, follow_redirects=True)
        r.raise_for_status()
    except (TubePageError, Exception) as e:
        # Best-effort endpoint: a failed fetch yields an empty result, not a 500.
        log.warning("enrich-studio fetch failed for %s: %s", chosen.page_url, e)
        return _no_studio("pornhatcom")

    parsed = _parse_pornhat_studio(r.text)
    if parsed is None:
        return _no_studio("pornhatcom")
    name, slug_hint = parsed
    # Prefer the slug pornhat itself uses; derive one from the name otherwise.
    slug = slug_hint or slugify(name)

    studio = session.execute(
        select(Studio).where(Studio.slug == slug)
    ).scalar_one_or_none()
    if studio is None:
        studio = session.execute(
            select(Studio).where(Studio.name == name)
        ).scalar_one_or_none()
    if studio is None:
        studio = Studio(name=name, slug=slug)
        session.add(studio)
        # Flush so the new row has its primary key before it is referenced.
        session.flush()

    scene.studio_id = studio.id
    session.commit()
    return EnrichStudioOut(
        scene_id=scene_id, studio_id=studio.id, studio_name=studio.name, tube_used="pornhatcom"
    )
|
||
|
||
|
||
class EnrichThumbOut(BaseModel):
    """Response body of ``POST /{scene_id}/enrich-thumbnail``."""

    # Scene the request was made for (echoed back from the path parameter).
    scene_id: uuid.UUID
    # Thumbnail URL known for the scene, or None when none could be found.
    thumbnail_url: str | None
    # Tube whose page supplied the thumbnail during this call, or None when
    # no page fetch produced it.
    tube_used: str | None
    # Number of playback sources whose thumbnail_url was written by this call.
    sources_updated: int
|
||
|
||
|
||
def _backfill_thumbnails(sources, thumbnail_url: str) -> int:
    """Set ``thumbnail_url`` on every source lacking one; return how many changed."""
    updated = 0
    for source in sources:
        if not source.thumbnail_url:
            source.thumbnail_url = thumbnail_url
            updated += 1
    return updated


@router.post("/{scene_id}/enrich-thumbnail", response_model=EnrichThumbOut)
def enrich_thumbnail_from_tube(
    scene_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> EnrichThumbOut:
    """Find a thumbnail for the scene and store it on its tube playback sources.

    Only live (``dead_at IS NULL``) playback sources whose origin starts with
    ``tube:`` are considered. The thumbnail is extracted from a source's detail
    page via ``extract_thumbnail_url`` (og:image / twitter:image / LD-JSON
    thumbnailUrl / KVS html5player, per the original notes).

    ALL of the scene's considered sources that lack a thumbnail are updated,
    so later list views see a thumbnail regardless of which source is picked.
    The mobile client is expected to call this automatically when SceneDetail
    opens without a thumbnail (same as duration enrichment).

    Behaviour:
    - If some source already has a thumbnail, it is returned without any
      network fetch (``tube_used=None``) and copied onto the sources that
      still lack one; ``sources_updated`` reports how many were filled.
    - Otherwise each source page is fetched in turn; the first page yielding
      a thumbnail wins, it is written to every source and committed.
    - If nothing is found, an empty result is returned.

    Raises:
        HTTPException: 404 when the scene does not exist.
    """
    from app.extractors._fetch import browser_get
    from app.extractors._models import TubePageError
    from app.extractors.thumb_extract import extract_thumbnail_url
    from app.models.playback_source import PlaybackSource

    scene = session.get(Scene, scene_id)
    if scene is None:
        raise HTTPException(status_code=404, detail="scene not found")

    sources = session.execute(
        select(PlaybackSource).where(
            PlaybackSource.scene_id == scene_id,
            PlaybackSource.dead_at.is_(None),
            PlaybackSource.origin.like("tube:%"),
        )
    ).scalars().all()

    known_thumb = next((s.thumbnail_url for s in sources if s.thumbnail_url), None)
    if known_thumb:
        # Already have one: no fetch needed. Still propagate it to sources
        # that lack a thumbnail, otherwise a list view that picks one of
        # those sources keeps showing no thumbnail.
        filled = _backfill_thumbnails(sources, known_thumb)
        if filled:
            session.commit()
        return EnrichThumbOut(
            scene_id=scene_id,
            thumbnail_url=known_thumb,
            tube_used=None,
            sources_updated=filled,
        )

    for src in sources:
        try:
            r = browser_get(src.page_url, timeout=15.0, follow_redirects=True)
            r.raise_for_status()
        except (TubePageError, Exception) as e:
            # Best-effort: a failing source is skipped and the next one tried.
            log.debug("enrich-thumbnail fetch failed for %s: %s", src.page_url, e)
            continue

        thumb = extract_thumbnail_url(r.text)
        if not thumb:
            continue

        # Store on every source without a thumbnail (avoids duplicate fetches).
        updated = _backfill_thumbnails(sources, thumb)
        session.commit()
        return EnrichThumbOut(
            scene_id=scene_id,
            thumbnail_url=thumb,
            tube_used=src.origin.split(":", 1)[1] if ":" in src.origin else None,
            sources_updated=updated,
        )

    return EnrichThumbOut(
        scene_id=scene_id, thumbnail_url=None, tube_used=None, sources_updated=0
    )
|