A merged scene often aggregates several uploads from ONE tube (re-encodes / 4K dups). bug-report aa79a995 "why 2 links, both porntrex?" = same scene std + 4K (porntrex 2591377 + 2593449 "...in 4K"). In the UI these are indistinguishable links to one hoster (same extractor). Keep one best per origin: prefer duration matching the scene → any duration → first (origin-asc stable). Dead already filtered. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
1167 lines
45 KiB
Python
1167 lines
45 KiB
Python
"""GET /scenes — lista i szczegóły scen z bazy kanonicznej."""
|
||
from __future__ import annotations
|
||
|
||
import logging
|
||
import re
|
||
import uuid
|
||
from typing import Annotated
|
||
|
||
from fastapi import APIRouter, Depends, HTTPException, Query, status
|
||
from pydantic import BaseModel
|
||
from sqlalchemy import distinct, exists, false, func, literal_column, select
|
||
from sqlalchemy.exc import IntegrityError
|
||
from sqlalchemy.orm import Session
|
||
|
||
from app.auth import require_api_key
|
||
|
||
from app.api.schemas import (
|
||
ExternalRefOut,
|
||
PerformerOut,
|
||
PlaybackSourceOut,
|
||
SceneListOut,
|
||
SceneOut,
|
||
StudioOut,
|
||
TagOut,
|
||
)
|
||
from app.db import get_session
|
||
from app.models.favorite_scene import FavoriteScene
|
||
from app.models.performer import Performer
|
||
from app.models.play_progress import ScenePlayProgress
|
||
from app.models.playback_source import PlaybackSource
|
||
from app.models.scene import Scene, SceneExternalRef, ScenePerformer, SceneTag
|
||
from app.models.source import Source, SourceKind
|
||
from app.models.studio import Studio
|
||
from app.models.tag import Tag
|
||
|
||
# Module-level logger named after this module.
log = logging.getLogger(__name__)

# Every route registered on this router lives under /scenes and runs the
# require_api_key dependency.
router = APIRouter(prefix="/scenes", tags=["scenes"], dependencies=[Depends(require_api_key)])


# Values accepted by the `sort` query parameter of the scene list endpoint.
_VALID_SORTS = {"created_at", "release_date", "title", "studio"}
|
||
|
||
# TTL cache for the count of scenes that have a live playback source (the default,
# unfiltered list). A full scan of 1.69M scenes plus EXISTS takes ~950ms; the number
# changes slowly and is only approximate (pagination header), so a 10-minute cache in
# the API process memory is an acceptable trade-off.
_DEFAULT_COUNT_CACHE: dict = {"ts": 0.0, "val": 0}
_DEFAULT_COUNT_TTL = 600.0


def _default_scene_count(session: Session) -> int:
    """Return the number of scenes with at least one live playback source.

    The value is served from ``_DEFAULT_COUNT_CACHE`` while it is non-zero and
    younger than ``_DEFAULT_COUNT_TTL`` seconds; otherwise it is recomputed with
    one COUNT query and stored back together with the current monotonic time.
    """
    import time as _time

    started = _time.monotonic()
    cached = _DEFAULT_COUNT_CACHE["val"]
    age = started - _DEFAULT_COUNT_CACHE["ts"]
    # A cached value of 0 is treated as "nothing cached" and always recomputed.
    if cached and age < _DEFAULT_COUNT_TTL:
        return cached

    # "Live" means the playback source has no dead_at timestamp.
    live_playback = exists(
        select(1).where(
            PlaybackSource.scene_id == Scene.id,
            PlaybackSource.dead_at.is_(None),
        )
    )
    scenes_with_playback = select(Scene.id).where(live_playback).subquery()
    fresh = session.execute(
        select(func.count()).select_from(scenes_with_playback)
    ).scalar_one()

    _DEFAULT_COUNT_CACHE.update(ts=started, val=fresh)
    return fresh
|
||
|
||
|
||
# The blacklists (performer/studio/tag) are usually EMPTY (self-hosted, single-user).
# Even so, three NOT EXISTS clauses used to be appended to EVERY filtered scene list and
# were evaluated per row — with a big-tag/has_playback filter the planner walks ~176k
# scenes, so these always-empty clauses cost ~3.4s (mega-tag "anal": 6.7s→3.3s once they
# were skipped). We cache the emptiness (TTL 5 min); when someone adds a blacklist entry
# the clauses come back within 5 minutes. See reference_scenes_list_perf / task #22.
_BLACKLIST_EMPTY_CACHE: dict = {"ts": 0.0, "val": False, "checked": False}
_BLACKLIST_EMPTY_TTL = 300.0


def _blacklists_empty(session: Session) -> bool:
    """Return True when ALL three blacklists are empty, so NOT EXISTS clauses can be skipped.

    The answer is cached in ``_BLACKLIST_EMPTY_CACHE`` for ``_BLACKLIST_EMPTY_TTL``
    seconds; the ``checked`` flag distinguishes "never computed" from a cached False.
    """
    import time as _time
    from app.models.blacklist import (
        BlacklistedPerformer,
        BlacklistedStudio,
        BlacklistedTag,
    )

    moment = _time.monotonic()
    still_fresh = (moment - _BLACKLIST_EMPTY_CACHE["ts"]) < _BLACKLIST_EMPTY_TTL
    if _BLACKLIST_EMPTY_CACHE["checked"] and still_fresh:
        return _BLACKLIST_EMPTY_CACHE["val"]

    # One round-trip: EXISTS(performers) OR EXISTS(studios) OR EXISTS(tags).
    any_entry = None
    for model in (BlacklistedPerformer, BlacklistedStudio, BlacklistedTag):
        clause = exists(select(1).select_from(model))
        any_entry = clause if any_entry is None else any_entry | clause
    has_any = session.execute(select(any_entry)).scalar_one()

    all_empty = not has_any
    _BLACKLIST_EMPTY_CACHE.update(ts=moment, val=all_empty, checked=True)
    return all_empty
|
||
|
||
|
||
def _split_csv(raw: str | None) -> list[str]:
|
||
if not raw:
|
||
return []
|
||
return [s.strip() for s in raw.split(",") if s.strip()]
|
||
|
||
|
||
def _escape_like(term: str) -> str:
    """Escape LIKE/ILIKE metacharacters so *term* is matched literally.

    Backslash is used as the escape character (callers must pass it as the
    ``escape`` argument of ``ilike``). It is escaped first so the escapes added
    for ``%`` and ``_`` are not themselves doubled.
    """
    return term.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


@router.get("", response_model=SceneListOut)
def list_scenes(
    session: Annotated[Session, Depends(get_session)],
    q: str | None = Query(default=None, description="Wyszukiwanie po title_normalized (trgm)"),
    studio_slug: str | None = Query(default=None, description="DEPRECATED — użyj studio_slugs"),
    studio_slugs: str | None = Query(
        default=None, description="Comma-separated studio slugs (OR)"
    ),
    tags: str | None = Query(
        default=None,
        description="Comma-separated tag slugs (AND — scena musi mieć wszystkie wybrane tagi)",
    ),
    performer_ids: str | None = Query(
        default=None,
        description="Comma-separated performer UUIDs (AND — scena musi mieć wszystkich wybranych performerów)",
    ),
    has_playback: bool | None = Query(
        default=None, description="True: tylko sceny z ≥1 playback_source"
    ),
    has_animated_thumbnail: bool | None = Query(
        default=None,
        description="True: tylko sceny z ≥1 playback_source z animated_thumbnail_url (hold-to-preview)",
    ),
    min_duration_sec: int | None = Query(default=None, ge=0),
    max_duration_sec: int | None = Query(default=None, ge=0),
    released_within_days: int | None = Query(
        default=None, ge=1,
        description="Tylko sceny released w ostatnich N dniach",
    ),
    min_quality_p: int | None = Query(
        default=None, ge=1,
        description=(
            "Minimum quality (pixele wysokości — 2160 = 4K, 1080 = FullHD). Filtruje "
            "po PlaybackSource.quality (string typu '720p' / '1080p Full HD')."
        ),
    ),
    origin: str | None = Query(
        default=None,
        description=(
            "Filtruj po playback origin (np. 'tube:hqpornercom'). Substring match — "
            "'hqporner' złapie tube:hqpornercom. Diagnostyka per-hoster."
        ),
    ),
    include_stubs: bool = Query(
        default=False,
        description=(
            "False (default): ukrywa sceny-szkielety bez release_date, < 10min, "
            "z jedynym playback z hqporner (~7-min Brazzers trailer clipy zalewają katalog)."
        ),
    ),
    sort: str = Query(default="created_at", description="created_at|release_date|title|studio"),
    page: int = Query(default=1, ge=1),
    per_page: int = Query(default=50, ge=1, le=200),
) -> SceneListOut:
    """List scenes with optional filters, sorting and page-based pagination.

    Filters combine with AND. ``has_more`` is derived from fetching one extra
    row; ``total`` is an exact (cached) catalogue count only for the pure
    default listing, otherwise a lower bound with ``total_capped`` set when
    another page exists.

    Raises:
        HTTPException: 400 for an unknown ``sort`` value or a malformed
            performer UUID.
    """
    if sort not in _VALID_SORTS:
        raise HTTPException(status_code=400, detail=f"sort must be one of {sorted(_VALID_SORTS)}")

    def _live_playback_exists(*extra_conditions):
        """EXISTS over this scene's live (dead_at IS NULL) playback sources."""
        return exists(
            select(1).where(
                PlaybackSource.scene_id == Scene.id,
                PlaybackSource.dead_at.is_(None),
                *extra_conditions,
            )
        )

    base = select(Scene)

    if q:
        # Escape LIKE metacharacters: the user's text is a literal substring, so
        # '%' and '_' in the query must not behave as wildcards.
        base = base.where(
            Scene.title_normalized.ilike(f"%{_escape_like(q.lower())}%", escape="\\")
        )

    studio_slug_list = _split_csv(studio_slugs)
    if studio_slug:
        studio_slug_list.append(studio_slug)
    if studio_slug_list:
        base = base.where(
            Scene.studio_id.in_(
                select(Studio.id).where(Studio.slug.in_(studio_slug_list))
            )
        )

    tag_slug_list = _split_csv(tags)
    # AND between tags: a scene must have ALL selected tags. Each slug gets its own
    # exists() — ticking more filters narrows the results, matching user intuition.
    #
    # PERF (2026-06-07): slug→tag_id is resolved in the application and we filter on a
    # LITERAL tag_id (NOT a JOIN on Tag.slug). With a literal the planner knows the tag's
    # cardinality from statistics (MCV) → for popular tags (blowjob ~273k scenes) it picks
    # an index walk over ix_scenes_created_at_desc instead of materialising all scene_tags.
    # The slug JOIN hid the tag_id from the planner → it used the average
    # (8.4M/11541≈726) → bad plan (4-12s). With a literal: ~20ms. See also the light mode
    # of the batch builder.
    if tag_slug_list:
        id_by_slug = dict(
            session.execute(
                select(Tag.slug, Tag.id).where(Tag.slug.in_(tag_slug_list))
            ).all()
        )
        for slug in tag_slug_list:
            tag_id = id_by_slug.get(slug)
            if tag_id is None:
                base = base.where(false())  # unknown slug → no results
                break
            base = base.where(
                exists(
                    select(1)
                    .select_from(SceneTag)
                    .where(SceneTag.scene_id == Scene.id, SceneTag.tag_id == tag_id)
                )
            )

    perf_id_strings = _split_csv(performer_ids)
    if perf_id_strings:
        try:
            perf_ids = [uuid.UUID(s) for s in perf_id_strings]
        except ValueError as e:
            raise HTTPException(status_code=400, detail=f"invalid performer UUID: {e}") from e
        # AND between performers (same approach as for tags).
        for pid in perf_ids:
            base = base.where(
                exists(
                    select(1)
                    .select_from(ScenePerformer)
                    .where(
                        ScenePerformer.scene_id == Scene.id,
                        ScenePerformer.performer_id == pid,
                    )
                )
            )

    if has_playback is True:
        # Only scenes with at least one LIVE playback_source.
        base = base.where(_live_playback_exists())
    elif has_playback is False:
        base = base.where(~_live_playback_exists())

    if origin:
        # Substring match on origin — 'hqporner' catches 'tube:hqpornercom'. Wildcard
        # characters in the user's value are escaped so they match literally.
        base = base.where(
            _live_playback_exists(
                PlaybackSource.origin.ilike(f"%{_escape_like(origin)}%", escape="\\")
            )
        )

    # Blacklists — global exclusions. A scene is dropped if it has ANY blacklisted
    # performer, belongs to a blacklisted studio, or has ANY blacklisted tag.
    # Skipped when all three blacklists are empty (the typical single-user state) — these
    # NOT EXISTS clauses are evaluated per row over ~176k scenes for a mega-tag and cost
    # ~3.4s for nothing.
    if not _blacklists_empty(session):
        from app.models.blacklist import (
            BlacklistedPerformer,
            BlacklistedStudio,
            BlacklistedTag,
        )
        base = base.where(
            ~exists(
                select(1)
                .select_from(ScenePerformer)
                .join(BlacklistedPerformer, BlacklistedPerformer.performer_id == ScenePerformer.performer_id)
                .where(ScenePerformer.scene_id == Scene.id)
            )
        )
        base = base.where(
            ~Scene.studio_id.in_(select(BlacklistedStudio.studio_id))
        )
        base = base.where(
            ~exists(
                select(1)
                .select_from(SceneTag)
                .join(BlacklistedTag, BlacklistedTag.tag_id == SceneTag.tag_id)
                .where(SceneTag.scene_id == Scene.id)
            )
        )

    # Only a truthy value filters; False behaves like "not set".
    if has_animated_thumbnail:
        base = base.where(
            _live_playback_exists(PlaybackSource.animated_thumbnail_url.isnot(None))
        )

    if min_duration_sec is not None:
        base = base.where(Scene.duration_sec >= min_duration_sec)
    if max_duration_sec is not None:
        base = base.where(Scene.duration_sec <= max_duration_sec)

    if released_within_days is not None:
        from datetime import date, timedelta
        cutoff = date.today() - timedelta(days=released_within_days)
        base = base.where(Scene.release_date >= cutoff)

    if min_quality_p is not None:
        # PlaybackSource.quality is a free-form string — we take the first run of digits
        # ('1080p', '1080p Full HD', '2160p'). Heuristic: it is enough that the scene has
        # ONE live playback whose numeric quality is >= min. '4K'/'UHD' alias to 2160.
        from sqlalchemy import Integer, cast, or_
        numeric_q = cast(
            func.coalesce(func.substring(PlaybackSource.quality, r"\d+"), "0"),
            Integer,
        )
        conds = [numeric_q >= min_quality_p]
        if min_quality_p <= 2160:
            conds.append(PlaybackSource.quality.ilike("%4k%"))
            conds.append(PlaybackSource.quality.ilike("%uhd%"))
        base = base.where(
            _live_playback_exists(
                PlaybackSource.quality.isnot(None),
                or_(*conds),
            )
        )

    if not include_stubs:
        # Stub scene heuristic: a tube-only scene WITHOUT release_date AND WITHOUT a
        # canonical (TPDB/StashDB) ref AND WITHOUT any ScenePerformer link. ScenePerformer
        # is added by the continuous worker (search-by-name forces a link), so a
        # per-performer search result is NEVER a stub. This only filters anonymous
        # tube-only scenes from the newUrl/categories ingest that were never matched to a
        # performer.
        # NOTE(review): the `include_stubs` parameter description mentions duration and
        # hqporner, which this predicate does not check — confirm which one is current.
        canonical_exists = exists(
            select(1)
            .select_from(SceneExternalRef)
            .join(Source, Source.id == SceneExternalRef.source_id)
            .where(SceneExternalRef.scene_id == Scene.id)
            .where(Source.kind.in_([SourceKind.tpdb, SourceKind.stashdb]))
        )
        has_performer = exists(
            select(1).where(ScenePerformer.scene_id == Scene.id)
        )
        # NOT a stub when: it has a canonical ref OR a release_date OR a performer.
        base = base.where(
            Scene.release_date.is_not(None) | canonical_exists | has_performer
        )

    _is_pure_default = (
        not include_stubs and not q and not studio_slug_list and not tag_slug_list
        and not perf_id_strings and origin is None and has_playback is None
        and not has_animated_thumbnail and min_duration_sec is None
        and max_duration_sec is None and released_within_days is None
        and min_quality_p is None
    )
    # Count strategy:
    # - PURE default: cached full catalogue counter (TTL 10 min).
    # - FILTERED: we do NOT count exactly. A bounded count over the EXISTS filters was
    #   the dominant cost (~4s for has_playback / min_duration / a big tag) and the plan
    #   was UNSTABLE (literal LIMIT + count-over-PK helped in some cases, but the planner
    #   still sometimes scans the whole set instead of stopping early). Mobile paginates
    #   by `has_more` (per_page+1 fetch), NOT by `total` — `total` is only the "N+"
    #   counter in the UI. We derive it from has_more AFTER the fetch (see below): a lower
    #   bound plus an "there is more" flag. This removes the count cost from every
    #   filtered list.
    total_capped = False
    total: int | None = _default_scene_count(session) if _is_pure_default else None

    # Sort: always tie-break on created_at desc for deterministic pagination.
    if sort == "release_date":
        ordered = base.order_by(
            Scene.release_date.desc().nullslast(), Scene.created_at.desc()
        )
    elif sort == "title":
        ordered = base.order_by(Scene.title_normalized.asc(), Scene.created_at.desc())
    elif sort == "studio":
        # Scenes without a studio go last; within a studio the newest come first.
        ordered = (
            base.outerjoin(Studio, Studio.id == Scene.studio_id)
            .order_by(
                Studio.name_normalized.asc().nullslast(),
                Scene.release_date.desc().nullslast(),
                Scene.created_at.desc(),
            )
        )
    else:  # created_at
        ordered = base.order_by(
            Scene.created_at.desc(), Scene.release_date.desc().nullslast()
        )

    # Fetch per_page+1 — the presence of row (per_page+1) means there is a next page.
    # This is the source of truth for pagination (mobile getNextPageParam), independent
    # of the bounded `total`. The surplus row is cut off before serialisation.
    # LIMIT/OFFSET are literals (NOT bound params) — see above: a parameterised LIMIT
    # breaks early termination and with EXISTS filters the planner does gather-all+sort
    # (seconds) instead of a limit-aware index walk over `ix_scenes_created_at_desc`.
    # page/per_page are validated ints (Query ge=1, le=200), so literal_column is safe.
    _off = (page - 1) * per_page
    rows = (
        session.execute(
            ordered.offset(literal_column(str(_off))).limit(literal_column(str(per_page + 1)))
        )
        .scalars()
        .all()
    )
    has_more = len(rows) > per_page
    rows = rows[:per_page]

    # Filtered lists: total = lower bound from the rows seen so far, and
    # total_capped=has_more gives the UI its "N+" (there is a next page). No count query.
    if total is None:
        total = (page - 1) * per_page + len(rows)
        total_capped = has_more

    items = _build_scenes_out_batch(session, list(rows), light=True)

    return SceneListOut(
        items=items,
        total=total,
        page=page,
        per_page=per_page,
        has_more=has_more,
        total_capped=total_capped,
    )
|
||
|
||
|
||
# Fetch one scene by primary key and return its full representation; responds 404
# when no scene with that id exists.
@router.get("/{scene_id}", response_model=SceneOut)
def get_scene(
    scene_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> SceneOut:
    found = session.get(Scene, scene_id)
    if found is not None:
        return _build_scene_out(session, found)
    raise HTTPException(status_code=404, detail="scene not found")
|
||
|
||
|
||
def _needs_proxy(url: str) -> bool:
|
||
"""Wszystkie thumbnaile z playback_sources są proxowane przez backend.
|
||
Większość CDN-ów porn-tube'ów wymaga Refera (hqporner, mypornerleak/58img,
|
||
inne sxyprn/eporner CDN-y) — expo-image nie wysyła Referera.
|
||
Self-hosted lub backend-internal URL-e (zaczynające się od `/`) skipujemy."""
|
||
return url.startswith("http") and not url.startswith("/proxy/")
|
||
|
||
|
||
def _wrap_image_proxy(url: str, referer: str) -> str:
    """Return a ``/proxy/img/{token}/img.{ext}`` path for a thumbnail URL.

    The token is produced by ``make_token`` from the URL and the Referer, so the
    client never needs to know the Referer itself. The token TTL is 30 days; the
    original note justifies this by thumbnails being stable.
    """
    from app.api.stream_proxy import make_token
    import os as _os

    thirty_days = 30 * 24 * 3600
    signed = make_token(url, referer, ttl_sec=thirty_days)

    # Keep the source extension in the path so the Content-Type can be recognised;
    # the query string is ignored and "jpg" is the fallback.
    without_query, _, _ = url.partition("?")
    suffix = _os.path.splitext(without_query)[1].lstrip(".")
    if not suffix:
        suffix = "jpg"
    return f"/proxy/img/{signed}/img.{suffix}"
|
||
|
||
|
||
def _build_scenes_out_batch(
    session: Session, scenes: list[Scene], *, light: bool = False
) -> list[SceneOut]:
    """Build ``SceneOut`` objects for many scenes using batched relation queries.

    Relations are fetched with one query per relation kind (at most seven in
    total) instead of per scene. The original note reports that this took
    `/scenes?per_page=24` from ~9.6s to <500ms by removing the N+1 of
    `_build_scene_out`.

    Args:
        session: Database session used for all queries.
        scenes: Scenes to serialise; the output preserves this order.
        light: List/grid mode. Skips `tags` and `external_refs` and slims
            `playback_sources` down to a single entry per scene carrying only
            the thumbnail URLs (other fields are dummy sentinels).

    Returns:
        One ``SceneOut`` per input scene; an empty list for empty input.
    """
    from collections import defaultdict
    if not scenes:
        return []

    scene_ids = [s.id for s in scenes]
    studio_ids = list({s.studio_id for s in scenes if s.studio_id is not None})

    # 1) Studios
    studios_by_id: dict = {}
    if studio_ids:
        for st in session.execute(
            select(Studio).where(Studio.id.in_(studio_ids))
        ).scalars():
            studios_by_id[st.id] = st

    # 2) Performers
    perf_rows = session.execute(
        select(ScenePerformer, Performer)
        .join(Performer, Performer.id == ScenePerformer.performer_id)
        .where(ScenePerformer.scene_id.in_(scene_ids))
        .order_by(ScenePerformer.position.asc().nullslast())
    ).all()
    performers_by_scene: dict = defaultdict(list)
    for sp, p in perf_rows:
        performers_by_scene[sp.scene_id].append(
            PerformerOut(
                id=p.id,
                canonical_name=p.canonical_name,
                slug=p.slug,
                gender=p.gender.value if p.gender else None,
                as_alias=sp.as_alias,
            )
        )

    # 3) Tags + 4) External refs — the list tile does not use them; skipped in light
    # mode (SceneDetail re-fetches the full scene through /scenes/{id}).
    tags_by_scene: dict = defaultdict(list)
    refs_by_scene: dict = defaultdict(list)
    if not light:
        tag_rows = session.execute(
            select(SceneTag.scene_id, Tag)
            .join(Tag, Tag.id == SceneTag.tag_id)
            .where(SceneTag.scene_id.in_(scene_ids))
        ).all()
        for sid, t in tag_rows:
            tags_by_scene[sid].append(TagOut.model_validate(t))

        ref_rows = session.execute(
            select(SceneExternalRef, Source)
            .join(Source, Source.id == SceneExternalRef.source_id)
            .where(SceneExternalRef.scene_id.in_(scene_ids))
        ).all()
        for ref, src in ref_rows:
            refs_by_scene[ref.scene_id].append(
                ExternalRefOut(
                    source=src.name,
                    external_id=ref.external_id,
                    url=ref.url,
                    last_seen=ref.last_seen,
                )
            )

    # 5) Playback sources. Light mode: thumbnail only (one entry per scene) — the tile
    # reads nothing but playback_sources[].thumbnail_url / animated_thumbnail_url.
    pb_by_scene: dict = defaultdict(list)
    if light:
        pb_light = session.execute(
            select(
                PlaybackSource.scene_id,
                PlaybackSource.thumbnail_url,
                PlaybackSource.animated_thumbnail_url,
                PlaybackSource.page_url,
            )
            .where(
                PlaybackSource.scene_id.in_(scene_ids),
                PlaybackSource.dead_at.is_(None),
            )
            .order_by(PlaybackSource.origin.asc())
        ).all()
        # First thumbnail + first animated thumbnail per scene, each remembered together
        # with the page_url of the source it came from.
        thumb_by_scene: dict = {}
        anim_by_scene: dict = {}
        for sid, thumb, anim, page_url in pb_light:
            if sid not in thumb_by_scene and thumb:
                thumb_by_scene[sid] = (thumb, page_url)
            if sid not in anim_by_scene and anim:
                anim_by_scene[sid] = (anim, page_url)
        for sid in scene_ids:
            t_url, t_referer = thumb_by_scene.get(sid, (None, None))
            a_url, a_referer = anim_by_scene.get(sid, (None, None))
            if not t_url and not a_url:
                continue
            # Each URL is proxied with the page_url of ITS OWN source as Referer: the
            # static and animated thumbnails may come from different playback sources,
            # so reusing one Referer for both would send the wrong page to the other CDN.
            if t_url and _needs_proxy(t_url):
                t_url = _wrap_image_proxy(t_url, t_referer)
            if a_url and _needs_proxy(a_url):
                a_url = _wrap_image_proxy(a_url, a_referer)
            # id/origin/page_url are required by the schema but unused by the tile
            # (SceneDetail re-fetches the full sources) — dummy sentinels.
            pb_by_scene[sid].append(
                PlaybackSourceOut(
                    id=uuid.UUID(int=0), origin="", page_url="",
                    thumbnail_url=t_url, animated_thumbnail_url=a_url,
                )
            )
    else:
        pb_rows = session.execute(
            select(PlaybackSource)
            .where(
                PlaybackSource.scene_id.in_(scene_ids),
                PlaybackSource.dead_at.is_(None),
            )
            .order_by(PlaybackSource.origin.asc())
        ).scalars().all()
        for p in pb_rows:
            pb_out = PlaybackSourceOut.model_validate(p)
            if pb_out.thumbnail_url and _needs_proxy(pb_out.thumbnail_url):
                pb_out.thumbnail_url = _wrap_image_proxy(pb_out.thumbnail_url, p.page_url)
            if pb_out.animated_thumbnail_url and _needs_proxy(pb_out.animated_thumbnail_url):
                pb_out.animated_thumbnail_url = _wrap_image_proxy(
                    pb_out.animated_thumbnail_url, p.page_url
                )
            pb_by_scene[p.scene_id].append(pb_out)

    # 6) Progress
    progress_by_scene: dict = {}
    for prog in session.execute(
        select(ScenePlayProgress).where(ScenePlayProgress.scene_id.in_(scene_ids))
    ).scalars():
        progress_by_scene[prog.scene_id] = prog

    # 7) Favorites
    fav_scene_ids: set = set(
        session.execute(
            select(FavoriteScene.scene_id).where(
                FavoriteScene.scene_id.in_(scene_ids)
            )
        ).scalars()
    )

    out: list[SceneOut] = []
    for scene in scenes:
        studio_out = None
        if scene.studio_id is not None and scene.studio_id in studios_by_id:
            studio_out = StudioOut.model_validate(studios_by_id[scene.studio_id])
        progress = progress_by_scene.get(scene.id)
        out.append(
            SceneOut(
                id=scene.id,
                title=scene.title,
                slug=scene.slug,
                release_date=scene.release_date,
                duration_sec=scene.duration_sec,
                description=scene.description,
                code=scene.code,
                director=scene.director,
                studio=studio_out,
                performers=performers_by_scene.get(scene.id, []),
                tags=tags_by_scene.get(scene.id, []),
                external_refs=refs_by_scene.get(scene.id, []),
                playback_sources=pb_by_scene.get(scene.id, []),
                created_at=scene.created_at,
                last_played_at=progress.last_played_at if progress else None,
                finished=progress.finished if progress else False,
                position_sec=progress.position_sec if progress else 0,
                is_favorite=scene.id in fav_scene_ids,
            )
        )
    return out
|
||
|
||
|
||
def _build_scene_out(session: Session, scene: Scene) -> SceneOut:
    """Serialise a single scene with all of its relations.

    Loads studio, performers, tags, external refs, live playback sources, play
    progress and favourite state, one query each. Playback sources are collapsed
    to one per origin and ordered so natively resolvable sources come before
    WebView fallbacks.
    """
    studio_out: StudioOut | None = None
    if scene.studio_id is not None:
        studio_row = session.get(Studio, scene.studio_id)
        if studio_row is not None:
            studio_out = StudioOut.model_validate(studio_row)

    # Performers, ordered by their position in the scene (missing positions last).
    performer_stmt = (
        select(ScenePerformer, Performer)
        .join(Performer, Performer.id == ScenePerformer.performer_id)
        .where(ScenePerformer.scene_id == scene.id)
        .order_by(ScenePerformer.position.asc().nullslast())
    )
    performers_out: list[PerformerOut] = [
        PerformerOut(
            id=person.id,
            canonical_name=person.canonical_name,
            slug=person.slug,
            gender=person.gender.value if person.gender else None,
            as_alias=link.as_alias,
        )
        for link, person in session.execute(performer_stmt).all()
    ]

    tag_stmt = (
        select(Tag)
        .join(SceneTag, SceneTag.tag_id == Tag.id)
        .where(SceneTag.scene_id == scene.id)
    )
    tags_out = [
        TagOut.model_validate(tag_row)
        for tag_row in session.execute(tag_stmt).scalars().all()
    ]

    ref_stmt = (
        select(SceneExternalRef, Source)
        .join(Source, Source.id == SceneExternalRef.source_id)
        .where(SceneExternalRef.scene_id == scene.id)
    )
    refs_out = []
    for ext_ref, ref_source in session.execute(ref_stmt).all():
        refs_out.append(
            ExternalRefOut(
                source=ref_source.name,
                external_id=ext_ref.external_id,
                url=ext_ref.url,
                last_seen=ext_ref.last_seen,
            )
        )

    # Live playback sources only (dead links are hidden), alphabetical by origin.
    live_stmt = (
        select(PlaybackSource)
        .where(
            PlaybackSource.scene_id == scene.id,
            PlaybackSource.dead_at.is_(None),
        )
        .order_by(PlaybackSource.origin.asc())
    )
    live_sources = session.execute(live_stmt).scalars().all()

    # Collapse sources that share an origin (hoster). A merged scene often aggregates
    # several uploads from ONE tube (re-encodes / 4K versions: bug-report aa79a995
    # "2 links, both to porntrex" = the same scene in std + 4K) — in the UI these are
    # indistinguishable links to the same hoster (resolved by the same extractor).
    # Keep the single best one per origin: prefer a duration matching the scene (a real
    # match) → any duration → the first one (stable, the query is origin-asc).
    def _pick_rank(candidate: PlaybackSource) -> tuple[int, int]:
        within_tolerance = bool(
            scene.duration_sec
            and candidate.duration_sec
            and abs(candidate.duration_sec - scene.duration_sec) <= 5
        )
        has_duration = bool(candidate.duration_sec)
        return (0 if within_tolerance else 1, 0 if has_duration else 1)

    winners: dict[str, PlaybackSource] = {}
    for candidate in live_sources:
        bucket = candidate.origin or ""
        # Strict "<" keeps the earlier row on ties.
        if bucket not in winners or _pick_rank(candidate) < _pick_rank(winners[bucket]):
            winners[bucket] = candidate

    playback_out: list[PlaybackSourceOut] = []
    for chosen in winners.values():
        item = PlaybackSourceOut.model_validate(chosen)
        # Route thumbnail URLs through the backend image proxy when the CDN needs a
        # Referer (hqporner — fastporndelivery returns 403 without the header and
        # expo-image does not send it by default). The token has a 30-day TTL.
        static_thumb = item.thumbnail_url
        if static_thumb and _needs_proxy(static_thumb):
            item.thumbnail_url = _wrap_image_proxy(static_thumb, chosen.page_url)
        animated_thumb = item.animated_thumbnail_url
        if animated_thumb and _needs_proxy(animated_thumb):
            item.animated_thumbnail_url = _wrap_image_proxy(animated_thumb, chosen.page_url)
        playback_out.append(item)

    # Rank natively resolvable sources BEFORE WebView fallbacks (IP-bound/ad-heavy:
    # fpoxxx, pornxpph, pornhub...). The query is alphabetical by origin, so e.g. the
    # fpoxxx WebView used to show up before a working freshporno (bug-report
    # 2026-06-07). Stable sort: native (0) → fallback (1), tie-break on origin.
    from app.extractors import is_vps_blocked_fallback

    def _fallback_rank(origin: str | None) -> int:
        if not origin:
            return 1
        _, colon, tail = origin.partition(":")
        sitetag = tail if colon else origin
        return int(bool(is_vps_blocked_fallback(sitetag)))

    def _display_order(entry: PlaybackSourceOut) -> tuple[int, str]:
        return (_fallback_rank(entry.origin), entry.origin or "")

    playback_out.sort(key=_display_order)

    progress = session.get(ScenePlayProgress, scene.id)
    is_fav = session.get(FavoriteScene, scene.id) is not None

    # Defaults used when the scene has never been played.
    last_played_at = None
    finished = False
    position_sec = 0
    if progress:
        last_played_at = progress.last_played_at
        finished = progress.finished
        position_sec = progress.position_sec

    return SceneOut(
        id=scene.id,
        title=scene.title,
        slug=scene.slug,
        release_date=scene.release_date,
        duration_sec=scene.duration_sec,
        description=scene.description,
        code=scene.code,
        director=scene.director,
        studio=studio_out,
        performers=performers_out,
        tags=tags_out,
        external_refs=refs_out,
        playback_sources=playback_out,
        created_at=scene.created_at,
        last_played_at=last_played_at,
        finished=finished,
        position_sec=position_sec,
        is_favorite=is_fav,
    )
|
||
|
||
|
||
@router.delete("/{scene_id}/tags/{tag_id}", status_code=status.HTTP_204_NO_CONTENT)
def remove_tag_from_scene(
    scene_id: uuid.UUID,
    tag_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> None:
    """Remove the scene↔tag link (e.g. the user decided the tag is wrong for this scene).

    Idempotent: a missing link counts as success. The Tag itself is not deleted —
    other scenes may use it, so it stays in the tag dictionary.
    """
    link_stmt = select(SceneTag).where(
        SceneTag.scene_id == scene_id,
        SceneTag.tag_id == tag_id,
    )
    link = session.execute(link_stmt).scalar_one_or_none()
    if link is not None:
        session.delete(link)
        session.commit()
|
||
|
||
|
||
@router.delete(
    "/{scene_id}/performers/{performer_id}", status_code=status.HTTP_204_NO_CONTENT
)
def remove_performer_from_scene(
    scene_id: uuid.UUID,
    performer_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> None:
    """Remove the scene↔performer link (a false-match dedup left the wrong person).

    Idempotent. The Performer itself stays. Useful e.g. when a fuzzy alias match on
    "Bella" pulled Anna Bella scenes under Bad Bella, or Miss Teela on xnxx was
    attached to scenes she is not in (reports from 2026-05-10).
    """
    link_stmt = select(ScenePerformer).where(
        ScenePerformer.scene_id == scene_id,
        ScenePerformer.performer_id == performer_id,
    )
    link = session.execute(link_stmt).scalar_one_or_none()
    if link is not None:
        session.delete(link)
        session.commit()
|
||
|
||
|
||
# Response body of POST /scenes/{scene_id}/enrich-tags.
class EnrichTagsOut(BaseModel):
    # Scene the enrichment was requested for.
    scene_id: uuid.UUID
    # NOTE(review): presumably the number of tag links newly added — confirm in the handler.
    added: int
    # NOTE(review): presumably the tube source that was scraped, None when none applied — confirm.
    tube_used: str | None
    # NOTE(review): presumably the tag names/slugs found on the page — confirm in the handler.
    tags: list[str]
|
||
|
||
|
||
@router.post("/{scene_id}/enrich-tags", response_model=EnrichTagsOut)
def enrich_tags_from_tube(
    scene_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> EnrichTagsOut:
    """Scrape tags for a scene from one of its tube pages and attach them.

    Fetches the page HTML of one live tube playback source of the scene,
    scrapes its tags (categories/tags) and adds the missing ones to
    scene_tags.

    Mobile calls this when SceneDetail is opened for a scene that has 0 tags
    AND has a tube source with a supported extractor (porntrex/youporn/
    xvideos/xnxx/redtube/xhamster/eporner).

    Idempotent: repeating the call with the same tags changes nothing (UNIQUE
    PK on scene_tags). The tube source is picked via a priority list
    (mainstream sites are more reliable than aggregators).

    Args:
        scene_id: Scene to enrich.
        session: Request-scoped database session.

    Returns:
        The number of links inserted, the site tag that was scraped and the
        scraped tag names. ``added`` is 0 with empty ``tags`` when no source is
        usable, the fetch fails, or the page yields no tags.

    Raises:
        HTTPException: 404 when the scene does not exist.
    """
    from sqlalchemy.dialects.postgresql import insert as pg_insert

    from app.extractors._fetch import browser_get
    from app.extractors._models import TubePageError
    from app.extractors.tag_extract import EXTRACTORS, extract_tags
    from app.normalize.scenes import NormalizedTag
    from app.normalize.text import slugify
    from app.resolve.tag_resolver import resolve_tag

    def _nothing_added(tube: str | None) -> EnrichTagsOut:
        # Shared payload for every "no tags were attached" outcome.
        return EnrichTagsOut(scene_id=scene_id, added=0, tube_used=tube, tags=[])

    if session.get(Scene, scene_id) is None:
        raise HTTPException(status_code=404, detail="scene not found")

    # Priority: mainstream tubes (rich metadata) > niche ones (fewer tags or garbage).
    PRIORITY = ["xhamstercom", "porntrexcom", "epornercom", "youporncom",
                "xvideoscom", "xnxxcom", "redtubecom", "pornhatcom"]
    live_sources = session.execute(
        select(PlaybackSource).where(
            PlaybackSource.scene_id == scene_id,
            PlaybackSource.dead_at.is_(None),
        )
    ).scalars().all()

    # First source seen per origin, so the priority scan below picks the same
    # row a nested "for site / for source" scan would.
    first_by_origin: dict = {}
    for candidate in live_sources:
        first_by_origin.setdefault(candidate.origin, candidate)

    picked = next(
        (
            first_by_origin[f"tube:{site}"]
            for site in PRIORITY
            if f"tube:{site}" in first_by_origin
        ),
        None,
    )
    if picked is None:
        # Fallback: any tube source whose site has a registered extractor.
        picked = next(
            (
                candidate
                for candidate in live_sources
                if candidate.origin.startswith("tube:")
                and candidate.origin.split(":", 1)[1] in EXTRACTORS
            ),
            None,
        )
    if picked is None:
        return _nothing_added(None)

    sitetag = picked.origin.split(":", 1)[1]
    try:
        response = browser_get(picked.page_url, timeout=15.0, follow_redirects=True)
        response.raise_for_status()
    except (TubePageError, Exception) as e:
        # Best-effort endpoint: any fetch problem is logged and reported as
        # "nothing added" instead of failing the request.
        log.warning("enrich-tags fetch failed for %s: %s", picked.page_url, e)
        return _nothing_added(sitetag)

    tag_names = extract_tags(sitetag, response.text)
    if not tag_names:
        return _nothing_added(sitetag)

    # Upsert: for every name find/create the Tag, then link it idempotently.
    # PostgreSQL INSERT ... ON CONFLICT DO NOTHING is used instead of ORM
    # session.add() because `resolve_tag` calls session.flush() inside the
    # loop, which emits pending SceneTag INSERTs from earlier iterations —
    # when 2 concurrent enrich-tags calls collide on the same
    # (scene_id, tag_id), the second flush gets a UniqueViolation (GOON-H,
    # 4 events in 10h despite the earlier seen_tag_ids fix). ON CONFLICT
    # skips those silently.
    added = 0
    handled_tag_ids: set = set()
    for raw_name in tag_names:
        normalized = NormalizedTag(name=raw_name, slug=slugify(raw_name), external_id=None)
        resolved = resolve_tag(session, norm=normalized)
        if resolved is None or resolved.id in handled_tag_ids:
            continue
        handled_tag_ids.add(resolved.id)
        link_insert = (
            pg_insert(SceneTag.__table__)
            .values(scene_id=scene_id, tag_id=resolved.id, source_id=None)
            .on_conflict_do_nothing(index_elements=["scene_id", "tag_id"])
        )
        outcome = session.execute(link_insert)
        # rowcount == 1 when the row was really inserted, 0 when ON CONFLICT skipped it.
        if outcome.rowcount and outcome.rowcount > 0:
            added += 1
    session.commit()
    return EnrichTagsOut(scene_id=scene_id, added=added, tube_used=sitetag, tags=tag_names)
|
||
|
||
|
||
class EnrichDurationOut(BaseModel):
    # Response body of POST /scenes/{scene_id}/enrich-duration.

    # Scene the request was made for.
    scene_id: uuid.UUID
    # Duration now stored on the scene, in seconds; None when no tube page
    # yielded one.
    duration_sec: int | None
    # Site tag of the tube the duration was scraped from; None when nothing
    # was scraped (including when the scene already had a duration).
    tube_used: str | None
|
||
|
||
|
||
@router.post("/{scene_id}/enrich-duration", response_model=EnrichDurationOut)
def enrich_duration_from_tube(
    scene_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> EnrichDurationOut:
    """Fill in a scene's missing duration from one of its tube pages.

    Every known tube exposes the duration on its detail page
    (og:video:duration or LD-JSON ISO 8601), so live tube sources are tried
    one by one until a page yields a positive duration.

    Mobile calls this when SceneDetail is opened for a scene whose
    duration_sec is null AND which has a tube source. For dedup, duration is
    the strongest single signal — without it, scenes with a weak title-only
    score are capped at 0.85 (review queue).

    Idempotent: when duration_sec is already set it is returned as is and no
    page is fetched.

    Args:
        scene_id: Scene to enrich.
        session: Request-scoped database session.

    Returns:
        The scene's duration and the site tag it came from; both are ``None``
        when no source produced a duration.

    Raises:
        HTTPException: 404 when the scene does not exist.
    """
    from app.extractors._fetch import browser_get
    from app.extractors._models import TubePageError
    from app.extractors.duration_extract import extract_duration_sec

    scene = session.get(Scene, scene_id)
    if scene is None:
        raise HTTPException(status_code=404, detail="scene not found")

    known_duration = scene.duration_sec
    if known_duration is not None:
        return EnrichDurationOut(
            scene_id=scene_id, duration_sec=known_duration, tube_used=None
        )

    tube_query = select(PlaybackSource).where(
        PlaybackSource.scene_id == scene_id,
        PlaybackSource.dead_at.is_(None),
        PlaybackSource.origin.like("tube:%"),
    )
    candidates = session.execute(tube_query).scalars().all()

    for candidate in candidates:
        try:
            response = browser_get(candidate.page_url, timeout=15.0, follow_redirects=True)
            response.raise_for_status()
        except (TubePageError, Exception) as e:
            # A failing source is not fatal; move on to the next one.
            log.debug("enrich-duration fetch failed for %s: %s", candidate.page_url, e)
            continue

        seconds = extract_duration_sec(response.text)
        if seconds is None or not (seconds > 0):
            continue

        scene.duration_sec = seconds
        # Also store it on the playback source for parity (handy if
        # per-source duration mismatch detection is added later).
        if candidate.duration_sec is None:
            candidate.duration_sec = seconds
        session.commit()

        _prefix, separator, site = candidate.origin.partition(":")
        return EnrichDurationOut(
            scene_id=scene_id,
            duration_sec=seconds,
            tube_used=site if separator else None,
        )

    return EnrichDurationOut(scene_id=scene_id, duration_sec=None, tube_used=None)
|
||
|
||
|
||
class EnrichStudioOut(BaseModel):
    # Response body of POST /scenes/{scene_id}/enrich-studio.

    # Scene the request was made for.
    scene_id: uuid.UUID
    # Studio linked to the scene after the call; None when none was found.
    studio_id: uuid.UUID | None
    # Display name of that studio; None when no studio is linked or its row
    # could not be loaded.
    studio_name: str | None
    # Site tag of the tube source that was selected for scraping; None when
    # no source was selected.
    tube_used: str | None
|
||
|
||
|
||
def _parse_pornhat_studio(page_html: str) -> tuple[str, str] | None:
    """Pull the studio ``(name, dir)`` pair out of a pornhat scene page.

    The page carries the studio in an element such as
    ``class="info-video js-ajax-dvd" data-setup='{"title": "Adult Time", ...}'``.

    Args:
        page_html: Raw HTML of the scene page.

    Returns:
        ``(name, dir)`` with both values stripped; ``dir`` is ``""`` when it is
        absent or not a string. ``None`` when the element is missing, its
        payload is not a JSON object, or it has no non-empty string title.
    """
    import html as _html
    import json as _json

    match = re.search(
        r"class=\"info-video js-ajax-dvd[^\"]*\"[^>]*data-setup='([^']+)'",
        page_html, re.IGNORECASE,
    )
    if match is None:
        return None
    try:
        # The payload sits in an HTML attribute, so character references
        # (&amp;, &#039;, ...) have to be decoded before it is valid JSON text;
        # otherwise names are stored with the literal entity in them.
        data = _json.loads(_html.unescape(match.group(1)))
    except _json.JSONDecodeError:
        return None
    if not isinstance(data, dict):
        # Valid JSON but not an object (list, number, ...): nothing to read.
        return None

    title = data.get("title")
    name = title.strip() if isinstance(title, str) else ""
    if not name:
        return None
    directory = data.get("dir")
    return name, (directory.strip() if isinstance(directory, str) else "")


@router.post("/{scene_id}/enrich-studio", response_model=EnrichStudioOut)
def enrich_studio_from_tube(
    scene_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> EnrichStudioOut:
    """Attach a studio (DVD/series) to a scene using its pornhat page.

    Pornhat exposes the studio as
    ``class="info-video js-ajax-dvd" data-setup='{"title": "Adult Time", ...}'``.
    Other tubes will be supported once their pattern is known — for now only
    pornhat is used (cleanest studio metadata among the free tubes).

    A scene that already has a studio is returned unchanged. Otherwise the
    scraped studio is matched to an existing ``Studio`` by slug, then by name,
    and created when neither matches.

    Args:
        scene_id: Scene to enrich.
        session: Request-scoped database session.

    Returns:
        The studio linked to the scene. ``studio_id``/``studio_name`` are
        ``None`` when there is no live pornhat source, the fetch fails, or the
        page carries no usable studio.

    Raises:
        HTTPException: 404 when the scene does not exist.
    """
    from app.extractors._fetch import browser_get
    from app.extractors._models import TubePageError
    from app.normalize.text import slugify

    scene = session.get(Scene, scene_id)
    if scene is None:
        raise HTTPException(status_code=404, detail="scene not found")

    if scene.studio_id is not None:
        existing = session.get(Studio, scene.studio_id)
        return EnrichStudioOut(
            scene_id=scene_id,
            studio_id=scene.studio_id,
            studio_name=existing.name if existing else None,
            tube_used=None,
        )

    chosen = session.execute(
        select(PlaybackSource).where(
            PlaybackSource.scene_id == scene_id,
            PlaybackSource.dead_at.is_(None),
            PlaybackSource.origin == "tube:pornhatcom",
        )
    ).scalars().first()
    if chosen is None:
        return EnrichStudioOut(scene_id=scene_id, studio_id=None, studio_name=None, tube_used=None)

    # From here on a pornhat source was selected, so every miss reports it.
    no_studio = EnrichStudioOut(
        scene_id=scene_id, studio_id=None, studio_name=None, tube_used="pornhatcom"
    )

    try:
        r = browser_get(chosen.page_url, timeout=15.0, follow_redirects=True)
        r.raise_for_status()
    except (TubePageError, Exception) as e:
        # Best-effort endpoint: a failed fetch is logged and reported as
        # "no studio" instead of failing the request.
        log.warning("enrich-studio fetch failed for %s: %s", chosen.page_url, e)
        return no_studio

    parsed = _parse_pornhat_studio(r.text)
    if parsed is None:
        return no_studio
    name, directory = parsed
    # Prefer the site's own directory name as slug; derive one otherwise.
    slug = directory or slugify(name)

    studio = session.execute(
        select(Studio).where(Studio.slug == slug)
    ).scalar_one_or_none()
    if studio is None:
        studio = session.execute(
            select(Studio).where(Studio.name == name)
        ).scalar_one_or_none()
    if studio is None:
        studio = Studio(name=name, slug=slug)
        session.add(studio)
        # Flush so the new row has its id before it is linked to the scene.
        session.flush()
    scene.studio_id = studio.id
    session.commit()
    return EnrichStudioOut(
        scene_id=scene_id, studio_id=studio.id, studio_name=studio.name, tube_used="pornhatcom"
    )
|
||
|
||
|
||
class EnrichThumbOut(BaseModel):
    # Response body of POST /scenes/{scene_id}/enrich-thumbnail.

    # Scene the request was made for.
    scene_id: uuid.UUID
    # Thumbnail URL that already existed or was just scraped; None when none
    # could be found.
    thumbnail_url: str | None
    # Site tag of the tube the thumbnail was scraped from; None when nothing
    # was scraped (including when a thumbnail already existed).
    tube_used: str | None
    # Number of playback sources that received the thumbnail in this call.
    sources_updated: int
|
||
|
||
|
||
@router.post("/{scene_id}/enrich-thumbnail", response_model=EnrichThumbOut)
def enrich_thumbnail_from_tube(
    scene_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> EnrichThumbOut:
    """Scrape a thumbnail for a scene from one of its tube pages.

    Fetches the detail page of the scene's live tube playback sources, one at
    a time, and extracts a thumbnail (og:image / twitter:image / LD-JSON
    thumbnailUrl / KVS html5player).

    The first thumbnail found is written to EVERY loaded tube source that has
    none, so later list views show a thumbnail regardless of which source gets
    picked. Mobile calls this automatically when SceneDetail is opened without
    a thumbnail (same as for duration).

    Idempotent: when any of the loaded sources already has a thumbnail it is
    returned and nothing is fetched or written.

    Args:
        scene_id: Scene to enrich.
        session: Request-scoped database session.

    Returns:
        The thumbnail URL, the site tag it was scraped from and how many
        sources were updated.

    Raises:
        HTTPException: 404 when the scene does not exist.
    """
    from app.extractors._fetch import browser_get
    from app.extractors._models import TubePageError
    from app.extractors.thumb_extract import extract_thumbnail_url

    if session.get(Scene, scene_id) is None:
        raise HTTPException(status_code=404, detail="scene not found")

    tube_query = select(PlaybackSource).where(
        PlaybackSource.scene_id == scene_id,
        PlaybackSource.dead_at.is_(None),
        PlaybackSource.origin.like("tube:%"),
    )
    tube_sources = session.execute(tube_query).scalars().all()

    existing_thumb = next(
        (row.thumbnail_url for row in tube_sources if row.thumbnail_url), None
    )
    if existing_thumb is not None:
        # Already have one — idempotent return.
        return EnrichThumbOut(
            scene_id=scene_id,
            thumbnail_url=existing_thumb,
            tube_used=None,
            sources_updated=0,
        )

    for candidate in tube_sources:
        try:
            response = browser_get(candidate.page_url, timeout=15.0, follow_redirects=True)
            response.raise_for_status()
        except (TubePageError, Exception) as e:
            # A failing source is not fatal; move on to the next one.
            log.debug("enrich-thumbnail fetch failed for %s: %s", candidate.page_url, e)
            continue

        thumb = extract_thumbnail_url(response.text)
        if not thumb:
            continue

        # Store it on every source without a thumbnail (saves a duplicate fetch).
        lacking = [row for row in tube_sources if not row.thumbnail_url]
        for row in lacking:
            row.thumbnail_url = thumb
        session.commit()

        _prefix, separator, site = candidate.origin.partition(":")
        return EnrichThumbOut(
            scene_id=scene_id,
            thumbnail_url=thumb,
            tube_used=site if separator else None,
            sources_updated=len(lacking),
        )

    return EnrichThumbOut(
        scene_id=scene_id, thumbnail_url=None, tube_used=None, sources_updated=0
    )
|