fix(scenes): propagate playback duration to Scene + duration-consistent counts

Scene.duration_sec was NULL for ~74% of playable scenes (tube duration lives on
playback_source, never propagated to Scene), so the mobile min_duration_sec=60 filter
(Scene.duration_sec >= 60; NULL fails) silently hid them — surfaced as '119 in favorites,
14 after entering the performer' (Safira Yakkuza).

- resolver: _effective_duration() falls back to max live playback_source duration when the
  connector provides no scene-level duration (forward fix, used in create + update).
- scripts/backfill_scene_duration_from_playback.py: one-off idempotent backfill (recovered
  204,014 scenes).
- taxonomy_counts: scene_count now counts playable AND duration_sec >= 60, matching the
  always-60s-filtered scene lists, so favorites/performer/studio/tag badges agree with what
  the scene screen actually shows (Safira: 39 == 39).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
jtrzupek 2026-06-01 21:31:01 +02:00
parent cd12348782
commit 817b50fbf8
3 changed files with 83 additions and 7 deletions

View file

@ -380,6 +380,20 @@ def resolve_scene(
# ---- helpery -------------------------------------------------------------- # ---- helpery --------------------------------------------------------------
def _effective_duration(norm: NormalizedScene) -> int | None:
    """Return the duration to store on the Scene for a normalized record.

    The connector's scene-level ``norm.duration_sec`` wins when it is truthy.
    Otherwise the longest truthy ``duration_sec`` among ``norm.playback_sources``
    is used.

    Tube connectors often report duration ONLY on the playback source
    (``norm.duration_sec`` is None while ``norm.playback_sources[].duration_sec``
    is set). Without this fallback ``Scene.duration_sec`` stays NULL → the mobile
    ``min_duration_sec=60`` filter (``Scene.duration_sec >= 60``) drops the scene
    even though it is playable and its length is known (74% of the catalogue was
    hidden, fix 2026-06-01).

    Args:
        norm: Normalized scene as produced by a connector.

    Returns:
        Duration in seconds, or None when neither the scene nor any of its
        playback sources carries a truthy duration (None and 0 both count as
        missing).
    """
    if norm.duration_sec:
        return norm.duration_sec
    # NOTE(review): NormalizedScene is not visible here; `or ()` is a defensive
    # guard in case a connector leaves playback_sources as None.
    sources = norm.playback_sources or ()
    return max((ps.duration_sec for ps in sources if ps.duration_sec), default=None)
def _create_canonical( def _create_canonical(
session: Session, *, norm: NormalizedScene, studio_id: uuid.UUID | None session: Session, *, norm: NormalizedScene, studio_id: uuid.UUID | None
) -> Scene: ) -> Scene:
@ -389,7 +403,7 @@ def _create_canonical(
slug=norm.slug or slugify(norm.title), slug=norm.slug or slugify(norm.title),
release_date=norm.release_date, release_date=norm.release_date,
studio_id=studio_id, studio_id=studio_id,
duration_sec=norm.duration_sec, duration_sec=_effective_duration(norm),
description=norm.description, description=norm.description,
code=norm.code, code=norm.code,
director=norm.director, director=norm.director,
@ -440,9 +454,11 @@ def _update_scene_fields(
if studio_id and not scene.studio_id: if studio_id and not scene.studio_id:
scene.studio_id = studio_id scene.studio_id = studio_id
# Duration: canonical może doprecyzować (TPDB/StashDB lepiej to mierzą niż tube # Duration: canonical może doprecyzować (TPDB/StashDB lepiej to mierzą niż tube
# który czasem reportuje compilation length); scraper tylko gdy null. # który czasem reportuje compilation length); scraper tylko gdy null. Fallback do
if norm.duration_sec and (not scene.duration_sec or is_canonical): # duration z playback_source gdy connector nie dał scene-level (patrz _effective_duration).
scene.duration_sec = norm.duration_sec eff_duration = _effective_duration(norm)
if eff_duration and (not scene.duration_sec or is_canonical):
scene.duration_sec = eff_duration
if norm.description and not scene.description: if norm.description and not scene.description:
scene.description = norm.description scene.description = norm.description
if norm.code and not scene.code: if norm.code and not scene.code:

View file

@ -4,9 +4,12 @@ Liczniki są utrzymywane w tle (zamiast liczone per-request) bo agregacja po 6.3
scene_tags / 3M scene_performers z EXISTS do 1.15M playback_sources zajmuje ~4.3s scene_tags / 3M scene_performers z EXISTS do 1.15M playback_sources zajmuje ~4.3s
nie do zaakceptowania w hot-path UI (/tags, /performers, /studios, /favorites). nie do zaakceptowania w hot-path UI (/tags, /performers, /studios, /favorites).
Definicja (identyczna z dotychczasowym has_live_playback filtrem w taxonomies.py): Definicja: scene_count = liczba scen z danym tagiem/performerem/studiem mających 1
scene_count = liczba scen z danym tagiem/performerem/studiem mających 1 playback_source z dead_at IS NULL **ORAZ** duration_sec >= 60. Filtr duration mirror
playback_source z dead_at IS NULL. mobilnego `min_duration_sec=60` (api.ts default) listy scen w apce ZAWSZE filtrują
60s, więc badge/licznik musi liczyć to samo, inaczej "119 w ulubionych, 39 po wejściu"
(bug-report 2026-06-01 Safira Yakkuza). Wymaga propagacji duration z playback_source na
Scene.duration_sec (backfill + _effective_duration w resolverze, 2026-06-01).
Każdy UPDATE robi pełny LEFT JOIN (tag/performer/studio) agregat ustawia 0 dla Każdy UPDATE robi pełny LEFT JOIN (tag/performer/studio) agregat ustawia 0 dla
sierot. `IS DISTINCT FROM` pomija przepisywanie niezmienionych wierszy (mniej WAL/bloat). sierot. `IS DISTINCT FROM` pomija przepisywanie niezmienionych wierszy (mniej WAL/bloat).
@ -23,12 +26,18 @@ from app.db import session_scope
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
# Mirrors the mobile api.ts default `min_duration_sec ?? 60` — scene lists filter to >= 60 s.
_MIN_DURATION_SEC = 60
# Wspólny predykat: scena ma ≥1 żywy playback_source. # Wspólny predykat: scena ma ≥1 żywy playback_source.
_LIVE = ( _LIVE = (
"EXISTS (SELECT 1 FROM playback_sources ps " "EXISTS (SELECT 1 FROM playback_sources ps "
"WHERE ps.scene_id = {scene_col} AND ps.dead_at IS NULL)" "WHERE ps.scene_id = {scene_col} AND ps.dead_at IS NULL)"
) )
# Duration predicate keyed by scene_id, for the tag/performer aggregates that have
# no scenes row in scope; the `{scene_col}` placeholder is filled in via str.format.
_DUR = (
    "(SELECT s2.duration_sec FROM scenes s2 WHERE s2.id = {scene_col}) >= "
    f"{_MIN_DURATION_SEC}"
)
_TAGS_SQL = text( _TAGS_SQL = text(
f""" f"""
UPDATE tags t SET scene_count = COALESCE(a.c, 0) UPDATE tags t SET scene_count = COALESCE(a.c, 0)
@ -37,6 +46,7 @@ _TAGS_SQL = text(
SELECT st.tag_id, count(*) AS c SELECT st.tag_id, count(*) AS c
FROM scene_tags st FROM scene_tags st
WHERE {_LIVE.format(scene_col="st.scene_id")} WHERE {_LIVE.format(scene_col="st.scene_id")}
AND {_DUR.format(scene_col="st.scene_id")}
GROUP BY st.tag_id GROUP BY st.tag_id
) a ON a.tag_id = base.id ) a ON a.tag_id = base.id
WHERE t.id = base.id AND t.scene_count IS DISTINCT FROM COALESCE(a.c, 0) WHERE t.id = base.id AND t.scene_count IS DISTINCT FROM COALESCE(a.c, 0)
@ -51,6 +61,7 @@ _PERFORMERS_SQL = text(
SELECT sp.performer_id, count(*) AS c SELECT sp.performer_id, count(*) AS c
FROM scene_performers sp FROM scene_performers sp
WHERE {_LIVE.format(scene_col="sp.scene_id")} WHERE {_LIVE.format(scene_col="sp.scene_id")}
AND {_DUR.format(scene_col="sp.scene_id")}
GROUP BY sp.performer_id GROUP BY sp.performer_id
) a ON a.performer_id = base.id ) a ON a.performer_id = base.id
WHERE p.id = base.id AND p.scene_count IS DISTINCT FROM COALESCE(a.c, 0) WHERE p.id = base.id AND p.scene_count IS DISTINCT FROM COALESCE(a.c, 0)
@ -65,6 +76,7 @@ _STUDIOS_SQL = text(
SELECT sc.studio_id, count(*) AS c SELECT sc.studio_id, count(*) AS c
FROM scenes sc FROM scenes sc
WHERE sc.studio_id IS NOT NULL AND {_LIVE.format(scene_col="sc.id")} WHERE sc.studio_id IS NOT NULL AND {_LIVE.format(scene_col="sc.id")}
AND sc.duration_sec >= {_MIN_DURATION_SEC}
GROUP BY sc.studio_id GROUP BY sc.studio_id
) a ON a.studio_id = base.id ) a ON a.studio_id = base.id
WHERE s.id = base.id AND s.scene_count IS DISTINCT FROM COALESCE(a.c, 0) WHERE s.id = base.id AND s.scene_count IS DISTINCT FROM COALESCE(a.c, 0)

View file

@ -0,0 +1,48 @@
"""Backfill Scene.duration_sec z live playback_source, gdzie Scene NULL.
Tube'y zapisują duration na playback_source, a nie na Scene → 74% grywalnego katalogu
miało Scene.duration_sec=NULL mobilny filtr `min_duration_sec=60` (Scene.duration_sec
>= 60; NULL >= 60 = false) chował te sceny mimo że grywalne i długość jest znana
(bug-report 2026-06-01 Safira Yakkuza: 119 w ulubionych, 14 po wejściu).
Propagacja forward jest w resolverze (`_effective_duration`); ten skrypt nadrabia
istniejące. Idempotentny ustawia tylko wiersze z NULL.
Użycie: python scripts/backfill_scene_duration_from_playback.py
"""
from __future__ import annotations
import logging
import sys
from sqlalchemy import text
from app.db import session_scope
# Logging is configured at import time: INFO level, timestamp + level + message.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
log = logging.getLogger("backfill_duration")
# Single set-based UPDATE. The subquery computes, per scene_id, the max duration_sec
# over playback_sources rows that are live (dead_at IS NULL) and have a non-NULL
# duration. The outer WHERE limits the write to scenes whose duration_sec IS NULL, so
# existing values are never overwritten and a re-run finds nothing left to update.
# NOTE(review): a stored duration_sec of 0 is left untouched and a playback duration
# of 0 is eligible here — confirm this matches how the resolver's
# `_effective_duration` treats 0.
_SQL = text(
"""
UPDATE scenes sc
SET duration_sec = sub.d
FROM (
SELECT scene_id, max(duration_sec) AS d
FROM playback_sources
WHERE dead_at IS NULL AND duration_sec IS NOT NULL
GROUP BY scene_id
) sub
WHERE sc.id = sub.scene_id AND sc.duration_sec IS NULL
"""
)
def main() -> int:
    """Run the backfill UPDATE once and log how many rows it touched.

    Returns:
        Process exit status: 0 once the statement has run and the session
        scope has exited. Any exception raised by the session propagates.
    """
    with session_scope() as session:
        # Read rowcount while the result is still attached to the open session.
        updated = session.execute(_SQL).rowcount or 0
    # Report only after the scope has exited, so "DONE" is never logged for work
    # that failed while the scope was closing.
    # NOTE(review): presumably session_scope commits on exit — confirm.
    log.info("DONE: backfilled Scene.duration_sec for %d scenes", updated)
    return 0


if __name__ == "__main__":
    sys.exit(main())