fix(scenes): propagate playback duration to Scene + duration-consistent counts
Scene.duration_sec was NULL for ~74% of playable scenes (tube duration lives on playback_source, never propagated to Scene), so the mobile min_duration_sec=60 filter (Scene.duration_sec >= 60; NULL fails) silently hid them — surfaced as '119 in favorites, 14 after entering the performer' (Safira Yakkuza). - resolver: _effective_duration() falls back to max live playback_source duration when the connector provides no scene-level duration (forward fix, used in create + update). - scripts/backfill_scene_duration_from_playback.py: one-off idempotent backfill (recovered 204,014 scenes). - taxonomy_counts: scene_count now counts playable AND duration_sec >= 60, matching the always-60s-filtered scene lists, so favorites/performer/studio/tag badges agree with what the scene screen actually shows (Safira: 39 == 39). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
cd12348782
commit
817b50fbf8
3 changed files with 83 additions and 7 deletions
|
|
@ -380,6 +380,20 @@ def resolve_scene(
|
|||
|
||||
# ---- helpery --------------------------------------------------------------
|
||||
|
||||
def _effective_duration(norm: NormalizedScene) -> int | None:
    """Return the duration to store on the Scene for a normalized record.

    The connector's scene-level ``norm.duration_sec`` wins whenever it is truthy.
    Otherwise the longest truthy ``duration_sec`` among ``norm.playback_sources``
    is used, and ``None`` is returned when no source carries one. A value of 0 is
    treated the same as a missing duration at both levels.

    Why the fallback exists: tube sites often report the duration ONLY on the
    playback source (``norm.duration_sec`` is None while
    ``norm.playback_sources[].duration_sec`` is set). Without it,
    ``Scene.duration_sec`` stays NULL and the mobile ``min_duration_sec=60`` filter
    (``Scene.duration_sec >= 60``) drops the scene even though it is playable and
    its length is known (74% of the catalogue was hidden; fixed 2026-06-01).
    """
    scene_level = norm.duration_sec
    if not scene_level:
        # No usable scene-level value: take the longest per-source duration.
        candidates = (
            source.duration_sec
            for source in norm.playback_sources
            if source.duration_sec
        )
        return max(candidates, default=None)
    return scene_level
|
||||
|
||||
|
||||
def _create_canonical(
|
||||
session: Session, *, norm: NormalizedScene, studio_id: uuid.UUID | None
|
||||
) -> Scene:
|
||||
|
|
@ -389,7 +403,7 @@ def _create_canonical(
|
|||
slug=norm.slug or slugify(norm.title),
|
||||
release_date=norm.release_date,
|
||||
studio_id=studio_id,
|
||||
duration_sec=norm.duration_sec,
|
||||
duration_sec=_effective_duration(norm),
|
||||
description=norm.description,
|
||||
code=norm.code,
|
||||
director=norm.director,
|
||||
|
|
@ -440,9 +454,11 @@ def _update_scene_fields(
|
|||
if studio_id and not scene.studio_id:
|
||||
scene.studio_id = studio_id
|
||||
# Duration: canonical może doprecyzować (TPDB/StashDB lepiej to mierzą niż tube
|
||||
# który czasem reportuje compilation length); scraper tylko gdy null.
|
||||
if norm.duration_sec and (not scene.duration_sec or is_canonical):
|
||||
scene.duration_sec = norm.duration_sec
|
||||
# który czasem reportuje compilation length); scraper tylko gdy null. Fallback do
|
||||
# duration z playback_source gdy connector nie dał scene-level (patrz _effective_duration).
|
||||
eff_duration = _effective_duration(norm)
|
||||
if eff_duration and (not scene.duration_sec or is_canonical):
|
||||
scene.duration_sec = eff_duration
|
||||
if norm.description and not scene.description:
|
||||
scene.description = norm.description
|
||||
if norm.code and not scene.code:
|
||||
|
|
|
|||
|
|
@ -4,9 +4,12 @@ Liczniki są utrzymywane w tle (zamiast liczone per-request) bo agregacja po 6.3
|
|||
scene_tags / 3M scene_performers z EXISTS do 1.15M playback_sources zajmuje ~4.3s —
|
||||
nie do zaakceptowania w hot-path UI (/tags, /performers, /studios, /favorites).
|
||||
|
||||
Definicja (identyczna z dotychczasowym has_live_playback filtrem w taxonomies.py):
|
||||
scene_count = liczba scen z danym tagiem/performerem/studiem mających ≥1
|
||||
playback_source z dead_at IS NULL.
|
||||
Definicja: scene_count = liczba scen z danym tagiem/performerem/studiem mających ≥1
|
||||
playback_source z dead_at IS NULL **ORAZ** duration_sec >= 60. Filtr duration mirror
|
||||
mobilnego `min_duration_sec=60` (api.ts default) — listy scen w apce ZAWSZE filtrują
|
||||
≥60s, więc badge/licznik musi liczyć to samo, inaczej "119 w ulubionych, 39 po wejściu"
|
||||
(bug-report 2026-06-01 Safira Yakkuza). Wymaga propagacji duration z playback_source na
|
||||
Scene.duration_sec (backfill + _effective_duration w resolverze, 2026-06-01).
|
||||
|
||||
Każdy UPDATE robi pełny LEFT JOIN (tag/performer/studio) ⨝ agregat → ustawia 0 dla
|
||||
sierot. `IS DISTINCT FROM` pomija przepisywanie niezmienionych wierszy (mniej WAL/bloat).
|
||||
|
|
@ -23,12 +26,18 @@ from app.db import session_scope
|
|||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
# Mirrors the mobile client's api.ts default `min_duration_sec ?? 60`: scene lists
# only show scenes of at least 60 seconds.
_MIN_DURATION_SEC = 60

# Shared predicate: the scene has at least one live playback_source.
# `{scene_col}` is filled in later via str.format with the scene-id column.
_LIVE = " ".join(
    (
        "EXISTS (SELECT 1 FROM playback_sources ps",
        "WHERE ps.scene_id = {scene_col} AND ps.dead_at IS NULL)",
    )
)

# Duration check keyed by scene_id (for tags/performers, where no scenes row is in
# scope). The doubled braces keep `{scene_col}` as a str.format placeholder.
_DUR = f"(SELECT s2.duration_sec FROM scenes s2 WHERE s2.id = {{scene_col}}) >= {_MIN_DURATION_SEC}"
|
||||
|
||||
_TAGS_SQL = text(
|
||||
f"""
|
||||
UPDATE tags t SET scene_count = COALESCE(a.c, 0)
|
||||
|
|
@ -37,6 +46,7 @@ _TAGS_SQL = text(
|
|||
SELECT st.tag_id, count(*) AS c
|
||||
FROM scene_tags st
|
||||
WHERE {_LIVE.format(scene_col="st.scene_id")}
|
||||
AND {_DUR.format(scene_col="st.scene_id")}
|
||||
GROUP BY st.tag_id
|
||||
) a ON a.tag_id = base.id
|
||||
WHERE t.id = base.id AND t.scene_count IS DISTINCT FROM COALESCE(a.c, 0)
|
||||
|
|
@ -51,6 +61,7 @@ _PERFORMERS_SQL = text(
|
|||
SELECT sp.performer_id, count(*) AS c
|
||||
FROM scene_performers sp
|
||||
WHERE {_LIVE.format(scene_col="sp.scene_id")}
|
||||
AND {_DUR.format(scene_col="sp.scene_id")}
|
||||
GROUP BY sp.performer_id
|
||||
) a ON a.performer_id = base.id
|
||||
WHERE p.id = base.id AND p.scene_count IS DISTINCT FROM COALESCE(a.c, 0)
|
||||
|
|
@ -65,6 +76,7 @@ _STUDIOS_SQL = text(
|
|||
SELECT sc.studio_id, count(*) AS c
|
||||
FROM scenes sc
|
||||
WHERE sc.studio_id IS NOT NULL AND {_LIVE.format(scene_col="sc.id")}
|
||||
AND sc.duration_sec >= {_MIN_DURATION_SEC}
|
||||
GROUP BY sc.studio_id
|
||||
) a ON a.studio_id = base.id
|
||||
WHERE s.id = base.id AND s.scene_count IS DISTINCT FROM COALESCE(a.c, 0)
|
||||
|
|
|
|||
48
scripts/backfill_scene_duration_from_playback.py
Normal file
48
scripts/backfill_scene_duration_from_playback.py
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
"""Backfill Scene.duration_sec z live playback_source, gdzie Scene NULL.
|
||||
|
||||
Tube'y zapisują duration na playback_source, a nie na Scene → 74% grywalnego katalogu
|
||||
miało Scene.duration_sec=NULL → mobilny filtr `min_duration_sec=60` (Scene.duration_sec
|
||||
>= 60; NULL >= 60 = false) chował te sceny mimo że są grywalne i długość jest znana
|
||||
(bug-report 2026-06-01 Safira Yakkuza: 119 w ulubionych, 14 po wejściu).
|
||||
|
||||
Propagacja forward jest w resolverze (`_effective_duration`); ten skrypt nadrabia
|
||||
istniejące. Idempotentny — ustawia tylko wiersze z NULL.
|
||||
|
||||
Użycie: python scripts/backfill_scene_duration_from_playback.py
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import sys
|
||||
|
||||
from sqlalchemy import text
|
||||
|
||||
from app.db import session_scope
|
||||
|
||||
# Logging is configured at import time; this module is meant to be run as a script.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
log = logging.getLogger("backfill_duration")

# For every scene whose duration_sec is NULL, copy the largest non-NULL duration_sec
# found among its playback_sources rows with dead_at IS NULL. Scenes that already
# have a duration are excluded by the final WHERE clause, so re-running the
# statement does not overwrite them.
_SQL = text(
    """
    UPDATE scenes sc
    SET duration_sec = sub.d
    FROM (
        SELECT scene_id, max(duration_sec) AS d
        FROM playback_sources
        WHERE dead_at IS NULL AND duration_sec IS NOT NULL
        GROUP BY scene_id
    ) sub
    WHERE sc.id = sub.scene_id AND sc.duration_sec IS NULL
    """
)
|
||||
|
||||
|
||||
def main() -> int:
    """Run the one-off duration backfill and return the process exit code.

    Executes ``_SQL`` inside a single ``session_scope()`` block and logs how many
    rows the UPDATE reported as affected.

    Returns:
        0 once the statement has run and the session scope has exited. Database
        errors are not caught here and propagate to the caller.
    """
    with session_scope() as session:
        # Read the row count while the result still belongs to the open session.
        updated = session.execute(_SQL).rowcount or 0
    # Report success only after session_scope() has exited. Presumably it commits
    # on exit (TODO confirm in app.db); logging inside the block would announce
    # "DONE" even if that commit then failed.
    log.info("DONE: backfilled Scene.duration_sec for %d scenes", updated)
    return 0
|
||||
|
||||
|
||||
# Script entry point: pass main()'s return value to sys.exit as the exit status.
if __name__ == "__main__":
    sys.exit(main())
|
||||
Loading…
Add table
Reference in a new issue