fix(scenes): propagate playback duration to Scene + duration-consistent counts
Scene.duration_sec was NULL for ~74% of playable scenes (tube duration lives on playback_source, never propagated to Scene), so the mobile min_duration_sec=60 filter (Scene.duration_sec >= 60; NULL fails) silently hid them — surfaced as '119 in favorites, 14 after entering the performer' (Safira Yakkuza). - resolver: _effective_duration() falls back to max live playback_source duration when the connector provides no scene-level duration (forward fix, used in create + update). - scripts/backfill_scene_duration_from_playback.py: one-off idempotent backfill (recovered 204,014 scenes). - taxonomy_counts: scene_count now counts playable AND duration_sec >= 60, matching the always-60s-filtered scene lists, so favorites/performer/studio/tag badges agree with what the scene screen actually shows (Safira: 39 == 39). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
cd12348782
commit
817b50fbf8
3 changed files with 83 additions and 7 deletions
|
|
@ -380,6 +380,20 @@ def resolve_scene(
|
||||||
|
|
||||||
# ---- helpery --------------------------------------------------------------
|
# ---- helpery --------------------------------------------------------------
|
||||||
|
|
||||||
|
def _effective_duration(norm: NormalizedScene) -> int | None:
|
||||||
|
"""Duration sceny: scene-level z connectora, a gdy brak — max z playback_sources.
|
||||||
|
|
||||||
|
Tube'y często podają duration TYLKO na playbacku (norm.duration_sec=None, ale
|
||||||
|
norm.playback_sources[].duration_sec ustawione). Bez tego fallbacku Scene.duration_sec
|
||||||
|
zostaje NULL → mobilny filtr `min_duration_sec=60` (Scene.duration_sec >= 60) wycina
|
||||||
|
scenę mimo że jest grywalna i znamy jej długość (74% katalogu było ukryte, fix 2026-06-01).
|
||||||
|
"""
|
||||||
|
if norm.duration_sec:
|
||||||
|
return norm.duration_sec
|
||||||
|
ps_durs = [ps.duration_sec for ps in norm.playback_sources if ps.duration_sec]
|
||||||
|
return max(ps_durs) if ps_durs else None
|
||||||
|
|
||||||
|
|
||||||
def _create_canonical(
|
def _create_canonical(
|
||||||
session: Session, *, norm: NormalizedScene, studio_id: uuid.UUID | None
|
session: Session, *, norm: NormalizedScene, studio_id: uuid.UUID | None
|
||||||
) -> Scene:
|
) -> Scene:
|
||||||
|
|
@ -389,7 +403,7 @@ def _create_canonical(
|
||||||
slug=norm.slug or slugify(norm.title),
|
slug=norm.slug or slugify(norm.title),
|
||||||
release_date=norm.release_date,
|
release_date=norm.release_date,
|
||||||
studio_id=studio_id,
|
studio_id=studio_id,
|
||||||
duration_sec=norm.duration_sec,
|
duration_sec=_effective_duration(norm),
|
||||||
description=norm.description,
|
description=norm.description,
|
||||||
code=norm.code,
|
code=norm.code,
|
||||||
director=norm.director,
|
director=norm.director,
|
||||||
|
|
@ -440,9 +454,11 @@ def _update_scene_fields(
|
||||||
if studio_id and not scene.studio_id:
|
if studio_id and not scene.studio_id:
|
||||||
scene.studio_id = studio_id
|
scene.studio_id = studio_id
|
||||||
# Duration: canonical może doprecyzować (TPDB/StashDB lepiej to mierzą niż tube
|
# Duration: canonical może doprecyzować (TPDB/StashDB lepiej to mierzą niż tube
|
||||||
# który czasem reportuje compilation length); scraper tylko gdy null.
|
# który czasem reportuje compilation length); scraper tylko gdy null. Fallback do
|
||||||
if norm.duration_sec and (not scene.duration_sec or is_canonical):
|
# duration z playback_source gdy connector nie dał scene-level (patrz _effective_duration).
|
||||||
scene.duration_sec = norm.duration_sec
|
eff_duration = _effective_duration(norm)
|
||||||
|
if eff_duration and (not scene.duration_sec or is_canonical):
|
||||||
|
scene.duration_sec = eff_duration
|
||||||
if norm.description and not scene.description:
|
if norm.description and not scene.description:
|
||||||
scene.description = norm.description
|
scene.description = norm.description
|
||||||
if norm.code and not scene.code:
|
if norm.code and not scene.code:
|
||||||
|
|
|
||||||
|
|
@ -4,9 +4,12 @@ Liczniki są utrzymywane w tle (zamiast liczone per-request) bo agregacja po 6.3
|
||||||
scene_tags / 3M scene_performers z EXISTS do 1.15M playback_sources zajmuje ~4.3s —
|
scene_tags / 3M scene_performers z EXISTS do 1.15M playback_sources zajmuje ~4.3s —
|
||||||
nie do zaakceptowania w hot-path UI (/tags, /performers, /studios, /favorites).
|
nie do zaakceptowania w hot-path UI (/tags, /performers, /studios, /favorites).
|
||||||
|
|
||||||
Definicja (identyczna z dotychczasowym has_live_playback filtrem w taxonomies.py):
|
Definicja: scene_count = liczba scen z danym tagiem/performerem/studiem mających ≥1
|
||||||
scene_count = liczba scen z danym tagiem/performerem/studiem mających ≥1
|
playback_source z dead_at IS NULL **ORAZ** duration_sec >= 60. Filtr duration mirror
|
||||||
playback_source z dead_at IS NULL.
|
mobilnego `min_duration_sec=60` (api.ts default) — listy scen w apce ZAWSZE filtrują
|
||||||
|
≥60s, więc badge/licznik musi liczyć to samo, inaczej "119 w ulubionych, 39 po wejściu"
|
||||||
|
(bug-report 2026-06-01 Safira Yakkuza). Wymaga propagacji duration z playback_source na
|
||||||
|
Scene.duration_sec (backfill + _effective_duration w resolverze, 2026-06-01).
|
||||||
|
|
||||||
Każdy UPDATE robi pełny LEFT JOIN (tag/performer/studio) ⨝ agregat → ustawia 0 dla
|
Każdy UPDATE robi pełny LEFT JOIN (tag/performer/studio) ⨝ agregat → ustawia 0 dla
|
||||||
sierot. `IS DISTINCT FROM` pomija przepisywanie niezmienionych wierszy (mniej WAL/bloat).
|
sierot. `IS DISTINCT FROM` pomija przepisywanie niezmienionych wierszy (mniej WAL/bloat).
|
||||||
|
|
@ -23,12 +26,18 @@ from app.db import session_scope
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Mirror mobilnego api.ts `min_duration_sec ?? 60` — listy scen filtrują ≥60s.
|
||||||
|
_MIN_DURATION_SEC = 60
|
||||||
|
|
||||||
# Wspólny predykat: scena ma ≥1 żywy playback_source.
|
# Wspólny predykat: scena ma ≥1 żywy playback_source.
|
||||||
_LIVE = (
|
_LIVE = (
|
||||||
"EXISTS (SELECT 1 FROM playback_sources ps "
|
"EXISTS (SELECT 1 FROM playback_sources ps "
|
||||||
"WHERE ps.scene_id = {scene_col} AND ps.dead_at IS NULL)"
|
"WHERE ps.scene_id = {scene_col} AND ps.dead_at IS NULL)"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Duration check po scene_id (dla tags/performers gdzie nie mamy scene-row w scope).
|
||||||
|
_DUR = "(SELECT s2.duration_sec FROM scenes s2 WHERE s2.id = {scene_col}) >= " + str(_MIN_DURATION_SEC)
|
||||||
|
|
||||||
_TAGS_SQL = text(
|
_TAGS_SQL = text(
|
||||||
f"""
|
f"""
|
||||||
UPDATE tags t SET scene_count = COALESCE(a.c, 0)
|
UPDATE tags t SET scene_count = COALESCE(a.c, 0)
|
||||||
|
|
@ -37,6 +46,7 @@ _TAGS_SQL = text(
|
||||||
SELECT st.tag_id, count(*) AS c
|
SELECT st.tag_id, count(*) AS c
|
||||||
FROM scene_tags st
|
FROM scene_tags st
|
||||||
WHERE {_LIVE.format(scene_col="st.scene_id")}
|
WHERE {_LIVE.format(scene_col="st.scene_id")}
|
||||||
|
AND {_DUR.format(scene_col="st.scene_id")}
|
||||||
GROUP BY st.tag_id
|
GROUP BY st.tag_id
|
||||||
) a ON a.tag_id = base.id
|
) a ON a.tag_id = base.id
|
||||||
WHERE t.id = base.id AND t.scene_count IS DISTINCT FROM COALESCE(a.c, 0)
|
WHERE t.id = base.id AND t.scene_count IS DISTINCT FROM COALESCE(a.c, 0)
|
||||||
|
|
@ -51,6 +61,7 @@ _PERFORMERS_SQL = text(
|
||||||
SELECT sp.performer_id, count(*) AS c
|
SELECT sp.performer_id, count(*) AS c
|
||||||
FROM scene_performers sp
|
FROM scene_performers sp
|
||||||
WHERE {_LIVE.format(scene_col="sp.scene_id")}
|
WHERE {_LIVE.format(scene_col="sp.scene_id")}
|
||||||
|
AND {_DUR.format(scene_col="sp.scene_id")}
|
||||||
GROUP BY sp.performer_id
|
GROUP BY sp.performer_id
|
||||||
) a ON a.performer_id = base.id
|
) a ON a.performer_id = base.id
|
||||||
WHERE p.id = base.id AND p.scene_count IS DISTINCT FROM COALESCE(a.c, 0)
|
WHERE p.id = base.id AND p.scene_count IS DISTINCT FROM COALESCE(a.c, 0)
|
||||||
|
|
@ -65,6 +76,7 @@ _STUDIOS_SQL = text(
|
||||||
SELECT sc.studio_id, count(*) AS c
|
SELECT sc.studio_id, count(*) AS c
|
||||||
FROM scenes sc
|
FROM scenes sc
|
||||||
WHERE sc.studio_id IS NOT NULL AND {_LIVE.format(scene_col="sc.id")}
|
WHERE sc.studio_id IS NOT NULL AND {_LIVE.format(scene_col="sc.id")}
|
||||||
|
AND sc.duration_sec >= {_MIN_DURATION_SEC}
|
||||||
GROUP BY sc.studio_id
|
GROUP BY sc.studio_id
|
||||||
) a ON a.studio_id = base.id
|
) a ON a.studio_id = base.id
|
||||||
WHERE s.id = base.id AND s.scene_count IS DISTINCT FROM COALESCE(a.c, 0)
|
WHERE s.id = base.id AND s.scene_count IS DISTINCT FROM COALESCE(a.c, 0)
|
||||||
|
|
|
||||||
48
scripts/backfill_scene_duration_from_playback.py
Normal file
48
scripts/backfill_scene_duration_from_playback.py
Normal file
|
|
@ -0,0 +1,48 @@
|
||||||
|
"""Backfill Scene.duration_sec z live playback_source, gdzie Scene NULL.
|
||||||
|
|
||||||
|
Tube'y zapisują duration na playback_source, a nie na Scene → 74% grywalnego katalogu
|
||||||
|
miało Scene.duration_sec=NULL → mobilny filtr `min_duration_sec=60` (Scene.duration_sec
|
||||||
|
>= 60; NULL >= 60 = false) chował te sceny mimo że są grywalne i długość jest znana
|
||||||
|
(bug-report 2026-06-01 Safira Yakkuza: 119 w ulubionych, 14 po wejściu).
|
||||||
|
|
||||||
|
Propagacja forward jest w resolverze (`_effective_duration`); ten skrypt nadrabia
|
||||||
|
istniejące. Idempotentny — ustawia tylko wiersze z NULL.
|
||||||
|
|
||||||
|
Użycie: python scripts/backfill_scene_duration_from_playback.py
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from sqlalchemy import text
|
||||||
|
|
||||||
|
from app.db import session_scope
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
|
||||||
|
log = logging.getLogger("backfill_duration")
|
||||||
|
|
||||||
|
_SQL = text(
|
||||||
|
"""
|
||||||
|
UPDATE scenes sc
|
||||||
|
SET duration_sec = sub.d
|
||||||
|
FROM (
|
||||||
|
SELECT scene_id, max(duration_sec) AS d
|
||||||
|
FROM playback_sources
|
||||||
|
WHERE dead_at IS NULL AND duration_sec IS NOT NULL
|
||||||
|
GROUP BY scene_id
|
||||||
|
) sub
|
||||||
|
WHERE sc.id = sub.scene_id AND sc.duration_sec IS NULL
|
||||||
|
"""
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
|
||||||
|
with session_scope() as session:
|
||||||
|
res = session.execute(_SQL)
|
||||||
|
log.info("DONE: backfilled Scene.duration_sec for %d scenes", res.rowcount or 0)
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main())
|
||||||
Loading…
Add table
Reference in a new issue