def _effective_duration(norm: NormalizedScene) -> int | None:
    """Return the scene duration in seconds, preferring the scene-level value.

    The connector's scene-level ``norm.duration_sec`` wins when it is set and
    non-zero. Otherwise the longest non-zero ``duration_sec`` found among
    ``norm.playback_sources`` is used, and ``None`` is returned when no source
    carries one.

    Why the fallback exists: tube connectors often report the duration only on
    the playback source. Without it ``Scene.duration_sec`` stays NULL and the
    mobile ``min_duration_sec=60`` filter (``Scene.duration_sec >= 60``) hides
    a scene that is playable and whose length is known.
    """
    scene_level = norm.duration_sec
    if scene_level:
        return scene_level
    # Falsy durations (None / 0) mean "unknown" and are ignored; `default`
    # covers the case where no playback source reports a usable value.
    return max(
        (src.duration_sec for src in norm.playback_sources if src.duration_sec),
        default=None,
    )
+ eff_duration = _effective_duration(norm) + if eff_duration and (not scene.duration_sec or is_canonical): + scene.duration_sec = eff_duration if norm.description and not scene.description: scene.description = norm.description if norm.code and not scene.code: diff --git a/app/scheduler/taxonomy_counts.py b/app/scheduler/taxonomy_counts.py index 6861254..3e0ab03 100644 --- a/app/scheduler/taxonomy_counts.py +++ b/app/scheduler/taxonomy_counts.py @@ -4,9 +4,12 @@ Liczniki są utrzymywane w tle (zamiast liczone per-request) bo agregacja po 6.3 scene_tags / 3M scene_performers z EXISTS do 1.15M playback_sources zajmuje ~4.3s — nie do zaakceptowania w hot-path UI (/tags, /performers, /studios, /favorites). -Definicja (identyczna z dotychczasowym has_live_playback filtrem w taxonomies.py): - scene_count = liczba scen z danym tagiem/performerem/studiem mających ≥1 - playback_source z dead_at IS NULL. +Definicja: scene_count = liczba scen z danym tagiem/performerem/studiem mających ≥1 + playback_source z dead_at IS NULL **ORAZ** duration_sec >= 60. Filtr duration mirror + mobilnego `min_duration_sec=60` (api.ts default) — listy scen w apce ZAWSZE filtrują + ≥60s, więc badge/licznik musi liczyć to samo, inaczej "119 w ulubionych, 39 po wejściu" + (bug-report 2026-06-01 Safira Yakkuza). Wymaga propagacji duration z playback_source na + Scene.duration_sec (backfill + _effective_duration w resolverze, 2026-06-01). Każdy UPDATE robi pełny LEFT JOIN (tag/performer/studio) ⨝ agregat → ustawia 0 dla sierot. `IS DISTINCT FROM` pomija przepisywanie niezmienionych wierszy (mniej WAL/bloat). @@ -23,12 +26,18 @@ from app.db import session_scope log = logging.getLogger(__name__) +# Mirror mobilnego api.ts `min_duration_sec ?? 60` — listy scen filtrują ≥60s. +_MIN_DURATION_SEC = 60 + # Wspólny predykat: scena ma ≥1 żywy playback_source. 
_LIVE = ( "EXISTS (SELECT 1 FROM playback_sources ps " "WHERE ps.scene_id = {scene_col} AND ps.dead_at IS NULL)" ) +# Duration check po scene_id (dla tags/performers gdzie nie mamy scene-row w scope). +_DUR = "(SELECT s2.duration_sec FROM scenes s2 WHERE s2.id = {scene_col}) >= " + str(_MIN_DURATION_SEC) + _TAGS_SQL = text( f""" UPDATE tags t SET scene_count = COALESCE(a.c, 0) @@ -37,6 +46,7 @@ _TAGS_SQL = text( SELECT st.tag_id, count(*) AS c FROM scene_tags st WHERE {_LIVE.format(scene_col="st.scene_id")} + AND {_DUR.format(scene_col="st.scene_id")} GROUP BY st.tag_id ) a ON a.tag_id = base.id WHERE t.id = base.id AND t.scene_count IS DISTINCT FROM COALESCE(a.c, 0) @@ -51,6 +61,7 @@ _PERFORMERS_SQL = text( SELECT sp.performer_id, count(*) AS c FROM scene_performers sp WHERE {_LIVE.format(scene_col="sp.scene_id")} + AND {_DUR.format(scene_col="sp.scene_id")} GROUP BY sp.performer_id ) a ON a.performer_id = base.id WHERE p.id = base.id AND p.scene_count IS DISTINCT FROM COALESCE(a.c, 0) @@ -65,6 +76,7 @@ _STUDIOS_SQL = text( SELECT sc.studio_id, count(*) AS c FROM scenes sc WHERE sc.studio_id IS NOT NULL AND {_LIVE.format(scene_col="sc.id")} + AND sc.duration_sec >= {_MIN_DURATION_SEC} GROUP BY sc.studio_id ) a ON a.studio_id = base.id WHERE s.id = base.id AND s.scene_count IS DISTINCT FROM COALESCE(a.c, 0) diff --git a/scripts/backfill_scene_duration_from_playback.py b/scripts/backfill_scene_duration_from_playback.py new file mode 100644 index 0000000..72eca94 --- /dev/null +++ b/scripts/backfill_scene_duration_from_playback.py @@ -0,0 +1,48 @@ +"""Backfill Scene.duration_sec z live playback_source, gdzie Scene NULL. + +Tube'y zapisują duration na playback_source, a nie na Scene → 74% grywalnego katalogu +miało Scene.duration_sec=NULL → mobilny filtr `min_duration_sec=60` (Scene.duration_sec +>= 60; NULL >= 60 = false) chował te sceny mimo że są grywalne i długość jest znana +(bug-report 2026-06-01 Safira Yakkuza: 119 w ulubionych, 14 po wejściu). 
"""Backfill Scene.duration_sec from live playback sources where the scene has none.

Tube connectors store the duration on the playback source rather than on the
scene, so 74% of the playable catalog had Scene.duration_sec = NULL. The mobile
filter `min_duration_sec=60` (Scene.duration_sec >= 60; NULL >= 60 is false)
hid those scenes even though they are playable and their length is known
(bug report 2026-06-01: 119 in favorites, 14 after opening the list).

Forward propagation lives in the resolver (`_effective_duration`); this script
catches up the existing rows. It is idempotent: only rows whose duration is
NULL are written.

Usage: python scripts/backfill_scene_duration_from_playback.py
"""
from __future__ import annotations

import logging
import sys

from sqlalchemy import text

from app.db import session_scope

log = logging.getLogger("backfill_duration")

# For every scene without a duration, take the longest duration among its live
# playback sources. `duration_sec > 0` (rather than IS NOT NULL) matches the
# resolver, which treats a zero duration as "unknown" and never propagates it.
_SQL = text(
    """
    UPDATE scenes sc
    SET duration_sec = sub.d
    FROM (
        SELECT scene_id, max(duration_sec) AS d
        FROM playback_sources
        WHERE dead_at IS NULL AND duration_sec > 0
        GROUP BY scene_id
    ) sub
    WHERE sc.id = sub.scene_id AND sc.duration_sec IS NULL
    """
)


def main() -> int:
    """Run the backfill once and return the process exit code (always 0).

    Any database error propagates out of ``session_scope`` as an exception.
    """
    # Configure logging here, not at import time, so importing this module
    # does not reconfigure the root logger of the importing process.
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
    with session_scope() as session:
        res = session.execute(_SQL)
        updated = res.rowcount or 0
    # Logged after the scope exits — presumably that is where session_scope
    # commits (TODO confirm) — so "DONE" is not reported for a failed commit.
    log.info("DONE: backfilled Scene.duration_sec for %d scenes", updated)
    return 0


if __name__ == "__main__":
    sys.exit(main())