"""Refresh denormalizowanych `scene_count` na tags / performers / studios. Liczniki są utrzymywane w tle (zamiast liczone per-request) bo agregacja po 6.3M scene_tags / 3M scene_performers z EXISTS do 1.15M playback_sources zajmuje ~4.3s — nie do zaakceptowania w hot-path UI (/tags, /performers, /studios, /favorites). Definicja: scene_count = liczba scen z danym tagiem/performerem/studiem mających ≥1 playback_source z dead_at IS NULL **ORAZ** duration_sec >= 60. Filtr duration mirror mobilnego `min_duration_sec=60` (api.ts default) — listy scen w apce ZAWSZE filtrują ≥60s, więc badge/licznik musi liczyć to samo, inaczej "119 w ulubionych, 39 po wejściu" (bug-report 2026-06-01 Safira Yakkuza). Wymaga propagacji duration z playback_source na Scene.duration_sec (backfill + _effective_duration w resolverze, 2026-06-01). Każdy UPDATE robi pełny LEFT JOIN (tag/performer/studio) ⨝ agregat → ustawia 0 dla sierot. `IS DISTINCT FROM` pomija przepisywanie niezmienionych wierszy (mniej WAL/bloat). Całość ~5-10s, leci co kilka godzin — counts do tego stale, co dla sortu "popular" i badge "(N)" jest bez znaczenia. """ from __future__ import annotations import logging from sqlalchemy import text from app.db import session_scope log = logging.getLogger(__name__) # Mirror mobilnego api.ts `min_duration_sec ?? 60` — listy scen filtrują ≥60s. _MIN_DURATION_SEC = 60 # Wspólny predykat: scena ma ≥1 żywy playback_source. _LIVE = ( "EXISTS (SELECT 1 FROM playback_sources ps " "WHERE ps.scene_id = {scene_col} AND ps.dead_at IS NULL)" ) # Duration check po scene_id (dla tags/performers gdzie nie mamy scene-row w scope). _DUR = "(SELECT s2.duration_sec FROM scenes s2 WHERE s2.id = {scene_col}) >= " + str(_MIN_DURATION_SEC) _TAGS_SQL = text( f""" UPDATE tags t SET scene_count = COALESCE(a.c, 0) FROM tags base LEFT JOIN ( SELECT st.tag_id, count(*) AS c FROM scene_tags st WHERE {_LIVE.format(scene_col="st.scene_id")} AND {_DUR.format(scene_col="st.scene_id")} GROUP BY st.tag_id ) a ON a.tag_id = base.id WHERE t.id = base.id AND t.scene_count IS DISTINCT FROM COALESCE(a.c, 0) """ ) _PERFORMERS_SQL = text( f""" UPDATE performers p SET scene_count = COALESCE(a.c, 0) FROM performers base LEFT JOIN ( SELECT sp.performer_id, count(*) AS c FROM scene_performers sp WHERE {_LIVE.format(scene_col="sp.scene_id")} AND {_DUR.format(scene_col="sp.scene_id")} GROUP BY sp.performer_id ) a ON a.performer_id = base.id WHERE p.id = base.id AND p.scene_count IS DISTINCT FROM COALESCE(a.c, 0) """ ) _STUDIOS_SQL = text( f""" UPDATE studios s SET scene_count = COALESCE(a.c, 0) FROM studios base LEFT JOIN ( SELECT sc.studio_id, count(*) AS c FROM scenes sc WHERE sc.studio_id IS NOT NULL AND {_LIVE.format(scene_col="sc.id")} AND sc.duration_sec >= {_MIN_DURATION_SEC} GROUP BY sc.studio_id ) a ON a.studio_id = base.id WHERE s.id = base.id AND s.scene_count IS DISTINCT FROM COALESCE(a.c, 0) """ ) def refresh_taxonomy_counts() -> dict[str, int]: """Przelicza scene_count dla tags/performers/studios. Zwraca rowcount per tabela (ile wierszy faktycznie się zmieniło).""" out: dict[str, int] = {} with session_scope() as session: for name, stmt in ( ("tags", _TAGS_SQL), ("performers", _PERFORMERS_SQL), ("studios", _STUDIOS_SQL), ): res = session.execute(stmt) out[name] = res.rowcount or 0 # Commit per-tabela — długie transakcje trzymałyby locki na hot tables. session.commit() return out