NOTE(review): line breaks and indentation in this patch were reconstructed from
the hunk headers' line counts after the original was collapsed onto two lines.
Leading whitespace (especially inside the SQL literal) is approximate; apply
with `git apply --ignore-whitespace` or regenerate the patch from the commit.

diff --git a/scripts/merge_exact_title_duration.py b/scripts/merge_exact_title_duration.py
index c995b61..df3406f 100644
--- a/scripts/merge_exact_title_duration.py
+++ b/scripts/merge_exact_title_duration.py
@@ -26,19 +26,26 @@ from app.db import session_scope
 from app.resolve.scene_merge import merge_scenes
 
 
-def _args() -> tuple[str | None, bool]:
+def _args() -> tuple[str | None, bool, bool]:
     commit = "--commit" in sys.argv
+    playback_only = "--playback-only" in sys.argv
     pid = None
     for a in sys.argv[1:]:
-        if a != "--commit" and len(a) >= 32:
+        if not a.startswith("--") and len(a) >= 32:
             pid = a
-    return pid, commit
+    return pid, commit, playback_only
 
 
-def _groups(pid: str | None) -> list[list[str]]:
+def _groups(pid: str | None, playback_only: bool = False) -> list[list[str]]:
     # Grupy scen (per performer) o identycznym lower(trim(title)) + duration_sec.
     # member order: refs DESC, srcs DESC, created_at ASC → pierwszy = keeper.
     where_perf = "AND sp.performer_id = :pid" if pid else ""
+    # app-visible: tylko sceny z żywym playbackiem (to co user faktycznie widzi na
+    # stronach) — pomija canonical stuby bez tube-linków.
+    where_pb = (
+        "AND EXISTS (SELECT 1 FROM playback_sources p WHERE p.scene_id=s.id AND p.dead_at IS NULL)"
+        if playback_only else ""
+    )
     sql = f"""
     WITH cand AS (
       SELECT s.id,
@@ -50,7 +57,7 @@ def _groups(pid: str | None) -> list[list[str]]:
              (SELECT count(*) FROM playback_sources p WHERE p.scene_id=s.id) srcs
       FROM scenes s
       JOIN scene_performers sp ON sp.scene_id=s.id {where_perf}
-      WHERE s.duration_sec IS NOT NULL AND btrim(s.title) <> ''
+      WHERE s.duration_sec IS NOT NULL AND btrim(s.title) <> '' {where_pb}
     )
     SELECT array_agg(id::text ORDER BY refs DESC, srcs DESC, created_at ASC) members
     FROM cand
@@ -73,11 +80,17 @@ def _groups(pid: str | None) -> list[list[str]]:
 
 
 def main() -> None:
-    pid, commit = _args()
-    groups = _groups(pid)
+    pid, commit, playback_only = _args()
+    groups = _groups(pid, playback_only)
     pairs = sum(len(g) - 1 for g in groups)
-    print(f"performer={pid or 'ALL'} groups={len(groups)} merges={pairs} commit={commit}", flush=True)
+    print(
+        f"performer={pid or 'ALL'} playback_only={playback_only} "
+        f"groups={len(groups)} merges={pairs} commit={commit}",
+        flush=True,
+    )
+    import uuid as _u
     merged = 0
+    errors = 0
     for g in groups:
         keep = g[0]
         for drop in g[1:]:
@@ -86,12 +99,15 @@ def main() -> None:
                 continue
             try:
                 with session_scope() as s:
-                    import uuid as _u
                     merge_scenes(s, keep_id=_u.UUID(keep), drop_id=_u.UUID(drop), resolved_by="merge_exact_title_duration")
                 merged += 1
+                if merged % 500 == 0:
+                    print(f" progress merged={merged}/{pairs} errors={errors}", flush=True)
             except Exception as e:
-                print(f" ERR keep {keep[:8]} drop {drop[:8]}: {e}")
-    print(f"DONE merged={merged}/{pairs}", flush=True)
+                errors += 1
+                if errors <= 20:
+                    print(f" ERR keep {keep[:8]} drop {drop[:8]}: {str(e)[:120]}")
+    print(f"DONE merged={merged}/{pairs} errors={errors}", flush=True)
 
 
 if __name__ == "__main__":