feat(scripts): merge_exact_title_duration --playback-only + progress logging
--playback-only restricts merging to scenes with live playback (app-visible duplicates only). Progress is printed every 500 merges for long global runs. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
parent
e23e2d1f17
commit
a9545a7ab2
1 changed file with 27 additions and 11 deletions
|
|
@ -26,19 +26,26 @@ from app.db import session_scope
|
||||||
from app.resolve.scene_merge import merge_scenes
|
from app.resolve.scene_merge import merge_scenes
|
||||||
|
|
||||||
|
|
||||||
def _args() -> tuple[str | None, bool]:
|
def _args() -> tuple[str | None, bool, bool]:
|
||||||
commit = "--commit" in sys.argv
|
commit = "--commit" in sys.argv
|
||||||
|
playback_only = "--playback-only" in sys.argv
|
||||||
pid = None
|
pid = None
|
||||||
for a in sys.argv[1:]:
|
for a in sys.argv[1:]:
|
||||||
if a != "--commit" and len(a) >= 32:
|
if not a.startswith("--") and len(a) >= 32:
|
||||||
pid = a
|
pid = a
|
||||||
return pid, commit
|
return pid, commit, playback_only
|
||||||
|
|
||||||
|
|
||||||
def _groups(pid: str | None) -> list[list[str]]:
|
def _groups(pid: str | None, playback_only: bool = False) -> list[list[str]]:
|
||||||
# Grupy scen (per performer) o identycznym lower(trim(title)) + duration_sec.
|
# Grupy scen (per performer) o identycznym lower(trim(title)) + duration_sec.
|
||||||
# member order: refs DESC, srcs DESC, created_at ASC → pierwszy = keeper.
|
# member order: refs DESC, srcs DESC, created_at ASC → pierwszy = keeper.
|
||||||
where_perf = "AND sp.performer_id = :pid" if pid else ""
|
where_perf = "AND sp.performer_id = :pid" if pid else ""
|
||||||
|
# app-visible: tylko sceny z żywym playbackiem (to co user faktycznie widzi na
|
||||||
|
# stronach) — pomija canonical stuby bez tube-linków.
|
||||||
|
where_pb = (
|
||||||
|
"AND EXISTS (SELECT 1 FROM playback_sources p WHERE p.scene_id=s.id AND p.dead_at IS NULL)"
|
||||||
|
if playback_only else ""
|
||||||
|
)
|
||||||
sql = f"""
|
sql = f"""
|
||||||
WITH cand AS (
|
WITH cand AS (
|
||||||
SELECT s.id,
|
SELECT s.id,
|
||||||
|
|
@ -50,7 +57,7 @@ def _groups(pid: str | None) -> list[list[str]]:
|
||||||
(SELECT count(*) FROM playback_sources p WHERE p.scene_id=s.id) srcs
|
(SELECT count(*) FROM playback_sources p WHERE p.scene_id=s.id) srcs
|
||||||
FROM scenes s
|
FROM scenes s
|
||||||
JOIN scene_performers sp ON sp.scene_id=s.id {where_perf}
|
JOIN scene_performers sp ON sp.scene_id=s.id {where_perf}
|
||||||
WHERE s.duration_sec IS NOT NULL AND btrim(s.title) <> ''
|
WHERE s.duration_sec IS NOT NULL AND btrim(s.title) <> '' {where_pb}
|
||||||
)
|
)
|
||||||
SELECT array_agg(id::text ORDER BY refs DESC, srcs DESC, created_at ASC) members
|
SELECT array_agg(id::text ORDER BY refs DESC, srcs DESC, created_at ASC) members
|
||||||
FROM cand
|
FROM cand
|
||||||
|
|
@ -73,11 +80,17 @@ def _groups(pid: str | None) -> list[list[str]]:
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def main() -> None:
|
||||||
pid, commit = _args()
|
pid, commit, playback_only = _args()
|
||||||
groups = _groups(pid)
|
groups = _groups(pid, playback_only)
|
||||||
pairs = sum(len(g) - 1 for g in groups)
|
pairs = sum(len(g) - 1 for g in groups)
|
||||||
print(f"performer={pid or 'ALL'} groups={len(groups)} merges={pairs} commit={commit}", flush=True)
|
print(
|
||||||
|
f"performer={pid or 'ALL'} playback_only={playback_only} "
|
||||||
|
f"groups={len(groups)} merges={pairs} commit={commit}",
|
||||||
|
flush=True,
|
||||||
|
)
|
||||||
|
import uuid as _u
|
||||||
merged = 0
|
merged = 0
|
||||||
|
errors = 0
|
||||||
for g in groups:
|
for g in groups:
|
||||||
keep = g[0]
|
keep = g[0]
|
||||||
for drop in g[1:]:
|
for drop in g[1:]:
|
||||||
|
|
@ -86,12 +99,15 @@ def main() -> None:
|
||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
with session_scope() as s:
|
with session_scope() as s:
|
||||||
import uuid as _u
|
|
||||||
merge_scenes(s, keep_id=_u.UUID(keep), drop_id=_u.UUID(drop), resolved_by="merge_exact_title_duration")
|
merge_scenes(s, keep_id=_u.UUID(keep), drop_id=_u.UUID(drop), resolved_by="merge_exact_title_duration")
|
||||||
merged += 1
|
merged += 1
|
||||||
|
if merged % 500 == 0:
|
||||||
|
print(f" progress merged={merged}/{pairs} errors={errors}", flush=True)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f" ERR keep {keep[:8]} drop {drop[:8]}: {e}")
|
errors += 1
|
||||||
print(f"DONE merged={merged}/{pairs}", flush=True)
|
if errors <= 20:
|
||||||
|
print(f" ERR keep {keep[:8]} drop {drop[:8]}: {str(e)[:120]}")
|
||||||
|
print(f"DONE merged={merged}/{pairs} errors={errors}", flush=True)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue