diff --git a/app/scheduler/jobs.py b/app/scheduler/jobs.py index 23eb24f..f8621cb 100644 --- a/app/scheduler/jobs.py +++ b/app/scheduler/jobs.py @@ -210,8 +210,16 @@ def _job_bulk_dedup_performers() -> None: log.info("[scheduler] bulk_dedup performers starting") try: from app.scheduler.bulk_dedup import run_bulk_dedup - bc = run_bulk_dedup(strategy="performers", dry_run=False) - log.info("[scheduler] bulk_dedup performers done: %s", bc) + # cross_source_only=True: without this flag, pairwise matching generates N²/2 pairs per + # prolific performer, materialized into a list → worker OOM-killed every 12h (6GB RSS on a + # 7.6GB box, 2026-06-06), taking down the concurrent tpdb/stashdb/ingest jobs along with it. + # The flag narrows the run to cross-source candidates (TPDB↔StashDB) with a candidate pre-filter. + # Timeout-wrap like tpdb/stashdb — the job has no hard timeout of its own. + _run_with_timeout( + lambda: run_bulk_dedup(strategy="performers", dry_run=False, cross_source_only=True), + label="bulk-dedup-performers", + ) + log.info("[scheduler] bulk_dedup performers done") except Exception: log.exception("[scheduler] bulk_dedup performers failed")