def _job_refresh_sxyprn_thumbs(batch: int = 1200) -> None:
    """Refresh expired sxyprn thumbnails from their still-live /post/ pages (bug 2026-06-10).

    sxyprn/trafficdeposit thumbnail URLs are time-signed and rotate (the token expires
    after a few weeks -> 404), but the post page itself stays alive and carries a fresh
    poster (`og:image`). Search/listing pages do NOT re-surface old posts, so the only
    way to obtain a new URL is a per-post page fetch. Each run takes the `batch` sources
    with the oldest `updated_at`, so the whole catalogue is cycled every few days (inside
    the expiry window). See scripts/refresh_sxyprn_thumbs.py.

    Every row whose page fetch completed gets its `updated_at` stamped, whether or not a
    new thumbnail was found. `updated_at` is the cursor of the `ORDER BY updated_at ASC`
    selection; without the stamp the same oldest rows would be picked on every run and
    the cycle would never advance. Rows whose fetch raised are left untouched so they are
    retried on the next run.

    Args:
        batch: Maximum number of playback sources to process in one run.
    """
    log.info("[scheduler] sxyprn thumb refresh starting (batch=%d)", batch)

    def _run() -> None:
        # Local imports, as in the original block (kept out of module import time).
        from sqlalchemy import text

        from app.db import session_scope
        from scripts.refresh_sxyprn_thumbs import _fresh_thumb

        # Statements are built once; TextClause.bindparams() returns a copy, so binding
        # per-row values below does not mutate these templates.
        select_stale = text(
            "SELECT id, page_url FROM playback_sources "
            "WHERE origin='tube:sxyprncom' AND dead_at IS NULL "
            "ORDER BY updated_at ASC LIMIT :n"
        )
        # Raw textual SQL bypasses any ORM-level `onupdate` hook, so `updated_at` is set
        # explicitly. NOTE(review): assumes `updated_at` accepts CURRENT_TIMESTAMP on the
        # target database - confirm against the schema.
        store_thumb = text(
            "UPDATE playback_sources "
            "SET thumbnail_url=:t, updated_at=CURRENT_TIMESTAMP WHERE id=:i"
        )
        touch_only = text(
            "UPDATE playback_sources SET updated_at=CURRENT_TIMESTAMP WHERE id=:i"
        )

        with session_scope() as session:
            rows = session.execute(select_stale.bindparams(n=batch)).all()

        updated = 0
        failed = 0
        for pbid, page_url in rows:
            # One broken page must not abort the remaining rows of the batch.
            try:
                thumb = _fresh_thumb(page_url)
            except Exception:
                failed += 1
                log.exception(
                    "[scheduler] sxyprn thumb fetch failed (id=%s url=%s)", pbid, page_url
                )
                continue

            if thumb:
                stmt = store_thumb.bindparams(t=thumb, i=pbid)
            else:
                # No poster found: still advance the cursor so this row rotates to the
                # back of the queue instead of blocking the head forever.
                stmt = touch_only.bindparams(i=pbid)

            # One short transaction per row, committed explicitly as in the original,
            # so progress already made is persisted row by row.
            with session_scope() as session:
                session.execute(stmt)
                session.commit()
            if thumb:
                updated += 1

        log.info("[scheduler] sxyprn thumb refresh done: %d/%d", updated, len(rows))
        if failed:
            log.warning("[scheduler] sxyprn thumb refresh: %d fetch errors", failed)

    # Delegates execution to the module's timeout wrapper, as the original did.
    _run_with_timeout(_run, label="sxyprn-thumb-refresh")
@@ -381,12 +423,30 @@ def build_scheduler(cfg: dict[str, Any]) -> BlockingScheduler: ) log.info("scheduler: reap-stuck every %dh", reap_hours) + # sxyprn thumbnail refresh — sxyprn miniaturki rotują (signed CDN, 404 po ~tygodniach). + # Domyślnie ZAWSZE on co 12h, batch najdawniej-aktualizowanych → cykl po katalogu w + # ~tydzień (mieści się w oknie wygaśnięcia). Bug 2026-06-10. + sxyprn_hours = cfg.get("sxyprn_thumb_refresh_hours", 12) + if sxyprn_hours: + batch = cfg.get("sxyprn_thumb_refresh_batch", 1200) + sched.add_job( + lambda: _job_refresh_sxyprn_thumbs(batch), + IntervalTrigger(hours=sxyprn_hours, start_date=INTERVAL_ANCHOR), + id="sxyprn_thumb_refresh", + replace_existing=True, + max_instances=1, + coalesce=True, + ) + log.info("scheduler: sxyprn-thumb-refresh every %dh (batch=%d)", sxyprn_hours, batch) + return sched DEFAULT_CONFIG: dict[str, Any] = { "tpdb_hours": 6, "stashdb_hours": 6, + "sxyprn_thumb_refresh_hours": 12, + "sxyprn_thumb_refresh_batch": 1200, "performer_driven_hours": 12, "performer_driven_top_n": 20, # Browse-latest — newest scenes z rich-metadata tubes. Co 6h (4×/dobę) × ~100 diff --git a/scripts/refresh_sxyprn_thumbs.py b/scripts/refresh_sxyprn_thumbs.py new file mode 100644 index 0000000..e810775 --- /dev/null +++ b/scripts/refresh_sxyprn_thumbs.py @@ -0,0 +1,94 @@ +"""Odśwież wygasłe miniaturki sxyprn z ich (żywych) stron post (bug 2026-06-10). + +sxyprn/trafficdeposit thumbnaile są podpisane czasowo i ROTUJĄ (token wygasa po +~tygodniach → 404), ALE sama strona /post/.html ŻYJE (200) i zawiera świeży +poster sceny w `og:image` / `