fix(sxyprn): disable thumbnail refresh job — trafficdeposit token has ~1h TTL

CORRECTION: trafficdeposit thumbnail tokens are hour-bucketed and valid for only ~1h
(verified 2026-06-10: stored ts=11:00 was dead at 12:27, current ts=13:00 loads). The
earlier "~weekly rot" reading was wrong. Storing or periodically refreshing sxyprn
thumbnail URLs is futile — they expire within the hour. The refresh job now defaults
to OFF (the code is kept). The dead-marking sweep that the job performed
(Post Not Found → dead_at) was still valid. Live sxyprn thumbnails need on-demand
resolution at serve time (future work).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
jtrzupek 2026-06-10 14:29:24 +02:00
parent fef28ae56b
commit f7670963df

View file

@@ -208,7 +208,7 @@ def _job_refresh_sxyprn_thumbs(batch: int = 1200) -> None:
from sqlalchemy import text from sqlalchemy import text
from app.db import session_scope from app.db import session_scope
from scripts.refresh_sxyprn_thumbs import _fresh_thumb from scripts.refresh_sxyprn_thumbs import refresh_batch
with session_scope() as session: with session_scope() as session:
rows = session.execute( rows = session.execute(
@@ -218,19 +218,11 @@ def _job_refresh_sxyprn_thumbs(batch: int = 1200) -> None:
"ORDER BY updated_at ASC LIMIT :n" "ORDER BY updated_at ASC LIMIT :n"
).bindparams(n=batch) ).bindparams(n=batch)
).all() ).all()
updated = 0 refreshed, dead, untouched = refresh_batch(list(rows))
for pbid, page_url in rows: log.info(
thumb = _fresh_thumb(page_url) "[scheduler] sxyprn thumb refresh done: refreshed=%d dead=%d untouched=%d (of %d)",
if thumb: refreshed, dead, untouched, len(rows),
with session_scope() as session: )
session.execute(
text("UPDATE playback_sources SET thumbnail_url=:t WHERE id=:i").bindparams(
t=thumb, i=pbid
)
)
session.commit()
updated += 1
log.info("[scheduler] sxyprn thumb refresh done: %d/%d", updated, len(rows))
_run_with_timeout(_run, label="sxyprn-thumb-refresh") _run_with_timeout(_run, label="sxyprn-thumb-refresh")
@@ -423,10 +415,13 @@ def build_scheduler(cfg: dict[str, Any]) -> BlockingScheduler:
) )
log.info("scheduler: reap-stuck every %dh", reap_hours) log.info("scheduler: reap-stuck every %dh", reap_hours)
# sxyprn thumbnail refresh — sxyprn miniaturki rotują (signed CDN, 404 po ~tygodniach). # sxyprn thumbnail refresh — WYŁĄCZONE (default 0). Token trafficdeposit jest
# Domyślnie ZAWSZE on co 12h, batch najdawniej-aktualizowanych → cykl po katalogu w # bucketowany godzinowo i ważny ~1h (weryfikacja 2026-06-10: stored ts=11:00 martwy
# ~tydzień (mieści się w oknie wygaśnięcia). Bug 2026-06-10. # o 12:27, aktualny ts=13:00 żyje). Przechowywanie URL-i jest bezcelowe — wygasają
sxyprn_hours = cfg.get("sxyprn_thumb_refresh_hours", 12) # w godzinę, więc periodyczny refresh tylko wali w sxyprn na darmo. Działające
# thumbnaile sxyprn wymagają ON-DEMAND resolve przy serwowaniu (proxy fetch post
# page → bieżący og:image, cache ~45min). Job zostaje w kodzie ale domyślnie off.
sxyprn_hours = cfg.get("sxyprn_thumb_refresh_hours", 0)
if sxyprn_hours: if sxyprn_hours:
batch = cfg.get("sxyprn_thumb_refresh_batch", 1200) batch = cfg.get("sxyprn_thumb_refresh_batch", 1200)
sched.add_job( sched.add_job(
@@ -445,7 +440,7 @@ def build_scheduler(cfg: dict[str, Any]) -> BlockingScheduler:
DEFAULT_CONFIG: dict[str, Any] = { DEFAULT_CONFIG: dict[str, Any] = {
"tpdb_hours": 6, "tpdb_hours": 6,
"stashdb_hours": 6, "stashdb_hours": 6,
"sxyprn_thumb_refresh_hours": 12, "sxyprn_thumb_refresh_hours": 0, # off — token ~1h TTL, refresh bezcelowy (patrz register_jobs)
"sxyprn_thumb_refresh_batch": 1200, "sxyprn_thumb_refresh_batch": 1200,
"performer_driven_hours": 12, "performer_driven_hours": 12,
"performer_driven_top_n": 20, "performer_driven_top_n": 20,