From bb9e1afc319dc83bab1a99c58359289ab5fc5c56 Mon Sep 17 00:00:00 2001 From: jtrzupek Date: Wed, 10 Jun 2026 10:28:18 +0200 Subject: [PATCH] fix(resolver): refresh thumbnails on re-scrape instead of fill-only-if-null MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _sync_playback_sources only set thumbnail_url when the existing value was NULL, so signed CDN thumbnails that ROT (sxyprn/trafficdeposit tokens expire ~weekly → 404) were never replaced even when a fresh re-scrape captured a valid URL — making the rot permanent (bug 2026-06-10). Always overwrite thumbnail_url/animated_thumbnail_url with the freshly-scraped value when present; other fields keep fill-if-null. Lets the regular performer-driven ingest self-heal thumbnails for re-crawled scenes. (Note: old sxyprn backlog can't be bulk-refreshed — search/listings don't re-surface those posts, verified 0 overlap — so it's forward-looking; old sxyprn-only scenes fall back to the clean placeholder.) Co-Authored-By: Claude Opus 4.8 --- app/resolve/scene_resolver.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/app/resolve/scene_resolver.py b/app/resolve/scene_resolver.py index 686355e..7b2bb73 100644 --- a/app/resolve/scene_resolver.py +++ b/app/resolve/scene_resolver.py @@ -644,15 +644,23 @@ def _sync_playback_sources( ) ) else: - # Refresh + uzupełnij braki (nigdy nie nadpisujemy istniejących wartości). + # Refresh + uzupełnij braki (nie nadpisujemy istniejących wartości). existing.last_seen_at = datetime.now(UTC) if existing.scene_id != scene_id: # Ten sam (origin, page_url) trafił do innej canonical sceny — to znaczy # że dedup zmergował. Re-link do bieżącej. 
existing.scene_id = scene_id - for attr in ("embed_url", "stream_url", "quality", "duration_sec", "thumbnail_url", "animated_thumbnail_url"): + for attr in ("embed_url", "stream_url", "quality", "duration_sec"): if getattr(existing, attr) is None and getattr(ps, attr) is not None: setattr(existing, attr, getattr(ps, attr)) + # Thumbnaile ZAWSZE odśwież do świeżej wartości ze scrape (gdy podana). + # Signed CDN thumbnaile (sxyprn/trafficdeposit) rotują — token wygasa po + # ~tygodniach i stary URL daje 404. Trzymanie "tylko gdy NULL" robiło rot + # permanentnym (bug 2026-06-10). Re-scrape teraz odświeża martwe miniaturki. + for attr in ("thumbnail_url", "animated_thumbnail_url"): + new_val = getattr(ps, attr) + if new_val is not None: + setattr(existing, attr, new_val) def _log_auto_merge(