From 4b71689a95c3b5c700b3fddca1901f7d8a953633 Mon Sep 17 00:00:00 2001
From: jtrzupek
Date: Mon, 15 Jun 2026 09:59:40 +0200
Subject: [PATCH] fix(scrapers): freshporno browse from /latest-updates/ not homepage root

The homepage root / is a KVS page with cache-control: no-store and a
fresh PHPSESSID per request; the server rotates its featured block and
on a cold session can serve an old set instead of the newest scenes.
Result: browse-latest skipped everything for 3 days (root served 20 May
content), no new freshporno scenes since 12 Jun (user report).

Switch _listing_url to the explicit date-sorted /latest-updates/ feed
(pagination /latest-updates/N/), which is not subject to that rotation.

Co-Authored-By: Claude Fable 5
---
 app/connectors/direct_scrapers/freshporno.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/app/connectors/direct_scrapers/freshporno.py b/app/connectors/direct_scrapers/freshporno.py
index 0390985..462bf62 100644
--- a/app/connectors/direct_scrapers/freshporno.py
+++ b/app/connectors/direct_scrapers/freshporno.py
@@ -5,8 +5,15 @@ Pilot #2 (po shyfap fail). Hipoteza: freshporno zachowuje oryginalne studio titl
 do canonical zadziała. Bonus: channel = studio 1:1 (Pure Taboo, Brazzers, etc.).
 
 URL patterns:
-  - Listing: `/` (page 1), `/2/`, `/3/`, ... (last `/391/` w czasie pisania)
+  - Listing: `/latest-updates/` (page 1), `/latest-updates/2/`, ... (chronologiczny feed)
   - Scene: `/videos//`
+
+Listing: świadomie `/latest-updates/` zamiast roota `/`. Root jest KVS-owym
+homepage z `cache-control: no-store` i świeżym PHPSESSID per-request — serwer
+rotuje tam blok "featured" i na zimnej sesji potrafi podać stary zestaw zamiast
+najnowszych (zaobserwowane 2026-06-15: 3 dni browse-latest skipowało wszystko bo
+root podawał sceny z 20 maja; freshporno.org report). `/latest-updates/` to jawny
+feed sortowany po dacie, odporny na tę rotację. Paginacja: `/latest-updates/N/`.
   - Channels: `/channels//` (= studio)
   - Models: `/models//` (= performer)
   - Tags: `/tags//` (= category)
@@ -61,8 +68,8 @@ class FreshpornoScraper(BaseBrowseScraper):
 
     def _listing_url(self, page: int) -> str:
         if page <= 1:
-            return f"{_BASE}/"
-        return f"{_BASE}/{page}/"
+            return f"{_BASE}/latest-updates/"
+        return f"{_BASE}/latest-updates/{page}/"
 
     def _extract_scene_urls(self, listing_html: str) -> list[str]:
         seen: set[str] = set()