"""freshporno.org — latest-vids browse scraper. Pilot #2 (po shyfap fail). Hipoteza: freshporno zachowuje oryginalne studio titles ("Straighten Her Out" zamiast custom rebranding jak shyfap) → title fuzzy match do canonical zadziała. Bonus: channel = studio 1:1 (Pure Taboo, Brazzers, etc.). URL patterns: - Listing: `/` (page 1), `/2/`, `/3/`, ... (last `/391/` w czasie pisania) - Scene: `/videos//` - Channels: `/channels//` (= studio) - Models: `/models//` (= performer) - Tags: `/tags//` (= category) """ from __future__ import annotations import re from datetime import date, datetime, timedelta from urllib.parse import urljoin from app.connectors.base import ( RawFingerprint, RawPerformer, RawPlaybackSource, RawScene, RawStudio, RawTag, ) from app.connectors.direct_scrapers._browse_base import ( BaseBrowseScraper, compute_thumbnail_phash, meta_content, ) _BASE = "https://freshporno.org" _SCENE_URL_RE = re.compile(r'href="(https://freshporno\.org/videos/[a-z0-9\-]+/)"', re.IGNORECASE) _CHANNEL_LINK_RE = re.compile( r'href="https://freshporno\.org/channels/([a-z0-9\-]+)/"[^>]*>([^<]+)', re.IGNORECASE ) _MODEL_LINK_RE = re.compile( r'href="https://freshporno\.org/models/([a-z0-9\-]+)/"[^>]*>([^<]+)', re.IGNORECASE ) _TAG_LINK_RE = re.compile( r'href="https://freshporno\.org/tags/([a-z0-9\-]+)/"[^>]*>([^<]+)', re.IGNORECASE ) # Duration via