"""xhamster.com — natywny server-side HLS extractor. 2026-06-08: re-test Chrome DevTools + cross-IP NAPRAWIA założenie z `_vps_blocked_fallback`. Wcześniej `xhamstercom` szedł przez WebView fallback (założenie: Cloudflare blokuje Hetzner IP). Re-test pokazał: 1. VPS pobiera scene page (HTTP 200, BEZ Cloudflare challenge — blok się zdjął). 2. Master HLS URL jest w SSR HTML plain: `video-nss.xhcdn.com/,/media=hls4/ multi=.../...m3u8`. `` to UNIX ts → token TIME-BOUND, nie IP-bound. 3. Cross-IP test (VPS Hetzner): master m3u8 → 200, wariant playlist → 200, segment .m4s → 206 video/mp4. Cały łańcuch PORTABLE — mobile gra HLS direct z residential IP, zero VPS proxy bandwidth. Dlatego resolvujemy SERVER-SIDE jak porntrex/freshporno: fetch page (curl_cffi chrome) → wyłuskaj master m3u8 → oddaj jako type='m3u8' mobile_direct. ExoPlayer robi adaptive multi-quality z jednego master URL. NB `sources.standard.av1/h264` w HTML to ZASZYFROWANE hex-bloby (player deszyfruje w JS), bezużyteczne server-side — dlatego bierzemy HLS, nie mp4. """ from __future__ import annotations import logging import re from app.extractors._fetch import _DEFAULT_IMPERSONATE, _DEFAULT_UA, _HAS_CURL_CFFI, fetch_tube_html from app.extractors._models import HosterDead, StreamSource log = logging.getLogger(__name__) _BASE = "https://xhamster.com" # Master HLS na xhcdn (video-nss.xhcdn.com / fallback inne sub-domeny). JSON w HTML # escape'uje slashe (`https:\/\/...`), więc unescape przed matchowaniem. _M3U8_RE = re.compile(r"https://[a-z0-9.\-]*xhcdn\.com/[^\"'\\ ]+?\.m3u8", re.IGNORECASE) # Markery skasowanej sceny (strona istnieje, ale bez wideo) → HosterDead. 
_DEAD_MARKERS = (
    "this video has been deleted",
    "this video was deleted",
    "video is no longer available",
    "has been removed",
)


def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | None:
    """Resolve an xhamster scene page to its master HLS playlist.

    The page is fetched with curl_cffi when it is available; if that attempt
    fails, returns an HTTP status >= 400, or yields an empty body, the page is
    fetched again through ``fetch_tube_html``. The first xhcdn ``.m3u8`` URL
    found in the HTML (after un-escaping JSON slashes) is returned as the
    single stream source.

    Args:
        page_url: URL of the scene page.
        timeout: Timeout passed to both fetch attempts.

    Returns:
        A one-element list holding the master playlist as an ``m3u8``
        ``StreamSource``, or ``None`` when the page contains neither a master
        URL nor a deleted-scene marker.

    Raises:
        HosterDead: No master URL was found and the page contains one of
            ``_DEAD_MARKERS``.
        Exception: Whatever ``fetch_tube_html`` raises propagates unchanged
            (per the existing note it raises ``TubePageError`` for 404/410,
            which the caller maps to ``dead_at``).
    """
    html = ""
    if _HAS_CURL_CFFI:
        from curl_cffi import requests as _cf_requests

        # The context manager closes the session (and its curl handle) on every
        # path; previously the session was never closed and leaked per call.
        with _cf_requests.Session(impersonate=_DEFAULT_IMPERSONATE) as session:
            try:
                resp = session.get(
                    page_url,
                    headers={
                        "User-Agent": _DEFAULT_UA,
                        "Accept": "text/html,application/xhtml+xml",
                    },
                    timeout=timeout,
                    allow_redirects=True,
                )
                html = resp.text if resp.status_code < 400 else ""
            except Exception as e:
                # Best-effort attempt: any failure falls through to the
                # fetch_tube_html fallback below.
                log.info("xhamster: page fetch failed %s: %s", page_url, e)
                html = ""

    if not html:
        # fetch_tube_html raises TubePageError for 404/410 (caller → dead_at).
        html = fetch_tube_html(page_url, timeout=timeout)

    # JSON-escaped slashes → plain, so the regex can catch the master URL.
    unescaped = html.replace("\\/", "/")
    m = _M3U8_RE.search(unescaped)
    if not m:
        low = unescaped.lower()
        if any(marker in low for marker in _DEAD_MARKERS):
            raise HosterDead(f"xhamster: scene deleted {page_url}")
        log.info("xhamster: no HLS master URL on %s", page_url)
        return None

    master = m.group(0)
    return [
        StreamSource(
            link=master,
            type="m3u8",
            quality=None,  # HLS master = adaptive multi-quality (ExoPlayer picks)
            referer=_BASE + "/",
            # Master + variants + segments are time-bound (not IP/cookie-bound),
            # verified cross-IP 2026-06-08 → mobile plays direct, zero VPS proxy.
            raw={"mobile_direct_ok": True},
        )
    ]