From cbb2390a2ad5b85860d8840795f0a7383d8d5d75 Mon Sep 17 00:00:00 2001 From: jtrzupek Date: Mon, 22 Jun 2026 12:23:29 +0200 Subject: [PATCH] feat(sources): remove 0dayxx + pornditt + pornhat entirely MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three orphan-factory tubes (0–0.2% canonical match — auto-screenshot thumbs and slug titles that never match TPDB/StashDB) — to be replaced by better sources. Removed scrapers (files + imports), extractors (registry + modules), the pornhat entry from tag-enrichment priority lists and the 0dayxx display override, and purged the DB (19,003 playback_sources + 9,904 solo-orphan scenes; shared mirror scenes keep their other sources). The pornhat-based enrich_studio endpoint stays as a graceful no-op (no pornhat sources → returns no studio). Co-Authored-By: Claude Opus 4.8 (1M context) --- app/api/scenes.py | 2 +- app/api/sources.py | 1 - app/api/taxonomies.py | 2 +- app/connectors/direct_scrapers/__init__.py | 27 +---- app/connectors/direct_scrapers/pornditt.py | 26 ----- app/connectors/direct_scrapers/pornhat.py | 99 ---------------- app/connectors/direct_scrapers/zerodayxx.py | 119 -------------------- app/extractors/__init__.py | 15 +-- app/extractors/tubes/pornditt.py | 23 ---- app/extractors/tubes/pornhat.py | 86 -------------- 10 files changed, 9 insertions(+), 391 deletions(-) delete mode 100644 app/connectors/direct_scrapers/pornditt.py delete mode 100644 app/connectors/direct_scrapers/pornhat.py delete mode 100644 app/connectors/direct_scrapers/zerodayxx.py delete mode 100644 app/extractors/tubes/pornditt.py delete mode 100644 app/extractors/tubes/pornhat.py diff --git a/app/api/scenes.py b/app/api/scenes.py index 208e97e..5b00fe3 100644 --- a/app/api/scenes.py +++ b/app/api/scenes.py @@ -969,7 +969,7 @@ def enrich_tags_from_tube( # Priority: mainstream tubes (bogate metadane) > niche (mniej tagów albo garbage). PRIORITY = ["xhamstercom", "porntrexcom", "epornercom", "youporncom", - "xvideoscom", "xnxxcom", "pornhatcom"] + "xvideoscom", "xnxxcom"] sources = session.execute( select(PlaybackSource).where( PlaybackSource.scene_id == scene_id, diff --git a/app/api/sources.py b/app/api/sources.py index 4712399..478459b 100644 --- a/app/api/sources.py +++ b/app/api/sources.py @@ -91,7 +91,6 @@ _DISPLAY_OVERRIDES: dict[str, str] = { "porn00org": "porn00.org", "freshpornoorg": "freshporno.org", "pornxpph": "pornxp.ph", - "0dayxxcom": "0dayxx.com", "shyfapnet": "shyfap.net", "hdporngg": "hdporn.gg", "fullmoviesxxx": "fullmovies.xxx", diff --git a/app/api/taxonomies.py b/app/api/taxonomies.py index 6037bbd..d41a5d7 100644 --- a/app/api/taxonomies.py +++ b/app/api/taxonomies.py @@ -349,7 +349,7 @@ _TAG_RESCRAPE_THRESHOLD = 3 # Mainstream tubes priority dla tagów — bogate metadane. _TAG_PRIORITY = [ "xhamstercom", "porntrexcom", "epornercom", "youporncom", - "xvideoscom", "xnxxcom", "pornhatcom", + "xvideoscom", "xnxxcom", ] diff --git a/app/connectors/direct_scrapers/__init__.py b/app/connectors/direct_scrapers/__init__.py index 0c9d962..1a52701 100644 --- a/app/connectors/direct_scrapers/__init__.py +++ b/app/connectors/direct_scrapers/__init__.py @@ -34,9 +34,7 @@ from app.connectors.direct_scrapers.latestpornvideo import LatestPornVideoScrape from app.connectors.direct_scrapers.mypornerleak import MyPornerLeakScraper from app.connectors.direct_scrapers.perverzija import PerverzijaScraper from app.connectors.direct_scrapers.porn4days import Porn4DaysScraper -from app.connectors.direct_scrapers.pornditt import PornDittScraper from app.connectors.direct_scrapers.porndish import PornDishScraper -from app.connectors.direct_scrapers.pornhat import PornHatScraper # noqa: F401 — kept for backref; ingest disabled from app.connectors.direct_scrapers.porntrex import PornTrexScraper from app.connectors.direct_scrapers.siska import SiskaScraper from app.connectors.direct_scrapers.sxyland import SxyLandScraper @@ -48,7 +46,6 @@ from app.connectors.direct_scrapers.xnxx import XnxxScraper from app.connectors.direct_scrapers.xvideos import XVideosScraper from app.connectors.direct_scrapers.xxxfreewatch import XxxFreeWatchScraper # noqa: F401 — kept for backref; delisted from app.connectors.direct_scrapers.youporn import YouPornScraper -from app.connectors.direct_scrapers.zerodayxx import ZeroDayXXScraper ALL_DIRECT_SCRAPERS: list[type[BaseDirectTubeScraper]] = [ # Existing 4 (verified, in production) @@ -58,12 +55,8 @@ ALL_DIRECT_SCRAPERS: list[type[BaseDirectTubeScraper]] = [ # popunder redirect. Mobile WebView page-as-hoster pokazuje ad redirect zamiast video. # 33,598 playback_sources mass-marked dead, 27,374 solo-orphan scenes deleted. SxyLandScraper, - # ZeroDayXXScraper — wyłączony 2026-05-12 (source quality report): 25,596 scen, 0.1% canonical - # match. Slug-concat tytuły (`bella reese big butt ready to be filled with cum analized`) bez - # `[Studio]` lub `Studio - Perf - Title` prefixu (parse rate 3%) → resolver nie ma żadnego - # signalu do matchu. Wraps watchporn ale dziedziczy stripped metadata. Solo orphany usunięte - # (~21k scen) — plik scrapera + extractor zostają (istniejące playback_sources nadal się - # resolvują). + # ZeroDayXXScraper (0dayxx) — USUNIĘTY CAŁKOWICIE 2026-06-22 (user request). Orphan + # factory (0.1% canonical), zastępujemy lepszymi źródłami. Dane/pliki/extractor skasowane. # Mainstream (URL templates well-known) # PornHub + RedTube — USUNIĘTE CAŁKOWICIE 2026-06-22 (user request). Disabled od # 2026-05-12 (0.4% canonical match), zamrożone dane skasowane z DB, pliki scraperów @@ -105,19 +98,9 @@ ALL_DIRECT_SCRAPERS: list[type[BaseDirectTubeScraper]] = [ # zwraca consistent search results. KVS engine, slug-aware scene URLs. Mostly # orphan ingest (auto-screenshots, no canonical phash match — sprawdzone), ale # może łapać sceny popularnych performerów których jeszcze nie mamy w TPDB. - # PornHatScraper — wyłączony 2026-05-18. 9,799 scen, 0.2% canonical match, 100% solo-orphan. - # Pure orphan factory — auto-screenshot thumbs nie matchują phash do canonical, slug tytuły - # nie matchują rapidfuzz, brak duration/date signals. KEEP `pornhatcom` extractor i istniejące - # playback_sources żywe — mobile może je odtwarzać; disable tylko future ingest. - # PornDittScraper — wyłączony 2026-05-12 (bug-report 64356e9b). Każdy link - # produkował nową Scene row zamiast matchować do istniejącej kanonicznej - # (TPDB/StashDB) bo pornditt ma weak signal: title + cz. performera, brak - # fingerprintu/duration/date → composite_score zawsze poniżej auto_merge - # threshold (0.92). Plik scrapera + extractor zostają (istniejące playback_sources - # nadal się resolvują, _REGISTRY w app/extractors/__init__.py odpala - # `porndittcom` → _embed_iframe.extract). Re-enable wymaga albo - # "alternative-source mode" w resolverze (match-only, never create new), - # albo bogatszej extracji metadanych (duration + fingerprint). + # PornHat (pornhatcom) + PornDitt (porndittcom) — USUNIĘTE CAŁKOWICIE 2026-06-22 + # (user request). Orphan factories (0.2% / weak-signal canonical match), zastępujemy + # lepszymi źródłami. Dane/pliki scraperów/extractory skasowane. # Special SxyPrnScraper, PerverzijaScraper, diff --git a/app/connectors/direct_scrapers/pornditt.py b/app/connectors/direct_scrapers/pornditt.py deleted file mode 100644 index 938c115..0000000 --- a/app/connectors/direct_scrapers/pornditt.py +++ /dev/null @@ -1,26 +0,0 @@ -"""pornditt.com — direct HTML scrape. - -KVS-style site (kt_player engine). Search URL: `/search//?from=` z slug-style -zapytaniem (spacje → `-`). Sceny renderują się na subdomenie `v.pornditt.com/videos///`, -więc regex matchuje oba (z i bez `v.` prefix). - -Sitetag `porndittcom` (legacy z porn-app DEFAULT_SITETAGS — suffix-stripped name). -""" -from __future__ import annotations - -import re - -from app.connectors.direct_scrapers._search_base import BaseSearchScraper - - -class PornDittScraper(BaseSearchScraper): - sitetag = "porndittcom" - _search_url_template = "https://pornditt.com/search/{query}/?from={page}" - _scene_url_re = re.compile( - r'href="(?Phttps://(?:v\.)?pornditt\.com/videos/(?P\d+)/(?P[a-z0-9\-]+))/"', - re.IGNORECASE, - ) - - def _format_query_for_url(self, query: str) -> str: - # KVS slug: lowercase, spacja/interpunkcja → `-`. URL-encoded (`+`) tu nie zadziała. - return re.sub(r"[^a-z0-9]+", "-", query.lower()).strip("-") diff --git a/app/connectors/direct_scrapers/pornhat.py b/app/connectors/direct_scrapers/pornhat.py deleted file mode 100644 index 24e12e4..0000000 --- a/app/connectors/direct_scrapers/pornhat.py +++ /dev/null @@ -1,99 +0,0 @@ -"""pornhat.com — search-mode scraper (performer-driven backfill). - -KVS engine. Search URL: `/search//` z `+` jako space separator. Scene URLs -to `/video//` (slug bez ID prefix, w przeciwieństwie do 3Movs/OK.xxx). Slug -zawiera tokens query gdy match jest relevant, więc filtruje się automatycznie. - -Auto-screenshot thumbnaile (`static.pornhat.com/contents/videos_screenshots/.../1.jpg`) -— do canonical match przez phash NIE nadają się (sprawdzone w probe 2026-05-12, 8%). -Ale wartość scrapera: discovering nowych scen performera których inne tube'y/canonical -nie mają. Mostly orphan ingest, ale dla popular performers może łapać studio scenes -których nie mamy w TPDB jeszcze. - -Metadata enrich: scene page ma `class="info-video js-ajax-{dvd,model,tag}"` div'y -z `data-setup='{"title": ..., "url": ..., "dir": ...}'` JSON. Parsujemy w -`_fetch_scene_metadata()` żeby insertować studio (dvd), dodatkowych performerów -(models), i tagi do każdej sceny. -""" -from __future__ import annotations - -import json -import logging -import re - -from app.connectors.base import RawPerformer, RawStudio, RawTag -from app.connectors.direct_scrapers._search_base import BaseSearchScraper -from app.extractors import browser_get - -log = logging.getLogger(__name__) - - -# `class="info-video js-ajax-"` ... `data-setup=''`. JSON jest -# single-quoted (HTML attribute), z double-quotes wewnątrz dla string values. -# `\1` w replacement: backreference do `` żeby wiedzieć co matchujemy. -_AJAX_DATA_RE = re.compile( - r"class=\"info-video js-ajax-(?Pdvd|model|tag)[^\"]*\"[^>]*data-setup='(?P[^']+)'", - re.IGNORECASE, -) - - -class PornHatScraper(BaseSearchScraper): - sitetag = "pornhatcom" - # Pagination KVS-style: /search/// (page=1 ALSO works z explicit `/1/`) - _search_url_template = "https://www.pornhat.com/search/{query}/{page}/" - # PornHat search HTML używa relative hrefs `/video//`. BaseSearchScraper - # automatycznie konwertuje relative → absolute via urlparse(search_url).netloc. - _scene_url_re = re.compile( - r'href="(?P(?:https://www\.pornhat\.com)?/video/(?P[a-z0-9\-]+)/)"', - re.IGNORECASE, - ) - - def _format_query_for_url(self, query: str) -> str: - # KVS: lowercase + spaces → `-` (slug-style), działa też `+` - return query.strip().lower().replace(" ", "-") - - def _fetch_scene_metadata( - self, scene_url: str - ) -> tuple[RawStudio | None, list[RawPerformer], list[RawTag]] | None: - """Fetch scene detail + parse `js-ajax-{dvd,model,tag}` data-setup JSON.""" - try: - r = browser_get(scene_url, timeout=self._timeout) - if r.status_code != 200: - return None - except Exception as e: - log.debug("pornhat detail fetch failed %s: %s", scene_url, e) - return None - - studio: RawStudio | None = None - performers: list[RawPerformer] = [] - tags: list[RawTag] = [] - - for m in _AJAX_DATA_RE.finditer(r.text): - kind = m.group("kind").lower() - try: - data = json.loads(m.group("json")) - except json.JSONDecodeError: - continue - name = (data.get("title") or "").strip() - slug = (data.get("dir") or "").strip() or None - if not name: - continue - if kind == "dvd": - # `dvd` to studio/series wrapper (np. "Adult Time"). Pierwsze - # wystąpienie bierzemy jako studio sceny — rzadko jest ich więcej. - if studio is None: - studio = RawStudio( - external_id=f"pornhatcom:dvd:{slug or name.lower()}", - name=name, - slug=slug, - ) - elif kind == "model": - performers.append(RawPerformer(name=name)) - elif kind == "tag": - tags.append(RawTag( - external_id=f"pornhatcom:tag:{slug or name.lower()}", - name=name, - slug=slug, - )) - - return studio, performers, tags diff --git a/app/connectors/direct_scrapers/zerodayxx.py b/app/connectors/direct_scrapers/zerodayxx.py deleted file mode 100644 index 629aa1a..0000000 --- a/app/connectors/direct_scrapers/zerodayxx.py +++ /dev/null @@ -1,119 +0,0 @@ -"""ZeroDayXXScraper — direct HTML scrape 0dayxx.com search. - -Search: `https://0dayxx.com/page//?s=`. Scene URL format: -`https://0dayxx.com/0day-porn-video//` (lub czasem `///`). -""" -from __future__ import annotations - -import logging -import re -import urllib.parse -from collections.abc import Iterator - -from app.connectors.base import RawPerformer, RawPlaybackSource, RawScene -from app.connectors.direct_scrapers.base import BaseDirectTubeScraper -from app.extractors import browser_get - -log = logging.getLogger(__name__) - - -_SCENE_URL_RE = re.compile( - r'href="(https://0dayxx\.com/(?:0day-porn-video|latest-porn-videos|porn-(?:bf|videos))/([^"/]+))/?"' -) -_OG_TITLE_RE = re.compile( - r' tuple[str | None, str | None]: - """Pobiera 0dayxx detail page i wyciąga (real_title, thumbnail_url). - - 0dayxx jest wrapperem (embeduje watchporn.to/inne), więc duration/tagi tu - nie są — siedzą na watchporn.to. og:image jednak jest na 0dayxx i daje - miniaturkę z poprawnym wymiarem (200x200 — mała, ale lepsza niż żadna). - - Bez tego fetch'u sceny 0dayxx trafiały do dedupu z slug'iem jako title + - bez thumbnail_url — czyli z dwoma najsłabszymi sygnałami na raz, co - powodowało albo brak match'y albo false-positive merge'y (zgłoszone - 2026-05-09). - """ - try: - r = browser_get(scene_url, timeout=20) - except Exception as e: - log.debug("0dayxx detail fetch failed for %s: %s", scene_url, e) - return None, None - if r.status_code != 200: - return None, None - title = None - thumb = None - if (m := _OG_TITLE_RE.search(r.text)): - # Strip ` | 0dayxx.com Daily...` suffix (powtórki og:title czasem mają go). - title = m.group(1).split("|")[0].strip() - if (m := _OG_IMAGE_RE.search(r.text)): - thumb = m.group(1).strip() - return title, thumb - - -class ZeroDayXXScraper(BaseDirectTubeScraper): - sitetag = "0dayxxcom" - - def search( - self, - query: str, - *, - page: int = 1, - limit: int | None = None, - ) -> Iterator[RawScene]: - q = urllib.parse.quote_plus(query.strip()) - url = f"https://0dayxx.com/page/{page}/?s={q}" - try: - r = browser_get(url, timeout=30) - except Exception as e: - log.warning("0dayxx search fetch failed: %s", e) - return - if r.status_code != 200: - return - - query_tokens = {tok for tok in query.lower().split() if len(tok) >= 3} - - seen: set[str] = set() - yielded = 0 - for m in _SCENE_URL_RE.finditer(r.text): - scene_url = m.group(1) + "/" - slug = m.group(2) - if scene_url in seen: - continue - seen.add(scene_url) - - slug_lower = slug.lower() - if query_tokens and not any(tok in slug_lower for tok in query_tokens): - continue - - real_title, thumb = _fetch_detail(scene_url) - title = real_title or slug.replace("-", " ").strip() - - yield RawScene( - external_id=f"0dayxxcom:{scene_url}", - title=title, - url=scene_url, - playback_sources=[ - RawPlaybackSource( - origin="tube:0dayxxcom", - page_url=scene_url, - thumbnail_url=thumb, - ) - ], - performers=[RawPerformer(name=query.strip())], - raw={ - "source": "direct_scraper:0dayxx", - "query": query, - "page": page, - "url": scene_url, - }, - ) - yielded += 1 - if limit is not None and yielded >= limit: - return diff --git a/app/extractors/__init__.py b/app/extractors/__init__.py index d4173bf..f13807a 100644 --- a/app/extractors/__init__.py +++ b/app/extractors/__init__.py @@ -38,8 +38,6 @@ from app.extractors.tubes import ( latestpornvideo, paradisehill, porn00, - pornditt, - pornhat, porntrex, sxyprn, xhamster, @@ -85,10 +83,6 @@ _REGISTRY: dict[str, Callable[[str], list[StreamSource] | None]] = { # flashvars `video_url` → `get_file` 302 → CDN time-bound signed URL # (`expires`+`md5`, NIE IP-bound) → mobile gra direct, zero VPS bandwidth. "porntrexcom": porntrex.extract, - # pornditt — KVS jak yespornvip (function/0 + license). VPS dociera → resolve - # server-side (decode + follow 302 → portable twa.tgprn.com CDN). Wcześniej WebView - # fallback łapał VAST preroll (trafostatic) zamiast contentu. Patrz pornditt.py/_kvs.py. - "porndittcom": pornditt.extract, # fpoxxx — KVS, plain get_file + license. 2026-06-01 (task #20): get_file 302 → # `videos3.fpo.xxx/remote_control.php?acctoken=` — zdekodowany acctoken # zawiera WBITY IP serwera-resolvera → definitywnie IP-bound. WebView only. @@ -118,10 +112,6 @@ _REGISTRY: dict[str, Callable[[str], list[StreamSource] | None]] = { # ~155k solo-scen upgrade z WebView-z-reklamami na natywne. Wcześniej WebView fallback # ładował ad-heavy stronę z phone IP (działało, ale gorszy UX + preroll VAST). "xhamstercom": xhamster.extract, - # PornHat — dedicated extractor: tylko `` z player area (skip sidebar - # trailer URLs `_preview*.mp4`), dedupe po filename. Get_file 302 → CDN, proxy - # follow_redirects=True wymagane (fix w stream_proxy.py). - "pornhatcom": pornhat.extract, # Freshporno KVS (function/0 + license). 2026-06-04 DevTools + cross-IP re-test # NAPRAWIA błąd z #20: finalny cdn4.freshporno.org/remote_control.php jest PORTABLE # (token time-bound nie IP-bound — VPS odtworzył token z residential → 206) ale @@ -152,9 +142,8 @@ _REGISTRY: dict[str, Callable[[str], list[StreamSource] | None]] = { # — używają identycznego embed-iframe pattern dla streamingu. # hdporn92com — DELISTED 2026-05-18. Scene pages to SEO shell bez player iframe, # JS hijackuje kliki na popunder. Wszystkie playback_sources mass-marked dead. - # 0dayxx wraps watchporn.to embed. watchporn.to/get_file/ token IP-bound (302→410 - # cross-IP). Switch na WebView fallback. ~5k scen. - "0dayxxcom": _vps_blocked_fallback.extract, + # 0dayxx + pornditt + pornhat — USUNIĘTE CAŁKOWICIE 2026-06-22 (user request): orphan + # factories (0–0.2% canonical match), zastępujemy lepszymi źródłami. Dane skasowane. # CF-protected tube — curl_cffi w fetch_tube_html bypassa JA3, embed-iframe pattern. "perverzijacom": _embed_iframe.extract, # Special: WebView-only (Yii2 session-bound player). diff --git a/app/extractors/tubes/pornditt.py b/app/extractors/tubes/pornditt.py deleted file mode 100644 index 0029827..0000000 --- a/app/extractors/tubes/pornditt.py +++ /dev/null @@ -1,23 +0,0 @@ -"""pornditt.com — KVS (kt_player) direct stream extractor. Patrz app/extractors/tubes/_kvs.py. - -User bug 2026-05-31 (scene 40f118e1): "Pornditt łapie reklamę zamiast video". pornditt -był na _vps_blocked_fallback (WebView), gdzie scrape łapał VAST preroll (trafostatic) zamiast -contentu. Identyczny silnik jak yespornvip: flashvars `video_url`/`video_alt_url` = -`function/0/...get_file/...` + `license_code`; VPS dociera (HTTP 200). Resolve server-side: -decode + follow 302 → portable CDN (twa.tgprn.com, time-bound, NIE IP/cookie-bound — -zweryfikowane cross-IP 2026-06-01 fresh session → 206 video/mp4). Native, multi-quality, -zero WebView/reklam. - -NB: runtime `window.flashvars.video_url` pokazuje już ZDEKODOWANY plain get_file, ale raw -HTML (server-fetch) ma formę `function/0/...` + license — dekodujemy sami (_kvs.real_url). -""" -from __future__ import annotations - -from app.extractors._models import StreamSource -from app.extractors.tubes import _kvs - -_BASE = "https://v.pornditt.com" - - -def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | None: - return _kvs.resolve_kvs(page_url, base_url=_BASE, timeout=timeout) diff --git a/app/extractors/tubes/pornhat.py b/app/extractors/tubes/pornhat.py deleted file mode 100644 index 202453e..0000000 --- a/app/extractors/tubes/pornhat.py +++ /dev/null @@ -1,86 +0,0 @@ -"""pornhat.com — KVS engine. get_file 302 → HLS m3u8 manifest. - -**2026-05-18 bandwidth optimization**: pornhat CDN tokens (`cdn.privatehost.com`) są -**time-bound, nie IP-bound** (`?sign=&exp_time=`). Zweryfikowane Chrome -DevTools MCP — VPS-resolved URL działa z każdego IP, bez Referer header. Zamiast -zwracać `pornhat.com/get_file/` URL (mobile dostaje go i robi 302 chain przez VPS -proxy), robimy server-side resolve i zwracamy końcowy manifest URL z signed token. - -Mobile ExoPlayer otrzymuje: - `https://nvms12.cdn.privatehost.com/hls/contents/.../?sign=...&exp_time=...` -i pobiera manifest + segments direct z CDN. **Zero VPS bandwidth** (poza ~5KB -initial resolve fetch). - -`mobile_direct_ok=True` w `raw` mówi playback.py że dla type=m3u8 ten URL jest OK -dla `direct_url=raw_url` (zazwyczaj m3u8 by szły przez proxy). - -Token wygasa za ~30-120 min od resolve (depends na lra param). User pause+resume -po >2h może dostać 403 → mobile fallback na proxified URL re-resolve'a. -""" -from __future__ import annotations - -import logging - -import httpx - -from app.extractors._models import StreamSource -from app.extractors.tubes._kvs_source import extract_kvs_sources - -log = logging.getLogger(__name__) - - -def _resolve_get_file_redirect(get_file_url: str, *, timeout: float = 15.0) -> str | None: - """Follow 302 chain pornhat.com/get_file/ → cdn.privatehost.com/hls/... - - Returns final manifest URL z signed token, lub None gdy fail. - """ - try: - with httpx.Client( - timeout=timeout, - follow_redirects=True, - headers={ - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", - "Referer": "https://www.pornhat.com/", - }, - ) as c: - r = c.head(get_file_url) - final = str(r.url) - if "cdn.privatehost.com" in final and ".m3u8" not in final: - # Generic master URL: /hls/contents/... CDN serves jako m3u8 mime - # nawet bez .m3u8 w path (sprawdzone Content-Type). - return final - if ".m3u8" in final: - return final - log.info("pornhat resolve: unexpected final URL %s", final) - return None - except Exception as e: - log.warning("pornhat resolve %s failed: %s", get_file_url, e) - return None - - -def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | None: - sources = extract_kvs_sources( - page_url, stream_type="m3u8", timeout=timeout, log_tag="pornhat" - ) - if not sources: - return None - - # Resolve każdy get_file URL → CDN signed manifest URL. Mobile dostaje direct. - resolved: list[StreamSource] = [] - for s in sources: - final = _resolve_get_file_redirect(s.link) - if final: - resolved.append( - StreamSource( - link=final, - type="m3u8", - quality=s.quality, - referer=s.referer, - raw={"mobile_direct_ok": True}, - ) - ) - else: - # Fallback: keep original (proxy will re-resolve) - resolved.append(s) - - return resolved