goon/app/extractors/__init__.py
jtrzupek 6e3ad870a7 fix(freshporno): backend KVS resolve (portable CDN) — corrects #20
Re-investigated with the proper method (Chrome DevTools network capture + cross-IP
test via Bright Data residential proxy + curl_cffi browser-TLS) instead of guessing.
freshporno's real flow is get_file -> 302 -> cdn4.freshporno.org/remote_control.php
-> 206 video/mp4. The CDN URL is PORTABLE cross-IP (a token generated from one
residential IP replays fine from the VPS and from a different Bright Data residential
IP), it only rejects non-browser TLS fingerprints (plain curl -> 000, curl_cffi
chrome / ExoPlayer -> 206).

In #20 I tested the final URL with a standalone plain curl, got 000, and wrongly
concluded "unreachable from residential" -> kept it on the WebView fallback, which
barely worked (ad-heavy page, flaky). That false negative is the regression the user
reported. freshporno is function/0 KVS, so _kvs.resolve_kvs (which uses curl_cffi
chrome) already decodes + resolves it to a portable mp4 — switch to backend resolve
like yespornvip/pornditt: native, multi-quality, no proxy, no WebView.

Verified: backend resolve returns 3x mp4 (1080/720/480, mobile_direct) + cdn 206;
user confirmed native playback on device.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-05 21:12:17 +02:00

203 lines
10 KiB
Python

"""Stream URL extractors per-tube.
Public API:
- `try_extract(sitetag, page_url) -> list[StreamSource] | None`
- `supported_sitetags() -> tuple[str, ...]` — sitetags that have a registered extractor
- `StreamSource` (dataclass)
- `HosterDead` (exception)
- `TubePageError` (exception) — re-raised by `try_extract()` just like `HosterDead`
- `extract_stream_from_hoster(iframe_url, *, referer)` — generic packer-based hoster extract
- `unpack_packer` — re-exported from `app.extractors.hoster`
- `fetch_tube_html(url)` — Chrome TLS fingerprint fetch (curl_cffi)
- `browser_get(url)` — low-level
Architecture: every tube has its own module `app.extractors.tubes.<tube>` that exports
`extract(page_url) -> list[StreamSource] | None`. The registry below maps sitetag →
that module's extractor. `try_extract()` is a thin wrapper that adds exception handling.
Since the porn-app dependency was removed, this module is the only mechanism for
resolving streams — playback.py no longer falls back to the porn-app /stream API.
"""
from __future__ import annotations
import logging
from collections.abc import Callable
from app.extractors._fetch import browser_get, fetch_tube_html
from app.extractors._models import HosterDead, StreamSource, TubePageError
from app.extractors.hoster import extract_stream_from_hoster, unpack_packer
from app.extractors.tubes import (
_embed_iframe,
_vps_blocked_fallback,
_ytdlp,
eporner,
freshporno,
hqporner,
latestpornvideo,
paradisehill,
pornditt,
pornhat,
porntrex,
sxyprn,
yespornvip,
)
# Module-level logger; try_extract() reports unexpected extractor failures through it.
log = logging.getLogger(__name__)
# Sitetag → extractor function. The sitetag matches the format used in origin:
# `pornapp:<sitetag>` (or, after the Phase 2 migration, `tube:<sitetag>`).
#
# Mainstream tubes registered with `_ytdlp.extract` (redtube/xvideos/xnxx/youporn) use
# yt-dlp as the extractor — battle-tested and updated upstream whenever the HTML
# changes. pornhub and xhamster are currently routed to the WebView fallback and
# porntrex has a dedicated extractor (see the individual entries below).
# Aggregator tubes (xmoviesforyou/watchporn/siska/...) use the generic embed-iframe
# extractor (page → /e/<id> iframe → P.A.C.K.E.R. unpack). Custom code exists only
# where a tube has a non-standard scheme (eporner XHR, sxyprn URL transform).
_REGISTRY: dict[str, Callable[[str], list[StreamSource] | None]] = {
    # hqporner — the dedicated extractor returns multi-quality `<source>` mp4 URLs
    # (bigcdn.cc / hqwo.cc / flyflv) with `force_proxy=True`. The CDN URLs are IP-bound
    # to the VPS, so playback.py routes them through the proxy — mobile gets a quality
    # picker + native ExoPlayer, without a WebView.
    # Bug report e8ddd8d4: the WebView fallback (`_vps_blocked_fallback`) loaded the
    # hqporner.com scene page in a WebView, but that page has ad iframes (adtng,
    # goaserv, mavrtracktor) + pop-under triggers → the user clicked and saw an ad
    # instead of the video. INJECTED_JS in PlayerScreen.tsx did not catch the popups
    # quickly enough. Going back to native = the `<source>` mp4 picker bypasses that
    # path entirely.
    "hqpornercom": hqporner.extract,
    "epornercom": eporner.extract,
    "sxyprncom": sxyprn.extract,
    # Mainstream tubes — yt-dlp
    # NB: a 2026-05-18 cross-IP test confirmed that xvideos/xnxx/pornhub/youporn/redtube
    # CDN URLs are **time-bound** (not IP-bound) — the mobile_direct_ok auto-detect in
    # playback.py gives the mobile client a direct fetch, zero VPS bandwidth.
    # pornhub — 2026-06-02: yt-dlp run from the VPS gets HTTP 403 (Pornhub blocks the
    # Hetzner IP; yt-dlp is up to date and other yt-dlp tubes work → the block is
    # specific to PH). The WebView fallback plays from the phone's residential IP (like
    # xhamster). Previously `_ytdlp.extract` returned 0 sources → "playback does not work".
    "pornhubcom": _vps_blocked_fallback.extract,
    "redtubecom": _ytdlp.extract,
    "xvideoscom": _ytdlp.extract,
    "xnxxcom": _ytdlp.extract,
    "youporncom": _ytdlp.extract,
    # porntrex KVS — 2026-05-22 the VPS can reach it again (HTTP 200). Dedicated
    # extractor: flashvars `video_url` → `get_file` 302 → CDN time-bound signed URL
    # (`expires`+`md5`, NOT IP-bound) → mobile plays direct, zero VPS bandwidth.
    "porntrexcom": porntrex.extract,
    # VPS-blocked tubes — KVS / Cloudflare blocks the Hetzner IP, but they work from a
    # residential IP (confirmed with Chrome DevTools MCP 2026-05-15). Mobile WebView +
    # INJECTED_JS (PlayerScreen.tsx:805) scans <video>.src + XHR — it catches the URL
    # after the player JS has decoded it.
    "xhamstercom": _vps_blocked_fallback.extract,
    # pornditt — KVS like yespornvip (function/0 + license). The VPS can reach it →
    # resolve server-side (decode + follow 302 → portable twa.tgprn.com CDN). Previously
    # the WebView fallback caught the VAST preroll (trafostatic) instead of the content.
    # See pornditt.py/_kvs.py.
    "porndittcom": pornditt.extract,
    # fpoxxx — KVS, plain get_file + license. 2026-06-01 (task #20): get_file 302 →
    # `videos3.fpo.xxx/remote_control.php?acctoken=<base64>` — the decoded acctoken has
    # the resolver server's IP BAKED IN → definitively IP-bound. WebView only.
    "fpoxxx": _vps_blocked_fallback.extract,
    "sxylandcom": _vps_blocked_fallback.extract,
    # Aggregator tubes — generic embed-iframe → hoster unpacker
    "latestpornvideocom": latestpornvideo.extract,
    "xmoviesforyoucom": _embed_iframe.extract,
    "watchporn": _embed_iframe.extract,
    "siskavideo": _embed_iframe.extract,
    "porn4dayspw": _embed_iframe.extract,
    "porndishcom": _embed_iframe.extract,
    # xxxfreewatch — DELISTED 2026-05-18. 790 solo-orphan scenes, 0% match, CF-walled
    # from the VPS.
    "latestleaksco": _embed_iframe.extract,
    "mypornerleakcom": _embed_iframe.extract,
    # PornHat — dedicated extractor: only `<source>` from the player area (skips sidebar
    # trailer URLs `_preview*.mp4`), deduplicated by filename. get_file 302 → CDN; the
    # proxy requires follow_redirects=True (fix in stream_proxy.py).
    "pornhatcom": pornhat.extract,
    # Freshporno KVS (function/0 + license). The 2026-06-04 DevTools + cross-IP re-test
    # FIXES the mistake from #20: the final cdn4.freshporno.org/remote_control.php is
    # PORTABLE (the token is time-bound, not IP-bound — the VPS replayed a token
    # generated from a residential IP → 206) but it requires browser TLS (curl_cffi
    # chrome/ExoPlayer → 206; plain curl → 000). In #20 I tested with plain curl outside
    # the session → 000 → wrongly concluded "unreachable" → WebView.
    # Now backend-resolve like yespornvip/pornditt (_kvs uses curl_cffi chrome).
    # Native, multi-quality, zero proxy/WebView. (verified on the emulator before deploy)
    "freshpornoorg": freshporno.extract,
    # porn00 — KVS engine. 2026-06-01 cross-IP re-test (task #20): get_file 302 →
    # `fe.porn00.org/videos/.../<id>.mp4?token=&expires=` returns 403 from a residential
    # IP → the token is IP-bound to the resolver (VPS), NOT portable like
    # yespornvip/pornditt. Backend-resolve yields no mobile-playable URL without a
    # proxy, and the video proxy is ruled out (public app, Jan's feedback). Per the
    # "IP-bound CDN → WebView" policy: switched from porn00.extract (force_proxy=True,
    # which broke the no-proxy rule) to _vps_blocked_fallback.
    # The ad risk from bug reports 5037b3e3/e8e3198b is mitigated by the ad filter
    # (31d9076: AD_HOSTS + coverOverlay + INJECTED_JS skip ad-CDN).
    "porn00org": _vps_blocked_fallback.extract,
    # pornxp — `<source> //sr.porn-xp.com/<token>/.../720.mp4` (redirect → xpxp.eu).
    # 2026-06-01 (task #20): 403 cross-IP → the token in the path is IP-bound. WebView only.
    "pornxpph": _vps_blocked_fallback.extract,
    # yesporn.vip — KVS engine. The VPS can reach it again (HTTP 200, unblocked like
    # porntrex), so we resolve SERVER-SIDE: decode flashvars `video_url`/alt/alt2
    # (function/0/ + license_code, kt_player algorithm) → follow get_file 302 → portable
    # cdn5 url (time-bound, NOT IP/cookie-bound, verified cross-IP 2026-05-31). Mobile
    # plays direct natively, multi-quality, ZERO WebView/ads/preroll. Previously the
    # WebView fallback showed an ad-heavy page and the scrape caught a preroll ad
    # (bkcdn) instead of the video.
    "yespornvip": yespornvip.extract,
    # Direct-scraping tubes (they also have a search scraper in
    # connectors/direct_scrapers/) — they use the identical embed-iframe pattern for
    # streaming.
    # hdporn92com — DELISTED 2026-05-18. Scene pages are an SEO shell without a player
    # iframe, and JS hijacks clicks into a popunder. All playback_sources mass-marked dead.
    # 0dayxx wraps the watchporn.to embed. The watchporn.to/get_file/ token is IP-bound
    # (302→410 cross-IP). Switched to the WebView fallback. ~5k scenes.
    "0dayxxcom": _vps_blocked_fallback.extract,
    # CF-protected tube — curl_cffi in fetch_tube_html bypasses JA3, embed-iframe pattern.
    "perverzijacom": _embed_iframe.extract,
    # Special: WebView-only (Yii2 session-bound player).
    "paradisehillcc": paradisehill.extract,
    # PornDoe — added 2026-05-21 (theporndude audit). The stream URL is not inline in
    # the SSR HTML (player JS initialises after the Play click), so WebView fallback:
    # mobile fetches /watch/<id> from the phone IP, the player JS decodes video.src,
    # and INJECTED_JS scrapes it.
    # 0 VPS bandwidth — consistent with the pre-public bandwidth/anonymity priority.
    "porndoecom": _vps_blocked_fallback.extract,
    # fullmovies.xxx + hdporn.gg — an extractor WAS MISSING (try_extract→None→"no
    # stream"; bug 19866e9e "problem with both hosters" — a scene that had ONLY these
    # two sources did not play at all). fullmovies has `<source src=...get_file...mp4>`,
    # but get_file times out from the VPS (CDN unreachable, like freshporno) →
    # backend-resolve is ruled out.
    # hdporn.gg: the sample scene returned 404 (part of the content was removed).
    # Both → WebView fallback: the phone (residential IP) loads the page, the player
    # JS/`<source>` plays, and the INJECTED_JS scrape catches the URL. Better than
    # having no playback path at all. (2026-06-03)
    # NOTE(review): the freshporno "unreachable" finding cited here was later attributed
    # to a plain-curl TLS false negative (see the freshporno entry) — fullmovies may
    # deserve a re-test with browser TLS; TODO confirm.
    "fullmoviesxxx": _vps_blocked_fallback.extract,
    "hdporngg": _vps_blocked_fallback.extract,
}
def try_extract(sitetag: str, page_url: str) -> list[StreamSource] | None:
    """Try to resolve stream URLs for the given tube sitetag and page URL.

    Looks up the extractor registered for ``sitetag`` in ``_REGISTRY`` and calls
    it with ``page_url``.

    Returns:
        Whatever the extractor returns — a list of StreamSource (different
        qualities/containers) — or None when:
        - no extractor is registered for this sitetag,
        - the extractor returned None / did not find a URL,
        - the extractor raised an unexpected exception (logged as a warning).

    Raises:
        HosterDead: when the embed page explicitly says the video is
            deleted/not found — the caller (playback.py) catches it and marks
            playback_source.dead_at.
        TubePageError: propagated unchanged from the extractor.
    """
    handler = _REGISTRY.get(sitetag)
    if handler is not None:
        try:
            sources = handler(page_url)
        except (HosterDead, TubePageError):
            # Domain-level signals the caller must see; never swallow them.
            raise
        except Exception as exc:
            # Best-effort boundary: one broken extractor must not break playback.
            log.warning("extractor for %s failed on %s: %s", sitetag, page_url, exc)
            sources = None
        return sources
    return None
def supported_sitetags() -> tuple[str, ...]:
    """Return a tuple of the sitetags that have a registered extractor.

    The tuple follows the iteration order of ``_REGISTRY``.
    """
    # Iterating a dict yields its keys, so no explicit .keys() view is needed.
    return tuple(_REGISTRY)
# Explicit public API of the package: the two functions defined in this module plus
# the models and fetch/hoster helpers re-exported from the private submodules
# imported at the top of the file.
__all__ = [
    "try_extract",
    "supported_sitetags",
    "StreamSource",
    "HosterDead",
    "TubePageError",
    "extract_stream_from_hoster",
    "unpack_packer",
    "fetch_tube_html",
    "browser_get",
]