fix(extractors): mixdrop hardening, yespornvip extractor, freshporno revert

Mixdrop (bug #3/#10 czarny ekran): wymagane UA+Accept headers (bez nich shell
bez P.A.C.K.E.R.). Detect dead-video page -> raise HosterDead zamiast None
(mobile dostaje skip-to-next sygnal). Dispatch regex obejmuje nowy canonical
domain `miixdrop` (double-i).

Yespornvip (bug #1): nowy KVS engine extractor. Origin `tube:yespornvip`
istnial w playback_sources ale brak handlera w _REGISTRY -> try_extract None.
Flashvars `video_url: 'function/0/<get_file_url>'`, function/0 to passthrough.
480p mp4 z mobile_direct_ok=True.

Freshporno (bug #9 revert): przywrocony na _vps_blocked_fallback (WebView path).
Krotko-zywy switch na native extract z force_proxy=True cofniety, bo app idzie
publicznie - VPS bandwidth/anonimowosc maja priorytet nad UX flicker.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
jtrzupek 2026-05-28 23:23:37 +02:00
parent 49bb65d707
commit 81090ca8d2
4 changed files with 118 additions and 7 deletions

View file

@ -37,6 +37,7 @@ from app.extractors.tubes import (
porntrex,
pornxp,
sxyprn,
yespornvip,
)
log = logging.getLogger(__name__)
@ -98,9 +99,13 @@ _REGISTRY: dict[str, Callable[[str], list[StreamSource] | None]] = {
# trailer URLs `_preview*.mp4`), dedupe po filename. Get_file 302 → CDN, proxy
# follow_redirects=True wymagane (fix w stream_proxy.py).
"pornhatcom": pornhat.extract,
# Freshporno KVS — `cv=` HMAC signed token IP-bound do VPS. 2026-05-20 pre-public:
# bandwidth + VPS anonimowość priorytet. WebView fallback → mobile pobiera embed
# z phone IP, KVS player JS decoduje video_url, ExoPlayer odtwarza direct z CDN.
# Freshporno KVS — `cv=` HMAC signed token IP-bound do VPS. WebView fallback:
# mobile fetchuje embed z phone IP, KVS player JS dekoduje video_url, ExoPlayer
# odtwarza direct z CDN. UX trade-off (page flicker przed video) vs bandwidth/
# anonimowość — public-app priorytet → WebView wygrywa.
# (2026-05-28: krótko-żywy switch na freshporno.extract z force_proxy=True
# cofnięty po feedbacku Jana "video proxy mnie nie interesuje, idziemy
# publicznie".)
"freshpornoorg": _vps_blocked_fallback.extract,
# porn00 — KVS engine z v-acctoken w URL. Backend extract działa (zweryfikowane
# 2026-05-23), zwraca świeże get_file URL-e z `force_proxy=True` flag.
@ -111,6 +116,12 @@ _REGISTRY: dict[str, Callable[[str], list[StreamSource] | None]] = {
# mobile dostaje proxy URL od razu, ExoPlayer gra bez WebView.
"porn00org": porn00.extract,
"pornxpph": _vps_blocked_fallback.extract,
# yesporn.vip — KVS engine. flashvars `video_url: 'function/0/<get_file_url>'`,
# function/0 to passthrough wrapper, URL po prefixie direct streamuje (480p).
# Wymagało odrębnego extractora bo origin `tube:yespornvip` był w
# playback_sources ale brak handlera w _REGISTRY → mobile no-source (bug-report
# 2026-05-27 "Yespornvip dalej nie działa").
"yespornvip": yespornvip.extract,
# Direct-scraping tubes (mają też search scraper w connectors/direct_scrapers/)
# — używają identycznego embed-iframe pattern dla streamingu.
# hdporn92com — DELISTED 2026-05-18. Scene pages to SEO shell bez player iframe,

View file

@ -170,7 +170,12 @@ def extract_stream_from_hoster(
# Per-hoster dedicated extractors (specific URL shapes / decode patterns).
# Mixdrop: P.A.C.K.E.R. → MDCore.wurl protocol-relative `//host/v2/<id>.mp4?s=...`
# — generic packer fallback regex `https?://...\.mp4` mija ten URL (no scheme).
if re.search(r"(?:mixdrop|m1xdrop|mxdrop)\.[a-z]+/", iframe_url, re.IGNORECASE):
# `miixdrop` (double-i) to current canonical domain — wszystkie legacy
# `mixdrop.{ag,sb,my,co,...}` + `m1xdrop.bz` 301-ują tam. Bez `miixdrop`
# w dispatch URLs already-on-new-domain (upstream tubes które zaktualizowały
# embed src) trafiałyby do generic logic, gdzie regex `https?://...\.mp4`
# mija protocol-relative `//a-delivery22.mxcontent.net/...`.
if re.search(r"(?:mixdrop|miixdrop|m1xdrop|mxdrop)\.[a-z]+/", iframe_url, re.IGNORECASE):
from app.extractors.hosters import mixdrop
sources = mixdrop.extract(iframe_url, timeout=timeout)
if sources:

View file

@ -21,8 +21,8 @@ from __future__ import annotations
import logging
import re
from app.extractors._fetch import browser_get
from app.extractors._models import StreamSource
from app.extractors._fetch import _DEFAULT_UA, browser_get
from app.extractors._models import HosterDead, StreamSource
log = logging.getLogger(__name__)
@ -32,16 +32,34 @@ _PACKER_RE = re.compile(
re.DOTALL,
)
_MP4_URL_RE = re.compile(r'MDCore\.wurl\s*=\s*"([^"]+\.mp4[^"]*)"')
# Dead-video page: HTTP 200 but no packer, only the "sorry" shell. Previously the
# extractor returned None without any "dead" signal, so playback.py did not set
# dead_at and mobile received an empty result — a black screen instead of
# skip-to-next-source. Matching is case-insensitive.
_DEAD_RE = re.compile(
    r"can't find the video|WE ARE SORRY",
    re.IGNORECASE,
)
def extract(page_url: str, *, timeout: float = 30.0) -> list[StreamSource] | None:
res = browser_get(page_url, timeout=timeout)
# UA + Accept są wymagane — bez nich mixdrop dla VALID video zwraca minimalny
# body bez P.A.C.K.E.R. (sam stream_proxy._refetch_mixdrop_url też tak robi).
# Brak headerów powodował że extract() na żywym mixdrop ID dostawał shell bez
# packera → no match → None → mobile dostawał czarny ekran.
headers = {
"User-Agent": _DEFAULT_UA,
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.9",
}
res = browser_get(page_url, headers=headers, timeout=timeout)
if res.status_code != 200 or not res.text:
log.info("mixdrop: fetch fail status=%s url=%s", res.status_code, page_url)
return None
m = _PACKER_RE.search(res.text)
if not m:
if _DEAD_RE.search(res.text):
raise HosterDead(f"mixdrop {page_url}: video not found")
log.info("mixdrop: no P.A.C.K.E.R. block in %s (page changed?)", page_url)
return None

View file

@ -0,0 +1,77 @@
"""yesporn.vip — KVS engine direct stream extractor.

User bug report 2026-05-27: "Yespornvip dalej nie działa" ("Yespornvip still does
not work"). The origin `tube:yespornvip` existed in playback_sources, but there was
no entry in `_REGISTRY`, so `try_extract()` returned None and the mobile player had
no source.

The scene detail page links to `/embed/<id>` in an iframe. The embed page renders
the KVS player with `flashvars`:
- `video_url: 'function/0/https://yesporn.vip/get_file/<srv>/<token>/<bucket>/<id>/<id>.mp4/?embed=true'`
- `event_reporting2: 'https://yesporn.vip/get_file/.../<id>.mp4/'` (analytics ping
  URL, but it is a valid get_file URL)
- `video_url_text: '480p'` — quality label for video_url

`function/0/` is the KVS player JS decoder prefix; for type 0 it is a passthrough,
so the URL after the prefix is directly usable.

Single quality (480p) from the embed, because the higher ones (`video_alt_url`)
are redirect URLs (`video_alt_url_redirect: '1'`), not direct streams. The CDN URL
is time-bound and signed; mobile plays it directly.
"""
from __future__ import annotations
import logging
import re
from app.extractors._fetch import fetch_tube_html
from app.extractors._models import StreamSource
log = logging.getLogger(__name__)
# Site origin; extract() uses it to build embed URLs and the Referer value.
_BASE = "https://yesporn.vip"
# Matches `video_url: 'function/0/https://.../get_file/...mp4/?embed=true'`, with
# or without the `function/0/` prefix. Group 1 captures the URL after the prefix,
# including any trailing slash and query string up to the closing quote.
_VIDEO_URL_RE = re.compile(
    r"video_url\s*:\s*'(?:function/0/)?(https?://[^']+/get_file/[^']+\.mp4/?[^']*)'",
    re.IGNORECASE,
)
# Captures the single-quoted value of the `video_url_text` flashvar (e.g. '480p').
_QUALITY_RE = re.compile(r"video_url_text\s*:\s*'([^']*)'", re.IGNORECASE)
def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | None:
    """Resolve a yesporn.vip page URL to its single direct mp4 stream.

    Args:
        page_url: Either an embed URL (contains ``/embed/``) or a detail page
            URL of the form ``/video/<id>/<slug>/``.
        timeout: Passed through unchanged to ``fetch_tube_html``.

    Returns:
        A one-element list holding the ``StreamSource`` built from the
        ``video_url`` flashvar, or ``None`` when the embed URL cannot be
        derived from ``page_url`` or the fetched HTML has no matching
        ``video_url`` entry.
    """
    if "/embed/" in page_url:
        embed_url = page_url
    else:
        # Detail page: /video/<id>/<slug>/ maps onto /embed/<id>.
        id_match = re.search(r"/video/(\d+)/", page_url)
        if not id_match:
            log.info("yespornvip: cannot derive embed from %s", page_url)
            return None
        embed_url = f"{_BASE}/embed/{id_match.group(1)}"

    html = fetch_tube_html(embed_url, timeout=timeout)

    url_match = _VIDEO_URL_RE.search(html)
    if not url_match:
        log.info("yespornvip: no video_url in flashvars on %s", embed_url)
        return None

    # The captured URL keeps any `?embed=true` query string exactly as the
    # player's flashvars provide it. Per the original author's note, get_file
    # with embed=true may return an HTML wrapper instead of a 302 to the CDN.
    # NOTE(review): no param-stripping fallback is implemented in this function.

    # An empty or whitespace-only quality label is reported as None.
    label_match = _QUALITY_RE.search(html)
    quality = (label_match.group(1).strip() or None) if label_match else None

    source = StreamSource(
        link=url_match.group(1),
        type="mp4",
        quality=quality,
        referer=_BASE + "/",
        raw={"mobile_direct_ok": True},
    )
    return [source]