fix(extractors): mixdrop hardening, yespornvip extractor, freshporno revert
Mixdrop (bug #3/#10 czarny ekran): wymagane UA+Accept headers (bez nich shell bez P.A.C.K.E.R.). Detect dead-video page -> raise HosterDead zamiast None (mobile dostaje skip-to-next sygnal). Dispatch regex obejmuje nowy canonical domain `miixdrop` (double-i). Yespornvip (bug #1): nowy KVS engine extractor. Origin `tube:yespornvip` istnial w playback_sources ale brak handlera w _REGISTRY -> try_extract None. Flashvars `video_url: 'function/0/<get_file_url>'`, function/0 to passthrough. 480p mp4 z mobile_direct_ok=True. Freshporno (bug #9 revert): wrocony na _vps_blocked_fallback (WebView path). Krotko-zywy switch na native extract z force_proxy=True cofniety bo app idzie publicznie - VPS bandwidth/anonimowosc priorytet nad UX flicker. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
49bb65d707
commit
81090ca8d2
4 changed files with 118 additions and 7 deletions
|
|
@ -37,6 +37,7 @@ from app.extractors.tubes import (
|
||||||
porntrex,
|
porntrex,
|
||||||
pornxp,
|
pornxp,
|
||||||
sxyprn,
|
sxyprn,
|
||||||
|
yespornvip,
|
||||||
)
|
)
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
@ -98,9 +99,13 @@ _REGISTRY: dict[str, Callable[[str], list[StreamSource] | None]] = {
|
||||||
# trailer URLs `_preview*.mp4`), dedupe po filename. Get_file 302 → CDN, proxy
|
# trailer URLs `_preview*.mp4`), dedupe po filename. Get_file 302 → CDN, proxy
|
||||||
# follow_redirects=True wymagane (fix w stream_proxy.py).
|
# follow_redirects=True wymagane (fix w stream_proxy.py).
|
||||||
"pornhatcom": pornhat.extract,
|
"pornhatcom": pornhat.extract,
|
||||||
# Freshporno KVS — `cv=` HMAC signed token IP-bound do VPS. 2026-05-20 pre-public:
|
# Freshporno KVS — `cv=` HMAC signed token IP-bound do VPS. WebView fallback:
|
||||||
# bandwidth + VPS anonimowość priorytet. WebView fallback → mobile pobiera embed
|
# mobile fetchuje embed z phone IP, KVS player JS dekoduje video_url, ExoPlayer
|
||||||
# z phone IP, KVS player JS decoduje video_url, ExoPlayer odtwarza direct z CDN.
|
# odtwarza direct z CDN. UX trade-off (page flicker przed video) vs bandwidth/
|
||||||
|
# anonimowość — public-app priorytet → WebView wygrywa.
|
||||||
|
# (2026-05-28: krótko-żywy switch na freshporno.extract z force_proxy=True
|
||||||
|
# cofnięty po feedbacku Jana "video proxy mnie nie interesuje, idziemy
|
||||||
|
# publicznie".)
|
||||||
"freshpornoorg": _vps_blocked_fallback.extract,
|
"freshpornoorg": _vps_blocked_fallback.extract,
|
||||||
# porn00 — KVS engine z v-acctoken w URL. Backend extract działa (zweryfikowane
|
# porn00 — KVS engine z v-acctoken w URL. Backend extract działa (zweryfikowane
|
||||||
# 2026-05-23), zwraca świeże get_file URL-e z `force_proxy=True` flag.
|
# 2026-05-23), zwraca świeże get_file URL-e z `force_proxy=True` flag.
|
||||||
|
|
@ -111,6 +116,12 @@ _REGISTRY: dict[str, Callable[[str], list[StreamSource] | None]] = {
|
||||||
# mobile dostaje proxy URL od razu, ExoPlayer gra bez WebView.
|
# mobile dostaje proxy URL od razu, ExoPlayer gra bez WebView.
|
||||||
"porn00org": porn00.extract,
|
"porn00org": porn00.extract,
|
||||||
"pornxpph": _vps_blocked_fallback.extract,
|
"pornxpph": _vps_blocked_fallback.extract,
|
||||||
|
# yesporn.vip — KVS engine. flashvars `video_url: 'function/0/<get_file_url>'`,
|
||||||
|
# function/0 to passthrough wrapper, URL po prefixie direct streamuje (480p).
|
||||||
|
# Wymagało odrębnego extractora bo origin `tube:yespornvip` był w
|
||||||
|
# playback_sources ale brak handlera w _REGISTRY → mobile no-source (bug-report
|
||||||
|
# 2026-05-27 "Yespornvip dalej nie działa").
|
||||||
|
"yespornvip": yespornvip.extract,
|
||||||
# Direct-scraping tubes (mają też search scraper w connectors/direct_scrapers/)
|
# Direct-scraping tubes (mają też search scraper w connectors/direct_scrapers/)
|
||||||
# — używają identycznego embed-iframe pattern dla streamingu.
|
# — używają identycznego embed-iframe pattern dla streamingu.
|
||||||
# hdporn92com — DELISTED 2026-05-18. Scene pages to SEO shell bez player iframe,
|
# hdporn92com — DELISTED 2026-05-18. Scene pages to SEO shell bez player iframe,
|
||||||
|
|
|
||||||
|
|
@ -170,7 +170,12 @@ def extract_stream_from_hoster(
|
||||||
# Per-hoster dedicated extractors (specific URL shapes / decode patterns).
|
# Per-hoster dedicated extractors (specific URL shapes / decode patterns).
|
||||||
# Mixdrop: P.A.C.K.E.R. → MDCore.wurl protocol-relative `//host/v2/<id>.mp4?s=...`
|
# Mixdrop: P.A.C.K.E.R. → MDCore.wurl protocol-relative `//host/v2/<id>.mp4?s=...`
|
||||||
# — generic packer fallback regex `https?://...\.mp4` mija ten URL (no scheme).
|
# — generic packer fallback regex `https?://...\.mp4` mija ten URL (no scheme).
|
||||||
if re.search(r"(?:mixdrop|m1xdrop|mxdrop)\.[a-z]+/", iframe_url, re.IGNORECASE):
|
# `miixdrop` (double-i) to current canonical domain — wszystkie legacy
|
||||||
|
# `mixdrop.{ag,sb,my,co,...}` + `m1xdrop.bz` 301-ują tam. Bez `miixdrop`
|
||||||
|
# w dispatch URLs already-on-new-domain (upstream tubes które zaktualizowały
|
||||||
|
# embed src) trafiałyby do generic logic, gdzie regex `https?://...\.mp4`
|
||||||
|
# mija protocol-relative `//a-delivery22.mxcontent.net/...`.
|
||||||
|
if re.search(r"(?:mixdrop|miixdrop|m1xdrop|mxdrop)\.[a-z]+/", iframe_url, re.IGNORECASE):
|
||||||
from app.extractors.hosters import mixdrop
|
from app.extractors.hosters import mixdrop
|
||||||
sources = mixdrop.extract(iframe_url, timeout=timeout)
|
sources = mixdrop.extract(iframe_url, timeout=timeout)
|
||||||
if sources:
|
if sources:
|
||||||
|
|
|
||||||
|
|
@ -21,8 +21,8 @@ from __future__ import annotations
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from app.extractors._fetch import browser_get
|
from app.extractors._fetch import _DEFAULT_UA, browser_get
|
||||||
from app.extractors._models import StreamSource
|
from app.extractors._models import HosterDead, StreamSource
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -32,16 +32,34 @@ _PACKER_RE = re.compile(
|
||||||
re.DOTALL,
|
re.DOTALL,
|
||||||
)
|
)
|
||||||
_MP4_URL_RE = re.compile(r'MDCore\.wurl\s*=\s*"([^"]+\.mp4[^"]*)"')
|
_MP4_URL_RE = re.compile(r'MDCore\.wurl\s*=\s*"([^"]+\.mp4[^"]*)"')
|
||||||
|
# Dead-video page (200 OK but no packer, only the "sorry" shell). Wcześniej nasz
|
||||||
|
# extractor zwracał None bez sygnału "dead" → playback.py nie ustawiał dead_at,
|
||||||
|
# mobile dostawał pusty wynik → czarny ekran zamiast skip-to-next-source.
|
||||||
|
_DEAD_RE = re.compile(
|
||||||
|
r"can't find the video|WE ARE SORRY",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def extract(page_url: str, *, timeout: float = 30.0) -> list[StreamSource] | None:
|
def extract(page_url: str, *, timeout: float = 30.0) -> list[StreamSource] | None:
|
||||||
res = browser_get(page_url, timeout=timeout)
|
# UA + Accept są wymagane — bez nich mixdrop dla VALID video zwraca minimalny
|
||||||
|
# body bez P.A.C.K.E.R. (sam stream_proxy._refetch_mixdrop_url też tak robi).
|
||||||
|
# Brak headerów powodował że extract() na żywym mixdrop ID dostawał shell bez
|
||||||
|
# packera → no match → None → mobile dostawał czarny ekran.
|
||||||
|
headers = {
|
||||||
|
"User-Agent": _DEFAULT_UA,
|
||||||
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
|
"Accept-Language": "en-US,en;q=0.9",
|
||||||
|
}
|
||||||
|
res = browser_get(page_url, headers=headers, timeout=timeout)
|
||||||
if res.status_code != 200 or not res.text:
|
if res.status_code != 200 or not res.text:
|
||||||
log.info("mixdrop: fetch fail status=%s url=%s", res.status_code, page_url)
|
log.info("mixdrop: fetch fail status=%s url=%s", res.status_code, page_url)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
m = _PACKER_RE.search(res.text)
|
m = _PACKER_RE.search(res.text)
|
||||||
if not m:
|
if not m:
|
||||||
|
if _DEAD_RE.search(res.text):
|
||||||
|
raise HosterDead(f"mixdrop {page_url}: video not found")
|
||||||
log.info("mixdrop: no P.A.C.K.E.R. block in %s (page changed?)", page_url)
|
log.info("mixdrop: no P.A.C.K.E.R. block in %s (page changed?)", page_url)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
|
||||||
77
app/extractors/tubes/yespornvip.py
Normal file
77
app/extractors/tubes/yespornvip.py
Normal file
|
|
@ -0,0 +1,77 @@
|
||||||
|
"""yesporn.vip — KVS engine direct stream extractor.
|
||||||
|
|
||||||
|
User bug-report 2026-05-27: "Yespornvip dalej nie działa". Origin `tube:yespornvip`
|
||||||
|
istniał w playback_sources ale brak wpisu w `_REGISTRY` → `try_extract()` zwracał
|
||||||
|
None → mobile player no-source.
|
||||||
|
|
||||||
|
Detail page sceny linkuje do `/embed/<id>` w iframe. Embed page renderuje KVS
|
||||||
|
player z `flashvars`:
|
||||||
|
- `video_url: 'function/0/https://yesporn.vip/get_file/<srv>/<token>/<bucket>/<id>/<id>.mp4/?embed=true'`
|
||||||
|
- `event_reporting2: 'https://yesporn.vip/get_file/.../<id>.mp4/'` (analytics ping
|
||||||
|
URL, ale jest valid get_file)
|
||||||
|
- `video_url_text: '480p'` — quality label dla video_url
|
||||||
|
|
||||||
|
`function/0/` to KVS player JS dekoder prefix — dla type 0 to passthrough,
|
||||||
|
URL po prefixie jest bezpośrednio użyteczny.
|
||||||
|
|
||||||
|
Single-quality (480p) z embed bo wyższe (`video_alt_url`) to redirect URLs
|
||||||
|
(`video_alt_url_redirect: '1'`), nie direct streamy. CDN time-bound signed,
|
||||||
|
mobile gra direct.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
|
||||||
|
from app.extractors._fetch import fetch_tube_html
|
||||||
|
from app.extractors._models import StreamSource
|
||||||
|
|
||||||
|
log = logging.getLogger(__name__)

_BASE = "https://yesporn.vip"

# Matches the KVS flashvars entry
#   video_url: 'function/0/https://.../get_file/...mp4/?embed=true'
# with or without the `function/0/` prefix. Group 1 is the bare get_file URL
# (the prefix is deliberately left outside the capture group).
_VIDEO_URL_RE = re.compile(
    r"video_url\s*:\s*'(?:function/0/)?(https?://[^']+/get_file/[^']+\.mp4/?[^']*)'",
    re.IGNORECASE,
)
# Quality label that accompanies `video_url` in the flashvars (e.g. '480p').
_QUALITY_RE = re.compile(r"video_url_text\s*:\s*'([^']*)'", re.IGNORECASE)
# Numeric scene id inside a detail-page URL shaped like /video/<id>/<slug>/.
_DETAIL_ID_RE = re.compile(r"/video/(\d+)/")


def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | None:
    """Resolve a yesporn.vip page URL to its direct mp4 stream.

    Args:
        page_url: Either an embed URL (contains ``/embed/``), used as-is, or a
            detail-page URL of the form ``/video/<id>/<slug>/`` from which the
            embed URL ``<_BASE>/embed/<id>`` is derived.
        timeout: Passed through to ``fetch_tube_html``.

    Returns:
        A one-element list holding the ``StreamSource`` built from the
        flashvars ``video_url`` entry, or ``None`` when the embed URL cannot
        be derived, the fetched page is empty, or no ``video_url`` is found.
    """
    if "/embed/" in page_url:
        embed_url = page_url
    else:
        # Detail page -> derive the embed URL from the numeric scene id.
        id_match = _DETAIL_ID_RE.search(page_url)
        if id_match is None:
            log.info("yespornvip: cannot derive embed from %s", page_url)
            return None
        embed_url = f"{_BASE}/embed/{id_match.group(1)}"

    html = fetch_tube_html(embed_url, timeout=timeout)
    if not html:
        # NOTE(review): fetch_tube_html's failure contract is not visible from
        # this module. Guard against an empty/None body so the regex search
        # below cannot raise TypeError and the caller still gets the
        # documented `None` result.
        log.info("yespornvip: empty response for %s", embed_url)
        return None

    url_match = _VIDEO_URL_RE.search(html)
    if url_match is None:
        log.info("yespornvip: no video_url in flashvars on %s", embed_url)
        return None

    # The captured URL may still carry `?embed=true`. get_file with that
    # parameter may answer with an HTML wrapper instead of a 302 to the CDN;
    # it is kept because the site's own player requests the URL this way.
    # NOTE(review): nothing in this function strips the parameter as a
    # fallback — if one is needed it has to be implemented elsewhere.
    url = url_match.group(1)

    quality = None
    quality_match = _QUALITY_RE.search(html)
    if quality_match is not None:
        # An empty or whitespace-only label is treated as "unknown".
        quality = quality_match.group(1).strip() or None

    return [
        StreamSource(
            link=url,
            type="mp4",
            quality=quality,
            referer=_BASE + "/",
            # Presumably tells the mobile client it may play the URL directly
            # instead of going through the proxy — confirm against the
            # consumer of `raw`.
            raw={"mobile_direct_ok": True},
        )
    ]
|
||||||
Loading…
Add table
Reference in a new issue