Registry mapowanie `hqpornercom` -> `_vps_blocked_fallback.extract` zwracało scene page URL do mobile WebView. Page ma 3 ad-iframes (adtng/goaserv/mavrtracktor) + pop-under triggery -> user widział reklamę zamiast video. Powrót do `hqporner.extract` (multi-quality bigcdn.cc mp4 + force_proxy=True). Plus hardening: iframe regex bound do `<div id="playerWrapper">...</div>`, whitelist hostów embed (mydaddy.cc/hqwo.cc) i CDN mp4 (bigcdn/hqwo/flyflv).
163 lines
7.2 KiB
Python
163 lines
7.2 KiB
Python
"""hqporner.com — direct stream extractor.
|
|
|
|
Page → iframe (mydaddy.cc or hqwo.cc — the hosting changes over time) → extract
mp4 URLs from `<source>` tags or other places in the player's HTML/JS.

Two generations of the hoster (both active for different scenes):

1. **Old: mydaddy.cc/video/<hash>/** — FluidPlayer wrapper with `<source>` tags
   directly in the iframe HTML:
   `<source src="//s12.bigcdn.cc/.../360.mp4" title="360p">` + 720p + 1080p.

2. **New: hqwo.cc/player/<hash>?img=<base64>** — the `<source>` tags live inside a
   JavaScript string literal (`$("#jw").html("<video>...<source src=\"...\">")`).
   The quotes are escaped (`\"`), so a plain regex on `<source[^>]+src="..."`
   does not match. The HTML has to be unescaped before regex matching.

   URL pattern: `https://hqwo.cc/pubs/<pub_id>/<quality>.mp4`, where pub_id
   differs from the player_hash in the iframe URL — it is generated server-side
   per request.

Fallback when both fail: hoster type → mobile opens it in a WebView (FluidPlayer
JS extracts the URL after a user click).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import re
|
|
|
|
from app.extractors._fetch import _DEFAULT_UA, browser_get, fetch_tube_html
|
|
from app.extractors._models import StreamSource
|
|
from app.extractors.hoster import extract_stream_from_hoster
|
|
|
|
log = logging.getLogger(__name__)


# The contents of `<div id="playerWrapper">…</div>` are captured on their own
# first, and the `<iframe>` is then looked up ONLY inside that capture. An
# earlier single regex (`playerWrapper>.*?<iframe` with DOTALL) ran straight
# through an empty / JS-populated wrapper and grabbed the next `<iframe>` in the
# document — hqporner places 2-3 ad iframes (adtng/goaserv/mavrtracktor) around
# the player, so the extractor landed on an ad instead of mydaddy.cc/hqwo.cc.
# NOTE: the capture is non-greedy and stops at the first `</div>`, so a `<div>`
# nested inside the wrapper would truncate it.
_PLAYER_WRAPPER_RE = re.compile(
    r'<div[^>]+id=["\']?playerWrapper["\']?[^>]*>(.*?)</div>',
    flags=re.DOTALL | re.IGNORECASE,
)

# First `src` attribute value of an `<iframe>` tag (group 1).
_PLAYER_IFRAME_RE = re.compile(
    r'<iframe[^>]+src=["\']([^"\']+)',
    flags=re.IGNORECASE,
)
|
|
|
|
# Whitelist of embed-iframe hosts. Hqporner rotates between mydaddy.cc and
# hqwo.cc (typically every few months). Everything else (adtng, goaserv,
# mavrtracktor, smartpop, popcash, reebr) is an ad. No match = fail safe
# (return None): such a URL must not be launched as a hoster, because it is an
# ad redirect that ends in a pop-under.
#
# The pattern is anchored to the START of the URL (optional `http:`/`https:`
# scheme, then `//host/`). It is applied with `.search()` to a full URL; without
# the anchor an ad URL that merely carries `//hqwo.cc/` in its path or query
# string (e.g. `https://adtng.com/go?u=https://hqwo.cc/x`) would be whitelisted.
_VIDEO_IFRAME_HOST_RE = re.compile(
    r"^(?:https?:)?//(?:[a-z0-9-]+\.)?(?:mydaddy|hqwo|hqporner)\.[a-z]{2,4}/",
    re.IGNORECASE,
)
|
|
|
|
# Matches `<source src="...mp4" title="...">`; the `title` attribute is optional
# and is only captured when it follows `src`. Group 1 is the URL (protocol-
# relative or http/https), group 2 the title. Once JS escapes are undone
# (`\"` → `"`), the same pattern covers both raw HTML (mydaddy.cc) and HTML
# embedded in a JS string (hqwo.cc).
_SOURCE_RE = re.compile(
    r'<source[^>]+src=["\']((?://|https?://)[^"\']+\.mp4[^"\']*)["\'](?:[^>]+title=["\']([^"\']+))?',
    flags=re.IGNORECASE,
)
|
|
|
|
# Whitelist of mp4 CDNs. Real video URLs come from bigcdn.cc (s12./s68./...),
# hqwo.cc/pubs, flyflv and hqwallcdn. Anything else found in a `<source>` tag is
# treated as a pre-roll / interstitial / ad injection (hypothesis from the bug
# report: hqporner started putting ad mp4 URLs into `<source>` in 2026).
#
# The pattern is anchored to the START of the URL (optional `http:`/`https:`
# scheme, then `//host/`). It is applied with `.search()` to a full URL; without
# the anchor an ad URL that merely mentions `//bigcdn.cc/` in its path or query
# string would slip through the whitelist.
_VIDEO_CDN_HOST_RE = re.compile(
    r"^(?:https?:)?//(?:[a-z0-9-]+\.)?(?:bigcdn|hqwo|flyflv|hqwallcdn)\.[a-z]{2,4}/",
    re.IGNORECASE,
)
|
|
|
|
|
|
def _http_host(url: str) -> str:
    """Return the lowercased hostname of an http(s) URL, or "" otherwise.

    An empty string is returned for non-http(s) schemes, for URLs without a
    host, and for URLs that `urlparse` rejects.
    """
    # Function-scope import (the original block also imported urlparse locally),
    # so this block does not depend on the file's top-level import section.
    from urllib.parse import urlparse

    try:
        parts = urlparse(url)
        host = parts.hostname or ""
    except ValueError:  # e.g. a malformed IPv6 literal in the netloc
        return ""
    return host if parts.scheme in ("http", "https") else ""


def _host_matches(host_re: re.Pattern[str], host: str) -> bool:
    """Check *host* alone against one of the module's host-whitelist regexes.

    The whitelist patterns describe the `//host/` part of a URL. Running them
    with `.search()` over a whole URL would also accept a whitelisted host that
    only appears in the path or query string of an ad URL, so the already
    parsed hostname is wrapped back into `//host/` and must match in full.
    """
    return bool(host) and host_re.fullmatch(f"//{host}/") is not None


def _unescape_js_markup(text: str) -> str:
    """Undo the JS string escapes that hide `<source>` tags from `_SOURCE_RE`.

    hqwo.cc embeds `<source>` tags inside `$.html("<video>...<source src=\\"...\\">")`
    JS string literals, so quotes are escaped; mydaddy.cc serves plain HTML with
    raw quotes. `\\/` is unescaped as well because JS/JSON string literals
    commonly escape forward slashes, which would otherwise break the URL match.
    Backslash pairs are collapsed last, matching the original replacement order.
    """
    return (
        text.replace('\\"', '"')
        .replace("\\'", "'")
        .replace("\\/", "/")
        .replace("\\\\", "\\")
    )


def _collect_sources(iframe_html: str, referer: str) -> list[StreamSource]:
    """Build mp4 `StreamSource`s from the `<source>` tags in *iframe_html*.

    URLs are de-duplicated and anything not hosted on a whitelisted CDN is
    dropped. Every returned source carries *referer* and `force_proxy=True`.
    """
    # De-dup by URL: hqwo.cc emits `<source>` tags twice (adblock + non-adblock
    # branches).
    seen_urls: set[str] = set()
    sources: list[StreamSource] = []
    for sm in _SOURCE_RE.finditer(iframe_html):
        url = sm.group(1).strip()
        if url.startswith("//"):
            url = "https:" + url
        if url in seen_urls:
            continue
        seen_urls.add(url)
        # Drop `<source>` URLs from outside the known CDNs. If hqporner injects
        # `<source src="//ads.example.com/preroll.mp4">` (hypothesis from the
        # bug report), the mobile quality picker could otherwise be handed an ad
        # URL instead of the 1080p mp4. Only the parsed host is compared.
        if not _host_matches(_VIDEO_CDN_HOST_RE, _http_host(url)):
            log.info("hqporner: skip non-CDN source URL: %s", url)
            continue
        title = (sm.group(2) or "").strip()
        # `force_proxy=True` (2026-05-20): the bigcdn.cc/flyflv CDNs are
        # IP-bound and flyflv carries `ip=46.62.219.154` in the URL path. A
        # direct mobile request gets 404/403 and the fallback to the proxy
        # causes flicker, so the proxied URL is forced from the start.
        # Bug report e8ddd8d4: "click opens an ad" when _vps_blocked_fallback
        # was used (hqporner page ads). force_proxy + native mp4 gives the
        # quality picker and native playback.
        sources.append(StreamSource(
            link=url, quality=title or None, type="mp4", referer=referer,
            raw={"force_proxy": True},
        ))
    return sources


def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | None:
    """Resolve an hqporner scene page to playable stream sources.

    Flow: fetch the scene page, take the `<iframe>` inside the `playerWrapper`
    div, verify its host against the embed whitelist, fetch the iframe and
    collect its whitelisted mp4 `<source>` URLs.

    Args:
        page_url: URL of the hqporner scene page.
        timeout: Timeout passed to every fetch helper.

    Returns:
        A list of `StreamSource`, or ``None`` when no whitelisted player iframe
        is found or the iframe cannot be fetched. The list is, in order of
        preference: the mp4 sources from the iframe; a single stream resolved
        by `extract_stream_from_hoster`; or a single ``type="hoster"`` entry
        pointing at the iframe itself.
    """
    # `or ""` keeps a None/empty body on the normal "no playerWrapper" path
    # instead of raising TypeError inside `re.search`.
    # NOTE(review): confirm whether fetch_tube_html can actually return None.
    page_html = fetch_tube_html(page_url, timeout=timeout) or ""
    wrapper_m = _PLAYER_WRAPPER_RE.search(page_html)
    if not wrapper_m:
        log.warning("hqporner: no playerWrapper div in %s", page_url)
        return None
    iframe_m = _PLAYER_IFRAME_RE.search(wrapper_m.group(1))
    if not iframe_m:
        log.warning("hqporner: no iframe inside playerWrapper for %s", page_url)
        return None

    # Normalise protocol-relative and site-relative iframe URLs.
    iframe_src = iframe_m.group(1).strip()
    if iframe_src.startswith("//"):
        iframe_src = "https:" + iframe_src
    elif iframe_src.startswith("/"):
        iframe_src = f"https://hqporner.com{iframe_src}"

    # Whitelist on the parsed host only: a substring search over the whole URL
    # would let `https://ad.example/?u=//hqwo.cc/` through.
    iframe_host = _http_host(iframe_src)
    if not _host_matches(_VIDEO_IFRAME_HOST_RE, iframe_host):
        log.warning("hqporner: iframe host not whitelisted (likely ad): %s", iframe_src)
        return None

    headers = {
        "User-Agent": _DEFAULT_UA,
        "Accept": "text/html,application/xhtml+xml",
        "Accept-Language": "en-US,en;q=0.9",
        "Referer": "https://hqporner.com/",
    }
    try:
        r = browser_get(iframe_src, headers=headers, timeout=timeout, follow_redirects=True)
        r.raise_for_status()
    except Exception as e:
        # Deliberately broad: any fetch failure means "no stream", not a crash.
        log.warning("hqporner iframe fetch %s failed: %s", iframe_src, e)
        return None

    iframe_html = _unescape_js_markup(r.text)

    # The CDNs (bigcdn.cc, hqwo.cc) bind the URL to the Referer of the embed
    # iframe (host hqwo.cc / mydaddy.cc), not hqporner.com, so the proxy is
    # given the iframe host as referer. The host is non-empty here because it
    # passed the whitelist check.
    iframe_referer = f"https://{iframe_host}/"

    sources = _collect_sources(iframe_html, iframe_referer)
    if sources:
        return sources

    # Fallback 1: some mydaddy.cc iframes use packed JS (JWPlayer).
    stream_url = extract_stream_from_hoster(
        iframe_src, referer="https://hqporner.com/", timeout=timeout,
    )
    if stream_url:
        type_hint = "m3u8" if ".m3u8" in stream_url.lower() else "mp4"
        return [StreamSource(link=stream_url, type=type_hint, referer=iframe_referer)]

    # Fallback 2: hand the iframe URL back as a hoster — mobile opens it in a
    # WebView and the FluidPlayer JS extracts the URL itself after a user click
    # / the adblock check. The iframe host has already been validated against
    # the whitelist (mydaddy.cc/hqwo.cc), so the WebView does not start on an
    # ad-domain redirect.
    log.info("hqporner: using hoster fallback for %s", iframe_src)
    return [StreamSource(link=iframe_src, type="hoster")]
|