PlayTube CMS. Sitemap-based pagination (listing has no GET paging), JSON-LD VideoObject metadata, pornstar/category pills, " Clips" categories mapped to studio. Direct mp4 (cdnde.com/okcdn.ru), tokens time-bound and portable cross-IP, so mobile plays direct. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
69 lines
2.4 KiB
Python
69 lines
2.4 KiB
Python
"""hqfap.com — direct stream extractor.
|
|
|
|
Scene page (SSR, za Cloudflare → curl_cffi w fetch_tube_html) ma JSON-LD
|
|
VideoObject z `contentUrl` = direct mp4. Dwie generacje hostingu w katalogu:
|
|
|
|
- nowsze sceny: `v4.cdnde.com/...?video=<b64>&time=<epoch>&ip=<addr>` — param
|
|
`ip` NIE jest egzekwowany (cross-IP test 2026-06-10: lokalny ISP i VPS Hetzner
|
|
oba 206), token time-bound → resolve on-demand daje świeży URL,
|
|
- starsze sceny: `vd*.okcdn.ru/?expires=...&srcIp=...&sig=...` (ok.ru) — również
|
|
portable cross-IP (206 z innego IP niż fetcher).
|
|
|
|
Mobile gra direct (mobile_direct auto-detect w playback.py), zero proxy/WebView.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import re
|
|
|
|
from app.extractors._fetch import fetch_tube_html
|
|
from app.extractors._models import StreamSource
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
# Captures the body of each <script type="application/ld+json"> element.
_JSONLD_RE = re.compile(
    r'<script[^>]+type=["\']application/ld\+json["\'][^>]*>(.*?)</script>',
    flags=re.DOTALL | re.IGNORECASE,
)
# Fallback for when the JSON-LD does not parse as JSON (trailing comma etc.).
_CONTENT_URL_RE = re.compile(r'"contentUrl"\s*:\s*"([^"]+)"')
# Pulls the vertical resolution out of file names such as `..._720p.mp4`.
_QUALITY_RE = re.compile(r"_(\d{3,4})p\.mp4", flags=re.IGNORECASE)
|
|
|
|
|
|
def _decode_json_string(raw: str) -> str:
    """Decode the inside of a JSON string literal (surrounding quotes excluded).

    The regex fallback captures text exactly as it appears in the page, so
    JSON escapes such as ``\\/`` or ``\\u0026`` are still present and would
    yield an unusable URL if passed on verbatim.
    """
    try:
        decoded = json.loads(f'"{raw}"')
    except ValueError:
        # Not a well-formed literal (e.g. cut at an escaped quote); undo the
        # most common escape by hand instead of giving up.
        return raw.replace("\\/", "/")
    return decoded if isinstance(decoded, str) else raw


def _video_content_url(node: object) -> str | None:
    """Return the first non-empty string ``contentUrl`` of a VideoObject in *node*.

    *node* is parsed JSON-LD: a dict, a list of nodes, or a dict wrapping its
    nodes in ``@graph``. ``@type`` may be a single string or a list of strings.
    """
    if isinstance(node, list):
        for child in node:
            found = _video_content_url(child)
            if found:
                return found
        return None
    if not isinstance(node, dict):
        return None

    node_type = node.get("@type")
    is_video = node_type == "VideoObject" or (
        isinstance(node_type, list) and "VideoObject" in node_type
    )
    if is_video:
        url = node.get("contentUrl")
        # Only accept real strings; anything else would break `.strip()`.
        if isinstance(url, str) and url.strip():
            return url.strip()
    # A VideoObject without a usable URL does not end the search.
    return _video_content_url(node.get("@graph"))


def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | None:
    """Resolve the direct mp4 stream of a scene page.

    The page is fetched with ``fetch_tube_html`` and every JSON-LD script is
    scanned for a ``VideoObject`` carrying a ``contentUrl``. If no script
    yields one (for instance because the JSON is malformed), the first
    ``"contentUrl": "..."`` pair found anywhere in the HTML is used instead,
    with its JSON escapes decoded.

    Args:
        page_url: URL of the scene page.
        timeout: Passed through to ``fetch_tube_html``.

    Returns:
        A single-element list with the mp4 ``StreamSource``, or ``None`` (after
        logging a warning) when no http(s) ``contentUrl`` could be found.
        ``quality`` is taken from a ``_<digits>p.mp4`` marker in the URL and is
        ``None`` when the URL has no such marker.

    Raises:
        Whatever ``fetch_tube_html`` raises; nothing is caught here.
    """
    html = fetch_tube_html(page_url, timeout=timeout)

    content_url: str | None = None
    for match in _JSONLD_RE.finditer(html):
        raw = match.group(1).strip()
        if not raw:
            continue
        try:
            data = json.loads(raw)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
        content_url = _video_content_url(data)
        if content_url:
            break

    if not content_url:
        fallback = _CONTENT_URL_RE.search(html)
        if fallback:
            content_url = _decode_json_string(fallback.group(1)).strip()

    if not content_url or not content_url.startswith(("http://", "https://")):
        log.warning("hqfap: no contentUrl in JSON-LD for %s", page_url)
        return None

    quality_match = _QUALITY_RE.search(content_url)
    quality = f"{quality_match.group(1)}p" if quality_match else None
    return [
        StreamSource(
            link=content_url,
            quality=quality,
            type="mp4",
            referer="https://hqfap.com/",
        )
    ]
|