Goon — self-hosted aggregator for adult-content scene metadata. Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites. Cross-source deduplication via perceptual hash + Levenshtein distance. FastAPI backend + APScheduler worker + React Native (Expo) mobile client. FOSS, ad-free, donation-funded. See README for details.
96 lines
3.5 KiB
Python
96 lines
3.5 KiB
Python
"""paradisehill.cc — direct mp4 extractor.
|
|
|
|
Paradisehill embed pages render video.js with an `og:video` meta tag pointing
at the `/player/<id>/` iframe. That iframe contains inline JS:

    var videoList = [
        {"sources":[{"src":"https://v1.paradisehill.cc/video/<hash>_part1.mp4","type":"video/mp4"}]},
        {"sources":[{"src":"...part2.mp4",...}]},
        ...
    ];

Multi-part movies are split into part1..partN (~20-30 min each).
v1.paradisehill.cc serves the mp4 directly when the Referer is the paradisehill
scene page; there is no session auth or token binding (verified 2026-05-15
from a Hetzner VPS: status 200, ISO Media MP4).

We return a list of StreamSource — one per part. The mobile player treats the
first element (`best`) as the main one; should we ever need chapter switching,
the parts are available in `raw["parts"]` as URLs.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import re
|
|
from urllib.parse import urljoin
|
|
|
|
from app.extractors._fetch import browser_get, _DEFAULT_UA
|
|
from app.extractors._models import HosterDead, StreamSource
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Captures the `content` value of an `og:video` meta tag. Only matches when
# the `property` attribute comes directly before `content`.
_OG_VIDEO_RE = re.compile(
    r'<meta\s+property=["\']og:video["\']\s+content=["\']([^"\']+)["\']',
    flags=re.IGNORECASE,
)

# Captures the array literal assigned by the inline `var videoList = [...];`
# statement. The captured text may not contain a `;`.
# NOTE(review): the pattern has no `.`, so re.DOTALL looks like a no-op here.
_VIDEOLIST_RE = re.compile(
    r'var\s+videoList\s*=\s*(\[[^;]+\]);',
    flags=re.DOTALL,
)

# Matches an absolute http(s) URL up to a `.mp4` extension, plus an optional
# query string; stops at whitespace, quotes and angle brackets.
_MP4_RE = re.compile(
    r'https?://[^\s"\'<>]+\.mp4(?:\?[^\s"\'<>]*)?',
    flags=re.IGNORECASE,
)
|
|
|
|
|
|
def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | None:
    """Resolve a paradisehill scene page into direct mp4 stream sources.

    Fetches the scene page, follows its ``og:video`` meta tag to the player
    iframe, and collects the mp4 URLs found there: first from the inline
    ``videoList`` array, then, if that yields nothing, from any mp4 URL in
    the iframe markup.

    Args:
        page_url: URL of the paradisehill scene page. It is also sent as the
            ``Referer`` for the player request and stored on every returned
            source.
        timeout: Timeout forwarded to each ``browser_get`` call.

    Returns:
        One ``StreamSource`` per unique mp4 URL, in the order found. With more
        than one part, each source's ``raw`` carries ``part_index``,
        ``total_parts`` and the full ``parts`` list; otherwise ``raw`` is
        ``None``. Returns ``None`` when the page or player cannot be fetched
        or no mp4 URL is found.

    Raises:
        HosterDead: The scene page answered HTTP 404 or 410.
    """
    headers = {
        "User-Agent": _DEFAULT_UA,
        "Accept": "text/html,application/xhtml+xml",
        "Accept-Language": "en-US,en;q=0.9",
    }

    page = browser_get(page_url, headers=headers, timeout=timeout)
    if page.status_code in (404, 410):
        raise HosterDead(f"paradisehill {page_url}: HTTP {page.status_code}")
    if page.status_code != 200 or not page.text:
        log.info("paradisehill: page fetch fail %s status=%s", page_url, page.status_code)
        return None

    og_video = _OG_VIDEO_RE.search(page.text)
    if not og_video:
        log.info("paradisehill: no og:video meta in %s", page_url)
        return None
    # The meta tag may hold a relative path, so resolve it against the page.
    player_url = urljoin(page_url, og_video.group(1))

    player = browser_get(
        player_url,
        headers={**headers, "Referer": page_url},
        timeout=timeout,
    )
    if player.status_code != 200 or not player.text:
        log.info("paradisehill: player iframe fail %s status=%s", player_url, player.status_code)
        return None

    parts = _video_list_parts(player.text, player_url)
    if not parts:
        # Fallback: take every mp4 URL in the iframe markup, de-duplicated
        # while keeping first-seen order.
        parts = list(dict.fromkeys(hit.group(0) for hit in _MP4_RE.finditer(player.text)))
    if not parts:
        log.info("paradisehill: no mp4 in player iframe %s", player_url)
        return None

    total = len(parts)
    return [
        StreamSource(
            link=url,
            quality=None,
            type="mp4",
            referer=page_url,
            raw={"part_index": index, "total_parts": total, "parts": parts} if total > 1 else None,
        )
        for index, url in enumerate(parts)
    ]


def _video_list_parts(player_html: str, player_url: str) -> list[str]:
    """Return the unique ``src`` URLs from the inline ``videoList``, in order.

    The array comes from a remote page, so its shape is not trusted: entries
    that are not objects, ``sources`` values that are not lists, and ``src``
    values that are not non-empty strings are skipped instead of raising.
    Returns an empty list when the array is absent or is not valid JSON, so
    the caller can fall back to a plain URL scan.

    Args:
        player_html: Markup of the player iframe.
        player_url: URL of the player iframe; used only in the log message.
    """
    found = _VIDEOLIST_RE.search(player_html)
    if not found:
        return []
    try:
        entries = json.loads(found.group(1))
    except json.JSONDecodeError as e:
        log.info("paradisehill: videoList JSON decode fail in %s: %s", player_url, e)
        return []

    # A dict preserves insertion order, giving an ordered de-duplication.
    unique: dict[str, None] = {}
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        sources = entry.get("sources")
        if not isinstance(sources, list):
            continue
        for source in sources:
            if not isinstance(source, dict):
                continue
            src = source.get("src")
            if isinstance(src, str) and src:
                unique[src] = None
    return list(unique)
|