goon/app/extractors/tubes/paradisehill.py
goon-foss ad0284585b Initial commit
Goon — self-hosted aggregator for adult-content scene metadata.

Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites.
Cross-source deduplication via perceptual hash + Levenshtein distance.
FastAPI backend + APScheduler worker + React Native (Expo) mobile client.

FOSS, ad-free, donation-funded. See README for details.
2026-05-20 10:10:22 +02:00

96 lines
3.5 KiB
Python

"""paradisehill.cc — direct mp4 extractor.
Paradisehill embed strony renderują video.js z `og:video` meta tagiem wskazującym
na `/player/<id>/` iframe. Ten iframe zawiera inline JS:
var videoList = [
{"sources":[{"src":"https://v1.paradisehill.cc/video/<hash>_part1.mp4","type":"video/mp4"}]},
{"sources":[{"src":"...part2.mp4",...}]},
...
];
Wieloczęściowe filmy są dzielone na part1..partN (~20-30 min każda). v1.paradisehill.cc
serwuje direct mp4 z Referer = paradisehill scene page; nie ma session auth ani token
bind (zweryfikowane 2026-05-15 z VPS Hetzner, status 200, ISO Media MP4).
Zwracamy listę StreamSource — jeden per part. Mobile player uznaje pierwszy element
(`best`) za główny; jeśli kiedyś potrzebowalibyśmy chapter switching, parts są w
`raw["parts"]` jako URL-e.
"""
from __future__ import annotations
import json
import logging
import re
from urllib.parse import urljoin
from app.extractors._fetch import browser_get, _DEFAULT_UA
from app.extractors._models import HosterDead, StreamSource
# Module-level logger named after this module.
log = logging.getLogger(__name__)
# Captures the `content` value of an `og:video` <meta> tag (case-insensitive).
# Only matches when `property=` comes first and `content=` follows it directly,
# separated by whitespace; either quote style is accepted.
_OG_VIDEO_RE = re.compile(r'<meta\s+property=["\']og:video["\']\s+content=["\']([^"\']+)["\']', re.IGNORECASE)
# Captures the array literal assigned to `var videoList`, from `[` up to the `]`
# that is immediately followed by `;`. The captured text cannot contain a `;`,
# so an array with a semicolon inside it will not match. The character class
# already spans newlines, so multi-line arrays are matched.
_VIDEOLIST_RE = re.compile(r'var\s+videoList\s*=\s*(\[[^;]+\]);', re.DOTALL)
# Matches any absolute http(s) URL ending in `.mp4` (case-insensitive), with an
# optional query string; stops at whitespace, quotes and angle brackets.
_MP4_RE = re.compile(r'https?://[^\s"\'<>]+\.mp4(?:\?[^\s"\'<>]*)?', re.IGNORECASE)
def _collect_parts(player_html: str, player_url: str) -> list[str]:
    """Return the ordered, de-duplicated mp4 URLs found in the player iframe HTML.

    The inline ``var videoList = [...]`` array is tried first. If it is absent,
    is not valid JSON, or yields no usable URL, every ``.mp4`` URL found anywhere
    in the markup is used instead.

    Args:
        player_html: Body of the player iframe response.
        player_url: URL of the iframe; used only in log messages.

    Returns:
        The part URLs in document order; empty when nothing was found.
    """
    parts: list[str] = []

    found = _VIDEOLIST_RE.search(player_html)
    if found:
        try:
            data = json.loads(found.group(1))
        except json.JSONDecodeError as e:
            log.info("paradisehill: videoList JSON decode fail in %s: %s", player_url, e)
            data = []
        # The array comes from a third-party page, so its shape is not trusted:
        # entries that are not {"sources": [{"src": "<url>"}, ...]} are skipped
        # rather than allowed to raise AttributeError out of the extractor.
        for item in data if isinstance(data, list) else []:
            if not isinstance(item, dict):
                continue
            entries = item.get("sources")
            if not isinstance(entries, list):
                continue
            for entry in entries:
                if not isinstance(entry, dict):
                    continue
                u = entry.get("src")
                if isinstance(u, str) and u and u not in parts:
                    parts.append(u)

    if not parts:
        # Fallback: scrape every mp4 URL present in the iframe markup.
        for hit in _MP4_RE.finditer(player_html):
            u = hit.group(0)
            if u not in parts:
                parts.append(u)

    return parts


def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | None:
    """Resolve a paradisehill scene page to its direct mp4 stream(s).

    Fetches the scene page, follows its ``og:video`` meta tag to the player
    iframe, and reads the mp4 part URLs from that iframe.

    Args:
        page_url: URL of the scene page. It is also sent as ``Referer`` for the
            iframe request and stored as ``referer`` on every returned source.
        timeout: Timeout passed to each of the two ``browser_get`` calls.

    Returns:
        One ``StreamSource`` per part, in page order, or ``None`` when the page
        or iframe could not be fetched or no mp4 URL was found. For multi-part
        results each source's ``raw`` holds ``part_index``, ``total_parts`` and
        the full ``parts`` URL list; for a single part ``raw`` is ``None``.

    Raises:
        HosterDead: The scene page responded with HTTP 404 or 410.
    """
    headers = {
        "User-Agent": _DEFAULT_UA,
        "Accept": "text/html,application/xhtml+xml",
        "Accept-Language": "en-US,en;q=0.9",
    }

    r = browser_get(page_url, headers=headers, timeout=timeout)
    if r.status_code in (404, 410):
        raise HosterDead(f"paradisehill {page_url}: HTTP {r.status_code}")
    if r.status_code != 200 or not r.text:
        log.info("paradisehill: page fetch fail %s status=%s", page_url, r.status_code)
        return None

    og = _OG_VIDEO_RE.search(r.text)
    if not og:
        log.info("paradisehill: no og:video meta in %s", page_url)
        return None

    # og:video may be relative (e.g. /player/<id>/); resolve it against the page.
    player_url = urljoin(page_url, og.group(1))
    r2 = browser_get(player_url, headers={**headers, "Referer": page_url}, timeout=timeout)
    if r2.status_code != 200 or not r2.text:
        log.info("paradisehill: player iframe fail %s status=%s", player_url, r2.status_code)
        return None

    parts = _collect_parts(r2.text, player_url)
    if not parts:
        log.info("paradisehill: no mp4 in player iframe %s", player_url)
        return None

    total = len(parts)
    return [
        StreamSource(
            link=url,
            quality=None,
            type="mp4",
            referer=page_url,
            raw={"part_index": i, "total_parts": total, "parts": parts} if total > 1 else None,
        )
        for i, url in enumerate(parts)
    ]