"""paradisehill.cc — direct mp4 extractor. Paradisehill embed strony renderują video.js z `og:video` meta tagiem wskazującym na `/player//` iframe. Ten iframe zawiera inline JS: var videoList = [ {"sources":[{"src":"https://v1.paradisehill.cc/video/_part1.mp4","type":"video/mp4"}]}, {"sources":[{"src":"...part2.mp4",...}]}, ... ]; Wieloczęściowe filmy są dzielone na part1..partN (~20-30 min każda). v1.paradisehill.cc serwuje direct mp4 z Referer = paradisehill scene page; nie ma session auth ani token bind (zweryfikowane 2026-05-15 z VPS Hetzner, status 200, ISO Media MP4). Zwracamy listę StreamSource — jeden per part. Mobile player uznaje pierwszy element (`best`) za główny; jeśli kiedyś potrzebowalibyśmy chapter switching, parts są w `raw["parts"]` jako URL-e. """ from __future__ import annotations import json import logging import re from urllib.parse import urljoin from app.extractors._fetch import browser_get, _DEFAULT_UA from app.extractors._models import HosterDead, StreamSource log = logging.getLogger(__name__) _OG_VIDEO_RE = re.compile(r']+\.mp4(?:\?[^\s"\'<>]*)?', re.IGNORECASE) def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | None: headers = { "User-Agent": _DEFAULT_UA, "Accept": "text/html,application/xhtml+xml", "Accept-Language": "en-US,en;q=0.9", } r = browser_get(page_url, headers=headers, timeout=timeout) if r.status_code == 404 or r.status_code == 410: raise HosterDead(f"paradisehill {page_url}: HTTP {r.status_code}") if r.status_code != 200 or not r.text: log.info("paradisehill: page fetch fail %s status=%s", page_url, r.status_code) return None m = _OG_VIDEO_RE.search(r.text) if not m: log.info("paradisehill: no og:video meta in %s", page_url) return None player_url = urljoin(page_url, m.group(1)) r2 = browser_get(player_url, headers={**headers, "Referer": page_url}, timeout=timeout) if r2.status_code != 200 or not r2.text: log.info("paradisehill: player iframe fail %s status=%s", player_url, r2.status_code) return None vl = _VIDEOLIST_RE.search(r2.text) parts: list[str] = [] if vl: try: data = json.loads(vl.group(1)) for item in data: for src in (item.get("sources") or []): u = src.get("src") if u and u not in parts: parts.append(u) except json.JSONDecodeError as e: log.info("paradisehill: videoList JSON decode fail in %s: %s", player_url, e) if not parts: for m in _MP4_RE.finditer(r2.text): u = m.group(0) if u not in parts: parts.append(u) if not parts: log.info("paradisehill: no mp4 in player iframe %s", player_url) return None referer = page_url sources: list[StreamSource] = [] for i, url in enumerate(parts): sources.append( StreamSource( link=url, quality=None, type="mp4", referer=referer, raw={"part_index": i, "total_parts": len(parts), "parts": parts} if len(parts) > 1 else None, ) ) return sources