"""voe.sx embed → direct m3u8 extractor. Pattern (verified 2026-05-15 z VPS Hetzner FI): 1. `voe.sx/e/` zwraca 759-byte HTML z JS redirect: window.location.href = 'https://.com/e/' Mirror domena rotuje (rebeccasciencestreet.com, darnobedienceupscale.com itp.) — bierzemy LITERAL `Location` z window.location.href assignment. 2. Mirror embed page 137KB zawiera `` — pojedyncza zakodowana string z chunkami 4-char rozdzielonymi 7 distinct 2-char delimiterami: `@$`, `^^`, `~@`, `%?`, `*~`, `!!`, `#&`. 3. Loader `/js/loader.bc4a6543429.js` (83KB obfuscator.io) wczytuje payload przez `querySelectorAll('script[type=application/json]')[i].textContent`, parsuje JSON, dekoduje 7-step pipeline (RE'd 2026-05-15): a. ROT13 letters b. replace each of 7 magic separators with "_" c. strip underscores d. base64 decode (atob) e. shift each char DOWN by 3 (charCode - 3) f. reverse string g. base64 decode AGAIN h. JSON.parse 4. Decoded JSON ma `source` (HLS m3u8 na `cloudwindow-route.com` z signed token) + `fallback: [{file: ".mp4", type: "mp4", label: "720"}]`. URL signed z `i=` — z VPS dostajemy `i=46.62` (Hetzner). Token IP-bound do tych pierwszych 2 oktetów (luźne) — proxy działa. 5. CDN host losowy ale wzorzec stały. Wymaga Referer = voe.sx (lub mirror) bo token z `?node=` valida. 21607 movies origin='mangoporn:voe' w DB. """ from __future__ import annotations import base64 import json import logging import re from app.extractors._fetch import _DEFAULT_UA, browser_get from app.extractors._models import HosterDead, StreamSource log = logging.getLogger(__name__) _REDIRECT_RE = re.compile(r"window\.location\.href\s*=\s*['\"]([^'\"]+)['\"]") _PAYLOAD_RE = re.compile(r'(\[.+?\])', re.DOTALL) _MAGIC_SEPS = ("@$", "^^", "~@", "%?", "*~", "!!", "#&") def _rot13(s: str) -> str: out = [] for ch in s: c = ord(ch) if 0x41 <= c <= 0x5A: c = (c - 0x41 + 13) % 26 + 0x41 elif 0x61 <= c <= 0x7A: c = (c - 0x61 + 13) % 26 + 0x61 out.append(chr(c)) return "".join(out) def _decode_payload(payload: str) -> dict | None: """7-step pipeline z loader.bc4a6543429.js (RE 2026-05-15).""" try: s = _rot13(payload) for sep in _MAGIC_SEPS: s = s.replace(sep, "_") s = s.replace("_", "") # 1st atob — uses latin-1 to preserve all 256 byte values for shift step. b = base64.b64decode(s + "=" * (-len(s) % 4)).decode("latin-1") shifted = "".join(chr(ord(c) - 3) for c in b) reversed_str = shifted[::-1] plaintext = base64.b64decode(reversed_str + "=" * (-len(reversed_str) % 4)).decode( "utf-8", errors="replace" ) return json.loads(plaintext) except Exception as e: log.warning("voe: decode pipeline fail: %s", e) return None def extract(page_url: str, *, timeout: float = 30.0) -> list[StreamSource] | None: headers = { "User-Agent": _DEFAULT_UA, "Accept": "text/html,application/xhtml+xml", "Accept-Language": "en-US,en;q=0.9", } r = browser_get(page_url, headers=headers, timeout=timeout) if r.status_code in (404, 410): raise HosterDead(f"voe {page_url}: HTTP {r.status_code}") if r.status_code != 200 or not r.text: log.info("voe: stage1 fail %s status=%s", page_url, r.status_code) return None # Stage 1: follow JS redirect do losowego mirroru. target_url = page_url if "window.location.href" in r.text: m = _REDIRECT_RE.search(r.text) if not m: log.info("voe: redirect script ale brak href w %s", page_url) return None mirror_url = m.group(1) # JS sprawdza `permanentToken` w localStorage; bez niego idzie na pierwszy # match (`rebeccasciencestreet` itp). Wszyscy mają identyczny content. r2 = browser_get(mirror_url, headers=headers, timeout=timeout) if r2.status_code in (404, 410): raise HosterDead(f"voe mirror {mirror_url}: HTTP {r2.status_code}") if r2.status_code != 200 or not r2.text: log.info("voe: mirror fail %s status=%s", mirror_url, r2.status_code) return None target_url = mirror_url r = r2 # Stage 2: extract & decode JSON payload. pm = _PAYLOAD_RE.search(r.text) if not pm: if "Video not found" in r.text or "Video has been removed" in r.text: raise HosterDead(f"voe {target_url}: video not found") log.info("voe: no application/json payload w %s", target_url) return None try: arr = json.loads(pm.group(1)) payload = arr[0] if isinstance(arr, list) and arr else None except Exception as e: log.warning("voe: JSON list parse fail %s: %s", target_url, e) return None if not isinstance(payload, str): return None config = _decode_payload(payload) if not config: return None source = (config.get("source") or "").strip() fallback = config.get("fallback") or [] if isinstance(fallback, dict): fallback = [fallback] # Voe CDN URL ma `i=<2-octet IP prefix>` — token loose-bound do IP range. # Proxy z VPS dostaje signed dla `i=46.62`, fetch działa. referer = "https://voe.sx/" sources: list[StreamSource] = [] if source: sources.append( StreamSource( link=source, quality=None, type="m3u8", referer=referer, raw={"engine": "voe"}, ) ) # Dorzucamy mp4 fallback gdy m3u8 by zawiódł. for fb in fallback: if isinstance(fb, dict) and fb.get("file"): sources.append( StreamSource( link=fb["file"], quality=fb.get("label"), type="mp4", referer=referer, raw={"engine": "voe", "fallback": True}, ) ) return sources or None