goon/app/extractors/hosters/voe.py
goon-foss ad0284585b Initial commit
Goon — self-hosted aggregator for adult-content scene metadata.

Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites.
Cross-source deduplication via perceptual hash + Levenshtein distance.
FastAPI backend + APScheduler worker + React Native (Expo) mobile client.

FOSS, ad-free, donation-funded. See README for details.
2026-05-20 10:10:22 +02:00

172 lines
6.1 KiB
Python

"""voe.sx embed → direct m3u8 extractor.
Pattern (verified 2026-05-15 from a Hetzner FI VPS):
1. `voe.sx/e/<id>` returns a 759-byte HTML page with a JS redirect:
window.location.href = 'https://<random>.com/e/<id>'
The mirror domain rotates (rebeccasciencestreet.com, darnobedienceupscale.com, etc.) —
we take the literal target URL from the window.location.href assignment.
2. The mirror embed page (137 KB) contains `<script type="application/json">["DROH@$nJjm..."]</script>`
— a single encoded string made of 4-character chunks separated by 7 distinct
2-character delimiters: `@$`, `^^`, `~@`, `%?`, `*~`, `!!`, `#&`.
3. The loader `/js/loader.bc4a6543429.js` (83 KB, obfuscator.io) reads the payload via
`querySelectorAll('script[type=application/json]')[i].textContent`, parses the
JSON, and decodes it in 7 steps (a–g) followed by JSON.parse (h) (reverse-engineered 2026-05-15):
a. ROT13 letters
b. replace each of 7 magic separators with "_"
c. strip underscores
d. base64 decode (atob)
e. shift each char DOWN by 3 (charCode - 3)
f. reverse string
g. base64 decode AGAIN
h. JSON.parse
4. The decoded JSON has `source` (HLS m3u8 on `cloudwindow-route.com` with a signed token)
plus `fallback: [{file: ".mp4", type: "mp4", label: "720"}]`. The URL is signed
with `i=<first 2 octets of the visitor IP>` — from the VPS we get `i=46.62` (Hetzner).
The token is bound only to those first 2 octets (loose binding) — proxying works.
5. The CDN host is random but the pattern is constant. Requires Referer = voe.sx (or the mirror) because
the `?node=` token validates it.
21607 movies with origin='mangoporn:voe' in the DB.
"""
from __future__ import annotations
import base64
import json
import logging
import re
from app.extractors._fetch import _DEFAULT_UA, browser_get
from app.extractors._models import HosterDead, StreamSource
# Module-level logger named after this module.
log = logging.getLogger(__name__)
# Captures the quoted URL assigned to `window.location.href` (single or double quotes).
_REDIRECT_RE = re.compile(r"window\.location\.href\s*=\s*['\"]([^'\"]+)['\"]")
# Captures the JSON array text (`[...]`) inside a `<script type="application/json">` tag;
# DOTALL lets the array span multiple lines.
_PAYLOAD_RE = re.compile(r'<script\s+type=["\']application/json["\']>(\[.+?\])</script>', re.DOTALL)
# The seven 2-character separators that _decode_payload removes from the payload.
_MAGIC_SEPS = ("@$", "^^", "~@", "%?", "*~", "!!", "#&")
# ROT13 translation table: every ASCII letter maps to the letter 13 places
# further along its own (upper- or lower-case) alphabet, wrapping around.
_ROT13_TABLE = str.maketrans(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
    "NOPQRSTUVWXYZABCDEFGHIJKLMnopqrstuvwxyzabcdefghijklm",
)


def _rot13(s: str) -> str:
    """Return *s* with every ASCII letter rotated by 13 places.

    Characters outside A-Z / a-z (digits, punctuation, non-ASCII) are
    returned unchanged, so applying the function twice yields the input.
    """
    return s.translate(_ROT13_TABLE)
def _decode_payload(payload: str) -> dict | None:
    """Decode the obfuscated voe embed payload into its JSON config.

    Reproduces the pipeline of loader.bc4a6543429.js (reverse-engineered
    2026-05-15): ROT13 the letters, replace the magic separators with "_",
    strip the underscores, base64-decode, shift every character code down by
    3, reverse the string, base64-decode again, then parse the result as JSON.

    Args:
        payload: The encoded string taken from the embed page.

    Returns:
        The decoded config when it is a JSON object; None when any step of
        the pipeline fails or the decoded JSON is some other type.
    """
    try:
        s = _rot13(payload)
        # Separators become "_" first and the underscores are dropped in a
        # second pass, so the text on either side of a removed separator
        # cannot fuse into a new separator while the loop is still running.
        for sep in _MAGIC_SEPS:
            s = s.replace(sep, "_")
        s = s.replace("_", "")
        # First atob: latin-1 maps every byte to the code point of the same
        # value, preserving all 256 byte values for the shift step.
        first_pass = base64.b64decode(s + "=" * (-len(s) % 4)).decode("latin-1")
        # chr() raises ValueError for code points below 3; the handler below
        # treats that like any other malformed payload.
        shifted = "".join(chr(ord(c) - 3) for c in first_pass)
        flipped = shifted[::-1]
        plaintext = base64.b64decode(flipped + "=" * (-len(flipped) % 4)).decode(
            "utf-8", errors="replace"
        )
        config = json.loads(plaintext)
    except Exception as e:
        # Best-effort decoder: any failure means "no usable config", not a crash.
        log.warning("voe: decode pipeline fail: %s", e)
        return None
    # json.loads may return a list, string, number, bool or None; callers use
    # dict methods on the result, so anything but an object is a failure.
    if not isinstance(config, dict):
        log.warning("voe: decoded payload is %s, expected a JSON object", type(config).__name__)
        return None
    return config
def extract(page_url: str, *, timeout: float = 30.0) -> list[StreamSource] | None:
    """Resolve a voe embed page to its stream sources.

    Fetches ``page_url``, follows the JavaScript redirect to the mirror page
    when one is present, decodes the embedded JSON payload and builds the
    source list: the HLS ``source`` first, then every mp4 ``fallback`` entry.

    Args:
        page_url: URL of the voe embed page.
        timeout: Forwarded to every ``browser_get`` call.

    Returns:
        A non-empty list of ``StreamSource``, or None when a fetch does not
        return 200 with a body, the payload is missing or cannot be decoded,
        or the decoded config contains no usable URL.

    Raises:
        HosterDead: On HTTP 404/410 from the embed page or the mirror, or when
            the page has no payload and says the video was not found/removed.
    """
    headers = {
        "User-Agent": _DEFAULT_UA,
        "Accept": "text/html,application/xhtml+xml",
        "Accept-Language": "en-US,en;q=0.9",
    }
    r = browser_get(page_url, headers=headers, timeout=timeout)
    if r.status_code in (404, 410):
        raise HosterDead(f"voe {page_url}: HTTP {r.status_code}")
    if r.status_code != 200 or not r.text:
        log.info("voe: stage1 fail %s status=%s", page_url, r.status_code)
        return None

    # Stage 1: follow the JS redirect to the randomly chosen mirror.
    target_url = page_url
    if "window.location.href" in r.text:
        m = _REDIRECT_RE.search(r.text)
        if not m:
            log.info("voe: redirect script ale brak href w %s", page_url)
            return None
        mirror_url = m.group(1)
        # The page's JS checks `permanentToken` in localStorage; without it the
        # first match is used (`rebeccasciencestreet` etc.). All mirrors serve
        # identical content.
        r2 = browser_get(mirror_url, headers=headers, timeout=timeout)
        if r2.status_code in (404, 410):
            raise HosterDead(f"voe mirror {mirror_url}: HTTP {r2.status_code}")
        if r2.status_code != 200 or not r2.text:
            log.info("voe: mirror fail %s status=%s", mirror_url, r2.status_code)
            return None
        target_url = mirror_url
        r = r2

    # Stage 2: extract and decode the JSON payload.
    pm = _PAYLOAD_RE.search(r.text)
    if not pm:
        if "Video not found" in r.text or "Video has been removed" in r.text:
            raise HosterDead(f"voe {target_url}: video not found")
        log.info("voe: no application/json payload w %s", target_url)
        return None
    try:
        arr = json.loads(pm.group(1))
    except Exception as e:
        log.warning("voe: JSON list parse fail %s: %s", target_url, e)
        return None
    # The payload is the first element of a non-empty JSON array.
    payload = arr[0] if isinstance(arr, list) and arr else None
    if not isinstance(payload, str):
        return None

    config = _decode_payload(payload)
    if not config:
        return None
    # The decoded JSON is untrusted and may be any JSON type; only an object
    # can carry the `source` / `fallback` fields read below.
    if not isinstance(config, dict):
        log.warning("voe: decoded config is not a JSON object for %s", target_url)
        return None

    raw_source = config.get("source")
    # A non-string `source` cannot be a URL; treat it as absent.
    source = raw_source.strip() if isinstance(raw_source, str) else ""
    fallback = config.get("fallback") or []
    if isinstance(fallback, dict):
        fallback = [fallback]
    elif not isinstance(fallback, list):
        # Scalars (numbers, strings, booleans) cannot hold fallback entries.
        fallback = []

    # The voe CDN URL carries `i=<2-octet IP prefix>` — the token is loosely
    # bound to an IP range. A proxy on the VPS gets a URL signed for
    # `i=46.62`, and fetching works.
    referer = "https://voe.sx/"
    sources: list[StreamSource] = []
    if source:
        sources.append(
            StreamSource(
                link=source,
                quality=None,
                type="m3u8",
                referer=referer,
                raw={"engine": "voe"},
            )
        )
    # Append the mp4 fallbacks in case the m3u8 fails.
    for fb in fallback:
        if not isinstance(fb, dict):
            continue
        file_url = fb.get("file")
        if isinstance(file_url, str) and file_url:
            sources.append(
                StreamSource(
                    link=file_url,
                    quality=fb.get("label"),
                    type="mp4",
                    referer=referer,
                    raw={"engine": "voe", "fallback": True},
                )
            )
    return sources or None