Goon — self-hosted aggregator for adult-content scene metadata. Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites. Cross-source deduplication via perceptual hash + Levenshtein distance. FastAPI backend + APScheduler worker + React Native (Expo) mobile client. FOSS, ad-free, donation-funded. See README for details.
172 lines
6.1 KiB
Python
172 lines
6.1 KiB
Python
"""voe.sx embed → direct m3u8 extractor.
|
|
|
|
Pattern (verified 2026-05-15 z VPS Hetzner FI):
|
|
|
|
1. `voe.sx/e/<id>` zwraca 759-byte HTML z JS redirect:
|
|
|
|
window.location.href = 'https://<random>.com/e/<id>'
|
|
|
|
Mirror domena rotuje (rebeccasciencestreet.com, darnobedienceupscale.com itp.) —
|
|
bierzemy LITERAL `Location` z window.location.href assignment.
|
|
|
|
2. Mirror embed page 137KB zawiera `<script type="application/json">["DROH@$nJjm..."]</script>`
|
|
— pojedyncza zakodowana string z chunkami 4-char rozdzielonymi 7 distinct
|
|
2-char delimiterami: `@$`, `^^`, `~@`, `%?`, `*~`, `!!`, `#&`.
|
|
|
|
3. The loader `/js/loader.bc4a6543429.js` (83KB, obfuscator.io) reads the payload via
`querySelectorAll('script[type=application/json]')[i].textContent`, parses the
JSON, and decodes it with the 8-step pipeline listed below as a–h (RE'd 2026-05-15):
|
|
|
|
a. ROT13 letters
|
|
b. replace each of 7 magic separators with "_"
|
|
c. strip underscores
|
|
d. base64 decode (atob)
|
|
e. shift each char DOWN by 3 (charCode - 3)
|
|
f. reverse string
|
|
g. base64 decode AGAIN
|
|
h. JSON.parse
|
|
|
|
4. Decoded JSON ma `source` (HLS m3u8 na `cloudwindow-route.com` z signed token)
|
|
+ `fallback: [{file: ".mp4", type: "mp4", label: "720"}]`. URL signed
|
|
z `i=<visitor IP first 2 octets>` — z VPS dostajemy `i=46.62` (Hetzner).
|
|
Token IP-bound do tych pierwszych 2 oktetów (luźne) — proxy działa.
|
|
|
|
5. CDN host losowy ale wzorzec stały. Wymaga Referer = voe.sx (lub mirror) bo
|
|
token z `?node=` valida.
|
|
|
|
21607 movies origin='mangoporn:voe' w DB.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import base64
|
|
import json
|
|
import logging
|
|
import re
|
|
|
|
from app.extractors._fetch import _DEFAULT_UA, browser_get
|
|
from app.extractors._models import HosterDead, StreamSource
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
_REDIRECT_RE = re.compile(r"window\.location\.href\s*=\s*['\"]([^'\"]+)['\"]")
|
|
_PAYLOAD_RE = re.compile(r'<script\s+type=["\']application/json["\']>(\[.+?\])</script>', re.DOTALL)
|
|
_MAGIC_SEPS = ("@$", "^^", "~@", "%?", "*~", "!!", "#&")
|
|
|
|
|
|
def _rot13(s: str) -> str:
|
|
out = []
|
|
for ch in s:
|
|
c = ord(ch)
|
|
if 0x41 <= c <= 0x5A:
|
|
c = (c - 0x41 + 13) % 26 + 0x41
|
|
elif 0x61 <= c <= 0x7A:
|
|
c = (c - 0x61 + 13) % 26 + 0x61
|
|
out.append(chr(c))
|
|
return "".join(out)
|
|
|
|
|
|
def _decode_payload(payload: str) -> dict | None:
    """Decode the obfuscated voe payload string into its config mapping.

    The steps, applied in order:

    1. ROT13 the ASCII letters.
    2. Replace each delimiter in ``_MAGIC_SEPS`` with ``"_"``.
    3. Strip every underscore.
    4. Base64-decode (padding is added as needed).
    5. Shift every character down by 3 code points.
    6. Reverse the string.
    7. Base64-decode again and interpret the bytes as UTF-8.
    8. Parse the result as JSON.

    Args:
        payload: The encoded string taken from the embed page.

    Returns:
        The decoded JSON object as a ``dict``, or ``None`` when any step
        fails or the decoded JSON is not an object. Failures are logged
        at WARNING level.
    """
    try:
        s = _rot13(payload)
        # Two passes on purpose: separators become "_" first and are stripped
        # afterwards, so removing one separator cannot form a new one.
        for sep in _MAGIC_SEPS:
            s = s.replace(sep, "_")
        s = s.replace("_", "")
        # First base64 pass: latin-1 maps each byte to the code point of the
        # same value, so all 256 byte values survive for the shift step.
        first_pass = base64.b64decode(s + "=" * (-len(s) % 4)).decode("latin-1")
        shifted = "".join(chr(ord(c) - 3) for c in first_pass)
        encoded = shifted[::-1]
        plaintext = base64.b64decode(
            encoded + "=" * (-len(encoded) % 4)
        ).decode("utf-8", errors="replace")
        config = json.loads(plaintext)
    except Exception as e:
        # Best-effort boundary over untrusted input: any failure in the
        # pipeline means "could not decode", never a crash for the caller.
        log.warning("voe: decode pipeline fail: %s", e)
        return None

    # json.loads may yield a list, string or number; callers rely on a mapping.
    if not isinstance(config, dict):
        log.warning(
            "voe: decode pipeline fail: expected JSON object, got %s",
            type(config).__name__,
        )
        return None
    return config
|
|
|
|
|
|
def extract(page_url: str, *, timeout: float = 30.0) -> list[StreamSource] | None:
    """Resolve a voe embed page to its playable stream sources.

    Fetches ``page_url``, follows the JavaScript ``window.location.href``
    redirect to the mirror page when one is present, pulls the encoded
    ``application/json`` payload out of the HTML and decodes it.

    Args:
        page_url: URL of the voe embed page.
        timeout: Passed to ``browser_get`` for each request.

    Returns:
        A list holding the ``source`` entry (typed ``"m3u8"``) when present,
        followed by one ``"mp4"`` entry per usable ``fallback`` item; or
        ``None`` when a fetch fails, the payload is missing or cannot be
        decoded, or the decoded config yields no usable link.

    Raises:
        HosterDead: If the page or the mirror answers HTTP 404/410, or the
            page has no payload and contains a "Video not found" /
            "Video has been removed" marker.
    """
    headers = {
        "User-Agent": _DEFAULT_UA,
        "Accept": "text/html,application/xhtml+xml",
        "Accept-Language": "en-US,en;q=0.9",
    }
    r = browser_get(page_url, headers=headers, timeout=timeout)
    if r.status_code in (404, 410):
        raise HosterDead(f"voe {page_url}: HTTP {r.status_code}")
    if r.status_code != 200 or not r.text:
        log.info("voe: stage1 fail %s status=%s", page_url, r.status_code)
        return None

    # Stage 1: follow the JS redirect to the randomly chosen mirror.
    target_url = page_url
    if "window.location.href" in r.text:
        m = _REDIRECT_RE.search(r.text)
        if not m:
            log.info("voe: redirect script ale brak href w %s", page_url)
            return None
        mirror_url = m.group(1)
        # The page's JS checks `permanentToken` in localStorage; without it the
        # first match is used (`rebeccasciencestreet` etc.). All mirrors serve
        # identical content.
        r2 = browser_get(mirror_url, headers=headers, timeout=timeout)
        if r2.status_code in (404, 410):
            raise HosterDead(f"voe mirror {mirror_url}: HTTP {r2.status_code}")
        if r2.status_code != 200 or not r2.text:
            log.info("voe: mirror fail %s status=%s", mirror_url, r2.status_code)
            return None
        target_url = mirror_url
        r = r2

    # Stage 2: extract and decode the JSON payload.
    pm = _PAYLOAD_RE.search(r.text)
    if not pm:
        if "Video not found" in r.text or "Video has been removed" in r.text:
            raise HosterDead(f"voe {target_url}: video not found")
        log.info("voe: no application/json payload w %s", target_url)
        return None
    try:
        arr = json.loads(pm.group(1))
    except Exception as e:
        # Best-effort: page markup is untrusted, a bad array is "no result".
        log.warning("voe: JSON list parse fail %s: %s", target_url, e)
        return None
    payload = arr[0] if isinstance(arr, list) and arr else None
    if not isinstance(payload, str):
        return None

    config = _decode_payload(payload)
    # Guard the shape here as well: the decoded JSON comes from a third party
    # and anything other than a non-empty object cannot be used below.
    if not isinstance(config, dict) or not config:
        return None

    raw_source = config.get("source")
    source = raw_source.strip() if isinstance(raw_source, str) else ""
    fallback = config.get("fallback")
    if isinstance(fallback, dict):
        fallback = [fallback]
    elif not isinstance(fallback, list):
        # Missing or unexpectedly shaped (e.g. a number): treat as no fallbacks.
        fallback = []

    # The voe CDN URL carries `i=<2-octet IP prefix>` — the token is loosely
    # bound to an IP range, so fetching through the same VPS proxy works.
    referer = "https://voe.sx/"
    sources: list[StreamSource] = []
    if source:
        sources.append(
            StreamSource(
                link=source,
                quality=None,
                type="m3u8",
                referer=referer,
                raw={"engine": "voe"},
            )
        )
    # Append the mp4 fallbacks in case the m3u8 fails.
    for fb in fallback:
        if not isinstance(fb, dict):
            continue
        file_url = fb.get("file")
        if not isinstance(file_url, str) or not file_url.strip():
            continue
        sources.append(
            StreamSource(
                link=file_url.strip(),
                quality=fb.get("label"),
                type="mp4",
                referer=referer,
                raw={"engine": "voe", "fallback": True},
            )
        )

    return sources or None
|