Goon — self-hosted aggregator for adult-content scene metadata. Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites. Cross-source deduplication via perceptual hash + Levenshtein distance. FastAPI backend + APScheduler worker + React Native (Expo) mobile client. FOSS, ad-free, donation-funded. See README for details.
153 lines
5.5 KiB
Python
153 lines
5.5 KiB
Python
"""Common engine extractor for: embedseek, seekplayer, rpmplay, upns, player4me, easyvidplayer.
|
||
|
||
All of them use the same engine (Vite-built React SPA + AES-CBC encrypted API
+ HLS-based streaming). The hostname domains differ, but the backend is shared.
|
||
|
||
Pattern (verified 2026-05-15 from a residential PL connection and a Hetzner FI VPS):
|
||
|
||
1. Embed URL = `https://<sub>.<host>.<tld>/#<hash_id>` — the hash fragment is the video ID.
   The SPA shell (`Loading...` body) loads the `/assets/index-<n>.js` bundle.
|
||
|
||
2. The JS fetches `/api/v1/video?id=<hash_id>&w=<W>&h=<H>&r=` (W, H taken from window.screen).
   Response: hex-encoded AES-CBC(key=`kiemtienmua911ca`, iv=`1234567890oiuytr`)
   ciphertext, ~5KB. PKCS7 padded.
|
||
|
||
3. The plaintext JSON contains:
   - `source`: signed m3u8 URL on a CDN edge IP (e.g. `185.237.107.146/v4/<sig>/<exp>/ty/<hash>/master.m3u8?v=...`)
   - `cf`: Cloudflare-fronted fallback URL (.txt with a list of m3u8 paths)
   - `metric.ipAddress`: the visitor's IP (the signed token is bound to this IP)
   - `metric.cfDomain`: CF domain used for the fallback
|
||
- `title`, `poster`, `thumbnail`, ...
|
||
|
||
4. The `source` URL is signed with the visitor IP. Fetching from the VPS returns a master.m3u8 with a signed
   token tied to the VPS IP — the proxy fetches segments with the same token, which works.
   CDN port 443 requires `verify=False` (self-signed IP cert).
|
||
|
||
5. All hosters share the same KEY/IV values. The internal JS obfuscation
   masks them behind lookups such as `ue(773)`, `ue(686)`, etc. — the derived bytes are always
   identical for every domain.
|
||
|
||
Hosters covered (origin counts in the DB, 2026-05-15):
- embedseek (20271), seekplayer (20271) — mirror sites that share the same hash_id
|
||
- rpmplay (15317)
|
||
- upns (14287)
|
||
- player4me (41040)
|
||
- easyvidplayer (47588)
|
||
|
||
In total ~159k playback sources.
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
import logging
|
||
import re
|
||
from urllib.parse import urlparse
|
||
|
||
from cryptography.hazmat.primitives import padding
|
||
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
|
||
|
||
from app.extractors._fetch import _DEFAULT_UA, browser_get
|
||
from app.extractors._models import HosterDead, StreamSource
|
||
|
||
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Static AES-CBC key and IV consumed by `_decrypt` (both are 16 bytes long).
_KEY = b"kiemtienmua911ca"
_IV = b"1234567890oiuytr"

# Hostname matching: 6 base hosts × subdomains × TLD variants.
# Examples:
#   my.embedseek.online, vip.seekplayer.vip, my.rpmplay.online,
#   my.upns.online, vip.player4me.vip, p.easyvidplayer.com
# The pattern accepts at most one optional alphanumeric subdomain label in
# front of the base host and is matched case-insensitively.
_HOST_RE = re.compile(
    r"^(?:[a-z0-9]+\.)?(?:embedseek|seekplayer|rpmplay|upns|player4me|easyvidplayer)\."
    r"(?:online|vip|com|net|io|me|tv)$",
    re.IGNORECASE,
)
|
||
|
||
|
||
def matches(url: str) -> bool:
    """Tell whether *url* points at one of the hosters handled by this engine.

    The decision is based solely on the URL's hostname, checked against
    ``_HOST_RE``. Any error raised while parsing *url* counts as "no match".
    """
    try:
        parsed_host = urlparse(url).hostname
    except Exception:
        return False
    # A URL without a hostname is checked as the empty string, which never matches.
    candidate = parsed_host if parsed_host else ""
    return _HOST_RE.match(candidate) is not None
|
||
|
||
|
||
def _decrypt(hex_str: str) -> str:
    """Decrypt a hex-encoded AES-CBC payload into text.

    The hex string is converted to bytes, decrypted with the module-level
    ``_KEY`` / ``_IV``, stripped of its PKCS7 padding (128-bit block size) and
    decoded as UTF-8, with undecodable bytes replaced rather than raising.
    """
    ciphertext = bytes.fromhex(hex_str)
    decryptor = Cipher(algorithms.AES(_KEY), modes.CBC(_IV)).decryptor()
    padded = decryptor.update(ciphertext) + decryptor.finalize()
    pkcs7 = padding.PKCS7(128).unpadder()
    plain = pkcs7.update(padded) + pkcs7.finalize()
    return plain.decode("utf-8", errors="replace")
|
||
|
||
|
||
def extract(page_url: str, *, timeout: float = 30.0) -> list[StreamSource] | None:
    """Resolve a seekplayer-engine embed URL into playable stream sources.

    The video id is read from the URL fragment (or the ``id`` query parameter
    as a fallback), the hoster's ``/api/v1/video`` endpoint is fetched through
    ``browser_get``, and the hex-encoded response is decrypted with
    ``_decrypt`` and parsed as JSON.

    Args:
        page_url: Embed page URL, normally ``https://<sub>.<host>.<tld>/#<hash_id>``.
        timeout: Timeout forwarded to ``browser_get``.

    Returns:
        A one-element list holding the ``source`` m3u8 stream, or ``None`` when
        the URL is malformed or not for a supported host, carries no video id,
        the API answers with a non-200 status or an empty body, the payload
        cannot be decrypted or parsed, is not a JSON object, or has no usable
        ``source`` string.

    Raises:
        HosterDead: The API answered 404/410, or the decrypted payload carries
            a truthy ``error`` field.
    """
    # Local import, as in the original block: keeps the block self-contained.
    from urllib.parse import parse_qs, quote

    try:
        parsed = urlparse(page_url)
        hostname = parsed.hostname
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. unbalanced IPv6 brackets).
        # `matches` treats those as "not ours", so do the same here.
        return None
    if not hostname or not _HOST_RE.match(hostname):
        return None

    # The hash_id lives in the `#<id>` fragment; when the client passed the URL
    # without `#` (e.g. after nav.replace), also try the `?id=` query parameter.
    hash_id = parsed.fragment.strip()
    if not hash_id and parsed.query:
        hash_id = (parse_qs(parsed.query).get("id") or [""])[0].strip()
    if not hash_id:
        log.info("seekplayer-engine: no hash_id w %s", page_url)
        return None

    # Scheme-relative embed URLs (`//host/#id`) have an empty scheme; default
    # to https instead of building an invalid `://host` origin.
    scheme = parsed.scheme or "https"
    host = f"{scheme}://{hostname}"
    # hash_id comes from the caller-supplied URL: percent-encode it so it
    # cannot inject extra query parameters into the API request.
    encoded_id = quote(hash_id, safe="")
    api_url = f"{host}/api/v1/video?id={encoded_id}&w=1920&h=1080&r="

    headers = {
        "User-Agent": _DEFAULT_UA,
        "Accept": "*/*",
        "Referer": f"{host}/",
    }
    r = browser_get(api_url, headers=headers, timeout=timeout)
    if r.status_code in (404, 410):
        raise HosterDead(f"seekplayer-engine {page_url}: HTTP {r.status_code}")
    if r.status_code != 200 or not r.text:
        log.info("seekplayer-engine: api fail %s status=%s", api_url, r.status_code)
        return None

    # Decrypt and parse are deliberately best-effort: any failure is logged
    # and reported as "no sources" rather than propagated to the caller.
    try:
        plaintext = _decrypt(r.text)
    except Exception as e:
        log.warning("seekplayer-engine: decrypt fail dla %s: %s", api_url, e)
        return None

    try:
        data = json.loads(plaintext)
    except Exception as e:
        log.warning("seekplayer-engine: JSON parse fail dla %s: %s", api_url, e)
        return None

    # Valid JSON that is not an object (list, string, number, null) has no
    # fields to read; bail out instead of failing on `.get`.
    if not isinstance(data, dict):
        log.warning(
            "seekplayer-engine: unexpected JSON payload type %s for %s",
            type(data).__name__,
            api_url,
        )
        return None

    # Same-engine hosters return `{"error": "..."}` when the video does not exist.
    if data.get("error"):
        raise HosterDead(f"seekplayer-engine {page_url}: {data['error']}")

    # Only string values are usable as URLs; anything else counts as missing.
    raw_source = data.get("source")
    source = raw_source.strip() if isinstance(raw_source, str) else ""
    raw_cf = data.get("cf")
    cf = raw_cf.strip() if isinstance(raw_cf, str) else ""

    # Source: IP-bound m3u8 URL on a CDN edge
    # (e.g. `185.237.107.146/v4/<sig>/<exp>/ty/<hash>/master.m3u8`).
    # The token is signed for the VPS IP — the proxy fetches the segments from
    # the same IP, so this works.
    # The CDN serves its certificate on a bare IP — fetching requires
    # verify=False (stream_proxy.py already has such a branch for IP-host m3u8).
    sources: list[StreamSource] = []
    if source:
        sources.append(
            StreamSource(
                link=source,
                quality=None,
                type="m3u8",
                referer=f"{host}/",
                raw={
                    "proxy_no_verify": True,
                    "cf_fallback": cf or None,
                    "engine": "seekplayer",
                },
            )
        )
    return sources or None
|