goon/app/extractors/hosters/seekplayer_engine.py
goon-foss ad0284585b Initial commit
Goon — self-hosted aggregator for adult-content scene metadata.

Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites.
Cross-source deduplication via perceptual hash + Levenshtein distance.
FastAPI backend + APScheduler worker + React Native (Expo) mobile client.

FOSS, ad-free, donation-funded. See README for details.
2026-05-20 10:10:22 +02:00

153 lines
5.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Common engine extractor for: embedseek, seekplayer, rpmplay, upns, player4me, easyvidplayer.
Wszyscy używają tego samego silnika (Vite-built React SPA + AES-CBC encrypted API
+ HLS-based streaming). Hostname domains different ale shared backend.
Pattern (verified 2026-05-15 from a residential PL connection + a Hetzner FI VPS):
1. Embed URL = `https://<sub>.<host>.<tld>/#<hash_id>` — the hash fragment is the video ID.
   The SPA shell (`Loading...` body) loads the `/assets/index-<n>.js` bundle.
2. The JS fetches `/api/v1/video?id=<hash_id>&w=<W>&h=<H>&r=` (W, H taken from window.screen).
   Response: hex-encoded AES-CBC(key=`kiemtienmua911ca`, iv=`1234567890oiuytr`)
   ciphertext, ~5KB, PKCS7 padded.
3. The plaintext JSON contains:
   - `source`: signed m3u8 URL on a CDN edge IP (e.g. `185.237.107.146/v4/<sig>/<exp>/ty/<hash>/master.m3u8?v=...`)
   - `cf`: Cloudflare-fronted fallback URL (.txt with a list of m3u8 paths)
   - `metric.ipAddress`: the visitor's IP (the signed token is bound to this IP)
   - `metric.cfDomain`: CF domain for the fallback
   - `title`, `poster`, `thumbnail`, ...
4. The `source` URL is signed with the visitor IP. A fetch from the VPS returns a master.m3u8
   whose signed token is tied to the VPS IP — the proxy fetches the segments with the same
   token, so playback works. The CDN listens on port 443 and needs `verify=False`
   (self-signed IP cert).
5. All hosters share the same KEY/IV values. The internal JS obfuscation
   masks this with lookups such as `ue(773)`, `ue(686)`, etc. — the derived bytes are always
   identical for every domain.
Hosters covered (origin counts in the DB, 2026-05-15):
- embedseek (20271), seekplayer (20271) — mirror sites that share the same hash_id
- rpmplay (15317)
- upns (14287)
- player4me (41040)
- easyvidplayer (47588)
About 159k playback sources in total.
"""
from __future__ import annotations
import json
import logging
import re
from urllib.parse import urlparse
from cryptography.hazmat.primitives import padding
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from app.extractors._fetch import _DEFAULT_UA, browser_get
from app.extractors._models import HosterDead, StreamSource
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Fixed AES-CBC key and IV used by `_decrypt` for the hex-encoded
# `/api/v1/video` response. Both are exactly 16 bytes long.
_KEY = b"kiemtienmua911ca"
_IV = b"1234567890oiuytr"
# Hostname matching: 6 base hosts × subdomains × TLD variants.
# The pattern accepts at most ONE optional subdomain label made of [a-z0-9]
# (so the bare domain matches too), followed by one of the base host names and
# one of the listed TLDs. Matching is case-insensitive.
# Examples:
# my.embedseek.online, vip.seekplayer.vip, my.rpmplay.online,
# my.upns.online, vip.player4me.vip, p.easyvidplayer.com
_HOST_RE = re.compile(
    r"^(?:[a-z0-9]+\.)?(?:embedseek|seekplayer|rpmplay|upns|player4me|easyvidplayer)\."
    r"(?:online|vip|com|net|io|me|tv)$",
    re.IGNORECASE,
)
def matches(url: str) -> bool:
    """Return True when *url* points at one of the hosts covered by this extractor.

    Only the hostname part of the URL is inspected, and it is tested against
    ``_HOST_RE``. A URL that cannot be parsed, or that has no hostname, is
    reported as not matching rather than raising.
    """
    try:
        hostname = urlparse(url).hostname
    except Exception:
        # Malformed input (e.g. an unterminated IPv6 literal) is simply "not ours".
        return False
    if not hostname:
        return False
    return _HOST_RE.match(hostname) is not None
def _decrypt(hex_str: str) -> str:
    """Decrypt a hex-encoded AES-CBC payload and return it as text.

    The hex string is converted to bytes, decrypted with the module-level
    ``_KEY`` / ``_IV`` in CBC mode, stripped of its PKCS7 padding (128-bit
    block size) and decoded as UTF-8. Undecodable byte sequences are replaced
    instead of raising.

    ``bytes.fromhex`` raises ``ValueError`` for non-hex input; any error raised
    by the decryptor or the unpadder propagates unchanged to the caller.
    """
    ciphertext = bytes.fromhex(hex_str)

    decryptor = Cipher(algorithms.AES(_KEY), modes.CBC(_IV)).decryptor()
    padded = decryptor.update(ciphertext) + decryptor.finalize()

    pkcs7 = padding.PKCS7(128).unpadder()
    plaintext = pkcs7.update(padded) + pkcs7.finalize()
    return plaintext.decode("utf-8", errors="replace")
def extract(page_url: str, *, timeout: float = 30.0) -> list[StreamSource] | None:
    """Resolve an embed page URL of a seekplayer-engine hoster to its HLS source.

    The video id is read from the URL fragment (``#<id>``) or, when the
    fragment is empty, from the ``id`` query parameter. The hoster's
    ``/api/v1/video`` endpoint is then queried and its hex-encoded, encrypted
    JSON reply is decrypted with ``_decrypt`` and parsed.

    Args:
        page_url: Embed page URL, e.g. ``https://my.embedseek.online/#<hash_id>``.
        timeout: Timeout forwarded to ``browser_get``.

    Returns:
        A one-element list holding the ``m3u8`` source, or ``None`` when the
        hostname is not handled here, no video id is present, the API call
        does not return a non-empty HTTP 200 body, decryption or JSON parsing
        fails, the payload is not a JSON object, or it has no usable
        ``source`` string.

    Raises:
        HosterDead: The API answered 404/410, or the decrypted payload carries
            a truthy ``error`` field.
    """
    parsed = urlparse(page_url)
    if not parsed.hostname or not _HOST_RE.match(parsed.hostname):
        return None

    # The id normally lives in the `#<id>` fragment; when the client passed the
    # URL without `#` (e.g. after nav.replace), fall back to the `?id=` query
    # parameter.
    hash_id = parsed.fragment.strip()
    if not hash_id and parsed.query:
        from urllib.parse import parse_qs

        hash_id = (parse_qs(parsed.query).get("id") or [""])[0]
    if not hash_id:
        log.info("seekplayer-engine: no hash_id w %s", page_url)
        return None

    host = f"{parsed.scheme}://{parsed.hostname}"
    api_url = f"{host}/api/v1/video?id={hash_id}&w=1920&h=1080&r="
    headers = {
        "User-Agent": _DEFAULT_UA,
        "Accept": "*/*",
        "Referer": f"{host}/",
    }
    r = browser_get(api_url, headers=headers, timeout=timeout)
    if r.status_code in (404, 410):
        raise HosterDead(f"seekplayer-engine {page_url}: HTTP {r.status_code}")
    if r.status_code != 200 or not r.text:
        log.info("seekplayer-engine: api fail %s status=%s", api_url, r.status_code)
        return None

    # Deliberately best-effort: any decrypt/parse failure means "no source",
    # not a crash of the calling pipeline.
    try:
        plaintext = _decrypt(r.text)
    except Exception as e:
        log.warning("seekplayer-engine: decrypt fail dla %s: %s", api_url, e)
        return None
    try:
        data = json.loads(plaintext)
    except Exception as e:
        log.warning("seekplayer-engine: JSON parse fail dla %s: %s", api_url, e)
        return None

    # json.loads may legally return a list, string, number or None. Everything
    # below needs a JSON object, so reject other shapes instead of failing
    # with AttributeError on `.get`.
    if not isinstance(data, dict):
        log.warning(
            "seekplayer-engine: unexpected payload type %s for %s",
            type(data).__name__,
            api_url,
        )
        return None

    # Same-engine hosters answer `{"error": "..."}` when the video does not exist.
    if data.get("error"):
        raise HosterDead(f"seekplayer-engine {page_url}: {data['error']}")

    # Only string values are usable as URLs; anything else (null, number,
    # nested object) is treated as absent rather than crashing on `.strip()`.
    raw_source = data.get("source")
    raw_cf = data.get("cf")
    source = raw_source.strip() if isinstance(raw_source, str) else ""
    cf = raw_cf.strip() if isinstance(raw_cf, str) else ""
    if not source:
        return None

    # Source: IP-bound m3u8 URL on a CDN edge
    # (e.g. `185.237.107.146/v4/<sig>/<exp>/ty/<hash>/master.m3u8`).
    # The token is signed for the VPS IP — the proxy serves the segments from
    # the same IP, so that is fine.
    # The CDN serves its certificate on a bare IP — fetching requires
    # verify=False (stream_proxy.py already has such a branch for IP-host m3u8).
    return [
        StreamSource(
            link=source,
            quality=None,
            type="m3u8",
            referer=f"{host}/",
            raw={
                "proxy_no_verify": True,
                "cf_fallback": cf or None,
                "engine": "seekplayer",
            },
        )
    ]