goon/app/extractors/tubes/sxyprn.py
goon-foss ad0284585b Initial commit
Goon — self-hosted aggregator for adult-content scene metadata.

Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites.
Cross-source deduplication via perceptual hash + Levenshtein distance.
FastAPI backend + APScheduler worker + React Native (Expo) mobile client.

FOSS, ad-free, donation-funded. See README for details.
2026-05-20 10:10:22 +02:00

85 lines
2.6 KiB
Python

"""sxyprn.com — direct stream extractor.

Page → `data-vnfo` JSON → URL transform using the boo/ssut51/preda algorithm → .vid mp4.

Sxyprn URLs in `data-vnfo` have the form `/cdn/c8/<seg3>/<seg4>/<ts>/<seg6>/<seg7>.vid`,
which must be rebuilt before the CDN will serve video bytes (without the rebuild the
endpoint returns only a single 10-byte timestamp as a placeholder/probe response).
The algorithm, taken from `main2.js`:

    tmp[1] += "8/" + base64url("<ssut51(tmp[6])>-sxyprn.com-<ssut51(tmp[7])>")
    tmp[5] -= ssut51(tmp[6]) + ssut51(tmp[7])  # preda

where ssut51 sums the digits in a string. The segments are then joined back together
→ the final URL that serves the mp4.
Every fetch of the page returns a FRESH signed URL (different tokens and timestamp).
"""
from __future__ import annotations
import base64
import json
import logging
import re
from app.extractors._fetch import fetch_tube_html
from app.extractors._models import StreamSource
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Captures the contents of a single-quoted `data-vnfo='...'` attribute
# (one or more characters up to the next single quote) as group 1.
_VNFO_RE = re.compile(r"data-vnfo='([^']+)'")
def _ssut51(s: str) -> int:
    """Return the sum of the ASCII decimal digits (``0``-``9``) in ``s``.

    All other characters are ignored; an empty string or a string without
    digits yields ``0``.

    Only ASCII digits are counted on purpose: ``str.isdigit()`` is also true
    for characters such as the superscript ``"²"``, which ``int()`` rejects
    with ``ValueError``, and for non-ASCII decimal digits, which would
    otherwise be added to the sum.
    """
    return sum(int(ch) for ch in s if "0" <= ch <= "9")
def _boo(ss: int, es: int) -> str:
    """Encode ``<ss>-sxyprn.com-<es>`` as URL-safe base64 with ``.`` padding.

    The text is base64-encoded using the URL-safe alphabet (``-`` instead of
    ``+``, ``_`` instead of ``/``) and every ``=`` padding character is then
    replaced by ``.``.
    """
    token = f"{ss}-sxyprn.com-{es}".encode()
    encoded = base64.urlsafe_b64encode(token).decode()
    return encoded.replace("=", ".")
def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | None:
    """Build direct mp4 stream sources for a sxyprn page.

    The page HTML is obtained through ``fetch_tube_html``; the JSON held in
    its ``data-vnfo`` attribute is parsed and every usable ``/cdn/...`` path
    in it is rewritten into a full ``https://sxyprn.com`` URL.

    Args:
        page_url: URL of the page to fetch.
        timeout: Passed through to ``fetch_tube_html``.

    Returns:
        A non-empty list of ``StreamSource`` objects of type ``"mp4"``, or
        ``None`` when the attribute is missing, its JSON is invalid or not a
        non-empty object, or no entry could be transformed.
    """
    page_html = fetch_tube_html(page_url, timeout=timeout)

    match = _VNFO_RE.search(page_html)
    if match is None:
        log.warning("sxyprn: no data-vnfo in %s", page_url)
        return None

    try:
        vnfo = json.loads(match.group(1))
    except json.JSONDecodeError:
        log.warning("sxyprn: bad vnfo JSON in %s", page_url)
        return None

    if not (isinstance(vnfo, dict) and vnfo):
        return None

    sources: list[StreamSource] = []
    # Only the values (paths) matter; the keys are not used.
    for raw_path in vnfo.values():
        if not (isinstance(raw_path, str) and raw_path.startswith("/cdn/")):
            continue

        # A leading "/" makes parts[0] empty, so parts[1] is "cdn" and the
        # path needs at least 8 parts for indexes 5, 6 and 7 to exist.
        parts = raw_path.split("/")
        seg_count = len(parts)
        if seg_count < 8:
            log.warning("sxyprn: short path (%d segs) %s", seg_count, raw_path)
            continue

        try:
            sum6 = _ssut51(parts[6])
            sum7 = _ssut51(parts[7])
            # Append "8/<token>" to the second path component.
            parts[1] = f"{parts[1]}8/{_boo(sum6, sum7)}"
            # Reduce the numeric segment by both digit sums; int() raises
            # ValueError if it is not a number.
            parts[5] = str(int(parts[5]) - sum6 - sum7)
        except (ValueError, IndexError) as exc:
            log.warning("sxyprn: transform failed for %s: %s", raw_path, exc)
            continue

        stream_url = "https://sxyprn.com" + "/".join(parts)
        sources.append(StreamSource(link=stream_url, type="mp4"))

    # An empty result is reported as None rather than an empty list.
    return sources or None