goon/app/extractors/tubes/pornxp.py
goon-foss ad0284585b Initial commit
Goon — self-hosted aggregator for adult-content scene metadata.

Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites.
Cross-source deduplication via perceptual hash + Levenshtein distance.
FastAPI backend + APScheduler worker + React Native (Expo) mobile client.

FOSS, ad-free, donation-funded. See README for details.
2026-05-20 10:10:22 +02:00

54 lines
1.7 KiB
Python

"""pornxp.ph — direct mp4 sources extractor.
Detail page ma `<video>` z multiple `<source>` różnych jakości:
- `<source src="//sv.porn-xp.com/.../360.mp4">`
- `<source src="//sv.porn-xp.com/.../720.mp4">`
URL protocol-relative (`//`) — normalize do `https:`. CDN może być IP-bound (token
w path), playback.py i tak proxifies. Type='mp4'.
"""
from __future__ import annotations
import logging
import re
from app.extractors._fetch import fetch_tube_html
from app.extractors._models import StreamSource
log = logging.getLogger(__name__)

# Matches a `<source>` tag whose `src` is an absolute or protocol-relative URL
# ending in `/<height>.mp4` (3-4 digit height, e.g. 360, 720, 1080):
#   `<source src="//sv.porn-xp.com/.../720.mp4">`
# `src` does not have to be the first attribute; the whitespace required
# immediately before it keeps look-alikes such as `data-src` from matching.
# Groups: `url` = the full src value, `q` = the height digits.
_SOURCE_RE = re.compile(
    r'<source\s+(?:[^>]*?\s)?src="(?P<url>(?:https?:)?//[^"]+/(?P<q>\d{3,4})\.mp4)"',
    re.IGNORECASE,
)
def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | None:
    """Extract direct mp4 stream sources from a pornxp detail page.

    Fetches the page HTML, collects every URL matched by ``_SOURCE_RE``,
    normalizes protocol-relative URLs (``//host/...``) to ``https:``, drops
    duplicate URLs, and orders the result from highest to lowest quality.

    Args:
        page_url: URL of the detail page to scrape.
        timeout: Timeout forwarded to ``fetch_tube_html``.

    Returns:
        One ``StreamSource`` per unique URL, sorted by quality descending
        (e.g. 720p before 360p), or ``None`` when no ``<source>`` tag matched.
    """
    # NOTE(review): fetch_tube_html's failure contract is not visible from this
    # module. A falsy result (None / empty string) is treated as a page with no
    # sources instead of letting finditer() raise TypeError on None.
    html = fetch_tube_html(page_url, timeout=timeout) or ""

    # url -> captured height digits. A dict keeps first-seen order and makes
    # the first occurrence of a URL win, so duplicates are dropped for free.
    quality_by_url: dict[str, str] = {}
    for match in _SOURCE_RE.finditer(html):
        url = match.group("url")
        if url.startswith("//"):
            url = "https:" + url
        quality_by_url.setdefault(url, match.group("q"))

    if not quality_by_url:
        log.info("pornxp: no <source> tags on %s", page_url)
        return None

    # Highest quality first. The pattern only captures digits, so int() cannot
    # fail here; the sort is stable, so equal heights keep page order.
    ranked = sorted(
        quality_by_url.items(),
        key=lambda item: int(item[1]),
        reverse=True,
    )

    # The sv.porn-xp.com URL carries a signed, IP-bound token in its path, so a
    # mobile client playing it directly gets a 403. Force the proxy from the
    # start to avoid a visible flicker. Each source gets its own `raw` dict so
    # that mutating one source's metadata cannot leak into the others.
    return [
        StreamSource(
            link=url,
            type="mp4",
            quality=f"{height}p",
            raw={"force_proxy": True},
        )
        for url, height in ranked
    ]