Goon — self-hosted aggregator for adult-content scene metadata. Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites. Cross-source deduplication via perceptual hash + Levenshtein distance. FastAPI backend + APScheduler worker + React Native (Expo) mobile client. FOSS, ad-free, donation-funded. See README for details.
63 lines
2.2 KiB
Python
63 lines
2.2 KiB
Python
"""freshporno.org — KVS engine, BEZ `<source>` tagów.
|
|
|
|
The page uses kt_player (the legacy KVS Flash + JS player) — the URLs live inside the
JavaScript flashvars JSON (`video_url: 'function/0/<URL>'`) and in
`<a href="...?download=true">` links labelled "MP4 720p" / "MP4 480p".
|
|
|
|
We use the anchor pattern because it exposes ALL qualities with an explicit label
(flashvars only carry main + alt, i.e. at most 2 qualities):
`<a href="...get_file/...mp4/?download=true...">MP4 <q>p, ...`
|
|
|
|
Suggested videos in the sidebar use `data-preview="...get_file/.../<id>_preview.mp4"` —
a different pattern (not `<a href>`), so the anchor regex naturally skips them.
|
|
|
|
The CDN token is IP-bound to the VPS — a mobile client gets a 403 on a direct request,
but the proxy fallback works.
get_file answers with a 302 → `cdn4.freshporno.org/remote_control.php?...&file=<path>`,
a direct mp4 (not HLS). Type='mp4'.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import re
|
|
|
|
from app.extractors._fetch import fetch_tube_html
|
|
from app.extractors._models import StreamSource
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Download anchor emitted once per quality variant (main + alt streams):
#   <a href="<URL>?download=true...">MP4 <quality>p, <size>
# Group "url" is the get_file URL up to and including the trailing slash
# (the query string is excluded); group "q" is the label such as "720p".
# Matching is case-insensitive, so "q" may come back in either case.
_ANCHOR_QUALITY_RE = re.compile(
    r'<a\s+[^>]*href="(?P<url>https?://[^"]+/get_file/[^"]+\.mp4/)\?download=true[^"]*"'
    r'[^>]*>\s*MP4\s+(?P<q>\d{3,4}p)',
    flags=re.IGNORECASE,
)
|
|
|
|
|
|
def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | None:
    """Extract direct MP4 stream sources from a freshporno.org video page.

    Fetches the page HTML and collects every download anchor matched by
    ``_ANCHOR_QUALITY_RE``, keeping the first anchor seen for each file name.

    Args:
        page_url: URL of the video page to scrape.
        timeout: Timeout forwarded to ``fetch_tube_html``.

    Returns:
        ``StreamSource`` entries of type ``"mp4"`` sorted by numeric quality,
        highest first, or ``None`` when no download anchor was found.
    """
    # NOTE(review): the failure contract of fetch_tube_html is not visible
    # here. A falsy result (None / empty) is treated as an empty page so the
    # function returns None instead of raising TypeError inside finditer.
    html = fetch_tube_html(page_url, timeout=timeout) or ""

    seen_names: set[str] = set()
    sources: list[StreamSource] = []
    for match in _ANCHOR_QUALITY_RE.finditer(html):
        url = match.group("url")
        # The regex is case-insensitive, so the label may arrive as "720P";
        # normalise it so labels are uniform and sort correctly.
        quality = match.group("q").lower()
        # Dedupe on the last path segment (the captured URL has no query string).
        file_name = url.rstrip("/").rsplit("/", 1)[-1]
        if file_name in seen_names:
            continue
        seen_names.add(file_name)
        sources.append(StreamSource(link=url, type="mp4", quality=quality))

    if not sources:
        log.info("freshporno: no MP4 anchor matches on %s", page_url)
        return None

    sources.sort(key=_quality_rank, reverse=True)
    return sources


def _quality_rank(source: StreamSource) -> int:
    """Return the numeric part of a label such as ``"720p"``; -1 if missing or unparsable."""
    label = source.quality
    if not label:
        return -1
    try:
        return int(label.rstrip("pP"))
    except ValueError:
        return -1
|