goon/app/extractors/tubes/fullmovies.py
goon-foss ad0284585b Initial commit
Goon — self-hosted aggregator for adult-content scene metadata.

Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites.
Cross-source deduplication via perceptual hash + Levenshtein distance.
FastAPI backend + APScheduler worker + React Native (Expo) mobile client.

FOSS, ad-free, donation-funded. See README for details.
2026-05-20 10:10:22 +02:00

68 lines
2.3 KiB
Python

"""fullmovies.xxx — direct mp4 sources extractor.
Detail page ma `<video class="video-js">` z multiple `<source>` (per quality):
`<source src='https://www.fullmovies.xxx/get_file/<token>/<dir>/<id>/<id>_2160m.mp4/' type='video/mp4' label="2160p" selected="true">`
`<source src='.../<id>_720m.mp4/' type='video/mp4' label="720p">`
`<source src='.../<id>_480m.mp4/' type='video/mp4' label="480p">`
URL pattern: `https://www.fullmovies.xxx/get_file/<signed_token>/<dir>/<id>/<id>_<q>m.mp4/`
- Trailing slash — server odsyła 302 na CDN.
- `<signed_token>` IP-bound do requester (jak HQPorner /get_file/). Mobile direct = 403.
- force_proxy=True wymusza wszystko przez goon proxy (proxy follows redirect na CDN).
Quality labels: 2160p / 1080p / 720p / 480p / 360p.
"""
from __future__ import annotations
import logging
import re
from app.extractors._fetch import fetch_tube_html
from app.extractors._models import StreamSource
log = logging.getLogger(__name__)
# Matches one `<source>` tag whose attributes appear in the order src, type,
# label. Either quote character (' or ") is accepted around each attribute
# value, and matching is case-insensitive. Named groups:
#   url - absolute http(s) link ending in `.mp4`, with an optional trailing slash
#   q   - raw text of the `label` attribute (e.g. "720p")
_SOURCE_RE = re.compile(
    r"""<source\s+src=['"](?P<url>https?://[^'"]+\.mp4/?)['"]"""
    r"""\s+type=['"]video/mp4['"]"""
    r"""\s+label=['"](?P<q>[^'"]+)['"]""",
    re.IGNORECASE,
)
def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | None:
    """Extract direct mp4 stream sources from a fullmovies.xxx detail page.

    Args:
        page_url: URL of the detail page to fetch. It is also stored as the
            ``referer`` of every returned source.
        timeout: Timeout forwarded to ``fetch_tube_html``.

    Returns:
        One ``StreamSource`` per distinct ``<source>`` URL, ordered from the
        highest to the lowest numeric quality (labels that cannot be parsed
        rank as 0 and therefore come last), or ``None`` when the page has no
        matching ``<source>`` tag.
    """
    # NOTE(review): the failure contract of fetch_tube_html is not visible from
    # this module. A falsy result is treated as an empty page so that it ends
    # in the "no <source> tags" path below instead of crashing in finditer().
    html = fetch_tube_html(page_url, timeout=timeout) or ""

    seen: set[str] = set()
    result: list[StreamSource] = []
    for m in _SOURCE_RE.finditer(html):
        url = m.group("url")
        if url in seen:
            continue
        seen.add(url)
        result.append(
            StreamSource(
                link=url,
                type="mp4",
                quality=m.group("q"),
                referer=page_url,
                # The /get_file/ URL carries a signed token that is IP-bound to
                # the requester; without force_proxy a mobile client gets 403.
                # The proxy follows the 302 to the CDN. A fresh dict is built
                # per source so mutating one source's `raw` cannot leak into
                # the others.
                raw={"force_proxy": True},
            )
        )

    if not result:
        log.info("fullmovies: no <source> tags on %s", page_url)
        return None

    def _q(s: StreamSource) -> int:
        """Return the numeric part of a quality label, or 0 if unparseable."""
        # Tolerate surrounding whitespace and an upper-case suffix ("1080P").
        label = (s.quality or "0").strip().rstrip("pP")
        try:
            return int(label)
        except ValueError:
            return 0

    # Sort by quality, descending (2160p > 1080p > 720p > 480p > 360p).
    # list.sort stays stable with reverse=True, so equally ranked sources keep
    # their order of appearance on the page.
    result.sort(key=_q, reverse=True)
    return result