goon/app/api/playback.py
https://github.com/goon-foss/goon 642f1ab8b8 Mobile 0.1.9: OTA enable, WebView cookie-dismiss fix, porndoe connector
Mobile / OTA:
- Enable Expo Updates (app.json + AndroidManifest) → api.goon-foss.org
- Bump 0.1.6 → 0.1.9 (build.gradle, app.json, appVersion.ts, main.py /version)
- backend.ts: default public backend auto-connect (no manual login)

WebView fallback fix (PlayerScreen INJECTED_JS):
- Auto-dismiss cookie/consent gates (hqporner et al. blocked kt_player init)
- Context-scoped: only clicks consent buttons inside cookie/gdpr containers
- Retry window for <source>.src polling raised 5→15 ticks (post-dismiss init)

Resolver:
- Series-position + modifier mismatch detector (Episode 2≠4, BTS/unedited)
  → composite_score hard-reject / cap; wired into scene_score + bulk_dedup
- aggregator-mode candidate query: LIMIT 500 + title-match ordering

Connectors:
- porndoe.com browse scraper (JSON-LD VideoObject) — theporndude audit pilot

landing: APK links → goon-v0.1.9.apk

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-22 11:20:57 +02:00

538 lines
22 KiB
Python

"""POST /scenes/{scene_id}/playback/{playback_id}/resolve — rozwiązuje stream URL.
Mobile apka woła ten endpoint na klik "Watch" — backend ekstraktuje świeży
stream URL (m3u8/mp4) z page tube'a i zwraca go. Mobile otwiera URL przez
Linking.openURL → Android player chooser (MX Player / VLC / browser).
Stream URLs są podpisane/expire (zwykle ~kilka godzin) — nie cache'ujemy ich
w DB, tylko resolve on-demand. Logika ekstrakcji per-tube w `app.extractors`.
**Dead-link detection**: gdy hoster embed page mówi "Video deleted/not found",
oznaczamy `PlaybackSource.dead_at = now()` — API dalej go nie listuje, mobile
nie pokaże martwego buttonu.
"""
from __future__ import annotations
import logging
import re
import uuid
from datetime import UTC, datetime
from typing import Annotated, Any
from fastapi import APIRouter, Depends, HTTPException, Request, status
from pydantic import BaseModel
from sqlalchemy.orm import Session
from app.api.schemas import PlaybackSourceOut
from app.auth import require_api_key
from app.db import get_session
from app.extractors import (
HosterDead,
StreamSource,
TubePageError,
extract_stream_from_hoster,
try_extract,
)
from app.models.playback_source import PlaybackSource
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Router for the scene playback endpoints (mounted under /scenes); every route
# registered on it depends on `require_api_key`.
router = APIRouter(prefix="/scenes", tags=["playback"], dependencies=[Depends(require_api_key)])
# CDN-domain allowlist for mobile direct fetch — the token IS time-bound (not IP-bound),
# verified with a cross-IP curl test on 2026-05-18. Mobile ExoPlayer downloads the
# manifest + segments straight from the CDN, **zero VPS bandwidth**. Critical for the
# public release (TB+/month).
#
# Verified time-bound:
# - xvideos-cdn.com, xnxx-cdn.com (WGCZ Holding) — signed token in the path + exp_time
# - phncdn.com (pornhub), ypncdn.com (youporn), rdtcdn.com (redtube) — validfrom+validto+hash
# - privatehost.com (pornhat CDN) — sign + exp_time, no Referer requirement
# - sxyprn.com — signed path
# - eporner.com CDN — IP literal in the path, but the CDN ignores it
#
# NOT in the allowlist (IP-bound, they need the proxy):
# - premilkyway.com (latestpornvideo) — 403 cross-IP
# - tnmr.org (mypornerleak) — 403 cross-IP
# - porntrex.com/get_file — single-use token (410 after reuse)
# - freshporno.org/get_file — cv= signed token, IP-bound
# - sn.porn-xp.com, porn00.org — explicit force_proxy
#
# The pattern has two alternatives: a known CDN name followed by a 2-4 letter
# TLD, or a host segment that is exactly sxyprn.com / any *.eporner.com
# subdomain (anchored on a preceding "/" or start of string and a trailing "/").
_TIME_BOUND_CDN_RE = re.compile(
    r"\b(?:"
    r"xvideos-cdn|xnxx-cdn|phncdn|ypncdn|rdtcdn" # mainstream
    r"|privatehost" # pornhat
    r")\.[a-z]{2,4}"
    r"|(?:^|/)(?:sxyprn\.com|[\w\-]+\.eporner\.com)/",
    re.IGNORECASE,
)
# IP-BOUND CDN signature — the token is bound to the requester's IP, so a
# cross-IP fetch returns 403. These need the mobile WebView fallback (the phone
# extracts the URL from its own session instead of the VPS doing it).
# Shared KVS infrastructure across several hosters (luluvid movies, mypornerleak,
# latestpornvideo) — they all use the same CDN pool.
_IP_BOUND_CDN_RE = re.compile(
    r"\b(?:"
    r"premilkyway\.com" # latestpornvideo
    r"|tnmr\.org" # mypornerleak legacy + luluvid movies (cdn-tnmr.org)
    r"|acek-cdn\.com" # mypornerleak current
    r")\b",
    re.IGNORECASE,
)
class StreamLink(BaseModel):
    """A single stream URL variant (different quality / container).

    `stream_url` = URL of the video file (mp4/m3u8/webm) — a proxy-wrapped URL
    through the backend VPS (`/proxy/{token}/play.ext`). Safe fallback when the
    CDN binds the URL to the extractor's IP (e.g. fpo.xxx with the
    kt_remote_ips cookie). Bandwidth goes through the VPS.

    `direct_url` + `headers` = the raw CDN URL plus the headers for fetching it
    directly from the device. Most tube CDNs (xhamster, redtube, watchporn,
    eporner) return the content correctly when the mobile player sends the
    `Referer` + `User-Agent` from `headers`. Mobile tries direct FIRST — when
    the CDN answers 403/410 (IP-bound) it falls back to `stream_url` (proxy).
    That means 0 VPS bandwidth for most scenes.

    `embed_url` = URL of an embed/hoster page (HTML, e.g. StreamWish, doodporn)
    — mobile opens it in a WebView. Type: 'hoster'.
    """

    stream_url: str | None = None
    embed_url: str | None = None
    direct_url: str | None = None
    headers: dict[str, str] | None = None
    quality: str | None = None
    type: str | None = None # mime/ext, e.g. 'video/mp4', 'application/x-mpegURL'
    # Extractor metadata; `_proxify_link` reads the keys proxy_impersonate,
    # force_proxy, mobile_direct_ok, refetch_url and refetch_hoster from it.
    raw: dict[str, Any] | None = None
class ResolveOut(BaseModel):
    # Response body of the two resolve endpoints (used as their `response_model`).

    # The playback source row the links were resolved for.
    source: PlaybackSourceOut
    # The link chosen by `_pick_best`, or None.
    best: StreamLink | None = None
    # Every resolved link variant.
    links: list[StreamLink] = []
# Router for the movie playback endpoints (mounted under /movies); like the
# scene router, every route on it depends on `require_api_key`.
movies_router = APIRouter(
    prefix="/movies", tags=["movies-playback"], dependencies=[Depends(require_api_key)]
)
@movies_router.post("/{movie_id}/playback/{playback_id}/resolve", response_model=ResolveOut)
def resolve_movie_playback(
    movie_id: uuid.UUID,
    playback_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> ResolveOut:
    """Resolve playable links for one movie playback source.

    Movie counterpart of `/scenes/{id}/playback/{pb}/resolve`. Handling depends
    on the source's origin:

    - 'paradisehill' → only the page URL is returned, as `embed_url` (the Yii2
      player needs a login session, so mobile opens the page in a WebView).
    - 'mangoporn:host', 'streamporn:host', 'pandamovies:host' → `embed_url` is
      the hoster embed (doodstream/lulustream/rpmplay/etc.). A direct stream
      URL is extracted locally through `extract_stream_from_hoster`; when that
      does not work only the embed link is returned and mobile's
      PlayerScreen.WebViewMode pulls the URL out with JS (as for scenes).

    Raises:
        HTTPException: 404 when the source is missing or belongs to another
            movie; 410 when it is already dead or the hoster reports the video
            as deleted; 502 when no link at all could be produced.
    """
    from app.models.movie_playback_source import MoviePlaybackSource

    pb = session.get(MoviePlaybackSource, playback_id)
    if pb is None or pb.movie_id != movie_id:
        raise HTTPException(status_code=404, detail="movie playback source not found")
    if pb.dead_at is not None:
        raise HTTPException(
            status_code=410,
            detail=f"playback dead: {pb.dead_reason or 'unknown'}",
        )

    referer = pb.page_url
    if pb.origin == "paradisehill":
        # WebView fallback only — the paradisehill player needs a logged-in
        # session to hand out the stream.
        links: list[StreamLink] = [
            StreamLink(
                stream_url=None,
                embed_url=pb.page_url,
                quality=pb.quality,
                type="hoster",
                raw={"origin": pb.origin},
            )
        ]
    else:
        # dooplay mirror sources: try to extract a direct stream from the hoster URL.
        links = []
        target = pb.embed_url or pb.page_url
        stream: str | None = None
        try:
            stream = extract_stream_from_hoster(target, referer=referer)
        except HosterDead as e:
            # The hoster explicitly says "video deleted" — mark the source dead
            # and do NOT offer the embed fallback (mobile ExoPlayer would get a
            # 404 HTML page and try to save it as a .bin file; bug report
            # 2026-05-16 about streamtape mass-downloading .bin files).
            pb.dead_at = datetime.now(UTC)
            pb.dead_reason = str(e)[:512]
            session.commit()
            log.info("marked movie playback %s dead (origin=%s reason=%s)", pb.id, pb.origin, e)
            raise HTTPException(status_code=410, detail=f"playback dead: {e}") from e
        except Exception as e:
            # Best effort: any other extractor failure just leaves us without
            # a direct stream; the embed link below is still offered.
            log.warning("movie hoster extract failed for %s: %s", target, e)

        if stream:
            if _IP_BOUND_CDN_RE.search(stream):
                # IP-bound CDN (luluvid → cdn-tnmr.org, etc.) — the token is
                # bound to the VPS IP, so mobile direct would get 403. Drop the
                # stream and rely on embed_url (mobile WebView).
                log.info(
                    "movie playback %s: stream URL IP-bound CDN — skip, WebView fallback",
                    pb.id,
                )
                stream = None
            elif "mxcontent.net" in stream.lower():
                # The Mixdrop mxcontent CDN needs a curl_cffi JA3 fingerprint,
                # which would force the VPS proxy. Pre-public decision: skip
                # the mixdrop direct stream and fall back to embed_url (WebView
                # from the phone's IP) — VPS bandwidth and anonymity win over
                # UX. A movie usually has 10+ alternative hosters
                # (voe/luluvid/doply/etc.) the user can pick instead.
                log.info(
                    "movie playback %s: mixdrop mxcontent — skip (VPS-proxy required), WebView fallback",
                    pb.id,
                )
                stream = None

        if stream:
            links.append(
                StreamLink(
                    stream_url=stream,
                    embed_url=None,
                    quality=pb.quality,
                    type="m3u8" if ".m3u8" in stream.lower() else "mp4",
                    raw={"origin": pb.origin, "host": target},
                )
            )
        # The embed is always added as a fallback — the mobile WebView may be
        # able to catch the URL from the page's JS.
        if pb.embed_url:
            links.append(
                StreamLink(
                    stream_url=None,
                    embed_url=pb.embed_url,
                    quality=pb.quality,
                    type="hoster",
                    raw={"origin": pb.origin},
                )
            )

    if not links:
        raise HTTPException(status_code=502, detail="no playable links")

    proxified = [_proxify_link(candidate, referer) for candidate in links]
    best = _pick_best(proxified) if proxified else None
    return ResolveOut(
        source=PlaybackSourceOut.model_validate(pb),
        best=best,
        links=proxified,
    )
def _requester_tag(request: Request) -> str:
    """Build the audit tag recorded when a client marks a source dead.

    The tag is ``ip=<address> ua=<user agent>``. It is stored in `dead_reason`
    and logged so there is a trail to follow if a leaked APK key were used to
    mass-corrupt data.

    Args:
        request: The incoming request; only its headers and `client` are read.

    Returns:
        The tag string. The IP is the first non-empty entry of
        `X-Forwarded-For` (preferred because the app sits behind an nginx
        proxy), else the socket peer address, else ``"?"``. The User-Agent is
        truncated to 40 characters.
    """
    forwarded = request.headers.get("x-forwarded-for", "")
    # Skip blank list elements (e.g. ", 5.6.7.8") so the tag never carries an
    # empty address while a usable one is available.
    hops = (hop.strip() for hop in forwarded.split(","))
    ip = next((hop for hop in hops if hop), "")
    if not ip:
        ip = request.client.host if request.client else "?"
    ua = (request.headers.get("user-agent") or "")[:40]
    return f"ip={ip} ua={ua}"
@router.post(
    "/{scene_id}/playback/{playback_id}/mark-dead",
    status_code=status.HTTP_204_NO_CONTENT,
)
def mark_playback_dead(
    scene_id: uuid.UUID,
    playback_id: uuid.UUID,
    request: Request,
    session: Annotated[Session, Depends(get_session)],
) -> None:
    """Mark a scene playback source dead at the user's request.

    Triggered by a long-press on a playback chip in the mobile app (bug report
    2026-05-12, dd17c709). The automatic mark-dead flow only covers resolve
    failures (HosterDead / TubePageError); this endpoint lets a user flag links
    that look fine to the backend (200 OK) but are broken in practice — e.g. a
    wrongly matched scene, an ad redirect, or a hoster serving a placeholder.

    Audit: the requester's IP + User-Agent go into `dead_reason` and the log so
    a leaked APK key cannot silently mass-destroy data without leaving a trail.

    A source that is already dead is left untouched.

    Raises:
        HTTPException: 404 when the source is missing or belongs to another scene.
    """
    pb = session.get(PlaybackSource, playback_id)
    if pb is None or pb.scene_id != scene_id:
        raise HTTPException(status_code=404, detail="playback source not found for scene")

    if pb.dead_at is not None:
        # Already dead — keep the original timestamp and reason.
        return

    tag = _requester_tag(request)
    pb.dead_at = datetime.now(UTC)
    pb.dead_reason = f"user-marked dead (mobile long-press) {tag}"[:512]
    session.commit()
    log.info("user marked playback %s dead (origin=%s %s)", pb.id, pb.origin, tag)
@movies_router.post(
    "/{movie_id}/playback/{playback_id}/mark-dead",
    status_code=status.HTTP_204_NO_CONTENT,
)
def mark_movie_playback_dead(
    movie_id: uuid.UUID,
    playback_id: uuid.UUID,
    request: Request,
    session: Annotated[Session, Depends(get_session)],
) -> None:
    """Mark a movie playback source dead at the user's request.

    Movie variant of the scene mark-dead endpoint (long-press in MovieDetail).
    The requester's IP + User-Agent are written to `dead_reason` and the log.
    A source that is already dead is left untouched.

    Raises:
        HTTPException: 404 when the source is missing or belongs to another movie.
    """
    from app.models.movie_playback_source import MoviePlaybackSource

    pb = session.get(MoviePlaybackSource, playback_id)
    if pb is None or pb.movie_id != movie_id:
        raise HTTPException(status_code=404, detail="movie playback source not found")

    if pb.dead_at is not None:
        # Already dead — keep the original timestamp and reason.
        return

    tag = _requester_tag(request)
    pb.dead_at = datetime.now(UTC)
    pb.dead_reason = f"user-marked dead (mobile long-press) {tag}"[:512]
    session.commit()
    log.info("user marked movie playback %s dead (origin=%s %s)", pb.id, pb.origin, tag)
@router.post("/{scene_id}/playback/{playback_id}/resolve", response_model=ResolveOut)
def resolve_playback(
    scene_id: uuid.UUID,
    playback_id: uuid.UUID,
    session: Annotated[Session, Depends(get_session)],
) -> ResolveOut:
    """Resolve fresh stream links for one scene playback source.

    The site tag is taken from the source's origin (`tube:<tag>` or the legacy
    `pornapp:<tag>`) and passed with the page URL to `try_extract`. Every
    returned source is converted to a `StreamLink` and proxified.

    Raises:
        HTTPException: 404 when the source is missing or belongs to another
            scene; 410 when it is already dead, or is marked dead now because
            of HosterDead or a tube page 404/410; 501 for an origin with no
            resolver; 502 for any other tube HTTP error; 503 when the
            extractor returned nothing (treated as transient).
    """
    pb = session.get(PlaybackSource, playback_id)
    if pb is None or pb.scene_id != scene_id:
        raise HTTPException(status_code=404, detail="playback source not found for scene")
    if pb.dead_at is not None:
        raise HTTPException(
            status_code=410,
            detail=f"playback source marked dead: {pb.dead_reason or 'unknown'}",
        )

    page_url = pb.page_url
    # `pornapp:` is the legacy origin format from before the pornapp-removal
    # migration; after phase 2 only `tube:` will remain. Both carry the site
    # tag after the first colon.
    if not pb.origin.startswith(("pornapp:", "tube:")):
        raise HTTPException(
            status_code=501,
            detail=f"resolve not implemented for origin '{pb.origin}'",
        )
    sitetag = pb.origin.split(":", 1)[1]

    try:
        sources = try_extract(sitetag, page_url)
    except HosterDead as e:
        pb.dead_at = datetime.now(UTC)
        pb.dead_reason = str(e)[:512]
        session.commit()
        log.info("marked playback %s dead (origin=%s reason=%s)", pb.id, pb.origin, e)
        raise HTTPException(status_code=410, detail=f"playback dead: {e}") from e
    except TubePageError as e:
        if e.status_code not in (404, 410):
            # Any other tube HTTP error is reported as 502, nothing is marked dead.
            log.warning("tube fetch http error %s for %s", e.status_code, pb.page_url)
            raise HTTPException(
                status_code=502,
                detail=f"tube fetch failed: HTTP {e.status_code}",
            ) from e
        # The tube page is gone (404/410) — mark dead and propagate as 410.
        reason = f"tube page {e.status_code} {pb.page_url}"
        pb.dead_at = datetime.now(UTC)
        pb.dead_reason = reason[:512]
        session.commit()
        log.info("marked playback %s dead (origin=%s reason=%s)", pb.id, pb.origin, reason)
        raise HTTPException(status_code=410, detail=f"playback dead: {reason}") from e

    if not sources:
        # The extractor returned nothing — a TRANSIENT failure (network glitch,
        # tube temporarily 503, changed ad-network response, race condition).
        # `dead_at` is NOT set here: doing so used to produce false-positive
        # permanent deaths for freshporno scenes that worked on the next
        # attempt (bug report 2026-05-12).
        #
        # Permanent death comes ONLY from explicit signals:
        # - HosterDead (the hoster page says "video deleted")
        # - TubePageError 404/410 (the page does not exist)
        # Everything else is transient; mobile gets 503 and the user can retry.
        log.info(
            "extractor None for playback %s (origin=%s) — transient, not marking dead",
            pb.id, pb.origin,
        )
        # 503 (not 410!) so mobile does NOT show "the tube removed this video" —
        # that code is reserved for permanent removal.
        # Sentry filters HTTPException 502/503/504 in `_sentry_before_send`
        # (main.py); without that, GOON-3 flooded the issue list
        # (16 events / 5 h for an expected case).
        raise HTTPException(
            status_code=503,
            detail="extraction failed temporarily — retry possible",
        )

    # Per-source referer: some extractors (yt-dlp, embed-iframe) return a stream
    # URL whose CDN expects the Referer of the embed page (the iframe host), not
    # of the original tube page. E.g. 0dayxx page → watchporn.to/embed iframe →
    # the stream wants `Referer: watchporn.to/` (with `Referer: 0dayxx.com` the
    # CDN answers 410). StreamSource.referer carries that; page_url is the
    # fallback when the extractor did not set it.
    links = [
        _proxify_link(_stream_source_to_link(s), s.referer or page_url)
        for s in sources
    ]
    best = _pick_best(links) if links else None
    return ResolveOut(
        source=PlaybackSourceOut.model_validate(pb),
        best=best,
        links=links,
    )
# User-Agent placed (together with the Referer) in the `headers` of every link
# built by `_proxify_link`, for the device's direct fetch.
DEFAULT_PLAYER_UA = (
    "Mozilla/5.0 (Linux; Android 13) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/140.0.0.0 Mobile Safari/537.36"
)
def _proxify_link(link: StreamLink, referer: str) -> StreamLink:
    """Return `link` enriched with proxy and direct-fetch information.

    The result carries:
    - `stream_url`: the proxy URL (fallback when the direct fetch fails)
    - `direct_url`: the raw CDN URL (preferred — 0 VPS bandwidth), or the proxy
      URL again when a direct fetch cannot work
    - `headers`: Referer + User-Agent for the direct fetch

    The mobile player tries `direct_url` FIRST and falls back to `stream_url`
    on a 403/410. A link without `stream_url` (embed-only) is returned as is.
    """
    if not link.stream_url:
        return link

    from app.api.stream_proxy import make_token

    raw_url = link.stream_url
    flags = link.raw or {}

    # Extractor flags in `raw`:
    # - proxy_impersonate: fetch with curl_cffi chrome JA3 (mxcontent etc.)
    # - force_proxy: the token is bound to the VPS IP (porn00 v-acctoken, pornxp
    #   sv.porn-xp.com signed path) — mobile direct is ALWAYS 403, so trying it
    #   would only make every playback "blink" (direct fail → proxy fallback).
    # - mobile_direct_ok: the CDN token is time-bound (not IP-bound), so even a
    #   manifest may go straight to the device with zero VPS bandwidth.
    use_impersonate = bool(flags.get("proxy_impersonate"))
    force_proxy = bool(flags.get("force_proxy"))
    # Time-bound CDNs are also auto-detected by domain, without a per-extractor
    # flag. Critical for the public release: all mainstream tubes
    # (xvideos/xnxx/pornhub/youporn/redtube + pornhat) hand out time-bound URLs
    # that work cross-IP.
    mobile_direct_ok = bool(flags.get("mobile_direct_ok")) or bool(
        _TIME_BOUND_CDN_RE.search(raw_url)
    )

    # refetch_url / refetch_hoster: the embed URL to re-extract from when the
    # token has expired (same-session cookie binding for mixdrop). Without it an
    # mp4 token with no cookies ends in 403.
    token = make_token(
        raw_url,
        referer,
        impersonate=use_impersonate,
        refresh=flags.get("refetch_url"),
        refresh_hoster=flags.get("refetch_hoster"),
    )

    # The extension is decided from link.type (trusting the extractor), with the
    # URL as a fallback hint. Pornhat: the raw URL is
    # `.../get_file/.../<id>.mp4/` but the CDN 302s to an HLS manifest, so the
    # extractor marks type='m3u8' to make ExoPlayer use HlsMediaSource (the
    # `.mp4` path used to mislead the player → "no extractors").
    type_lower = (link.type or "").lower()
    url_lower = raw_url.lower()
    is_manifest_type = type_lower in {"m3u8", "hls", "mpd"}
    if is_manifest_type:
        ext = "mpd" if type_lower == "mpd" else "m3u8"
    elif ".m3u8" in url_lower:
        ext = "m3u8"
    elif ".mpd" in url_lower:
        ext = "mpd"
    else:
        ext = "mp4"
    proxied = f"/proxy/{token}/play.{ext}"

    # `direct_url` is normally the raw CDN URL, with these exceptions where the
    # proxy URL is handed out as the "direct" one:
    # - manifest types (m3u8/hls/mpd): the manifest has to be rewritten so that
    #   segments/keys also go through the proxy, and ExoPlayer picks its
    #   extractor by URL extension (a `.mp4` pornhat URL → Mp4Extractor → fails
    #   on HLS content). One hop through the VPS is the only way to keep
    #   manifest + segments consistent — unless mobile_direct_ok says the CDN
    #   token is time-bound, in which case the device can fetch it directly.
    # - chrome-JA3 CDNs (mxcontent): an OkHttp direct fetch gets 403 → proxy
    #   fallback → extra round trip plus "no extractors" before the retry.
    # - force_proxy: direct can never work.
    must_proxy = (
        use_impersonate
        or force_proxy
        or (is_manifest_type and not mobile_direct_ok)
    )
    direct_for_mobile = proxied if must_proxy else raw_url

    return StreamLink(
        stream_url=proxied,
        embed_url=link.embed_url,
        direct_url=direct_for_mobile,
        headers={"Referer": referer, "User-Agent": DEFAULT_PLAYER_UA},
        quality=link.quality,
        type=link.type,
        raw=link.raw,
    )
def _stream_source_to_link(s: StreamSource) -> StreamLink:
    """Map an extractor `StreamSource` onto the API's `StreamLink`.

    A source of type 'hoster' (case-insensitive) becomes an `embed_url` (mobile
    opens it in a WebView); anything else (mp4/m3u8/mpd) becomes a `stream_url`
    that mobile plays in the native player through /proxy.
    """
    if (s.type or "").lower() == "hoster":
        stream_url, embed_url = None, s.link
    else:
        stream_url, embed_url = s.link, None
    return StreamLink(
        stream_url=stream_url,
        embed_url=embed_url,
        quality=s.quality,
        type=s.type,
        raw=s.raw,
    )
def _pick_best(links: list[StreamLink]) -> StreamLink | None:
    """Pick the best link out of `links`, or None when none is usable.

    Preference order:
    1. Direct video (non-empty `stream_url`); embed-only links are considered
       only when no link has a direct URL (mobile shows "Open in browser").
    2. Highest quality (parsed to an int from '720p' / '1080p' / '4k').
    3. At equal quality, mp4 over m3u8 (mp4 is easier for MX Player).

    On a full tie the earliest link in `links` wins.
    """
    candidates = [link for link in links if link.stream_url]
    if not candidates:
        candidates = [link for link in links if link.embed_url]
    if not candidates:
        return None

    def rank(link: StreamLink) -> tuple[int, int]:
        url_low = (link.stream_url or link.embed_url or "").lower()
        type_low = (link.type or "").lower()
        looks_like_mp4 = ".mp4" in url_low or "mp4" in type_low or "direct" in type_low
        return (_quality_to_int(link.quality), int(looks_like_mp4))

    return max(candidates, key=rank)
_QUALITY_DIGITS_RE = re.compile(r"\d+")
def _quality_to_int(q: str | None) -> int:
"""Wyciąga liczbę pikseli z różnych formatów: '720p', '1080p Full HD', '4K', 'HD'."""
if not q:
return 0
s = q.lower().strip()
if "4k" in s or "uhd" in s:
return 2160
if "2k" in s or "qhd" in s:
return 1440
m = _QUALITY_DIGITS_RE.search(s)
if m:
return int(m.group(0))
if "fhd" in s:
return 1080
if "hd" in s:
return 720
if "sd" in s:
return 480
return 0