goon/app/api/sources.py
https://github.com/goon-foss/goon 7979d5fa61 session work: bug-report fixes + WIP cleanup
User-facing bugs resolved (per bug_reports table 2026-05-25):
- 40cd28aa (short-scene filter): mobile api.ts default min_duration_sec=60
  hides 6519 sub-60s scenes across all list endpoints (Performer/Site/Tag/
  Browse). Caller may override with explicit 0.
- 5e89ef7e (porndoe needs cookies/play click): INJECTED_JS in PlayerScreen
  now auto-clicks player-poster overlay (player-poster-play, big-play-button,
  vjs-big-play-button, jw-icon-display, btn-big-play, mejs__overlay-button,
  play-button, btn-play, videoPlayButton). Triggered same interval as
  consent-dismiss + ad-iframe removal.
- b1b5e1a2 (Mixdrop czarny ekran): re-enable mixdrop direct stream via VPS
  curl_cffi proxy (was: skip → WebView fallback → blank screen). Backend
  pipeline (mixdrop.py extract + stream_proxy._curl_cffi_stream with JA3 +
  auto-refetch on token expire) was already complete; just removed the skip
  in app/api/playback.py.

Plus ongoing WIP (paradisehill multi-part extraction, stream_proxy refetch
logic, gesture race fix for long-press 2x speed, anti-adblock INJECTED_JS
defenses, scripts for freshporno backfill, new sources API).
2026-05-25 22:02:52 +02:00

129 lines
4.4 KiB
Python

"""GET /sources — lista tube źródeł dla feature "Sites" (mobile top-level tab).
Bug-report 2026-05-24 (ea6f05f9, Scenes screen): user chce wybrać "pages"
obok Scenes i Movies — widzieć listę tube'ów i wchodzić w nie, żeby zobaczyć
najnowsze sceny z konkretnego źródła.
Endpoint enumeruje distinct `playback_sources.origin` z ŻYWYCH playback_sources
(`dead_at IS NULL`), tylko origins zaczynające się od 'tube:' (kanoniczne źródła
typu `canonical:tpdb_trailer` są pomijane — to nie są "scrapowane strony" w sensie
intencji feature'a).
Sortowanie: scene_count DESC (najbardziej "wypełnione" tubey na górze).
"""
from __future__ import annotations
import logging
import re
from datetime import datetime
from typing import Annotated
from fastapi import APIRouter, Depends
from pydantic import BaseModel
from sqlalchemy import func, select
from sqlalchemy.orm import Session
from app.auth import require_api_key
from app.db import get_session
from app.models.playback_source import PlaybackSource
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Router for the /sources endpoints. `require_api_key` is declared as a
# router-level dependency, so it applies to every route registered here.
router = APIRouter(prefix="/sources", tags=["sources"], dependencies=[Depends(require_api_key)])
class SourceOut(BaseModel):
    """A single tube source entry returned by GET /sources."""

    # Raw origin string from the DB, e.g. 'tube:hqpornercom'. Used as the
    # `origin=` filter parameter of GET /scenes (substring match).
    origin: str

    # Origin without the 'tube:' prefix, e.g. 'hqpornercom'. Stable ID of the
    # tube (consistent with `BaseDirectTubeScraper.sitetag`).
    sitetag: str

    # Human-readable name for the UI, e.g. 'hqporner.com'. Derived from the
    # sitetag by `_sitetag_to_display`. Presentation only; logic should key on
    # sitetag/origin.
    display_name: str

    # Number of LIVE playback_sources (dead_at IS NULL) for this origin. Only
    # approximates scene coverage: a scene may have several sources with the
    # same origin (different page_url), so this slightly overstates the
    # distinct-scene count, which is acceptable for orientation.
    scene_count: int

    # MAX(last_seen_at), i.e. the most recent scrape for this origin. Lets the
    # mobile client show "scraped Xh ago" and sort by freshness.
    last_scraped_at: datetime | None
class SourceListOut(BaseModel):
    """Response envelope for GET /sources."""

    # The source entries, in the order the endpoint produced them.
    items: list[SourceOut]

    # Entry count; `list_sources` sets it to `len(items)`.
    total: int
# Hardcoded display-name overrides for edge cases. Most sitetags map cleanly
# through the regex used by `_sitetag_to_display` (`hqpornercom` → `hqporner.com`),
# but some tubes have unusual TLDs or a sitetag that carries no TLD at all.
# Overrides are consulted before the regex, so an entry here always wins.
_DISPLAY_OVERRIDES: dict[str, str] = {
    "fpoxxx": "fpo.xxx",
    "siskavideo": "siska.video",
    "porn4dayspw": "porn4days.pw",
    "porn00org": "porn00.org",
    "freshpornoorg": "freshporno.org",
    "pornxpph": "pornxp.ph",
    "0dayxxcom": "0dayxx.com",
    "shyfapnet": "shyfap.net",
    "hdporngg": "hdporn.gg",
    "fullmoviesxxx": "fullmovies.xxx",
    "latestleaksco": "latestleaks.co",
    "xxxfreewatch": "xxxfreewatch.com",
    "watchporn": "watchporn.to",
}
# Splits a sitetag that ends in one of the listed TLDs into (name, tld).
# The lazy `.+?` needs at least one character before the TLD, so a sitetag
# that consists only of a TLD (e.g. "com") does not match.
_TLD_RE = re.compile(r"^(.+?)(com|org|net|info)$")
def _sitetag_to_display(sitetag: str) -> str:
    """Turn a sitetag into a readable domain name, e.g. `hqpornercom` → `hqporner.com`.

    Resolution order:
      1. An exact entry in `_DISPLAY_OVERRIDES` wins.
      2. Otherwise, if the sitetag ends in a TLD recognised by `_TLD_RE`, a dot
         is inserted before that TLD.
      3. Otherwise the sitetag is returned unchanged.

    Args:
        sitetag: Tube identifier without the 'tube:' prefix.

    Returns:
        The display name for the UI.
    """
    override = _DISPLAY_OVERRIDES.get(sitetag)
    if override is not None:
        return override

    tld_match = _TLD_RE.match(sitetag)
    if tld_match is None:
        # No known TLD suffix: show the raw sitetag rather than guess.
        return sitetag

    name, tld = tld_match.groups()
    return f"{name}.{tld}"
@router.get("", response_model=SourceListOut)
def list_sources(
    session: Annotated[Session, Depends(get_session)],
) -> SourceListOut:
    """Return the tube sources that currently have LIVE playback_sources.

    Only rows with `dead_at IS NULL` and `origin LIKE 'tube:%'` are considered
    (canonical:* origins such as TPDB trailers have different semantics and are
    dropped). Rows are grouped by origin.

    Ordering: live-source count descending, then origin ascending. The
    secondary key makes the order deterministic when several origins have the
    same count; without it the database may return ties in any order and the
    list can reshuffle between requests.

    Args:
        session: Database session injected via `get_session`.

    Returns:
        A `SourceListOut` with one `SourceOut` per origin and `total` equal to
        the number of entries.
    """
    live_count = func.count(PlaybackSource.id)
    stmt = (
        select(
            PlaybackSource.origin,
            live_count.label("scene_count"),
            func.max(PlaybackSource.last_seen_at).label("last_scraped_at"),
        )
        .where(PlaybackSource.dead_at.is_(None))
        .where(PlaybackSource.origin.like("tube:%"))
        .group_by(PlaybackSource.origin)
        # Tie-break on origin so equal counts always come back in the same order.
        .order_by(live_count.desc(), PlaybackSource.origin.asc())
    )

    items = [
        SourceOut(
            origin=origin,
            # Strip the 'tube:' prefix if present; otherwise keep origin as-is.
            sitetag=(sitetag := origin.removeprefix("tube:")),
            display_name=_sitetag_to_display(sitetag),
            scene_count=scene_count,
            last_scraped_at=last_scraped_at,
        )
        for origin, scene_count, last_scraped_at in session.execute(stmt).all()
    ]
    return SourceListOut(items=items, total=len(items))