Rates each source on three axes the user asked for:
- freshness: how recently/often new content arrives (newest age + 7d volume)
- richness: metadata coverage (thumbnail/tags/performers/description/studio/duration)
- plays: does it actually play — from real playback telemetry when available,
  else a proxy from the resolve mechanism. 0★ = offline (gates the overall stars,
  so a fresh+rich source that doesn't play still ranks bottom — the hqfap/4k69 case)

Backend:
- playback_events: fire-and-forget telemetry POST from the app per playback attempt
  (origin + success/error + time-to-first-frame), append-only, 30d retention
- source_stats: per-origin computed scores, refreshed by a scheduler job (6h);
  /sources joins it and sorts by stars
- models + local migration 0025; new GOON_SCHED_SOURCE_STATS_HOURS setting

Mobile:
- Sites rows show ★ rating; tap the stars for a breakdown (axes + metadata %,
  plus whether "plays" is measured or estimated)
- PlayerScreen reports playback success/failure per source (native path only —
  symmetric, conservative); origin threaded through Scene/Movie play callsites

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
208 lines
11 KiB
Python
208 lines
11 KiB
Python
from functools import lru_cache
|
||
|
||
from pydantic import Field
|
||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||
|
||
|
||
class Settings(BaseSettings):
    """Application configuration read from the environment and the `.env` file.

    Environment variable names are matched case-insensitively and unknown
    variables are ignored (see ``model_config``). Fields declared with a
    ``validation_alias`` are populated from that variable name.
    """

    model_config = SettingsConfigDict(env_file=".env", extra="ignore", case_sensitive=False)

    database_url: str = Field(
        default="postgresql+psycopg://goon:goon@localhost:5432/goon",
        validation_alias="DATABASE_URL",
    )

    tpdb_api_token: str | None = Field(default=None, validation_alias="TPDB_API_TOKEN")
    tpdb_base_url: str = Field(
        default="https://api.theporndb.net", validation_alias="TPDB_BASE_URL"
    )

    stashdb_api_key: str | None = Field(default=None, validation_alias="STASHDB_API_KEY")
    stashdb_graphql_url: str = Field(
        default="https://stashdb.org/graphql", validation_alias="STASHDB_GRAPHQL_URL"
    )

    log_level: str = Field(default="INFO", validation_alias="LOG_LEVEL")

    # Sentry observability — an empty DSN makes init a no-op (dev/local). The cloud
    # free tier of 5k errors/month is enough for a single-user app.
    sentry_dsn: str | None = Field(default=None, validation_alias="SENTRY_DSN")
    sentry_environment: str = Field(default="dev", validation_alias="SENTRY_ENVIRONMENT")
    sentry_traces_sample_rate: float = Field(
        default=0.1, validation_alias="SENTRY_TRACES_SAMPLE_RATE"
    )

    # Comma-separated list of API keys. Empty = auth disabled (dev/local only).
    api_keys_raw: str = Field(default="", validation_alias="API_KEYS")

    # Whitelist of SHA256 (hex) APK signature hashes accepted by the backend. Every
    # mobile request sends `X-App-Signature` with the hash of the signing cert
    # (PackageManager.GET_SIGNING_CERTIFICATES). Empty = check disabled (dev/initial
    # rollout). List = comma-separated lowercase hex. Re-packaging the APK with a
    # different keystore changes the hash → 403.
    allowed_app_sig_hashes_raw: str = Field(default="", validation_alias="ALLOWED_APP_SIG_HASH")

    auto_merge_threshold: float = 0.92
    review_threshold: float = 0.75
    fingerprint_hamming_max: int = 5
    title_token_set_min: int = 88
    date_window_days: int = 7

    # Skip ingesting clip-store scenes (ManyVids/IWantClips/Clips4Sale/...) from
    # canonical sources — they are permanent orphans (free tubes do not host them),
    # ~56% of the TPDB/StashDB ingest. False = ingest them as before. Tubes with a
    # clip-store studio are NOT skipped (they have playback).
    skip_clip_store: bool = Field(default=True, validation_alias="GOON_SKIP_CLIP_STORE")

    # Minimum duration of a tube/scraper scene at ingest — shorter than N s means
    # trailer/teaser/preview. 0 = disabled. An unknown duration is not filtered out.
    # Does NOT apply to canonical sources (TPDB/StashDB).
    min_ingest_duration_sec: int = Field(default=180, validation_alias="GOON_MIN_INGEST_DURATION_SEC")

    # APScheduler (M5). Any value of 0/None = job disabled.
    sched_tpdb_hours: int = Field(default=6, validation_alias="GOON_SCHED_TPDB_HOURS")
    sched_stashdb_hours: int = Field(default=6, validation_alias="GOON_SCHED_STASHDB_HOURS")
    sched_performer_driven_hours: int = Field(
        default=12, validation_alias="GOON_SCHED_PERFORMER_DRIVEN_HOURS"
    )
    sched_performer_driven_top_n: int = Field(
        default=20, validation_alias="GOON_SCHED_PERFORMER_DRIVEN_TOP_N"
    )
    # Continuous worker. interval=15s + max_instances=1 + coalesce=True ⇒ effective rate
    # = max(15, real_tick_duration). A real tick takes ~50-80s at full coverage.
    # Set to 0 to disable.
    sched_performer_continuous_seconds: int = Field(
        default=15, validation_alias="GOON_SCHED_PERFORMER_CONTINUOUS_SECONDS"
    )
    sched_performer_continuous_refresh_days: int = Field(
        default=30, validation_alias="GOON_SCHED_PERFORMER_CONTINUOUS_REFRESH_DAYS"
    )
    # Movie ingest — paradisehill (primary) + dooplay mirrors (mangoporn/streamporn/
    # pandamovies). Each connector records its own `Source` and ingests the delta since
    # its last successful run. Set to 0 to disable. Default 24h: movie sites grow more
    # slowly than tubes (~5-30 new items per day), so sweeping more often is pointless.
    sched_movie_ingest_hours: int = Field(
        default=24, validation_alias="GOON_SCHED_MOVIE_INGEST_HOURS"
    )
    # Browse-latest scheduler: freshporno/porn00/pornxp newest scenes.
    # 6h cadence (changed from 24h on 2026-05-20): the user reported Brazzers Exxtra
    # missing after 15-05. The root cause was two-fold: (1) freshporno publishes scenes
    # throughout the day and a 24h cadence only catches those published by 05:30 UTC;
    # (2) a separate meta_content/release_date bug.
    # 6h = 4 runs/day = every freshporno scene is ingested within ~6h of publication.
    sched_browse_latest_hours: int = Field(
        default=6, validation_alias="GOON_SCHED_BROWSE_LATEST_HOURS"
    )
    sched_browse_latest_max_pages: int = Field(
        default=5, validation_alias="GOON_SCHED_BROWSE_LATEST_MAX_PAGES"
    )
    # Deep crawl (Phase 2a) — full catalogues of the browse tubes (porndoe ~62k etc.),
    # not just top-N. Round-robin across tubes with a resumable cursor
    # (app/_state/deepcrawl_state.json). 0 = disabled.
    # 60 pages/run × ~31 scenes ≈ 1860 scenes/run (~22 min, hard timeout 1h).
    sched_deep_crawl_hours: int = Field(default=1, validation_alias="GOON_SCHED_DEEP_CRAWL_HOURS")
    deep_crawl_pages_per_run: int = Field(default=60, validation_alias="GOON_DEEP_CRAWL_PAGES_PER_RUN")
    deepcrawl_state_path: str = Field(default="", validation_alias="GOON_DEEPCRAWL_STATE_PATH")
    # Bulk-dedup performers safety net — auto-merges duplicates that resolver-time
    # scoring missed. 12h cadence: runs twice a day (after the morning browse-latest run).
    sched_bulk_dedup_hours: int = Field(
        default=12, validation_alias="GOON_SCHED_BULK_DEDUP_HOURS"
    )
    # Thumb-asset dedup — merges hdporn.gg/fullmovies.xxx duplicates (same film,
    # different titles, same thumbnail asset id + duration). bulk_dedup does not catch
    # these (no phash/title). Re-ingests under new titles make the duplicates grow
    # back, hence the recurring job. 12h. 0 = off.
    sched_thumb_dedup_hours: int = Field(
        default=12, validation_alias="GOON_SCHED_THUMB_DEDUP_HOURS"
    )
    # Title+duration dedup — merges missing-merge duplicates (same performer +
    # identical normalised title + duration equal to the second) that bulk_dedup does
    # not catch (tube re-scrape / cross-tube, e.g. porn00 vs xnxx, reports
    # 28fe8181/32df33b1). They grow back on re-ingest, hence the recurring job.
    # 12h, playback-only (what the user sees). 0 = off.
    sched_title_dedup_hours: int = Field(
        default=12, validation_alias="GOON_SCHED_TITLE_DEDUP_HOURS"
    )
    # Ingest freshness watchdog — alerts Sentry when an active tube (origin
    # tube:<sitetag>) has produced no new scenes for longer than the threshold.
    # Catches a single frozen origin that the global monitor (one Source
    # "tube-scraper") cannot see (e.g. freshporno browse from a rotating root,
    # report 14f3a655). 6h cadence (after browse-latest). Any 0/None = disabled.
    sched_ingest_watchdog_hours: int = Field(
        default=6, validation_alias="GOON_SCHED_INGEST_WATCHDOG_HOURS"
    )
    # Threshold for browse scrapers (ALL_BROWSE_SCRAPERS) — crawled once a day from
    # the listing, so 48h of silence = anomaly.
    # NOTE(review): sched_browse_latest_hours defaults to 6h — confirm "once a day"
    # still describes these scrapers.
    ingest_watchdog_max_age_hours: int = Field(
        default=48, validation_alias="GOON_INGEST_WATCHDOG_MAX_AGE_HOURS"
    )
    # Threshold for performer-driven search scrapers (ALL_DIRECT_SCRAPERS) — their
    # cadence is uneven (continuous queue ~30d refresh per performer, orphan-heavy
    # ingest), so 48h would give false positives. 7d (168h): healthy search tubes were
    # observed at <6h freshness (the continuous tick hits every tube per performer),
    # frozen ones at ≥73h → ~28× margin.
    ingest_watchdog_search_max_age_hours: int = Field(
        default=168, validation_alias="GOON_INGEST_WATCHDOG_SEARCH_MAX_AGE_HOURS"
    )
    # Taxonomy scene_count refresh — recomputes the denormalised scene counters on
    # tags/performers/studios (the hot path /tags|/performers|/studios|/favorites reads
    # the ready column instead of aggregating 6.3M scene_tags per request). 3h cadence —
    # counts may be stale by up to that long, which does not matter for the "popular"
    # sort + "(N)" badge. 0 = off.
    sched_taxonomy_counts_hours: int = Field(
        default=3, validation_alias="GOON_SCHED_TAXONOMY_COUNTS_HOURS"
    )

    # Hetzner Cloud bandwidth monitor — read-only API token (Security → API Tokens in
    # the Hetzner Cloud console). Without a token the monitor is disabled (warning in
    # the log). Free traffic per server: CX22=20TB, CPX21=20TB etc. Overage = €1/TB.
    hetzner_api_token: str | None = Field(default=None, validation_alias="HETZNER_API_TOKEN")
    hetzner_server_id: int | None = Field(default=None, validation_alias="HETZNER_SERVER_ID")
    # Alert thresholds (% of included_traffic) — Sentry severity levels.
    hetzner_alert_info_pct: int = Field(default=50, validation_alias="HETZNER_ALERT_INFO_PCT")
    hetzner_alert_warning_pct: int = Field(default=80, validation_alias="HETZNER_ALERT_WARNING_PCT")
    hetzner_alert_error_pct: int = Field(default=95, validation_alias="HETZNER_ALERT_ERROR_PCT")
    # Traffic check cadence (hours). 0/None = monitor disabled. Default 6h (traffic
    # grows slowly; checking more often is pointless). Only runs when token+id are set.
    sched_hetzner_monitor_hours: int = Field(
        default=6, validation_alias="GOON_SCHED_HETZNER_MONITOR_HOURS"
    )

    # Source ranking (Sites screen) — recompute source_stats (freshness/richness/health
    # per origin). 0/None = disabled. Default 6h (richness is a heavy aggregate over
    # ~2M live playback_sources; more often is pointless, the data changes slowly).
    sched_source_stats_hours: int = Field(
        default=6, validation_alias="GOON_SCHED_SOURCE_STATS_HOURS"
    )

    # Bright Data ISP proxy (static ISP IPs, billed flat-rate NOT per-GB) — used for
    # HTML ingest (scraping) of tubes that block the VPS IP with a hard Cloudflare 403
    # even with browser TLS (superporn). Streams never go through the proxy anyway (CDN
    # tokens are IP-bound). Env format: `host:port:user:pass` (Bright Data panel).
    # Empty = none.
    brightdata_proxy_raw: str = Field(default="", validation_alias="BRIGHTDATA_PROXY_URL")

    @property
    def brightdata_proxy_url(self) -> str | None:
        """Convert `host:port:user:pass` into `http://user:pass@host:port`.

        The result is meant for curl_cffi/httpx. User and password are
        percent-encoded so that reserved characters (`@`, `/`, `#`, `%`, ...)
        cannot corrupt the URL, and the password may itself contain `:`.

        Returns:
            The proxy URL, or None when the setting is empty or malformed
            (fewer than four fields, an empty field, or a non-numeric port).
        """
        from urllib.parse import quote

        # maxsplit=3: everything after the third ":" belongs to the password.
        parts = self.brightdata_proxy_raw.strip().split(":", 3)
        if len(parts) != 4 or not all(parts):
            return None
        host, port, user, pwd = parts
        # A non-numeric port means the value is not in `host:port:user:pass` form
        # (e.g. someone pasted a full URL); treat it as malformed.
        if not (port.isascii() and port.isdigit()):
            return None
        safe_user = quote(user, safe="")
        safe_pwd = quote(pwd, safe="")
        return f"http://{safe_user}:{safe_pwd}@{host}:{port}"

    @property
    def api_keys(self) -> set[str]:
        """Return the non-empty, whitespace-stripped entries of `api_keys_raw`."""
        return {k.strip() for k in self.api_keys_raw.split(",") if k.strip()}

    @property
    def auth_enabled(self) -> bool:
        """Return True when at least one API key is configured."""
        return bool(self.api_keys)

    @property
    def allowed_app_sig_hashes(self) -> set[str]:
        """Return the configured signature hashes, normalised for comparison.

        Each comma-separated entry is stripped, lower-cased and has its `:`
        separators removed. Entries that are empty after normalisation are
        dropped so a stray `:` or blank item never whitelists the empty string.
        """
        normalized = (
            h.strip().lower().replace(":", "")
            for h in self.allowed_app_sig_hashes_raw.split(",")
        )
        return {h for h in normalized if h}

    @property
    def app_sig_check_enabled(self) -> bool:
        """Return True when at least one signature hash is whitelisted."""
        return bool(self.allowed_app_sig_hashes)
|
||
|
||
|
||
@lru_cache
def get_settings() -> Settings:
    """Return the shared Settings instance.

    The function takes no arguments, so `lru_cache` builds `Settings()` on the
    first call and hands back that same object on every later call.
    """
    settings = Settings()
    return settings
|