Resolver/perf: - find_by_phash_within: nearest match via Postgres bit_count over bit(64) XOR instead of Python scan of all phash fingerprints (~20x faster per scene; unblocks long delta runs that were killed mid-run before since advanced). Scheduler/reliability: - reap ingest_runs stuck in 'running' on worker startup (killed_by_restart). - smoke_test: per-source ingest health, stuck-run and browse-freshness checks -> Sentry; exclude killed_by_restart from the failed-run alarm. Tags (ingest with tags + fill blanks): - wire infer_tag_slugs into normalize_scene so tube scenes get title-inferred tags (was dead code); union with connector tags. - scripts/backfill_inferred_tags.py: keyset/batched/idempotent backfill for existing tagless scenes (playable tag coverage 16% -> ~52%). Clip-store: - skip ManyVids/IWantClips/Clips4Sale/... from canonical sources at ingest (GOON_SKIP_CLIP_STORE, default on) — permanent orphans, ~56% of canonical ingest, never have a free-tube playback source. Browse tubes: - enable fullmovies + hdporn.gg: studio parsed from title prefix instead of the /networks/ sidebar (which always yielded the first listed network); drop phash compute (pilot: 0% canonical hit within Hamming 5 — auto-screenshots), matching relies on title/performer/duration. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
137 lines
6.6 KiB
Python
137 lines
6.6 KiB
Python
from functools import lru_cache
|
|
|
|
from pydantic import Field
|
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
|
|
|
|
class Settings(BaseSettings):
    """Application configuration read from environment variables and `.env`.

    Field names are matched case-insensitively and unknown variables are
    ignored (see ``model_config``). Fields declared with a
    ``validation_alias`` are populated from that environment variable name.
    """

    model_config = SettingsConfigDict(env_file=".env", extra="ignore", case_sensitive=False)

    database_url: str = Field(
        default="postgresql+psycopg://goon:goon@localhost:5432/goon",
        validation_alias="DATABASE_URL",
    )

    tpdb_api_token: str | None = Field(default=None, validation_alias="TPDB_API_TOKEN")
    tpdb_base_url: str = Field(
        default="https://api.theporndb.net", validation_alias="TPDB_BASE_URL"
    )

    stashdb_api_key: str | None = Field(default=None, validation_alias="STASHDB_API_KEY")
    stashdb_graphql_url: str = Field(
        default="https://stashdb.org/graphql", validation_alias="STASHDB_GRAPHQL_URL"
    )

    log_level: str = Field(default="INFO", validation_alias="LOG_LEVEL")

    # Sentry observability — an empty DSN makes init a no-op (dev/local). The
    # cloud free tier of 5k errors/month is enough for a single-user app.
    sentry_dsn: str | None = Field(default=None, validation_alias="SENTRY_DSN")
    sentry_environment: str = Field(default="dev", validation_alias="SENTRY_ENVIRONMENT")
    sentry_traces_sample_rate: float = Field(
        default=0.1, validation_alias="SENTRY_TRACES_SAMPLE_RATE"
    )

    api_keys_raw: str = Field(default="", validation_alias="API_KEYS")
    """Comma-separated list of API keys. Empty = auth disabled (dev/local only)."""

    allowed_app_sig_hashes_raw: str = Field(
        default="", validation_alias="ALLOWED_APP_SIG_HASH"
    )
    """Whitelist of APK signing-certificate SHA256 hashes (hex) accepted by the backend.
    Every mobile request sends `X-App-Signature` with the hash of the signing cert
    (PackageManager.GET_SIGNING_CERTIFICATES).
    Empty = check disabled (dev/initial rollout). List = comma-separated lowercase hex.
    Re-packaging the APK with a different keystore changes the hash → 403."""

    auto_merge_threshold: float = 0.92
    review_threshold: float = 0.75
    fingerprint_hamming_max: int = 5
    title_token_set_min: int = 88
    date_window_days: int = 7

    # Skip ingesting clip-store scenes (ManyVids/IWantClips/Clips4Sale/...) from
    # canonical sources — they are permanent orphans (free tubes do not host them),
    # ~56% of the TPDB/StashDB ingest.
    # False = ingest as before. Tube scenes with a clip-store studio are NOT
    # skipped (they have playback).
    skip_clip_store: bool = Field(default=True, validation_alias="GOON_SKIP_CLIP_STORE")

    # APScheduler (M5). Any value of 0/None = job disabled.
    sched_tpdb_hours: int = Field(default=6, validation_alias="GOON_SCHED_TPDB_HOURS")
    sched_stashdb_hours: int = Field(default=6, validation_alias="GOON_SCHED_STASHDB_HOURS")
    sched_performer_driven_hours: int = Field(
        default=12, validation_alias="GOON_SCHED_PERFORMER_DRIVEN_HOURS"
    )
    sched_performer_driven_top_n: int = Field(
        default=20, validation_alias="GOON_SCHED_PERFORMER_DRIVEN_TOP_N"
    )
    # Continuous worker. interval=15s + max_instances=1 + coalesce=True ⇒ effective rate
    # = max(15, real_tick_duration). A real tick takes ~50-80s at full coverage.
    # Set to 0 to disable.
    sched_performer_continuous_seconds: int = Field(
        default=15, validation_alias="GOON_SCHED_PERFORMER_CONTINUOUS_SECONDS"
    )
    sched_performer_continuous_refresh_days: int = Field(
        default=30, validation_alias="GOON_SCHED_PERFORMER_CONTINUOUS_REFRESH_DAYS"
    )
    # Movie ingest — paradisehill (primary) + dooplay mirrors (mangoporn/streamporn/
    # pandamovies). Each connector writes its own `Source` and runs a delta from its
    # last successful run. Set to 0 to disable. Default is 24h: movie sites grow more
    # slowly than tubes (~5-30 new per day), so sweeping more often is pointless.
    sched_movie_ingest_hours: int = Field(
        default=24, validation_alias="GOON_SCHED_MOVIE_INGEST_HOURS"
    )
    # Browse-latest scheduler: freshporno/porn00/pornxp newest scenes.
    # 6h cadence (changed from 24h on 2026-05-20): a user reported missing Brazzers
    # Exxtra after 15-05. The root cause was two-fold: (1) freshporno publishes scenes
    # throughout the day, and a 24h cadence only catches those up to 05:30 UTC;
    # (2) a separate meta_content/release_date bug.
    # 6h = 4 runs/day = every freshporno scene is ingested within ~6h of publication.
    sched_browse_latest_hours: int = Field(
        default=6, validation_alias="GOON_SCHED_BROWSE_LATEST_HOURS"
    )
    sched_browse_latest_max_pages: int = Field(
        default=5, validation_alias="GOON_SCHED_BROWSE_LATEST_MAX_PAGES"
    )
    # Bulk-dedup performers safety net — auto-merges duplicates that resolver-time
    # scoring missed. 12h cadence: runs 2x a day (after the morning browse-latest run).
    sched_bulk_dedup_hours: int = Field(
        default=12, validation_alias="GOON_SCHED_BULK_DEDUP_HOURS"
    )
    # Taxonomy scene_count refresh — recomputes the denormalized scene counters on
    # tags/performers/studios (the hot path /tags|/performers|/studios|/favorites reads
    # a ready-made column instead of aggregating 6.3M scene_tags per request).
    # 3h cadence — counts are stale by up to that long, which does not matter for the
    # "popular" sort and the "(N)" badge. 0 = off.
    sched_taxonomy_counts_hours: int = Field(
        default=3, validation_alias="GOON_SCHED_TAXONOMY_COUNTS_HOURS"
    )

    # Hetzner Cloud bandwidth monitor — read-only API token (Security → API Tokens
    # in the Hetzner Cloud panel). Without a token the monitor is disabled (warning
    # in the log).
    # Free traffic per server: CX22=20TB, CPX21=20TB etc. Overage = €1/TB.
    hetzner_api_token: str | None = Field(default=None, validation_alias="HETZNER_API_TOKEN")
    hetzner_server_id: int | None = Field(default=None, validation_alias="HETZNER_SERVER_ID")
    # Alert thresholds (% of included_traffic) — Sentry severity levels.
    hetzner_alert_info_pct: int = Field(default=50, validation_alias="HETZNER_ALERT_INFO_PCT")
    hetzner_alert_warning_pct: int = Field(
        default=80, validation_alias="HETZNER_ALERT_WARNING_PCT"
    )
    hetzner_alert_error_pct: int = Field(default=95, validation_alias="HETZNER_ALERT_ERROR_PCT")

    @property
    def api_keys(self) -> set[str]:
        """Return the non-empty, whitespace-stripped keys from ``api_keys_raw``."""
        return {k.strip() for k in self.api_keys_raw.split(",") if k.strip()}

    @property
    def auth_enabled(self) -> bool:
        """Return True when at least one API key is configured."""
        return bool(self.api_keys)

    @property
    def allowed_app_sig_hashes(self) -> set[str]:
        """Return the normalised signature hashes from ``allowed_app_sig_hashes_raw``.

        Each comma-separated entry has its colons removed, surrounding
        whitespace stripped and is lower-cased. Entries that are empty after
        normalisation are discarded.
        """
        normalised = (
            entry.replace(":", "").strip().lower()
            for entry in self.allowed_app_sig_hashes_raw.split(",")
        )
        # Filter AFTER normalising: an entry made only of colons/whitespace would
        # otherwise become "" and put an empty hash on the whitelist.
        return {sig for sig in normalised if sig}

    @property
    def app_sig_check_enabled(self) -> bool:
        """Return True when at least one signature hash is whitelisted."""
        return bool(self.allowed_app_sig_hashes)
|
|
|
|
|
|
@lru_cache
def get_settings() -> Settings:
    """Return the memoised ``Settings`` instance.

    The function takes no arguments, so ``lru_cache`` constructs ``Settings``
    on the first call and hands back that same object on every later call.
    """
    settings = Settings()
    return settings
|