Goon — self-hosted aggregator for adult-content scene metadata. Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites. Cross-source deduplication via perceptual hash + Levenshtein distance. FastAPI backend + APScheduler worker + React Native (Expo) mobile client. FOSS, ad-free, donation-funded. See README for details.
187 lines
5.9 KiB
Python
187 lines
5.9 KiB
Python
"""Kontrakt connectora źródła + neutralne DTO surowych rekordów.
|
|
|
|
Connector odpowiada za: paginację, retry, autoryzację, deltę. Zwraca strumień RawScene
|
|
(z ewentualnymi pre-rozwiniętymi performerami/studiem/tagami w polach inline). Cała
|
|
mechanika DB i normalizacji żyje wyżej w pipeline'ie ingest.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import abc
|
|
from collections.abc import Iterator
|
|
from datetime import date, datetime
|
|
from typing import Any
|
|
|
|
from pydantic import BaseModel, ConfigDict, Field
|
|
|
|
from app.models.source import SourceKind
|
|
|
|
|
|
class RawTag(BaseModel):
    """Raw tag record as delivered by a source connector.

    Only ``name`` is required. Fields not declared here are accepted and kept
    on the instance (``extra="allow"``).
    """

    model_config = ConfigDict(extra="allow")

    external_id: str | None = None
    name: str
    slug: str | None = None
|
|
|
|
|
|
class RawStudio(BaseModel):
    """Raw studio record as delivered by a source connector.

    Only ``name`` is required. Fields not declared here are accepted and kept
    on the instance (``extra="allow"``).
    """

    model_config = ConfigDict(extra="allow")

    external_id: str | None = None
    name: str
    slug: str | None = None
    # Parent studio, when the source reports one.
    parent_external_id: str | None = None
    parent_name: str | None = None
    network: str | None = None
    homepage_url: str | None = None
|
|
|
|
|
|
class RawPerformer(BaseModel):
    """Raw performer record as delivered by a source connector.

    Only ``name`` is required. Fields not declared here are accepted and kept
    on the instance (``extra="allow"``).
    """

    model_config = ConfigDict(extra="allow")

    external_id: str | None = None
    name: str
    aliases: list[str] = Field(default_factory=list)
    gender: str | None = None
    birth_date: date | None = None
    country: str | None = None
    # Name used in this particular scene (e.g. "Mia M.").
    as_alias_in_scene: str | None = None
|
|
|
|
|
|
class RawFingerprint(BaseModel):
    """A single fingerprint: the algorithm name plus the fingerprint value.

    NOTE(review): unlike the other Raw* models in this module, this one does
    not set ``extra="allow"`` — confirm that is intentional.
    """

    # Fingerprint algorithm: phash | oshash | md5 (not enforced by the type).
    kind: str
    value: str
|
|
|
|
|
|
class RawPlaybackSource(BaseModel):
    """Playback link for a scene on a specific tube site / aggregator.

    ``origin`` and ``page_url`` are required; everything else is optional.
    Fields not declared here are accepted and kept (``extra="allow"``).
    """

    model_config = ConfigDict(extra="allow")

    # Short source name, e.g. 'tube:hqpornercom', 'mangoporn:doodstream'.
    origin: str

    # URL of the tube page that hosts the player (deep link).
    page_url: str

    embed_url: str | None = None
    stream_url: str | None = None
    quality: str | None = None
    duration_sec: int | None = None
    thumbnail_url: str | None = None
    animated_thumbnail_url: str | None = None
|
|
|
|
|
|
class RawScene(BaseModel):
    """Raw scene record as delivered by a source connector.

    ``external_id`` and ``title`` are required. Studio, performers, tags,
    fingerprints and playback sources are carried inline as nested Raw*
    models. Fields not declared here are accepted and kept
    (``extra="allow"``).
    """

    model_config = ConfigDict(extra="allow")

    external_id: str
    title: str
    description: str | None = None
    release_date: date | None = None
    duration_sec: int | None = None
    code: str | None = None
    director: str | None = None
    url: str | None = None

    studio: RawStudio | None = None
    performers: list[RawPerformer] = Field(default_factory=list)
    tags: list[RawTag] = Field(default_factory=list)
    fingerprints: list[RawFingerprint] = Field(default_factory=list)
    playback_sources: list[RawPlaybackSource] = Field(default_factory=list)

    # Mapping source_name -> external_id declared by this source. Used for
    # path 2 in the resolver (cross-source UUID match). The key matches
    # `Source.name` in the DB (e.g. 'tpdb', 'stashdb').
    cross_source_refs: dict[str, str] = Field(default_factory=dict)

    # Original API payload — goes into external_records.raw.
    raw: dict[str, Any] = Field(default_factory=dict)
|
|
|
|
|
|
class BaseConnector(abc.ABC):
    """Base class every scene source connector inherits from.

    `kind` maps 1:1 to SourceKind in the DB. Both `kind` and `name` are only
    declared here; concrete connectors are expected to provide the values.
    """

    kind: SourceKind
    name: str

    @abc.abstractmethod
    def fetch_scenes(
        self,
        *,
        since: datetime | None = None,
        limit: int | None = None,
    ) -> Iterator[RawScene]:
        """Yield scenes one at a time.

        Args:
            since: Optional delta filter; when omitted the connector falls
                back to a full fetch.
            limit: Optional bound passed by the caller — presumably the
                maximum number of scenes to yield; confirm against the
                concrete connectors.

        Yields:
            One RawScene per scene reported by the source.

        Raises:
            NotImplementedError: Always, in this base implementation;
                subclasses must override the method.
        """
        raise NotImplementedError
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Movies — a separate entity from scenes, but the same connector pattern
# ---------------------------------------------------------------------------
|
|
|
|
class RawMovieChapter(BaseModel):
    """A single chapter of a movie (movies are sometimes split into "Part 1/2/3" etc.).

    Chapter identifiers are not canonicalized across sources — they are local
    to the movie and indexed by `chapter_index`. A chapter may link to a
    separate scene (if that scene is known from TPDB/StashDB) — that is handled
    by the normalizer higher up.
    """

    model_config = ConfigDict(extra="allow")

    chapter_index: int
    title: str | None = None
    start_sec: int | None = None
    end_sec: int | None = None
|
|
|
|
|
|
class RawMovie(BaseModel):
    """Raw movie from a connector — the movie counterpart of RawScene.

    Performers / studio / tags reuse RawPerformer / RawStudio / RawTag (the
    same types in both pipelines). Playback sources are a list of playback
    mirrors (paradisehill primary, possibly other tubes).
    """

    model_config = ConfigDict(extra="allow")

    external_id: str
    title: str
    description: str | None = None
    release_year: int | None = None
    release_date: date | None = None
    duration_sec: int | None = None
    director: str | None = None
    country: str | None = None
    rating: float | None = None
    poster_url: str | None = None
    backdrop_url: str | None = None
    url: str | None = None

    studio: RawStudio | None = None
    performers: list[RawPerformer] = Field(default_factory=list)
    tags: list[RawTag] = Field(default_factory=list)
    chapters: list[RawMovieChapter] = Field(default_factory=list)
    playback_sources: list[RawPlaybackSource] = Field(default_factory=list)

    cross_source_refs: dict[str, str] = Field(default_factory=dict)

    raw: dict[str, Any] = Field(default_factory=dict)
|
|
|
|
|
|
class BaseMovieConnector(abc.ABC):
    """Connector for a movie source (paradisehill, psyplay, wp_movies).

    Symmetric to BaseConnector, but yields RawMovie. Each source knows its own
    pagination and ID format — the converter higher up (the resolver) takes
    care of deduplication across sources.
    """

    kind: SourceKind
    name: str

    @abc.abstractmethod
    def fetch_movies(
        self,
        *,
        since: datetime | None = None,
        limit: int | None = None,
    ) -> Iterator[RawMovie]:
        """Yield movies one at a time.

        Args:
            since: Optional; when omitted the connector falls back to a full
                crawl.
            limit: Optional bound passed by the caller — presumably the
                maximum number of movies to yield; confirm against the
                concrete connectors.

        Yields:
            One RawMovie per movie reported by the source.

        Raises:
            NotImplementedError: Always, in this base implementation;
                subclasses must override the method.
        """
        raise NotImplementedError
|