Goon — self-hosted aggregator for adult-content scene metadata. Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites. Cross-source deduplication via perceptual hash + Levenshtein distance. FastAPI backend + APScheduler worker + React Native (Expo) mobile client. FOSS, ad-free, donation-funded. See README for details.
146 lines
7.9 KiB
Python
146 lines
7.9 KiB
Python
"""Canonical movies table + sibling tables mirroring the scene ones

Revision ID: 0009_movies
Revises: 0008_performer_search_meta
Create Date: 2026-05-06

Schema for full-length adult films (paradisehill + mirrors). Movies differ from
scenes: 60-180 min runtime, multi-chapter structure, more metadata (director,
year, country, rating). Performers/studios/tags are reusable (the same
people/studios appear in both scenes and movies).

New entity_kind: 'movie'. New merge_kind: 'movie'. Movie fingerprints rarely
exist (movies have no industry-standard pHash), so the fingerprint table is
skipped — dedup will go by a composite key (title+year+studio+cast Jaccard).
"""
|
|
from collections.abc import Sequence
|
|
|
|
import sqlalchemy as sa
|
|
from alembic import op
|
|
from sqlalchemy.dialects import postgresql
|
|
|
|
revision: str = "0009_movies"
|
|
down_revision: str | None = "0008_performer_search_meta"
|
|
branch_labels: str | Sequence[str] | None = None
|
|
depends_on: str | Sequence[str] | None = None
|
|
|
|
|
|
def upgrade() -> None:
    """Create the movie schema: ``movies`` plus its satellite tables.

    Adds the value 'movie' to the ``entity_kind`` and ``merge_kind`` enums,
    then creates ``movies``, ``movie_external_refs``, ``movie_performers``,
    ``movie_tags``, ``movie_chapters`` and ``movie_playback_sources`` together
    with their indexes.

    NOTE(review): the trigram index uses ``gin_trgm_ops`` and the UUID
    defaults call ``gen_random_uuid()``; both presumably rely on something set
    up before this revision (pg_trgm extension, pgcrypto or a recent
    PostgreSQL) -- confirm.
    NOTE(review): ``ALTER TYPE ... ADD VALUE`` is issued through plain
    ``op.execute``; whether that is allowed inside the migration transaction
    depends on the PostgreSQL version -- confirm against the deployment target.
    """

    def uuid_type() -> postgresql.UUID:
        # A fresh type object per column; values are handled as uuid.UUID.
        return postgresql.UUID(as_uuid=True)

    def uuid_pk() -> sa.Column:
        # Surrogate primary key whose value is generated by the database.
        return sa.Column(
            "id",
            uuid_type(),
            primary_key=True,
            server_default=sa.text("gen_random_uuid()"),
        )

    def uuid_fk(name: str, target: str, ondelete: str, **flags: bool) -> sa.Column:
        # UUID foreign-key column; `flags` carries primary_key / nullable.
        return sa.Column(
            name,
            uuid_type(),
            sa.ForeignKey(target, ondelete=ondelete),
            **flags,
        )

    def movie_fk(**flags: bool) -> sa.Column:
        # Every satellite table references movies.id with ON DELETE CASCADE.
        return uuid_fk("movie_id", "movies.id", "CASCADE", **flags)

    def timestamp(name: str) -> sa.Column:
        # Timezone-aware, NOT NULL timestamp defaulting to NOW() server-side.
        return sa.Column(
            name,
            sa.DateTime(timezone=True),
            server_default=sa.text("NOW()"),
            nullable=False,
        )

    # Extend the enums with 'movie'.
    for enum_name in ("entity_kind", "merge_kind"):
        op.execute(f"ALTER TYPE {enum_name} ADD VALUE IF NOT EXISTS 'movie'")

    op.create_table(
        "movies",
        uuid_pk(),
        sa.Column("title", sa.String(512), nullable=False),
        sa.Column("title_normalized", sa.String(512), nullable=False),
        sa.Column("slug", sa.String(512)),
        sa.Column("release_year", sa.Integer),
        sa.Column("release_date", sa.Date),
        uuid_fk("studio_id", "studios.id", "SET NULL"),
        sa.Column("director", sa.String(256)),
        sa.Column("country", sa.String(64)),
        sa.Column("duration_sec", sa.Integer),
        sa.Column("description", sa.Text),
        sa.Column("poster_url", sa.String(2048)),
        sa.Column("backdrop_url", sa.String(2048)),
        # Rating as a float (paradisehill exposes like_count plus a 0-10
        # rating; we keep the averaged rating from the primary source, when
        # available).
        sa.Column("rating", sa.Float),
        timestamp("created_at"),
        timestamp("updated_at"),
    )
    op.create_index("ix_movies_title_normalized", "movies", ["title_normalized"])
    # Trigram GIN index, issued as raw SQL.
    op.execute(
        "CREATE INDEX ix_movies_title_normalized_trgm ON movies "
        "USING GIN (title_normalized gin_trgm_ops);"
    )
    movies_indexes = (
        ("ix_movies_release_year", ["release_year"]),
        ("ix_movies_release_date", ["release_date"]),
        ("ix_movies_slug", ["slug"]),
        ("ix_movies_studio_id", ["studio_id"]),
        ("ix_movies_studio_year", ["studio_id", "release_year"]),
    )
    for index_name, index_columns in movies_indexes:
        op.create_index(index_name, "movies", index_columns)

    op.create_table(
        "movie_external_refs",
        # Composite primary key: (source_id, external_id).
        uuid_fk("source_id", "sources.id", "CASCADE", primary_key=True),
        sa.Column("external_id", sa.String(256), primary_key=True),
        movie_fk(nullable=False),
        sa.Column("confidence", sa.Float, nullable=False, server_default="1.0"),
        sa.Column("url", sa.String(1024)),
        timestamp("first_seen"),
        timestamp("last_seen"),
    )
    op.create_index(
        "ix_movie_external_refs_movie_id", "movie_external_refs", ["movie_id"]
    )

    op.create_table(
        "movie_performers",
        # Composite primary key: (movie_id, performer_id).
        movie_fk(primary_key=True),
        uuid_fk("performer_id", "performers.id", "CASCADE", primary_key=True),
        sa.Column("role", sa.String(64)),
        sa.Column("position", sa.Integer),
        sa.Column("as_alias", sa.String(256)),
    )

    op.create_table(
        "movie_tags",
        # Composite primary key: (movie_id, tag_id).
        movie_fk(primary_key=True),
        uuid_fk("tag_id", "tags.id", "CASCADE", primary_key=True),
        uuid_fk("source_id", "sources.id", "SET NULL"),
    )

    # Chapters -- optional table for movies split into scenes/segments
    # (paradisehill sometimes has timestamp markers, e.g. "Scene 1: 00:00-15:32").
    # Each chapter MAY link to an existing Scene (if that scene is also known
    # on its own from TPDB/StashDB), or live only as an anchor within the movie.
    op.create_table(
        "movie_chapters",
        uuid_pk(),
        movie_fk(nullable=False),
        sa.Column("chapter_index", sa.Integer, nullable=False),
        sa.Column("title", sa.String(512)),
        sa.Column("start_sec", sa.Integer),
        sa.Column("end_sec", sa.Integer),
        uuid_fk("scene_id", "scenes.id", "SET NULL"),
        sa.UniqueConstraint(
            "movie_id",
            "chapter_index",
            name="uq_movie_chapters_movie_id_chapter_index",
        ),
    )
    op.create_index("ix_movie_chapters_movie_id", "movie_chapters", ["movie_id"])

    # Playback sources for movies -- analogous to playback_sources, kept in a
    # separate table because we do not want to mix scene_id/movie_id in a
    # single FK column. Reuses the origin convention ('paradisehill',
    # 'psyplay:streamporn', 'wp_movies:speedporn', etc.).
    op.create_table(
        "movie_playback_sources",
        uuid_pk(),
        movie_fk(nullable=False),
        sa.Column("origin", sa.String(64), nullable=False),
        sa.Column("page_url", sa.String(2048), nullable=False),
        sa.Column("embed_url", sa.String(2048)),
        sa.Column("stream_url", sa.String(2048)),
        sa.Column("quality", sa.String(16)),
        sa.Column("duration_sec", sa.Integer),
        sa.Column("thumbnail_url", sa.String(2048)),
        sa.Column("animated_thumbnail_url", sa.String(2048)),
        timestamp("last_seen_at"),
        sa.Column("dead_at", sa.DateTime(timezone=True)),
        sa.Column("dead_reason", sa.String(512)),
        timestamp("created_at"),
        timestamp("updated_at"),
        sa.UniqueConstraint(
            "origin",
            "page_url",
            name="uq_movie_playback_sources_origin_page_url",
        ),
    )
    playback_indexes = (
        ("ix_movie_playback_sources_movie_id", ["movie_id"]),
        ("ix_movie_playback_sources_origin", ["origin"]),
        ("ix_movie_playback_sources_dead_at", ["dead_at"]),
    )
    for index_name, index_columns in playback_indexes:
        op.create_index(index_name, "movie_playback_sources", index_columns)
|
|
|
|
|
|
def downgrade() -> None:
    """Drop every table created by ``upgrade``.

    Tables that reference ``movies`` are dropped first and ``movies`` itself
    last. The 'movie' enum values are intentionally left in place.
    """
    tables_in_drop_order = (
        "movie_playback_sources",
        "movie_chapters",
        "movie_tags",
        "movie_performers",
        "movie_external_refs",
        "movies",
    )
    for table_name in tables_in_drop_order:
        op.drop_table(table_name)
    # Postgres offers no simple way to remove a value from an enum, so 'movie'
    # stays in entity_kind / merge_kind. The overhead in the enum catalog is
    # small (on the order of bytes per type) and this is safer than attempting
    # a DROP VALUE.
|