goon/alembic/versions/20260506_0009_movies.py
goon-foss ad0284585b Initial commit
Goon — self-hosted aggregator for adult-content scene metadata.

Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites.
Cross-source deduplication via perceptual hash + Levenshtein distance.
FastAPI backend + APScheduler worker + React Native (Expo) mobile client.

FOSS, ad-free, donation-funded. See README for details.
2026-05-20 10:10:22 +02:00

146 lines
7.9 KiB
Python

"""movies kanon + bliźniacze tabele do scen
Revision ID: 0009_movies
Revises: 0008_performer_search_meta
Create Date: 2026-05-06
Schema dla full-length adult films (paradisehill + mirrory). Movies różnią się od
scen: 60-180min runtime, multi-chapter struktura, więcej metadanych (director,
year, country, rating). Performers/studios/tags reusable (te same osoby/studia
występują w scenach i w filmach).
Nowe entity_kind: 'movie'. Nowe merge_kind: 'movie'. Movie-fingerprints rzadko
istnieją (movies nie mają standardowego pHash w industry), więc fingerprint table
pomijamy — dedup pójdzie po composite key (title+year+studio+cast Jaccard).
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql
# Revision identifiers: this revision's own id and the id of the revision it
# builds on (the same values the module docstring lists as "Revision ID" and
# "Revises").
revision: str = "0009_movies"
down_revision: str | None = "0008_performer_search_meta"
# No branch labels and no extra cross-revision dependencies are declared.
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def _uuid_pk() -> sa.Column:
    """Return a new ``id`` UUID primary-key column defaulting to ``gen_random_uuid()``."""
    return sa.Column(
        "id",
        postgresql.UUID(as_uuid=True),
        primary_key=True,
        server_default=sa.text("gen_random_uuid()"),
    )


def _uuid_fk(name: str, target: str, ondelete: str, **column_kwargs) -> sa.Column:
    """Return a new UUID column ``name`` with a foreign key to ``target``.

    ``ondelete`` is passed to the ``ForeignKey``; any extra keyword arguments
    (``primary_key``, ``nullable``) are forwarded to the ``Column``.
    """
    return sa.Column(
        name,
        postgresql.UUID(as_uuid=True),
        sa.ForeignKey(target, ondelete=ondelete),
        **column_kwargs,
    )


def _now_timestamp(name: str) -> sa.Column:
    """Return a new NOT NULL timezone-aware timestamp column defaulting to ``NOW()``."""
    return sa.Column(
        name,
        sa.DateTime(timezone=True),
        server_default=sa.text("NOW()"),
        nullable=False,
    )


def upgrade() -> None:
    """Add 'movie' to the kind enums and create the movie tables.

    Creates, in order: ``movies``, ``movie_external_refs``,
    ``movie_performers``, ``movie_tags``, ``movie_chapters`` and
    ``movie_playback_sources``, together with their indexes.
    """
    # Extend the enums with 'movie'.
    for statement in (
        "ALTER TYPE entity_kind ADD VALUE IF NOT EXISTS 'movie'",
        "ALTER TYPE merge_kind ADD VALUE IF NOT EXISTS 'movie'",
    ):
        op.execute(statement)

    op.create_table(
        "movies",
        _uuid_pk(),
        sa.Column("title", sa.String(512), nullable=False),
        sa.Column("title_normalized", sa.String(512), nullable=False),
        sa.Column("slug", sa.String(512)),
        sa.Column("release_year", sa.Integer),
        sa.Column("release_date", sa.Date),
        _uuid_fk("studio_id", "studios.id", "SET NULL"),
        sa.Column("director", sa.String(256)),
        sa.Column("country", sa.String(64)),
        sa.Column("duration_sec", sa.Integer),
        sa.Column("description", sa.Text),
        sa.Column("poster_url", sa.String(2048)),
        sa.Column("backdrop_url", sa.String(2048)),
        # Rating as a float (paradisehill has like_count + a 0-10 rating; we
        # keep the averaged rating from the primary source, if available).
        sa.Column("rating", sa.Float),
        _now_timestamp("created_at"),
        _now_timestamp("updated_at"),
    )
    op.create_index("ix_movies_title_normalized", "movies", ["title_normalized"])
    # Trigram GIN index on the normalized title, issued as raw SQL.
    op.execute(
        "CREATE INDEX ix_movies_title_normalized_trgm ON movies "
        "USING GIN (title_normalized gin_trgm_ops);"
    )
    for index_name, indexed_columns in (
        ("ix_movies_release_year", ["release_year"]),
        ("ix_movies_release_date", ["release_date"]),
        ("ix_movies_slug", ["slug"]),
        ("ix_movies_studio_id", ["studio_id"]),
        ("ix_movies_studio_year", ["studio_id", "release_year"]),
    ):
        op.create_index(index_name, "movies", indexed_columns)

    op.create_table(
        "movie_external_refs",
        _uuid_fk("source_id", "sources.id", "CASCADE", primary_key=True),
        sa.Column("external_id", sa.String(256), primary_key=True),
        _uuid_fk("movie_id", "movies.id", "CASCADE", nullable=False),
        sa.Column("confidence", sa.Float, nullable=False, server_default="1.0"),
        sa.Column("url", sa.String(1024)),
        _now_timestamp("first_seen"),
        _now_timestamp("last_seen"),
    )
    op.create_index(
        "ix_movie_external_refs_movie_id", "movie_external_refs", ["movie_id"]
    )

    op.create_table(
        "movie_performers",
        _uuid_fk("movie_id", "movies.id", "CASCADE", primary_key=True),
        _uuid_fk("performer_id", "performers.id", "CASCADE", primary_key=True),
        sa.Column("role", sa.String(64)),
        sa.Column("position", sa.Integer),
        sa.Column("as_alias", sa.String(256)),
    )

    op.create_table(
        "movie_tags",
        _uuid_fk("movie_id", "movies.id", "CASCADE", primary_key=True),
        _uuid_fk("tag_id", "tags.id", "CASCADE", primary_key=True),
        _uuid_fk("source_id", "sources.id", "SET NULL"),
    )

    # Chapters — optional table for movies split into scenes/segments
    # (paradisehill sometimes has timestamp markers, e.g. "Scene 1: 00:00-15:32").
    # Each chapter MAY link to an existing Scene (if that scene is also known
    # on its own from TPDB/StashDB), or live only as an anchor within the movie.
    op.create_table(
        "movie_chapters",
        _uuid_pk(),
        _uuid_fk("movie_id", "movies.id", "CASCADE", nullable=False),
        sa.Column("chapter_index", sa.Integer, nullable=False),
        sa.Column("title", sa.String(512)),
        sa.Column("start_sec", sa.Integer),
        sa.Column("end_sec", sa.Integer),
        _uuid_fk("scene_id", "scenes.id", "SET NULL"),
        sa.UniqueConstraint(
            "movie_id",
            "chapter_index",
            name="uq_movie_chapters_movie_id_chapter_index",
        ),
    )
    op.create_index("ix_movie_chapters_movie_id", "movie_chapters", ["movie_id"])

    # Playback sources for movies — analogous to playback_sources, kept as a
    # separate table because we do not want to mix scene_id/movie_id in a single
    # FK column. Reuses the origin convention ('paradisehill',
    # 'psyplay:streamporn', 'wp_movies:speedporn', etc.).
    op.create_table(
        "movie_playback_sources",
        _uuid_pk(),
        _uuid_fk("movie_id", "movies.id", "CASCADE", nullable=False),
        sa.Column("origin", sa.String(64), nullable=False),
        sa.Column("page_url", sa.String(2048), nullable=False),
        sa.Column("embed_url", sa.String(2048)),
        sa.Column("stream_url", sa.String(2048)),
        sa.Column("quality", sa.String(16)),
        sa.Column("duration_sec", sa.Integer),
        sa.Column("thumbnail_url", sa.String(2048)),
        sa.Column("animated_thumbnail_url", sa.String(2048)),
        _now_timestamp("last_seen_at"),
        sa.Column("dead_at", sa.DateTime(timezone=True)),
        sa.Column("dead_reason", sa.String(512)),
        _now_timestamp("created_at"),
        _now_timestamp("updated_at"),
        sa.UniqueConstraint(
            "origin",
            "page_url",
            name="uq_movie_playback_sources_origin_page_url",
        ),
    )
    for index_name, indexed_column in (
        ("ix_movie_playback_sources_movie_id", "movie_id"),
        ("ix_movie_playback_sources_origin", "origin"),
        ("ix_movie_playback_sources_dead_at", "dead_at"),
    ):
        op.create_index(index_name, "movie_playback_sources", [indexed_column])
def downgrade() -> None:
    """Drop the movie tables, those referencing ``movies`` before ``movies`` itself.

    The 'movie' values added to ``entity_kind`` / ``merge_kind`` are
    intentionally left in place.
    """
    tables_in_drop_order = (
        "movie_playback_sources",
        "movie_chapters",
        "movie_tags",
        "movie_performers",
        "movie_external_refs",
        "movies",
    )
    for table_name in tables_in_drop_order:
        op.drop_table(table_name)
    # Postgres offers no simple way to remove a value from an enum, so 'movie'
    # stays in entity_kind / merge_kind. The overhead in the enum catalog is
    # small (on the order of bytes per type) and this is safer than attempting
    # a DROP VALUE.