Goon — self-hosted aggregator for adult-content scene metadata. Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites. Cross-source deduplication via perceptual hash + Levenshtein distance. FastAPI backend + APScheduler worker + React Native (Expo) mobile client. FOSS, ad-free, donation-funded. See README for details.
313 lines
17 KiB
Python
313 lines
17 KiB
Python
"""initial schema

Revision ID: 0001_initial
Revises:
Create Date: 2026-05-02

"""
|
|
from collections.abc import Sequence
|
|
|
|
import sqlalchemy as sa
|
|
from alembic import op
|
|
from sqlalchemy.dialects import postgresql
|
|
|
|
revision: str = "0001_initial"
|
|
down_revision: str | None = None
|
|
branch_labels: str | Sequence[str] | None = None
|
|
depends_on: str | Sequence[str] | None = None
|
|
|
|
|
|
# `create_type=False` because each enum is created once, explicitly, below;
# using the same instance in several columns with `create_type=True` would
# try to create the type more than once.
SOURCE_KIND = postgresql.ENUM(
    "tpdb",
    "stashdb",
    "scraper",
    "porn_app",
    "manual",
    name="source_kind",
    create_type=False,
)

ENTITY_KIND = postgresql.ENUM(
    "scene",
    "performer",
    "studio",
    "tag",
    name="entity_kind",
    create_type=False,
)

PERFORMER_GENDER = postgresql.ENUM(
    "female",
    "male",
    "transgender_female",
    "transgender_male",
    "non_binary",
    "intersex",
    "unknown",
    name="performer_gender",
    create_type=False,
)

FINGERPRINT_KIND = postgresql.ENUM(
    "phash",
    "oshash",
    "md5",
    name="fingerprint_kind",
    create_type=False,
)

MERGE_KIND = postgresql.ENUM(
    "scene",
    "performer",
    "studio",
    name="merge_kind",
    create_type=False,
)

MERGE_STATUS = postgresql.ENUM(
    "pending",
    "auto_merged",
    "merged",
    "rejected",
    name="merge_status",
    create_type=False,
)

INGEST_STATUS = postgresql.ENUM(
    "running",
    "success",
    "partial",
    "failed",
    name="ingest_status",
    create_type=False,
)
|
|
|
|
|
|
def upgrade() -> None:
    """Create the initial schema: extensions, enum types, tables and indexes.

    Statement order is significant: every table is created after the tables
    its foreign keys point at, and each table's indexes are created right
    after the table itself.
    """
    # Install the extensions first; the GIN indexes created further down use
    # the ``gin_trgm_ops`` operator class.
    for statement in (
        "CREATE EXTENSION IF NOT EXISTS pg_trgm;",
        "CREATE EXTENSION IF NOT EXISTS unaccent;",
        "CREATE EXTENSION IF NOT EXISTS pgcrypto;",
    ):
        op.execute(statement)

    # The enum instances are declared with ``create_type=False``, so the types
    # have to be created explicitly before any table uses them.
    bind = op.get_bind()
    for enum_type in (
        SOURCE_KIND,
        ENTITY_KIND,
        PERFORMER_GENDER,
        FINGERPRINT_KIND,
        MERGE_KIND,
        MERGE_STATUS,
        INGEST_STATUS,
    ):
        enum_type.create(bind, checkfirst=True)

    # --- sources -----------------------------------------------------------
    op.create_table(
        "sources",
        _uuid_pk(),
        sa.Column("kind", SOURCE_KIND, nullable=False),
        sa.Column("name", sa.String(128), nullable=False, unique=True),
        sa.Column("base_url", sa.String(512)),
        sa.Column("auth_secret_ref", sa.String(128)),
        sa.Column("weight", sa.Float, nullable=False, server_default="1.0"),
        _now_timestamp("created_at"),
        _now_timestamp("updated_at"),
    )

    # --- studios -----------------------------------------------------------
    op.create_table(
        "studios",
        _uuid_pk(),
        sa.Column("name", sa.String(256), nullable=False),
        sa.Column("name_normalized", sa.String(256), nullable=False),
        sa.Column("slug", sa.String(256), nullable=False, unique=True),
        _uuid_fk("parent_studio_id", "studios.id", "SET NULL"),
        sa.Column("network", sa.String(256)),
        sa.Column("homepage_url", sa.String(512)),
        _now_timestamp("created_at"),
        _now_timestamp("updated_at"),
    )
    op.create_index("ix_studios_name_normalized", "studios", ["name_normalized"])
    op.execute(
        "CREATE INDEX ix_studios_name_normalized_trgm ON studios "
        "USING GIN (name_normalized gin_trgm_ops);"
    )

    op.create_table(
        "studio_aliases",
        _uuid_pk(),
        _uuid_fk("studio_id", "studios.id", "CASCADE", nullable=False),
        sa.Column("alias", sa.String(256), nullable=False),
        sa.Column("alias_normalized", sa.String(256), nullable=False),
        _uuid_fk("source_id", "sources.id", "SET NULL"),
        sa.UniqueConstraint(
            "studio_id",
            "alias_normalized",
            name="uq_studio_aliases_studio_id_alias_normalized",
        ),
    )
    op.create_index("ix_studio_aliases_studio_id", "studio_aliases", ["studio_id"])
    op.create_index(
        "ix_studio_aliases_alias_normalized",
        "studio_aliases",
        ["alias_normalized"],
    )
    op.execute(
        "CREATE INDEX ix_studio_aliases_alias_normalized_trgm ON studio_aliases "
        "USING GIN (alias_normalized gin_trgm_ops);"
    )

    op.create_table(
        "studio_external_refs",
        _uuid_fk("source_id", "sources.id", "CASCADE", primary_key=True),
        sa.Column("external_id", sa.String(256), primary_key=True),
        _uuid_fk("studio_id", "studios.id", "CASCADE", nullable=False),
        sa.Column("confidence", sa.Float, nullable=False, server_default="1.0"),
        _now_timestamp("first_seen"),
        _now_timestamp("last_seen"),
    )
    op.create_index(
        "ix_studio_external_refs_studio_id",
        "studio_external_refs",
        ["studio_id"],
    )

    # --- performers --------------------------------------------------------
    op.create_table(
        "performers",
        _uuid_pk(),
        sa.Column("canonical_name", sa.String(256), nullable=False),
        sa.Column("name_normalized", sa.String(256), nullable=False),
        sa.Column("slug", sa.String(256), nullable=False, unique=True),
        sa.Column("gender", PERFORMER_GENDER),
        sa.Column("birth_date", sa.Date),
        sa.Column("country", sa.String(64)),
        _now_timestamp("created_at"),
        _now_timestamp("updated_at"),
    )
    op.create_index("ix_performers_name_normalized", "performers", ["name_normalized"])
    op.execute(
        "CREATE INDEX ix_performers_name_normalized_trgm ON performers "
        "USING GIN (name_normalized gin_trgm_ops);"
    )

    op.create_table(
        "performer_aliases",
        _uuid_pk(),
        _uuid_fk("performer_id", "performers.id", "CASCADE", nullable=False),
        sa.Column("alias", sa.String(256), nullable=False),
        sa.Column("alias_normalized", sa.String(256), nullable=False),
        _uuid_fk("source_id", "sources.id", "SET NULL"),
        sa.UniqueConstraint(
            "performer_id",
            "alias_normalized",
            name="uq_performer_aliases_performer_id_alias_normalized",
        ),
    )
    op.create_index(
        "ix_performer_aliases_performer_id",
        "performer_aliases",
        ["performer_id"],
    )
    op.create_index(
        "ix_performer_aliases_alias_normalized",
        "performer_aliases",
        ["alias_normalized"],
    )
    op.execute(
        "CREATE INDEX ix_performer_aliases_alias_normalized_trgm ON performer_aliases "
        "USING GIN (alias_normalized gin_trgm_ops);"
    )

    op.create_table(
        "performer_external_refs",
        _uuid_fk("source_id", "sources.id", "CASCADE", primary_key=True),
        sa.Column("external_id", sa.String(256), primary_key=True),
        _uuid_fk("performer_id", "performers.id", "CASCADE", nullable=False),
        sa.Column("confidence", sa.Float, nullable=False, server_default="1.0"),
        _now_timestamp("first_seen"),
        _now_timestamp("last_seen"),
    )
    op.create_index(
        "ix_performer_external_refs_performer_id",
        "performer_external_refs",
        ["performer_id"],
    )

    # --- tags --------------------------------------------------------------
    op.create_table(
        "tags",
        _uuid_pk(),
        sa.Column("name", sa.String(128), nullable=False),
        sa.Column("slug", sa.String(128), nullable=False, unique=True),
        _uuid_fk("parent_tag_id", "tags.id", "SET NULL"),
        sa.Column("description", sa.String(1024)),
        _now_timestamp("created_at"),
        _now_timestamp("updated_at"),
    )

    # --- scenes ------------------------------------------------------------
    op.create_table(
        "scenes",
        _uuid_pk(),
        sa.Column("title", sa.String(512), nullable=False),
        sa.Column("title_normalized", sa.String(512), nullable=False),
        sa.Column("slug", sa.String(512)),
        sa.Column("release_date", sa.Date),
        _uuid_fk("studio_id", "studios.id", "SET NULL"),
        sa.Column("duration_sec", sa.Integer),
        sa.Column("description", sa.Text),
        sa.Column("code", sa.String(128)),
        sa.Column("director", sa.String(256)),
        _now_timestamp("created_at"),
        _now_timestamp("updated_at"),
    )
    op.create_index("ix_scenes_title_normalized", "scenes", ["title_normalized"])
    op.execute(
        "CREATE INDEX ix_scenes_title_normalized_trgm ON scenes "
        "USING GIN (title_normalized gin_trgm_ops);"
    )
    op.create_index("ix_scenes_release_date", "scenes", ["release_date"])
    op.create_index("ix_scenes_slug", "scenes", ["slug"])
    op.create_index("ix_scenes_studio_id", "scenes", ["studio_id"])
    op.create_index("ix_scenes_code", "scenes", ["code"])
    op.create_index(
        "ix_scenes_studio_release_date",
        "scenes",
        ["studio_id", "release_date"],
    )

    op.create_table(
        "scene_external_refs",
        _uuid_fk("source_id", "sources.id", "CASCADE", primary_key=True),
        sa.Column("external_id", sa.String(256), primary_key=True),
        _uuid_fk("scene_id", "scenes.id", "CASCADE", nullable=False),
        sa.Column("confidence", sa.Float, nullable=False, server_default="1.0"),
        sa.Column("url", sa.String(1024)),
        _now_timestamp("first_seen"),
        _now_timestamp("last_seen"),
    )
    op.create_index(
        "ix_scene_external_refs_scene_id",
        "scene_external_refs",
        ["scene_id"],
    )

    op.create_table(
        "scene_fingerprints",
        _uuid_pk(),
        _uuid_fk("scene_id", "scenes.id", "CASCADE", nullable=False),
        sa.Column("kind", FINGERPRINT_KIND, nullable=False),
        sa.Column("value", sa.String(128), nullable=False),
        _uuid_fk("source_id", "sources.id", "SET NULL"),
        sa.UniqueConstraint(
            "scene_id",
            "kind",
            "value",
            name="uq_scene_fingerprints_scene_id_kind_value",
        ),
    )
    op.create_index("ix_scene_fingerprints_scene_id", "scene_fingerprints", ["scene_id"])
    op.create_index("ix_scene_fingerprints_value", "scene_fingerprints", ["value"])

    # Association tables: the two foreign keys together form the primary key.
    op.create_table(
        "scene_performers",
        _uuid_fk("scene_id", "scenes.id", "CASCADE", primary_key=True),
        _uuid_fk("performer_id", "performers.id", "CASCADE", primary_key=True),
        sa.Column("role", sa.String(64)),
        sa.Column("position", sa.Integer),
        sa.Column("as_alias", sa.String(256)),
    )

    op.create_table(
        "scene_tags",
        _uuid_fk("scene_id", "scenes.id", "CASCADE", primary_key=True),
        _uuid_fk("tag_id", "tags.id", "CASCADE", primary_key=True),
        _uuid_fk("source_id", "sources.id", "SET NULL"),
    )

    # --- ingestion and merge bookkeeping -----------------------------------
    op.create_table(
        "external_records",
        _uuid_pk(),
        _uuid_fk("source_id", "sources.id", "CASCADE", nullable=False),
        sa.Column("entity_kind", ENTITY_KIND, nullable=False),
        sa.Column("external_id", sa.String(256), nullable=False),
        sa.Column("raw", postgresql.JSONB, nullable=False),
        sa.Column("raw_hash", sa.LargeBinary(32), nullable=False),
        _now_timestamp("fetched_at"),
        _now_timestamp("last_seen_at"),
        sa.UniqueConstraint(
            "source_id",
            "entity_kind",
            "external_id",
            name="uq_external_records_source_id_entity_kind_external_id",
        ),
    )
    op.create_index("ix_external_records_source_id", "external_records", ["source_id"])

    op.create_table(
        "merge_candidates",
        _uuid_pk(),
        sa.Column("kind", MERGE_KIND, nullable=False),
        # left_id / right_id are plain UUIDs with no foreign key constraint.
        sa.Column("left_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("right_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("score", sa.Float, nullable=False),
        sa.Column("reasons", postgresql.JSONB, nullable=False, server_default="{}"),
        sa.Column("status", MERGE_STATUS, nullable=False, server_default="pending"),
        _now_timestamp("created_at"),
        sa.Column("resolved_at", sa.DateTime(timezone=True)),
        sa.Column("resolved_by", sa.String(128)),
    )
    op.create_index("ix_merge_candidates_left_id", "merge_candidates", ["left_id"])
    op.create_index("ix_merge_candidates_right_id", "merge_candidates", ["right_id"])
    op.create_index("ix_merge_candidates_status", "merge_candidates", ["status"])

    op.create_table(
        "ingest_runs",
        _uuid_pk(),
        _uuid_fk("source_id", "sources.id", "CASCADE", nullable=False),
        _now_timestamp("started_at"),
        sa.Column("finished_at", sa.DateTime(timezone=True)),
        sa.Column("status", INGEST_STATUS, nullable=False, server_default="running"),
        sa.Column("records_seen", sa.Integer, nullable=False, server_default="0"),
        sa.Column("records_new", sa.Integer, nullable=False, server_default="0"),
        sa.Column("records_updated", sa.Integer, nullable=False, server_default="0"),
        sa.Column("errors", postgresql.JSONB),
    )
    op.create_index("ix_ingest_runs_source_id", "ingest_runs", ["source_id"])


def _uuid_pk() -> sa.Column:
    """Return a new ``id`` UUID primary-key column defaulting to ``gen_random_uuid()``."""
    return sa.Column(
        "id",
        postgresql.UUID(as_uuid=True),
        primary_key=True,
        server_default=sa.text("gen_random_uuid()"),
    )


def _uuid_fk(name: str, target: str, ondelete: str, **column_options: bool) -> sa.Column:
    """Return a new UUID column ``name`` with a foreign key to ``target``.

    ``ondelete`` is passed to ``sa.ForeignKey``; ``column_options`` (for
    example ``nullable=False`` or ``primary_key=True``) are forwarded to
    ``sa.Column`` unchanged.
    """
    return sa.Column(
        name,
        postgresql.UUID(as_uuid=True),
        sa.ForeignKey(target, ondelete=ondelete),
        **column_options,
    )


def _now_timestamp(name: str) -> sa.Column:
    """Return a new non-null timezone-aware timestamp column defaulting to ``NOW()``."""
    return sa.Column(
        name,
        sa.DateTime(timezone=True),
        server_default=sa.text("NOW()"),
        nullable=False,
    )
|
|
|
|
|
|
def downgrade() -> None:
    """Drop every table and enum type created by ``upgrade``.

    Tables are dropped in the reverse of their creation order, so a table is
    removed before the tables its foreign keys reference. The enum types are
    dropped afterwards, once no column uses them. The PostgreSQL extensions
    installed by ``upgrade`` are left in place.
    """
    for table_name in (
        "ingest_runs",
        "merge_candidates",
        "external_records",
        "scene_tags",
        "scene_performers",
        "scene_fingerprints",
        "scene_external_refs",
        "scenes",
        "tags",
        "performer_external_refs",
        "performer_aliases",
        "performers",
        "studio_external_refs",
        "studio_aliases",
        "studios",
        "sources",
    ):
        op.drop_table(table_name)

    bind = op.get_bind()
    for enum_type in (
        INGEST_STATUS,
        MERGE_STATUS,
        MERGE_KIND,
        FINGERPRINT_KIND,
        PERFORMER_GENDER,
        ENTITY_KIND,
        SOURCE_KIND,
    ):
        enum_type.drop(bind, checkfirst=True)
|