"""initial schema

Revision ID: 0001_initial
Revises:
Create Date: 2026-05-02

"""

from collections.abc import Sequence

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

revision: str = "0001_initial"
down_revision: str | None = None
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None

# `create_type=False` because each enum is created once, explicitly, in
# upgrade(); using the same instance in several columns with
# `create_type=True` would try to create the type multiple times.
SOURCE_KIND = postgresql.ENUM(
    "tpdb",
    "stashdb",
    "scraper",
    "porn_app",
    "manual",
    name="source_kind",
    create_type=False,
)
ENTITY_KIND = postgresql.ENUM(
    "scene",
    "performer",
    "studio",
    "tag",
    name="entity_kind",
    create_type=False,
)
PERFORMER_GENDER = postgresql.ENUM(
    "female",
    "male",
    "transgender_female",
    "transgender_male",
    "non_binary",
    "intersex",
    "unknown",
    name="performer_gender",
    create_type=False,
)
FINGERPRINT_KIND = postgresql.ENUM(
    "phash",
    "oshash",
    "md5",
    name="fingerprint_kind",
    create_type=False,
)
MERGE_KIND = postgresql.ENUM(
    "scene",
    "performer",
    "studio",
    name="merge_kind",
    create_type=False,
)
MERGE_STATUS = postgresql.ENUM(
    "pending",
    "auto_merged",
    "merged",
    "rejected",
    name="merge_status",
    create_type=False,
)
INGEST_STATUS = postgresql.ENUM(
    "running",
    "success",
    "partial",
    "failed",
    name="ingest_status",
    create_type=False,
)

# Enum types in creation order; downgrade() walks this tuple in reverse.
_ENUM_TYPES = (
    SOURCE_KIND,
    ENTITY_KIND,
    PERFORMER_GENDER,
    FINGERPRINT_KIND,
    MERGE_KIND,
    MERGE_STATUS,
    INGEST_STATUS,
)

# Extensions enabled by upgrade(), in execution order.
_EXTENSION_STATEMENTS = (
    "CREATE EXTENSION IF NOT EXISTS pg_trgm;",
    "CREATE EXTENSION IF NOT EXISTS unaccent;",
    "CREATE EXTENSION IF NOT EXISTS pgcrypto;",
)

# Tables in the order downgrade() drops them: referencing tables come before
# the tables their foreign keys point at.
_TABLES_IN_DROP_ORDER = (
    "ingest_runs",
    "merge_candidates",
    "external_records",
    "scene_tags",
    "scene_performers",
    "scene_fingerprints",
    "scene_external_refs",
    "scenes",
    "tags",
    "performer_external_refs",
    "performer_aliases",
    "performers",
    "studio_external_refs",
    "studio_aliases",
    "studios",
    "sources",
)


def _uuid_pk() -> sa.Column:
    """Return a fresh `id` UUID primary-key column defaulting to gen_random_uuid()."""
    return sa.Column(
        "id",
        postgresql.UUID(as_uuid=True),
        primary_key=True,
        server_default=sa.text("gen_random_uuid()"),
    )


def _now_ts(name: str) -> sa.Column:
    """Return a NOT NULL timezone-aware timestamp column defaulting to NOW()."""
    return sa.Column(
        name,
        sa.DateTime(timezone=True),
        server_default=sa.text("NOW()"),
        nullable=False,
    )


def _uuid_fk(name: str, target: str, ondelete: str, **column_kwargs: bool) -> sa.Column:
    """Return a UUID column referencing `target` with the given ON DELETE rule.

    Extra keyword arguments (e.g. ``nullable=False``, ``primary_key=True``)
    are forwarded to ``sa.Column`` unchanged.
    """
    return sa.Column(
        name,
        postgresql.UUID(as_uuid=True),
        sa.ForeignKey(target, ondelete=ondelete),
        **column_kwargs,
    )


def _create_search_indexes(table: str, column: str) -> None:
    """Create the plain index on `column`, then its GIN trigram counterpart."""
    op.create_index(f"ix_{table}_{column}", table, [column])
    op.execute(
        f"CREATE INDEX ix_{table}_{column}_trgm ON {table} "
        f"USING GIN ({column} gin_trgm_ops);"
    )


def upgrade() -> None:
    """Enable the extensions, create the enum types, then build every table and index."""
    for statement in _EXTENSION_STATEMENTS:
        op.execute(statement)

    bind = op.get_bind()
    for enum_type in _ENUM_TYPES:
        enum_type.create(bind, checkfirst=True)

    # --- sources -----------------------------------------------------------
    op.create_table(
        "sources",
        _uuid_pk(),
        sa.Column("kind", SOURCE_KIND, nullable=False),
        sa.Column("name", sa.String(128), nullable=False, unique=True),
        sa.Column("base_url", sa.String(512)),
        sa.Column("auth_secret_ref", sa.String(128)),
        sa.Column("weight", sa.Float, nullable=False, server_default="1.0"),
        _now_ts("created_at"),
        _now_ts("updated_at"),
    )

    # --- studios -----------------------------------------------------------
    op.create_table(
        "studios",
        _uuid_pk(),
        sa.Column("name", sa.String(256), nullable=False),
        sa.Column("name_normalized", sa.String(256), nullable=False),
        sa.Column("slug", sa.String(256), nullable=False, unique=True),
        _uuid_fk("parent_studio_id", "studios.id", "SET NULL"),
        sa.Column("network", sa.String(256)),
        sa.Column("homepage_url", sa.String(512)),
        _now_ts("created_at"),
        _now_ts("updated_at"),
    )
    _create_search_indexes("studios", "name_normalized")

    op.create_table(
        "studio_aliases",
        _uuid_pk(),
        _uuid_fk("studio_id", "studios.id", "CASCADE", nullable=False),
        sa.Column("alias", sa.String(256), nullable=False),
        sa.Column("alias_normalized", sa.String(256), nullable=False),
        _uuid_fk("source_id", "sources.id", "SET NULL"),
        sa.UniqueConstraint(
            "studio_id",
            "alias_normalized",
            name="uq_studio_aliases_studio_id_alias_normalized",
        ),
    )
    op.create_index("ix_studio_aliases_studio_id", "studio_aliases", ["studio_id"])
    _create_search_indexes("studio_aliases", "alias_normalized")

    op.create_table(
        "studio_external_refs",
        _uuid_fk("source_id", "sources.id", "CASCADE", primary_key=True),
        sa.Column("external_id", sa.String(256), primary_key=True),
        _uuid_fk("studio_id", "studios.id", "CASCADE", nullable=False),
        sa.Column("confidence", sa.Float, nullable=False, server_default="1.0"),
        _now_ts("first_seen"),
        _now_ts("last_seen"),
    )
    op.create_index(
        "ix_studio_external_refs_studio_id", "studio_external_refs", ["studio_id"]
    )

    # --- performers --------------------------------------------------------
    op.create_table(
        "performers",
        _uuid_pk(),
        sa.Column("canonical_name", sa.String(256), nullable=False),
        sa.Column("name_normalized", sa.String(256), nullable=False),
        sa.Column("slug", sa.String(256), nullable=False, unique=True),
        sa.Column("gender", PERFORMER_GENDER),
        sa.Column("birth_date", sa.Date),
        sa.Column("country", sa.String(64)),
        _now_ts("created_at"),
        _now_ts("updated_at"),
    )
    _create_search_indexes("performers", "name_normalized")

    op.create_table(
        "performer_aliases",
        _uuid_pk(),
        _uuid_fk("performer_id", "performers.id", "CASCADE", nullable=False),
        sa.Column("alias", sa.String(256), nullable=False),
        sa.Column("alias_normalized", sa.String(256), nullable=False),
        _uuid_fk("source_id", "sources.id", "SET NULL"),
        sa.UniqueConstraint(
            "performer_id",
            "alias_normalized",
            name="uq_performer_aliases_performer_id_alias_normalized",
        ),
    )
    op.create_index(
        "ix_performer_aliases_performer_id", "performer_aliases", ["performer_id"]
    )
    _create_search_indexes("performer_aliases", "alias_normalized")

    op.create_table(
        "performer_external_refs",
        _uuid_fk("source_id", "sources.id", "CASCADE", primary_key=True),
        sa.Column("external_id", sa.String(256), primary_key=True),
        _uuid_fk("performer_id", "performers.id", "CASCADE", nullable=False),
        sa.Column("confidence", sa.Float, nullable=False, server_default="1.0"),
        _now_ts("first_seen"),
        _now_ts("last_seen"),
    )
    op.create_index(
        "ix_performer_external_refs_performer_id",
        "performer_external_refs",
        ["performer_id"],
    )

    # --- tags --------------------------------------------------------------
    op.create_table(
        "tags",
        _uuid_pk(),
        sa.Column("name", sa.String(128), nullable=False),
        sa.Column("slug", sa.String(128), nullable=False, unique=True),
        _uuid_fk("parent_tag_id", "tags.id", "SET NULL"),
        sa.Column("description", sa.String(1024)),
        _now_ts("created_at"),
        _now_ts("updated_at"),
    )

    # --- scenes ------------------------------------------------------------
    op.create_table(
        "scenes",
        _uuid_pk(),
        sa.Column("title", sa.String(512), nullable=False),
        sa.Column("title_normalized", sa.String(512), nullable=False),
        sa.Column("slug", sa.String(512)),
        sa.Column("release_date", sa.Date),
        _uuid_fk("studio_id", "studios.id", "SET NULL"),
        sa.Column("duration_sec", sa.Integer),
        sa.Column("description", sa.Text),
        sa.Column("code", sa.String(128)),
        sa.Column("director", sa.String(256)),
        _now_ts("created_at"),
        _now_ts("updated_at"),
    )
    _create_search_indexes("scenes", "title_normalized")
    op.create_index("ix_scenes_release_date", "scenes", ["release_date"])
    op.create_index("ix_scenes_slug", "scenes", ["slug"])
    op.create_index("ix_scenes_studio_id", "scenes", ["studio_id"])
    op.create_index("ix_scenes_code", "scenes", ["code"])
    op.create_index(
        "ix_scenes_studio_release_date", "scenes", ["studio_id", "release_date"]
    )

    op.create_table(
        "scene_external_refs",
        _uuid_fk("source_id", "sources.id", "CASCADE", primary_key=True),
        sa.Column("external_id", sa.String(256), primary_key=True),
        _uuid_fk("scene_id", "scenes.id", "CASCADE", nullable=False),
        sa.Column("confidence", sa.Float, nullable=False, server_default="1.0"),
        sa.Column("url", sa.String(1024)),
        _now_ts("first_seen"),
        _now_ts("last_seen"),
    )
    op.create_index(
        "ix_scene_external_refs_scene_id", "scene_external_refs", ["scene_id"]
    )

    op.create_table(
        "scene_fingerprints",
        _uuid_pk(),
        _uuid_fk("scene_id", "scenes.id", "CASCADE", nullable=False),
        sa.Column("kind", FINGERPRINT_KIND, nullable=False),
        sa.Column("value", sa.String(128), nullable=False),
        _uuid_fk("source_id", "sources.id", "SET NULL"),
        sa.UniqueConstraint(
            "scene_id",
            "kind",
            "value",
            name="uq_scene_fingerprints_scene_id_kind_value",
        ),
    )
    op.create_index(
        "ix_scene_fingerprints_scene_id", "scene_fingerprints", ["scene_id"]
    )
    op.create_index("ix_scene_fingerprints_value", "scene_fingerprints", ["value"])

    op.create_table(
        "scene_performers",
        _uuid_fk("scene_id", "scenes.id", "CASCADE", primary_key=True),
        _uuid_fk("performer_id", "performers.id", "CASCADE", primary_key=True),
        sa.Column("role", sa.String(64)),
        sa.Column("position", sa.Integer),
        sa.Column("as_alias", sa.String(256)),
    )

    op.create_table(
        "scene_tags",
        _uuid_fk("scene_id", "scenes.id", "CASCADE", primary_key=True),
        _uuid_fk("tag_id", "tags.id", "CASCADE", primary_key=True),
        _uuid_fk("source_id", "sources.id", "SET NULL"),
    )

    # --- ingest bookkeeping ------------------------------------------------
    op.create_table(
        "external_records",
        _uuid_pk(),
        _uuid_fk("source_id", "sources.id", "CASCADE", nullable=False),
        sa.Column("entity_kind", ENTITY_KIND, nullable=False),
        sa.Column("external_id", sa.String(256), nullable=False),
        sa.Column("raw", postgresql.JSONB, nullable=False),
        sa.Column("raw_hash", sa.LargeBinary(32), nullable=False),
        _now_ts("fetched_at"),
        _now_ts("last_seen_at"),
        sa.UniqueConstraint(
            "source_id",
            "entity_kind",
            "external_id",
            name="uq_external_records_source_id_entity_kind_external_id",
        ),
    )
    op.create_index("ix_external_records_source_id", "external_records", ["source_id"])

    op.create_table(
        "merge_candidates",
        _uuid_pk(),
        sa.Column("kind", MERGE_KIND, nullable=False),
        # left_id / right_id carry no foreign key in this schema.
        sa.Column("left_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("right_id", postgresql.UUID(as_uuid=True), nullable=False),
        sa.Column("score", sa.Float, nullable=False),
        sa.Column("reasons", postgresql.JSONB, nullable=False, server_default="{}"),
        sa.Column("status", MERGE_STATUS, nullable=False, server_default="pending"),
        _now_ts("created_at"),
        sa.Column("resolved_at", sa.DateTime(timezone=True)),
        sa.Column("resolved_by", sa.String(128)),
    )
    op.create_index("ix_merge_candidates_left_id", "merge_candidates", ["left_id"])
    op.create_index("ix_merge_candidates_right_id", "merge_candidates", ["right_id"])
    op.create_index("ix_merge_candidates_status", "merge_candidates", ["status"])

    op.create_table(
        "ingest_runs",
        _uuid_pk(),
        _uuid_fk("source_id", "sources.id", "CASCADE", nullable=False),
        _now_ts("started_at"),
        sa.Column("finished_at", sa.DateTime(timezone=True)),
        sa.Column("status", INGEST_STATUS, nullable=False, server_default="running"),
        sa.Column("records_seen", sa.Integer, nullable=False, server_default="0"),
        sa.Column("records_new", sa.Integer, nullable=False, server_default="0"),
        sa.Column("records_updated", sa.Integer, nullable=False, server_default="0"),
        sa.Column("errors", postgresql.JSONB),
    )
    op.create_index("ix_ingest_runs_source_id", "ingest_runs", ["source_id"])


def downgrade() -> None:
    """Drop every table, then every enum type, in reverse order of creation.

    The extensions enabled by upgrade() are not dropped here.
    """
    for table_name in _TABLES_IN_DROP_ORDER:
        op.drop_table(table_name)

    bind = op.get_bind()
    for enum_type in reversed(_ENUM_TYPES):
        enum_type.drop(bind, checkfirst=True)