goon/app/models/performer.py
jtrzupek 2163fee245 perf(taxonomy): denormalize scene_count for tags/performers/studios
Counts for /tags, /performers, /studios and /favorites were computed live
per-request by aggregating scene_tags / scene_performers with an EXISTS to
playback_sources. As the catalog grew to ~1.7M scenes (6.3M scene_tags) this
ran ~4.3s for /tags?order=popular (x2 incl. the total count) and ~950ms for
the default /scenes count, making those screens load in several seconds.

- migration 0019: add scene_count (+ DESC index) to tags/performers/studios
- background job _job_refresh_taxonomy_counts (every 3h) recomputes the counts
  in one UPDATE..FROM each (IS DISTINCT FROM to skip unchanged rows)
- /tags, /performers, /studios scenes path now read the column + ORDER BY the
  indexed scene_count; for_movies paths keep live aggregation (small tables)
- favorites read denormalized scene_count instead of a grouped EXISTS aggregate
- /scenes default count: 10-min in-process TTL cache (header is approximate)

Measured: /tags?order=popular&per_page=500 ~8s -> 66ms incl. serialization.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-31 17:53:48 +02:00

86 lines
3.4 KiB
Python

import enum
import uuid
from datetime import date, datetime
from sqlalchemy import Date, DateTime, Enum, Float, ForeignKey, Integer, String, UniqueConstraint, func
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import Mapped, mapped_column
from app.models.base import Base, TimestampMixin, UUIDPKMixin
class Gender(str, enum.Enum):
    """Closed set of gender values a performer record may carry.

    The ``str`` mixin makes every member an actual string, so a member
    compares equal to its plain value (``Gender.male == "male"``).
    Each member's value is identical to its name.
    """

    # Declaration order is kept stable on purpose: iteration order of the
    # enum follows it.
    female = "female"
    male = "male"
    transgender_female = "transgender_female"
    transgender_male = "transgender_male"
    non_binary = "non_binary"
    intersex = "intersex"
    unknown = "unknown"
class Performer(UUIDPKMixin, TimestampMixin, Base):
    """ORM model mapped to the ``performers`` table.

    Besides the columns declared here, the class inherits from
    ``UUIDPKMixin`` and ``TimestampMixin`` (defined in ``app.models.base``,
    not visible in this file) -- presumably the primary key and the
    created/updated timestamps; confirm against that module.
    """

    __tablename__ = "performers"

    # --- naming -----------------------------------------------------------
    canonical_name: Mapped[str] = mapped_column(
        String(256),
        nullable=False,
    )
    name_normalized: Mapped[str] = mapped_column(
        String(256),
        index=True,
        nullable=False,
    )
    slug: Mapped[str] = mapped_column(
        String(256),
        unique=True,
        nullable=False,
    )

    # --- optional descriptive attributes ----------------------------------
    gender: Mapped[Gender | None] = mapped_column(
        Enum(Gender, name="performer_gender"),
    )
    birth_date: Mapped[date | None] = mapped_column(Date)
    country: Mapped[str | None] = mapped_column(String(64))

    # Continuous search worker: when the last per-performer search across
    # tubes ran. Queue: ORDER BY last_searched_at NULLS FIRST,
    # search_run_count ASC. After a full sweep, the oldest entries are
    # refreshed cyclically.
    last_searched_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True),
        nullable=True,
    )
    search_run_count: Mapped[int] = mapped_column(
        Integer,
        default=0,
        server_default="0",
        nullable=False,
    )

    # Denormalized count of scenes with live playback (refreshed in the
    # background). See migration 0019 + _job_refresh_taxonomy_counts.
    # Used for "popular" sorting and the badge in favorites.
    scene_count: Mapped[int] = mapped_column(
        Integer,
        default=0,
        server_default="0",
        nullable=False,
    )
class PerformerAlias(Base):
    """ORM model mapped to the ``performer_aliases`` table.

    Each row attaches one alias string to a performer. The pair
    ``(performer_id, alias_normalized)`` is unique, so a performer cannot
    hold the same normalized alias twice.
    """

    __tablename__ = "performer_aliases"
    __table_args__ = (
        UniqueConstraint("performer_id", "alias_normalized"),
    )

    # Primary key; the UUID is generated by the database via
    # gen_random_uuid() (no Python-side default is declared).
    id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        server_default=func.gen_random_uuid(),
        primary_key=True,
    )

    # Owning performer; alias rows are removed with it (ON DELETE CASCADE).
    performer_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("performers.id", ondelete="CASCADE"),
        index=True,
        nullable=False,
    )

    alias: Mapped[str] = mapped_column(String(256), nullable=False)
    alias_normalized: Mapped[str] = mapped_column(
        String(256),
        index=True,
        nullable=False,
    )

    # Optional reference into ``sources``; set to NULL when that source row
    # is deleted (ON DELETE SET NULL).
    source_id: Mapped[uuid.UUID | None] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("sources.id", ondelete="SET NULL"),
    )
class PerformerExternalRef(Base):
    """ORM model mapped to the ``performer_external_refs`` table.

    Rows are keyed by the composite primary key ``(source_id, external_id)``
    and each one points at a single performer.
    """

    __tablename__ = "performer_external_refs"

    # --- composite primary key --------------------------------------------
    # The row disappears when the referenced source is deleted
    # (ON DELETE CASCADE).
    source_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("sources.id", ondelete="CASCADE"),
        primary_key=True,
    )
    external_id: Mapped[str] = mapped_column(String, primary_key=True)

    # Target performer; the row is removed with it (ON DELETE CASCADE).
    performer_id: Mapped[uuid.UUID] = mapped_column(
        UUID(as_uuid=True),
        ForeignKey("performers.id", ondelete="CASCADE"),
        index=True,
        nullable=False,
    )

    # Defaults to 1.0 both in Python and at the database level.
    # NOTE(review): presumably the confidence of the source-to-performer
    # match -- confirm the intended scale with the writers of this column.
    confidence: Mapped[float] = mapped_column(
        Float,
        default=1.0,
        server_default="1.0",
        nullable=False,
    )

    # Both timestamps are timezone-aware and default to now() on insert at
    # the database level; no automatic update hook is declared here.
    first_seen: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
    last_seen: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )