goon/app/resolve/studio_resolver.py
goon-foss ad0284585b Initial commit
Goon — self-hosted aggregator for adult-content scene metadata.

Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites.
Cross-source deduplication via perceptual hash + Levenshtein distance.
FastAPI backend + APScheduler worker + React Native (Expo) mobile client.

FOSS, ad-free, donation-funded. See README for details.
2026-05-20 10:10:22 +02:00

118 lines
3.4 KiB
Python

"""M2: prosty resolver studia.
Ścieżka 1: exact external ref → update.
Ścieżka 2: name_normalized exact match → reuse + dopnij external_ref.
Ścieżka 3: insert nowego.
(Fuzzy alias matching dochodzi w M3.)
"""
from __future__ import annotations
import logging
import uuid
from sqlalchemy import select
from sqlalchemy.orm import Session
from app.models.studio import Studio, StudioAlias, StudioExternalRef
from app.normalize.scenes import NormalizedStudio
from app.normalize.text import slugify
log = logging.getLogger(__name__)
def resolve_studio(
    session: Session,
    *,
    norm: NormalizedStudio,
    source_id: uuid.UUID,
) -> Studio:
    """Return the ``Studio`` matching ``norm``, creating one when none matches.

    Resolution order:

    1. If ``norm.external_id`` is set and a ``StudioExternalRef`` exists for
       ``(source_id, norm.external_id)``, the referenced studio is loaded,
       its blank fields are filled from ``norm`` and it is returned.
    2. Otherwise a studio whose ``name_normalized`` equals
       ``norm.name_normalized`` is reused (blank fields filled from ``norm``).
    3. Otherwise a new studio is added to the session and flushed.

    After step 2 or 3, when ``norm.external_id`` is set and no external ref
    exists for it yet, a ``StudioExternalRef`` with ``confidence=1.0`` is
    added to the session. This function never commits.

    Args:
        session: ORM session used for all lookups and inserts.
        norm: Normalized studio data coming from a source.
        source_id: Identifier of the source that produced ``norm``.

    Returns:
        The matched or newly created studio.

    Raises:
        AssertionError: If an external ref points at a studio that
            ``session.get`` cannot find.
    """
    has_external_id = bool(norm.external_id)

    def find_external_ref() -> StudioExternalRef | None:
        # Looks up this source's ref for the incoming external id.
        ref_stmt = select(StudioExternalRef).where(
            StudioExternalRef.source_id == source_id,
            StudioExternalRef.external_id == norm.external_id,
        )
        return session.execute(ref_stmt).scalar_one_or_none()

    # Path 1: the source already has an external ref for this studio.
    known_ref = find_external_ref() if has_external_id else None
    if known_ref is not None:
        resolved = session.get(Studio, known_ref.studio_id)
        assert resolved is not None
        _update_studio_fields(resolved, norm)
        return resolved

    # Path 2: exact match on the normalized name.
    by_name_stmt = select(Studio).where(
        Studio.name_normalized == norm.name_normalized
    )
    resolved = session.execute(by_name_stmt).scalar_one_or_none()
    if resolved is not None:
        _update_studio_fields(resolved, norm)
    else:
        # Path 3: nothing matched, so insert a new studio.
        resolved = Studio(
            name=norm.name,
            name_normalized=norm.name_normalized,
            slug=_unique_slug(session, norm.slug or slugify(norm.name) or "studio"),
            network=norm.network,
            homepage_url=norm.homepage_url,
        )
        session.add(resolved)
        # Flushed before ``resolved.id`` is read below — presumably so a
        # database-generated id is populated; confirm against the model.
        session.flush()
        log.debug("studio create id=%s name=%s", resolved.id, resolved.name)

    # Attach the external ref so the next run resolves through path 1.
    if has_external_id and find_external_ref() is None:
        session.add(
            StudioExternalRef(
                source_id=source_id,
                external_id=norm.external_id,
                studio_id=resolved.id,
                confidence=1.0,
            )
        )
    return resolved
def _update_studio_fields(studio: Studio, norm: NormalizedStudio) -> None:
    """Fill the studio's blank ``network`` / ``homepage_url`` from ``norm``.

    A field is copied only when ``norm`` carries a truthy value and the
    studio's current value is falsy; existing values are never overwritten.
    """
    for field_name in ("network", "homepage_url"):
        incoming = getattr(norm, field_name)
        if incoming and not getattr(studio, field_name):
            setattr(studio, field_name, incoming)
def _unique_slug(session: Session, base: str) -> str:
    """Return ``base``, or the first ``base-N`` (N = 2, 3, ...) no studio uses.

    One query is issued per candidate. Nothing is reserved in the
    database, so the returned slug is only known to be free at query time.
    """
    slug = base
    next_suffix = 2
    while True:
        taken = session.execute(
            select(Studio.id).where(Studio.slug == slug)
        ).first()
        if not taken:
            return slug
        slug = f"{base}-{next_suffix}"
        next_suffix += 1
def add_alias_if_missing(
    session: Session,
    *,
    studio_id: uuid.UUID,
    alias: str,
    alias_normalized: str,
    source_id: uuid.UUID | None,
) -> None:
    """Add a ``StudioAlias`` unless the studio already has that alias.

    An alias counts as present when a row exists with the same
    ``studio_id`` and ``alias_normalized``. The new row is only added to
    the session; this function neither flushes nor commits.

    Args:
        session: ORM session used for the lookup and the insert.
        studio_id: Studio the alias belongs to.
        alias: Alias text as provided by the caller.
        alias_normalized: Normalized form used for the duplicate check.
        source_id: Source that supplied the alias, or ``None``.
    """
    # Pure existence check: ``.limit(1)`` + ``.first()`` never raises when
    # duplicate alias rows already exist (``scalar_one_or_none()`` would
    # raise MultipleResultsFound), so the helper stays a no-op in that case.
    already_present = session.execute(
        select(StudioAlias)
        .where(
            StudioAlias.studio_id == studio_id,
            StudioAlias.alias_normalized == alias_normalized,
        )
        .limit(1)
    ).first()
    if already_present is not None:
        return

    session.add(
        StudioAlias(
            studio_id=studio_id,
            alias=alias,
            alias_normalized=alias_normalized,
            source_id=source_id,
        )
    )