"""Scoring kandydat ↔ kandydat dla pipeline'u dedup.""" from __future__ import annotations import uuid from collections.abc import Iterable from sqlalchemy import select from sqlalchemy.orm import Session from app.models.scene import Scene, SceneFingerprint, ScenePerformer from app.normalize.scenes import NormalizedScene from app.resolve.scoring import ( ScoreBreakdown, composite_score, date_proximity, duration_proximity, performer_set_similarity, phash_similarity, title_similarity, ) def score_candidate( session: Session, *, candidate: Scene, norm: NormalizedScene, resolved_performer_ids: Iterable[uuid.UUID], studio_id: uuid.UUID | None, aggregator_mode: bool = False, ) -> ScoreBreakdown: """Liczy ScoreBreakdown dla pary (kandydat z DB, znormalizowana scena z importu). `aggregator_mode=True` dla scen pochodzących z tube/agregatora (np. pornapp): studio nie jest informatywne (tube agreguje wiele studiów), performers stają się głównym sygnałem — patrz `composite_score` szczegóły. """ fp = _best_phash_similarity(session, candidate.id, norm.fingerprints) title = title_similarity(candidate.title_normalized, norm.title_normalized) cand_perfs = _candidate_performer_ids(session, candidate.id) perf = performer_set_similarity(cand_perfs, list(resolved_performer_ids)) if (cand_perfs or list(resolved_performer_ids)) else None date_score = date_proximity(candidate.release_date, norm.release_date) duration_score = duration_proximity(candidate.duration_sec, norm.duration_sec) studio_match: bool | None if studio_id is None or candidate.studio_id is None: studio_match = None # nieinformatywne else: studio_match = candidate.studio_id == studio_id composite, reasons = composite_score( fp=fp, title=title, performers=perf, date_score=date_score if (candidate.release_date and norm.release_date) else None, duration_score=duration_score, studio_match=studio_match, aggregator_mode=aggregator_mode, ) breakdown = ScoreBreakdown( fp=fp, title=title, performers=perf, date=date_score, duration=duration_score, studio_match=studio_match, composite=composite, reasons=reasons, ) return breakdown def _best_phash_similarity( session: Session, scene_id: uuid.UUID, incoming_fingerprints: list[tuple[str, str]], ) -> float | None: """Najlepsza similarity między phashami sceny w DB a incoming.""" incoming = [v for kind, v in incoming_fingerprints if kind == "phash"] if not incoming: return None existing = ( session.execute( select(SceneFingerprint.value).where( SceneFingerprint.scene_id == scene_id, SceneFingerprint.kind == "phash", ) ) .scalars() .all() ) if not existing: return None best = 0.0 for left in incoming: for right in existing: if len(left) != len(right): continue try: sim = phash_similarity(left, right) except ValueError: continue if sim > best: best = sim return best def _candidate_performer_ids(session: Session, scene_id: uuid.UUID) -> list[uuid.UUID]: return list( session.execute( select(ScenePerformer.performer_id).where(ScenePerformer.scene_id == scene_id) ) .scalars() .all() )