Mobile / OTA: - Enable Expo Updates (app.json + AndroidManifest) → api.goon-foss.org - Bump 0.1.6 → 0.1.9 (build.gradle, app.json, appVersion.ts, main.py /version) - backend.ts: default public backend auto-connect (no manual login) WebView fallback fix (PlayerScreen INJECTED_JS): - Auto-dismiss cookie/consent gates (hqporner et al. blocked kt_player init) - Context-scoped: only clicks consent buttons inside cookie/gdpr containers - Retry window for <source>.src polling raised 5→15 ticks (post-dismiss init) Resolver: - Series-position + modifier mismatch detector (Episode 2≠4, BTS/unedited) → composite_score hard-reject / cap; wired into scene_score + bulk_dedup - aggregator-mode candidate query: LIMIT 500 + title-match ordering Connectors: - porndoe.com browse scraper (JSON-LD VideoObject) — theporndude audit pilot landing: APK links → goon-v0.1.9.apk Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
123 lines
3.7 KiB
Python
123 lines
3.7 KiB
Python
"""Scoring kandydat ↔ kandydat dla pipeline'u dedup."""
|
|
from __future__ import annotations
|
|
|
|
import uuid
|
|
from collections.abc import Iterable
|
|
|
|
from sqlalchemy import select
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.models.scene import Scene, SceneFingerprint, ScenePerformer
|
|
from app.normalize.scenes import NormalizedScene
|
|
from app.resolve.scoring import (
|
|
ScoreBreakdown,
|
|
composite_score,
|
|
date_proximity,
|
|
duration_proximity,
|
|
performer_set_similarity,
|
|
phash_similarity,
|
|
series_mismatch_strength,
|
|
title_similarity,
|
|
)
|
|
|
|
|
|
def score_candidate(
    session: Session,
    *,
    candidate: Scene,
    norm: NormalizedScene,
    resolved_performer_ids: Iterable[uuid.UUID],
    studio_id: uuid.UUID | None,
    aggregator_mode: bool = False,
) -> ScoreBreakdown:
    """Compute a ScoreBreakdown for a (DB candidate, normalized imported scene) pair.

    Args:
        session: SQLAlchemy session used to load the candidate's fingerprints
            and performer links.
        candidate: Scene already stored in the database.
        norm: Normalized scene coming from the import.
        resolved_performer_ids: Performer ids resolved for the imported scene.
            Any iterable is accepted, including one-shot iterators/generators.
        studio_id: Studio resolved for the imported scene, or None if unknown.
        aggregator_mode: Set to True for scenes that come from a tube/aggregator
            (e.g. pornapp): the studio is not informative there (a tube
            aggregates many studios) and performers become the main signal —
            see `composite_score` for details.

    Returns:
        ScoreBreakdown holding the individual signals, the composite score and
        the reasons reported by `composite_score`.
    """
    fp = _best_phash_similarity(session, candidate.id, norm.fingerprints)
    title = title_similarity(candidate.title_normalized, norm.title_normalized)
    cand_perfs = _candidate_performer_ids(session, candidate.id)

    # Materialize exactly once. The parameter is typed as Iterable, so it may be
    # a one-shot iterator; converting it separately for the emptiness check and
    # for the similarity call would hand an already-exhausted iterator to the
    # second conversion and silently compare against an empty performer list.
    incoming_perfs = list(resolved_performer_ids)
    if cand_perfs or incoming_perfs:
        perf = performer_set_similarity(cand_perfs, incoming_perfs)
    else:
        # No performers on either side: the signal is absent, not a mismatch.
        perf = None

    date_score = date_proximity(candidate.release_date, norm.release_date)
    duration_score = duration_proximity(candidate.duration_sec, norm.duration_sec)

    studio_match: bool | None
    if studio_id is None or candidate.studio_id is None:
        studio_match = None  # uninformative
    else:
        studio_match = candidate.studio_id == studio_id

    series_mismatch = series_mismatch_strength(
        candidate.title_normalized, norm.title_normalized
    )

    composite, reasons = composite_score(
        fp=fp,
        title=title,
        performers=perf,
        # The date signal only feeds the composite when both sides have a
        # release date; the raw value is still recorded in the breakdown below.
        date_score=date_score if (candidate.release_date and norm.release_date) else None,
        duration_score=duration_score,
        studio_match=studio_match,
        aggregator_mode=aggregator_mode,
        series_mismatch=series_mismatch,
    )

    return ScoreBreakdown(
        fp=fp,
        title=title,
        performers=perf,
        date=date_score,
        duration=duration_score,
        studio_match=studio_match,
        composite=composite,
        reasons=reasons,
    )
|
|
|
|
|
|
def _best_phash_similarity(
    session: Session,
    scene_id: uuid.UUID,
    incoming_fingerprints: list[tuple[str, str]],
) -> float | None:
    """Return the best similarity between the stored and the incoming phashes.

    Args:
        session: SQLAlchemy session used to read the scene's stored fingerprints.
        scene_id: Id of the scene whose stored "phash" fingerprints are compared.
        incoming_fingerprints: (kind, value) pairs from the import; only pairs
            whose kind is "phash" take part.

    Returns:
        None when either side has no phash at all; otherwise the highest
        `phash_similarity` over all equal-length pairs, starting from 0.0.
    """
    incoming_hashes = [value for kind, value in incoming_fingerprints if kind == "phash"]
    if not incoming_hashes:
        # Nothing to compare: skip the database round-trip entirely.
        return None

    stmt = select(SceneFingerprint.value).where(
        SceneFingerprint.scene_id == scene_id,
        SceneFingerprint.kind == "phash",
    )
    stored_hashes = session.execute(stmt).scalars().all()
    if not stored_hashes:
        return None

    def _comparable_scores():
        """Yield the similarity of every pair that can actually be compared."""
        for new_hash in incoming_hashes:
            for stored_hash in stored_hashes:
                # Hashes of different length are skipped, as are pairs the
                # similarity function rejects with ValueError.
                if len(new_hash) == len(stored_hash):
                    try:
                        score = phash_similarity(new_hash, stored_hash)
                    except ValueError:
                        continue
                    yield score

    # NOTE(review): when no pair is comparable the result is 0.0 rather than
    # None — confirm this is intended by the composite scoring.
    return max([0.0, *_comparable_scores()])
|
|
|
|
|
|
def _candidate_performer_ids(session: Session, scene_id: uuid.UUID) -> list[uuid.UUID]:
    """Return the performer ids linked to the scene identified by `scene_id`."""
    stmt = select(ScenePerformer.performer_id).where(ScenePerformer.scene_id == scene_id)
    linked_ids = session.execute(stmt).scalars().all()
    # Copy into a plain list so callers always receive a list.
    return list(linked_ids)
|