goon/app/resolve/scene_merge.py
jtrzupek e23e2d1f17 fix(merge): move playback_sources on scene merge + exact-title+duration dedup
merge_scenes never reassigned playback_sources → ON DELETE CASCADE dropped them
with the absorbed scene. Cross-source (canonical) merges rarely had tube playback
so it hid, but tube-dup merges silently LOST playback links. Add _move_playback_sources
(global unique (origin,page_url) guarantees no collision on reassign).

+ merge_exact_title_duration.py: catches missing-merge dupes bulk_dedup misses
(same performer + identical normalized title + identical duration_sec, no phash).
Bad Bella had 25 such pairs (bug-report ef92809d "duplikat, te same miniatury").

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-08 10:56:50 +02:00

242 lines
8.4 KiB
Python

"""Scalanie dwóch scen kanonicznych w jedną (admin merge).
`keep_id` takes over everything from `drop_id`:
- external_refs (scene_id is re-pointed to keep)
- scene_performers (deduplicated on (scene_id, performer_id))
- scene_tags
- scene_fingerprints
- playback_sources (scene_id is re-pointed to keep)
Afterwards the `drop` Scene is deleted — CASCADE would clean up the rest anyway,
but the relations are re-pointed to `keep` instead of being deleted with the scene.
Pending merge_candidates that reference `drop_id` (as left or right) are closed
(marked rejected) so the admin does not have to resolve them again.
"""
from __future__ import annotations
import logging
import uuid
from datetime import UTC, datetime
from sqlalchemy import or_, select, update
from sqlalchemy.orm import Session
from app.models.merge_candidate import MergeCandidate, MergeKind, MergeStatus
from app.models.playback_source import PlaybackSource
from app.models.scene import (
Scene,
SceneExternalRef,
SceneFingerprint,
ScenePerformer,
SceneTag,
)
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
class MergeError(Exception):
    """Raised when a merge or candidate resolution cannot be carried out.

    Covers invalid requests such as merging a scene into itself, missing
    scenes or candidates, already-resolved candidates, unsupported candidate
    kinds and unknown actions.
    """
def merge_scenes(
    session: Session,
    *,
    keep_id: uuid.UUID,
    drop_id: uuid.UUID,
    resolved_by: str | None = None,
) -> Scene:
    """Merge the scene `drop_id` into the scene `keep_id`.

    Related rows (external refs, performers, tags, fingerprints, playback
    sources) are moved from `drop` to `keep`, empty canonical fields on `keep`
    are filled from `drop`, `drop` is deleted and flushed, and finally any
    pending merge candidates that reference `drop_id` are closed.

    Args:
        session: Active SQLAlchemy session; the caller owns the transaction.
        keep_id: Id of the scene that survives the merge.
        drop_id: Id of the scene that is absorbed and deleted.
        resolved_by: Identifier recorded on the pending candidates that get
            closed as a side effect.

    Returns:
        The surviving `keep` scene.

    Raises:
        MergeError: If both ids are equal or either scene does not exist.
    """
    if keep_id == drop_id:
        raise MergeError("cannot merge scene into itself")

    keep, drop = session.get(Scene, keep_id), session.get(Scene, drop_id)
    if not (keep is not None and drop is not None):
        raise MergeError("scene not found")

    # Re-point every dependent relation before the scene row disappears.
    movers = (
        _move_external_refs,
        _move_performers,
        _move_tags,
        _move_fingerprints,
        _move_playback_sources,
    )
    for move in movers:
        move(session, keep_id=keep_id, drop_id=drop_id)

    _coalesce_canonical_fields(keep, drop)

    session.delete(drop)
    session.flush()

    # Only after the scene is gone: close candidates that still point at it.
    _close_pending_candidates(session, scene_id=drop_id, resolved_by=resolved_by)
    return keep
def resolve_candidate(
    session: Session,
    *,
    candidate_id: uuid.UUID,
    action: str,  # "merge" | "reject"
    keep_left: bool = True,
    resolved_by: str | None = None,
) -> MergeCandidate:
    """Resolve a single pending scene MergeCandidate.

    Args:
        session: Active SQLAlchemy session.
        candidate_id: Id of the candidate to resolve.
        action: Either "merge" or "reject".
        keep_left: For "merge", decides which side survives: when true
            (the default) `left_id` absorbs `right_id`, otherwise the reverse.
        resolved_by: Identifier stored on the candidate as the resolver.

    Returns:
        The updated candidate.

    Raises:
        MergeError: If the candidate does not exist, is not pending, is not a
            scene candidate, or `action` is not supported. Errors raised by
            `merge_scenes` propagate unchanged.
    """
    cand = session.get(MergeCandidate, candidate_id)
    if cand is None:
        raise MergeError("candidate not found")
    if cand.status != MergeStatus.pending:
        raise MergeError(f"candidate already resolved: status={cand.status.value}")
    if cand.kind != MergeKind.scene:
        raise MergeError(f"only scene merges are supported (got {cand.kind.value})")
    if action not in ("reject", "merge"):
        raise MergeError(f"unsupported action: {action}")

    resolved_at = datetime.now(UTC)

    if action == "reject":
        cand.status = MergeStatus.rejected
    else:
        if keep_left:
            keep_id, drop_id = cand.left_id, cand.right_id
        else:
            keep_id, drop_id = cand.right_id, cand.left_id
        merge_scenes(session, keep_id=keep_id, drop_id=drop_id, resolved_by=resolved_by)
        cand.status = MergeStatus.merged
        # Record the final decision while preserving the earlier scoring data.
        cand.reasons = {
            **(cand.reasons or {}),
            "resolution": {"keep_id": str(keep_id), "drop_id": str(drop_id)},
        }

    cand.resolved_at = resolved_at
    cand.resolved_by = resolved_by
    return cand
# ---- helpers --------------------------------------------------------------
def _move_external_refs(session: Session, *, keep_id: uuid.UUID, drop_id: uuid.UUID) -> None:
    """Re-point the external refs of `drop` to `keep`.

    A ref whose (source_id, external_id) pair already exists under `keep` is
    deleted instead of moved.
    """
    refs_of_drop = select(SceneExternalRef).where(SceneExternalRef.scene_id == drop_id)
    for ref in session.execute(refs_of_drop).scalars().all():
        same_ref_on_keep = select(SceneExternalRef).where(
            SceneExternalRef.source_id == ref.source_id,
            SceneExternalRef.external_id == ref.external_id,
            SceneExternalRef.scene_id == keep_id,
        )
        if session.execute(same_ref_on_keep).scalar_one_or_none() is None:
            ref.scene_id = keep_id
            continue
        # keep already carries this ref, so discard the competing one from drop.
        session.delete(ref)
def _move_performers(session: Session, *, keep_id: uuid.UUID, drop_id: uuid.UUID) -> None:
    """Re-point the performer links of `drop` to `keep`.

    When `keep` is already linked to the same performer, the link from `drop`
    is deleted; its `as_alias` is copied over first if the existing link has
    none.
    """
    links_of_drop = select(ScenePerformer).where(ScenePerformer.scene_id == drop_id)
    for link in session.execute(links_of_drop).scalars().all():
        existing = session.execute(
            select(ScenePerformer).where(
                ScenePerformer.scene_id == keep_id,
                ScenePerformer.performer_id == link.performer_id,
            )
        ).scalar_one_or_none()

        if existing is None:
            link.scene_id = keep_id
            continue

        # Duplicate link: keep the existing row, but do not lose the alias.
        if not existing.as_alias and link.as_alias:
            existing.as_alias = link.as_alias
        session.delete(link)
def _move_tags(session: Session, *, keep_id: uuid.UUID, drop_id: uuid.UUID) -> None:
    """Re-point the tag links of `drop` to `keep`.

    A link whose tag is already attached to `keep` is deleted instead of moved.
    """
    links_of_drop = select(SceneTag).where(SceneTag.scene_id == drop_id)
    for link in session.execute(links_of_drop).scalars().all():
        same_tag_on_keep = select(SceneTag).where(
            SceneTag.scene_id == keep_id,
            SceneTag.tag_id == link.tag_id,
        )
        already_tagged = session.execute(same_tag_on_keep).scalar_one_or_none() is not None
        if already_tagged:
            session.delete(link)
        else:
            link.scene_id = keep_id
def _move_fingerprints(session: Session, *, keep_id: uuid.UUID, drop_id: uuid.UUID) -> None:
    """Re-point the fingerprints of `drop` to `keep`.

    A fingerprint whose (kind, value) pair already exists under `keep` is
    deleted instead of moved.
    """
    fingerprints_of_drop = select(SceneFingerprint).where(SceneFingerprint.scene_id == drop_id)
    for fingerprint in session.execute(fingerprints_of_drop).scalars().all():
        same_on_keep = select(SceneFingerprint).where(
            SceneFingerprint.scene_id == keep_id,
            SceneFingerprint.kind == fingerprint.kind,
            SceneFingerprint.value == fingerprint.value,
        )
        if session.execute(same_on_keep).scalar_one_or_none() is None:
            fingerprint.scene_id = keep_id
            continue
        session.delete(fingerprint)
def _move_playback_sources(session: Session, *, keep_id: uuid.UUID, drop_id: uuid.UUID) -> None:
    """Re-point the playback sources of `drop` to `keep` with one bulk UPDATE.

    Background (from the original author's note): merge_scenes used to leave
    playback_sources alone, so ON DELETE CASCADE removed them together with
    the `drop` scene. For cross-source merges (tpdb <-> stashdb canonical,
    usually without tube playback) that went unnoticed, but phash_exact /
    tube-dup merges lost their playback links.

    No clash check is done here. The original note states that the unique
    constraint on `(origin, page_url)` is global, so `drop` and `keep` cannot
    share a source and changing scene_id cannot collide.
    NOTE(review): the constraint is not visible in this module — confirm
    against the PlaybackSource model.
    """
    reassign_stmt = (
        update(PlaybackSource)
        .where(PlaybackSource.scene_id == drop_id)
        .values(scene_id=keep_id)
    )
    session.execute(reassign_stmt)
def _coalesce_canonical_fields(keep: Scene, drop: Scene) -> None:
"""Wypełnij braki w `keep` polami z `drop`. Nie nadpisuje istniejących wartości."""
if not keep.description and drop.description:
keep.description = drop.description
if not keep.duration_sec and drop.duration_sec:
keep.duration_sec = drop.duration_sec
if not keep.code and drop.code:
keep.code = drop.code
if not keep.director and drop.director:
keep.director = drop.director
if not keep.release_date and drop.release_date:
keep.release_date = drop.release_date
if not keep.studio_id and drop.studio_id:
keep.studio_id = drop.studio_id
if drop.title and len(drop.title) > len(keep.title or ""):
keep.title = drop.title
keep.title_normalized = drop.title_normalized
def _close_pending_candidates(
    session: Session, *, scene_id: uuid.UUID, resolved_by: str | None
) -> None:
    """Close pending candidates that reference the removed scene.

    Every MergeCandidate that is still pending and has `scene_id` as its left
    or right side is marked rejected with one bulk UPDATE, since one of its
    sides no longer exists. Candidates in any other status are not touched, so
    they remain as an audit trail.

    `resolved_by` falls back to "auto:scene_dropped" when it is falsy.
    """
    references_scene = or_(
        MergeCandidate.left_id == scene_id,
        MergeCandidate.right_id == scene_id,
    )
    closing_values = {
        "status": MergeStatus.rejected,
        "resolved_at": datetime.now(UTC),
        "resolved_by": resolved_by or "auto:scene_dropped",
    }
    close_stmt = (
        update(MergeCandidate)
        .where(MergeCandidate.status == MergeStatus.pending, references_scene)
        .values(**closing_values)
    )
    session.execute(close_stmt)