"""Batch fill TPDB external_refs dla top-N performerów którzy mają stashdb ref ale nie tpdb. Live lookup TPDB UUID po nazwie → INSERT do PerformerExternalRef. Po skończeniu printuje listę CSV nazw — gotową do podania do `worker --once --strategy=performer-driven --performers='...'`. """ from __future__ import annotations import sys from sqlalchemy import case, func, select from app.connectors.tpdb import TPDBConnector from app.db import session_scope from app.models.performer import Performer, PerformerExternalRef from app.models.scene import ScenePerformer from app.models.source import Source, SourceKind def main(top_n: int = 30) -> None: with session_scope() as session: tpdb_src_id = session.execute( select(Source.id).where(Source.kind == SourceKind.tpdb) ).scalar_one() stashdb_src_id = session.execute( select(Source.id).where(Source.kind == SourceKind.stashdb) ).scalar_one() has_tpdb = ( select(PerformerExternalRef.performer_id) .where(PerformerExternalRef.source_id == tpdb_src_id) .distinct() ).subquery() has_stashdb = ( select(PerformerExternalRef.performer_id) .where(PerformerExternalRef.source_id == stashdb_src_id) .distinct() ).subquery() candidates = session.execute( select( Performer.id, Performer.canonical_name, func.count(ScenePerformer.scene_id).label("c"), ) .outerjoin(ScenePerformer, ScenePerformer.performer_id == Performer.id) .outerjoin(has_tpdb, has_tpdb.c.performer_id == Performer.id) .join(has_stashdb, has_stashdb.c.performer_id == Performer.id) .where(has_tpdb.c.performer_id.is_(None)) .group_by(Performer.id, Performer.canonical_name) .order_by(func.count(ScenePerformer.scene_id).desc()) .limit(top_n) ).all() print(f"=== {len(candidates)} candidates (have stashdb, missing tpdb) ===") for pid, name, count in candidates: print(f" {count:5d} {name}") print("\n=== Live lookup TPDB ===") tpdb = TPDBConnector() matched: list[str] = [] not_found: list[str] = [] for pid, name, count in candidates: try: tpdb_id = tpdb.find_performer_id_by_name(name) except Exception as e: print(f" ERR {name}: {e}") tpdb_id = None if not tpdb_id: not_found.append(name) print(f" -- {name} (not found in TPDB)") continue # Insert ref with session_scope() as session: existing = session.execute( select(PerformerExternalRef).where( PerformerExternalRef.source_id == tpdb_src_id, PerformerExternalRef.external_id == tpdb_id, ) ).scalar_one_or_none() if existing: # Conflict: TPDB UUID już zmapowany do innego performera lokalnego. # Zostawić — ręczna decyzja czy mergować performerów. if existing.performer_id != pid: print( f" CONFLICT {name}: tpdb={tpdb_id} already mapped to " f"performer_id={existing.performer_id}" ) else: print(f" ok (already linked) {name}: tpdb={tpdb_id}") matched.append(name) continue session.add( PerformerExternalRef( source_id=tpdb_src_id, external_id=tpdb_id, performer_id=pid, confidence=0.9, ) ) print(f" + {name}: tpdb={tpdb_id}") matched.append(name) print(f"\n=== Done ===") print(f"linked: {len(matched)}") print(f"not_found in tpdb: {len(not_found)}") if matched: print("\nNames CSV (paste to --performers):") print(",".join(matched)) if __name__ == "__main__": n = int(sys.argv[1]) if len(sys.argv) > 1 else 30 main(n)