Goon — self-hosted aggregator for adult-content scene metadata. Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites. Cross-source deduplication via perceptual hash + Levenshtein distance. FastAPI backend + APScheduler worker + React Native (Expo) mobile client. FOSS, ad-free, donation-funded. See README for details.
113 lines
4.1 KiB
Python
113 lines
4.1 KiB
Python
"""Batch fill TPDB external_refs dla top-N performerów którzy mają stashdb ref ale nie tpdb.
|
|
|
|
Live lookup TPDB UUID po nazwie → INSERT do PerformerExternalRef. Po skończeniu printuje
|
|
listę CSV nazw — gotową do podania do `worker --once --strategy=performer-driven --performers='...'`.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import sys
|
|
|
|
from sqlalchemy import case, func, select
|
|
|
|
from app.connectors.tpdb import TPDBConnector
|
|
from app.db import session_scope
|
|
from app.models.performer import Performer, PerformerExternalRef
|
|
from app.models.scene import ScenePerformer
|
|
from app.models.source import Source, SourceKind
|
|
|
|
|
|
def _load_candidates(top_n: int) -> tuple:
    """Fetch the TPDB source id and the performers that still need a TPDB ref.

    A candidate is a performer that has a StashDB external ref but no TPDB one.
    Rows are ordered by scene count (descending) and capped at ``top_n``.

    Args:
        top_n: Maximum number of candidate rows to return.

    Returns:
        ``(tpdb_src_id, rows)``; each row unpacks as
        ``(performer_id, canonical_name, scene_count)``.

    Raises:
        Whatever ``scalar_one()`` raises when the TPDB or StashDB source row is
        missing or duplicated.
    """
    with session_scope() as session:
        tpdb_src_id = session.execute(
            select(Source.id).where(Source.kind == SourceKind.tpdb)
        ).scalar_one()
        stashdb_src_id = session.execute(
            select(Source.id).where(Source.kind == SourceKind.stashdb)
        ).scalar_one()

        # One row per performer that already has a ref for the given source.
        has_tpdb = (
            select(PerformerExternalRef.performer_id)
            .where(PerformerExternalRef.source_id == tpdb_src_id)
            .distinct()
        ).subquery()
        has_stashdb = (
            select(PerformerExternalRef.performer_id)
            .where(PerformerExternalRef.source_id == stashdb_src_id)
            .distinct()
        ).subquery()

        candidates = session.execute(
            select(
                Performer.id,
                Performer.canonical_name,
                func.count(ScenePerformer.scene_id).label("c"),
            )
            # Outer join so performers without any scene still show up (count 0).
            .outerjoin(ScenePerformer, ScenePerformer.performer_id == Performer.id)
            # Anti-join: keep only performers with no matching TPDB ref row.
            .outerjoin(has_tpdb, has_tpdb.c.performer_id == Performer.id)
            .join(has_stashdb, has_stashdb.c.performer_id == Performer.id)
            .where(has_tpdb.c.performer_id.is_(None))
            .group_by(Performer.id, Performer.canonical_name)
            .order_by(func.count(ScenePerformer.scene_id).desc())
            .limit(top_n)
        ).all()
    return tpdb_src_id, candidates


def _link_tpdb_ref(tpdb_src_id, pid, name: str, tpdb_id) -> bool:
    """Attach ``tpdb_id`` to local performer ``pid`` unless the id is already mapped.

    Args:
        tpdb_src_id: Primary key of the TPDB ``Source`` row.
        pid: Local performer id to link.
        name: Performer name, used only for the progress output.
        tpdb_id: External TPDB identifier returned by the live lookup.

    Returns:
        True when ``pid`` is linked to ``tpdb_id`` (newly inserted or already
        linked); False when ``tpdb_id`` belongs to a different local performer.
    """
    # NOTE(review): session_scope() presumably commits on clean exit — confirm in app.db.
    with session_scope() as session:
        existing = session.execute(
            select(PerformerExternalRef).where(
                PerformerExternalRef.source_id == tpdb_src_id,
                PerformerExternalRef.external_id == tpdb_id,
            )
        ).scalar_one_or_none()

        if existing is None:
            session.add(
                PerformerExternalRef(
                    source_id=tpdb_src_id,
                    external_id=tpdb_id,
                    performer_id=pid,
                    confidence=0.9,
                )
            )
            print(f" + {name}: tpdb={tpdb_id}")
            return True

        if existing.performer_id != pid:
            # Conflict: the TPDB UUID is already mapped to another local performer.
            # Leave it alone — merging performers is a manual decision.
            print(
                f" CONFLICT {name}: tpdb={tpdb_id} already mapped to "
                f"performer_id={existing.performer_id}"
            )
            return False

        print(f" ok (already linked) {name}: tpdb={tpdb_id}")
        return True


def main(top_n: int = 30) -> None:
    """Link the top-N StashDB-only performers to their TPDB ids and print a summary.

    For each candidate the TPDB id is looked up live by name and, when found,
    stored as a ``PerformerExternalRef``. Progress is printed per performer; the
    final output includes a comma-separated list of linked names.

    Lookup failures are reported as errors and counted separately, so they are
    not mistaken for performers that TPDB does not know.

    Args:
        top_n: Maximum number of candidates to process.
    """
    tpdb_src_id, candidates = _load_candidates(top_n)

    print(f"=== {len(candidates)} candidates (have stashdb, missing tpdb) ===")
    for _pid, name, count in candidates:
        print(f" {count:5d} {name}")

    print("\n=== Live lookup TPDB ===")
    tpdb = TPDBConnector()
    matched: list[str] = []
    not_found: list[str] = []
    failed: list[str] = []
    for pid, name, _count in candidates:
        try:
            tpdb_id = tpdb.find_performer_id_by_name(name)
        except Exception as e:
            # Best-effort batch: one failed lookup must not abort the remaining ones.
            print(f" ERR {name}: {e}")
            failed.append(name)
            continue

        if not tpdb_id:
            not_found.append(name)
            print(f" -- {name} (not found in TPDB)")
            continue

        if _link_tpdb_ref(tpdb_src_id, pid, name, tpdb_id):
            matched.append(name)

    print("\n=== Done ===")
    print(f"linked: {len(matched)}")
    print(f"not_found in tpdb: {len(not_found)}")
    if failed:
        print(f"lookup errors: {len(failed)}")
    if matched:
        print("\nNames CSV (paste to --performers):")
        print(",".join(matched))
|
|
|
|
|
|
if __name__ == "__main__":
    # Optional first CLI argument: how many top performers to process (default 30).
    try:
        n = int(sys.argv[1])
    except IndexError:
        n = 30
    main(n)
|