"""TPDB studio scenes batch ingest dla top-orphan studios w naszej DB. Bierze studios które: - Mają ≥1 freshporno scene jako orphan (bez TPDB/StashDB canonical ref) - Mają TPDB external_id (site ID) Per każde studio: fetch wszystkich scen z `/sites//scenes` TPDB endpoint, przepuszcza przez nasz standard resolver. Po backfill — phash/title fuzzy match może podpiąć orphany do nowo zaimportowanych canonical scenes (Path 3 resolver). """ from __future__ import annotations from app.connectors.tpdb import TPDBConnector from app.db import session_scope from app.models.source import SourceKind from app.scheduler.performer_driven import _ingest_iter_into_run # Używamy psycopg %s placeholders (nie SA text() z :colon) — `:` w wartości # 'tube:freshpornoorg' rozwala SA bind parser nawet przy explicit bindparams. QUERY = """ WITH fp_orphans AS ( SELECT DISTINCT sc.studio_id FROM scenes sc JOIN playback_sources ps ON ps.scene_id = sc.id WHERE ps.origin = %s AND ps.dead_at IS NULL AND sc.studio_id IS NOT NULL AND NOT EXISTS ( SELECT 1 FROM scene_external_refs er JOIN sources s ON s.id=er.source_id WHERE er.scene_id=sc.id AND s.name IN (%s, %s) ) ) SELECT st.name, ser.external_id FROM fp_orphans fo JOIN studios st ON st.id = fo.studio_id JOIN studio_external_refs ser ON ser.studio_id = st.id JOIN sources s ON s.id = ser.source_id AND s.name = %s ORDER BY st.name; """ def main() -> None: c = TPDBConnector() with session_scope() as sess: rows = list(sess.connection().exec_driver_sql( QUERY, ("tube:freshpornoorg", "tpdb", "stashdb", "tpdb"), )) # Resume idempotent — _ingest_iter_into_run dedups by external_id, więc # już-processed studios skipnęją szybko (seen ~= 0, new = 0). print(f"FOUND {len(rows)} top-orphan studios with TPDB site IDs", flush=True) total_new = 0 total_seen = 0 for i, (name, site_id) in enumerate(rows, 1): print(f"[{i}/{len(rows)}] {name} (site={site_id}) ...", flush=True) try: counters = _ingest_iter_into_run( source_kind=SourceKind.tpdb, source_name="tpdb", run_label=f"tpdb-studio-backfill:{name}", iterator_factory=lambda sid=site_id: c.fetch_scenes_for_site(sid), ) seen = counters.get("seen", 0) new = counters.get("new", 0) total_seen += seen total_new += new print(f" seen={seen} new={new}", flush=True) except Exception as e: print(f" ERR: {type(e).__name__}: {str(e)[:200]}", flush=True) print(f"\nDONE total_seen={total_seen} total_new={total_new}", flush=True) if __name__ == "__main__": main()