"""StashDB studio scenes batch ingest — analogiczne do tpdb_studio_backfill. Iteruje studios z freshporno orphans które mają StashDB external_id, fetcha wszystkich scen per-studio. StashDB ma ~12% match rate freshporno (vs 65% TPDB), ale komplementarne dane — niektóre studios indexed tylko w StashDB (Adult Time sub-sites, european studios). """ from __future__ import annotations from app.connectors.stashdb import StashDBConnector from app.db import session_scope from app.models.source import SourceKind from app.scheduler.performer_driven import _ingest_iter_into_run QUERY = """ WITH fp_orphans AS ( SELECT DISTINCT sc.studio_id FROM scenes sc JOIN playback_sources ps ON ps.scene_id = sc.id WHERE ps.origin = %s AND ps.dead_at IS NULL AND sc.studio_id IS NOT NULL AND NOT EXISTS ( SELECT 1 FROM scene_external_refs er JOIN sources s ON s.id=er.source_id WHERE er.scene_id=sc.id AND s.name IN (%s, %s) ) ) SELECT st.name, ser.external_id FROM fp_orphans fo JOIN studios st ON st.id = fo.studio_id JOIN studio_external_refs ser ON ser.studio_id = st.id JOIN sources s ON s.id = ser.source_id AND s.name = %s ORDER BY st.name; """ def main() -> None: c = StashDBConnector() with session_scope() as sess: rows = list(sess.connection().exec_driver_sql( QUERY, ("tube:freshpornoorg", "tpdb", "stashdb", "stashdb"), )) print(f"FOUND {len(rows)} top-orphan studios with StashDB IDs", flush=True) total_new = 0 total_seen = 0 for i, (name, studio_id) in enumerate(rows, 1): print(f"[{i}/{len(rows)}] {name} (sdb={studio_id}) ...", flush=True) try: counters = _ingest_iter_into_run( source_kind=SourceKind.stashdb, source_name="stashdb", run_label=f"stashdb-studio-backfill:{name}", iterator_factory=lambda sid=studio_id: c.fetch_scenes_for_studio(sid), ) seen = counters.get("seen", 0) new = counters.get("new", 0) total_seen += seen total_new += new print(f" seen={seen} new={new}", flush=True) except Exception as e: print(f" ERR: {type(e).__name__}: {str(e)[:200]}", flush=True) print(f"\nDONE total_seen={total_seen} total_new={total_new}", flush=True) if __name__ == "__main__": main()