# Goon — self-hosted aggregator for adult-content scene metadata. Indexes
# scenes from TPDB, StashDB, and 30+ public adult tube sites. Cross-source
# deduplication via perceptual hash + Levenshtein distance. FastAPI backend +
# APScheduler worker + React Native (Expo) mobile client. FOSS, ad-free,
# donation-funded. See README for details.
# 72 lines · 2.7 KiB · Python
"""Batch-ingest TPDB studio scenes for the top-orphan studios in our DB.

Selects studios that:
- have at least one freshporno scene that is an orphan (no TPDB/StashDB
  canonical ref)
- have a TPDB external_id (site ID)

For each studio: fetches all scenes from the TPDB `/sites/<id>/scenes`
endpoint and runs them through our standard resolver. After the backfill, the
phash/title fuzzy match can attach the orphans to the newly imported canonical
scenes (Path 3 resolver).
"""

from __future__ import annotations

from app.connectors.tpdb import TPDBConnector
from app.db import session_scope
from app.models.source import SourceKind
from app.scheduler.performer_driven import _ingest_iter_into_run


# We use psycopg %s placeholders (not SA text() with :colon binds) — the `:`
# in the value 'tube:freshpornoorg' breaks the SA bind parser even with
# explicit bindparams.
#
# Positional parameters, in order:
#   1. playback source origin whose live (dead_at IS NULL) scenes are inspected
#   2-3. source names whose external refs mean a scene is NOT an orphan
#   4. source name whose studio external refs supply the returned external_id
#
# Result rows are (studio name, studio external_id), ordered by studio name.
QUERY = """
WITH fp_orphans AS (
SELECT DISTINCT sc.studio_id FROM scenes sc
JOIN playback_sources ps ON ps.scene_id = sc.id
WHERE ps.origin = %s AND ps.dead_at IS NULL
AND sc.studio_id IS NOT NULL
AND NOT EXISTS (
SELECT 1 FROM scene_external_refs er JOIN sources s ON s.id=er.source_id
WHERE er.scene_id=sc.id AND s.name IN (%s, %s)
)
)
SELECT st.name, ser.external_id
FROM fp_orphans fo
JOIN studios st ON st.id = fo.studio_id
JOIN studio_external_refs ser ON ser.studio_id = st.id
JOIN sources s ON s.id = ser.source_id AND s.name = %s
ORDER BY st.name;
"""


def main() -> None:
    """Backfill TPDB scenes for every studio returned by ``QUERY``.

    Runs ``QUERY`` to obtain (studio name, TPDB site ID) rows, then for each
    row passes the scenes from ``TPDBConnector.fetch_scenes_for_site`` to
    ``_ingest_iter_into_run``. Progress, per-studio ``seen``/``new`` counters
    and the final totals are printed to stdout. An exception raised while
    processing one studio is printed (truncated to 200 characters) and the
    loop continues with the next studio.
    """
    c = TPDBConnector()
    # NOTE(review): indentation was lost in the reviewed copy of this file; the
    # session is assumed to be needed only for fetching the rows, which are
    # materialized with list() before the ingest loop — confirm.
    with session_scope() as sess:
        rows = list(sess.connection().exec_driver_sql(
            QUERY, ("tube:freshpornoorg", "tpdb", "stashdb", "tpdb"),
        ))
    # Resuming is idempotent — _ingest_iter_into_run dedups by external_id, so
    # already-processed studios are skipped quickly (seen ~= 0, new = 0).
    print(f"FOUND {len(rows)} top-orphan studios with TPDB site IDs", flush=True)
    total_new = 0
    total_seen = 0
    for i, (name, site_id) in enumerate(rows, 1):
        print(f"[{i}/{len(rows)}] {name} (site={site_id}) ...", flush=True)
        try:
            counters = _ingest_iter_into_run(
                source_kind=SourceKind.tpdb,
                source_name="tpdb",
                run_label=f"tpdb-studio-backfill:{name}",
                # Bind site_id as a default argument so the factory keeps this
                # iteration's value instead of the loop variable's later one.
                iterator_factory=lambda sid=site_id: c.fetch_scenes_for_site(sid),
            )
            # Counters missing from the result are treated as 0.
            seen = counters.get("seen", 0)
            new = counters.get("new", 0)
            total_seen += seen
            total_new += new
            print(f" seen={seen} new={new}", flush=True)
        except Exception as e:
            # Best-effort batch: report the failure and move on to the next
            # studio rather than aborting the whole backfill.
            print(f" ERR: {type(e).__name__}: {str(e)[:200]}", flush=True)
    print(f"\nDONE total_seen={total_seen} total_new={total_new}", flush=True)


# Run the backfill only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()