Goon — self-hosted aggregator for adult-content scene metadata. Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites. Cross-source deduplication via perceptual hash + Levenshtein distance. FastAPI backend + APScheduler worker + React Native (Expo) mobile client. FOSS, ad-free, donation-funded. See README for details.
65 lines
2.3 KiB
Python
65 lines
2.3 KiB
Python
"""StashDB studio scenes batch ingest — analogiczne do tpdb_studio_backfill.
|
|
|
|
Iterates over the studios of freshporno orphan scenes that have a StashDB
external_id and fetches all scenes per studio. StashDB has a ~12% match rate
against freshporno (vs. 65% for TPDB), but its data is complementary — some
studios are indexed only in StashDB (Adult Time sub-sites, European studios).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from app.connectors.stashdb import StashDBConnector
|
|
from app.db import session_scope
|
|
from app.models.source import SourceKind
|
|
from app.scheduler.performer_driven import _ingest_iter_into_run
|
|
|
|
|
|
# Driver-level SQL (positional %s placeholders) listing the studios to backfill.
#
# The CTE collects the distinct studio ids of scenes that have a live
# (dead_at IS NULL) playback source with the given origin and no external ref
# from either of two named sources. The outer SELECT keeps only those studios
# that have an external ref in the named source and returns
# (studio name, external id), ordered by studio name.
#
# Placeholder order:
#   1. playback_sources.origin to match
#   2-3. source names whose scene refs disqualify a scene from being an orphan
#   4. source name whose studio external id is returned
QUERY = """
WITH fp_orphans AS (
    SELECT DISTINCT sc.studio_id FROM scenes sc
    JOIN playback_sources ps ON ps.scene_id = sc.id
    WHERE ps.origin = %s AND ps.dead_at IS NULL
      AND sc.studio_id IS NOT NULL
      AND NOT EXISTS (
        SELECT 1 FROM scene_external_refs er JOIN sources s ON s.id=er.source_id
        WHERE er.scene_id=sc.id AND s.name IN (%s, %s)
      )
)
SELECT st.name, ser.external_id
FROM fp_orphans fo
JOIN studios st ON st.id = fo.studio_id
JOIN studio_external_refs ser ON ser.studio_id = st.id
JOIN sources s ON s.id = ser.source_id AND s.name = %s
ORDER BY st.name;
"""
|
|
|
|
|
|
def main() -> None:
    """Run the StashDB per-studio scene backfill and print progress to stdout.

    Executes ``QUERY`` to obtain ``(studio name, StashDB external id)`` rows,
    then calls ``_ingest_iter_into_run`` once per row with an iterator factory
    that fetches that studio's scenes through ``StashDBConnector``. The
    ``"seen"`` and ``"new"`` entries of each returned counters mapping are
    accumulated and reported at the end.

    A failure while processing one studio is printed (exception type plus the
    first 200 characters of its message) and does not stop the remaining
    studios from being processed.
    """
    connector = StashDBConnector()

    # Positional parameters, in the order the %s placeholders appear in QUERY.
    query_params = ("tube:freshpornoorg", "tpdb", "stashdb", "stashdb")
    with session_scope() as sess:
        result = sess.connection().exec_driver_sql(QUERY, query_params)
        # Materialise the rows while the session is still open.
        studios = list(result)

    studio_count = len(studios)
    print(f"FOUND {studio_count} top-orphan studios with StashDB IDs", flush=True)

    total_new = 0
    total_seen = 0
    for position, (studio_name, stashdb_id) in enumerate(studios, 1):
        print(f"[{position}/{studio_count}] {studio_name} (sdb={stashdb_id}) ...", flush=True)
        try:
            counters = _ingest_iter_into_run(
                source_kind=SourceKind.stashdb,
                source_name="stashdb",
                run_label=f"stashdb-studio-backfill:{studio_name}",
                # Bind the id as a default argument so the factory keeps this
                # iteration's value rather than the loop variable.
                iterator_factory=lambda sid=stashdb_id: connector.fetch_scenes_for_studio(sid),
            )
            seen = counters.get("seen", 0)
            new = counters.get("new", 0)
            total_seen += seen
            total_new += new
            print(f"  seen={seen} new={new}", flush=True)
        except Exception as exc:
            # Best-effort batch job: report the failure and move on.
            print(f"  ERR: {type(exc).__name__}: {str(exc)[:200]}", flush=True)

    print(f"\nDONE total_seen={total_seen} total_new={total_new}", flush=True)
|
|
|
|
|
|
# Run the backfill only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|