Goon — self-hosted aggregator for adult-content scene metadata. Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites. Cross-source deduplication via perceptual hash + Levenshtein distance. FastAPI backend + APScheduler worker + React Native (Expo) mobile client. FOSS, ad-free, donation-funded. See README for details.
173 lines
6.5 KiB
Python
173 lines
6.5 KiB
Python
"""One-shot migration: paradisehill scenes → movies.
|
|
|
|
Paradisehill content is actually movies (`og:type=video.movie`), but an early
implementation landed it in the `scenes` table. This script moves 73 entries
from `scenes` to `movies`, preserving UUIDs, performers, tags, studios,
external_refs and playback_sources. After the migration, the DELETE from
`scenes` cascades to the remaining links.
|
|
|
|
Safe because:
- 0 favorite_scenes, 0 scene_play_progress, 0 bug_reports, 0 movie_chapters
  (verified 2026-05-15)
- 0 stashdb/tpdb refs (apart from 1+1 — accepted loss; those matches were wrong)
- pornapp refs are moved to movie_external_refs (reusing source_id)
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import sys
|
|
from sqlalchemy import text
|
|
|
|
# Put "/srv" first on sys.path so the project import below resolves
# (presumably the deployment root of the `app` package — confirm).
sys.path.insert(0, "/srv")
|
|
from app.db import SessionLocal
|
|
|
|
# Playback-source origin value used to select the scenes to migrate.
ORIGIN = "tube:paradisehillcc"
|
|
|
|
|
|
def main() -> None:
    """Move paradisehill scenes, with their linked rows, into the movie tables.

    Every statement runs inside one ``session.begin()`` block, so the copy
    steps and the final delete share a single transaction.

    Steps:
        0. Collect the ids of scenes that have a live (``dead_at IS NULL``)
           playback source whose origin equals ``ORIGIN``.
        1-5. Copy the scene rows and their performers, tags, external refs
           and playback sources into the ``movie*`` tables, reusing the
           scene UUID as the movie id. Each insert uses
           ``ON CONFLICT ... DO NOTHING``, so rows that collide with existing
           movie data are skipped rather than raising.
        6. Delete the migrated scenes.
        7. Print verification counts for the old and the new origin.

    Progress is reported with ``print``; nothing is returned.
    """
    with SessionLocal() as session, session.begin():
        # 0. Collect scene IDs.
        scene_ids = session.execute(
            text(
                "SELECT DISTINCT scene_id FROM playback_sources "
                "WHERE origin = :origin AND dead_at IS NULL"
            ),
            {"origin": ORIGIN},
        ).scalars().all()
        print(f"scenes to migrate: {len(scene_ids)}")
        if not scene_ids:
            print("nothing to do")
            return

        # The ids are sent as strings and cast to uuid[] inside each statement.
        params = {"ids": [str(sid) for sid in scene_ids]}

        # 1. Movies — preserve the UUID. scenes.title may exceed the
        #    VARCHAR(512) movies.title constraint, so clamp in the SELECT.
        #    CAST(... AS int) is used instead of `::int` to match the other
        #    casts in this script and keep stray colons out of text().
        ins_movies = session.execute(
            text(
                """
                INSERT INTO movies (
                    id, title, title_normalized, slug,
                    release_date, release_year,
                    studio_id, director, duration_sec, description,
                    created_at, updated_at
                )
                SELECT
                    s.id,
                    LEFT(s.title, 512),
                    LEFT(s.title_normalized, 512),
                    LEFT(s.slug, 512),
                    s.release_date,
                    CAST(EXTRACT(YEAR FROM s.release_date) AS int),
                    s.studio_id,
                    s.director,
                    s.duration_sec,
                    s.description,
                    s.created_at,
                    s.updated_at
                FROM scenes s
                WHERE s.id = ANY(CAST(:ids AS uuid[]))
                ON CONFLICT (id) DO NOTHING
                """
            ),
            params,
        )
        print(f"movies inserted: {ins_movies.rowcount}")

        # 2. Performers (scene_performers has a different shape — check first).
        ins_perf = session.execute(
            text(
                """
                INSERT INTO movie_performers (movie_id, performer_id, as_alias)
                SELECT sp.scene_id, sp.performer_id, sp.as_alias
                FROM scene_performers sp
                WHERE sp.scene_id = ANY(CAST(:ids AS uuid[]))
                ON CONFLICT (movie_id, performer_id) DO NOTHING
                """
            ),
            params,
        )
        print(f"movie_performers inserted: {ins_perf.rowcount}")

        # 3. Tags.
        ins_tags = session.execute(
            text(
                """
                INSERT INTO movie_tags (movie_id, tag_id, source_id)
                SELECT st.scene_id, st.tag_id, st.source_id
                FROM scene_tags st
                WHERE st.scene_id = ANY(CAST(:ids AS uuid[]))
                ON CONFLICT (movie_id, tag_id) DO NOTHING
                """
            ),
            params,
        )
        print(f"movie_tags inserted: {ins_tags.rowcount}")

        # 4. External refs (preserve confidence + URL + first/last seen).
        #    A missing confidence is stored as 1.0.
        ins_refs = session.execute(
            text(
                """
                INSERT INTO movie_external_refs (
                    source_id, external_id, movie_id, confidence, url, first_seen, last_seen
                )
                SELECT
                    ser.source_id, ser.external_id, ser.scene_id,
                    COALESCE(ser.confidence, 1.0),
                    ser.url, ser.first_seen, ser.last_seen
                FROM scene_external_refs ser
                WHERE ser.scene_id = ANY(CAST(:ids AS uuid[]))
                ON CONFLICT (source_id, external_id) DO NOTHING
                """
            ),
            params,
        )
        print(f"movie_external_refs inserted: {ins_refs.rowcount}")

        # 5. Playback sources. Rows whose origin is the old ORIGIN value are
        #    normalized to the new `paradisehill` origin; any other source
        #    attached to the same scene keeps its own origin instead of being
        #    mislabelled as paradisehill.
        ins_pb = session.execute(
            text(
                """
                INSERT INTO movie_playback_sources (
                    id, movie_id, origin, page_url, embed_url, stream_url,
                    quality, duration_sec, thumbnail_url, animated_thumbnail_url,
                    last_seen_at, dead_at, dead_reason, created_at, updated_at
                )
                SELECT
                    ps.id, ps.scene_id,
                    CASE WHEN ps.origin = :origin THEN 'paradisehill' ELSE ps.origin END,
                    ps.page_url, ps.embed_url, ps.stream_url,
                    ps.quality, ps.duration_sec, ps.thumbnail_url, ps.animated_thumbnail_url,
                    ps.last_seen_at, ps.dead_at, ps.dead_reason, ps.created_at, ps.updated_at
                FROM playback_sources ps
                WHERE ps.scene_id = ANY(CAST(:ids AS uuid[]))
                ON CONFLICT (origin, page_url) DO NOTHING
                """
            ),
            {**params, "origin": ORIGIN},
        )
        print(f"movie_playback_sources inserted: {ins_pb.rowcount}")

        # 6. Delete scenes. This relies on ON DELETE CASCADE foreign keys
        #    (schema not visible here — confirm) to drop scene_performers/
        #    tags/external_refs/fingerprints/playback_sources/play_progress/
        #    favorites automatically.
        del_scenes = session.execute(
            text("DELETE FROM scenes WHERE id = ANY(CAST(:ids AS uuid[]))"),
            params,
        )
        print(f"scenes deleted: {del_scenes.rowcount}")

        # Verify. `leftover` counts every remaining row with the old origin,
        # including dead ones, so scenes skipped in step 0 because their
        # source was dead still show up here.
        leftover = session.execute(
            text("SELECT COUNT(*) FROM playback_sources WHERE origin = :origin"),
            {"origin": ORIGIN},
        ).scalar_one()
        movies_count = session.execute(
            text("SELECT COUNT(*) FROM movie_playback_sources WHERE origin = 'paradisehill'"),
        ).scalar_one()
        print("\nverify:")
        print(f"  remaining playback_sources origin={ORIGIN}: {leftover}")
        print(f"  movie_playback_sources origin=paradisehill: {movies_count}")


if __name__ == "__main__":
    main()
|