goon/scripts/migrate_paradisehill_to_movies.py
goon-foss ad0284585b Initial commit
Goon — self-hosted aggregator for adult-content scene metadata.

Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites.
Cross-source deduplication via perceptual hash + Levenshtein distance.
FastAPI backend + APScheduler worker + React Native (Expo) mobile client.

FOSS, ad-free, donation-funded. See README for details.
2026-05-20 10:10:22 +02:00

173 lines
6.5 KiB
Python

"""One-shot migration: paradisehill scenes → movies.
Paradisehill content is actually movies (`og:type=video.movie`), but the
early implementation landed it in the `scenes` table. This script moves
73 entries from `scenes` to `movies`, preserving UUIDs, performers,
tags, studios, external_refs and playback_sources. After the migration, the
DELETE from scenes cascades to the remaining links.
Safe because:
- 0 favorite_scenes, 0 scene_play_progress, 0 bug_reports, 0 movie_chapters
(verified 2026-05-15)
- 0 stashdb/tpdb refs (apart from 1+1 — accepted loss; the matches were wrong)
- pornapp refs are moved to movie_external_refs (reusing source_id)
"""
from __future__ import annotations
import sys
from sqlalchemy import text
# Make the project's `app` package importable when this file is run as a
# standalone script (assumes the project root is /srv — TODO confirm).
sys.path.insert(0, "/srv")
from app.db import SessionLocal

# `playback_sources.origin` value that selects the scenes to migrate.
ORIGIN = "tube:paradisehillcc"
def _run_step(session, label: str, sql: str, params: dict) -> int:
    """Execute one SQL statement and print how many rows it affected.

    Args:
        session: Open SQLAlchemy session; the statement runs in its
            current transaction.
        label: Text printed in front of the row count.
        sql: SQL source; wrapped in ``text()`` before execution.
        params: Bind parameters for the statement.

    Returns:
        The ``rowcount`` reported for the executed statement.
    """
    result = session.execute(text(sql), params)
    print(f"{label}: {result.rowcount}")
    return result.rowcount


def main() -> None:
    """Move scenes that have a live ``ORIGIN`` playback source into movies.

    Everything runs inside a single ``session.begin()`` block, so the copy
    steps and the final delete are committed together or not at all.

    Steps:
        0. Collect the ids of scenes with a non-dead playback source whose
           origin is ``ORIGIN``; stop early when there are none.
        1-5. Copy the scene rows and their performers, tags, external refs
           and playback sources into the corresponding ``movie*`` tables,
           reusing the scene UUID as the movie id. Every insert uses
           ``ON CONFLICT ... DO NOTHING``, so rows that already exist on the
           movie side are skipped silently.
        6. Delete the migrated rows from ``scenes``.
        7. Print the remaining/new playback-source counts for a manual check.
    """
    with SessionLocal() as session, session.begin():
        # 0. Collect scene IDs (only scenes with a live source of this origin).
        scene_ids = session.execute(
            text(
                "SELECT DISTINCT scene_id FROM playback_sources "
                "WHERE origin = :origin AND dead_at IS NULL"
            ),
            {"origin": ORIGIN},
        ).scalars().all()
        print(f"scenes to migrate: {len(scene_ids)}")
        if not scene_ids:
            print("nothing to do")
            return

        # The ids are sent as strings and cast back to uuid[] in every statement.
        params = {"ids": [str(sid) for sid in scene_ids]}

        # 1. Movies — preserve the UUID. scenes.title may exceed the
        #    VARCHAR(512) constraint on movies.title, so clamp in the SELECT.
        _run_step(
            session,
            "movies inserted",
            """
            INSERT INTO movies (
                id, title, title_normalized, slug,
                release_date, release_year,
                studio_id, director, duration_sec, description,
                created_at, updated_at
            )
            SELECT
                s.id,
                LEFT(s.title, 512),
                LEFT(s.title_normalized, 512),
                LEFT(s.slug, 512),
                s.release_date,
                EXTRACT(YEAR FROM s.release_date)::int,
                s.studio_id,
                s.director,
                s.duration_sec,
                s.description,
                s.created_at,
                s.updated_at
            FROM scenes s
            WHERE s.id = ANY(CAST(:ids AS uuid[]))
            ON CONFLICT (id) DO NOTHING
            """,
            params,
        )

        # 2. Performers.
        _run_step(
            session,
            "movie_performers inserted",
            """
            INSERT INTO movie_performers (movie_id, performer_id, as_alias)
            SELECT sp.scene_id, sp.performer_id, sp.as_alias
            FROM scene_performers sp
            WHERE sp.scene_id = ANY(CAST(:ids AS uuid[]))
            ON CONFLICT (movie_id, performer_id) DO NOTHING
            """,
            params,
        )

        # 3. Tags.
        _run_step(
            session,
            "movie_tags inserted",
            """
            INSERT INTO movie_tags (movie_id, tag_id, source_id)
            SELECT st.scene_id, st.tag_id, st.source_id
            FROM scene_tags st
            WHERE st.scene_id = ANY(CAST(:ids AS uuid[]))
            ON CONFLICT (movie_id, tag_id) DO NOTHING
            """,
            params,
        )

        # 4. External refs (preserve confidence + URL + first/last seen).
        #    A missing confidence is stored as 1.0.
        _run_step(
            session,
            "movie_external_refs inserted",
            """
            INSERT INTO movie_external_refs (
                source_id, external_id, movie_id, confidence, url,
                first_seen, last_seen
            )
            SELECT
                ser.source_id, ser.external_id, ser.scene_id,
                COALESCE(ser.confidence, 1.0),
                ser.url, ser.first_seen, ser.last_seen
            FROM scene_external_refs ser
            WHERE ser.scene_id = ANY(CAST(:ids AS uuid[]))
            ON CONFLICT (source_id, external_id) DO NOTHING
            """,
            params,
        )

        # 5. Playback sources. Rows whose origin is ORIGIN are renamed to
        #    'paradisehill' (the origin the verification query below counts);
        #    any source the scene has from a *different* origin keeps its own
        #    origin instead of being relabelled as paradisehill.
        _run_step(
            session,
            "movie_playback_sources inserted",
            """
            INSERT INTO movie_playback_sources (
                id, movie_id, origin, page_url, embed_url, stream_url,
                quality, duration_sec, thumbnail_url, animated_thumbnail_url,
                last_seen_at, dead_at, dead_reason, created_at, updated_at
            )
            SELECT
                ps.id, ps.scene_id,
                CASE
                    WHEN ps.origin = :origin THEN 'paradisehill'
                    ELSE ps.origin
                END,
                ps.page_url, ps.embed_url, ps.stream_url,
                ps.quality, ps.duration_sec, ps.thumbnail_url,
                ps.animated_thumbnail_url,
                ps.last_seen_at, ps.dead_at, ps.dead_reason,
                ps.created_at, ps.updated_at
            FROM playback_sources ps
            WHERE ps.scene_id = ANY(CAST(:ids AS uuid[]))
            ON CONFLICT (origin, page_url) DO NOTHING
            """,
            {**params, "origin": ORIGIN},
        )

        # 6. Delete scenes. This relies on ON DELETE CASCADE foreign keys to
        #    remove the scene_* link rows and playback_sources (assumed from
        #    the schema, which is not visible here — confirm before running).
        _run_step(
            session,
            "scenes deleted",
            "DELETE FROM scenes WHERE id = ANY(CAST(:ids AS uuid[]))",
            params,
        )

        # 7. Verify: print counts for a manual sanity check. The "remaining"
        #    count is not restricted to live rows, unlike the step-0 selection.
        leftover = session.execute(
            text("SELECT COUNT(*) FROM playback_sources WHERE origin = :origin"),
            {"origin": ORIGIN},
        ).scalar_one()
        movies_count = session.execute(
            text(
                "SELECT COUNT(*) FROM movie_playback_sources "
                "WHERE origin = 'paradisehill'"
            ),
        ).scalar_one()
        print("\nverify:")
        print(f" remaining playback_sources origin={ORIGIN}: {leftover}")
        print(f" movie_playback_sources origin=paradisehill: {movies_count}")
# Run the migration only when executed as a script, not when imported.
if __name__ == "__main__":
    main()