goon/scripts/repair_dooplay_movies.py
goon-foss ad0284585b Initial commit
Goon — self-hosted aggregator for adult-content scene metadata.

Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites.
Cross-source deduplication via perceptual hash + Levenshtein distance.
FastAPI backend + APScheduler worker + React Native (Expo) mobile client.

FOSS, ad-free, donation-funded. See README for details.
2026-05-20 10:10:22 +02:00

118 lines
4.2 KiB
Python

"""Re-fetch sub-hosters dla dooplay movies (pandamovies/mangoporn/streamporn)
ingested BEFORE the markup fix in commit 12a897b (2026-05-16 15:55).

Symptom: the movie has ONLY the raw landing playback_source (plain
`origin='pandamovies'`, without any sub-hoster). The user opens it in a
WebView → ad redirect → no video. Confirmed by the bug reports from
22:13-22:15 (2026-05-17).

Logic:
1. Find movies whose origin is pandamovies/mangoporn/streamporn and that have only 1 raw playback source
2. Re-fetch the detail HTML
3. Run the dooplay extractor → RawMovie with sub-hosters
4. _sync_playback_sources insert/upsert (resolver helper)

Usage: docker exec goon-api-1 python -m scripts.repair_dooplay_movies [--site=pandamovies] [--limit=20]
"""
from __future__ import annotations
import argparse
import logging
import uuid
import sqlalchemy as sa
from app.connectors.dooplay import (
MangopornConnector,
PandamoviesConnector,
StreampornConnector,
)
from app.db import session_scope
from app.models.movie import Movie
from app.models.movie_playback_source import MoviePlaybackSource
from app.normalize.movies import normalize_movie
from app.resolve.movie_resolver import _sync_playback_sources
# Module-level logger; main() configures the root handler before repair() runs.
log = logging.getLogger(__name__)

# Maps each accepted --site value to the connector class that repair()
# instantiates for it. The keys are also used as the `origin` value matched
# against movie_playback_sources in the repair query.
CONNECTORS = {
    "pandamovies": PandamoviesConnector,
    "mangoporn": MangopornConnector,
    "streamporn": StreampornConnector,
}
def _count_playback_sources(session, movie_id) -> int:
    """Return the number of movie_playback_sources rows stored for ``movie_id``."""
    total = session.execute(
        sa.text("SELECT COUNT(*) FROM movie_playback_sources WHERE movie_id = :mid"),
        {"mid": movie_id},
    ).scalar()
    # scalar() may return None; normalise so callers can subtract safely.
    return total or 0


def _title_preview(raw_movie, width: int = 50) -> str:
    """Return at most ``width`` characters of the movie title for log lines.

    A missing (None) title yields an empty string so that logging can never
    abort an otherwise successful repair.
    """
    return (raw_movie.title or "")[:width]


def repair(site: str, limit: int) -> dict:
    """Re-fetch detail pages for one site and sync the playback sources found.

    Selects up to ``limit`` movies that have a playback source with
    ``origin = site`` and exactly one playback source in total, re-fetches the
    detail page of each through the site's connector, normalizes the result
    and passes it to ``_sync_playback_sources``. The number of playback-source
    rows is counted before and after the sync to determine how many were added.

    Each movie is committed on its own, so repairs that already succeeded are
    kept even if a later movie raises.

    Args:
        site: Key of ``CONNECTORS``; also the ``origin`` value to match.
        limit: Maximum number of candidate movies to process.

    Returns:
        Counters dict with the keys ``checked`` (candidates visited),
        ``added_hosters`` (total rows added), ``fetch_fail`` (fetch raised or
        returned None) and ``no_hosters`` (sync added no rows). A movie that
        disappeared between selection and sync is counted only in ``checked``.

    Raises:
        KeyError: If ``site`` is not a key of ``CONNECTORS``.
    """
    connector = CONNECTORS[site]()
    counters = {"checked": 0, "added_hosters": 0, "fetch_fail": 0, "no_hosters": 0}

    with session_scope() as session:
        # Movies of this origin that have only a single playback source,
        # i.e. no sub-hosters were stored for them.
        candidates = session.execute(
            sa.text("""
                SELECT m.id, mps.page_url
                FROM movies m
                JOIN movie_playback_sources mps ON mps.movie_id = m.id
                WHERE mps.origin = :origin
                AND (SELECT COUNT(*) FROM movie_playback_sources mps2
                WHERE mps2.movie_id = m.id) = 1
                LIMIT :limit
            """),
            {"origin": site, "limit": limit},
        ).all()
        log.info("%s: %d movies needing repair", site, len(candidates))

        for movie_id, page_url in candidates:
            counters["checked"] += 1

            # Network/parsing boundary: any failure is logged and counted so
            # that one bad page does not stop the remaining repairs.
            try:
                raw_movie = connector._fetch_detail(page_url)
            except Exception as e:
                log.warning("fetch %s failed: %s", page_url, e)
                counters["fetch_fail"] += 1
                continue
            if raw_movie is None:
                log.warning("fetch %s returned None", page_url)
                counters["fetch_fail"] += 1
                continue

            # _sync_playback_sources expects a NormalizedMovie.
            norm = normalize_movie(raw_movie)

            # The movie may have been removed since the candidate query ran;
            # check right before writing and skip it without counting rows.
            if session.get(Movie, movie_id) is None:
                log.warning("movie %s no longer exists, skipping", movie_id)
                continue

            before_count = _count_playback_sources(session, movie_id)
            _sync_playback_sources(session, movie_id=movie_id, norm=norm)
            # Flush so the COUNT below sees the rows written by the sync.
            session.flush()
            added = _count_playback_sources(session, movie_id) - before_count

            if added > 0:
                counters["added_hosters"] += added
                log.info(
                    "repaired %s: +%d hosters (%s)",
                    movie_id, added, _title_preview(raw_movie),
                )
            else:
                counters["no_hosters"] += 1
                log.info(
                    "no new hosters for %s (%s)",
                    movie_id, _title_preview(raw_movie),
                )

            # Persist this movie's repair immediately so progress is not lost
            # if a later movie fails.
            session.commit()

    return counters
def main() -> int:
    """Command-line entry point: parse options, run the repair, log the result.

    Options:
        --site:  one of the ``CONNECTORS`` keys (default ``pandamovies``).
        --limit: maximum number of movies to process (default 50).

    Returns:
        Always 0; the caller uses it as the process exit status.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--site", default="pandamovies", choices=list(CONNECTORS))
    cli.add_argument("--limit", default=50, type=int)
    opts = cli.parse_args()

    # Logging is configured only after the arguments parsed successfully.
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(message)s",
        level=logging.INFO,
    )

    summary = repair(site=opts.site, limit=opts.limit)
    log.info("done: %s", summary)
    return 0
# Script entry point: exit the process with main()'s return value as status.
if __name__ == "__main__":
    raise SystemExit(main())