"""Per-origin extractor check.

For one sample scene from each tube origin, call ``try_extract`` and classify
the result (direct mp4/m3u8 vs. WebView hoster vs. failure).

Run on the VPS:
    docker compose exec -T api python scripts/check_all_hosters.py
"""
from sqlalchemy import text

from app.db import SessionLocal
from app.extractors import try_extract

# Prefix that marks a playback-source origin as a tube site ("tube:<sitetag>").
_ORIGIN_PREFIX = "tube:"

# Column widths of the printed report.
_ORIGIN_WIDTH = 26
_RESULT_WIDTH = 48
_RULE_WIDTH = 95

# Longest exception text shown; "EXC: " + 42 chars fits the result column.
_MAX_ERROR_CHARS = 42
# Longest prefix of the first source's link shown in the result column.
_MAX_LINK_CHARS = 40

# Source types reported as directly playable.
_DIRECT_TYPES = ("mp4", "m3u8", "hls", "mpd")


def _print_row(origin, result, verdict):
    """Print one report row, padding the *whole* result cell to its column."""
    print(f"{origin:<{_ORIGIN_WIDTH}} {result:<{_RESULT_WIDTH}} {verdict}")


def main():
    """Print a one-row-per-origin report of what ``try_extract`` returns.

    Selects, for every ``tube:%`` origin, the playback source of the most
    recently created scene that is not marked dead and has a page URL, runs
    ``try_extract(sitetag, page_url)`` on it and prints the type of the first
    returned source, the number of sources, a prefix of the first link, and a
    verdict (``WEBVIEW``, ``DIRECT``, ``OTHER(...)``, ``FAIL`` or ``ERROR``).
    """
    # Fetch everything up front and release the session before the extractor
    # calls, so no DB session is held open while they run.
    with SessionLocal() as s:
        rows = s.execute(text("""
            SELECT DISTINCT ON (ps.origin)
                   ps.origin, ps.page_url, sc.title
            FROM playback_sources ps
            JOIN scenes sc ON sc.id = ps.scene_id
            WHERE ps.dead_at IS NULL
              AND ps.origin LIKE 'tube:%'
              AND ps.page_url IS NOT NULL
            ORDER BY ps.origin, sc.created_at DESC
        """)).all()

    _print_row("origin", "result", "verdict")
    print("-" * _RULE_WIDTH)

    for r in rows:
        origin = r.origin
        # Strip only the leading prefix; str.replace would also remove any
        # later "tube:" occurrence inside the site tag.
        if origin.startswith(_ORIGIN_PREFIX):
            sitetag = origin[len(_ORIGIN_PREFIX):]
        else:
            sitetag = origin

        try:
            sources = try_extract(sitetag, r.page_url)
        except Exception as exc:
            # Diagnostic boundary: one failing extractor must not abort the
            # sweep over the remaining origins. Collapse whitespace so a
            # multi-line message cannot break the table row, and fall back to
            # the exception class name when the message is empty.
            message = " ".join(str(exc).split()) or type(exc).__name__
            _print_row(origin, f"EXC: {message[:_MAX_ERROR_CHARS]}", "ERROR")
            continue

        if not sources:
            _print_row(origin, "None (no sources)", "FAIL")
            continue

        # Classify by the type of the first source.
        first = sources[0]
        source_type = getattr(first, "type", "?")
        link = (getattr(first, "link", "") or "")[:_MAX_LINK_CHARS]

        if source_type == "hoster":
            verdict = "WEBVIEW (page → ad risk)"
        elif source_type in _DIRECT_TYPES:
            verdict = "DIRECT (native ExoPlayer)"
        else:
            verdict = f"OTHER({source_type})"

        _print_row(origin, f"{source_type} x{len(sources)} {link}", verdict)


if __name__ == "__main__":
    main()