Mobile / OTA: - Enable Expo Updates (app.json + AndroidManifest) → api.goon-foss.org - Bump 0.1.6 → 0.1.9 (build.gradle, app.json, appVersion.ts, main.py /version) - backend.ts: default public backend auto-connect (no manual login) WebView fallback fix (PlayerScreen INJECTED_JS): - Auto-dismiss cookie/consent gates (hqporner et al. blocked kt_player init) - Context-scoped: only clicks consent buttons inside cookie/gdpr containers - Retry window for <source>.src polling raised 5→15 ticks (post-dismiss init) Resolver: - Series-position + modifier mismatch detector (Episode 2≠4, BTS/unedited) → composite_score hard-reject / cap; wired into scene_score + bulk_dedup - aggregator-mode candidate query: LIMIT 500 + title-match ordering Connectors: - porndoe.com browse scraper (JSON-LD VideoObject) — theporndude audit pilot landing: APK links → goon-v0.1.9.apk Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
87 lines
3 KiB
Python
87 lines
3 KiB
Python
"""Per 166 review slugs z top-porn-tube-sites:
|
|
1. Fetch review page → extract pdude.link Visit URL + rating + score badges
|
|
2. Follow pdude.link → real tube domain
|
|
3. Cross-check vs nasze 25 tube origins
|
|
4. Output JSON: { slug, name, theporndude_rank, theporndude_score, real_domain, in_our_db, our_origin }
|
|
"""
|
|
import asyncio
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
from urllib.parse import urlparse
|
|
|
|
import httpx
|
|
|
|
# Input file: JSON document whose top-level "reviews" key is read by main().
REVIEWS_FILE = Path("theporndude_free_tubes.json")
# Output file: main() writes the list of per-review result dicts here.
OUT_FILE = Path("theporndude_resolved.json")

# User-Agent header sent with every request made by fetch_review().
# Wrapped for line length only; the concatenated value is unchanged.
UA = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/121.0 Safari/537.36"
)
|
|
|
|
|
|
def _strip_www(host: str) -> str:
    """Return *host* without one leading ``www.`` label; the rest is untouched."""
    return host[4:] if host.startswith("www.") else host


async def fetch_review(cli: "httpx.AsyncClient", review: dict, rank: int) -> dict:
    """Scrape one review page and resolve its pdude.link visit URL.

    The review page ``https://theporndude.com/{id}/{slug}`` is fetched and the
    score badge, ``<title>``, meta description and first ``pdude.link`` URL are
    pulled out of the HTML with regular expressions. The pdude.link URL is then
    requested and the URL of the response that comes back is recorded.

    Args:
        cli: Client whose awaitable ``get(url, headers=...)`` returns a
            response exposing ``.text`` and ``.url``.
        review: Review record; must contain ``"id"`` and ``"slug"`` (a missing
            key raises ``KeyError``). All of its keys are copied to the result.
        rank: Value stored under ``"rank"`` in the result.

    Returns:
        A new dict with the review's keys plus ``"rank"``. On a successful page
        fetch it also holds ``"theporndude_score"`` (float or ``None``),
        ``"page_title"`` (max 120 chars) and ``"page_desc"`` (max 200 chars).
        On full success it adds ``"real_domain"`` and ``"final_url"`` (max 200
        chars). Any failure is reported under ``"error"`` instead of raising.
    """
    url = f"https://theporndude.com/{review['id']}/{review['slug']}"
    try:
        r = await cli.get(url, headers={"User-Agent": UA})
        html = r.text
    except Exception as e:
        # Deliberately broad: a failure is recorded in the result for this
        # review rather than raised to the caller.
        return {**review, "rank": rank, "error": f"fetch_review: {e}"}

    # Extract the score badge.
    score_m = re.search(r'class="rate__num">\s*(\d+(?:\.\d+)?)\s*<', html)
    # Extract the pdude.link visit URL.
    pdude_m = re.search(r'href="(https://pdude\.link/[\w\-\.]+)"', html)
    # Extract <title> + meta description.
    title_m = re.search(r"<title>([^<]+)</title>", html)
    desc_m = re.search(r'<meta\s+name="description"\s+content="([^"]+)"', html)

    out = {
        **review,
        "rank": rank,
        "theporndude_score": float(score_m.group(1)) if score_m else None,
        "page_title": (title_m.group(1) if title_m else "")[:120],
        "page_desc": (desc_m.group(1) if desc_m else "")[:200],
    }
    if not pdude_m:
        out["error"] = "no_pdude_link"
        return out
    pdude_url = pdude_m.group(1)

    # Follow pdude.link.
    try:
        r2 = await cli.get(pdude_url, headers={"User-Agent": UA})
        # URL of the response actually returned for the pdude.link request.
        final_url = str(r2.url)
        host = urlparse(final_url).hostname or ""
        # Strip only a leading "www." label. A blanket str.replace would also
        # eat "www." inside the name (e.g. "awww.example.com").
        out["real_domain"] = _strip_www(host)
        out["final_url"] = final_url[:200]
    except Exception as e:
        out["error"] = f"pdude_follow: {e}"
    return out
|
|
|
|
|
|
async def main() -> None:
    """Resolve every review in REVIEWS_FILE and write the results to OUT_FILE.

    Loads the ``"reviews"`` list from REVIEWS_FILE and runs ``fetch_review``
    for each entry with a 1-based rank, at most 8 at a time. The collected
    result dicts are dumped as indented JSON to OUT_FILE, and a summary of how
    many entries got a non-empty ``"real_domain"`` is printed.
    """
    # JSON is read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    reviews = json.loads(REVIEWS_FILE.read_text(encoding="utf-8"))["reviews"]

    timeout = httpx.Timeout(20.0, connect=10.0)
    limits = httpx.Limits(max_keepalive_connections=10, max_connections=20)
    async with httpx.AsyncClient(
        timeout=timeout, limits=limits, follow_redirects=True, http2=False
    ) as cli:
        # Caps the number of fetch_review calls in flight at once.
        sem = asyncio.Semaphore(8)

        async def worker(rev, rank):
            async with sem:
                return await fetch_review(cli, rev, rank)

        tasks = [worker(rev, rank) for rank, rev in enumerate(reviews, start=1)]
        results = await asyncio.gather(*tasks)

    OUT_FILE.write_text(json.dumps(results, indent=2), encoding="utf-8")
    total = len(results)
    ok = sum(1 for r in results if r.get("real_domain"))
    # An empty review list must not crash the summary with ZeroDivisionError.
    pct = ok * 100 / total if total else 0.0
    print(f"resolved {ok}/{total} ({pct:.0f}%)")
    print(f"out -> {OUT_FILE}")
|
|
|
|
|
|
# Script entry point: run the async main() only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())
|