Mobile / OTA: - Enable Expo Updates (app.json + AndroidManifest) → api.goon-foss.org - Bump 0.1.6 → 0.1.9 (build.gradle, app.json, appVersion.ts, main.py /version) - backend.ts: default public backend auto-connect (no manual login) WebView fallback fix (PlayerScreen INJECTED_JS): - Auto-dismiss cookie/consent gates (hqporner et al. blocked kt_player init) - Context-scoped: only clicks consent buttons inside cookie/gdpr containers - Retry window for <source>.src polling raised 5→15 ticks (post-dismiss init) Resolver: - Series-position + modifier mismatch detector (Episode 2≠4, BTS/unedited) → composite_score hard-reject / cap; wired into scene_score + bulk_dedup - aggregator-mode candidate query: LIMIT 500 + title-match ordering Connectors: - porndoe.com browse scraper (JSON-LD VideoObject) — theporndude audit pilot landing: APK links → goon-v0.1.9.apk Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
118 lines
4.6 KiB
Python
118 lines
4.6 KiB
Python
"""Cross-check 166 resolved theporndude domains vs nasze 25 tube origins."""
|
|
import json
import re
from pathlib import Path
|
|
|
|
# Origins known to us: everything in the DB (live + dead) plus the extractor
# REGISTRY in app/extractors/__init__.py.
_DB_ORIGINS = [
    "tube:0dayxxcom",
    "tube:epornercom",
    "tube:fpoxxx",
    "tube:freshpornoorg",
    "tube:hqpornercom",
    "tube:latestpornvideocom",
    "tube:mypornerleakcom",
    "tube:perverzijacom",
    "tube:porn00org",
    "tube:porndishcom",
    "tube:porndittcom",
    "tube:pornhatcom",
    "tube:pornhubcom",
    "tube:porntrexcom",
    "tube:pornxpph",
    "tube:redtubecom",
    "tube:sxylandcom",
    "tube:sxyprncom",
    "tube:xhamstercom",
    "tube:xnxxcom",
    "tube:xvideoscom",
    "tube:youporncom",
    "tube:latestleaksco",
    "tube:siskavideo",
    "tube:hdporn92com",
]

# REGISTRY only: the extractor is known, but there is no playback in the live DB.
_REGISTRY_ONLY_ORIGINS = [
    "tube:xmoviesforyoucom",
    "tube:watchporn",
    "tube:porn4dayspw",
    "tube:paradisehillcc",
]

# DB entries come first, registry-only entries last.
OUR_ORIGINS = _DB_ORIGINS + _REGISTRY_ONLY_ORIGINS
|
|
|
|
|
|
# Real TLDs only. NOT "tube"/"porn"/"xxx", because those are often part of the
# site name itself (redtube, pornhub, fpoxxx).
_TLD_RE = re.compile(r"(com|net|org|tv|cc|pw|co|to|ws|me|sx|info|biz)$")


def _strip_tld(s: str) -> str:
    """Return *s* with a trailing TLD token (as listed in ``_TLD_RE``) removed.

    Sitetags have their dots removed, so the TLD appears as a plain suffix:
    xvideoscom -> xvideos; pornhubcom -> pornhub; hdporn92com -> hdporn92.
    A string that does not end in one of the listed TLDs is returned unchanged.
    """
    return _TLD_RE.sub("", s)
|
|
|
|
# Build sitetag → matching variants for fuzzy match
|
|
def origin_to_sitetag(origin: str) -> str:
    """Return the sitetag part of an origin id, e.g. tube:xvideoscom -> xvideoscom.

    Only the leading ``tube:`` prefix is dropped; a string without that
    prefix is returned unchanged.
    """
    # removeprefix, not replace: replace would also delete any later "tube:"
    # occurrence inside the sitetag itself.
    return origin.removeprefix("tube:")
|
|
|
|
|
|
def domain_to_sitetag(domain: str) -> str:
    """Collapse a domain into a sitetag: lowercase, with dots and hyphens removed.

    Examples: xvideos.com -> xvideoscom, porntrex.com -> porntrexcom.
    """
    lowered = domain.lower()
    # Delete both separator characters in a single pass.
    return lowered.translate(str.maketrans("", "", ".-"))
|
|
|
|
|
|
def match(slug: str, domain: str) -> str | None:
    """Find the entry of ``OUR_ORIGINS`` that corresponds to a theporndude record.

    Matching uses `slug` (from the theporndude review URL) and/or `domain`
    (the `real_domain` resolved via pdude.link). The slug is the tube's name
    (e.g. 'xvideos', 'pornhub', 'paradisehill'). Origins have the form
    ``tube:<sitetag>``, where the sitetag is the domain with its dots removed.
    Comparing "slug equals sitetag without TLD" gives good recall.

    Returns the first matching origin in ``OUR_ORIGINS`` order, or ``None``
    when neither input yields a usable key or nothing matches.
    """
    raw_keys = []
    if slug:
        raw_keys.append(slug.lower().replace("-", ""))
    if domain:
        raw_keys.append(domain_to_sitetag(domain))

    # Normalise the candidates once; keys shorter than 3 characters are
    # ignored because they never count as a match.
    wanted = {key for key in map(_strip_tld, raw_keys) if len(key) >= 3}
    if not wanted:
        return None

    return next(
        (
            origin
            for origin in OUR_ORIGINS
            if _strip_tld(origin_to_sitetag(origin)) in wanted
        ),
        None,
    )
|
|
|
|
|
|
def main():
    """Report which resolved theporndude tubes are already covered by our origins.

    Reads ``theporndude_resolved.json`` from the current working directory,
    sorts every record into one of three buckets (already matched to an
    origin, new candidate, resolution error), prints a summary to stdout and
    writes the detailed result to ``theporndude_coverage.json``.

    Every record must carry a ``rank`` key (used for sorting and output);
    ``slug``, ``real_domain`` and ``theporndude_score`` may be missing.
    Each record dict gets an ``our_origin`` key added in place.
    """
    data = json.loads(Path("theporndude_resolved.json").read_text(encoding="utf-8"))

    have = []
    new = []
    error = []
    for r in data:
        # A record counts as an error only if resolution produced no domain.
        if "error" in r and not r.get("real_domain"):
            error.append(r)
            continue
        our = match(r.get("slug", ""), r.get("real_domain", ""))
        r["our_origin"] = our
        (have if our else new).append(r)

    # Sort once; both the console report and the JSON summary use rank order.
    have.sort(key=lambda x: x["rank"])
    new.sort(key=lambda x: x["rank"])

    print("=== Coverage ===")
    print(f"Total theporndude top-porn-tubes: {len(data)}")
    print(f" Already in our DB: {len(have)}")
    print(f" NEW (potential candidates): {len(new)}")
    print(f" Errors: {len(error)}")
    print()
    print("=== Already have (matched) — top 30 by theporndude rank ===")
    for r in have[:30]:
        # A record can match on its slug alone, so real_domain may be
        # missing or None here; fall back to '?' like the NEW listing does.
        print(
            f" #{r['rank']:>3} score={r.get('theporndude_score') or '?':>4} "
            f"{r.get('real_domain') or '?':<28} -> {r['our_origin']}"
        )
    print()
    print("=== NEW candidates (not in DB) — top 60 by theporndude rank ===")
    for r in new[:60]:
        print(
            f" #{r['rank']:>3} score={r.get('theporndude_score') or '?':>4} "
            f"{r.get('real_domain') or '?':<30} ({r.get('slug') or '?'})"
        )

    # Detailed output
    summary = {
        "total": len(data),
        "already_have": [
            {
                "rank": r["rank"],
                "slug": r.get("slug"),
                "domain": r.get("real_domain"),
                "score": r.get("theporndude_score"),
                "our_origin": r["our_origin"],
            }
            for r in have
        ],
        "new_candidates": [
            {
                "rank": r["rank"],
                "slug": r.get("slug"),
                "domain": r.get("real_domain"),
                "score": r.get("theporndude_score"),
                "final_url": r.get("final_url", ""),
            }
            for r in new
        ],
        "errors": [
            {"rank": r["rank"], "slug": r.get("slug"), "error": r.get("error")}
            for r in error
        ],
    }
    Path("theporndude_coverage.json").write_text(
        json.dumps(summary, indent=2), encoding="utf-8"
    )
    print("\n-> theporndude_coverage.json")
|
|
|
|
|
|
# Run the coverage report only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|