goon/scripts/theporndude_scorecard.py
https://github.com/goon-foss/goon 642f1ab8b8 Mobile 0.1.9: OTA enable, WebView cookie-dismiss fix, porndoe connector
Mobile / OTA:
- Enable Expo Updates (app.json + AndroidManifest) → api.goon-foss.org
- Bump 0.1.6 → 0.1.9 (build.gradle, app.json, appVersion.ts, main.py /version)
- backend.ts: default public backend auto-connect (no manual login)

WebView fallback fix (PlayerScreen INJECTED_JS):
- Auto-dismiss cookie/consent gates (hqporner et al. blocked kt_player init)
- Context-scoped: only clicks consent buttons inside cookie/gdpr containers
- Retry window for <source>.src polling raised 5→15 ticks (post-dismiss init)

Resolver:
- Series-position + modifier mismatch detector (Episode 2≠4, BTS/unedited)
  → composite_score hard-reject / cap; wired into scene_score + bulk_dedup
- aggregator-mode candidate query: LIMIT 500 + title-match ordering

Connectors:
- porndoe.com browse scraper (JSON-LD VideoObject) — theporndude audit pilot

landing: APK links → goon-v0.1.9.apk

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-22 11:20:57 +02:00

104 lines
3.9 KiB
Python

"""Generate the final scorecard JSON for all 166 theporndude top-porn-tube-sites:
- coverage status (already_have / promising / low_value / no_value / auth_wall / dead)
- canonical_value_score 0-5 (heuristic + our ranking)
- recommendation: skip / consider / pilot / integrate
  (this script itself only emits skip, consider and pilot)
Plus a human-readable summary printed to stdout.
"""
import json
from pathlib import Path
# Input data, loaded once at import time from the current working directory.
# The encoding is pinned to UTF-8 so decoding does not depend on the platform's
# locale encoding, which is what read_text() uses when no encoding is given.
COVERAGE = json.loads(Path("theporndude_coverage.json").read_text(encoding="utf-8"))
TRIAGE = json.loads(Path("theporndude_triage.json").read_text(encoding="utf-8"))
# Status buckets, in the order they appear in the "summary" section of the output.
_SUMMARY_STATUSES = (
    "already_have",
    "promising",
    "low_value",
    "no_value",
    "auth_wall",
    "dead",
)


def _classify_candidate(score, findings, root_status):
    """Return the ``(status, recommendation)`` pair for one new candidate.

    The checks are ordered: an unreachable site wins over everything else,
    then a login wall combined with a weak score, then the score thresholds.
    """
    if root_status <= 0 or findings.get("dead_404"):
        return "dead", "skip — dead/unreachable"
    if findings.get("auth_wall") and score < 2:
        return "auth_wall", "skip — login required, no public scenes"
    if score >= 2.5:
        return "promising", "pilot — deep audit + write extractor"
    if score >= 1:
        return "low_value", "consider — basic metadata only, low priority"
    return "no_value", "skip — no canonical-fit signal in HTML"


def _print_bucket(scorecards, status, reason_width):
    """Print one line per scorecard with the given status.

    The comma-joined heuristic reasons are truncated to ``reason_width`` chars.
    """
    for card in scorecards:
        if card["status"] != status:
            continue
        reasons = ",".join(card.get("heuristic_reasons", []))[:reason_width]
        print(f" #{card['rank']:>3} score={card['canonical_value_score']:>4} {card['domain']:<25} ({card['slug']}) {reasons}")


def main(coverage=None, triage=None, out_path="theporndude_scorecard.json"):
    """Build the scorecard, write it as JSON and print a text summary.

    Args:
        coverage: Mapping with the keys ``"already_have"`` and
            ``"new_candidates"``, each a list of dicts. Entries of
            ``"already_have"`` must provide ``rank``, ``slug``, ``domain`` and
            ``our_origin``; entries of ``"new_candidates"`` must provide
            ``rank`` and ``slug`` (``domain`` is optional). Defaults to the
            module-level ``COVERAGE``.
        triage: List of dicts keyed by ``slug``; the optional fields
            ``heuristic_score``, ``root_findings``, ``reasons``,
            ``root_status`` and ``domain`` are read from each. Defaults to the
            module-level ``TRIAGE``.
        out_path: Where the JSON scorecard is written. Defaults to
            ``theporndude_scorecard.json`` in the current working directory.

    A candidate without a triage entry gets ``root_status`` 0 and is therefore
    classified as ``dead``.
    """
    if coverage is None:
        coverage = COVERAGE
    if triage is None:
        triage = TRIAGE

    triage_by_slug = {row["slug"]: row for row in triage}

    # Sites we already integrate carry no score; they are always skipped.
    scorecards = [
        {
            "rank": site["rank"],
            "slug": site["slug"],
            "domain": site["domain"],
            "status": "already_have",
            "our_origin": site["our_origin"],
            "canonical_value_score": None,
            "recommendation": "skip — already integrated",
        }
        for site in coverage["already_have"]
    ]

    for site in coverage["new_candidates"]:
        entry = triage_by_slug.get(site["slug"], {})
        score = entry.get("heuristic_score", 0)
        findings = entry.get("root_findings", {})
        status, recommendation = _classify_candidate(
            score, findings, entry.get("root_status", 0)
        )
        scorecards.append({
            "rank": site["rank"],
            "slug": site["slug"],
            # Fall back from triage to coverage to a guessed "<slug>.com".
            "domain": entry.get("domain") or site.get("domain") or f"{site['slug']}.com",
            "status": status,
            "our_origin": None,
            "canonical_value_score": score,
            "heuristic_reasons": entry.get("reasons", []),
            "findings": findings,
            "recommendation": recommendation,
        })

    scorecards.sort(key=lambda card: card["rank"])

    # Single pass over the scorecards; every status produced above is a key.
    summary = dict.fromkeys(_SUMMARY_STATUSES, 0)
    for card in scorecards:
        summary[card["status"]] += 1

    out = {
        "source": "theporndude.com/top-porn-tube-sites",
        "fetched_at": "2026-05-20",
        "total": len(scorecards),
        "summary": summary,
        "scorecards": scorecards,
    }
    Path(out_path).write_text(json.dumps(out, indent=2), encoding="utf-8")

    # Pretty print summary
    total = out["total"]
    print("=" * 70)
    print(f"THEPORNDUDE.COM CANONICAL-FIT SCORECARD ({out['total']} tubes)")
    print("=" * 70)
    for name, count in out["summary"].items():
        # Guard against an empty scorecard: report 0% instead of dividing by zero.
        share = 100 * count / total if total else 0
        print(f" {name:<15} {count:>4} ({share:.0f}%)")
    print()
    print("PROMISING (score >= 2.5) — pilot candidates:")
    _print_bucket(scorecards, "promising", 60)
    print()
    print("LOW_VALUE (1-2.5) — defer:")
    _print_bucket(scorecards, "low_value", 50)
# Run the scorecard generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()