"""Generuje końcowy scorecard JSON dla wszystkich 166 theporndude top-porn-tube-sites: - coverage status (already_have/new/dead/low_value) - canonical_value_score 0-5 (heurystyka + nasz ranking) - recommendation: skip / consider / pilot / integrate Plus markdown summary dla człowieka. """ import json from pathlib import Path COVERAGE = json.loads(Path("theporndude_coverage.json").read_text()) TRIAGE = json.loads(Path("theporndude_triage.json").read_text()) def main(): triage_by_slug = {r["slug"]: r for r in TRIAGE} scorecards = [] for r in COVERAGE["already_have"]: scorecards.append({ "rank": r["rank"], "slug": r["slug"], "domain": r["domain"], "status": "already_have", "our_origin": r["our_origin"], "canonical_value_score": None, "recommendation": "skip — already integrated", }) for r in COVERAGE["new_candidates"]: t = triage_by_slug.get(r["slug"], {}) score = t.get("heuristic_score", 0) findings = t.get("root_findings", {}) reasons = t.get("reasons", []) root_status = t.get("root_status", 0) domain = t.get("domain") or r.get("domain") or f"{r['slug']}.com" if root_status <= 0 or findings.get("dead_404"): status = "dead" rec = "skip — dead/unreachable" elif findings.get("auth_wall") and score < 2: status = "auth_wall" rec = "skip — login required, no public scenes" elif score >= 2.5: status = "promising" rec = "pilot — deep audit + write extractor" elif score >= 1: status = "low_value" rec = "consider — basic metadata only, low priority" else: status = "no_value" rec = "skip — no canonical-fit signal in HTML" scorecards.append({ "rank": r["rank"], "slug": r["slug"], "domain": domain, "status": status, "our_origin": None, "canonical_value_score": score, "heuristic_reasons": reasons, "findings": findings, "recommendation": rec, }) scorecards.sort(key=lambda x: x["rank"]) out = { "source": "theporndude.com/top-porn-tube-sites", "fetched_at": "2026-05-20", "total": len(scorecards), "summary": { "already_have": sum(1 for s in scorecards if s["status"] == "already_have"), "promising": sum(1 for s in scorecards if s["status"] == "promising"), "low_value": sum(1 for s in scorecards if s["status"] == "low_value"), "no_value": sum(1 for s in scorecards if s["status"] == "no_value"), "auth_wall": sum(1 for s in scorecards if s["status"] == "auth_wall"), "dead": sum(1 for s in scorecards if s["status"] == "dead"), }, "scorecards": scorecards, } Path("theporndude_scorecard.json").write_text(json.dumps(out, indent=2)) # Pretty print summary print("=" * 70) print(f"THEPORNDUDE.COM CANONICAL-FIT SCORECARD ({out['total']} tubes)") print("=" * 70) for k, v in out["summary"].items(): print(f" {k:<15} {v:>4} ({100*v/out['total']:.0f}%)") print() print("PROMISING (score >= 2.5) — pilot candidates:") for s in scorecards: if s["status"] == "promising": r = ",".join(s.get("heuristic_reasons", []))[:60] print(f" #{s['rank']:>3} score={s['canonical_value_score']:>4} {s['domain']:<25} ({s['slug']}) {r}") print() print("LOW_VALUE (1-2.5) — defer:") for s in scorecards: if s["status"] == "low_value": r = ",".join(s.get("heuristic_reasons", []))[:50] print(f" #{s['rank']:>3} score={s['canonical_value_score']:>4} {s['domain']:<25} ({s['slug']}) {r}") if __name__ == "__main__": main()