From 4f0fb1636c3fa10e33742739e7eeb4b8bc60e16a Mon Sep 17 00:00:00 2001
From: jtrzupek <jtrzupek@gmail.com>
Date: Wed, 3 Jun 2026 11:23:49 +0200
Subject: [PATCH] chore(scripts): tube SSR-richness survey probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ad-hoc research tool: for a list of candidate tubes, fetch a listing page, grab a scene
URL, and classify the detail — reachable / JSON-LD VideoObject / duration / performers /
tags. Used 2026-06-03 to evaluate deep-crawl candidates (redtube + drtuber look strong;
pornhub/spankbang/porntrex/hqporner/youporn rejected; nuvid/motherless bare).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 scripts/_tube_survey.py | 67 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 scripts/_tube_survey.py
diff --git a/scripts/_tube_survey.py b/scripts/_tube_survey.py
new file mode 100644
index 0000000..b79996e
--- /dev/null
+++ b/scripts/_tube_survey.py
@@ -0,0 +1,67 @@
+"""Ad-hoc survey of tube candidates for deep-crawl (SSR richness). Throwaway research."""
+import re, json, httpx
+
+UA = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/140 Safari/537.36"}  # browser-like request header
+JL = re.compile(r"<script[^>]+application/ld\+json[^>]*>(.*?)</script>", re.S | re.I)  # group 1 = body of a JSON-LD <script> tag
+DUR = re.compile(r'"duration"\s*:\s*"(PT[^"]+)"')  # JSON "duration" key whose string value starts with PT
+DUR2 = re.compile(r'(video:duration"\s+content="\d+|"duration"\s*:\s*\d{2,}|>\d{1,2}:\d{2}<)')  # fallbacks: video:duration meta, numeric "duration", >m:ss< text
+PERF = re.compile(r'href="[^"]*/(models?|pornstars?|actress|porn-star)/[a-z0-9_-]+', re.I)  # href with a performer-style path segment followed by a slug
+TAGRX = re.compile(r'href="[^"]*/(tags?|categor)[a-z]*/[a-z0-9_-]+', re.I)  # href with a tag*/categor* path segment followed by a slug
+CF = re.compile(r"cloudflare|just a moment|cf-chl|captcha|enable javascript", re.I)  # text markers of a challenge / JS-required page
+
+
+def vobj(h):  # -> first JSON-LD node in HTML text `h` whose "@type" is, or lists, "VideoObject"; else None
+    for m in JL.finditer(h):
+        try:
+            d = json.loads(m.group(1).strip())
+        except Exception:  # a malformed JSON-LD <script> must not hide a valid one later in the page
+            continue
+        for o in (d if isinstance(d, list) else (d.get("@graph", [d]) if isinstance(d, dict) else [])):  # top-level list, @graph, or single node
+            kinds = o.get("@type") if isinstance(o, dict) else None
+            if "VideoObject" in (kinds if isinstance(kinds, list) else [kinds]):  # "@type" may be a string or an array of strings
+                return o
+    return None
+
+
+CANDIDATES = [  # each entry: (name, listing URL, scene-link regex, base URL prepended to relative links)
+    ("xnxx", "https://www.xnxx.com/", r"/video\.?[a-z0-9]+/[a-z0-9_]+", "https://www.xnxx.com"),
+    ("redtube", "https://www.redtube.com/newest", r"/[0-9]{6,}", "https://www.redtube.com"),
+    ("drtuber", "https://www.drtuber.com/videos/recent", r"/video/[0-9]+/[a-z0-9-]+", "https://www.drtuber.com"),
+    ("tube8", "https://www.tube8.com/", r"/[a-z0-9-]+/[0-9]{5,}", "https://www.tube8.com"),
+    ("nuvid", "https://www.nuvid.com/", r"/video/[0-9]+/[a-z0-9-]+", "https://www.nuvid.com"),
+    ("porntube", "https://www.porntube.com/videos", r"/videos/[a-z0-9-]+-[0-9]+", "https://www.porntube.com"),
+    ("anyporn", "https://anyporn.com/latest-updates/", r"/[0-9]+/", "https://anyporn.com"),
+    ("motherless", "https://motherless.com/new/videos", r"/[0-9A-F]{6,}", "https://motherless.com"),
+    ("sexvid", "https://www.sexvid.xxx/latest-updates/", r"/v/[a-z0-9-]+", "https://www.sexvid.xxx"),
+    ("pornone", "https://pornone.com/recent/", r"/[a-z0-9-]+/[0-9]+/", "https://pornone.com"),
+]
+
+
+def yn(b):  # render any truthy value as "Y" and any falsy value as "-"
+    return ("-", "Y")[bool(b)]
+
+
+with httpx.Client(timeout=15, headers=UA, follow_redirects=True) as c:  # one client reused for every request
+    for name, listing, rgx, base in CANDIDATES:
+        try:
+            r = c.get(listing)
+        except Exception as e:  # best-effort survey: report the failure and move on to the next site
+            print(f"{name:10} LISTING-ERR {str(e)[:40]}")
+            continue
+        if r.status_code != 200:
+            print(f"{name:10} HTTP {r.status_code} (block/redirect)")
+            continue
+        m = re.search(rgx, r.text)  # first scene-looking link on the listing page
+        if not m:
+            print(f"{name:10} no-scene-link {'(CF/JS)' if CF.search(r.text) else ''}")
+            continue
+        su = base + m.group(0) if m.group(0).startswith("/") else m.group(0)  # make a relative link absolute
+        try:
+            resp = c.get(su)
+        except Exception as e:
+            print(f"{name:10} DETAIL-ERR {str(e)[:30]}")
+            continue
+        h = resp.text
+        o, st = vobj(h), ("OK" if resp.status_code == 200 else f"DETAIL-{resp.status_code}")  # an error page must not be reported as OK
+        has_dur = bool((o and o.get("duration")) or DUR.search(h) or DUR2.search(h))
+        print(f"{name:10} {st} jsonld={yn(o)} dur={yn(has_dur)} perf={yn(PERF.search(h))} tags={yn(TAGRX.search(h))}  {su[:55]}")