Mobile 0.1.9: OTA enable, WebView cookie-dismiss fix, porndoe connector

Mobile / OTA:
- Enable Expo Updates (app.json + AndroidManifest) → api.goon-foss.org
- Bump 0.1.6 → 0.1.9 (build.gradle, app.json, appVersion.ts, main.py /version)
- backend.ts: default public backend auto-connect (no manual login)

WebView fallback fix (PlayerScreen INJECTED_JS):
- Auto-dismiss cookie/consent gates (hqporner et al. blocked kt_player init)
- Context-scoped: only clicks consent buttons inside cookie/gdpr containers
- Retry window for <source>.src polling raised 5→15 ticks (post-dismiss init)

Resolver:
- Series-position + modifier mismatch detector (Episode 2≠4, BTS/unedited)
  → composite_score hard-reject / cap; wired into scene_score + bulk_dedup
- aggregator-mode candidate query: LIMIT 500 + title-match ordering

Connectors:
- porndoe.com browse scraper (JSON-LD VideoObject) — theporndude audit pilot

landing: APK links → goon-v0.1.9.apk

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-22 11:20:57 +02:00 · 2026-05-22 11:20:57 +02:00 · 642f1ab8b8
commit 642f1ab8b8
parent ad0284585b
36 changed files with 1825 additions and 100 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -71,6 +71,13 @@ mcp-logs.txt
 # ADB / development debug artefakty (screenshots, ui dumps)
 .tmp_adb/

+# Marketing screenshots — kept local, hosted externally for posts/landing.
+# NOT committed: explicit thumbnails risk GitHub TOS takedown.
+screenshots/
+
+# Launch / marketing material — local working notes, not part of the codebase.
+launch/
+
 # Operational deploy scripts — moved to a private companion repo. Public repo
 # should NOT contain SSH commands, systemd units, or smoke-test playbooks
 # referencing concrete hosts.
--- a/app/api/playback.py
+++ b/app/api/playback.py
@@ -184,21 +184,19 @@ def resolve_movie_playback(
                pb.id,
            )
            stream = None
+        # Mixdrop mxcontent CDN wymaga curl_cffi JA3 → wymusza VPS proxy.
+        # Pre-public: skip mixdrop direct, fallback na embed_url (mobile WebView z
+        # phone IP). Bandwidth + anonimowość VPS > UX. Movie ma zwykle 10+ alt
+        # hosterów (voe/luluvid/doply/etc.), user może wybrać alternative.
+        if stream and "mxcontent.net" in stream.lower():
+            log.info(
+                "movie playback %s: mixdrop mxcontent — skip (VPS-proxy required), WebView fallback",
+                pb.id,
+            )
+            stream = None
        if stream:
            type_hint = "m3u8" if ".m3u8" in stream.lower() else "mp4"
-            # Hostery których CDN wymaga Chrome JA3 (mxcontent dla mixdrop):
-            # proxy MUSI użyć curl_cffi impersonate inaczej 403. `proxy_impersonate=True`
-            # idzie przez `raw` → `_proxify_link` ustawi token `i=1`.
-            cdn_needs_impersonate = "mxcontent.net" in stream.lower()
            raw_meta: dict = {"origin": pb.origin, "host": target}
-            if cdn_needs_impersonate:
-                raw_meta["proxy_impersonate"] = True
-                # Mixdrop: same-session cookies + chrome JA3 wymagane dla mp4.
-                # Backend extract zamknął sesję — proxy musi re-fetchować
-                # embed page w fresh curl_cffi session żeby re-extract mp4
-                # z aktualnymi cookies.
-                raw_meta["refetch_url"] = target
-                raw_meta["refetch_hoster"] = "mixdrop"
            links.append(
                StreamLink(
                    stream_url=stream,
--- a/app/config.py
+++ b/app/config.py
@@ -72,13 +72,22 @@ class Settings(BaseSettings):
    sched_movie_ingest_hours: int = Field(
        default=24, validation_alias="GOON_SCHED_MOVIE_INGEST_HOURS"
    )
-    # Browse-latest scheduler: freshporno/porn00/pornxp newest scenes raz dziennie.
+    # Browse-latest scheduler: freshporno/porn00/pornxp newest scenes.
+    # 6h cadence (zmiana z 24h 2026-05-20): user reportował brak Brazzers Exxtra po
+    # 15-05. Root cause był 2-fold: (1) freshporno publikuje sceny w ciągu dnia, 24h
+    # cadence łapie tylko te do 05:30 UTC; (2) meta_content/release_date bug osobno.
+    # 6h = 4 runs/dzień = każda freshporno scena zaingestowana w ciągu ~6h od publik.
    sched_browse_latest_hours: int = Field(
-        default=24, validation_alias="GOON_SCHED_BROWSE_LATEST_HOURS"
+        default=6, validation_alias="GOON_SCHED_BROWSE_LATEST_HOURS"
    )
    sched_browse_latest_max_pages: int = Field(
        default=5, validation_alias="GOON_SCHED_BROWSE_LATEST_MAX_PAGES"
    )
+    # Bulk-dedup performers safety net — auto-merge duplikatów które resolver-time
+    # scoring pominął. 12h cadence: leci 2x dziennie (po porannym browse-latest run).
+    sched_bulk_dedup_hours: int = Field(
+        default=12, validation_alias="GOON_SCHED_BULK_DEDUP_HOURS"
+    )

    # Hetzner Cloud bandwidth monitor — read-only API token (Security → API Tokens
    # w panelu Hetzner Cloud). Bez tokenu monitor wyłączony (warning w log).
--- a/app/connectors/direct_scrapers/__init__.py
+++ b/app/connectors/direct_scrapers/__init__.py
@@ -137,6 +137,7 @@ ALL_DIRECT_SCRAPERS: list[type[BaseDirectTubeScraper]] = [
 #     (phash Hamming 0). Oryginalne tytuły + channels=studio 1:1. **Aktywny.**
 from app.connectors.direct_scrapers.freshporno import FreshpornoScraper  # noqa: E402
 from app.connectors.direct_scrapers.porn00 import Porn00Scraper  # noqa: E402
+from app.connectors.direct_scrapers.porndoe import PornDoeScraper  # noqa: E402
 from app.connectors.direct_scrapers.pornxp import PornXPScraper  # noqa: E402
 from app.connectors.direct_scrapers.shyfap import ShyfapScraper  # noqa: E402, F401

@@ -152,6 +153,13 @@ ALL_BROWSE_SCRAPERS: list[type[BaseBrowseScraper]] = [
    # 720p). Tytuł zachowuje studio prefix ("Studio Title - Scene Name") → title
    # fuzzy match (rapidfuzz token_set_ratio) może załapać canonical. Monitorować.
    Porn00Scraper,
+    # PornDoeScraper — dołączony 2026-05-21 (theporndude audit). Każda scena ma
+    # kompletny JSON-LD VideoObject: title + uploadDate + duration + named studio
+    # (producer/publisher) + named performers (actor[]) + thumbnail. Najbogatsze
+    # strukturalne metadane spośród browse scraperów — composite fuzzy match ma
+    # komplet sygnałów. Phash hit-rate niski (własne crop-thumbnaile), studio +
+    # performer + date + duration nadrabiają.
+    PornDoeScraper,
    # ShyfapScraper — wyłączony 2026-05-12 (pilot fail, 0% match — orphan factory).
    # Follow-up: dorobić te tubey i sprawdzić phash distance:
    #   - fullmovies.xxx (channel/network/pornstars/categories, brak duration)
--- a/app/connectors/direct_scrapers/freshporno.py
+++ b/app/connectors/direct_scrapers/freshporno.py
@@ -163,11 +163,25 @@ class FreshpornoScraper(BaseBrowseScraper):
            )
        ]

+        # Release date — freshporno emituje `<meta itemprop="uploadDate" content="2026-05-20T...">`.
+        # To data wrzucenia na freshporno, NIE oryginalna release_date studio — ale dla
+        # świeżych scen (uploaded niedługo po publikacji) różnica ≤ 3-7 dni, mieści się w
+        # `date_window_days=7` w resolverze. Bez tego pola scene NULL → match score 0 →
+        # duplicate scene zamiast freshporno PS dodane do TPDB canonical (bug-report
+        # 2026-05-20: brak Brazzers Exxtra po 15-05).
+        release_date_parsed: date | None = None
+        if (m := re.search(r'itemprop="uploadDate"[^>]+content="(\d{4}-\d{2}-\d{2})', detail_html)):
+            try:
+                release_date_parsed = date.fromisoformat(m.group(1))
+            except ValueError:
+                pass
+
        return RawScene(
            external_id=f"{self.sitetag}:{scene_url}",
            title=title,
            description=description,
            duration_sec=duration_sec,
+            release_date=release_date_parsed,
            url=scene_url,
            studio=studio,
            performers=performers,
--- a/app/connectors/direct_scrapers/porndoe.py
+++ b/app/connectors/direct_scrapers/porndoe.py
@@ -0,0 +1,271 @@
+"""porndoe.com — latest-vids browse scraper.
+
+Dołączony 2026-05-21 (theporndude audit). Jedyny verified high-value candidate
+z 172 tube'ów na theporndude.com/top-porn-tube-sites + /full-porn-movies-sites.
+
+Czemu wart: każda scena ma kompletny **JSON-LD VideoObject** schema:
+  - name (title), description, uploadDate (ISO timestamp), duration (ISO 8601)
+  - producer + publisher → named studio z `/channel-profile/<slug>` URL
+  - actor[] → named performers z `/pornstars-profile/<slug>` URL
+  - thumbnailUrl (CDN p.cdnc.porndoe.com)
+
+To wystarczy do composite fuzzy match w resolverze (studio + performer Jaccard +
+date proximity + title token-set + duration). Phash hit-rate niski (porndoe robi
+własne crop-thumbnaile 390x219, nie hot-linkuje studio art) — ale rich metadata
+nadrabia, jak pornxp/porn00.
+
+URL patterns:
+  - Listing: `/videos/most-recent?page=N` (page 1 = newest, ~31 scen/page)
+  - Scene:   `/watch/<id>` gdzie id = `pd` + 10 alfanum (stable)
+  - Studio:  `/channel-profile/<slug>`
+  - Performer: `/pornstars-profile/<slug>`
+  - Tags/categories: `/category/<id>/<slug>`
+
+Playback: stream URL NIE jest inline w SSR HTML — player JS init dopiero po user
+"Play" click. Dajemy playback_source z page_url + origin `tube:porndoecom`;
+extractor w `_REGISTRY` mapuje na `_vps_blocked_fallback.extract` → mobile WebView
+INJECTED_JS scrapuje `<video>.src` po phone IP (0 VPS bandwidth, zgodne z
+pre-public bandwidth/anonimowość priorytet).
+"""
+from __future__ import annotations
+
+import json
+import logging
+import re
+from datetime import date, datetime
+
+from app.connectors.base import (
+    RawFingerprint,
+    RawPerformer,
+    RawPlaybackSource,
+    RawScene,
+    RawStudio,
+    RawTag,
+)
+from app.connectors.direct_scrapers._browse_base import (
+    BaseBrowseScraper,
+    compute_thumbnail_phash,
+)
+
+log = logging.getLogger(__name__)
+
+_BASE = "https://porndoe.com"
+
+# Scene listing — `<a href="/watch/pd7a3o0e8v2b">`. Id = `pd` + alphanumerics.
+_SCENE_URL_RE = re.compile(r'href="(/watch/[a-z0-9]+)"', re.IGNORECASE)
+_WATCH_ID_RE = re.compile(r"/watch/([a-z0-9]+)", re.IGNORECASE)
+
+# JSON-LD <script> blocks; group 1 is the raw script body.
+_JSONLD_RE = re.compile(
+    r'<script[^>]+type=["\']application/ld\+json["\'][^>]*>(.*?)</script>',
+    re.IGNORECASE | re.DOTALL,
+)
+
+# Tags/categories from the DOM (JSON-LD genre is sometimes empty). porndoe URL: `/category/<id>/<slug>`.
+_TAG_LINK_RE = re.compile(
+    r'href="/category/\d+/([a-z0-9\-]+)"[^>]*>([^<]+)</a>', re.IGNORECASE
+)
+
+# ISO 8601 duration — porndoe emits "PT8M0S" (sometimes "T8M0S" without the P).
+_ISO_DUR_RE = re.compile(
+    r"^P?T?(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?$", re.IGNORECASE
+)
+
+
+def _parse_iso_duration(value: str | None) -> int | None:
+    """Convert `PT11M7S` / `T8M0S` to seconds; None for empty or unrecognised input."""
+    if not value:
+        return None
+    m = _ISO_DUR_RE.match(value.strip())
+    if not m:
+        return None
+    h = int(m.group(1) or 0)
+    mn = int(m.group(2) or 0)
+    s = int(m.group(3) or 0)
+    total = h * 3600 + mn * 60 + s
+    return total or None  # a zero-length duration is reported as None as well
+
+
+def _parse_iso_date(value: str | None) -> date | None:
+    """Convert `2026-05-20T14:55:13+00:00` to a date; None when parsing fails."""
+    if not value:
+        return None
+    try:
+        return datetime.fromisoformat(value.replace("Z", "+00:00")).date()
+    except ValueError:
+        # Fallback: parse only the leading YYYY-MM-DD (first 10 characters).
+        m = re.match(r"(\d{4}-\d{2}-\d{2})", value)
+        if m:
+            try:
+                return date.fromisoformat(m.group(1))
+            except ValueError:
+                return None
+        return None
+
+
+def _slug_from_url(url: str | None) -> str | None:
+    """Return the profile slug: `https://porndoe.com/channel-profile/fantasy-girl-pass` → `fantasy-girl-pass`; None if absent."""
+    if not url:
+        return None
+    m = re.search(r"/(?:channel-profile|pornstars-profile)/([a-z0-9\-]+)", url, re.IGNORECASE)
+    return m.group(1) if m else None
+
+
+def _iter_jsonld_objects(data: object):
+    """Flatten JSON-LD (dict / list / `@graph`) into a stream of dicts."""
+    if isinstance(data, dict):
+        graph = data.get("@graph")
+        if isinstance(graph, list):  # a graph wrapper is not yielded itself, only its items
+            for item in graph:
+                yield from _iter_jsonld_objects(item)
+        else:
+            yield data
+    elif isinstance(data, list):
+        for item in data:
+            yield from _iter_jsonld_objects(item)
+
+
+def _extract_video_object(html: str) -> dict | None:
+    """Return the first JSON-LD object whose `@type` equals "VideoObject", or None."""
+    for m in _JSONLD_RE.finditer(html):
+        raw = m.group(1).strip()
+        if not raw:
+            continue
+        try:
+            data = json.loads(raw)
+        except (json.JSONDecodeError, ValueError):
+            continue  # an unparsable script block is skipped, later blocks are still tried
+        for obj in _iter_jsonld_objects(data):
+            if obj.get("@type") == "VideoObject":
+                return obj
+    return None
+
+
+class PornDoeScraper(BaseBrowseScraper):
+    sitetag = "porndoecom"
+
+    def _listing_url(self, page: int) -> str:
+        if page <= 1:  # page 1 (or lower) uses the bare listing URL without ?page=
+            return f"{_BASE}/videos/most-recent"
+        return f"{_BASE}/videos/most-recent?page={page}"
+
+    def _extract_scene_urls(self, listing_html: str) -> list[str]:
+        seen: set[str] = set()
+        out: list[str] = []
+        for m in _SCENE_URL_RE.finditer(listing_html):
+            url = f"{_BASE}{m.group(1)}"
+            if url in seen:  # de-duplicate while keeping first-seen order
+                continue
+            seen.add(url)
+            out.append(url)
+        return out
+
+    def _parse_detail(self, scene_url: str, detail_html: str) -> RawScene | None:
+        video = _extract_video_object(detail_html)
+        if not video:
+            log.info("porndoe: no JSON-LD VideoObject on %s", scene_url)
+            return None
+
+        title = (video.get("name") or "").strip()
+        if not title:  # a scene without a title is dropped
+            return None
+
+        watch_id_m = _WATCH_ID_RE.search(scene_url)
+        watch_id = watch_id_m.group(1) if watch_id_m else None
+
+        description = (video.get("description") or "").strip() or None
+        duration_sec = _parse_iso_duration(video.get("duration"))
+        release_date = _parse_iso_date(
+            video.get("uploadDate") or video.get("datePublished")
+        )
+        thumbnail_url = video.get("thumbnailUrl") or None
+
+        # Studio: producer / publisher (Organization); producer is tried first.
+        studio: RawStudio | None = None
+        for key in ("producer", "publisher"):
+            org = video.get(key)
+            if isinstance(org, dict) and org.get("name"):
+                name = org["name"].strip()
+                slug = _slug_from_url(org.get("url")) or re.sub(
+                    r"[^a-z0-9]+", "-", name.lower()
+                ).strip("-")
+                if name:
+                    studio = RawStudio(
+                        external_id=f"{self.sitetag}:channel:{slug}",
+                        name=name,
+                        slug=slug,
+                    )
+                    break
+
+        # Performers: actor[] (a list of Person objects or a single Person).
+        performers: list[RawPerformer] = []
+        seen_perf: set[str] = set()
+        actors = video.get("actor")
+        if isinstance(actors, dict):
+            actors = [actors]
+        if isinstance(actors, list):
+            for actor in actors:
+                if not isinstance(actor, dict):
+                    continue
+                name = (actor.get("name") or "").strip()
+                if not name:
+                    continue
+                slug = _slug_from_url(actor.get("url")) or re.sub(
+                    r"[^a-z0-9]+", "-", name.lower()
+                ).strip("-")
+                if slug in seen_perf:
+                    continue
+                seen_perf.add(slug)
+                performers.append(
+                    RawPerformer(
+                        external_id=f"{self.sitetag}:performer:{slug}",
+                        name=name,
+                    )
+                )
+
+        # Tags: scraped from DOM links matched by _TAG_LINK_RE (`/category/<id>/<slug>`).
+        tags: list[RawTag] = []
+        seen_tag: set[str] = set()
+        for m in _TAG_LINK_RE.finditer(detail_html):
+            slug, name = m.group(1), m.group(2).strip()
+            if not name or name.lower() in ("categories", "tags", ""):
+                continue
+            if slug in seen_tag or len(slug) > 60:
+                continue
+            seen_tag.add(slug)
+            tags.append(
+                RawTag(external_id=f"{self.sitetag}:tag:{slug}", name=name, slug=slug)
+            )
+
+        # Phash from the thumbnail (porndoe serves its own cropped thumbnails, so a low hit-rate
+        # is expected, but graceful: with no match the resolver falls back to composite scoring).
+        fingerprints: list[RawFingerprint] = []
+        if thumbnail_url:
+            ph = compute_thumbnail_phash(thumbnail_url, referer=_BASE + "/")
+            if ph:
+                fingerprints.append(RawFingerprint(kind="phash", value=ph))
+
+        # Playback — page_url points at the scene page. The stream is JS-rendered, so the
+        # `porndoecom` extractor maps to `_vps_blocked_fallback.extract` (mobile WebView scrape).
+        playback_sources = [
+            RawPlaybackSource(
+                origin=f"tube:{self.sitetag}",
+                page_url=scene_url,
+                duration_sec=duration_sec,
+                thumbnail_url=thumbnail_url,
+            )
+        ]
+
+        return RawScene(
+            external_id=f"{self.sitetag}:{watch_id or scene_url}",  # full URL when no watch id was parsed
+            title=title,
+            description=description,
+            release_date=release_date,
+            duration_sec=duration_sec,
+            url=scene_url,
+            studio=studio,
+            performers=performers,
+            tags=tags,
+            fingerprints=fingerprints,
+            playback_sources=playback_sources,
+        )
--- a/app/extractors/__init__.py
+++ b/app/extractors/__init__.py
@@ -50,12 +50,13 @@ log = logging.getLogger(__name__)
 # embed-iframe extractor (page → /e/<id> iframe → P.A.C.K.E.R. unpack). Custom kod
 # tylko tam gdzie tube ma niestandardowy schemat (eporner XHR, sxyprn URL transform).
 _REGISTRY: dict[str, Callable[[str], list[StreamSource] | None]] = {
-    # Custom (zoptymalizowane / niestandardowy player)
-    # hqporner — CDN URL (bigcdn.cc, video.flyflv.com z `ip=` parametrem) IP-bound do
-    # requestera. VPS resolve daje 200 ale mobile direct = 404/403. Switch na WebView
-    # fallback: mobile pobiera embed iframe (mydaddy.cc/hqwo.cc) z phone IP, FluidPlayer
-    # JS decoduje mp4 URL z mobile session. Plus INJECTED_JS skanuje `<source>.src`.
-    # ~32k scen (drugi po porntrex największy single saving). Verified 2026-05-18.
+    # hqporner — CDN URLs IP-bound do VPS, force_proxy wymusza ruch przez VPS proxy.
+    # 2026-05-20 (pre-public): bandwidth + anonimowość VPS > UX. Switch na WebView
+    # fallback — mobile pobiera embed iframe z phone IP, FluidPlayer JS decoduje
+    # mp4, ExoPlayer odtwarza direct z phone CDN session. **0 VPS bandwidth + VPS
+    # IP nie ujawniony** (mobile nie łączy się z VPS proxy URL).
+    # Trade-off: WebView ma 1 extra step (page → player JS) ale bez popup-ads jak
+    # hqporner.com bo INJECTED_JS w PlayerScreen.tsx blokuje + scrape `<source>.src`.
    "hqpornercom": _vps_blocked_fallback.extract,
    "epornercom": eporner.extract,
    "sxyprncom": sxyprn.extract,
@@ -94,13 +95,12 @@ _REGISTRY: dict[str, Callable[[str], list[StreamSource] | None]] = {
    # trailer URLs `_preview*.mp4`), dedupe po filename. Get_file 302 → CDN, proxy
    # follow_redirects=True wymagane (fix w stream_proxy.py).
    "pornhatcom": pornhat.extract,
-    # Freshporno KVS — `cv=` HMAC signed token IP-bound. Server-side resolve dało
-    # 200 z VPS, ale laptop dostał 302+SSL error → token validate'uje requester IP.
-    # Switch na WebView fallback: mobile pobiera embed page, KVS player decoduje
-    # video_url w-page, ExoPlayer dostaje URL z phone session. ~15k scen.
+    # Freshporno KVS — `cv=` HMAC signed token IP-bound do VPS. 2026-05-20 pre-public:
+    # bandwidth + VPS anonimowość priorytet. WebView fallback → mobile pobiera embed
+    # z phone IP, KVS player JS decoduje video_url, ExoPlayer odtwarza direct z CDN.
    "freshpornoorg": _vps_blocked_fallback.extract,
-    # porn00 / pornxp — force_proxy=True wprost (IP-bound CDN). Switch na WebView
-    # fallback. Niski volume (84 scen), trivial saving ale konsystencja flow.
+    # porn00 / pornxp — IP-bound CDN tokens. Pre-public WebView fallback (bandwidth +
+    # anonimowość VPS). Niski volume (84 scen), trivial.
    "porn00org": _vps_blocked_fallback.extract,
    "pornxpph": _vps_blocked_fallback.extract,
    # Direct-scraping tubes (mają też search scraper w connectors/direct_scrapers/)
@@ -114,6 +114,11 @@ _REGISTRY: dict[str, Callable[[str], list[StreamSource] | None]] = {
    "perverzijacom": _embed_iframe.extract,
    # Special: WebView-only (Yii2 session-bound player).
    "paradisehillcc": paradisehill.extract,
+    # PornDoe — dołączony 2026-05-21 (theporndude audit). Stream URL nie inline w
+    # SSR HTML (player JS init po Play click), więc WebView fallback: mobile pobiera
+    # /watch/<id> z phone IP, player JS dekoduje video.src, INJECTED_JS scrape.
+    # 0 VPS bandwidth — zgodne z pre-public bandwidth/anonimowość priorytet.
+    "porndoecom": _vps_blocked_fallback.extract,
 }


--- a/app/extractors/tubes/freshporno.py
+++ b/app/extractors/tubes/freshporno.py
@@ -45,7 +45,15 @@ def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | Non
        if basename in seen_keys:
            continue
        seen_keys.add(basename)
-        result.append(StreamSource(link=url, type="mp4", quality=quality))
+        # `force_proxy=True` (2026-05-20): freshporno get_file 302 → cdn4.freshporno.org
+        # IP-bound (cv= HMAC token). Mobile direct = 403/SSL fail → fallback proxy
+        # generuje "mrugnięcie" (user bug 743eefbf "najpierw strona potem video").
+        # Force_proxy wymusza mobile użycie proxied URL od razu — bez flickera +
+        # natywny ExoPlayer + quality picker zachowane.
+        result.append(StreamSource(
+            link=url, type="mp4", quality=quality,
+            raw={"force_proxy": True},
+        ))

    if not result:
        log.info("freshporno: no MP4 anchor matches on %s", page_url)
--- a/app/extractors/tubes/hqporner.py
+++ b/app/extractors/tubes/hqporner.py
@@ -95,7 +95,15 @@ def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | Non
            continue
        seen_urls.add(url)
        title = (sm.group(2) or "").strip()
-        sources.append(StreamSource(link=url, quality=title or None, type="mp4", referer=iframe_referer))
+        # `force_proxy=True` (2026-05-20): the bigcdn.cc/flyflv CDNs are IP-bound, and flyflv
+        # embeds the resolving host's address (`ip=<redacted>`) in the URL path. Mobile direct
+        # = 404/403 → the proxy fallback causes a flicker; force_proxy makes mobile use the
+        # proxied URL right away. Bug-report e8ddd8d4: "click opens an ad" under
+        # _vps_blocked_fallback (hqporner page ads). force_proxy + native mp4 = quality picker + native player.
+        sources.append(StreamSource(
+            link=url, quality=title or None, type="mp4", referer=iframe_referer,
+            raw={"force_proxy": True},
+        ))

    if sources:
        return sources
--- a/app/main.py
+++ b/app/main.py
@@ -111,7 +111,7 @@ def version() -> dict[str, str | None]:
        # mobile sklei z baseUrl.
        public_url = os.environ.get("BACKEND_PUBLIC_URL", "").rstrip("/")
        apk_url = f"{public_url}/static/app-release.apk" if public_url else "/static/app-release.apk"
-    return {"version": "0.1.8", "apk_url": apk_url}
+    return {"version": "0.1.9", "apk_url": apk_url}


@app.get("/readyz")
--- a/app/resolve/scene_resolver.py
+++ b/app/resolve/scene_resolver.py
@@ -123,14 +123,38 @@ def resolve_scene(
        result = find_by_phash_within(session, phash=value)
        if result is not None:
            scene_match, distance = result
-            score = 1.0 - distance / 64.0
+            raw_phash_score = 1.0 - distance / 64.0
            # Duration sanity check: phash może collide gdy compilation zawiera chapter sceny
            # (oba mają ten sam frame sample), ale duration będzie wyraźnie inny.
            # Wymagamy proximity ≥0.5 (±30s) dla auto-merge; inaczej → review queue.
-            from app.resolve.scoring import duration_proximity
+            from app.resolve.scoring import duration_proximity, series_mismatch_strength
            dur_prox = duration_proximity(scene_match.duration_sec, norm.duration_sec)
+
+            # Series-position guard (Episode 2 vs Episode 4): phash zwykle pixel-identical
+            # bo studio reusuje cover art między episodami, ale to OSOBNE sceny. Hard split,
+            # bez merge_candidate (nie ma czego mergować — żaden human reviewer też nie
+            # powie "Episode 2 to to samo co Episode 4").
+            sp_strength = series_mismatch_strength(
+                scene_match.title_normalized, norm.title_normalized
+            )
+            if sp_strength >= 1.0:
+                new_scene = _create_canonical(session, norm=norm, studio_id=studio_id)
+                _attach_external_ref(session, scene_id=new_scene.id, source_id=source_id, norm=norm)
+                _sync_attached_entities(session, scene=new_scene, norm=norm, source_id=source_id)
+                return SceneResolveResult(
+                    scene=new_scene,
+                    was_created=True,
+                    path="fp_phash_series_split",
+                    score=0.0,
+                )
+
            if dur_prox is not None and dur_prox < 0.5:
                # phash match ale duration rozjeżdża się → tworzymy nową scenę + review.
+                # Score reflectuje że to NIE jest auto-merge: dur_prox * phash_score,
+                # plus dalej cap przez series modifier mismatch (BTS/bonus/unedited).
+                penalised_score = raw_phash_score * max(dur_prox, 0.1)
+                if 0.0 < sp_strength < 1.0:
+                    penalised_score = min(penalised_score, 1.0 - sp_strength)
                new_scene = _create_canonical(session, norm=norm, studio_id=studio_id)
                _attach_external_ref(session, scene_id=new_scene.id, source_id=source_id, norm=norm)
                _sync_attached_entities(session, scene=new_scene, norm=norm, source_id=source_id)
@@ -139,11 +163,14 @@ def resolve_scene(
                        kind=MergeKind.scene,
                        left_id=scene_match.id,
                        right_id=new_scene.id,
-                        score=score,
+                        score=penalised_score,
                        reasons={
                            "path": "fp_phash",
                            "hamming": distance,
+                            "phash_score": raw_phash_score,
                            "duration_mismatch": True,
+                            "dur_prox": dur_prox,
+                            "series_mismatch_strength": sp_strength,
                            "left_dur": scene_match.duration_sec,
                            "right_dur": norm.duration_sec,
                        },
@@ -154,9 +181,42 @@ def resolve_scene(
                    scene=new_scene,
                    was_created=True,
                    path="fp_phash_review",
-                    score=score,
+                    score=penalised_score,
                    candidate_id=scene_match.id,
                )
+
+            # Modifier tag mismatch (BTS/bonus/unedited po jednej stronie) — nie hard-split,
+            # ale auto-merge zablokowane: tworzymy nową scenę + pending review.
+            if 0.0 < sp_strength < 1.0:
+                penalised_score = min(raw_phash_score, 1.0 - sp_strength)
+                new_scene = _create_canonical(session, norm=norm, studio_id=studio_id)
+                _attach_external_ref(session, scene_id=new_scene.id, source_id=source_id, norm=norm)
+                _sync_attached_entities(session, scene=new_scene, norm=norm, source_id=source_id)
+                session.add(
+                    MergeCandidate(
+                        kind=MergeKind.scene,
+                        left_id=scene_match.id,
+                        right_id=new_scene.id,
+                        score=penalised_score,
+                        reasons={
+                            "path": "fp_phash",
+                            "hamming": distance,
+                            "phash_score": raw_phash_score,
+                            "series_modifier_mismatch": True,
+                            "series_mismatch_strength": sp_strength,
+                        },
+                        status=MergeStatus.pending,
+                    )
+                )
+                return SceneResolveResult(
+                    scene=new_scene,
+                    was_created=True,
+                    path="fp_phash_modifier_review",
+                    score=penalised_score,
+                    candidate_id=scene_match.id,
+                )
+
+            score = raw_phash_score
            _update_scene_fields(scene_match, norm, studio_id=studio_id, source_kind=source_kind, session=session)
            _attach_external_ref(session, scene_id=scene_match.id, source_id=source_id, norm=norm)
            _sync_attached_entities(session, scene=scene_match, norm=norm, source_id=source_id)
@@ -215,14 +275,24 @@ def resolve_scene(
    # które mają wspólny choć jeden performer z naszą sceną (mocny sygnał — performerzy
    # to też nasz "blocking key" gdy studio i date są nieinformatywne).
    if aggregator_mode and performer_ids:
-        from sqlalchemy import distinct
+        # **2026-05-20 fix**: poprzednio LIMIT 50 BEZ ORDER BY → dla popular performera
+        # (Eveline Dellai z 200+ scen w bazie) prawdziwy match mógł być out of top-50,
+        # postgres zwracał arbitrary order → resolver nie widział kandydata → duplicate.
+        # Bug-report: brak Brazzers Exxtra po 15-05. Now: 500 limit + title-match priority
+        # ORDER, plus exact title match jako gwarantowany kandydat (CASE expression).
+        from sqlalchemy import case
+        title_match_expr = case(
+            (Scene.title_normalized == norm.title_normalized, 1),
+            else_=0,
+        )
        more = (
            session.execute(
                select(Scene)
                .join(ScenePerformer, ScenePerformer.scene_id == Scene.id)
                .where(ScenePerformer.performer_id.in_(performer_ids))
                .group_by(Scene.id)
-                .limit(50)
+                .order_by(title_match_expr.desc(), Scene.release_date.desc().nullslast())
+                .limit(500)
            )
            .scalars()
            .all()
--- a/app/resolve/scene_score.py
+++ b/app/resolve/scene_score.py
@@ -16,6 +16,7 @@ from app.resolve.scoring import (
    duration_proximity,
    performer_set_similarity,
    phash_similarity,
+    series_mismatch_strength,
    title_similarity,
 )

@@ -49,6 +50,10 @@ def score_candidate(
    else:
        studio_match = candidate.studio_id == studio_id

+    series_mismatch = series_mismatch_strength(
+        candidate.title_normalized, norm.title_normalized
+    )
+
    composite, reasons = composite_score(
        fp=fp,
        title=title,
@@ -57,6 +62,7 @@ def score_candidate(
        duration_score=duration_score,
        studio_match=studio_match,
        aggregator_mode=aggregator_mode,
+        series_mismatch=series_mismatch,
    )

    breakdown = ScoreBreakdown(
--- a/app/resolve/scoring.py
+++ b/app/resolve/scoring.py
@@ -16,6 +16,7 @@ TPDB ma "Brazzers Exxtra" a StashDB "Brazzers" jako studio sceny).
 from __future__ import annotations

 import math
+import re
 import uuid
 from collections.abc import Iterable
 from dataclasses import dataclass
@ -105,6 +106,91 @@ def date_proximity(left: date | None, right: date | None, *, window_days: int =
    return 1.0 - delta / window_days


+# Matches series-position markers: "Episode 4" / "Ep 4" / "Part 2" / "Pt. 3" /
+# "Vol 7" / "Volume 12" / "Scene 5" / "Chapter 9" / "Ch.3" / "Series 2" / "#7" /
+# "S9:E8" / "S9E8". Titles are expected to be normalized already (lower-cased,
+# dots usually stripped), but an optional "." after the keyword is tolerated.
+# The `(?!\d)` lookahead after every number group rejects digit runs longer
+# than the group allows, so e.g. "part 2020" yields no position at all instead
+# of a truncated one.
+# NOTE(review): every `(?<!\d)` lookbehind directly follows a token that can
+# never be a digit (keyword letter, whitespace, ".", "#", ":" or "e"), so they
+# look redundant — confirm before removing.
+_SERIES_NUM_RE = re.compile(
+    r"\b(?:episode|ep|part|pt|vol|volume|chapter|ch|scene|series)\b\s*\.?\s*#?\s*(?<!\d)(\d{1,3})(?!\d)"
+    r"|(?<!\w)#\s*(?<!\d)(\d{1,3})(?!\d)"
+    r"|\bs(?<!\d)(\d{1,2})(?!\d)\s*[:e]\s*e?(?<!\d)(\d{1,3})(?!\d)",
+    re.IGNORECASE,
+)
+
+# Tags that explicitly mark a scene as a separate variant (BTS / bonus /
+# unedited / trailer). If a tag appears on ONLY one side of a comparison, the
+# two titles are NOT the same scene.
+# NOTE(review): spelling variants ("behind the scenes" / "behind-the-scenes" /
+# "bts", "directors cut" / "director's cut") are separate entries, so two
+# titles that use different spellings of the same idea share no common tag —
+# confirm whether they should be canonicalized.
+_MODIFIER_TAGS: tuple[str, ...] = (
+    "behind the scenes",
+    "behind-the-scenes",
+    "bts",
+    "bonus",
+    "unedited",
+    "uncut",
+    "extended",
+    "directors cut",
+    "director's cut",
+    "trailer",
+    "preview",
+    "teaser",
+    "compilation",
+)
+
+
+def detect_series_positions(title_normalized: str | None) -> set[int]:
+    """Zwraca wszystkie pozycje (Episode/Part/Vol/Scene/Chapter/# itp.) znalezione w tytule.
+
+    Tytuł powinien być znormalizowany (lowercase, unaccent), ale regex jest case-insensitive
+    i tolerancyjny — chodzi tylko o sygnał, nie o robust parsing.
+    """
+    if not title_normalized:
+        return set()
+    out: set[int] = set()
+    for m in _SERIES_NUM_RE.finditer(title_normalized):
+        for g in m.groups():
+            if g and g.isdigit():
+                out.add(int(g))
+    return out
+
+
+def detect_modifier_tags(title_normalized: str | None) -> set[str]:
+    """Zwraca set modifier tagów wykrytych w tytule (bts/bonus/unedited/itp.)."""
+    if not title_normalized:
+        return set()
+    lower = title_normalized.lower()
+    return {t for t in _MODIFIER_TAGS if t in lower}
+
+
+def series_mismatch_strength(
+    title_a_normalized: str | None,
+    title_b_normalized: str | None,
+) -> float:
+    """Wykrywa rozjazd "wariantu sceny" między tytułami.
+
+    Zwraca strength w [0.0, 1.0]:
+      0.0 — brak sygnału mismatchu (tytuły kompatybilne).
+      0.5 — modifier tags po obu stronach ale RÓŻNE (BTS vs trailer).
+      0.7 — modifier tag po jednej stronie tylko (BTS vs regular).
+      1.0 — series position mismatch (Episode 2 vs Episode 4 → twardy reject).
+    """
+    pos_a = detect_series_positions(title_a_normalized)
+    pos_b = detect_series_positions(title_b_normalized)
+    # Hard mismatch gdy oba mają jakieś pozycje i symmetric difference jest niepusty
+    # — przykład: "Vol 140 Scene 3" vs "Vol 140 Scene 4" mają wspólne 140 ale różne 3/4,
+    # to są osobne sceny ze wspólnej kompilacji. Asymetryczny brak (jedna strona ma
+    # pozycję a druga nie) nie liczy się jako mismatch — tube SEO często gubi numer.
+    if pos_a and pos_b and (pos_a ^ pos_b):
+        return 1.0
+
+    mod_a = detect_modifier_tags(title_a_normalized)
+    mod_b = detect_modifier_tags(title_b_normalized)
+    if (not mod_a) != (not mod_b):
+        return 0.7
+    if mod_a and mod_b and not (mod_a & mod_b):
+        return 0.5
+    return 0.0
+
+
 def duration_proximity(
    left: int | None, right: int | None, *, window_sec: int = 60
 ) -> float | None:
@ -145,6 +231,7 @@ def composite_score(
    duration_score: float | None = None,
    studio_match: bool | None,
    aggregator_mode: bool = False,
+    series_mismatch: float | None = None,
 ) -> tuple[float, dict]:
    """Łączy sub-score'y w jeden composite [0, 1] + zwraca raport reasons.

@ -153,9 +240,17 @@ def composite_score(
      - aggregator_mode=True (np. tube'y typu HQPorner agregują z różnych studiów,
        więc studio z naszej perspektywy nie jest informatywny — pomijamy hard reject
        i zwiększamy wagę performers).
+
+    `series_mismatch` (≥0.0): wartość z `series_mismatch_strength()` — gdy 1.0 (Episode 2
+    vs Episode 4), wymusza twardy reject niezależnie od pozostałych sygnałów; gdy 0.5-0.7
+    (modifier mismatch: BTS/bonus/unedited po jednej stronie), nakłada cap = `1 - strength`.
    """
    reasons: dict = {}

+    if series_mismatch is not None and series_mismatch >= 1.0:
+        reasons["series_position_mismatch"] = True
+        return 0.0, reasons
+
    if studio_match is False:
        if fp is not None and fp >= 0.95:
            reasons["studio_mismatch_overridden_by_fp"] = True
@ -257,6 +352,16 @@ def composite_score(
            reasons["duration_perf_strong_match_bump"] = True
            score = max(score, 0.92)

+    # Series-modifier cap: jedna ze stron ma "BTS"/"bonus"/"unedited" a druga nie,
+    # albo różne tagi. Twardy mismatch (różne pozycje numeryczne) został już złapany
+    # wcześniej (return 0.0). Tu zostają miękkie sygnały — cap żeby nigdy nie auto-merge.
+    if series_mismatch is not None and 0.0 < series_mismatch < 1.0:
+        cap = max(0.0, 1.0 - series_mismatch)
+        if score > cap:
+            reasons["series_modifier_cap"] = cap
+            reasons["series_mismatch_strength"] = series_mismatch
+            score = cap
+
    return _clamp(score), reasons


--- a/app/scheduler/bulk_dedup.py
+++ b/app/scheduler/bulk_dedup.py
@ -35,6 +35,7 @@ from app.resolve.scoring import (
    hamming_distance_hex,
    performer_set_similarity,
    phash_similarity,
+    series_mismatch_strength,
    title_similarity,
    triage,
 )
@ -121,6 +122,8 @@ def score_scene_pair(session: Session, a: Scene, b: Scene) -> ScoreBreakdown:
    else:
        studio_match = a.studio_id == b.studio_id

+    series_mismatch = series_mismatch_strength(a.title_normalized, b.title_normalized)
+
    # Bulk dedup nie jest aggregator — porównujemy dwie kanoniczne sceny, studio
    # to prawdziwe studio. Aggregator mode tylko w resolverze przy ingest z tube'a.
    composite, reasons = composite_score(
@ -131,6 +134,7 @@ def score_scene_pair(session: Session, a: Scene, b: Scene) -> ScoreBreakdown:
        duration_score=duration_score,
        studio_match=studio_match,
        aggregator_mode=False,
+        series_mismatch=series_mismatch,
    )

    return ScoreBreakdown(
--- a/app/scheduler/jobs.py
+++ b/app/scheduler/jobs.py
@ -92,6 +92,29 @@ def _job_movie_ingest() -> None:
            log.exception("[scheduler] movie ingest %s failed", name)


+def _job_bulk_dedup_performers() -> None:
+    """Pair-wise dedup po performer overlap — safety net dla duplikatów które
+    resolver-time scoring nie złapał.
+
+    Use case (bug-report 2026-05-20, "brak Brazzers Exxtra po 15-05"):
+    freshporno scrape przed fixem release_date tworzył duplicate scenes zamiast
+    PS-merge do canonical TPDB scen. Resolver scoring miał score >0.92 (auto)
+    z release_date, ale BEZ release_date wagi się przesuwały i wpadało w review/new.
+
+    Bulk_dedup performers strategy iteruje per performer, robi pair-wise scoring
+    dla wszystkich scen tego performera — łapie duplicate-y które ingest-time
+    resolver pominął (np. gdy 2 sceny tej samej title+performer ale różny release_date).
+    Auto-merge gdy score≥0.92, pending merge_candidate gdy 0.75-0.92.
+    """
+    log.info("[scheduler] bulk_dedup performers starting")
+    try:
+        from app.scheduler.bulk_dedup import run_bulk_dedup
+        bc = run_bulk_dedup(strategy="performers", dry_run=False)
+        log.info("[scheduler] bulk_dedup performers done: %s", bc)
+    except Exception:
+        log.exception("[scheduler] bulk_dedup performers failed")
+
+
 def _job_performer_continuous(refresh_after_days: int) -> None:
    """Continuous worker — 1 performer per tick, ORDER BY last_searched_at NULLS FIRST.

@ -174,6 +197,17 @@ def build_scheduler(cfg: dict[str, Any]) -> BlockingScheduler:
            cfg["browse_latest_hours"], max_pages,
        )

+    if cfg.get("bulk_dedup_hours"):
+        sched.add_job(
+            _job_bulk_dedup_performers,
+            IntervalTrigger(hours=cfg["bulk_dedup_hours"]),
+            id="bulk_dedup_performers",
+            replace_existing=True,
+            max_instances=1,
+            coalesce=True,
+        )
+        log.info("scheduler: bulk-dedup performers every %dh", cfg["bulk_dedup_hours"])
+
    if cfg.get("movie_ingest_hours"):
        sched.add_job(
            _job_movie_ingest,
--- a/app/scheduler/performer_driven.py
+++ b/app/scheduler/performer_driven.py
@ -37,7 +37,8 @@ from app.ingest import (
 )
 from app.models.ingest_run import IngestRun, IngestStatus
 from app.models.performer import Performer, PerformerExternalRef
-from app.models.scene import ScenePerformer
+from app.models.playback_source import PlaybackSource
+from app.models.scene import Scene, ScenePerformer
 from app.models.source import Source, SourceKind

 log = logging.getLogger(__name__)
@ -463,14 +464,19 @@ def _claim_next_for_search(
    """Wybiera 1 performera z queue + UPDATE last_searched_at = now() w jednej
    transakcji (skip locked → safe pod konkurencyjnym workerze).

-    Queue:
-      1. Performerzy NIGDY niesearchowani (last_searched_at IS NULL)
-      2. Performerzy searchowani > `refresh_after` temu
-      3. Filtruj scene_count >= min_scene_count (eliminuje noise/false performerów)
-      4. Order: NULLS FIRST, potem najstarsze last_searched_at
+    Queue priority (2026-05-20 update — orphan-rescue bias):
+      1. **Performerzy z RECENT scenes-without-playback** (last 7d, no live PS) —
+         najpilniejsi bo user widzi puste studio listings dla najnowszych scen.
+         Bug-report 2026-05-20: "brak Brazzers Exxtra po 15-05" → wszystkie nowe
+         TPDB sceny mają canonical metadata ale 0 playback bo continuous queue
+         nigdy ich nie dotyka (78k performers, 67k NULL → ~232 dni sweep).
+      2. Performerzy NIGDY niesearchowani (`last_searched_at IS NULL`)
+      3. Performerzy searchowani > `refresh_after` temu
+      4. Filtruj scene_count >= min_scene_count
    """
    cutoff = datetime.now(UTC) - refresh_after
-    # Subquery scene_count
+    orphan_cutoff = datetime.now(UTC) - timedelta(days=7)
+
    sc_sub = (
        select(
            ScenePerformer.performer_id.label("pid"),
@ -480,19 +486,41 @@ def _claim_next_for_search(
        .subquery()
    )

-    # NOTE: nie używamy FOR UPDATE bo PostgreSQL nie pozwala na to z GROUP BY
-    # subquery (scene_count agg). APScheduler max_instances=1 gwarantuje że tylko
-    # jeden tick runa się na raz, więc race nie jest realny.
+    # Orphan-scene count per performer: scenes z release_date w ostatnich 7d
+    # AND brak żywego playback source. Wysoki count = performer-z-rekordów-pustych.
+    orphan_sub = (
+        select(
+            ScenePerformer.performer_id.label("pid"),
+            func.count(ScenePerformer.scene_id).label("orphan_count"),
+        )
+        .join(Scene, Scene.id == ScenePerformer.scene_id)
+        .where(Scene.release_date > orphan_cutoff)
+        .where(
+            ~select(PlaybackSource.id)
+            .where(PlaybackSource.scene_id == Scene.id)
+            .where(PlaybackSource.dead_at.is_(None))
+            .exists()
+        )
+        .group_by(ScenePerformer.performer_id)
+        .subquery()
+    )
+
    row = session.execute(
        select(Performer)
        .join(sc_sub, sc_sub.c.pid == Performer.id, isouter=False)
+        .join(orphan_sub, orphan_sub.c.pid == Performer.id, isouter=True)
        .where(sc_sub.c.scene_count >= min_scene_count)
        .where(
            (Performer.last_searched_at.is_(None))
            | (Performer.last_searched_at < cutoff)
        )
        .order_by(
+            # 1. Orphan scenes (last 7d, no playback) FIRST — desc count.
+            #    COALESCE 0 sprawia że performerzy bez orphan idą za tymi z.
+            func.coalesce(orphan_sub.c.orphan_count, 0).desc(),
+            # 2. NULL last_searched_at next
            Performer.last_searched_at.asc().nullsfirst(),
+            # 3. Highest scene_count (popular performers earlier)
            sc_sub.c.scene_count.desc(),
        )
        .limit(1)
--- a/app/scheduler/worker.py
+++ b/app/scheduler/worker.py
@ -161,6 +161,9 @@ def run_forever() -> int:
        # to bulk import jednorazowy). Bug-report 93d3c485 (2026-05-19) "brak freshporno".
        "browse_latest_hours": getattr(settings, "sched_browse_latest_hours", 24) or None,
        "browse_latest_max_pages": getattr(settings, "sched_browse_latest_max_pages", 5),
+        # Bulk-dedup performers — safety net dla duplikatów które resolver
+        # pominął (np. freshporno scen przed fixem release_date). Run 12h.
+        "bulk_dedup_hours": getattr(settings, "sched_bulk_dedup_hours", 12) or None,
    }
    sched = build_scheduler(cfg)
    log.info("worker scheduled mode starting (jobs=%d)", len(sched.get_jobs()))
--- a/landing/index.html
+++ b/landing/index.html
@ -83,18 +83,14 @@
      <p class="text-lg md:text-xl text-gray-400 max-w-2xl leading-relaxed mb-10">
        Goon indexes scene metadata from TPDB &amp; StashDB, deduplicates across
        30+ public tubes, and serves a fast mobile client. Zero ads. Zero tracking.
-        Your data stays on your VPS.
+        Download, open, browse — no account, no setup.
      </p>

      <div class="flex flex-wrap gap-3">
-        <a href="https://github.com/REPLACE_PERSONA/goon/releases/latest"
+        <a href="/goon-v0.1.9.apk"
           class="px-6 py-4 rounded-xl bg-accent text-white font-bold hover:bg-accentDeep transition glow">
          Download APK
        </a>
-        <a href="https://github.com/REPLACE_PERSONA/goon"
-           class="px-6 py-4 rounded-xl bg-bgElevated border border-border text-gray-200 font-semibold hover:border-accent transition">
-          View source on GitHub
-        </a>
        <a href="#donate"
           class="px-6 py-4 rounded-xl bg-transparent border border-border text-gray-400 font-semibold hover:text-accent hover:border-accent transition">
          ♥ Support project
@ -102,7 +98,7 @@
      </div>

      <p class="text-xs text-gray-500 mt-6 font-mono">
-        Android only · self-hosted backend required · 18+
+        Android 7+ · no setup, no login · 18+
      </p>
    </div>
  </header>
@ -165,11 +161,10 @@

        <div class="card-hover bg-card border border-border rounded-2xl p-6">
          <div class="text-accent text-2xl mb-3">⌬</div>
-          <h3 class="text-lg font-bold mb-2">100% self-hosted</h3>
+          <h3 class="text-lg font-bold mb-2">Works out of the box</h3>
          <p class="text-sm text-gray-400 leading-relaxed">
-            One <code class="text-accent font-mono text-xs">docker compose up -d</code>
-            and you own the API, the DB, the worker. No SaaS dependencies.
-            Your search history is yours.
+            Download the APK and it connects automatically — no account, no
+            config. Power users can point it at their own self-hosted backend.
          </p>
        </div>
      </div>
@ -200,26 +195,33 @@
    </div>
  </section>

-  <!-- QUICK START -->
+  <!-- GET STARTED -->
  <section class="px-6 py-20 bg-bgElevated/30 border-y border-border">
    <div class="max-w-3xl mx-auto">
-      <h2 class="text-3xl font-extrabold mb-3 tracking-tight">Quick start</h2>
-      <p class="text-gray-500 mb-8">5 commands. Backend runs in 30 seconds on any Docker host.</p>
+      <h2 class="text-3xl font-extrabold mb-3 tracking-tight">Get started</h2>
+      <p class="text-gray-500 mb-8">Three steps. No account, no server, no config.</p>

-      <div class="bg-card border border-border rounded-2xl p-6 font-mono text-sm leading-relaxed">
-        <div class="text-gray-500">$ <span class="text-gray-300">git clone https://github.com/REPLACE_PERSONA/goon.git</span></div>
-        <div class="text-gray-500">$ <span class="text-gray-300">cd goon &amp;&amp; cp .env.example .env</span></div>
-        <div class="text-gray-500">$ <span class="text-gray-300"># edit .env: set TPDB_API_TOKEN, STASHDB_API_KEY, API_KEYS</span></div>
-        <div class="text-gray-500">$ <span class="text-gray-300">docker compose up -d</span></div>
-        <div class="text-gray-500">$ <span class="text-gray-300">curl localhost:8000/health</span></div>
-        <div class="text-good text-xs mt-3">{"status":"ok"}</div>
-      </div>
-
-      <p class="text-sm text-gray-500 mt-6">
-        Then download the APK above, point it at your backend, paste an API key.
-        Full docs in the
-        <a href="https://github.com/REPLACE_PERSONA/goon#readme" class="text-accent hover:underline">README</a>.
-      </p>
+      <ol class="space-y-4">
+        <li class="bg-card border border-border rounded-2xl p-5 flex gap-4">
+          <span class="text-accent font-extrabold text-xl">1</span>
+          <span class="text-sm text-gray-300 leading-relaxed">
+            <a href="/goon-v0.1.9.apk" class="text-accent font-bold hover:underline">Download the APK</a>
+            and open it. Allow "install from unknown sources" for your browser if Android asks.
+          </span>
+        </li>
+        <li class="bg-card border border-border rounded-2xl p-5 flex gap-4">
+          <span class="text-accent font-extrabold text-xl">2</span>
+          <span class="text-sm text-gray-300 leading-relaxed">
+            Open the app, accept the 18+ gate. It connects automatically — no login.
+          </span>
+        </li>
+        <li class="bg-card border border-border rounded-2xl p-5 flex gap-4">
+          <span class="text-accent font-extrabold text-xl">3</span>
+          <span class="text-sm text-gray-300 leading-relaxed">
+            Browse. That's it.
+          </span>
+        </li>
+      </ol>
    </div>
  </section>

@ -259,10 +261,7 @@
      </div>

      <p class="text-xs text-gray-500 mt-6">
-        Addresses are hard-coded in
-        <code class="font-mono text-accent">mobile/src/lib/donate.ts</code>
-        and shown in the app under Scenes &raquo; ♥. Always verify on-screen
-        against the repo before sending.
+        Addresses + QR codes are shown in the app under Scenes &raquo; ♥.
      </p>
    </div>
  </section>
@ -275,19 +274,18 @@
          <div class="w-2 h-2 rounded-full bg-accent"></div>
          <span class="font-bold tracking-widest uppercase">goon</span>
        </div>
-        <p>Self-hosted adult content metadata aggregator.</p>
+        <p>Adult content metadata aggregator. FOSS, ad-free.</p>
        <p>MIT license. No warranty. 18+ jurisdictions only.</p>
      </div>
      <div class="flex flex-col gap-1 text-right">
-        <a href="https://github.com/REPLACE_PERSONA/goon" class="hover:text-accent transition">GitHub</a>
-        <a href="https://github.com/REPLACE_PERSONA/goon/releases" class="hover:text-accent transition">Releases</a>
-        <a href="https://github.com/REPLACE_PERSONA/goon#readme" class="hover:text-accent transition">Docs</a>
+        <a href="/goon-v0.1.9.apk" class="hover:text-accent transition">Download APK</a>
+        <a href="#donate" class="hover:text-accent transition">Support</a>
      </div>
    </div>
    <p class="max-w-5xl mx-auto mt-6 text-[10px] text-gray-600 leading-relaxed">
      Goon does not host, transcode, store, or distribute any media. It scrapes
-      publicly-available metadata and links out to the source. Operators are
-      responsible for complying with local law. See README &raquo; Disclaimer.
+      publicly-available metadata and links out to the source. Users are
+      responsible for complying with local law.
    </p>
  </footer>

--- a/mobile/App.tsx
+++ b/mobile/App.tsx
@ -24,6 +24,7 @@ import { ClientProvider } from './src/ClientContext';
 import { ErrorBoundary } from './src/ErrorBoundary';
 import { isAccepted as isAgeGateAccepted } from './src/lib/agegate';
 import { APP_VERSION } from './src/lib/appVersion';
+import { DEFAULT_API_KEY, DEFAULT_BACKEND_URL } from './src/lib/backend';
 import { getSettings as getLockSettings } from './src/lib/applock';
 import { AppNavigator } from './src/navigation';
 import { AgeGateScreen } from './src/screens/AgeGateScreen';
@ -89,7 +90,13 @@ export default function App() {
        const accepted = await isAgeGateAccepted();
        setAgeAccepted(accepted);
        const creds = await loadCredentials();
-        if (creds) setClient(new GoonClient(creds.baseUrl, creds.apiKey));
+        if (creds) {
+          setClient(new GoonClient(creds.baseUrl, creds.apiKey));
+        } else {
+          // No stored credentials → auto-connect to the public instance.
+          // LoginScreen only appears after an explicit "Sign out".
+          setClient(new GoonClient(DEFAULT_BACKEND_URL, DEFAULT_API_KEY));
+        }
        const lockSettings = await getLockSettings();
        if (lockSettings.enabled && lockSettings.hasPin) {
          setLocked(true);
--- a/mobile/android/app/build.gradle
+++ b/mobile/android/app/build.gradle
@ -93,8 +93,8 @@ android {
        applicationId 'com.goon.mobile'
        minSdkVersion rootProject.ext.minSdkVersion
        targetSdkVersion rootProject.ext.targetSdkVersion
-        versionCode 6
-        versionName "0.1.6"
+        versionCode 9
+        versionName "0.1.9"
    }
    signingConfigs {
        debug {
--- a/mobile/android/app/src/main/AndroidManifest.xml
+++ b/mobile/android/app/src/main/AndroidManifest.xml
@ -16,15 +16,15 @@
  </queries>
  <application android:name=".MainApplication" android:label="@string/app_name" android:icon="@mipmap/ic_launcher" android:roundIcon="@mipmap/ic_launcher_round" android:allowBackup="true" android:theme="@style/AppTheme" android:supportsRtl="true" android:usesCleartextTraffic="false" android:networkSecurityConfig="@xml/network_security_config">
    <!--
-      Expo Updates is disabled by default in the public source tree. To enable
-      OTA updates for your fork, flip ENABLED to "true" and point EXPO_UPDATE_URL
-      at your backend's `/expo-updates/manifest` endpoint. See README "Quick start"
-      for the server-side setup.
+      Expo Updates — ENABLED 2026-05-22 for the public release. The manifest is
+      served by the backend at `/expo-updates/manifest` (api.goon-foss.org).
+      JS-only fixes ship OTA without rebuilding the APK; a native change requires
+      bumping runtimeVersion and installing a new APK via PackageInstaller.
    -->
-    <meta-data android:name="expo.modules.updates.ENABLED" android:value="false"/>
+    <meta-data android:name="expo.modules.updates.ENABLED" android:value="true"/>
    <meta-data android:name="expo.modules.updates.EXPO_UPDATES_CHECK_ON_LAUNCH" android:value="ALWAYS"/>
    <meta-data android:name="expo.modules.updates.EXPO_UPDATES_LAUNCH_WAIT_MS" android:value="0"/>
-    <meta-data android:name="expo.modules.updates.EXPO_UPDATE_URL" android:value="https://invalid.example.invalid/expo-updates/manifest"/>
+    <meta-data android:name="expo.modules.updates.EXPO_UPDATE_URL" android:value="https://api.goon-foss.org/expo-updates/manifest"/>
    <meta-data android:name="expo.modules.updates.EXPO_RUNTIME_VERSION" android:value="1.0"/>
    <activity android:name=".MainActivity" android:configChanges="keyboard|keyboardHidden|orientation|screenSize|screenLayout|uiMode" android:launchMode="singleTask" android:windowSoftInputMode="adjustResize" android:theme="@style/Theme.App.SplashScreen" android:exported="true" android:screenOrientation="portrait">
      <intent-filter>
--- a/mobile/app.json
+++ b/mobile/app.json
@ -2,14 +2,14 @@
  "expo": {
    "name": "goon",
    "slug": "goon",
-    "version": "0.1.8",
+    "version": "0.1.9",
    "orientation": "portrait",
    "userInterfaceStyle": "automatic",
    "newArchEnabled": false,
    "runtimeVersion": "1.0",
    "updates": {
-      "enabled": false,
-      "url": "https://invalid.example.invalid/expo-updates/manifest",
+      "enabled": true,
+      "url": "https://api.goon-foss.org/expo-updates/manifest",
      "checkAutomatically": "ON_LOAD",
      "fallbackToCacheTimeout": 0
    },
--- a/mobile/src/lib/appVersion.ts
+++ b/mobile/src/lib/appVersion.ts
@ -17,4 +17,4 @@ import Constants from 'expo-constants';
 * też nie idzie do góry, więc consistency jest zachowana.
 */
 export const APP_VERSION: string =
-  (Constants.expoConfig?.version as string | undefined) || '0.1.8';
+  (Constants.expoConfig?.version as string | undefined) || '0.1.9';
--- a/mobile/src/lib/backend.ts
+++ b/mobile/src/lib/backend.ts
@ -0,0 +1,12 @@
+// Default public instance. A fresh install with no stored credentials
+// auto-connects here, so the app works out-of-the-box without a login step.
+//
+// Power users who want their own self-hosted backend can still override:
+// after "Sign out" the login screen lets them enter a different URL + key.
+//
+// The API key below is intentionally shipped in the APK. It is a coarse
+// bot/scraper filter, not a secret — anyone can decompile the APK to read it.
+// If it gets abused, rotate it: append a new key to API_KEYS on the server,
+// ship an APK update, then drop the old key.
+export const DEFAULT_BACKEND_URL = 'https://api.goon-foss.org';
+export const DEFAULT_API_KEY = 'W20ggQgYjH_evCZCSBTWJsGgLMaJQP_7';
--- a/mobile/src/screens/PlayerScreen.tsx
+++ b/mobile/src/screens/PlayerScreen.tsx
@ -754,6 +754,40 @@ const INJECTED_JS = `
    };
  } catch (e) {}

+  // -- 1.5. Cookie/consent auto-dismiss --------------------------------------
+  // Tube'y typu hqporner mają cookie-consent gate ("Allow All / Allow Essential
+  // Only") który blokuje kt_player JS — player nie inicjalizuje się dopóki user
+  // nie kliknie. INJECTED_JS scrape \`<source>.src\` odpala się więc za wcześnie
+  // (DOM nie ma jeszcze video). Auto-klikamy consent żeby odblokować player.
+  //
+  // Bezpieczeństwo: klikamy TYLKO element którego tekst pasuje do consent-frazy
+  // ORAZ leży w kontenerze z markerem cookie/consent/gdpr (≤6 przodków). To
+  // wyklucza przypadkowy klik w reklamę "Continue to site".
+  const CONSENT_TEXT_RE = /^(allow all|accept all|accept|accept & continue|accept and continue|i accept|i agree|agree|agree & continue|got it|enable all|consent|continue|ok|akceptuj.*|zgadzam.*|zgoda|rozumiem|wyra(z|ż)am zgod)$/i;
+  const CONSENT_CTX_RE = /(cookie|consent|gdpr|privacy|cmp|onetrust|didomi|cookiebar|cookie-?notice)/i;
+  const dismissConsent = function() {
+    const els = document.querySelectorAll('button, a, [role="button"], div[onclick], span[onclick], input[type="button"], input[type="submit"]');
+    for (let i = 0; i < els.length; i++) {
+      const el = els[i];
+      const txt = ((el.textContent || el.value || '') + '').trim();
+      if (!txt || txt.length > 32) continue;
+      if (!CONSENT_TEXT_RE.test(txt)) continue;
+      // Kontekst: element lub ≤6 przodków ma cookie/consent marker (class/id).
+      let ctx = el, depth = 0, inCtx = false;
+      while (ctx && depth < 7) {
+        const cn = ctx.className;
+        const sig = ((typeof cn === 'string' ? cn : (cn && cn.baseVal) || '') + ' ' + (ctx.id || '')).toLowerCase();
+        if (CONSENT_CTX_RE.test(sig)) { inCtx = true; break; }
+        ctx = ctx.parentElement; depth++;
+      }
+      if (!inCtx) continue;
+      try {
+        el.click();
+        window.ReactNativeWebView.postMessage(JSON.stringify({type: 'consent_dismissed'}));
+      } catch (e) {}
+    }
+  };
+
  // Niektóre hostery wstrzykują full-screen <iframe> jako ad — usuwamy periodically.
  // Plus iframe-ad już istniejące przed naszym patchowaniem (race condition).
  const removeAdIframes = function() {
@ -778,7 +812,13 @@ const INJECTED_JS = `
      }
    });
  };
-  setInterval(removeAdIframes, 1000);
+  setInterval(function() {
+    removeAdIframes();
+    dismissConsent();
+  }, 1000);
+  // Pierwsza próba consent natychmiast (banner bywa w SSR HTML) — bez czekania
+  // na pierwszy tick interwału.
+  dismissConsent();

  // -- 2. Auto-extract m3u8/mp4 -----------------------------------------------
  const VIDEO_RE = /https?:\\/\\/[^"'\\s<>]+\\.(?:m3u8|mp4|mpd)(?:\\?[^"'\\s<>]*)?/i;
@ -821,9 +861,12 @@ const INJECTED_JS = `
        } catch (e) {}
      }
    });
-    // Jeśli mamy video URL i video się odpaliło, możemy zatrzymać polling
-    if (seen.size > 0 && ticks > 5) clearInterval(interval);
-    if (ticks > 60) clearInterval(interval);
+    // Jeśli mamy video URL i video się odpaliło, możemy zatrzymać polling.
+    // Próg podniesiony 5→15: po auto-dismiss cookie consent kt_player (hqporner)
+    // potrzebuje kilku sekund na init — zbyt wczesny stop łapał tylko preroll-ad
+    // URL zanim pojawił się prawdziwy <source>. 15 ticków = ~15s retry window.
+    if (seen.size > 0 && ticks > 15) clearInterval(interval);
+    if (ticks > 90) clearInterval(interval);
  }, 1000);

  true;
--- a/scripts/check_all_hosters.py
+++ b/scripts/check_all_hosters.py
@ -0,0 +1,50 @@
+"""Per-origin extractor check: dla 1 sample sceny z każdego tube origin,
+wywołaj try_extract i sklasyfikuj wynik (direct mp4/m3u8 vs WebView hoster vs fail).
+Uruchamiać na VPS: docker compose exec -T api python scripts/check_all_hosters.py
+"""
+from app.db import SessionLocal
+from sqlalchemy import text
+from app.extractors import try_extract
+
+
+def main():
+    with SessionLocal() as s:
+        rows = s.execute(text("""
+            SELECT DISTINCT ON (ps.origin)
+                ps.origin, ps.page_url, sc.title
+            FROM playback_sources ps
+            JOIN scenes sc ON sc.id = ps.scene_id
+            WHERE ps.dead_at IS NULL AND ps.origin LIKE 'tube:%'
+              AND ps.page_url IS NOT NULL
+            ORDER BY ps.origin, sc.created_at DESC
+        """)).all()
+
+    print(f"{'origin':<26} {'result':<48} verdict")
+    print("-" * 95)
+    for r in rows:
+        sitetag = r.origin.replace("tube:", "")
+        try:
+            sources = try_extract(sitetag, r.page_url)
+        except Exception as e:
+            print(f"{r.origin:<26} EXC: {str(e)[:42]:<48} ERROR")
+            continue
+        if not sources:
+            print(f"{r.origin:<26} {'None (no sources)':<48} FAIL")
+            continue
+        # Klasyfikacja po type pierwszego źródła
+        types = [getattr(x, "type", "?") for x in sources]
+        first = sources[0]
+        t = getattr(first, "type", "?")
+        link = (getattr(first, "link", "") or "")[:40]
+        if t == "hoster":
+            verdict = "WEBVIEW (page → ad risk)"
+        elif t in ("mp4", "m3u8", "hls", "mpd"):
+            verdict = "DIRECT (native ExoPlayer)"
+        else:
+            verdict = f"OTHER({t})"
+        n = len(sources)
+        print(f"{r.origin:<26} {f'{t} x{n} {link}':<48} {verdict}")
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/check_series_detector.py
+++ b/scripts/check_series_detector.py
@ -0,0 +1,29 @@
+"""Quick sanity check series-mismatch detector na realnych pendingach z bazy."""
+from app.resolve.scoring import (
+    detect_modifier_tags,
+    detect_series_positions,
+    series_mismatch_strength,
+)
+
+cases = [
+    ("pleasureville a dp xxx parody episode 2", "pleasureville a dp xxx parody episode 4", "Episode 2/4"),
+    ("make em sweat #7", "make em sweat #7 bts", "BTS asymmetric"),
+    ("training ravyn", "training ravyn (bts - 1)", "BTS asymmetric"),
+    ("women seeking women volume 140 scene 3", "women seeking women volume 140 scene 4", "Vol same scene diff"),
+    ("women seeking women #131 scene 2", "women seeking women volume 139 scene 1", "Multi num"),
+    ("bad bella stinky feet preparation 1080p", "bad bella stinky feet preparation (unedited) 1080p", "Unedited"),
+    ("alexis fawx step son becomes a man part 1", "alexis fawx step son becomes a man part 2", "Part 1/2"),
+    ("neon moonlight pt. 1", "neon moonlight pt. 2", "Pt 1/2"),
+    ("internet outage poundage", "internet outage poundage alexis fawx", "Same scene"),
+    ("the great heist", "the great heist", "Identical"),
+    ("training ravyn", "training ravyn", "Identical"),
+    ("slut hunt ep.6 ravyn", "slut hunt ep.6 ravyn full", "Same Episode 6"),
+]
+print(f'{"strength":>8}  case')
+for a, b, desc in cases:
+    s = series_mismatch_strength(a, b)
+    pa = detect_series_positions(a)
+    pb = detect_series_positions(b)
+    ma = detect_modifier_tags(a)
+    mb = detect_modifier_tags(b)
+    print(f'{s:>8.2f}  {desc:25s}  pos={pa or "{}"} vs {pb or "{}"}  mod={ma or "{}"} vs {mb or "{}"}')
--- a/scripts/goon_debug_proxy.py
+++ b/scripts/goon_debug_proxy.py
@ -0,0 +1,92 @@
+"""Debug reverse-proxy: http://0.0.0.0:8099 → https://api.goon-foss.org
+
+Emulator app (via http://10.0.2.2:8099, cleartext dozwolony w NSC dla 10.0.2.2)
+uderza ten proxy → forward do prawdziwego backendu. Loguje każdy request:
+method, path, headers (X-API-Key, X-App-Signature), response status.
+
+Cel: zdiagnozować czy app fetch w ogóle działa + jakie headers wysyła.
+"""
+import http.server
+import socketserver
+import ssl
+import urllib.request
+import urllib.error
+
+UPSTREAM = "https://api.goon-foss.org"
+PORT = 8099
+
+
+class ProxyHandler(http.server.BaseHTTPRequestHandler):
+    """Reverse-proxy handler: relay each request to UPSTREAM and log both directions.
+
+    For every request it prints the method and path, a redacted view of the
+    headers named in ``_LOGGED_HEADERS``, and the upstream status with the body
+    size. The upstream body is read completely before it is relayed, so
+    responses are never streamed.
+    """
+
+    protocol_version = "HTTP/1.1"
+
+    # Request headers echoed to the console, in print order.
+    _LOGGED_HEADERS = (
+        "X-API-Key", "X-App-Signature", "Authorization", "User-Agent", "Accept", "Content-Type",
+    )
+    # Request headers that are not copied onto the upstream request.
+    _SKIP_REQUEST_HEADERS = frozenset({"host", "content-length", "connection", "accept-encoding"})
+    # Response headers that are not relayed; Content-Length is recomputed from
+    # the bytes actually read.
+    _SKIP_RESPONSE_HEADERS = frozenset(
+        {"transfer-encoding", "connection", "content-encoding", "content-length"}
+    )
+
+    @staticmethod
+    def _redact(name, value):
+        """Return *value* as it should appear in the log.
+
+        Credential-bearing headers are truncated so secrets do not land in the
+        console; every other header is returned unchanged.
+        """
+        if name == "X-App-Signature":
+            return f"{value[:20]}...{value[-8:]} (len={len(value)})"
+        if name == "X-API-Key":
+            return f"{value[:8]}... (len={len(value)})"
+        if name == "Authorization":
+            # Keep the auth scheme readable, show only a stub of the credential.
+            scheme, _, credential = value.partition(" ")
+            shown = f"{scheme} {credential[:4]}" if credential else value[:4]
+            return f"{shown}... (len={len(value)})"
+        return value
+
+    @staticmethod
+    def _error_body(exc):
+        """Return the 502 JSON payload for *exc* as bytes.
+
+        json.dumps escapes quotes, backslashes and control characters in the
+        exception text, so the payload is always valid JSON.
+        """
+        import json  # local import: this module does not import json at file level
+
+        return json.dumps({"detail": f"proxy error: {exc}"}, separators=(",", ":")).encode()
+
+    def _proxy(self, method):
+        """Relay the current request to UPSTREAM with *method* and write back the reply.
+
+        Upstream HTTP errors are relayed with their status and body; any other
+        failure is answered with a 502 and a JSON ``detail`` message.
+        """
+        body_len = int(self.headers.get("Content-Length", 0))
+        body = self.rfile.read(body_len) if body_len else None
+
+        print(f"\n>>> {method} {self.path}")
+        for name in self._LOGGED_HEADERS:
+            if name in self.headers:
+                print(f"    {name}: {self._redact(name, self.headers[name])}")
+
+        req = urllib.request.Request(UPSTREAM + self.path, data=body, method=method)
+        for k, v in self.headers.items():
+            if k.lower() not in self._SKIP_REQUEST_HEADERS:
+                req.add_header(k, v)
+
+        ctx = ssl.create_default_context()
+        try:
+            with urllib.request.urlopen(req, context=ctx, timeout=30) as resp:
+                data = resp.read()
+                print(f"<<< {resp.status} ({len(data)} bytes)")
+                self.send_response(resp.status)
+                for k, v in resp.headers.items():
+                    if k.lower() not in self._SKIP_RESPONSE_HEADERS:
+                        self.send_header(k, v)
+                self.send_header("Content-Length", str(len(data)))
+                self.end_headers()
+                self.wfile.write(data)
+        except urllib.error.HTTPError as e:
+            # urllib raises for 4xx/5xx; the app still has to see the real status and body.
+            data = e.read()
+            print(f"<<< HTTP {e.code} ({len(data)} bytes): {data[:200]}")
+            self.send_response(e.code)
+            self.send_header("Content-Type", e.headers.get("Content-Type", "application/json"))
+            self.send_header("Content-Length", str(len(data)))
+            self.end_headers()
+            self.wfile.write(data)
+        except Exception as e:
+            # Top-level boundary of the handler: report the failure to the client as a 502.
+            print(f"<<< PROXY ERROR: {type(e).__name__}: {e}")
+            msg = self._error_body(e)
+            self.send_response(502)
+            self.send_header("Content-Type", "application/json")
+            self.send_header("Content-Length", str(len(msg)))
+            self.end_headers()
+            self.wfile.write(msg)
+
+    def do_GET(self):
+        self._proxy("GET")
+
+    def do_POST(self):
+        self._proxy("POST")
+
+    def do_PUT(self):
+        self._proxy("PUT")
+
+    def do_PATCH(self):
+        self._proxy("PATCH")
+
+    def do_DELETE(self):
+        self._proxy("DELETE")
+
+    def log_message(self, *args):
+        pass  # silence default logging
+
+
+class ThreadingServer(socketserver.ThreadingMixIn, http.server.HTTPServer):
+    """HTTPServer combined with ThreadingMixIn, so requests are handled in separate threads."""
+
+    # Handler threads are daemonic: they do not keep the process alive on exit.
+    daemon_threads = True
+
+
+if __name__ == "__main__":
+    # The server binds 0.0.0.0, i.e. every local interface, so the proxy is
+    # reachable from the whole network and not just from the emulator.
+    print(f"debug proxy: http://0.0.0.0:{PORT} -> {UPSTREAM}")
+    print(f"emulator app should point to http://10.0.2.2:{PORT}")
+    # serve_forever() blocks; stop the proxy by interrupting the process.
+    ThreadingServer(("0.0.0.0", PORT), ProxyHandler).serve_forever()
--- a/scripts/test_porndoe_scraper.py
+++ b/scripts/test_porndoe_scraper.py
@ -0,0 +1,59 @@
+"""Smoke test PornDoeScraper — fetch sample + sprawdz parsing."""
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
+from app.connectors.direct_scrapers.porndoe import PornDoeScraper
+
+
+def main():
+    """Scrape one listing page and print per-field fill rates.
+
+    Prints the scraper's sitetag and its first two listing URLs, then walks at
+    most 15 scenes from ``latest_scenes(max_pages=1)``. The first five scenes
+    are dumped field by field; at the end each field's fill rate is printed
+    as ``filled/total (percent)``.
+    """
+    scraper = PornDoeScraper()
+    print(f"sitetag: {scraper.sitetag}")
+    print(f"listing url p1: {scraper._listing_url(1)}")
+    print(f"listing url p2: {scraper._listing_url(2)}")
+    print()
+
+    count = 0
+    ok_studio = ok_perf = ok_date = ok_dur = ok_thumb = ok_phash = 0
+    for scene in scraper.latest_scenes(max_pages=1):
+        count += 1
+        # Resolve the thumbnail once. A scene without playback sources yields
+        # None instead of raising IndexError when the sample is printed below.
+        sources = scene.playback_sources
+        thumb = sources[0].thumbnail_url if sources else None
+
+        if scene.studio:
+            ok_studio += 1
+        if scene.performers:
+            ok_perf += 1
+        if scene.release_date:
+            ok_date += 1
+        if scene.duration_sec:
+            ok_dur += 1
+        if thumb:
+            ok_thumb += 1
+        if scene.fingerprints:
+            ok_phash += 1
+        if count <= 5:
+            print(f"--- scene {count} ---")
+            print(f"  ext_id:   {scene.external_id}")
+            print(f"  title:    {scene.title[:60]}")
+            print(f"  studio:   {scene.studio.name if scene.studio else None}")
+            print(f"  perf:     {[p.name for p in scene.performers]}")
+            print(f"  date:     {scene.release_date}")
+            print(f"  duration: {scene.duration_sec}s")
+            print(f"  tags:     {[t.name for t in scene.tags][:5]}")
+            print(f"  thumb:    {(thumb or '')[:70]}")
+            print(f"  phash:    {[f.value for f in scene.fingerprints]}")
+            print()
+        if count >= 15:
+            break
+
+    print("=" * 50)
+    print(f"total scraped: {count}")
+    if count:
+        print(f"  studio:    {ok_studio}/{count} ({100*ok_studio//count}%)")
+        print(f"  performer: {ok_perf}/{count} ({100*ok_perf//count}%)")
+        print(f"  date:      {ok_date}/{count} ({100*ok_date//count}%)")
+        print(f"  duration:  {ok_dur}/{count} ({100*ok_dur//count}%)")
+        print(f"  thumbnail: {ok_thumb}/{count} ({100*ok_thumb//count}%)")
+        print(f"  phash:     {ok_phash}/{count} ({100*ok_phash//count}%)")
+
+if __name__ == "__main__":
+    main()
--- a/scripts/theporndude_coverage_check.py
+++ b/scripts/theporndude_coverage_check.py
@ -0,0 +1,29 @@
+"""Coverage check: ile tube'ów z theporndude.com mamy już w bazie."""
+from app.db import SessionLocal
+from sqlalchemy import text
+
+
+def main():
+    """Print every ``playback_sources`` origin with its total/live/dead row counts.
+
+    Origins are grouped by the text before their first ":" (origins without a
+    colon go to "other"); inside a group they are listed by live count, highest first.
+    """
+    with SessionLocal() as session:
+        # All distinct origins (canonical + tube: + pornapp:)
+        rows = session.execute(text("""
+            SELECT origin, COUNT(*) AS n,
+                   COUNT(*) FILTER (WHERE dead_at IS NULL) AS live,
+                   COUNT(*) FILTER (WHERE dead_at IS NOT NULL) AS dead
+            FROM playback_sources
+            GROUP BY origin
+            ORDER BY origin
+        """)).all()
+    print(f"distinct origins: {len(rows)}")
+
+    grouped = {}
+    for row in rows:
+        prefix, colon, _rest = row.origin.partition(":")
+        bucket = prefix if colon else "other"
+        grouped.setdefault(bucket, []).append((row.origin, row.n, row.live, row.dead))
+
+    for bucket, entries in grouped.items():
+        print(f"\n=== {bucket} ({len(entries)} origins) ===")
+        ranked = sorted(entries, key=lambda entry: -entry[2])
+        for origin, n, live, dead in ranked:
+            print(f"  {origin:<35} n={n:>7,} live={live:>7,} dead={dead:>5,}")
+
+if __name__ == "__main__":
+    main()
--- a/scripts/theporndude_coverage_match.py
+++ b/scripts/theporndude_coverage_match.py
@ -0,0 +1,118 @@
+"""Cross-check 166 resolved theporndude domains vs nasze 25 tube origins."""
+import json
+from pathlib import Path
+
+# Origins from the DB (live + dead) plus the extractor REGISTRY in app/extractors/__init__.py
+OUR_ORIGINS = [
+    # DB live + dead
+    "tube:0dayxxcom", "tube:epornercom", "tube:fpoxxx", "tube:freshpornoorg",
+    "tube:hqpornercom", "tube:latestpornvideocom", "tube:mypornerleakcom",
+    "tube:perverzijacom", "tube:porn00org", "tube:porndishcom", "tube:porndittcom",
+    "tube:pornhatcom", "tube:pornhubcom", "tube:porntrexcom", "tube:pornxpph",
+    "tube:redtubecom", "tube:sxylandcom", "tube:sxyprncom", "tube:xhamstercom",
+    "tube:xnxxcom", "tube:xvideoscom", "tube:youporncom", "tube:latestleaksco",
+    "tube:siskavideo", "tube:hdporn92com",
+    # REGISTRY only (extractor known, no playback rows in the live DB)
+    "tube:xmoviesforyoucom", "tube:watchporn", "tube:porn4dayspw",
+    "tube:paradisehillcc",
+]
+
+
+# Real TLDs only. "tube"/"porn"/"xxx" are left out on purpose: they are often
+# part of the site name itself (redtube, pornhub, fpoxxx).
+_TLD_RE = __import__("re").compile(r"(com|net|org|tv|cc|pw|co|to|ws|me|sx|info|biz)$")
+
+
+def _strip_tld(s: str) -> str:
+    """Remove a trailing TLD token from a dot-less sitetag.
+
+    Examples: xvideoscom -> xvideos; pornhubcom -> pornhub; hdporn92com -> hdporn92.
+    A string without a listed TLD at its end is returned unchanged.
+    """
+    tld = _TLD_RE.search(s)
+    if tld is None:
+        return s
+    return s[: tld.start()] + s[tld.end():]
+
+# Build sitetag → matching variants for fuzzy match
+def origin_to_sitetag(origin: str) -> str:
+    """Return the sitetag of an origin, i.e. the origin without its ``tube:`` prefix.
+
+    Only a leading ``tube:`` is removed; the same text appearing later in the
+    value is left alone. Origins without the prefix are returned unchanged.
+    """
+    return origin.removeprefix("tube:")
+
+
+def domain_to_sitetag(domain: str) -> str:
+    """Lower-case a domain and delete its dots and hyphens.
+
+    Examples: xvideos.com -> xvideoscom, porntrex.com -> porntrexcom.
+    """
+    return "".join(ch for ch in domain.lower() if ch not in ".-")
+
+
+def match(slug: str, domain: str) -> str | None:
+    """Return the first entry of OUR_ORIGINS that matches *slug* or *domain*, else None.
+
+    *slug* is the tube name from the theporndude review URL (e.g. 'xvideos',
+    'pornhub', 'paradisehill'); *domain* is the real domain resolved through
+    pdude.link. Origins look like ``tube:<sitetag>`` with
+    ``sitetag = domain.replace('.', '')``. Both sides are compared with their
+    trailing TLD stripped, and keys shorter than 3 characters never match.
+    """
+    raw_keys = []
+    if slug:
+        raw_keys.append(slug.lower().replace("-", ""))
+    if domain:
+        raw_keys.append(domain_to_sitetag(domain))
+
+    # TLD-less candidate keys; anything shorter than 3 characters is ignored.
+    wanted = {key for key in map(_strip_tld, raw_keys) if len(key) >= 3}
+    if not wanted:
+        return None
+
+    for origin in OUR_ORIGINS:
+        if _strip_tld(origin_to_sitetag(origin)) in wanted:
+            return origin
+    return None
+
+
+def main():
+    """Classify the resolved theporndude entries and report the coverage.
+
+    Reads ``theporndude_resolved.json`` and sorts each entry into one of three
+    lists: matched against OUR_ORIGINS, new candidate, or error (an "error" key
+    with no resolved domain). Prints a summary plus the top matched/new entries
+    by rank and writes the full result to ``theporndude_coverage.json``.
+    """
+    data = json.loads(Path("theporndude_resolved.json").read_text())
+    have, new, error = [], [], []
+    for entry in data:
+        if "error" in entry and not entry.get("real_domain"):
+            error.append(entry)
+            continue
+        our = match(entry.get("slug", ""), entry.get("real_domain", ""))
+        entry["our_origin"] = our
+        (have if our else new).append(entry)
+
+    def by_rank(entries):
+        return sorted(entries, key=lambda e: e["rank"])
+
+    print("=== Coverage ===")
+    print(f"Total theporndude top-porn-tubes:       {len(data)}")
+    print(f"  Already in our DB:                    {len(have)}")
+    print(f"  NEW (potential candidates):           {len(new)}")
+    print(f"  Errors:                               {len(error)}")
+    print()
+    print("=== Already have (matched) — top 30 by theporndude rank ===")
+    for e in by_rank(have)[:30]:
+        shown_score = e.get('theporndude_score') or '?'
+        print(
+            f"  #{e['rank']:>3}  score={shown_score:>4}  "
+            f"{e['real_domain']:<28} -> {e['our_origin']}"
+        )
+    print()
+    print("=== NEW candidates (not in DB) — top 60 by theporndude rank ===")
+    for e in by_rank(new)[:60]:
+        shown_score = e.get('theporndude_score') or '?'
+        shown_domain = e.get('real_domain') or '?'
+        print(
+            f"  #{e['rank']:>3}  score={shown_score:>4}  "
+            f"{shown_domain:<30}  ({e['slug']})"
+        )
+
+    # Detailed output file
+    already_have = [
+        {
+            "rank": e["rank"],
+            "slug": e["slug"],
+            "domain": e["real_domain"],
+            "score": e.get("theporndude_score"),
+            "our_origin": e["our_origin"],
+        }
+        for e in by_rank(have)
+    ]
+    new_candidates = [
+        {
+            "rank": e["rank"],
+            "slug": e["slug"],
+            "domain": e.get("real_domain"),
+            "score": e.get("theporndude_score"),
+            "final_url": e.get("final_url", ""),
+        }
+        for e in by_rank(new)
+    ]
+    errors = [{"rank": e["rank"], "slug": e["slug"], "error": e.get("error")} for e in error]
+    summary = {
+        "total": len(data),
+        "already_have": already_have,
+        "new_candidates": new_candidates,
+        "errors": errors,
+    }
+    Path("theporndude_coverage.json").write_text(json.dumps(summary, indent=2))
+    print("\n-> theporndude_coverage.json")
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/theporndude_curl_triage.py
+++ b/scripts/theporndude_curl_triage.py
@ -0,0 +1,176 @@
+"""Batch curl triage 144 nowych theporndude tubes:
+- HEAD root domain (200/4xx/5xx/timeout?)
+- GET / → check landing markers: video listing, sceny, login wall, redirect
+- GET /latest, /videos, /tube/recent → check które listing path działa
+- Wynik: per-slug status + landing markers + scene_url_pattern guess
+"""
+import asyncio
+import json
+import re
+from pathlib import Path
+from urllib.parse import urlparse
+
+import httpx
+
+COVERAGE_FILE = Path("theporndude_coverage.json")
+OUT_FILE = Path("theporndude_triage.json")
+
+UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36"
+
+# Common candidate landing paths
+# NOTE(review): nothing in this script reads LANDING_PATHS; audit_one only fetches "/".
+LANDING_PATHS = ["/", "/latest", "/latest-videos", "/recent", "/new", "/videos", "/category/new", "/top-rated"]
+
+# Markers searched for in the HTML
+# Anchor hrefs that look like scene pages: first root-relative, then
+# absolute / protocol-relative links. Group 1 is the href value.
+SCENE_LINK_PATTERNS = [
+    r'<a[^>]+href="(/(?:video|videos|watch|v|scene|scenes|stream|movie|movies|view|play|porn|tube)/[^"]+)"',
+    r'<a[^>]+href="((?:https?:)?//[^/"]+/(?:video|videos|watch|v|scene|scenes|stream|movie|movies|view|play|porn|tube)/[^"]+)"',
+]
+# (regex, finding name) pairs; analyze_html runs each regex case-insensitively
+# and records the name when it matches anywhere in the page.
+META_MARKERS = [
+    (r'jsonld|json-ld|"@type"\s*:\s*"VideoObject"', "jsonld_video"),
+    (r'<meta\s+property="og:type"\s+content="video', "og_video"),
+    (r'<meta\s+name="description"\s+content="([^"]+)"', "meta_desc"),
+    (r'class="[^"]*\b(?:video|scene|episode)-?(?:item|card|tile|thumb)\b', "video_card"),
+    (r'class="[^"]*\b(?:performer|actress|model|pornstar)\b', "performer_marker"),
+    (r'class="[^"]*\b(?:studio|production|brand|channel)\b', "studio_marker"),
+    (r'class="[^"]*\b(?:duration|runtime|length)\b|<time\s+datetime=', "duration_marker"),
+    (r'\b(?:HLS|hls|m3u8|application/x-mpegURL)\b', "hls_marker"),
+    (r'(?:hlsmanifest|videoUrl|video_url|stream_url|streamUrl)\s*[:=]\s*["\']', "stream_url_marker"),
+    (r'login\s*required|create\s+account|sign\s+(?:in|up)|members\s+only|join\s+now\s+to\s+watch', "auth_wall"),
+    (r'<title>[^<]*\b(?:404|not\s+found|gone|domain)\b[^<]*</title>', "dead_404"),
+    (r'<meta[^>]+http-equiv="refresh"[^>]+url=', "meta_refresh"),
+]
+
+
+async def fetch_one(cli: httpx.AsyncClient, url: str) -> tuple[int, str]:
+    """GET *url*, following redirects, and return ``(status, text)``.
+
+    The text is capped at 200,000 characters. Failures are reported in-band
+    rather than raised: ``(-1, "conn_refused")``, ``(-2, "timeout")``, or
+    ``(-9, <first 120 characters of the exception>)`` for anything else.
+    """
+    request_headers = {"User-Agent": UA}
+    try:
+        response = await cli.get(url, headers=request_headers, follow_redirects=True)
+        body = response.text[:200_000]  # cap response
+        return response.status_code, body
+    except httpx.ConnectError:
+        return -1, "conn_refused"
+    except httpx.TimeoutException:
+        return -2, "timeout"
+    except Exception as exc:
+        return -9, str(exc)[:120]
+
+
+def analyze_html(html: str) -> dict:
+    """Collect marker findings and scene-link samples from a page.
+
+    Returns a dict with ``<marker name>: True`` for every META_MARKERS regex
+    that matches (case-insensitively). When scene links are found it also holds
+    ``scene_link_samples`` (up to 3 hrefs, each cut to 120 characters) and
+    ``scene_path_prefixes`` (sorted first path segments, e.g. ``["/video"]``).
+    At most 5 links are collected across all SCENE_LINK_PATTERNS.
+    """
+    found = {}
+    for pattern, name in META_MARKERS:
+        if re.search(pattern, html, re.IGNORECASE):
+            found[name] = True
+    # Scene link patterns
+    scene_links = []
+    for p in SCENE_LINK_PATTERNS:
+        for m in re.finditer(p, html, re.IGNORECASE):
+            scene_links.append(m.group(1)[:120])
+            if len(scene_links) >= 5:
+                break
+        if len(scene_links) >= 5:
+            break
+    if scene_links:
+        found["scene_link_samples"] = scene_links[:3]
+        # Unique first path segment of every sampled link.
+        prefixes = set()
+        for link in scene_links:
+            # Take the prefix from the URL *path*: for absolute and
+            # protocol-relative hrefs the scheme/host must not be mistaken for
+            # a path segment ("/https:" or "/example.com").
+            try:
+                path = urlparse(link).path
+            except ValueError:
+                # Malformed host part (e.g. broken brackets); skip this link.
+                continue
+            first_segment = path.lstrip("/").split("/", 1)[0]
+            if first_segment:
+                prefixes.add("/" + first_segment)
+        found["scene_path_prefixes"] = sorted(prefixes)
+    return found
+
+
+async def audit_one(cli: httpx.AsyncClient, slug: str, domain: str) -> dict:
+    """Audit a single tube: fetch its root page and score the HTML heuristically.
+
+    The result always carries ``slug`` and ``domain``. It ends early with
+    ``error`` for an implausible domain, or with ``root_status``/``root_error``
+    when the fetch status is not 200/301/302. Otherwise it adds
+    ``root_findings``, ``heuristic_score`` and ``reasons``.
+    """
+    report = {"slug": slug, "domain": domain}
+
+    # Only probe https://<domain>/ when the value looks like a hostname.
+    if not (domain and re.match(r"^[\w\.-]+\.\w+$", domain)):
+        report["error"] = "no_valid_domain"
+        return report
+
+    status, html = await fetch_one(cli, f"https://{domain}/")
+    report["root_status"] = status
+    if status not in (200, 301, 302):
+        report["root_error"] = html[:80] if isinstance(html, str) else None
+        return report
+
+    findings = analyze_html(html)
+    report["root_findings"] = findings
+
+    score = 0
+    reasons = []
+    # One point each; every hit is also listed in `reasons`.
+    for marker in ("jsonld_video", "og_video", "video_card", "performer_marker", "studio_marker"):
+        if findings.get(marker):
+            score += 1
+            reasons.append(marker)
+    # Half-point signals; these are not listed in `reasons`.
+    if findings.get("duration_marker"):
+        score += 0.5
+    if findings.get("hls_marker") or findings.get("stream_url_marker"):
+        score += 0.5
+    if findings.get("scene_path_prefixes"):
+        score += 1
+        reasons.append(f"scene_paths={findings['scene_path_prefixes']}")
+    # Penalties for pages that are gated, dead, or redirecting.
+    for marker, penalty in (("auth_wall", 2), ("dead_404", 5), ("meta_refresh", 1)):
+        if findings.get(marker):
+            score -= penalty
+            reasons.append(marker)
+
+    report["heuristic_score"] = round(score, 1)
+    report["reasons"] = reasons
+    return report
+
+
+async def main():
+    """Audit every new candidate from the coverage file and save the results.
+
+    Runs ``audit_one`` for each entry of ``new_candidates`` with at most 12
+    audits in flight, writes the merged records to OUT_FILE and prints how
+    many tubes fall into each heuristic-score bucket.
+    """
+    coverage = json.loads(COVERAGE_FILE.read_text())
+    new_candidates = coverage["new_candidates"]
+    print(f"audytuję {len(new_candidates)} nowych kandydatów…")
+
+    timeout = httpx.Timeout(15.0, connect=8.0)
+    limits = httpx.Limits(max_keepalive_connections=20, max_connections=50)
+    async with httpx.AsyncClient(timeout=timeout, limits=limits, http2=False) as cli:
+        gate = asyncio.Semaphore(12)
+
+        async def worker(candidate):
+            async with gate:
+                domain = candidate.get("domain") or ""
+                # No domain, or pdude.link led to the porndudecams.com
+                # interstitial: fall back to <slug>.com.
+                if not domain or "porndudecams" in domain:
+                    domain = f"{candidate['slug'].lower()}.com"
+                audit = await audit_one(cli, candidate["slug"], domain)
+                return {**candidate, **audit}
+
+        results = await asyncio.gather(*[worker(c) for c in new_candidates])
+
+    OUT_FILE.write_text(json.dumps(results, indent=2))
+
+    # Stats: number of results per score bucket.
+    tally = {}
+    for record in results:
+        value = record.get("heuristic_score", 0)
+        if value >= 5:
+            label = "5+"
+        elif value >= 3:
+            label = "3-5"
+        elif value >= 1:
+            label = "1-3"
+        else:
+            label = "<1"
+        tally[label] = tally.get(label, 0) + 1
+    print("\n=== Heurystyczny rozkład (canonical-fit) ===")
+    for b in ["5+", "3-5", "1-3", "<1"]:
+        if b in tally:
+            print(f"  {b:<5}  {tally[b]} tubów")
+    print(f"\n-> {OUT_FILE}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/scripts/theporndude_movies_pipeline.py
+++ b/scripts/theporndude_movies_pipeline.py
@ -0,0 +1,234 @@
+"""Pełny pipeline dla theporndude /full-porn-movies-sites (94 tubes):
+1. Resolve real domains (pdude.link follow, ale follow only 1 hop)
+2. Coverage match vs nasze 25+ origins
+3. Curl triage HTML markers
+4. Per-tube scorecard
+"""
+import asyncio
+import json
+import re
+from pathlib import Path
+from urllib.parse import urlparse
+
+import httpx
+
+UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36"
+
+OUR_ORIGINS = [
+    "tube:0dayxxcom", "tube:epornercom", "tube:fpoxxx", "tube:freshpornoorg",
+    "tube:hqpornercom", "tube:latestpornvideocom", "tube:mypornerleakcom",
+    "tube:perverzijacom", "tube:porn00org", "tube:porndishcom", "tube:porndittcom",
+    "tube:pornhatcom", "tube:pornhubcom", "tube:porntrexcom", "tube:pornxpph",
+    "tube:redtubecom", "tube:sxylandcom", "tube:sxyprncom", "tube:xhamstercom",
+    "tube:xnxxcom", "tube:xvideoscom", "tube:youporncom", "tube:latestleaksco",
+    "tube:siskavideo", "tube:hdporn92com",
+    "tube:xmoviesforyoucom", "tube:watchporn", "tube:porn4dayspw",
+    "tube:paradisehillcc",
+]
+
+# TLD tokens that may trail a dot-less sitetag (e.g. "xvideoscom").
+_TLD_RE = re.compile(r"(com|net|org|tv|cc|pw|co|to|ws|me|sx|info|biz)$")
+
+
+def _strip_tld(s: str) -> str:
+    """Return *s* without a trailing TLD token; unchanged when none is present."""
+    tld = _TLD_RE.search(s)
+    if tld is None:
+        return s
+    return s[: tld.start()] + s[tld.end():]
+
+
+def match(slug: str, domain: str) -> str | None:
+    """Return the first OUR_ORIGINS entry whose TLD-less sitetag equals the
+    TLD-less *slug* or *domain* (dots and hyphens removed), or None.
+
+    Keys shorter than 3 characters never match.
+    """
+    raw_keys = []
+    if slug:
+        raw_keys.append(slug.lower().replace("-", ""))
+    if domain:
+        raw_keys.append(domain.lower().replace(".", "").replace("-", ""))
+
+    wanted = {key for key in map(_strip_tld, raw_keys) if len(key) >= 3}
+    for origin in OUR_ORIGINS:
+        if _strip_tld(origin.replace("tube:", "")) in wanted:
+            return origin
+    return None
+
+
+# Anchor hrefs that contain a scene-like path segment (/video/, /watch/, ...).
+# Group 1 is the href. The scheme+host part and any leading path segments are
+# both optional, so root-relative links such as "/video/abc" match as well as
+# "//host/video/abc", "https://host/video/abc" and "/en/video/abc".
+SCENE_PATH_RE = re.compile(
+    r'<a[^>]+href="((?:(?:https?:)?//[^/"]+)?(?:/[^"]*?)?/(?:video|videos|watch|v|scene|movie|movies|play|view|stream)/[^"]+)"',
+    re.IGNORECASE,
+)
+# (regex, finding name) pairs; analyze_html runs each regex case-insensitively
+# over the page and records the name when it matches.
+META_MARKERS = [
+    (r'"@type"\s*:\s*"VideoObject"', "jsonld_video"),
+    (r'<meta\s+property="og:type"\s+content="video', "og_video"),
+    (r'class="[^"]*\b(?:video|scene|movie|episode)-?(?:item|card|tile|thumb|block)\b', "video_card"),
+    (r'class="[^"]*\b(?:performer|actress|model|pornstar|cast)\b|href="[^"]*/pornstar', "performer_marker"),
+    (r'class="[^"]*\b(?:studio|production|brand|channel|network)\b|href="[^"]*/studio', "studio_marker"),
+    (r'class="[^"]*\b(?:duration|runtime|length)\b|itemprop="duration"', "duration_marker"),
+    (r'\b(?:HLS|m3u8|application/x-mpegURL)\b', "hls_marker"),
+    (r'(?:videoUrl|video_url|stream_url|streamUrl)\s*[:=]\s*["\']', "stream_url_marker"),
+    (r'(?:login\s+required|create\s+account|members\s+only|join\s+now)', "auth_wall"),
+    (r'<title>[^<]*\b(?:404|not\s+found|domain\s+for\s+sale|gone)\b', "dead_404"),
+]
+
+
+async def fetch_one(cli: httpx.AsyncClient, url: str, *, max_redirects: int = 5) -> tuple[int, str, str]:
+    """GET *url*, following redirects by hand, and return ``(status, text, domain)``.
+
+    Up to *max_redirects* hops are followed. ``text`` is the final body capped
+    at 200,000 characters. ``domain`` is the first redirect target whose host
+    is neither pdude.link nor theporndude.com; without such a hop it is the
+    host of the last URL requested. A leading "www." is dropped in both cases.
+
+    Failures are reported in-band rather than raised: ``(-1, "conn_refused", "")``,
+    ``(-2, "timeout", "")`` or ``(-9, <exception text, 120 chars>, "")``.
+    """
+    from urllib.parse import urljoin  # local import: the module only imports urlparse
+
+    headers = {"User-Agent": UA}
+    try:
+        cur = await cli.get(url, headers=headers, follow_redirects=False)
+        cur_url = url
+        hops = 0
+        first_external_domain = None
+        while cur.status_code in (301, 302, 303, 307, 308) and hops < max_redirects:
+            loc = cur.headers.get("location")
+            if not loc:
+                break
+            # urljoin resolves every relative form of Location ("/path",
+            # "//host/path", "path") against the URL that issued the redirect.
+            cur_url = urljoin(cur_url, loc)
+            hops += 1
+            # Remember the first hop that leaves pdude.link / theporndude.com.
+            host = urlparse(cur_url).hostname or ""
+            if first_external_domain is None and not host.endswith(("pdude.link", "theporndude.com")):
+                first_external_domain = host.removeprefix("www.")
+            cur = await cli.get(cur_url, headers=headers, follow_redirects=False)
+        final_host = (urlparse(cur_url).hostname or "").removeprefix("www.")
+        return cur.status_code, cur.text[:200_000], first_external_domain or final_host
+    except httpx.ConnectError:
+        return -1, "conn_refused", ""
+    except httpx.TimeoutException:
+        return -2, "timeout", ""
+    except Exception as e:
+        return -9, str(e)[:120], ""
+
+
+async def resolve_domain(cli: httpx.AsyncClient, slug: str) -> str:
+    """Return the host that ``https://pdude.link/<slug>`` redirects to.
+
+    Only the first hop is inspected. An empty string is returned when there is
+    no Location header, when the target is an affiliate/interstitial host
+    (anexo.link, awejmp.com, porndudecams), or when anything raises; the
+    caller then falls back to ``<slug>.com``.
+    """
+    affiliate_markers = ("anexo.link", "awejmp.com", "porndudecams")
+    try:
+        response = await cli.get(
+            f"https://pdude.link/{slug}", headers={"User-Agent": UA}, follow_redirects=False
+        )
+        target = response.headers.get("location", "")
+        if not target:
+            return ""
+        host = (urlparse(target).hostname or "").replace("www.", "")
+        if any(marker in host for marker in affiliate_markers):
+            return ""
+        return host
+    except Exception:
+        # Best effort: any failure means "could not resolve".
+        return ""
+
+
+def analyze_html(html: str) -> dict:
+    """Collect marker findings and scene-link samples from a page.
+
+    Returns a dict with ``<marker name>: True`` for every META_MARKERS regex
+    that matches (case-insensitively). It also holds ``scene_path_prefixes``
+    (sorted first path segments of the sampled scene links) and
+    ``scene_link_samples`` (up to 3 hrefs, each cut to 100 characters) when
+    SCENE_PATH_RE finds links. At most 5 links are inspected.
+    """
+    found = {}
+    for pattern, name in META_MARKERS:
+        if re.search(pattern, html, re.IGNORECASE):
+            found[name] = True
+    prefixes = set()
+    sample = []
+    for m in SCENE_PATH_RE.finditer(html):
+        link = m.group(1)
+        sample.append(link[:100])
+        # Take the prefix from the URL path so that "/x/...", "//host/x/..."
+        # and "https://host/x/..." all contribute "/x".
+        try:
+            path = urlparse(link).path
+        except ValueError:
+            path = ""  # malformed host part: keep the sample, skip the prefix
+        first_segment = path.lstrip("/").split("/", 1)[0]
+        if first_segment:
+            prefixes.add("/" + first_segment)
+        if len(sample) >= 5:
+            break
+    if prefixes:
+        found["scene_path_prefixes"] = sorted(prefixes)
+    if sample:
+        found["scene_link_samples"] = sample[:3]
+    return found
+
+
+def score_findings(f: dict) -> tuple[float, list]:
+    """Turn analyze_html findings into ``(score, reasons)``.
+
+    Positive markers add their weight, ``auth_wall`` and ``dead_404`` subtract.
+    The score is rounded to one decimal. ``reasons`` names every contributing
+    marker in evaluation order, except the HLS / stream-URL signal, which
+    scores without being listed.
+    """
+    score = 0.0
+    reasons = []
+
+    bonuses = (
+        ("jsonld_video", 1.5),
+        ("og_video", 0.5),
+        ("video_card", 1),
+        ("performer_marker", 1),
+        ("studio_marker", 1),
+        ("duration_marker", 0.5),
+    )
+    for marker, weight in bonuses:
+        if f.get(marker):
+            score += weight
+            reasons.append(marker)
+
+    if f.get("hls_marker") or f.get("stream_url_marker"):
+        score += 0.5
+
+    if f.get("scene_path_prefixes"):
+        score += 1
+        reasons.append(f"paths={f['scene_path_prefixes']}")
+
+    for marker, penalty in (("auth_wall", 2), ("dead_404", 5)):
+        if f.get(marker):
+            score -= penalty
+            reasons.append(marker)
+
+    return round(score, 1), reasons
+
+
+async def main():
+    """Audit every tube listed in ``theporndude_movies.json`` and print a scorecard.
+
+    Each entry is resolved to a domain, its root page is fetched and scored,
+    and the slug/domain is matched against OUR_ORIGINS, with at most 12 entries
+    in flight. A summary plus the promising and low-value lists are printed,
+    and all records are written to ``theporndude_movies_scorecard.json``.
+    """
+    movies = json.loads(Path("theporndude_movies.json").read_text())["all"]
+    print(f"audyt {len(movies)} tubów z full-porn-movies-sites…")
+
+    timeout = httpx.Timeout(15.0, connect=8.0)
+    async with httpx.AsyncClient(timeout=timeout, http2=False) as cli:
+        sem = asyncio.Semaphore(12)
+
+        async def worker(r):
+            async with sem:
+                slug = r["slug"]
+                # Resolve the real domain from the first pdude.link hop.
+                domain = await resolve_domain(cli, slug)
+                if not domain or any(x in domain for x in ["anexo.link", "awejmp.com", "porndudecams"]):
+                    domain = f"{slug.lower()}.com"
+                # Fetch the root page and apply the scene-path heuristics.
+                status, html, _ = await fetch_one(cli, f"https://{domain}/")
+                findings = analyze_html(html) if status == 200 else {}
+                score, reasons = score_findings(findings)
+                our = match(slug, domain)
+                return {
+                    **r,
+                    "domain": domain,
+                    "root_status": status,
+                    "findings": findings,
+                    "score": score,
+                    "reasons": reasons,
+                    "our_origin": our,
+                }
+
+        results = await asyncio.gather(*[worker(r) for r in movies])
+
+    # Aggregate
+    have = [r for r in results if r["our_origin"]]
+    fresh = [r for r in results if not r["our_origin"]]
+    new_promising = [r for r in fresh if r["score"] >= 2.5]
+    new_low = [r for r in fresh if 1 <= r["score"] < 2.5]
+    # Scores strictly between 0 and 1; this bucket is now part of the summary.
+    new_weak = [r for r in fresh if 0 < r["score"] < 1]
+    new_dead = [r for r in fresh if r["root_status"] <= 0 or r["score"] < 0]
+    new_no_signal = [r for r in fresh if r["score"] == 0 and r["root_status"] == 200]
+
+    print(f"\n=== Coverage /full-porn-movies-sites ({len(results)} tubes) ===")
+    print(f"  already have:  {len(have):>3}")
+    print(f"  promising:     {len(new_promising):>3}")
+    print(f"  low value:     {len(new_low):>3}")
+    print(f"  weak signal:   {len(new_weak):>3}")
+    print(f"  no signal:     {len(new_no_signal):>3}")
+    print(f"  dead:          {len(new_dead):>3}")
+    print()
+    print("ALREADY HAVE:")
+    for r in have:
+        print(f"  {r['slug']:<20} -> {r['our_origin']}")
+    print()
+    print("PROMISING (score >= 2.5):")
+    for r in sorted(new_promising, key=lambda x: -x["score"]):
+        print(f"  score={r['score']:>4}  {r['domain']:<25} ({r['slug']:<20}) reasons={','.join(r['reasons'])[:60]}")
+    print()
+    print("LOW VALUE (1-2.5):")
+    for r in sorted(new_low, key=lambda x: -x["score"]):
+        print(f"  score={r['score']:>4}  {r['domain']:<25} ({r['slug']:<20}) reasons={','.join(r['reasons'])[:60]}")
+
+    Path("theporndude_movies_scorecard.json").write_text(json.dumps(results, indent=2))
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/scripts/theporndude_resolve_domains.py
+++ b/scripts/theporndude_resolve_domains.py
@ -0,0 +1,87 @@
+"""Per 166 review slugs z top-porn-tube-sites:
+1. Fetch review page → extract pdude.link Visit URL + rating + score badges
+2. Follow pdude.link → real tube domain
+3. Cross-check vs nasze 25 tube origins
+4. Output JSON: { slug, name, theporndude_rank, theporndude_score, real_domain, in_our_db, our_origin }
+"""
+import asyncio
+import json
+import re
+from pathlib import Path
+from urllib.parse import urlparse
+
+import httpx
+
+REVIEWS_FILE = Path("theporndude_free_tubes.json")
+OUT_FILE = Path("theporndude_resolved.json")
+
+UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0 Safari/537.36"
+
+
+async def fetch_review(cli: httpx.AsyncClient, review: dict, rank: int) -> dict:
+    """Fetch one review page and resolve the tube's real domain.
+
+    The result is a copy of *review* extended with ``rank``, the parsed
+    ``theporndude_score``, ``page_title`` and ``page_desc``. When the page holds
+    a pdude.link URL, that link is requested and ``real_domain``/``final_url``
+    are taken from the response URL. Failures are recorded under ``error``
+    instead of being raised.
+    """
+    ua_header = {"User-Agent": UA}
+    page_url = f"https://theporndude.com/{review['id']}/{review['slug']}"
+    try:
+        page = await cli.get(page_url, headers=ua_header)
+        html = page.text
+    except Exception as e:
+        return {**review, "rank": rank, "error": f"fetch_review: {e}"}
+
+    def first_group(pattern):
+        found = re.search(pattern, html)
+        return found.group(1) if found else None
+
+    # Score badge, pdude.link visit URL, <title> and meta description.
+    score_text = first_group(r'class="rate__num">\s*(\d+(?:\.\d+)?)\s*<')
+    pdude_url = first_group(r'href="(https://pdude\.link/[\w\-\.]+)"')
+    title = first_group(r"<title>([^<]+)</title>")
+    description = first_group(r'<meta\s+name="description"\s+content="([^"]+)"')
+
+    out = {
+        **review,
+        "rank": rank,
+        "theporndude_score": float(score_text) if score_text is not None else None,
+        "page_title": (title or "")[:120],
+        "page_desc": (description or "")[:200],
+    }
+    if pdude_url is None:
+        out["error"] = "no_pdude_link"
+        return out
+
+    # Follow pdude.link and read the URL of the response that comes back.
+    try:
+        landing = await cli.get(pdude_url, headers=ua_header)
+        final_url = str(landing.url)
+        host = (urlparse(final_url).hostname or "").replace("www.", "")
+        out["real_domain"] = host
+        out["final_url"] = final_url[:200]
+    except Exception as e:
+        out["error"] = f"pdude_follow: {e}"
+    return out
+
+
+async def main():
+    """Resolve every review in REVIEWS_FILE and write the results to OUT_FILE.
+
+    Reviews are processed with at most 8 in flight; ``rank`` is the 1-based
+    position in the input list. Prints how many entries got a ``real_domain``.
+    """
+    reviews = json.loads(REVIEWS_FILE.read_text())["reviews"]
+
+    timeout = httpx.Timeout(20.0, connect=10.0)
+    limits = httpx.Limits(max_keepalive_connections=10, max_connections=20)
+    async with httpx.AsyncClient(
+        timeout=timeout, limits=limits, follow_redirects=True, http2=False
+    ) as cli:
+        sem = asyncio.Semaphore(8)
+
+        async def worker(rev, rank):
+            async with sem:
+                return await fetch_review(cli, rev, rank)
+
+        tasks = [worker(r, i + 1) for i, r in enumerate(reviews)]
+        results = await asyncio.gather(*tasks)
+
+    OUT_FILE.write_text(json.dumps(results, indent=2))
+    ok = sum(1 for r in results if r.get("real_domain"))
+    total = len(results)
+    # An empty review list must not crash the summary with ZeroDivisionError.
+    pct = ok * 100 / total if total else 0
+    print(f"resolved {ok}/{total} ({pct:.0f}%)")
+    print(f"out -> {OUT_FILE}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
--- a/scripts/theporndude_scorecard.py
+++ b/scripts/theporndude_scorecard.py
@ -0,0 +1,104 @@
+"""Generate the final scorecard JSON for all 166 theporndude top-porn-tube-sites:
+- coverage status (already_have/new/dead/low_value)
+- canonical_value_score 0-5 (heuristic + our ranking)
+- recommendation: skip / consider / pilot / integrate
+
+Plus a markdown summary for humans.
+"""
+import json
+from pathlib import Path
+
+# Both inputs are read at import time from paths relative to the current
+# working directory.
+# COVERAGE is indexed by "already_have" and "new_candidates" in main().
+COVERAGE = json.loads(Path("theporndude_coverage.json").read_text())
+# TRIAGE is iterated as a sequence of records carrying a "slug" key in main().
+TRIAGE = json.loads(Path("theporndude_triage.json").read_text())
+
+
+def _classify(score, findings, root_status):
+    """Return ``(status, recommendation)`` for one not-yet-integrated candidate.
+
+    Branches are checked in priority order: unreachable root, login wall with
+    a score below 2, then the score thresholds 2.5 and 1.
+    """
+    if root_status <= 0 or findings.get("dead_404"):
+        return "dead", "skip — dead/unreachable"
+    if findings.get("auth_wall") and score < 2:
+        return "auth_wall", "skip — login required, no public scenes"
+    if score >= 2.5:
+        return "promising", "pilot — deep audit + write extractor"
+    if score >= 1:
+        return "low_value", "consider — basic metadata only, low priority"
+    return "no_value", "skip — no canonical-fit signal in HTML"
+
+
+def _print_group(scorecards, status, reason_width):
+    """Print one line per scorecard with ``status``, reasons cut to ``reason_width``."""
+    for s in scorecards:
+        if s["status"] == status:
+            r = ",".join(s.get("heuristic_reasons", []))[:reason_width]
+            print(f"  #{s['rank']:>3}  score={s['canonical_value_score']:>4}  {s['domain']:<25} ({s['slug']})  {r}")
+
+
+def main():
+    """Build the scorecard, write it to JSON and print a text summary.
+
+    Combines ``COVERAGE["already_have"]`` (always recommended as skip) with
+    ``COVERAGE["new_candidates"]`` classified from their ``TRIAGE`` record
+    (looked up by slug), sorts everything by rank, writes
+    ``theporndude_scorecard.json`` and prints per-status counts followed by
+    the promising and low-value candidates.
+    """
+    triage_by_slug = {r["slug"]: r for r in TRIAGE}
+
+    scorecards = []
+    for r in COVERAGE["already_have"]:
+        scorecards.append({
+            "rank": r["rank"],
+            "slug": r["slug"],
+            "domain": r["domain"],
+            "status": "already_have",
+            "our_origin": r["our_origin"],
+            "canonical_value_score": None,
+            "recommendation": "skip — already integrated",
+        })
+    for r in COVERAGE["new_candidates"]:
+        # A candidate without a triage record falls back to all-default
+        # signals, which classifies it as dead (root_status 0).
+        t = triage_by_slug.get(r["slug"], {})
+        score = t.get("heuristic_score", 0)
+        findings = t.get("root_findings", {})
+        reasons = t.get("reasons", [])
+        root_status = t.get("root_status", 0)
+        domain = t.get("domain") or r.get("domain") or f"{r['slug']}.com"
+
+        status, rec = _classify(score, findings, root_status)
+
+        scorecards.append({
+            "rank": r["rank"],
+            "slug": r["slug"],
+            "domain": domain,
+            "status": status,
+            "our_origin": None,
+            "canonical_value_score": score,
+            "heuristic_reasons": reasons,
+            "findings": findings,
+            "recommendation": rec,
+        })
+
+    scorecards.sort(key=lambda x: x["rank"])
+
+    # Fixed key order; statuses with no entries are still reported as 0.
+    statuses = ("already_have", "promising", "low_value", "no_value", "auth_wall", "dead")
+    out = {
+        "source": "theporndude.com/top-porn-tube-sites",
+        # Hard-coded date literal; it is not derived from the input files.
+        "fetched_at": "2026-05-20",
+        "total": len(scorecards),
+        "summary": {
+            k: sum(1 for s in scorecards if s["status"] == k) for k in statuses
+        },
+        "scorecards": scorecards,
+    }
+    Path("theporndude_scorecard.json").write_text(json.dumps(out, indent=2))
+
+    # Pretty print summary
+    total = out["total"]
+    print("=" * 70)
+    print(f"THEPORNDUDE.COM CANONICAL-FIT SCORECARD ({total} tubes)")
+    print("=" * 70)
+    for k, v in out["summary"].items():
+        # Empty input must not crash the percentage with ZeroDivisionError.
+        pct = 100 * v / total if total else 0
+        print(f"  {k:<15} {v:>4} ({pct:.0f}%)")
+    print()
+    print("PROMISING (score >= 2.5) — pilot candidates:")
+    _print_group(scorecards, "promising", 60)
+    print()
+    print("LOW_VALUE (1-2.5) — defer:")
+    _print_group(scorecards, "low_value", 50)
+
+
+# Script entry point: build and print the scorecard when executed directly.
+if __name__ == "__main__":
+    main()
--- a/tests/test_scoring.py
+++ b/tests/test_scoring.py
@ -9,9 +9,12 @@ import pytest
 from app.resolve.scoring import (
    composite_score,
    date_proximity,
+    detect_modifier_tags,
+    detect_series_positions,
    hamming_distance_hex,
    performer_set_similarity,
    phash_similarity,
+    series_mismatch_strength,
    title_similarity,
    triage,
 )
@ -146,6 +149,112 @@ def test_composite_clamps_to_unit() -> None:
    assert score == 1.0


+# ---- series position / modifier detector ---------------------------------
+
+def test_detect_series_positions_episode() -> None:
+    """An explicit "episode N" marker yields that single position."""
+    title = "pleasureville a dp xxx parody episode 4"
+    assert detect_series_positions(title) == {4}
+
+def test_detect_series_positions_part_with_dot() -> None:
+    """The abbreviated "pt." form is recognised as a position marker."""
+    found = detect_series_positions("neon moonlight pt. 2")
+    assert found == {2}
+
+def test_detect_series_positions_hash_only() -> None:
+    """A "#N" marker is picked up together with a "scene N" marker."""
+    found = detect_series_positions("women seeking women #131 scene 2")
+    assert found == {131, 2}
+
+def test_detect_series_positions_volume() -> None:
+    """Both the "volume N" and the "scene N" numbers are collected."""
+    found = detect_series_positions("women seeking women volume 140 scene 3")
+    assert found == {140, 3}
+
+def test_detect_series_positions_s_e_style() -> None:
+    """Compact "sN eM" notation contributes both numbers."""
+    found = detect_series_positions("can you handle a woman like me s9 e8")
+    assert found == {9, 8}
+
+def test_detect_series_positions_empty() -> None:
+    """A missing or empty title produces an empty position set."""
+    for blank in (None, ""):
+        assert detect_series_positions(blank) == set()
+
+def test_detect_modifier_tags_bts() -> None:
+    """A parenthesised "bts" marker is reported as the "bts" tag."""
+    tags = detect_modifier_tags("training ravyn (bts - 1)")
+    assert "bts" in tags
+
+def test_detect_modifier_tags_behind_the_scenes() -> None:
+    """The spelled-out phrase "behind the scenes" is reported as its own tag."""
+    tags = detect_modifier_tags("behind the scenes - two pairs of suckable melons")
+    assert "behind the scenes" in tags
+
+def test_detect_modifier_tags_unedited() -> None:
+    """A parenthesised "unedited" marker is reported as the "unedited" tag."""
+    tags = detect_modifier_tags("bad bella stinky feet prep (unedited)")
+    assert "unedited" in tags
+
+def test_series_mismatch_episode_2_vs_4_hard() -> None:
+    """Episode 2 vs episode 4 of the same title is a hard mismatch (1.0)."""
+    left = "pleasureville a dp xxx parody episode 2"
+    right = "pleasureville a dp xxx parody episode 4"
+    assert series_mismatch_strength(left, right) == 1.0
+
+def test_series_mismatch_intersection_is_no_mismatch() -> None:
+    """A shared position is no position mismatch; one-sided BTS still gives 0.7.
+
+    Both titles carry {7} (Make'em Sweat #7), so the positions agree, but the
+    BTS marker appears on one side only.
+    """
+    strength = series_mismatch_strength("make'em sweat #7", "make'em sweat #7 bts")
+    assert strength == pytest.approx(0.7)
+
+def test_series_mismatch_partial_overlap_is_still_hard() -> None:
+    """Partially overlapping positions are still a hard split (1.0).
+
+    "Volume 140 Scene 3" vs "Volume 140 Scene 4" share 140 but differ in 3/4:
+    they are separate scenes from the same compilation.
+    """
+    scene_three = "women seeking women volume 140 scene 3"
+    scene_four = "women seeking women volume 140 scene 4"
+    assert series_mismatch_strength(scene_three, scene_four) == 1.0
+
+def test_series_mismatch_no_year_false_positive() -> None:
+    """A four-digit year must not be reported as a series position."""
+    # "scene from 2020" must not yield {2020}. Per the original note the
+    # detector matches \d{1,3} with a boundary, so a 4-digit number is not
+    # captured — confirm against the pattern in app.resolve.scoring.
+    found = detect_series_positions("scene from 2020")
+    assert found == set()
+
+def test_series_mismatch_bts_asymmetric() -> None:
+    """A BTS marker on one side only gives 0.7 rather than a hard split.
+
+    Titles: "Training Ravyn" vs "Training Ravyn (BTS - 1)". Positions are
+    {} vs {1}: nothing in common, but one side is empty, so it is not a hard
+    split; the one-sided BTS marker yields 0.7.
+    """
+    plain = "training ravyn"
+    with_bts = "training ravyn (bts - 1)"
+    assert series_mismatch_strength(plain, with_bts) == pytest.approx(0.7)
+
+def test_series_mismatch_no_signal() -> None:
+    """Identical titles without position or modifier markers give 0.0."""
+    title = "the great heist"
+    assert series_mismatch_strength(title, title) == 0.0
+
+def test_composite_series_position_hard_reject() -> None:
+    """A series mismatch of 1.0 forces a hard reject despite perfect signals.
+
+    Even with fp/title/performers/date all at 1.0 the score drops to 0.0, so
+    "Episode 2 vs Episode 4" sharing a phash (studio reusing cover art) do
+    NOT auto-merge.
+    """
+    perfect = dict(
+        fp=1.0, title=1.0, performers=1.0, date_score=1.0, studio_match=True
+    )
+    score, reasons = composite_score(**perfect, series_mismatch=1.0)
+    assert score == 0.0
+    assert reasons.get("series_position_mismatch")
+
+def test_composite_series_modifier_cap_07() -> None:
+    """A modifier mismatch (BTS on one side) caps the score at 1 - 0.7 = 0.3."""
+    perfect = dict(
+        fp=1.0, title=1.0, performers=1.0, date_score=1.0, studio_match=True
+    )
+    score, reasons = composite_score(**perfect, series_mismatch=0.7)
+    expected_cap = pytest.approx(0.3)
+    assert score == expected_cap
+    assert reasons.get("series_modifier_cap") == expected_cap
+
+def test_composite_series_zero_no_effect() -> None:
+    """``series_mismatch`` of 0.0 and of None both leave a perfect score at 1.0."""
+    perfect = dict(
+        fp=1.0, title=1.0, performers=1.0, date_score=1.0, studio_match=True
+    )
+    with_zero, _ = composite_score(**perfect, series_mismatch=0.0)
+    with_none, _ = composite_score(**perfect, series_mismatch=None)
+    assert with_zero == with_none == pytest.approx(1.0)
+
+
 # ---- triage --------------------------------------------------------------

 def test_triage_thresholds() -> None: