session work: bug-report fixes + WIP cleanup
User-facing bugs resolved (per bug_reports table 2026-05-25):

- 40cd28aa (short-scene filter): mobile api.ts default min_duration_sec=60 hides 6519 sub-60s scenes across all list endpoints (Performer/Site/Tag/Browse). Caller may override with explicit 0.
- 5e89ef7e (porndoe needs cookies/play click): INJECTED_JS in PlayerScreen now auto-clicks the player-poster overlay (player-poster-play, big-play-button, vjs-big-play-button, jw-icon-display, btn-big-play, mejs__overlay-button, play-button, btn-play, videoPlayButton). Triggered on the same interval as consent-dismiss + ad-iframe removal.
- b1b5e1a2 (Mixdrop czarny ekran, i.e. black screen): re-enable mixdrop direct stream via VPS curl_cffi proxy (was: skip → WebView fallback → blank screen). Backend pipeline (mixdrop.py extract + stream_proxy._curl_cffi_stream with JA3 + auto-refetch on token expire) was already complete; just removed the skip in app/api/playback.py.

Plus ongoing WIP (paradisehill multi-part extraction, stream_proxy refetch logic, gesture race fix for long-press 2x speed, anti-adblock INJECTED_JS defenses, scripts for freshporno backfill, new sources API).
This commit is contained in:
parent
545fc8f9e3
commit
7979d5fa61
24 changed files with 1845 additions and 66 deletions
|
|
@ -91,6 +91,12 @@ def get_asset(
|
|||
zwykle `<update_id>/_expo/static/js/android/<hash>.js` lub
|
||||
`<update_id>/assets/<hash>`. Path traversal blocked przez resolve+is_relative.
|
||||
"""
|
||||
# Windows publish quirk: Expo metadata.json zapisuje assets[].path z backslashami
|
||||
# (os.sep) na Windowsie. publish_update.py kopiuje to do URL → manifest zawiera
|
||||
# `?asset=<update>/assets\<hash>`. Na Linux backslash nie jest separatorem path-a,
|
||||
# więc Path resolve nie znalazłby pliku (404 na każdy asset → mobile odrzuca cały
|
||||
# update). Normalizujemy tutaj zamiast wymagać re-publishu starych bundle'i.
|
||||
asset = asset.replace("\\", "/")
|
||||
runtime_dir = (_STATIC_DIR / runtimeVersion).resolve()
|
||||
target = (runtime_dir / asset).resolve()
|
||||
if not str(target).startswith(str(runtime_dir)):
|
||||
|
|
|
|||
|
|
@ -148,16 +148,42 @@ def resolve_movie_playback(
|
|||
links: list[StreamLink] = []
|
||||
|
||||
if pb.origin == "paradisehill":
|
||||
# Tylko WebView fallback — paradisehill player wymaga session login dla streamu.
|
||||
links = [
|
||||
StreamLink(
|
||||
stream_url=None,
|
||||
embed_url=pb.page_url,
|
||||
quality=pb.quality,
|
||||
type="hoster",
|
||||
raw={"origin": pb.origin},
|
||||
)
|
||||
]
|
||||
# Paradisehill: pobierz page, parsuj `var videoList = [...]` żeby dostać N parts.
|
||||
# Każdy part to direct mp4 z paradisehill CDN (v1.paradisehill.cc), serwowane
|
||||
# bez auth — 200 OK z plain User-Agent + Referer.
|
||||
# Bug-reports `c5693926`/`418270e4`/`3c999b27` 2026-05-21 ("ładuje tylko 1 z N").
|
||||
# Poprzednio: tylko WebView fallback → mobile gra 1. part w playerze paradisehilla,
|
||||
# nie ma sposobu przejść do następnego.
|
||||
try:
|
||||
from app.connectors.paradisehill import fetch_and_extract_parts
|
||||
parts = fetch_and_extract_parts(pb.page_url)
|
||||
except Exception as e:
|
||||
log.warning("paradisehill parts extract failed for %s: %s", pb.page_url, e)
|
||||
parts = []
|
||||
if parts:
|
||||
for url, label in parts:
|
||||
# NIE proxifikujemy tutaj — outer `_proxify_link` poniżej (linia 247) opakuje
|
||||
# wszystkie linki. Double-wrap → token wewnątrz tokena (broken proxy URL).
|
||||
links.append(
|
||||
StreamLink(
|
||||
stream_url=url,
|
||||
embed_url=None,
|
||||
quality=label,
|
||||
type="mp4",
|
||||
raw={"origin": pb.origin, "part_label": label},
|
||||
)
|
||||
)
|
||||
else:
|
||||
# Fallback: brak videoList (np. login-only movie) — WebView na całość.
|
||||
links = [
|
||||
StreamLink(
|
||||
stream_url=None,
|
||||
embed_url=pb.page_url,
|
||||
quality=pb.quality,
|
||||
type="hoster",
|
||||
raw={"origin": pb.origin},
|
||||
)
|
||||
]
|
||||
else:
|
||||
# dooplay mirror sources: spróbuj direct stream extract z hoster URL
|
||||
target = pb.embed_url or pb.page_url
|
||||
|
|
@ -185,15 +211,16 @@ def resolve_movie_playback(
|
|||
)
|
||||
stream = None
|
||||
# Mixdrop mxcontent CDN wymaga curl_cffi JA3 → wymusza VPS proxy.
|
||||
# Pre-public: skip mixdrop direct, fallback na embed_url (mobile WebView z
|
||||
# phone IP). Bandwidth + anonimowość VPS > UX. Movie ma zwykle 10+ alt
|
||||
# hosterów (voe/luluvid/doply/etc.), user może wybrać alternative.
|
||||
if stream and "mxcontent.net" in stream.lower():
|
||||
log.info(
|
||||
"movie playback %s: mixdrop mxcontent — skip (VPS-proxy required), WebView fallback",
|
||||
pb.id,
|
||||
)
|
||||
stream = None
|
||||
# Pre-2026-05-25 skipowaliśmy ten path "Bandwidth + anonimowość > UX",
|
||||
# ale bug-report b1b5e1a2 zgłosił że Mixdrop WebView fallback = czarny
|
||||
# ekran (recaptcha/adblock-detect blokują player init w in-app WebView).
|
||||
# Movie ma zwykle 10+ alt hosterów, ale jeśli WebView fallback nie
|
||||
# działa, user widzi tylko czarny ekran zamiast jakiejkolwiek alternatywy.
|
||||
# Backend ma pełen pipeline: mixdrop.py extract → raw={proxy_impersonate:
|
||||
# True, refetch_url} → stream_proxy._curl_cffi_stream z Chrome JA3 +
|
||||
# auto-refetch on token expire. Włączamy go z powrotem.
|
||||
# Bandwidth cost: ~485 MB/movie play; przy ~3 plays/day = 1.5 GB/day
|
||||
# (acceptable na 8GB/m Hetzner plan z 20 TB transfer).
|
||||
if stream:
|
||||
type_hint = "m3u8" if ".m3u8" in stream.lower() else "mp4"
|
||||
raw_meta: dict = {"origin": pb.origin, "host": target}
|
||||
|
|
@ -222,7 +249,14 @@ def resolve_movie_playback(
|
|||
raise HTTPException(status_code=502, detail="no playable links")
|
||||
|
||||
links = [_proxify_link(link, referer) for link in links]
|
||||
best = _pick_best(links) if links else None
|
||||
# Dla paradisehill multipart: `_pick_best` wybiera "Part N" z najwyższą cyfrą (parsuje
|
||||
# quality jako int), ale user chce zacząć od Part 1. Override: zawsze links[0].
|
||||
if pb.origin == "paradisehill" and len(links) > 1 and any(
|
||||
(link.raw or {}).get("part_label") for link in links
|
||||
):
|
||||
best = links[0]
|
||||
else:
|
||||
best = _pick_best(links) if links else None
|
||||
return ResolveOut(
|
||||
source=PlaybackSourceOut.model_validate(pb),
|
||||
best=best,
|
||||
|
|
|
|||
129
app/api/sources.py
Normal file
129
app/api/sources.py
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
"""GET /sources — lista tube źródeł dla feature "Sites" (mobile top-level tab).
|
||||
|
||||
Bug-report 2026-05-24 (ea6f05f9, Scenes screen): user chce wybrać "pages"
|
||||
obok Scenes i Movies — widzieć listę tube'ów i wchodzić w nie żeby zobaczyć
|
||||
najnowsze sceny z konkretnego źródła.
|
||||
|
||||
Endpoint enumeruje distinct `playback_sources.origin` z ŻYWYCH playback_sources
|
||||
(`dead_at IS NULL`), tylko origins zaczynające się od 'tube:' (kanoniczne źródła
|
||||
typu `canonical:tpdb_trailer` są pomijane — to nie są "scrapowane strony" w sensie
|
||||
intencji feature'a).
|
||||
|
||||
Sortowanie: scene_count DESC (najbardziej "wypełnione" tubey na górze).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime
|
||||
from typing import Annotated
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.auth import require_api_key
|
||||
from app.db import get_session
|
||||
from app.models.playback_source import PlaybackSource
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/sources", tags=["sources"], dependencies=[Depends(require_api_key)])
|
||||
|
||||
|
||||
class SourceOut(BaseModel):
    # One tube source entry as serialized by GET /sources.

    # Raw origin string from the DB — e.g. 'tube:hqpornercom'. Used as the
    # `origin=` filter parameter of GET /scenes (substring match).
    origin: str

    # Origin without the 'tube:' prefix — e.g. 'hqpornercom'. Stable tube ID
    # (matches `BaseDirectTubeScraper.sitetag`).
    sitetag: str

    # Human-readable name for the UI — e.g. 'hqporner.com'. Derived from the
    # sitetag by `_sitetag_to_display`. Presentation only; logic stays on
    # sitetag/origin.
    display_name: str

    # Number of LIVE playback_sources (dead_at IS NULL) per origin. Approximate
    # scene coverage — one scene may have several sources of the same origin
    # (different page_url), so this slightly overstates the distinct scene
    # count, but it is good enough for orientation.
    scene_count: int

    # MAX(last_seen_at) — the most recent scrape for this origin. Lets mobile
    # show "scraped Xh ago" and sort by freshness.
    last_scraped_at: datetime | None
|
||||
|
||||
|
||||
class SourceListOut(BaseModel):
    # Response envelope of GET /sources (the `response_model` of `list_sources`).

    # The source entries, in the order produced by `list_sources`.
    items: list[SourceOut]
    # Number of entries in `items` (`list_sources` sets it to `len(items)`).
    total: int
|
||||
|
||||
|
||||
# Hard-coded display-name overrides for edge cases. Most sitetags map cleanly
# through the `_TLD_RE` fallback in `_sitetag_to_display`
# (`hqpornercom` → `hqporner.com`), but some tubes have unusual TLDs or no
# dot-less TLD suffix in their sitetag at all.
_DISPLAY_OVERRIDES: dict[str, str] = {
    "fpoxxx": "fpo.xxx",
    "siskavideo": "siska.video",
    "porn4dayspw": "porn4days.pw",
    "porn00org": "porn00.org",
    "freshpornoorg": "freshporno.org",
    "pornxpph": "pornxp.ph",
    "0dayxxcom": "0dayxx.com",
    "shyfapnet": "shyfap.net",
    "hdporngg": "hdporn.gg",
    "fullmoviesxxx": "fullmovies.xxx",
    "latestleaksco": "latestleaks.co",
    "xxxfreewatch": "xxxfreewatch.com",
    "watchporn": "watchporn.to",
}


# Splits a sitetag into (name, tld) for the four suffixes listed in the
# pattern; the lazy first group requires at least one character before the TLD.
_TLD_RE = re.compile(r"^(.+?)(com|org|net|info)$")
|
||||
|
||||
|
||||
def _sitetag_to_display(sitetag: str) -> str:
    """Turn a sitetag into a readable host name (`hqpornercom` → `hqporner.com`).

    Lookup order: an explicit entry in `_DISPLAY_OVERRIDES` wins; otherwise a
    trailing TLD recognised by `_TLD_RE` is split off with a dot; if neither
    applies the sitetag is returned unchanged.
    """
    override = _DISPLAY_OVERRIDES.get(sitetag)
    if override is not None:
        return override

    tld_match = _TLD_RE.match(sitetag)
    if tld_match is None:
        return sitetag

    stem, tld = tld_match.group(1), tld_match.group(2)
    return f"{stem}.{tld}"
|
||||
|
||||
|
||||
@router.get("", response_model=SourceListOut)
def list_sources(
    session: Annotated[Session, Depends(get_session)],
) -> SourceListOut:
    """Return the tube sources that have LIVE playback_sources.

    Filter: `dead_at IS NULL` and `origin LIKE 'tube:%'` (canonical:* origins
    are dropped — TPDB trailers carry different semantics). One entry per
    distinct origin, ordered by the number of live rows, highest first.
    """
    live_rows = func.count(PlaybackSource.id)
    stmt = (
        select(
            PlaybackSource.origin,
            live_rows.label("scene_count"),
            func.max(PlaybackSource.last_seen_at).label("last_scraped_at"),
        )
        .where(
            PlaybackSource.dead_at.is_(None),
            PlaybackSource.origin.like("tube:%"),
        )
        .group_by(PlaybackSource.origin)
        .order_by(live_rows.desc())
    )

    items: list[SourceOut] = []
    for origin, count, newest_seen in session.execute(stmt).all():
        # Strip the 'tube:' prefix; an origin without it is kept as-is.
        tag = origin.removeprefix("tube:")
        items.append(
            SourceOut(
                origin=origin,
                sitetag=tag,
                display_name=_sitetag_to_display(tag),
                scene_count=count,
                last_scraped_at=newest_seen,
            )
        )

    return SourceListOut(items=items, total=len(items))
|
||||
|
|
@ -86,6 +86,53 @@ DEFAULT_UA = (
|
|||
"(KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36"
|
||||
)
|
||||
TOKEN_TTL_SEC = 4 * 60 * 60 # 4h
|
||||
|
||||
|
||||
# URL-level redirect cache: target_url -> (final_resolved_url, expires_ts).
# The mobile ExoPlayer issues range requests per seek/preload — each one hits the
# proxy with the same token, and the proxy GETs target_url. For
# `porntrex.com/get_file/...` (also fpoxxx, freshporno) the URL is **single-use**:
# the first GET → 302 → CDN URL (time-bound), the second GET → 410. Without the
# cache: the second range request = 410 → ExoPlayer fails → mobile falls back to
# `Linking.openURL(page_url)` → ad (bug reports `cee51c76`, `e2e365e3` 2026-05-22).
#
# With the cache: the first GET follows the redirect and the final URL is cached.
# Later range requests go straight to the CDN URL, which is time-bound (~1-2h),
# not single-use. Mobile plays to the end without falling back.
#
# TTL 1800s = 30 min: shorter than a typical CDN signed-URL lifetime (~1h+), so
# stale entries do not cause 403 spam. After expiry mobile retries /resolve →
# fresh token.
_REDIRECT_CACHE: dict[str, tuple[str, float]] = {}
_REDIRECT_CACHE_TTL_SEC = 1800
# Entry count above which `_redirect_cache_put` starts pruning the cache.
_REDIRECT_CACHE_MAX = 1000
|
||||
|
||||
|
||||
def _redirect_cache_get(target_url: str) -> str | None:
    """Return the cached final URL for *target_url*, or None on a miss.

    An entry whose expiry timestamp already lies in the past counts as a miss
    and is removed from the cache before returning.
    """
    cached = _REDIRECT_CACHE.get(target_url)
    if not cached:
        return None

    resolved_url, expires_at = cached
    if expires_at >= time.time():
        return resolved_url

    # Expired: drop it so the next lookup does not have to re-check it.
    _REDIRECT_CACHE.pop(target_url, None)
    return None
|
||||
|
||||
|
||||
def _redirect_cache_put(target_url: str, final_url: str) -> None:
    """Remember that *target_url* resolved to *final_url* for the cache TTL.

    Nothing is stored when *final_url* is empty or identical to *target_url*
    (no redirect happened, so there is nothing to shortcut).

    When the insert pushes the cache above `_REDIRECT_CACHE_MAX`, expired
    entries are purged first. If the cache is still over the limit, the entries
    closest to expiry are evicted until the limit holds again, so the cache
    size stays bounded even when every entry is still live.
    """
    if not final_url or target_url == final_url:
        return

    now = time.time()
    _REDIRECT_CACHE[target_url] = (final_url, now + _REDIRECT_CACHE_TTL_SEC)
    if len(_REDIRECT_CACHE) <= _REDIRECT_CACHE_MAX:
        return

    # Work on a snapshot so the dict is never mutated while being iterated.
    live: list[tuple[float, str]] = []
    for key, (_, expires_at) in list(_REDIRECT_CACHE.items()):
        if expires_at < now:
            _REDIRECT_CACHE.pop(key, None)
        else:
            live.append((expires_at, key))

    overflow = len(live) - _REDIRECT_CACHE_MAX
    if overflow > 0:
        # All entries share one TTL, so the smallest expiry is the oldest
        # insert; the entry added above has the largest expiry and survives.
        live.sort()
        for _, key in live[:overflow]:
            _REDIRECT_CACHE.pop(key, None)
|
||||
|
||||
|
||||
def _redirect_cache_invalidate(target_url: str) -> None:
    """Forget the cached redirect for *target_url*; a missing entry is ignored."""
    try:
        del _REDIRECT_CACHE[target_url]
    except KeyError:
        pass
|
||||
|
||||
|
||||
HOP_BY_HOP = {
|
||||
"connection",
|
||||
"keep-alive",
|
||||
|
|
@ -390,12 +437,17 @@ async def proxy_stream(
|
|||
request: Request,
|
||||
) -> Response:
|
||||
payload = parse_token(token)
|
||||
target = payload["u"]
|
||||
original_target = payload["u"]
|
||||
referer = payload["r"] or None
|
||||
use_impersonate = bool(payload.get("i"))
|
||||
refetch_url = payload.get("rf")
|
||||
refetch_hoster = payload.get("rh")
|
||||
|
||||
# Jeśli ten target był już wcześniej follow-redirect-ed, użyj cached final URL.
|
||||
# Powód: porntrex `get_file/` 410 po reuse — patrz `_REDIRECT_CACHE` docstring.
|
||||
cached_target = _redirect_cache_get(original_target)
|
||||
target = cached_target or original_target
|
||||
|
||||
# Forwardujemy Range header (HLS/MP4 player robi byte-range fetches dla seek/preload)
|
||||
headers = _build_headers(referer)
|
||||
range_h = request.headers.get("range")
|
||||
|
|
@ -437,8 +489,21 @@ async def proxy_stream(
|
|||
ups_headers = dict(upstream.headers)
|
||||
await upstream.aclose()
|
||||
await client.aclose()
|
||||
# Cached final URL zwrócił error (np. CDN signed-URL expired, 403/410) —
|
||||
# invaliduj cache i daj mobile retry przez fresh /resolve. Bez tego stale
|
||||
# cache trzymałby martwy CDN URL przez 30 min (TTL).
|
||||
if cached_target is not None and status in (401, 403, 404, 410):
|
||||
_redirect_cache_invalidate(original_target)
|
||||
return _upstream_error_response(status, ups_headers, target)
|
||||
|
||||
# Pierwszy successful pass dla single-use targets (np. porntrex get_file):
|
||||
# cache resolved final URL (po follow_redirects). Następne range-requesty
|
||||
# pójdą direct w CDN URL — get_file nie dostaje drugiego hita.
|
||||
if cached_target is None:
|
||||
final_url = str(upstream.url)
|
||||
if final_url != original_target:
|
||||
_redirect_cache_put(original_target, final_url)
|
||||
|
||||
ct = (upstream.headers.get("content-type") or "").lower()
|
||||
is_m3u8 = (
|
||||
path_suggests_m3u8
|
||||
|
|
|
|||
|
|
@ -72,6 +72,46 @@ _CHAPTER_RE = re.compile(
|
|||
r'<a\s+href="#"\s+class="js-list-item"\s+data-index="(\d+)">([^<]+)</a>',
|
||||
re.IGNORECASE,
|
||||
)
|
||||
# `videoList` JS array on the detail page — may hold multiple parts (Video.js playlist):
#   var videoList = [{"sources":[{"src":"...part1.mp4","type":"video/mp4"}]}, ...]
# Without parsing it the mobile WebView plays only the first part and skips the rest.
# Bug reports `c5693926`/`418270e4` 2026-05-21 ("ładuje tylko 1 z 4 części").
_VIDEO_LIST_RE = re.compile(r"var\s+videoList\s*=\s*(\[.*?\])\s*;", re.IGNORECASE | re.DOTALL)
_VIDEO_SRC_RE = re.compile(r'"src"\s*:\s*"([^"]+\.mp4[^"]*)"', re.IGNORECASE)


def _unescape_js_string(raw: str) -> str:
    """Decode the inside of a JSON string literal (`\\/`, `\\uXXXX`, ...).

    Falls back to undoing only the `\\/` escape when *raw* is not a valid JSON
    string body, which is what the extractor did before.
    """
    import json  # local import: the module's import block is left untouched

    try:
        return json.loads(f'"{raw}"')
    except ValueError:
        return raw.replace("\\/", "/")


def extract_video_parts(html: str) -> list[tuple[str, str]]:
    """Extract the list of MP4 parts from a paradisehill detail page.

    Args:
        html: Raw HTML of the detail page.

    Returns:
        `[(mp4_url, label), ...]`, e.g. `[(".../part1.mp4", "Part 1"), ...]`,
        numbered in document order. Empty when `videoList` is absent or holds
        no `.mp4` sources (e.g. login-only movies).
    """
    list_match = _VIDEO_LIST_RE.search(html)
    if not list_match:
        return []
    return [
        # The literal is JSON-escaped in the page, so decode it fully rather
        # than only un-escaping slashes (URLs may carry `\uXXXX` escapes).
        (_unescape_js_string(src_match.group(1)), f"Part {index}")
        for index, src_match in enumerate(
            _VIDEO_SRC_RE.finditer(list_match.group(1)), start=1
        )
    ]
|
||||
|
||||
|
||||
def fetch_and_extract_parts(page_url: str, *, timeout: float = 20.0) -> list[tuple[str, str]]:
    """Resolve-time helper: download the detail page and extract its videoList parts.

    Used by `app.api.playback.resolve_movie_playback` for origin='paradisehill'.

    Args:
        page_url: URL of the detail page to download.
        timeout: Timeout handed to the `httpx.Client`.

    Returns:
        Whatever `extract_video_parts` yields for the downloaded HTML.

    Raises:
        httpx.HTTPStatusError: when the page answers with an error status
            (raised by `raise_for_status`).
    """
    request_headers = {
        "User-Agent": USER_AGENT,
        "Cookie": "is18=1",  # presumably the site's age-gate cookie — confirm
        "Accept-Language": "en-US,en;q=0.9",
    }
    with httpx.Client(
        timeout=timeout,
        follow_redirects=True,
        headers=request_headers,
    ) as http:
        response = http.get(page_url)
        response.raise_for_status()
        page_html = response.text
    return extract_video_parts(page_html)
|
||||
# Listing page item:
|
||||
_LIST_ITEM_RE = re.compile(
|
||||
r'<div\s+class="item\s+list-film-item"[^>]*>\s*'
|
||||
|
|
@ -230,15 +270,32 @@ def _parse_detail(hex_id: str, html: str) -> RawMovie | None:
|
|||
|
||||
# Genre — pierwszy itemprop="genre" w samym block-inside (nie w recommendations).
|
||||
# Recommended films też mają itemprop="genre" więc match limity do block-inside.
|
||||
# Wcześniejszy regex wymagał `</div></div><div class="similar"` — ale paradisehill
|
||||
# czasami ma `</div></noindex>...<div class="similar"` (banner skin z 2026-05-19),
|
||||
# przez co block_match failował → fallback do html[:8000] → 0 tagów. Bug-report
|
||||
# `3c999b27` 2026-05-21 ("Brak kategorii"). Robust: szukaj similar jako stop boundary,
|
||||
# bez wymagania zamknięcia konkretnymi `</div>`.
|
||||
tags: list[RawTag] = []
|
||||
block_match = re.search(
|
||||
r'<div\s+class="block-inside"[^>]*itemtype="http://schema\.org/Movie"[^>]*>'
|
||||
r'(.*?)</div>\s*</div>\s*<div\s+class="similar',
|
||||
block_start = re.search(
|
||||
r'<div\s+class="block-inside"[^>]*itemtype="http://schema\.org/Movie"[^>]*>',
|
||||
html,
|
||||
re.DOTALL,
|
||||
)
|
||||
block = block_match.group(1) if block_match else html[:8000]
|
||||
for m_genre in re.finditer(r'itemprop="genre"[^>]*>([^<]+)</', block, re.IGNORECASE):
|
||||
if block_start:
|
||||
rest = html[block_start.end():]
|
||||
# Stop boundary: pierwszy <div class="similar...">. Wszystko przedtem to
|
||||
# właściwa zawartość filmu (genre/cast/itd.); reszta to recommendations
|
||||
# i komentarze które mają własne itemprop="genre".
|
||||
stop = re.search(r'<div\s+class="similar', rest)
|
||||
block = rest[: stop.start()] if stop else rest[:12000]
|
||||
else:
|
||||
block = html[:8000]
|
||||
# Paradisehill miesza dwa szablony per-page:
|
||||
# v1: `itemprop="genre">Female Domination</span>`
|
||||
# v2: `itemprop="genre"><a href="/category/...">All Sex</a></span>` (od 2026-05)
|
||||
# Optional `<a>` wrapper między `itemprop` a tekstem — bez tego v2 dawał empty.
|
||||
for m_genre in re.finditer(
|
||||
r'itemprop="genre"[^>]*>\s*(?:<a[^>]*>)?\s*([^<]+)', block, re.IGNORECASE,
|
||||
):
|
||||
name = _decode_html(m_genre.group(1).strip())
|
||||
if name and len(tags) < 10:
|
||||
tags.append(RawTag(name=name, slug=_slugify(name)))
|
||||
|
|
|
|||
|
|
@ -99,9 +99,14 @@ _REGISTRY: dict[str, Callable[[str], list[StreamSource] | None]] = {
|
|||
# bandwidth + VPS anonimowość priorytet. WebView fallback → mobile pobiera embed
|
||||
# z phone IP, KVS player JS decoduje video_url, ExoPlayer odtwarza direct z CDN.
|
||||
"freshpornoorg": _vps_blocked_fallback.extract,
|
||||
# porn00 / pornxp — IP-bound CDN tokens. Pre-public WebView fallback (bandwidth +
|
||||
# anonimowość VPS). Niski volume (84 scen), trivial.
|
||||
"porn00org": _vps_blocked_fallback.extract,
|
||||
# porn00 — KVS engine z v-acctoken w URL. Backend extract działa (zweryfikowane
|
||||
# 2026-05-23), zwraca świeże get_file URL-e z `force_proxy=True` flag.
|
||||
# `_proxify_link` rozwija je przez VPS proxy (CDN token IP-bound do VPS, mobile
|
||||
# direct = 403). Bug-reports `5037b3e3`/`e8e3198b` 2026-05-22: WebView fallback
|
||||
# pokazywał reklamę full-screen (porn00.org ma agresywny ad-network) — mobile
|
||||
# nigdy nie dochodził do `<video>` tag dla INJECTED_JS scrape. Z fresh extract
|
||||
# mobile dostaje proxy URL od razu, ExoPlayer gra bez WebView.
|
||||
"porn00org": porn00.extract,
|
||||
"pornxpph": _vps_blocked_fallback.extract,
|
||||
# Direct-scraping tubes (mają też search scraper w connectors/direct_scrapers/)
|
||||
# — używają identycznego embed-iframe pattern dla streamingu.
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ from app.api.playback import movies_router as movies_playback_router
|
|||
from app.api.playback import router as playback_router
|
||||
from app.api.scene_favorites import router as scene_favorites_router
|
||||
from app.api.scenes import router as scenes_router
|
||||
from app.api.sources import router as sources_router
|
||||
from app.api.stream_proxy import router as stream_proxy_router
|
||||
from app.api.taxonomies import router as taxonomies_router
|
||||
from app.api.watch import router as watch_router
|
||||
|
|
@ -66,6 +67,7 @@ if _settings.sentry_dsn:
|
|||
|
||||
app = FastAPI(title="goon", version="0.1.8")
|
||||
app.include_router(scenes_router)
|
||||
app.include_router(sources_router)
|
||||
app.include_router(movies_router)
|
||||
app.include_router(playback_router)
|
||||
app.include_router(movies_playback_router)
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ worker. Dla multi-worker trzebaby Redis/SQLAlchemy job store + distributed lock.
|
|||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
from apscheduler.schedulers.blocking import BlockingScheduler
|
||||
|
|
@ -31,6 +32,20 @@ from app.scheduler.performer_driven import run_continuous_one_at_a_time, run_per
|
|||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Fixed "epoch" for IntervalTrigger.start_date — the anchor of the fire-time grid.
# Without start_date APScheduler computes next_run_time = add_job_time + interval, so
# every worker restart (and there are many — manual deploys, OOM, rebuilt image) pushes
# the next fire back by a full interval. Bug reports 2026-05-19 (`93d3c485` "brak
# freshporno") and 2026-05-23 (`2fbf1c73` "Czemu nie ma nowych filmów?") are exactly
# this case: the worker was restarted 15× within 3 days → movie_ingest (24h) never
# fired after 2026-05-20 05:29.
#
# With a fixed start_date in the past, next_run_time lies on a grid of every N hours
# from this anchor → a worker restart does not change when the next fire happens.
# 05:00 UTC is 07:00 in Poland during summer time (06:00 in winter): low traffic, no
# collision with manual deploys during working hours.
INTERVAL_ANCHOR = datetime(2026, 1, 1, 5, 0, tzinfo=timezone.utc)
|
||||
|
||||
|
||||
def _job_tpdb() -> None:
|
||||
log.info("[scheduler] tpdb delta starting")
|
||||
try:
|
||||
|
|
@ -147,7 +162,7 @@ def build_scheduler(cfg: dict[str, Any]) -> BlockingScheduler:
|
|||
if cfg.get("tpdb_hours"):
|
||||
sched.add_job(
|
||||
_job_tpdb,
|
||||
IntervalTrigger(hours=cfg["tpdb_hours"]),
|
||||
IntervalTrigger(hours=cfg["tpdb_hours"], start_date=INTERVAL_ANCHOR),
|
||||
id="tpdb",
|
||||
replace_existing=True,
|
||||
max_instances=1,
|
||||
|
|
@ -158,7 +173,7 @@ def build_scheduler(cfg: dict[str, Any]) -> BlockingScheduler:
|
|||
if cfg.get("stashdb_hours"):
|
||||
sched.add_job(
|
||||
_job_stashdb,
|
||||
IntervalTrigger(hours=cfg["stashdb_hours"]),
|
||||
IntervalTrigger(hours=cfg["stashdb_hours"], start_date=INTERVAL_ANCHOR),
|
||||
id="stashdb",
|
||||
replace_existing=True,
|
||||
max_instances=1,
|
||||
|
|
@ -170,7 +185,7 @@ def build_scheduler(cfg: dict[str, Any]) -> BlockingScheduler:
|
|||
top_n = cfg.get("performer_driven_top_n") or 20
|
||||
sched.add_job(
|
||||
lambda: _job_performer_driven(top_n),
|
||||
IntervalTrigger(hours=cfg["performer_driven_hours"]),
|
||||
IntervalTrigger(hours=cfg["performer_driven_hours"], start_date=INTERVAL_ANCHOR),
|
||||
id="performer_driven",
|
||||
replace_existing=True,
|
||||
max_instances=1,
|
||||
|
|
@ -186,7 +201,7 @@ def build_scheduler(cfg: dict[str, Any]) -> BlockingScheduler:
|
|||
max_pages = cfg.get("browse_latest_max_pages") or 5
|
||||
sched.add_job(
|
||||
lambda: _job_browse_latest(max_pages),
|
||||
IntervalTrigger(hours=cfg["browse_latest_hours"]),
|
||||
IntervalTrigger(hours=cfg["browse_latest_hours"], start_date=INTERVAL_ANCHOR),
|
||||
id="browse_latest",
|
||||
replace_existing=True,
|
||||
max_instances=1,
|
||||
|
|
@ -200,7 +215,7 @@ def build_scheduler(cfg: dict[str, Any]) -> BlockingScheduler:
|
|||
if cfg.get("bulk_dedup_hours"):
|
||||
sched.add_job(
|
||||
_job_bulk_dedup_performers,
|
||||
IntervalTrigger(hours=cfg["bulk_dedup_hours"]),
|
||||
IntervalTrigger(hours=cfg["bulk_dedup_hours"], start_date=INTERVAL_ANCHOR),
|
||||
id="bulk_dedup_performers",
|
||||
replace_existing=True,
|
||||
max_instances=1,
|
||||
|
|
@ -211,7 +226,7 @@ def build_scheduler(cfg: dict[str, Any]) -> BlockingScheduler:
|
|||
if cfg.get("movie_ingest_hours"):
|
||||
sched.add_job(
|
||||
_job_movie_ingest,
|
||||
IntervalTrigger(hours=cfg["movie_ingest_hours"]),
|
||||
IntervalTrigger(hours=cfg["movie_ingest_hours"], start_date=INTERVAL_ANCHOR),
|
||||
id="movie_ingest",
|
||||
replace_existing=True,
|
||||
max_instances=1,
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ import type {
|
|||
ScenesListParams,
|
||||
SceneListOut,
|
||||
SceneOut,
|
||||
SourceListOut,
|
||||
StudioListOut,
|
||||
TagListOut,
|
||||
WatchListOut,
|
||||
|
|
@ -108,8 +109,12 @@ export class GoonClient {
|
|||
if (params.has_playback !== undefined) qs.set('has_playback', String(params.has_playback));
|
||||
if (params.has_animated_thumbnail !== undefined)
|
||||
qs.set('has_animated_thumbnail', String(params.has_animated_thumbnail));
|
||||
if (params.min_duration_sec !== undefined)
|
||||
qs.set('min_duration_sec', String(params.min_duration_sec));
|
||||
// Default: filtrujemy sceny <60s — bug-report 2026-05-23 (40cd28aa):
|
||||
// "Takie sceny po 1 min to można wywalić". Pornapp/freshporno czasem
|
||||
// zassuje teasery/trailery 30-50s, które są bezużyteczne na listach.
|
||||
// Caller może override przez explicit 0 (lub null) — np. admin browse.
|
||||
const minDur = params.min_duration_sec ?? 60;
|
||||
if (minDur > 0) qs.set('min_duration_sec', String(minDur));
|
||||
if (params.max_duration_sec !== undefined)
|
||||
qs.set('max_duration_sec', String(params.max_duration_sec));
|
||||
if (params.released_within_days !== undefined)
|
||||
|
|
@ -231,6 +236,10 @@ export class GoonClient {
|
|||
});
|
||||
}
|
||||
|
||||
/**
 * Fetch the source list from the `/sources` endpoint.
 *
 * Takes no parameters and simply delegates to `this.request('/sources')`;
 * the result is typed as `SourceListOut`.
 */
async listSources(): Promise<SourceListOut> {
  return this.request('/sources');
}
|
||||
|
||||
async listStudios(params: {
|
||||
q?: string;
|
||||
order?: 'name' | 'scene_count';
|
||||
|
|
|
|||
|
|
@ -18,6 +18,8 @@ import { PerformersScreen } from './screens/PerformersScreen';
|
|||
import { PlayerScreen } from './screens/PlayerScreen';
|
||||
import { ScenesScreen } from './screens/ScenesScreen';
|
||||
import { SceneDetailScreen } from './screens/SceneDetailScreen';
|
||||
import { SiteScenesScreen } from './screens/SiteScenesScreen';
|
||||
import { SitesScreen } from './screens/SitesScreen';
|
||||
import { StudioScenesScreen } from './screens/StudioScenesScreen';
|
||||
import { TagScenesScreen } from './screens/TagScenesScreen';
|
||||
import { TagsScreen } from './screens/TagsScreen';
|
||||
|
|
@ -26,6 +28,11 @@ import { theme } from './theme';
|
|||
export type RootStackParamList = {
|
||||
Scenes: undefined;
|
||||
Movies: undefined;
|
||||
Sites: undefined;
|
||||
// `origin`: raw playback_source.origin (np. 'tube:hqpornercom'). Idzie do
|
||||
// listScenes({origin}) — backend robi substring match. `name`: display name
|
||||
// do title bara (np. 'hqporner.com').
|
||||
SiteScenes: { origin: string; name: string };
|
||||
MovieDetail: { id: string };
|
||||
SceneDetail: { id: string };
|
||||
Performers: undefined;
|
||||
|
|
@ -66,6 +73,38 @@ export type RootStackParamList = {
|
|||
|
||||
const Stack = createNativeStackNavigator<RootStackParamList>();
|
||||
|
||||
type TopTab = 'Scenes' | 'Movies' | 'Sites';
|
||||
|
||||
/**
 * Header tab strip for the top-level screens (Scenes / Movies / Sites).
 *
 * The tab equal to `current` is drawn in the accent colour with a bold weight;
 * the others are muted. Pressing an inactive tab calls `onNavigate` with that
 * tab, pressing the active one does nothing.
 */
function TopTabs({
  current,
  onNavigate,
}: {
  current: TopTab;
  onNavigate: (tab: TopTab) => void;
}) {
  const tabs: TopTab[] = ['Scenes', 'Movies', 'Sites'];

  const renderTab = (tab: TopTab) => {
    const isActive = tab === current;
    const handlePress = () => {
      // The active tab is inert: only inactive tabs trigger navigation.
      if (!isActive) {
        onNavigate(tab);
      }
    };
    return (
      <Pressable key={tab} onPress={handlePress} hitSlop={12}>
        <Text
          style={{
            color: isActive ? theme.accent : theme.muted,
            fontSize: 14,
            fontWeight: isActive ? '700' : '400',
          }}
        >
          {tab}
        </Text>
      </Pressable>
    );
  };

  return (
    <View style={{ flexDirection: 'row', gap: 14, paddingHorizontal: 12, alignItems: 'center' }}>
      {tabs.map(renderTab)}
    </View>
  );
}
|
||||
|
||||
const navTheme = {
|
||||
...DefaultTheme,
|
||||
dark: true,
|
||||
|
|
@ -106,17 +145,15 @@ export function AppNavigator({ onLogout, client, appVersion }: AppNavigatorProps
|
|||
name="Scenes"
|
||||
component={ScenesScreen}
|
||||
options={({ navigation }) => ({
|
||||
title: 'Scenes',
|
||||
title: '',
|
||||
headerLeft: () => (
|
||||
<View style={{ paddingHorizontal: 12 }}>
|
||||
<Text style={{ color: theme.accent, fontSize: 14, fontWeight: '700' }}>Scenes</Text>
|
||||
</View>
|
||||
<TopTabs
|
||||
current="Scenes"
|
||||
onNavigate={(t) => navigation.replace(t)}
|
||||
/>
|
||||
),
|
||||
headerRight: () => (
|
||||
<View style={{ flexDirection: 'row', gap: 14, alignItems: 'center' }}>
|
||||
<Pressable onPress={() => navigation.replace('Movies')} hitSlop={12}>
|
||||
<Text style={{ color: theme.muted, fontSize: 14 }}>Movies</Text>
|
||||
</Pressable>
|
||||
<Pressable onPress={() => navigation.navigate('Donate')} hitSlop={12}>
|
||||
<Text style={{ color: theme.accent, fontSize: 18 }}>♥</Text>
|
||||
</Pressable>
|
||||
|
|
@ -140,17 +177,13 @@ export function AppNavigator({ onLogout, client, appVersion }: AppNavigatorProps
|
|||
title: '',
|
||||
headerBackVisible: false,
|
||||
headerLeft: () => (
|
||||
<Pressable
|
||||
onPress={() => navigation.replace('Scenes')}
|
||||
hitSlop={12}
|
||||
style={{ paddingHorizontal: 12 }}
|
||||
>
|
||||
<Text style={{ color: theme.muted, fontSize: 14 }}>Scenes</Text>
|
||||
</Pressable>
|
||||
<TopTabs
|
||||
current="Movies"
|
||||
onNavigate={(t) => navigation.replace(t)}
|
||||
/>
|
||||
),
|
||||
headerRight: () => (
|
||||
<View style={{ flexDirection: 'row', gap: 14, alignItems: 'center' }}>
|
||||
<Text style={{ color: theme.accent, fontSize: 14, fontWeight: '700' }}>Movies</Text>
|
||||
<Pressable onPress={onLogout} hitSlop={12}>
|
||||
<Text style={{ color: theme.muted, fontSize: 13 }}>Sign out</Text>
|
||||
</Pressable>
|
||||
|
|
@ -158,6 +191,32 @@ export function AppNavigator({ onLogout, client, appVersion }: AppNavigatorProps
|
|||
),
|
||||
})}
|
||||
/>
|
||||
<Stack.Screen
|
||||
name="Sites"
|
||||
component={SitesScreen}
|
||||
options={({ navigation }) => ({
|
||||
title: '',
|
||||
headerBackVisible: false,
|
||||
headerLeft: () => (
|
||||
<TopTabs
|
||||
current="Sites"
|
||||
onNavigate={(t) => navigation.replace(t)}
|
||||
/>
|
||||
),
|
||||
headerRight: () => (
|
||||
<View style={{ flexDirection: 'row', gap: 14, alignItems: 'center' }}>
|
||||
<Pressable onPress={onLogout} hitSlop={12}>
|
||||
<Text style={{ color: theme.muted, fontSize: 13 }}>Sign out</Text>
|
||||
</Pressable>
|
||||
</View>
|
||||
),
|
||||
})}
|
||||
/>
|
||||
<Stack.Screen
|
||||
name="SiteScenes"
|
||||
component={SiteScenesScreen}
|
||||
options={{ title: 'Site scenes' }}
|
||||
/>
|
||||
<Stack.Screen name="MovieDetail" component={MovieDetailScreen} options={{ title: '' }} />
|
||||
<Stack.Screen name="SceneDetail" component={SceneDetailScreen} options={{ title: '' }} />
|
||||
<Stack.Screen name="Performers" component={PerformersScreen} options={{ title: 'Performers' }} />
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ import {
|
|||
setTimeoutSeconds,
|
||||
verifyPin,
|
||||
} from '../lib/applock';
|
||||
import { APP_VERSION } from '../lib/appVersion';
|
||||
import { theme } from '../theme';
|
||||
import { PinEntry } from './PinEntry';
|
||||
|
||||
|
|
@ -255,6 +256,17 @@ export function AppLockSettingsScreen() {
|
|||
Aplikacja jest również ukryta na liście ostatnich aplikacji i blokuje zrzuty ekranu.
|
||||
</Text>
|
||||
</View>
|
||||
|
||||
<View style={styles.section}>
|
||||
<Text style={styles.sectionTitle}>O aplikacji</Text>
|
||||
<View style={styles.row}>
|
||||
<View style={{ flex: 1 }}>
|
||||
<Text style={styles.label}>Wersja</Text>
|
||||
<Text style={styles.hint}>Bieżący JS bundle (OTA-updated)</Text>
|
||||
</View>
|
||||
<Text style={styles.versionValue}>{APP_VERSION}</Text>
|
||||
</View>
|
||||
</View>
|
||||
</ScrollView>
|
||||
);
|
||||
}
|
||||
|
|
@ -307,6 +319,7 @@ const styles = StyleSheet.create({
|
|||
borderColor: theme.border,
|
||||
},
|
||||
chipActive: { backgroundColor: theme.accent, borderColor: theme.accent },
|
||||
versionValue: { color: theme.fg, fontSize: 15, fontWeight: '700', fontVariant: ['tabular-nums'] },
|
||||
chipText: { color: theme.muted, fontSize: 13 },
|
||||
chipTextActive: { color: '#fff', fontWeight: '700' },
|
||||
});
|
||||
|
|
|
|||
|
|
@ -190,6 +190,37 @@ function WatchChip({
|
|||
// best.stream_url to backend proxy URL gdy direct video się udało wyciągnąć,
|
||||
// lub embed_url gdy hoster nieudany — Player handler-uje obie ścieżki.
|
||||
// _absolutizeProxyUrls w GoonClient już prefixuje /proxy/... baseUrl-em.
|
||||
|
||||
// Multipart: paradisehill movies mają `videoList` z N MP4 parts. Backend
|
||||
// zwraca każdy jako StreamLink z `quality = "Part N"` + `raw.part_label`.
|
||||
// Bez part-picker mobile używałby tylko best (Part 1) — user nie miałby
|
||||
// dostępu do reszty (bug-reports `c5693926`/`418270e4` 2026-05-21).
|
||||
const links = res.links ?? [];
|
||||
const parts = links.filter((l) => l.raw && typeof l.raw === 'object' && (l.raw as any).part_label);
|
||||
if (parts.length > 1) {
|
||||
Alert.alert(
|
||||
title,
|
||||
'Film składa się z kilku części. Wybierz którą zacząć.',
|
||||
[
|
||||
...parts.map((p) => ({
|
||||
text: ((p.raw as any).part_label as string) ?? p.quality ?? 'Part',
|
||||
onPress: () => {
|
||||
navigation.navigate('Player', {
|
||||
url: p.stream_url || p.embed_url || pb.page_url,
|
||||
sceneId: movieId,
|
||||
durationSec: pb.duration_sec ?? null,
|
||||
title: `${title} — ${(p.raw as any).part_label ?? p.quality}`,
|
||||
mode: p.stream_url ? 'video' : 'webview',
|
||||
fallbackEmbedUrl: p.embed_url || pb.embed_url || pb.page_url,
|
||||
});
|
||||
},
|
||||
})),
|
||||
{ text: 'Anuluj', style: 'cancel' as const },
|
||||
],
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
const target = res.best?.stream_url || res.best?.embed_url || pb.page_url;
|
||||
const fallbackEmbed = res.best?.embed_url || pb.embed_url || pb.page_url;
|
||||
navigation.navigate('Player', {
|
||||
|
|
|
|||
|
|
@ -84,18 +84,48 @@ export function PerformerScenesScreen() {
|
|||
// Bug-report 2026-05-16 (6fcaa5f4): xhamster scenes często mają puste thumbnails
|
||||
// (KVS player nie zwraca og:image dla wszystkich) i ubogie tagi. Per-scene enrich
|
||||
// jest on-demand z SceneDetail, ten button robi bulk dla całej listy.
|
||||
//
|
||||
// Auto-loop dla performerów z >50 scen: backend ma cap 50 scen / 55s (nginx 60s
|
||||
// timeout protection). Pojedyncze wywołanie zostawia resztę nieobsłużoną — user
|
||||
// musiałby klikać Rescrape wiele razy. Bug-report `e1fc4f92` 2026-05-17 "Rescrape
|
||||
// miniaturek nie pobrał wszystkich". Auto-loop dopóki backend zwraca `capped=true`
|
||||
// — idempotent (scena z thumb się skipuje na backendzie), więc kolejne iteracje
|
||||
// mielą tylko brakujące. Hard limit 10 iteracji jako safety net (max ~500 scen).
|
||||
const rescrapeMutation = useMutation({
|
||||
mutationFn: () => client.rescrapePerformer(id),
|
||||
mutationFn: async () => {
|
||||
let scenes_processed = 0;
|
||||
let scenes_total = 0;
|
||||
let thumbs_added = 0;
|
||||
let tags_added = 0;
|
||||
let iterations = 0;
|
||||
let last;
|
||||
do {
|
||||
last = await client.rescrapePerformer(id);
|
||||
scenes_processed += last.scenes_processed;
|
||||
thumbs_added += last.thumbs_added;
|
||||
tags_added += last.tags_added;
|
||||
scenes_total = last.scenes_total; // ostatni response ma aktualny total
|
||||
iterations += 1;
|
||||
} while (last.capped && iterations < 10);
|
||||
return {
|
||||
scenes_processed,
|
||||
scenes_total,
|
||||
thumbs_added,
|
||||
tags_added,
|
||||
iterations,
|
||||
capped: last.capped,
|
||||
cap_reason: last.cap_reason,
|
||||
};
|
||||
},
|
||||
onSuccess: (data) => {
|
||||
queryClient.invalidateQueries({ queryKey: ['performer-scenes', id] });
|
||||
// Rescrape też future-prooflinie movies (gdyby backend rozszerzył rescrape
|
||||
// o movie scenes) i SceneDetail (cached thumb/tags zmieniły się).
|
||||
queryClient.invalidateQueries({ queryKey: ['performer-movies', id] });
|
||||
queryClient.invalidateQueries({ queryKey: ['scenes'] });
|
||||
const capNote = data.capped ? ` (cap: ${data.cap_reason || 'limit'})` : '';
|
||||
const iterNote = data.iterations > 1 ? ` (${data.iterations} batches)` : '';
|
||||
const capNote = data.capped ? ` · still capped — retry to continue` : '';
|
||||
Alert.alert(
|
||||
'Rescrape complete',
|
||||
`${data.scenes_processed}/${data.scenes_total} scenes · +${data.thumbs_added} thumbs · +${data.tags_added} tags${capNote}`,
|
||||
`${data.scenes_processed} scenes · +${data.thumbs_added} thumbs · +${data.tags_added} tags${iterNote}${capNote}`,
|
||||
);
|
||||
},
|
||||
onError: (e: any) => {
|
||||
|
|
|
|||
|
|
@ -389,14 +389,18 @@ function NativeVideoPlayer({ params }: { params: RouteParams }) {
|
|||
[player, dur, cancelHide, scheduleHide],
|
||||
);
|
||||
|
||||
// Race: pierwszy aktywny gest wygrywa. Single-tap musi czekać aż double-tap fail.
|
||||
// panSeek na początku — gdy palec ruszy >20px, wygrywa nad tap/long-press.
|
||||
// longPress PRZED doubleTap: Exclusive priorytetyzuje po kolejności, a doubleTap
|
||||
// ma maxDelay=280ms "waiting state" który blokował longPress (minDuration=220ms)
|
||||
// — palec trzymany 220ms nigdy nie aktywował 2x speed bo doubleTap wciąż "myślał"
|
||||
// czy będzie drugi tap. Bug-report 2026-05-16 #7c13a549/#cdff6341 (eporner/hqporner).
|
||||
// panSeek + longPress mają niezależne triggery (motion 20px vs hold 220ms),
|
||||
// więc nie powinny się blokować — Gesture.Race pozwala pierwszemu który ACTIVATE
|
||||
// wygrać natychmiast. Wcześniejszy Exclusive(panSeek, ...) wymagał żeby panSeek
|
||||
// FAIL zanim longPress wystartuje, ale Pan z activeOffsetX nie failuje dopóki
|
||||
// touch trwa — efekt: longPress odpalał się dopiero przy puszczeniu palca
|
||||
// (bug-report 68483c6d 2026-05-23 v0.1.9: "Jak przytrzymuje nic. Dopiero jak
|
||||
// się puści, X2 pojawia się i znika"). Naprawa z 0136b68 (reorder Exclusive)
|
||||
// adresowała tylko interakcję z doubleTap, nie z panSeek.
|
||||
// Tap pair pozostaje Exclusive — singleTap MUSI czekać aż doubleTap fail,
|
||||
// inaczej każdy double-tap byłby najpierw zinterpretowany jako single (toggle controls).
|
||||
const composedGesture = React.useMemo(
|
||||
() => Gesture.Exclusive(panSeek, longPress, doubleTap, singleTap),
|
||||
() => Gesture.Race(panSeek, longPress, Gesture.Exclusive(doubleTap, singleTap)),
|
||||
[panSeek, longPress, doubleTap, singleTap],
|
||||
);
|
||||
|
||||
|
|
@ -637,6 +641,23 @@ const INJECTED_JS = `
|
|||
if (window.__goonPatched) return;
|
||||
window.__goonPatched = true;
|
||||
|
||||
// -- 0. Anti-adblock detection bypass --------------------------------------
|
||||
// Hostery sprawdzają czy ad-script się załadował (np. /js/dnsads.js ustawia
|
||||
// \`window.cRAds\`). Blokujemy te requesty na poziomie AD_HOSTS, więc flag
|
||||
// pozostaje undefined → pełnoekranowy "Disable AdBlock" overlay zakrywa player.
|
||||
// Bug-report \`02444895\` 2026-05-20 (Luluvid czarny ekran): hostery
|
||||
// sprawdzają flag w \`$(function(){})\` które odpala się po ad-script load.
|
||||
// Pre-ustawiamy flagi PRZED kodem strony żeby anti-adblock przeszedł.
|
||||
// Lista jest defensywna — większość overlapuje (dnsads.js ustawia różne nazwy
|
||||
// zależnie od skinu hostera). Nie szkodzi mieć wszystkie ustawione.
|
||||
try {
|
||||
window.cRAds = 1;
|
||||
window.adsbygoogle = window.adsbygoogle || [];
|
||||
window.canRunAds = true;
|
||||
window.cantRunAds = false;
|
||||
window.isAdBlockActive = false;
|
||||
} catch (e) {}
|
||||
|
||||
// -- 1. Ad-network domain blocklist ----------------------------------------
|
||||
// Sync z app/extractors/tubes/_embed_iframe.py:AD_DOMAIN_RE. Match na hostname
|
||||
// — jakikolwiek URL którego host KOŃCZY się tym (uwzględnia subdomeny ib.hoirms.com).
|
||||
|
|
@ -800,6 +821,14 @@ const INJECTED_JS = `
|
|||
f.remove();
|
||||
}
|
||||
});
|
||||
// AdBlock-detection overlays. Defense-in-depth dla bug-report \`02444895\` —
|
||||
// gdyby ktoś wszedł na hostera który NIE używa \`window.cRAds\` flag, usuwamy
|
||||
// div po id/klasie. Luluvid (#adbd.overdiv), streamwish/doodstream warianty.
|
||||
const ADBLOCK_OVERLAY_RE = /(^|\\s)(adbd|adblock|adb-detect|adblocker-detect|overdiv)(\\s|$)/i;
|
||||
document.querySelectorAll('#adbd, .overdiv, [class*="adblock"], [id*="adblock"]').forEach(function(d) {
|
||||
const sig = (d.id || '') + ' ' + (typeof d.className === 'string' ? d.className : '');
|
||||
if (ADBLOCK_OVERLAY_RE.test(sig)) d.remove();
|
||||
});
|
||||
// Również full-screen overlay divs (ads często overlay na video element)
|
||||
document.querySelectorAll('div[style*="z-index"], div[style*="position: fixed"], div[style*="position:fixed"]').forEach(function(d) {
|
||||
const style = d.getAttribute('style') || '';
|
||||
|
|
@ -812,9 +841,44 @@ const INJECTED_JS = `
|
|||
}
|
||||
});
|
||||
};
|
||||
// -- 1.6. Play-poster auto-click -------------------------------------------
|
||||
// Bug-report 5e89ef7e (porndoe): "Trzeba wejść na porndoe, zaakceptować
|
||||
// cookies, dać Play i dopiero idzie wideo". Porndoe (i niektóre inne) NIE
|
||||
// ładują video.src do DOM dopóki user nie kliknie poster-overlay z play
|
||||
// arrow. Bez kliku player JS nie inicjalizuje, INJECTED_JS XHR sniffer się
|
||||
// nie odpala — user widzi statyczny obrazek + reklamy obok.
|
||||
//
|
||||
// Markery klasy/id "play poster": player-poster-play (porndoe), big-play-button
|
||||
// (videojs), vjs-big-play-button, jw-icon-display (jwplayer), btn-big-play,
|
||||
// mejs__overlay-button (mediaelement.js), play-button, btn-play.
|
||||
// Bezpieczeństwo: musi być wewnątrz kontenera z player marker (≤6 przodków).
|
||||
const PLAY_POSTER_RE = /(player-poster-play|player-poster-arrow|big-play-button|vjs-big-play-button|jw-icon-display|btn-big-play|mejs__overlay-button|play-button|btn-play|videoPlayButton)/i;
|
||||
const PLAYER_CTX_RE = /(player|video-js|vjs|jw-player|jwplayer|mejs|videoplayer)/i;
|
||||
const clickPlayPoster = function() {
|
||||
const els = document.querySelectorAll('button, a, div, span, [role="button"]');
|
||||
for (let i = 0; i < els.length; i++) {
|
||||
const el = els[i];
|
||||
const sig = ((typeof el.className === 'string' ? el.className : '') + ' ' + (el.id || ''));
|
||||
if (!PLAY_POSTER_RE.test(sig)) continue;
|
||||
// ≤6-deep container z player markerem.
|
||||
let ctx = el.parentElement, depth = 0, inPlayer = false;
|
||||
while (ctx && depth < 6) {
|
||||
const csig = ((typeof ctx.className === 'string' ? ctx.className : '') + ' ' + (ctx.id || '')).toLowerCase();
|
||||
if (PLAYER_CTX_RE.test(csig)) { inPlayer = true; break; }
|
||||
ctx = ctx.parentElement; depth++;
|
||||
}
|
||||
if (!inPlayer) continue;
|
||||
try {
|
||||
el.click();
|
||||
window.ReactNativeWebView.postMessage(JSON.stringify({type: 'play_poster_clicked'}));
|
||||
} catch (e) {}
|
||||
}
|
||||
};
|
||||
|
||||
setInterval(function() {
|
||||
removeAdIframes();
|
||||
dismissConsent();
|
||||
clickPlayPoster();
|
||||
}, 1000);
|
||||
// Pierwsza próba consent natychmiast (banner bywa w SSR HTML) — bez czekania
|
||||
// na pierwszy tick interwału.
|
||||
|
|
|
|||
214
mobile/src/screens/SiteScenesScreen.tsx
Normal file
214
mobile/src/screens/SiteScenesScreen.tsx
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
// Sceny z konkretnego tube/source — listScenes z origin substring filter.
|
||||
// Bug-report 2026-05-24 (ea6f05f9): top-level Sites browse → tap site → tutaj.
|
||||
//
|
||||
// Sort: release_date DESC żeby user dostał świeże publikacje na górze. Sceny bez
|
||||
// release_date dryfują na koniec — to znany trade-off (patrz freshporno backfill
|
||||
// 2026-05-23, 10390 scen miało null date).
|
||||
//
|
||||
// Infinite scroll bo niektóre tubey mają 100k+ scen (porntrex, xvideos).
|
||||
import { RouteProp, useNavigation, useRoute } from '@react-navigation/native';
|
||||
import type { NativeStackNavigationProp } from '@react-navigation/native-stack';
|
||||
import { useInfiniteQuery } from '@tanstack/react-query';
|
||||
import React, { useState } from 'react';
|
||||
import * as Haptics from 'expo-haptics';
|
||||
import {
|
||||
ActivityIndicator,
|
||||
FlatList,
|
||||
Pressable,
|
||||
StyleSheet,
|
||||
Text,
|
||||
View,
|
||||
} from 'react-native';
|
||||
import { Thumb } from '../components/Thumb';
|
||||
import { useClient } from '../ClientContext';
|
||||
import type { RootStackParamList } from '../navigation';
|
||||
import { theme } from '../theme';
|
||||
import type { SceneOut } from '../types';
|
||||
|
||||
/**
 * Infinite-scrolling list of the scenes that belong to one tube/source.
 *
 * Route params:
 *  - `origin`: forwarded to `client.listScenes` as the origin filter and used
 *    as part of the query cache key.
 *  - `name`: shown as the navigation header title.
 *
 * Pages are requested with `sort: 'release_date'`, 50 scenes at a time.
 */
export function SiteScenesScreen() {
  const client = useClient();
  const navigation =
    useNavigation<NativeStackNavigationProp<RootStackParamList, 'SiteScenes'>>();
  const route = useRoute<RouteProp<RootStackParamList, 'SiteScenes'>>();
  const { origin, name } = route.params;

  // The static screen options only carry a generic title; replace it with the
  // site's display name before the first paint.
  React.useLayoutEffect(() => {
    navigation.setOptions({ title: name });
  }, [navigation, name]);

  const PER_PAGE = 50;
  const {
    data,
    isLoading,
    error,
    refetch,
    isRefetching,
    fetchNextPage,
    hasNextPage,
    isFetchingNextPage,
  } = useInfiniteQuery({
    queryKey: ['site-scenes', origin],
    queryFn: ({ pageParam = 1 }) =>
      client.listScenes({
        origin,
        sort: 'release_date',
        page: pageParam,
        per_page: PER_PAGE,
      }),
    initialPageParam: 1,
    getNextPageParam: (lastPage) => {
      // An empty page means there is nothing left to fetch, even when `total`
      // claims otherwise. Without this guard an over-counted total would make
      // every onEndReached request yet another empty page.
      if (lastPage.items.length === 0) return undefined;
      const loaded = lastPage.page * lastPage.per_page;
      return loaded < lastPage.total ? lastPage.page + 1 : undefined;
    },
  });
  const items = data?.pages.flatMap((p) => p.items) ?? [];
  const total = data?.pages[0]?.total ?? 0;

  return (
    <View style={styles.container}>
      {isLoading && <ActivityIndicator color={theme.fg} style={{ marginTop: 24 }} />}
      {error instanceof Error && <Text style={styles.error}>{error.message}</Text>}

      <FlatList
        data={items}
        keyExtractor={(s) => s.id}
        renderItem={({ item }) => <SceneRow scene={item} />}
        refreshing={isRefetching}
        onRefresh={refetch}
        onEndReached={() => {
          // Guard against FlatList firing onEndReached repeatedly while a
          // page request is already in flight.
          if (hasNextPage && !isFetchingNextPage) fetchNextPage();
        }}
        onEndReachedThreshold={0.5}
        ListHeaderComponent={
          data ? (
            <Text style={styles.subtitle}>
              {total} {total === 1 ? 'scene' : 'scenes'} · sorted by release date
            </Text>
          ) : null
        }
        ListFooterComponent={
          isFetchingNextPage ? (
            <ActivityIndicator color={theme.muted} style={{ marginVertical: 18 }} />
          ) : !hasNextPage && items.length > 0 ? (
            <Text style={styles.muted}>{`${items.length} / ${total}`}</Text>
          ) : null
        }
        // Do not claim "no scenes" while loading or when the request failed;
        // the error message above already explains the empty list.
        ListEmptyComponent={
          !isLoading && !error ? <Text style={styles.muted}>no scenes</Text> : null
        }
        contentContainerStyle={{ paddingBottom: 24 }}
      />
    </View>
  );
}
|
||||
|
||||
/**
 * One row of the site-scenes list: thumbnail, title, metadata lines and a
 * sources summary. Tapping pushes SceneDetail; a long press swaps the static
 * thumbnail for the animated preview until the finger is lifted.
 */
function SceneRow({ scene }: { scene: SceneOut }) {
  const navigation =
    useNavigation<NativeStackNavigationProp<RootStackParamList, 'SiteScenes'>>();
  const [isPreviewing, setIsPreviewing] = useState(false);

  // Show at most three performer names; the remainder is summarised as "+N".
  const performerCount = scene.performers.length;
  const performerNames = scene.performers
    .slice(0, 3)
    .map((p) => p.canonical_name)
    .join(', ');
  const performerOverflow = performerCount > 3 ? ` +${performerCount - 3}` : '';

  // First playback source that offers each kind of thumbnail.
  const animatedUrl = scene.playback_sources.find((s) => s.animated_thumbnail_url)
    ?.animated_thumbnail_url;
  const staticUrl = scene.playback_sources.find((s) => s.thumbnail_url)?.thumbnail_url;

  // Static thumbnail by default (animated as a fallback); the animated one
  // takes over while previewing.
  let displayUrl = staticUrl ?? animatedUrl;
  if (isPreviewing && animatedUrl) {
    displayUrl = animatedUrl;
  }

  const handleLongPress = () => {
    // Nothing to preview without an animated thumbnail.
    if (!animatedUrl) return;
    setIsPreviewing(true);
    // Haptic feedback is best-effort; a rejected promise is ignored.
    Haptics.selectionAsync().catch(() => {});
  };
  const handlePressOut = () => setIsPreviewing(false);
  const openDetail = () => navigation.push('SceneDetail', { id: scene.id });

  const isWatched = scene.finished === true;
  const showMetaLine = scene.release_date || scene.studio;
  const metaLine = [scene.release_date, scene.studio?.name].filter(Boolean).join(' · ');

  const sourceNames = [...new Set(scene.external_refs.map((r) => r.source))].join(' · ');
  const playbackNote =
    scene.playback_sources.length > 0 ? ` ▶ ${scene.playback_sources.length}` : '';
  const watchedNote = isWatched ? ' ✓ watched' : '';

  return (
    <Pressable
      style={[styles.row, isWatched && styles.rowDimmed]}
      onPress={openDetail}
      onLongPress={handleLongPress}
      onPressOut={handlePressOut}
      delayLongPress={180}
    >
      <Thumb url={displayUrl} style={styles.thumbnail} />
      {scene.is_favorite ? (
        <View style={styles.favBadge}>
          <Text style={styles.favBadgeText}>★</Text>
        </View>
      ) : null}
      <View style={styles.rowContent}>
        <Text style={styles.rowTitle} numberOfLines={1}>
          {scene.title}
        </Text>
        {showMetaLine ? (
          <Text style={styles.rowMuted} numberOfLines={1}>
            {metaLine}
          </Text>
        ) : null}
        {performerNames ? (
          <Text style={styles.rowMuted} numberOfLines={1}>
            {performerNames}
            {performerOverflow}
          </Text>
        ) : null}
        <Text style={styles.rowSources}>
          {sourceNames}
          {playbackNote}
          {watchedNote}
        </Text>
      </View>
    </Pressable>
  );
}
|
||||
|
||||
// Styles for SiteScenesScreen and its SceneRow.
const styles = StyleSheet.create({
  // --- screen -------------------------------------------------------------
  container: { flex: 1, paddingHorizontal: 12, paddingTop: 8, backgroundColor: theme.bg },
  subtitle: { paddingHorizontal: 4, marginBottom: 8, color: theme.muted },
  muted: { marginTop: 24, fontSize: 14, textAlign: 'center', color: theme.muted },
  error: { padding: 12, color: theme.bad },

  // --- scene row card -----------------------------------------------------
  row: {
    flexDirection: 'row',
    alignItems: 'center',
    gap: 12,
    padding: 12,
    marginBottom: 10,
    backgroundColor: theme.card,
    borderColor: theme.border,
    borderWidth: 1,
    borderRadius: 12,
    shadowColor: '#000',
    shadowOffset: { width: 0, height: 2 },
    shadowOpacity: 0.2,
    shadowRadius: 4,
    elevation: 3,
  },
  // Applied on top of `row` for scenes already watched to the end.
  rowDimmed: { opacity: 0.45 },
  thumbnail: {
    width: 100,
    height: 56,
    borderRadius: 8,
    backgroundColor: theme.border,
  },
  // Star badge, absolutely positioned near the row's top-left corner.
  favBadge: {
    position: 'absolute',
    top: 6,
    left: 6,
    paddingHorizontal: 5,
    paddingVertical: 1,
    borderRadius: 8,
    backgroundColor: 'rgba(0,0,0,0.7)',
  },
  favBadgeText: { color: theme.accent, fontSize: 12, fontWeight: '700' },

  // --- row text -----------------------------------------------------------
  rowContent: { flex: 1 },
  rowTitle: { marginBottom: 4, fontSize: 16, fontWeight: '700', color: theme.fg },
  rowMuted: { marginTop: 2, fontSize: 14, color: theme.muted },
  rowSources: {
    marginTop: 8,
    fontSize: 12,
    fontWeight: '600',
    textTransform: 'uppercase',
    color: theme.accent,
  },
});
|
||||
290
mobile/src/screens/SitesScreen.tsx
Normal file
290
mobile/src/screens/SitesScreen.tsx
Normal file
|
|
@ -0,0 +1,290 @@
|
|||
// Lista tube źródeł — top-level tab obok Scenes/Movies. Tap → SiteScenes.
|
||||
// Bug-report 2026-05-24 (ea6f05f9): user chce wybierać "pages" obok Scenes
|
||||
// i Movies, widzieć najnowsze sceny z konkretnego scrapowanego site'u.
|
||||
//
|
||||
// Layout: chip-grid analogiczny do TagsScreen — krótkie nazwy (domena.tld)
|
||||
// plus scene_count + relative-time "Xh temu" scraped, jeśli świeży.
|
||||
import { useNavigation } from '@react-navigation/native';
|
||||
import type { NativeStackNavigationProp } from '@react-navigation/native-stack';
|
||||
import { useQuery } from '@tanstack/react-query';
|
||||
import React, { useMemo, useState } from 'react';
|
||||
import {
|
||||
ActivityIndicator,
|
||||
FlatList,
|
||||
Pressable,
|
||||
StyleSheet,
|
||||
Text,
|
||||
TextInput,
|
||||
View,
|
||||
} from 'react-native';
|
||||
import { useClient } from '../ClientContext';
|
||||
import type { RootStackParamList } from '../navigation';
|
||||
import { theme } from '../theme';
|
||||
import type { SourceOut } from '../types';
|
||||
|
||||
// Ordering modes of the sites grid: backend order ("Top") or most recently
// scraped first ("Recent").
type Order = 'popular' | 'recent';

/**
 * Top-level grid of scraped sources. Tapping a chip opens SiteScenes for that
 * source.
 *
 * The full source list is fetched once via `client.listSources()`; searching
 * and ordering are applied client-side on the fetched list.
 */
export function SitesScreen() {
  const client = useClient();
  const navigation =
    useNavigation<NativeStackNavigationProp<RootStackParamList, 'Sites'>>();
  const [q, setQ] = useState('');
  const [debouncedQ, setDebouncedQ] = useState('');
  const [order, setOrder] = useState<Order>('popular');
  const [searchFocused, setSearchFocused] = useState(false);

  // Debounce typing by 250 ms so the list is not re-filtered on every keystroke.
  React.useEffect(() => {
    const t = setTimeout(() => setDebouncedQ(q), 250);
    return () => clearTimeout(t);
  }, [q]);

  const { data, isLoading, error, refetch, isRefetching } = useQuery({
    queryKey: ['sources'],
    queryFn: () => client.listSources(),
    staleTime: 60_000,
  });

  // 'popular' keeps whatever order the backend returned; 'recent' sorts by
  // last_scraped_at descending, with never-scraped sources last.
  const items = useMemo<SourceOut[]>(() => {
    const all = data?.items ?? [];
    // Lower-case the query once instead of twice per item inside the filter.
    const needle = debouncedQ.toLowerCase();
    const filtered = needle
      ? all.filter(
          (s) =>
            s.display_name.toLowerCase().includes(needle) ||
            s.sitetag.toLowerCase().includes(needle),
        )
      : all;
    if (order === 'recent') {
      // Sort a copy so the cached query data is never mutated.
      return [...filtered].sort((a, b) => {
        if (!a.last_scraped_at && !b.last_scraped_at) return 0;
        if (!a.last_scraped_at) return 1;
        if (!b.last_scraped_at) return -1;
        return b.last_scraped_at.localeCompare(a.last_scraped_at);
      });
    }
    return filtered;
  }, [data?.items, debouncedQ, order]);

  return (
    <View style={styles.container}>
      <View style={styles.headerRow}>
        <Text style={styles.headerLabel}>Sites</Text>
        <Text style={styles.headerCount}>{items.length}</Text>
      </View>
      <Text style={styles.hint}>tap a tube → newest scenes from that site</Text>

      <View style={styles.toolbar}>
        <TextInput
          style={[styles.search, searchFocused && styles.searchFocused]}
          value={q}
          onChangeText={setQ}
          onFocus={() => setSearchFocused(true)}
          onBlur={() => setSearchFocused(false)}
          placeholder="search site…"
          placeholderTextColor={theme.mutedDim}
          autoCapitalize="none"
        />
      </View>

      <View style={styles.segment}>
        <SegButton
          active={order === 'popular'}
          onPress={() => setOrder('popular')}
          label="Top"
        />
        <SegButton
          active={order === 'recent'}
          onPress={() => setOrder('recent')}
          label="Recent"
        />
      </View>

      {isLoading && <ActivityIndicator color={theme.fg} style={{ marginTop: 24 }} />}
      {error instanceof Error && <Text style={styles.error}>{error.message}</Text>}

      <FlatList
        data={items}
        keyExtractor={(s) => s.origin}
        numColumns={2}
        columnWrapperStyle={styles.gridRow}
        renderItem={({ item }) => (
          <SiteChip
            source={item}
            onPress={() =>
              navigation.navigate('SiteScenes', {
                origin: item.origin,
                name: item.display_name,
              })
            }
          />
        )}
        refreshing={isRefetching}
        onRefresh={refetch}
        // Do not claim "no sites" while loading or when the request failed;
        // the error message above already explains the empty grid.
        ListEmptyComponent={
          !isLoading && !error ? <Text style={styles.emptyText}>no sites</Text> : null
        }
        contentContainerStyle={{ paddingBottom: 24 }}
      />
    </View>
  );
}
|
||||
|
||||
// Props for one button of the segmented Top/Recent control.
type SegButtonProps = {
  active: boolean;
  onPress: () => void;
  label: string;
};

/**
 * Single segment of the ordering control. The active segment gets the
 * highlighted button and text styles layered over the base ones.
 */
function SegButton(props: SegButtonProps) {
  const { active, onPress, label } = props;
  const buttonStyle = [styles.segButton, active && styles.segButtonActive];
  const textStyle = [styles.segButtonText, active && styles.segButtonTextActive];
  return (
    <Pressable onPress={onPress} style={buttonStyle}>
      <Text style={textStyle}>{label}</Text>
    </Pressable>
  );
}
|
||||
|
||||
/**
 * Format an ISO timestamp as a short "time ago" label relative to now.
 *
 * Returns `null` when the input is empty, cannot be parsed by `Date.parse`,
 * or is 30 or more days old. Anything younger than a minute (including
 * timestamps in the future) is reported as 'just now'.
 */
function formatRelativeTime(iso: string | null): string | null {
  if (!iso) return null;

  const parsedMs = Date.parse(iso);
  if (Number.isNaN(parsedMs)) return null;

  const MINUTE = 60;
  const HOUR = 3600;
  const DAY = 86400;
  const ageSec = (Date.now() - parsedMs) / 1000;

  if (ageSec < MINUTE) return 'just now';
  if (ageSec < HOUR) return `${Math.floor(ageSec / MINUTE)}m ago`;
  if (ageSec < DAY) return `${Math.floor(ageSec / HOUR)}h ago`;

  const days = Math.floor(ageSec / DAY);
  return days < 30 ? `${days}d ago` : null;
}
|
||||
|
||||
/**
 * Grid cell for one source: display name, an optional "time ago" label for
 * the last scrape, and the scene count in a badge on the right.
 */
function SiteChip({ source, onPress }: { source: SourceOut; onPress: () => void }) {
  // null when the source was never scraped or the scrape is too old to label.
  const scrapedLabel = formatRelativeTime(source.last_scraped_at);

  return (
    <Pressable
      onPress={onPress}
      style={({ pressed }) => [styles.chip, pressed && styles.chipPressed]}
    >
      <View style={styles.chipMain}>
        <Text style={styles.chipName} numberOfLines={1}>
          {source.display_name}
        </Text>
        {scrapedLabel ? <Text style={styles.chipRel}>{scrapedLabel}</Text> : null}
      </View>
      <View style={styles.chipCountWrap}>
        <Text style={styles.chipCount}>{source.scene_count}</Text>
      </View>
    </Pressable>
  );
}
|
||||
|
||||
// Styles for SitesScreen, SegButton and SiteChip.
const styles = StyleSheet.create({
  // --- screen + header ----------------------------------------------------
  container: { flex: 1, paddingHorizontal: 16, paddingTop: 12, backgroundColor: theme.bg },
  headerRow: {
    flexDirection: 'row',
    justifyContent: 'space-between',
    alignItems: 'center',
    paddingBottom: 4,
  },
  headerLabel: {
    fontSize: 12,
    fontWeight: '700',
    letterSpacing: 1.2,
    textTransform: 'uppercase',
    color: theme.muted,
  },
  headerCount: { fontSize: 22, fontWeight: '800', color: theme.fg },
  hint: { marginBottom: 12, fontSize: 11, color: theme.mutedDim },

  // --- search -------------------------------------------------------------
  toolbar: { flexDirection: 'row', gap: 12, marginBottom: 10 },
  search: {
    flex: 1,
    padding: 12,
    fontSize: 16,
    color: theme.fg,
    backgroundColor: theme.card,
    borderColor: theme.border,
    borderWidth: 1.5,
    borderRadius: 12,
  },
  searchFocused: { borderColor: theme.borderFocus },

  // --- Top / Recent segmented control --------------------------------------
  segment: {
    flexDirection: 'row',
    alignSelf: 'flex-start',
    padding: 4,
    marginBottom: 14,
    backgroundColor: theme.bgElevated,
    borderColor: theme.border,
    borderWidth: 1,
    borderRadius: 12,
  },
  segButton: { paddingHorizontal: 14, paddingVertical: 6, borderRadius: 8 },
  segButtonActive: {
    backgroundColor: theme.accent,
    shadowColor: theme.accent,
    shadowOffset: { width: 0, height: 0 },
    shadowOpacity: 0.4,
    shadowRadius: 6,
    elevation: 2,
  },
  segButtonText: { fontSize: 13, fontWeight: '700', color: theme.muted },
  segButtonTextActive: { color: theme.fg },

  // --- site chips (two-column grid) -----------------------------------------
  gridRow: { gap: 10, marginBottom: 10 },
  chip: {
    flex: 1,
    flexDirection: 'row',
    justifyContent: 'space-between',
    alignItems: 'center',
    gap: 8,
    paddingHorizontal: 12,
    paddingVertical: 10,
    backgroundColor: theme.card,
    borderColor: theme.border,
    borderWidth: 1,
    borderRadius: 12,
    shadowColor: '#000',
    shadowOffset: { width: 0, height: 1 },
    shadowOpacity: 0.18,
    shadowRadius: 3,
    elevation: 2,
  },
  chipPressed: { backgroundColor: theme.bgElevated, borderColor: theme.borderFocus },
  chipMain: { flex: 1, gap: 2 },
  chipName: {
    fontSize: 14,
    fontWeight: '600',
    color: theme.fg,
  },
  chipRel: {
    fontSize: 10,
    color: theme.mutedDim,
  },
  // The hex suffixes appended to the colour string are alpha channels
  // (assumes theme.accentSecondary is a #RRGGBB string — TODO confirm).
  chipCountWrap: {
    alignItems: 'center',
    minWidth: 36,
    paddingHorizontal: 8,
    paddingVertical: 2,
    backgroundColor: `${theme.accentSecondary}1F`,
    borderColor: `${theme.accentSecondary}55`,
    borderWidth: 1,
    borderRadius: 8,
  },
  chipCount: {
    fontSize: 12,
    fontWeight: '700',
    color: theme.accentSecondary,
  },

  // --- states ---------------------------------------------------------------
  emptyText: { marginTop: 48, fontSize: 16, textAlign: 'center', color: theme.muted },
  error: { padding: 16, color: theme.bad },
});
|
||||
|
|
@ -104,6 +104,19 @@ export interface StudioListOut {
|
|||
per_page: number;
|
||||
}
|
||||
|
||||
/** One scraped source ("site") as returned by the sources listing. */
export interface SourceOut {
  /** Identifier of the source; SitesScreen uses it as the list key and as the `origin` scene filter. */
  origin: string;
  /** Short tag of the site; matched by the SitesScreen search box. */
  sitetag: string;
  /** Human-readable name shown on the chip and as the SiteScenes title. */
  display_name: string;
  /** Number of scenes for this source (rendered as the chip badge). */
  scene_count: number;
  /** Timestamp string of the last scrape, or null; parsed with `Date.parse` by the UI. */
  last_scraped_at: string | null;
}
|
||||
|
||||
/** Response envelope for the sources listing. */
export interface SourceListOut {
  /** The sources themselves. */
  items: SourceOut[];
  /** Count reported by the backend (presumably the number of sources — confirm against the API). */
  total: number;
}
|
||||
|
||||
export type ScenesSort = 'created_at' | 'release_date' | 'title' | 'studio';
|
||||
|
||||
export interface ScenesListParams {
|
||||
|
|
|
|||
135
scripts/_extract_apk_sig_hash.py
Normal file
135
scripts/_extract_apk_sig_hash.py
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
"""One-shot: parse APK Signing Block v2/v3 and print SHA-256(hex) of signing cert.
|
||||
|
||||
Matches what AntiTamperModule.kt computes — SHA-256 of the DER-encoded X.509 cert
|
||||
that the PackageManager would return for the APK.
|
||||
|
||||
Spec: https://source.android.com/docs/security/features/apksigning/v2
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import struct
|
||||
import sys
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
APK_SIG_BLOCK_MAGIC = b"APK Sig Block 42"
|
||||
V2_BLOCK_ID = 0x7109871A
|
||||
V3_BLOCK_ID = 0xF05368C0
|
||||
V3_1_BLOCK_ID = 0x1B93AD61
|
||||
|
||||
|
||||
def _find_eocd(data: bytes) -> int:
    """Return the offset of the last ZIP End-Of-Central-Directory signature.

    Only the trailing 65557 bytes of *data* are searched, since the EOCD
    record must lie within that window.

    Raises:
        RuntimeError: if no ``PK\\x05\\x06`` signature is found in the window.
    """
    window_start = len(data) - 65557
    if window_start < 0:
        window_start = 0
    position = data.rfind(b"PK\x05\x06", window_start)
    if position == -1:
        raise RuntimeError("EOCD not found")
    return position
|
||||
|
||||
|
||||
def _read_uint32_le(b: bytes, off: int) -> int:
    """Decode the little-endian unsigned 32-bit integer stored at offset *off* of *b*."""
    (value,) = struct.unpack_from("<I", b, off)
    return value
|
||||
|
||||
|
||||
def _read_uint64_le(b: bytes, off: int) -> int:
    """Decode the little-endian unsigned 64-bit integer stored at offset *off* of *b*."""
    (value,) = struct.unpack_from("<Q", b, off)
    return value
|
||||
|
||||
|
||||
def extract_sig_block(path: Path) -> bytes:
    """Return the ID-value "pairs" region of the APK Signing Block of the APK at *path*.

    The signing block sits immediately before the ZIP central directory:

        size_of_block(8) | pairs | size_of_block(8) | magic(16)

    Both size fields hold the same value: the block size excluding the leading
    size field itself.

    Raises:
        RuntimeError: if the EOCD record or the signing-block magic is missing,
            or if the block's size fields are out of range / do not match.
    """
    data = path.read_bytes()
    eocd = _find_eocd(data)
    # The central directory start offset is the uint32 at EOCD + 16; the magic ends there.
    cd_offset = _read_uint32_le(data, eocd + 16)
    magic_end = cd_offset
    magic_start = magic_end - len(APK_SIG_BLOCK_MAGIC)
    footer_size_off = magic_start - 8
    if footer_size_off < 0 or data[magic_start:magic_end] != APK_SIG_BLOCK_MAGIC:
        raise RuntimeError("APK Signing Block magic not found before central directory")

    block_size_excl = _read_uint64_le(data, footer_size_off)
    # Total block length = declared size + the leading 8-byte size field.
    block_start = magic_end - (block_size_excl + 8)
    pairs_start = block_start + 8
    # Reject sizes that would put the block before the file start or make it
    # too small to hold footer size + magic; otherwise slicing returns garbage.
    if block_start < 0 or pairs_start > footer_size_off:
        raise RuntimeError("APK Signing Block size field is out of range")
    if _read_uint64_le(data, block_start) != block_size_excl:
        raise RuntimeError("APK Signing Block header and footer sizes do not match")

    # pairs region = everything between the leading and the trailing size field
    return data[pairs_start:footer_size_off]
|
||||
|
||||
|
||||
def iter_pairs(pairs: bytes):
    """Yield ``(pair_id, value)`` for each ID-value pair in *pairs*.

    Each pair is laid out as ``length(uint64 LE) | id(uint32 LE) | value``,
    where ``length`` counts the id plus the value bytes.

    Raises:
        RuntimeError: if a pair header is truncated or its declared length is
            smaller than the id field or runs past the end of *pairs*. Without
            this check a clipped value would be yielded silently.
    """
    i = 0
    n = len(pairs)
    while i < n:
        if n - i < 12:
            raise RuntimeError(f"truncated ID-value pair header at offset {i}")
        # Read the 8-byte length and the 4-byte id in one go.
        length, pair_id = struct.unpack_from("<QI", pairs, i)
        body_start = i + 8
        body_end = body_start + length
        if length < 4 or body_end > n:
            raise RuntimeError(f"invalid ID-value pair length {length} at offset {i}")
        yield pair_id, pairs[body_start + 4 : body_end]
        i = body_end
|
||||
|
||||
|
||||
def extract_cert_der_v2_or_v3(block_value: bytes) -> bytes:
    """Return the DER bytes of the first certificate of the first signer.

    *block_value* is the value of a v2/v3/v3.1 signature-scheme pair. Every
    field below is prefixed with a little-endian uint32 length:

        block_value  = signers sequence
        signers      = sequence of signer
        signer       = signed_data || ...  (v3 adds min/max SDK fields after
                       signed_data, so only the first field is read here)
        signed_data  = digests || certificates || ...
        certificates = sequence of DER X.509 certificates

    Raises:
        RuntimeError: if any length prefix is truncated or points past the end
            of its container. Plain slicing would silently clip the data and
            the caller would hash a truncated certificate.
    """

    def read_lp(buf: bytes, off: int) -> tuple[bytes, int]:
        """Read one length-prefixed field at *off*; return (payload, next offset)."""
        start = off + 4
        if start > len(buf):
            raise RuntimeError("truncated length prefix in signing block")
        (length,) = struct.unpack_from("<I", buf, off)
        end = start + length
        if end > len(buf):
            raise RuntimeError("length-prefixed field overruns its container")
        return buf[start:end], end

    signers_seq, _ = read_lp(block_value, 0)
    signer, _ = read_lp(signers_seq, 0)
    signed_data, _ = read_lp(signer, 0)
    # Skip over the digests field; only its end offset is needed.
    _digests, certs_off = read_lp(signed_data, 0)
    certs_seq, _ = read_lp(signed_data, certs_off)
    first_cert, _ = read_lp(certs_seq, 0)
    return first_cert
|
||||
|
||||
|
||||
def main(argv: list[str]) -> int:
    """Print the SHA-256 hex digest of the APK's signing certificate.

    *argv* is the full argument vector; ``argv[1]`` is the APK path. The first
    v3/v3.1 signature block is preferred, with the first v2 block as fallback.

    Returns:
        0 on success, 2 on a usage error or missing file, 3 when the APK has
        no v2/v3 signature block.
    """
    if len(argv) < 2:
        print("usage: _extract_apk_sig_hash.py <path-to-apk>", file=sys.stderr)
        return 2
    apk = Path(argv[1])
    if not apk.is_file():
        print(f"not found: {apk}", file=sys.stderr)
        return 2

    v3_family = (V3_BLOCK_ID, V3_1_BLOCK_ID)
    first_v2: bytes | None = None
    first_v3: bytes | None = None
    for pid, value in iter_pairs(extract_sig_block(apk)):
        if pid == V2_BLOCK_ID:
            if first_v2 is None:
                first_v2 = value
        elif pid in v3_family:
            if first_v3 is None:
                first_v3 = value

    # prefer v3 over v2 if both present (matches what PM returns on modern Android)
    found_block = first_v3 if first_v3 is not None else first_v2
    if found_block is None:
        print("no v2/v3 signing block found", file=sys.stderr)
        return 3

    print(hashlib.sha256(extract_cert_der_v2_or_v3(found_block)).hexdigest())
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main(sys.argv))
|
||||
54
scripts/_patch_manifest.py
Normal file
54
scripts/_patch_manifest.py
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
"""One-shot: napraw manifest.json dla istniejącego OTA update'u.
|
||||
|
||||
Bugs:
|
||||
1. Windows publish wpisywał `assets\<hash>` (os.sep) do URL'i.
|
||||
2. URL hosta ustawiony na 'goon-app.crawlbot.pl:8443' który nie ma DNS —
|
||||
mobile nie pobierze assetów. Właściwy host: api.goon-foss.org (port 443).
|
||||
|
||||
Normalizujemy `\` → `/` i podmieniamy hosta na CORRECT_HOST.
|
||||
"""
|
||||
import json
|
||||
import sys
|
||||
|
||||
CORRECT_HOST_PREFIX = "https://api.goon-foss.org/expo-updates/asset"
WRONG_HOST_PREFIXES = [
    "https://goon-app.crawlbot.pl:8443/expo-updates/asset",
]


def fix_url(u: str) -> tuple[str, bool]:
    """Return ``(fixed_url, changed)`` for one manifest asset URL.

    Backslashes are turned into forward slashes, and a URL starting with one
    of ``WRONG_HOST_PREFIXES`` has that prefix swapped for
    ``CORRECT_HOST_PREFIX``. ``changed`` is True when the result differs
    from *u*.
    """
    normalized = u.replace("\\", "/")
    stale = next((p for p in WRONG_HOST_PREFIXES if normalized.startswith(p)), None)
    if stale is not None:
        normalized = CORRECT_HOST_PREFIX + normalized[len(stale):]
    return normalized, normalized != u
|
||||
|
||||
|
||||
def main() -> int:
    """Rewrite the asset URLs of the manifest named by ``sys.argv[1]`` in place.

    Every ``assets[].url`` and the ``launchAsset.url`` (when present) is passed
    through ``fix_url``; the manifest is then written back with 2-space
    indentation and the number of changed URLs is printed.

    Returns:
        0 on success, 1 when the argument count is wrong.
    """
    if len(sys.argv) != 2:
        print("usage: _patch_manifest.py <manifest.json path>")
        return 1

    path = sys.argv[1]
    # Close the handle deterministically and pin UTF-8 rather than relying on
    # the locale's default encoding, which differs between platforms.
    with open(path, encoding="utf-8") as f:
        m = json.load(f)

    fixed = 0
    for a in m.get("assets", []):
        new, ch = fix_url(a["url"])
        if ch:
            a["url"] = new
            fixed += 1
    la = m.get("launchAsset", {})
    if "url" in la:
        new, ch = fix_url(la["url"])
        if ch:
            la["url"] = new
            fixed += 1

    with open(path, "w", encoding="utf-8") as f:
        json.dump(m, f, indent=2)
    print(f"patched {fixed} URLs in {path}")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
215
scripts/auto_merge_freshporno_to_canonical.py
Normal file
215
scripts/auto_merge_freshporno_to_canonical.py
Normal file
|
|
@ -0,0 +1,215 @@
|
|||
"""Auto-merge freshporno orphan scenes do TPDB/StashDB canonical.
|
||||
|
||||
Wcześniejszy bulk_dedup `all` / `performers` OOM-ował na O(N²) collection
|
||||
wszystkich par. Tutaj inny pattern: O(N) — dla każdej freshporno orphan-with-date,
|
||||
query candidate canonical scen przez indexes (performer overlap + release_date
|
||||
window), score, decyzja.
|
||||
|
||||
Wykonanie po backfillu release_date dla 10390 freshporno scen — teraz mamy
|
||||
sygnał daty który wcześniej był null i blokował composite score ≥0.92.
|
||||
|
||||
Decyzje:
|
||||
- score ≥ auto_t (0.92): przenieś playback_source z tube → canonical,
|
||||
skopiuj brakujące tagi, usuń tube scenę.
|
||||
- review_t ≤ score < auto_t: insert merge_candidate (pending).
|
||||
- score < 0.75: skip.
|
||||
|
||||
Idempotent: orphan scene bez kandydatów lub już zmerged → no-op.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from datetime import timedelta
|
||||
|
||||
from sqlalchemy import and_, exists, select
|
||||
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
||||
|
||||
from app.config import get_settings
|
||||
from app.db import session_scope
|
||||
from app.models.merge_candidate import MergeCandidate, MergeKind, MergeStatus
|
||||
from app.models.playback_source import PlaybackSource
|
||||
from app.models.scene import Scene, ScenePerformer, SceneExternalRef, SceneTag
|
||||
from app.models.source import Source
|
||||
from app.scheduler.bulk_dedup import score_scene_pair
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
DATE_WINDOW_DAYS = 7
|
||||
|
||||
|
||||
def main() -> int:
    """Merge freshporno orphan scenes into canonical (TPDB/StashDB) scenes.

    For every scene that has a ``tube:freshpornoorg`` playback source, a
    release date and no external ref from the ``tpdb``/``stashdb`` sources,
    candidate canonical scenes are looked up (shared performer, release date
    within ``DATE_WINDOW_DAYS``) and scored with ``score_scene_pair``:

    - best score >= ``settings.auto_merge_threshold``: playback sources, tags
      and external refs are moved to the canonical scene, the tube scene is
      deleted and an ``auto_merged`` audit row is added;
    - best score >= ``settings.review_threshold``: a ``pending`` merge
      candidate is added unless one already exists for the pair;
    - otherwise the scene is counted as ``no_match``.

    Each scene is handled in its own ``session_scope``; failures are counted
    and the first five are logged. Always returns 0.
    """
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
    settings = get_settings()
    auto_t = settings.auto_merge_threshold
    review_t = settings.review_threshold
    log.info("auto_t=%.2f review_t=%.2f", auto_t, review_t)

    with session_scope() as session:
        canon_src_ids = list(session.execute(
            select(Source.id).where(Source.name.in_(["tpdb", "stashdb"]))
        ).scalars().all())

        # Freshporno orphans with a release_date (our candidates for merging into canonical).
        orphan_ids = list(session.execute(
            select(Scene.id)
            .join(PlaybackSource, PlaybackSource.scene_id == Scene.id)
            .where(PlaybackSource.origin == "tube:freshpornoorg")
            .where(Scene.release_date.is_not(None))
            .where(~exists().where(and_(
                SceneExternalRef.scene_id == Scene.id,
                SceneExternalRef.source_id.in_(canon_src_ids),
            )))
            .distinct()
        ).scalars().all())

    log.info("freshporno orphan candidates (with date): %d", len(orphan_ids))

    merged = 0
    pending_added = 0
    no_candidates = 0
    no_match = 0
    errors = 0

    for processed, scene_id in enumerate(orphan_ids, start=1):
        try:
            with session_scope() as session:
                tube = session.get(Scene, scene_id)
                if tube is None:
                    continue
                if tube.release_date is None:
                    continue

                # Performers of the tube scene
                perfs = list(session.execute(
                    select(ScenePerformer.performer_id).where(
                        ScenePerformer.scene_id == tube.id
                    )
                ).scalars().all())
                if not perfs:
                    no_candidates += 1
                    continue

                # Canonical candidates: scenes sharing >=1 performer AND with a
                # release_date within +/-N days AND with a canonical external_ref
                # (TPDB/StashDB).
                date_low = tube.release_date - timedelta(days=DATE_WINDOW_DAYS)
                date_high = tube.release_date + timedelta(days=DATE_WINDOW_DAYS)

                cand_ids = list(session.execute(
                    select(Scene.id).distinct()
                    .join(ScenePerformer, ScenePerformer.scene_id == Scene.id)
                    .where(ScenePerformer.performer_id.in_(perfs))
                    .where(Scene.release_date.is_not(None))
                    .where(Scene.release_date.between(date_low, date_high))
                    .where(Scene.id != tube.id)
                    .where(exists().where(and_(
                        SceneExternalRef.scene_id == Scene.id,
                        SceneExternalRef.source_id.in_(canon_src_ids),
                    )))
                ).scalars().all())

                if not cand_ids:
                    no_candidates += 1
                    continue

                # Score every candidate, keep the best one
                best_cand = None
                best_score = 0.0
                best_breakdown = None
                for cand_id in cand_ids:
                    cand = session.get(Scene, cand_id)
                    if cand is None:
                        continue
                    b = score_scene_pair(session, tube, cand)
                    if b.composite > best_score:
                        best_score = b.composite
                        best_cand = cand
                        best_breakdown = b

                if best_cand is None or best_score < review_t:
                    no_match += 1
                    continue

                if best_score >= auto_t:
                    # Auto-merge: move playback to canonical, copy tags, delete the tube scene.
                    session.execute(
                        PlaybackSource.__table__.update()
                        .where(PlaybackSource.scene_id == tube.id)
                        .values(scene_id=best_cand.id)
                    )
                    # Merge tags (unique constraint on the scene_id+tag_id pair — ignore conflict)
                    tube_tag_ids = list(session.execute(
                        select(SceneTag.tag_id).where(SceneTag.scene_id == tube.id)
                    ).scalars().all())
                    for tag_id in tube_tag_ids:
                        session.execute(
                            pg_insert(SceneTag.__table__)
                            .values(scene_id=best_cand.id, tag_id=tag_id)
                            .on_conflict_do_nothing()
                        )
                    # Move external_refs (freshporno)
                    session.execute(
                        SceneExternalRef.__table__.update()
                        .where(SceneExternalRef.scene_id == tube.id)
                        .values(scene_id=best_cand.id)
                    )
                    # Drop remaining attached rows + scene
                    session.execute(
                        SceneTag.__table__.delete().where(SceneTag.scene_id == tube.id)
                    )
                    session.execute(
                        ScenePerformer.__table__.delete().where(ScenePerformer.scene_id == tube.id)
                    )
                    session.delete(tube)
                    merged += 1

                    # Log audit
                    session.add(MergeCandidate(
                        kind=MergeKind.scene,
                        left_id=best_cand.id,
                        right_id=best_cand.id,  # self-ref for audit (the dropped scene no longer exists)
                        score=best_score,
                        reasons={"path": "freshporno_backfill_auto", **(best_breakdown.reasons if best_breakdown else {})},
                        status=MergeStatus.auto_merged,
                    ))
                else:
                    # Pending review (review_t <= score < auto_t); ids stored in ascending order
                    a_id, b_id = (tube.id, best_cand.id) if tube.id < best_cand.id else (best_cand.id, tube.id)
                    existing = session.execute(
                        select(MergeCandidate).where(
                            MergeCandidate.kind == MergeKind.scene,
                            MergeCandidate.left_id == a_id,
                            MergeCandidate.right_id == b_id,
                        ).limit(1)
                    ).scalar_one_or_none()
                    if existing is None:
                        session.add(MergeCandidate(
                            kind=MergeKind.scene,
                            left_id=a_id,
                            right_id=b_id,
                            score=best_score,
                            reasons={"path": "freshporno_backfill_review", **(best_breakdown.reasons if best_breakdown else {})},
                            status=MergeStatus.pending,
                        ))
                        pending_added += 1
        except Exception as e:
            errors += 1
            if errors <= 5:
                log.warning("scene=%s failed: %s", scene_id, e)
        finally:
            # Runs on every iteration, including the `continue` paths above, so
            # progress is reported every 200 scenes whatever their outcome.
            if processed % 200 == 0:
                log.info(
                    "progress %d/%d: merged=%d pending=%d no_cand=%d no_match=%d errors=%d",
                    processed, len(orphan_ids), merged, pending_added,
                    no_candidates, no_match, errors,
                )

    log.info(
        "DONE: orphans=%d merged=%d pending_added=%d no_candidates=%d no_match=%d errors=%d",
        len(orphan_ids), merged, pending_added, no_candidates, no_match, errors,
    )
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
104
scripts/backfill_freshporno_dates.py
Normal file
104
scripts/backfill_freshporno_dates.py
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
"""One-shot: backfill `release_date` for freshporno scenes that were scraped before
|
||||
the `itemprop="uploadDate"` regex was added.
|
||||
|
||||
Tło: bug-report 2026-05-20 ("brak Brazzers Exxtra po 15-05") wymusił dodanie
|
||||
`release_date` extracta z `itemprop="uploadDate"` w freshporno connector. Stare
|
||||
scenes (z przed tego patcha) mają `release_date = NULL`, przez co scene_resolver
|
||||
nie liczy date-overlap signal → score < 0.92 → orphan zamiast merged z TPDB
|
||||
canonical.
|
||||
|
||||
10468 orphan freshporno scenes (vs 4789 canonical) — 99% bez release_date.
|
||||
Po backfill resolver auto-merge przy następnym bulk-dedup tick.
|
||||
|
||||
Idempotent: update tylko gdy aktualne `release_date IS NULL` i `uploadDate`
|
||||
ekstrakcja się powiedzie.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from datetime import UTC, date, datetime
|
||||
|
||||
import httpx
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.db import session_scope
|
||||
from app.models import Scene
|
||||
from app.models.playback_source import PlaybackSource
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/140.0.0.0"
|
||||
_UPLOAD_DATE_RE = re.compile(
|
||||
r'itemprop="uploadDate"[^>]+content="(\d{4}-\d{2}-\d{2})',
|
||||
)
|
||||
|
||||
|
||||
def main() -> int:
    """Backfill ``release_date`` for freshporno scenes that have none.

    Selects every scene with a ``tube:freshpornoorg`` playback source and a
    NULL ``release_date``, fetches its page URL, extracts the date with
    ``_UPLOAD_DATE_RE`` and stores it — but only if the scene still has no
    date when re-read, so a re-run does not overwrite anything.

    Counters: 404/410 responses, pages without a match and unparsable dates
    count as skipped; other non-200 responses and exceptions count as errors
    (the first five exceptions are logged). Always returns 0.
    """
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

    with session_scope() as session:
        rows = session.execute(
            select(Scene.id, PlaybackSource.page_url)
            .join(PlaybackSource, PlaybackSource.scene_id == Scene.id)
            .where(PlaybackSource.origin == "tube:freshpornoorg")
            .where(Scene.release_date.is_(None))
        ).all()
        log.info("freshporno scenes without release_date: %d", len(rows))

    updated = 0
    skipped = 0
    errors = 0

    # Context manager so the HTTP client is closed even when the run is
    # interrupted, not only when the loop finishes normally.
    with httpx.Client(
        timeout=15.0,
        follow_redirects=True,
        headers={"User-Agent": USER_AGENT},
    ) as client:
        for scene_id, page_url in rows:
            try:
                r = client.get(page_url)
                if r.status_code != 200:
                    if r.status_code in (404, 410):
                        skipped += 1
                    else:
                        errors += 1
                    continue
                m = _UPLOAD_DATE_RE.search(r.text)
                if not m:
                    skipped += 1
                    continue
                try:
                    rd = date.fromisoformat(m.group(1))
                except ValueError:
                    # Matched the digit pattern but is not a real calendar date.
                    skipped += 1
                    continue

                with session_scope() as s:
                    scene = s.get(Scene, scene_id)
                    # Re-check inside the write session: the scene may be gone
                    # or may have received a date since the initial query.
                    if scene is None or scene.release_date is not None:
                        continue
                    scene.release_date = rd
                    updated += 1
                    if updated % 100 == 0:
                        log.info(
                            "progress: updated=%d skipped=%d errors=%d (%d/%d)",
                            updated, skipped, errors,
                            updated + skipped + errors, len(rows),
                        )
            except Exception as e:
                errors += 1
                if errors <= 5:
                    log.warning("scene=%s url=%s failed: %s", scene_id, page_url, e)

    log.info(
        "DONE: candidates=%d updated=%d skipped=%d errors=%d",
        len(rows), updated, skipped, errors,
    )
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
134
scripts/backfill_freshporno_titles.py
Normal file
134
scripts/backfill_freshporno_titles.py
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
"""One-shot: re-extract titles dla freshporno scen z pre-fix truncation bug.
|
||||
|
||||
Tło: `meta_content` regex sprzed 2026-05-20 obcinał title na pierwszym apostrofie
|
||||
(`<meta content="She's So Insatiable" />` → `She`). Fix wszedł 2026-05-20,
|
||||
ale scenes scrapped przed fixem mają broken titles w DB. Delta-ingest skipuje
|
||||
je przez external_id match — bez backfill nigdy się nie naprawią.
|
||||
|
||||
Bug-report `2fbf1c73` 2026-05-23 (kontekstowo, brak BE scen): część
|
||||
brakujących Brazzers Exxtra scen to faktycznie pre-fix victims które nie
|
||||
zmergowały z canonical TPDB record bo title się nie zgadzał.
|
||||
|
||||
Heurystyka:
|
||||
- origin = tube:freshpornoorg
|
||||
- created_at < 2026-05-20 (pre-fix)
|
||||
- title length < 15
|
||||
- slug freshporno URL ma więcej tokenów niż title (sygnał obcięcia)
|
||||
|
||||
Idempotent: po update tylko jeśli nowy title różni się od bieżącego.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
from datetime import UTC, datetime
|
||||
|
||||
import httpx
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.connectors.direct_scrapers._browse_base import meta_content
|
||||
from app.db import session_scope
|
||||
from app.models import Scene
|
||||
from app.models.playback_source import PlaybackSource
|
||||
from app.normalize.text import normalize, slugify
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
CUTOFF_DATE = datetime(2026, 5, 20, tzinfo=UTC)
|
||||
TITLE_MAX_LEN = 15
|
||||
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/140.0.0.0"
|
||||
|
||||
|
||||
def _slug_token_count(url: str) -> int:
    """Count the tokens of the URL slug (e.g. `/videos/girls-night-gets-girth/` → 4).

    The slug is the path segment right after ``/videos/``; it is split on
    ``-``, and empty pieces as well as a lone ``s`` are not counted. Returns 0
    when the URL has no ``/videos/<slug>`` part.
    """
    found = re.search(r"/videos/([^/]+)/?", url)
    if found is None:
        return 0
    counted = [piece for piece in found.group(1).split("-") if piece not in ("", "s")]
    return len(counted)
|
||||
|
||||
|
||||
def main() -> int:
    """Re-extract titles of freshporno scenes that look truncated.

    Candidates are scenes with a ``tube:freshpornoorg`` playback source,
    created before ``CUTOFF_DATE``, whose title is shorter than
    ``TITLE_MAX_LEN`` characters and has fewer whitespace-separated tokens
    than the page URL slug. For each candidate the page is fetched and the
    title is taken from ``og:title`` (falling back to the
    ``<h1 itemprop="name">`` text); it replaces the stored title only when it
    differs from, and is not shorter than, the old one.

    Non-200 responses and exceptions count as errors (the first five
    exceptions are logged). Always returns 0.
    """
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

    with session_scope() as session:
        rows = session.execute(
            select(Scene.id, Scene.title, PlaybackSource.page_url)
            .join(PlaybackSource, PlaybackSource.scene_id == Scene.id)
            .where(PlaybackSource.origin == "tube:freshpornoorg")
            .where(Scene.created_at < CUTOFF_DATE)
        ).all()
        log.info("pre-fix freshporno scenes: %d", len(rows))

    # Filter: short title + slug has more tokens than the title (truncation signal)
    candidates = []
    for scene_id, title, page_url in rows:
        if title is None:
            continue
        if len(title) >= TITLE_MAX_LEN:
            continue
        title_tokens = len(title.split())
        slug_tokens = _slug_token_count(page_url)
        if slug_tokens <= title_tokens:
            continue  # title already has as many/more tokens than the slug — probably a legit short title
        candidates.append((scene_id, title, page_url))

    log.info("candidates with slug>>title heurystyka: %d", len(candidates))

    updated = 0
    skipped = 0
    errors = 0

    # Context manager so the HTTP client is closed even when the run is
    # interrupted, not only when the loop finishes normally.
    with httpx.Client(
        timeout=15.0,
        follow_redirects=True,
        headers={"User-Agent": USER_AGENT},
    ) as client:
        for scene_id, old_title, page_url in candidates:
            try:
                r = client.get(page_url)
                if r.status_code != 200:
                    errors += 1
                    continue
                new_title = meta_content(r.text, property="og:title")
                if not new_title:
                    # Fallback: visible page heading
                    m = re.search(r"<h1[^>]*itemprop=\"name\"[^>]*>([^<]+)</h1>", r.text)
                    if m:
                        new_title = m.group(1).strip()
                if not new_title or new_title == old_title:
                    skipped += 1
                    continue
                if len(new_title) < len(old_title):
                    # Never replace a title with a shorter one.
                    skipped += 1
                    continue

                with session_scope() as s:
                    scene = s.get(Scene, scene_id)
                    if scene is None:
                        continue
                    log.info("update %s: %r -> %r", scene_id, scene.title, new_title)
                    scene.title = new_title
                    scene.title_normalized = normalize(new_title)
                    scene.slug = slugify(new_title)[:200]
                    updated += 1
                    if updated % 25 == 0:
                        log.info(
                            "progress: updated=%d skipped=%d errors=%d (%d/%d)",
                            updated, skipped, errors,
                            updated + skipped + errors, len(candidates),
                        )
            except Exception as e:
                errors += 1
                if errors <= 5:
                    log.warning("scene=%s url=%s failed: %s", scene_id, page_url, e)

    log.info(
        "DONE: candidates=%d updated=%d skipped=%d errors=%d",
        len(candidates), updated, skipped, errors,
    )
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
97
scripts/backfill_paradisehill_tags.py
Normal file
97
scripts/backfill_paradisehill_tags.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
"""One-shot: backfill paradisehill movie tags after regex fix.
|
||||
|
||||
Bug-report `3c999b27` 2026-05-21 "Brak kategorii, brak studia, brak aktorek" —
|
||||
paradisehill connector miał broken regex (`</div></div><div class="similar"`)
|
||||
który failował na nowym skinie z `</noindex>` w środku → fallback do html[:8000]
|
||||
→ 0 tagów. Fix w `paradisehill.py` (re-relaxed boundary + `<a>` wrapper support);
|
||||
ten skrypt re-scrapuje istniejące filmy żeby uzupełnić tagi które bug pominął.
|
||||
|
||||
Idempotent — re-run bez efektów ubocznych.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import time
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
from app.connectors.paradisehill import ParadisehillConnector, _parse_detail
|
||||
from app.db import session_scope
|
||||
from app.models import Movie, MovieExternalRef, Tag
|
||||
from app.models.movie import MovieTag
|
||||
from app.models.source import Source
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def main() -> int:
    """Re-scrape paradisehill movies that have no tags and attach the parsed tags.

    Selects movies with an external ref from the ``paradisehill`` source and
    no ``MovieTag`` row, fetches ``/<external_id>/`` through the connector's
    HTTP client, parses it with ``_parse_detail`` and, per parsed tag, creates
    the ``Tag`` if its slug is unknown and links it to the movie unless the
    link already exists.

    Non-200 responses and exceptions count as errors (the first five
    exceptions are logged). Always returns 0; raises if the ``paradisehill``
    source row is missing (``scalar_one``).
    """
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
    c = ParadisehillConnector()
    done = 0
    new_tags_total = 0
    errors = 0

    with session_scope() as session:
        # Filter: only paradisehill movies (sources.name = 'paradisehill') without tags.
        # An earlier version matched on external_id NOT LIKE '%:%', which also caught
        # mangoporn/pandamovies/streamporn (slugs without `:`) → 404 spam.
        pdh_src_id = session.execute(
            select(Source.id).where(Source.name == "paradisehill")
        ).scalar_one()
        rows = session.execute(
            select(Movie.id, MovieExternalRef.external_id)
            .join(MovieExternalRef, MovieExternalRef.movie_id == Movie.id)
            .where(MovieExternalRef.source_id == pdh_src_id)
            .where(~Movie.id.in_(select(MovieTag.movie_id).distinct()))
        ).all()
        log.info("paradisehill movies without tags: %d", len(rows))

    for movie_id, hex_id in rows:
        try:
            r = c._client.get(f"/{hex_id}/")
            if r.status_code != 200:
                errors += 1
                continue
            raw_movie = _parse_detail(hex_id, r.text)
            if raw_movie is None or not raw_movie.tags:
                done += 1
                continue
            with session_scope() as s:
                for raw_tag in raw_movie.tags:
                    tag = s.execute(
                        select(Tag).where(Tag.slug == raw_tag.slug)
                    ).scalar_one_or_none()
                    if tag is None:
                        tag = Tag(name=raw_tag.name, slug=raw_tag.slug)
                        s.add(tag)
                        s.flush()  # flush so tag.id is populated before linking
                    link = s.execute(
                        select(MovieTag).where(
                            MovieTag.movie_id == movie_id,
                            MovieTag.tag_id == tag.id,
                        )
                    ).scalar_one_or_none()
                    if link is None:
                        s.add(MovieTag(movie_id=movie_id, tag_id=tag.id))
                        new_tags_total += 1
            done += 1
            if done % 50 == 0:
                log.info(
                    "progress: done=%d/%d new_tags=%d errors=%d",
                    done, len(rows), new_tags_total, errors,
                )
        except Exception as e:
            errors += 1
            if errors <= 5:
                log.warning("hex=%s failed: %s", hex_id, e)
        finally:
            # gentle rate-limit — in `finally` so it also applies after error
            # responses, tag-less pages and exceptions
            time.sleep(0.05)

    log.info(
        "DONE: processed=%d/%d new_tags=%d errors=%d",
        done, len(rows), new_tags_total, errors,
    )
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
|
|
@ -85,7 +85,11 @@ def main() -> int:
|
|||
created_at = datetime.now(UTC).isoformat().replace("+00:00", "Z")
|
||||
|
||||
def asset_url(rel_path: str) -> str:
|
||||
# rel_path = "_expo/static/js/android/abc.hbc" lub "assets/abc"
|
||||
# rel_path = "_expo/static/js/android/abc.hbc" lub "assets/abc".
|
||||
# Windows: Expo metadata.json używa os.sep (`\`) w assets[].path. Normalizujemy
|
||||
# do `/` żeby URL był poprawny path-side (Linux backend nie traktuje `\` jako
|
||||
# separatora — bez tego mobile dostaje 404 na każdy asset i odrzuca update).
|
||||
rel_path = rel_path.replace("\\", "/")
|
||||
return f"{PUBLIC_BASE}?asset={update_id}/{rel_path}&runtimeVersion={args.runtime}&platform=android"
|
||||
|
||||
launch_asset = {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue