From 7979d5fa6162fa58d8dcf0adf2ce5b8b90231fa5 Mon Sep 17 00:00:00 2001 From: "https://github.com/goon-foss/goon" Date: Mon, 25 May 2026 22:02:52 +0200 Subject: [PATCH] session work: bug-report fixes + WIP cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User-facing bugs resolved (per bug_reports table 2026-05-25): - 40cd28aa (short-scene filter): mobile api.ts default min_duration_sec=60 hides 6519 sub-60s scenes across all list endpoints (Performer/Site/Tag/ Browse). Caller may override with explicit 0. - 5e89ef7e (porndoe needs cookies/play click): INJECTED_JS in PlayerScreen now auto-clicks player-poster overlay (player-poster-play, big-play-button, vjs-big-play-button, jw-icon-display, btn-big-play, mejs__overlay-button, play-button, btn-play, videoPlayButton). Triggered same interval as consent-dismiss + ad-iframe removal. - b1b5e1a2 (Mixdrop czarny ekran): re-enable mixdrop direct stream via VPS curl_cffi proxy (was: skip → WebView fallback → blank screen). Backend pipeline (mixdrop.py extract + stream_proxy._curl_cffi_stream with JA3 + auto-refetch on token expire) was already complete; just removed the skip in app/api/playback.py. Plus ongoing WIP (paradisehill multi-part extraction, stream_proxy refetch logic, gesture race fix for long-press 2x speed, anti-adblock INJECTED_JS defenses, scripts for freshporno backfill, new sources API). --- app/api/expo_updates.py | 6 + app/api/playback.py | 74 +++-- app/api/sources.py | 129 ++++++++ app/api/stream_proxy.py | 67 +++- app/connectors/paradisehill.py | 69 ++++- app/extractors/__init__.py | 11 +- app/main.py | 2 + app/scheduler/jobs.py | 27 +- mobile/src/api.ts | 13 +- mobile/src/navigation.tsx | 89 +++++- mobile/src/screens/AppLockSettingsScreen.tsx | 13 + mobile/src/screens/MovieDetailScreen.tsx | 31 ++ mobile/src/screens/PerformerScenesScreen.tsx | 40 ++- mobile/src/screens/PlayerScreen.tsx | 78 ++++- mobile/src/screens/SiteScenesScreen.tsx | 214 +++++++++++++ mobile/src/screens/SitesScreen.tsx | 290 ++++++++++++++++++ mobile/src/types.ts | 13 + scripts/_extract_apk_sig_hash.py | 135 ++++++++ scripts/_patch_manifest.py | 54 ++++ scripts/auto_merge_freshporno_to_canonical.py | 215 +++++++++++++ scripts/backfill_freshporno_dates.py | 104 +++++++ scripts/backfill_freshporno_titles.py | 134 ++++++++ scripts/backfill_paradisehill_tags.py | 97 ++++++ scripts/publish_update.py | 6 +- 24 files changed, 1845 insertions(+), 66 deletions(-) create mode 100644 app/api/sources.py create mode 100644 mobile/src/screens/SiteScenesScreen.tsx create mode 100644 mobile/src/screens/SitesScreen.tsx create mode 100644 scripts/_extract_apk_sig_hash.py create mode 100644 scripts/_patch_manifest.py create mode 100644 scripts/auto_merge_freshporno_to_canonical.py create mode 100644 scripts/backfill_freshporno_dates.py create mode 100644 scripts/backfill_freshporno_titles.py create mode 100644 scripts/backfill_paradisehill_tags.py diff --git a/app/api/expo_updates.py b/app/api/expo_updates.py index 1f93dd2..43b7415 100644 --- a/app/api/expo_updates.py +++ b/app/api/expo_updates.py @@ -91,6 +91,12 @@ def get_asset( zwykle `/_expo/static/js/android/.js` lub `/assets/`. Path traversal blocked przez resolve+is_relative. """ + # Windows publish quirk: Expo metadata.json zapisuje assets[].path z backslashami + # (os.sep) na Windowsie. publish_update.py kopiuje to do URL → manifest zawiera + # `?asset=/assets\`. Na Linux backslash nie jest separatorem path-a, + # więc Path resolve nie znalazłby pliku (404 na każdy asset → mobile odrzuca cały + # update). Normalizujemy tutaj zamiast wymagać re-publishu starych bundle'i. + asset = asset.replace("\\", "/") runtime_dir = (_STATIC_DIR / runtimeVersion).resolve() target = (runtime_dir / asset).resolve() if not str(target).startswith(str(runtime_dir)): diff --git a/app/api/playback.py b/app/api/playback.py index e391dac..3f6307c 100644 --- a/app/api/playback.py +++ b/app/api/playback.py @@ -148,16 +148,42 @@ def resolve_movie_playback( links: list[StreamLink] = [] if pb.origin == "paradisehill": - # Tylko WebView fallback — paradisehill player wymaga session login dla streamu. - links = [ - StreamLink( - stream_url=None, - embed_url=pb.page_url, - quality=pb.quality, - type="hoster", - raw={"origin": pb.origin}, - ) - ] + # Paradisehill: pobierz page, parsuj `var videoList = [...]` żeby dostać N parts. + # Każdy part to direct mp4 z paradisehill CDN (v1.paradisehill.cc), serwowane + # bez auth — 200 OK z plain User-Agent + Referer. + # Bug-reports `c5693926`/`418270e4`/`3c999b27` 2026-05-21 ("ładuje tylko 1 z N"). + # Poprzednio: tylko WebView fallback → mobile gra 1. part w playerze paradisehilla, + # nie ma sposobu przejść do następnego. + try: + from app.connectors.paradisehill import fetch_and_extract_parts + parts = fetch_and_extract_parts(pb.page_url) + except Exception as e: + log.warning("paradisehill parts extract failed for %s: %s", pb.page_url, e) + parts = [] + if parts: + for url, label in parts: + # NIE proxifikujemy tutaj — outer `_proxify_link` poniżej (linia 247) opakuje + # wszystkie linki. Double-wrap → token wewnątrz tokena (broken proxy URL). + links.append( + StreamLink( + stream_url=url, + embed_url=None, + quality=label, + type="mp4", + raw={"origin": pb.origin, "part_label": label}, + ) + ) + else: + # Fallback: brak videoList (np. login-only movie) — WebView na całość. + links = [ + StreamLink( + stream_url=None, + embed_url=pb.page_url, + quality=pb.quality, + type="hoster", + raw={"origin": pb.origin}, + ) + ] else: # dooplay mirror sources: spróbuj direct stream extract z hoster URL target = pb.embed_url or pb.page_url @@ -185,15 +211,16 @@ def resolve_movie_playback( ) stream = None # Mixdrop mxcontent CDN wymaga curl_cffi JA3 → wymusza VPS proxy. - # Pre-public: skip mixdrop direct, fallback na embed_url (mobile WebView z - # phone IP). Bandwidth + anonimowość VPS > UX. Movie ma zwykle 10+ alt - # hosterów (voe/luluvid/doply/etc.), user może wybrać alternative. - if stream and "mxcontent.net" in stream.lower(): - log.info( - "movie playback %s: mixdrop mxcontent — skip (VPS-proxy required), WebView fallback", - pb.id, - ) - stream = None + # Pre-2026-05-25 skipowaliśmy ten path "Bandwidth + anonimowość > UX", + # ale bug-report b1b5e1a2 zgłosił że Mixdrop WebView fallback = czarny + # ekran (recaptcha/adblock-detect blokują player init w in-app WebView). + # Movie ma zwykle 10+ alt hosterów, ale jeśli WebView fallback nie + # działa, user widzi tylko czarny ekran zamiast jakiejkolwiek alternatywy. + # Backend ma pełen pipeline: mixdrop.py extract → raw={proxy_impersonate: + # True, refetch_url} → stream_proxy._curl_cffi_stream z Chrome JA3 + + # auto-refetch on token expire. Włączamy go z powrotem. + # Bandwidth cost: ~485 MB/movie play; przy ~3 plays/day = 1.5 GB/day + # (acceptable na 8GB/m Hetzner plan z 20 TB transfer). if stream: type_hint = "m3u8" if ".m3u8" in stream.lower() else "mp4" raw_meta: dict = {"origin": pb.origin, "host": target} @@ -222,7 +249,14 @@ def resolve_movie_playback( raise HTTPException(status_code=502, detail="no playable links") links = [_proxify_link(link, referer) for link in links] - best = _pick_best(links) if links else None + # Dla paradisehill multipart: `_pick_best` wybiera "Part N" z najwyższą cyfrą (parsuje + # quality jako int), ale user chce zacząć od Part 1. Override: zawsze links[0]. + if pb.origin == "paradisehill" and len(links) > 1 and any( + (link.raw or {}).get("part_label") for link in links + ): + best = links[0] + else: + best = _pick_best(links) if links else None return ResolveOut( source=PlaybackSourceOut.model_validate(pb), best=best, diff --git a/app/api/sources.py b/app/api/sources.py new file mode 100644 index 0000000..f25a817 --- /dev/null +++ b/app/api/sources.py @@ -0,0 +1,129 @@ +"""GET /sources — lista tube źródeł dla feature "Sites" (mobile top-level tab). + +Bug-report 2026-05-24 (ea6f05f9, Scenes screen): user chce wybrać "pages" +obok Scenes i Movies — widzieć liście tube'ów i wchodzić w nie żeby zobaczyć +najnowsze sceny z konkretnego źródła. + +Endpoint enumeruje distinct `playback_sources.origin` z ŻYWYCH playback_sources +(`dead_at IS NULL`), tylko origins zaczynające się od 'tube:' (kanoniczne źródła +typu `canonical:tpdb_trailer` są pomijane — to nie są "scrapowane strony" w sensie +intencji feature'a). + +Sortowanie: scene_count DESC (najbardziej "wypełnione" tubey na górze). +""" +from __future__ import annotations + +import logging +import re +from datetime import datetime +from typing import Annotated + +from fastapi import APIRouter, Depends +from pydantic import BaseModel +from sqlalchemy import func, select +from sqlalchemy.orm import Session + +from app.auth import require_api_key +from app.db import get_session +from app.models.playback_source import PlaybackSource + +log = logging.getLogger(__name__) + +router = APIRouter(prefix="/sources", tags=["sources"], dependencies=[Depends(require_api_key)]) + + +class SourceOut(BaseModel): + origin: str + """Raw origin string z DB — np. 'tube:hqpornercom'. Używany jako parametr + `origin=` filtra w GET /scenes (substring match).""" + + sitetag: str + """Origin bez prefiksu 'tube:' — np. 'hqpornercom'. Stabilne ID tube'a (zgodne + z `BaseDirectTubeScraper.sitetag`).""" + + display_name: str + """Czytelna nazwa do UI — np. 'hqporner.com'. Wyprowadzona z sitetag przez + `_sitetag_to_display`. Tylko presentation; logikę trzymamy na sitetag/origin.""" + + scene_count: int + """Liczba ŻYWYCH playback_sources (dead_at IS NULL) per origin. Approx scenes + coverage — scena może mieć wiele sources tego samego origin (różne page_url), + więc trochę zawyża rzeczywistą scene-distinct count, ale dla orientacji OK.""" + + last_scraped_at: datetime | None + """MAX(last_seen_at) — najświeższy scrape dla tego origin. Pozwala mobile pokazać + 'scrapowane Xh temu' i sortować świeżość.""" + + +class SourceListOut(BaseModel): + items: list[SourceOut] + total: int + + +# Hardcoded display-name overrides dla edge cases. Większość sitetags mapuje się +# czysto `_sitetag_to_display` regex'em (`hqpornercom` → `hqporner.com`), ale niektóre +# tubey mają nietypowe TLDs / brakujące kropki w sitetag. +_DISPLAY_OVERRIDES: dict[str, str] = { + "fpoxxx": "fpo.xxx", + "siskavideo": "siska.video", + "porn4dayspw": "porn4days.pw", + "porn00org": "porn00.org", + "freshpornoorg": "freshporno.org", + "pornxpph": "pornxp.ph", + "0dayxxcom": "0dayxx.com", + "shyfapnet": "shyfap.net", + "hdporngg": "hdporn.gg", + "fullmoviesxxx": "fullmovies.xxx", + "latestleaksco": "latestleaks.co", + "xxxfreewatch": "xxxfreewatch.com", + "watchporn": "watchporn.to", +} + + +_TLD_RE = re.compile(r"^(.+?)(com|org|net|info)$") + + +def _sitetag_to_display(sitetag: str) -> str: + """`hqpornercom` → `hqporner.com`. Fallback dla mainstream tube'ów.""" + if sitetag in _DISPLAY_OVERRIDES: + return _DISPLAY_OVERRIDES[sitetag] + m = _TLD_RE.match(sitetag) + if m: + return f"{m.group(1)}.{m.group(2)}" + return sitetag + + +@router.get("", response_model=SourceListOut) +def list_sources( + session: Annotated[Session, Depends(get_session)], +) -> SourceListOut: + """Zwraca listę tube źródeł z ŻYWYMI playback_sources. + + Filter: `origin LIKE 'tube:%'` (drop canonical:* — TPDB trailery to inna semantyka). + """ + rows = session.execute( + select( + PlaybackSource.origin, + func.count(PlaybackSource.id).label("scene_count"), + func.max(PlaybackSource.last_seen_at).label("last_scraped_at"), + ) + .where(PlaybackSource.dead_at.is_(None)) + .where(PlaybackSource.origin.like("tube:%")) + .group_by(PlaybackSource.origin) + .order_by(func.count(PlaybackSource.id).desc()) + ).all() + + items: list[SourceOut] = [] + for origin, scene_count, last_scraped_at in rows: + sitetag = origin.split(":", 1)[1] if origin.startswith("tube:") else origin + items.append( + SourceOut( + origin=origin, + sitetag=sitetag, + display_name=_sitetag_to_display(sitetag), + scene_count=scene_count, + last_scraped_at=last_scraped_at, + ) + ) + + return SourceListOut(items=items, total=len(items)) diff --git a/app/api/stream_proxy.py b/app/api/stream_proxy.py index 80c40e7..28b4519 100644 --- a/app/api/stream_proxy.py +++ b/app/api/stream_proxy.py @@ -86,6 +86,53 @@ DEFAULT_UA = ( "(KHTML, like Gecko) Chrome/140.0.0.0 Safari/537.36" ) TOKEN_TTL_SEC = 4 * 60 * 60 # 4h + + +# URL-level redirect cache: target_url -> (final_resolved_url, expires_ts). +# Mobile ExoPlayer robi range-requesty per seek/preload — każdy hituje proxy z tym +# samym tokenem, proxy GET-uje target_url. Dla `porntrex.com/get_file/...` (a także +# fpoxxx, freshporno) URL jest **single-use**: pierwszy GET → 302 → CDN URL (time-bound), +# drugi GET → 410. Bez cache: drugi range = 410 → ExoPlayer fail → mobile fallback do +# `Linking.openURL(page_url)` → reklama (bug-reports `cee51c76`, `e2e365e3` 2026-05-22). +# +# Z cache: pierwszy GET follow-uje redirect, cache'uje final URL. Kolejne range hituje +# direct w CDN URL który jest time-bound (~1-2h), nie single-use. Mobile gra do końca +# bez fallbacku. +# +# TTL 1800s = 30 min: krócej niż typowy CDN signed-URL lifetime (~1h+), więc stale +# entries nie powodują 403 spam. Mobile po expiry retry-uje /resolve → fresh token. +_REDIRECT_CACHE: dict[str, tuple[str, float]] = {} +_REDIRECT_CACHE_TTL_SEC = 1800 +_REDIRECT_CACHE_MAX = 1000 + + +def _redirect_cache_get(target_url: str) -> str | None: + entry = _REDIRECT_CACHE.get(target_url) + if not entry: + return None + final, exp = entry + if exp < time.time(): + _REDIRECT_CACHE.pop(target_url, None) + return None + return final + + +def _redirect_cache_put(target_url: str, final_url: str) -> None: + if not final_url or target_url == final_url: + return + _REDIRECT_CACHE[target_url] = (final_url, time.time() + _REDIRECT_CACHE_TTL_SEC) + if len(_REDIRECT_CACHE) > _REDIRECT_CACHE_MAX: + cutoff = time.time() + for k in list(_REDIRECT_CACHE.keys()): + v = _REDIRECT_CACHE.get(k) + if v is None or v[1] < cutoff: + _REDIRECT_CACHE.pop(k, None) + + +def _redirect_cache_invalidate(target_url: str) -> None: + _REDIRECT_CACHE.pop(target_url, None) + + HOP_BY_HOP = { "connection", "keep-alive", @@ -390,12 +437,17 @@ async def proxy_stream( request: Request, ) -> Response: payload = parse_token(token) - target = payload["u"] + original_target = payload["u"] referer = payload["r"] or None use_impersonate = bool(payload.get("i")) refetch_url = payload.get("rf") refetch_hoster = payload.get("rh") + # Jeśli ten target był już wcześniej follow-redirect-ed, użyj cached final URL. + # Powód: porntrex `get_file/` 410 po reuse — patrz `_REDIRECT_CACHE` docstring. + cached_target = _redirect_cache_get(original_target) + target = cached_target or original_target + # Forwardujemy Range header (HLS/MP4 player robi byte-range fetches dla seek/preload) headers = _build_headers(referer) range_h = request.headers.get("range") @@ -437,8 +489,21 @@ async def proxy_stream( ups_headers = dict(upstream.headers) await upstream.aclose() await client.aclose() + # Cached final URL zwrócił error (np. CDN signed-URL expired, 403/410) — + # invaliduj cache i daj mobile retry przez fresh /resolve. Bez tego stale + # cache trzymałby martwy CDN URL przez 30 min (TTL). + if cached_target is not None and status in (401, 403, 404, 410): + _redirect_cache_invalidate(original_target) return _upstream_error_response(status, ups_headers, target) + # Pierwszy successful pass dla single-use targets (np. porntrex get_file): + # cache resolved final URL (po follow_redirects). Następne range-requesty + # pójdą direct w CDN URL — get_file nie dostaje drugiego hita. + if cached_target is None: + final_url = str(upstream.url) + if final_url != original_target: + _redirect_cache_put(original_target, final_url) + ct = (upstream.headers.get("content-type") or "").lower() is_m3u8 = ( path_suggests_m3u8 diff --git a/app/connectors/paradisehill.py b/app/connectors/paradisehill.py index f2f07e1..fd6ca05 100644 --- a/app/connectors/paradisehill.py +++ b/app/connectors/paradisehill.py @@ -72,6 +72,46 @@ _CHAPTER_RE = re.compile( r'([^<]+)', re.IGNORECASE, ) +# videoList JS array w detail page — może mieć multiple parts (Video.js playlist): +# var videoList = [{"sources":[{"src":"...part1.mp4","type":"video/mp4"}]}, ...] +# Bez parsowania tego mobile WebView gra tylko pierwszy part, kolejne pomija. +# Bug-reports `c5693926`/`418270e4` 2026-05-21 ("ładuje tylko 1 z 4 części"). +_VIDEO_LIST_RE = re.compile(r"var\s+videoList\s*=\s*(\[.*?\])\s*;", re.IGNORECASE | re.DOTALL) +_VIDEO_SRC_RE = re.compile(r'"src"\s*:\s*"([^"]+\.mp4[^"]*)"', re.IGNORECASE) + + +def extract_video_parts(html: str) -> list[tuple[str, str]]: + """Wyciąga listę MP4 parts z paradisehill detail HTML. + + Returns: [(mp4_url, label), ...] np. `[(.../part1.mp4, "Part 1"), ...]`. + Pusta lista gdy `videoList` nieobecny lub bez sources (login-only filmy). + """ + m = _VIDEO_LIST_RE.search(html) + if not m: + return [] + parts: list[tuple[str, str]] = [] + for i, src_m in enumerate(_VIDEO_SRC_RE.finditer(m.group(1)), start=1): + url = src_m.group(1).replace("\\/", "/") + parts.append((url, f"Part {i}")) + return parts + + +def fetch_and_extract_parts(page_url: str, *, timeout: float = 20.0) -> list[tuple[str, str]]: + """Resolve-time helper: pobierz page, wyciągnij videoList parts. + Używane przez `app.api.playback.resolve_movie_playback` dla origin='paradisehill'. + """ + with httpx.Client( + timeout=timeout, + follow_redirects=True, + headers={ + "User-Agent": USER_AGENT, + "Cookie": "is18=1", + "Accept-Language": "en-US,en;q=0.9", + }, + ) as client: + r = client.get(page_url) + r.raise_for_status() + return extract_video_parts(r.text) # Listing page item: _LIST_ITEM_RE = re.compile( r']*>\s*' @@ -230,15 +270,32 @@ def _parse_detail(hex_id: str, html: str) -> RawMovie | None: # Genre — pierwszy itemprop="genre" w samym block-inside (nie w recommendations). # Recommended films też mają itemprop="genre" więc match limity do block-inside. + # Wcześniejszy regex wymagał `
...
`. tags: list[RawTag] = [] - block_match = re.search( - r']*itemtype="http://schema\.org/Movie"[^>]*>' - r'(.*?)
\s*
\s*]*itemtype="http://schema\.org/Movie"[^>]*>', html, - re.DOTALL, ) - block = block_match.group(1) if block_match else html[:8000] - for m_genre in re.finditer(r'itemprop="genre"[^>]*>([^<]+). Wszystko przedtem to + # właściwa zawartość filmu (genre/cast/itd.); reszta to recommendations + # i komentarze ktore mają własne itemprop="genre". + stop = re.search(r'Female Domination` + # v2: `itemprop="genre">All Sex` (od 2026-05) + # Optional `` wrapper między `itemprop` a tekstem — bez tego v2 dawał empty. + for m_genre in re.finditer( + r'itemprop="genre"[^>]*>\s*(?:]*>)?\s*([^<]+)', block, re.IGNORECASE, + ): name = _decode_html(m_genre.group(1).strip()) if name and len(tags) < 10: tags.append(RawTag(name=name, slug=_slugify(name))) diff --git a/app/extractors/__init__.py b/app/extractors/__init__.py index bbba4af..5ff41d1 100644 --- a/app/extractors/__init__.py +++ b/app/extractors/__init__.py @@ -99,9 +99,14 @@ _REGISTRY: dict[str, Callable[[str], list[StreamSource] | None]] = { # bandwidth + VPS anonimowość priorytet. WebView fallback → mobile pobiera embed # z phone IP, KVS player JS decoduje video_url, ExoPlayer odtwarza direct z CDN. "freshpornoorg": _vps_blocked_fallback.extract, - # porn00 / pornxp — IP-bound CDN tokens. Pre-public WebView fallback (bandwidth + - # anonimowość VPS). Niski volume (84 scen), trivial. - "porn00org": _vps_blocked_fallback.extract, + # porn00 — KVS engine z v-acctoken w URL. Backend extract działa (zweryfikowane + # 2026-05-23), zwraca świeże get_file URL-e z `force_proxy=True` flag. + # `_proxify_link` rozwija je przez VPS proxy (CDN token IP-bound do VPS, mobile + # direct = 403). Bug-reports `5037b3e3`/`e8e3198b` 2026-05-22: WebView fallback + # pokazywał reklamę full-screen (porn00.org ma agresywny ad-network) — mobile + # nigdy nie dochodził do `
` w środku → fallback do html[:8000] +→ 0 tagów. Fix w `paradisehill.py` (re-relaxed boundary + `` wrapper support); +ten skrypt re-scrapuje istniejące filmy żeby uzupełnić tagi które bug pominął. + +Idempotent — re-run bez efektów ubocznych. +""" +from __future__ import annotations + +import logging +import time + +from sqlalchemy import select + +from app.connectors.paradisehill import ParadisehillConnector, _parse_detail +from app.db import session_scope +from app.models import Movie, MovieExternalRef, Tag +from app.models.movie import MovieTag +from app.models.source import Source + +log = logging.getLogger(__name__) + + +def main() -> int: + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s") + c = ParadisehillConnector() + done = 0 + new_tags_total = 0 + errors = 0 + + with session_scope() as session: + # Filtr: tylko paradisehill movies (sources.name = 'paradisehill') bez tagów. + # Wcześniej szukałem po external_id NOT LIKE '%:%' co łapało też mangoporn/ + # pandamovies/streamporn (slugi bez `:`) → 404 spam. + pdh_src_id = session.execute( + select(Source.id).where(Source.name == "paradisehill") + ).scalar_one() + rows = session.execute( + select(Movie.id, MovieExternalRef.external_id) + .join(MovieExternalRef, MovieExternalRef.movie_id == Movie.id) + .where(MovieExternalRef.source_id == pdh_src_id) + .where(~Movie.id.in_(select(MovieTag.movie_id).distinct())) + ).all() + log.info("paradisehill movies without tags: %d", len(rows)) + + for movie_id, hex_id in rows: + try: + r = c._client.get(f"/{hex_id}/") + if r.status_code != 200: + errors += 1 + continue + raw_movie = _parse_detail(hex_id, r.text) + if raw_movie is None or not raw_movie.tags: + done += 1 + continue + with session_scope() as s: + for raw_tag in raw_movie.tags: + tag = s.execute( + select(Tag).where(Tag.slug == raw_tag.slug) + ).scalar_one_or_none() + if tag is None: + tag = Tag(name=raw_tag.name, slug=raw_tag.slug) + s.add(tag) + s.flush() + exists = s.execute( + select(MovieTag).where( + MovieTag.movie_id == movie_id, + MovieTag.tag_id == tag.id, + ) + ).scalar_one_or_none() + if exists is None: + s.add(MovieTag(movie_id=movie_id, tag_id=tag.id)) + new_tags_total += 1 + done += 1 + if done % 50 == 0: + log.info( + "progress: done=%d/%d new_tags=%d errors=%d", + done, len(rows), new_tags_total, errors, + ) + time.sleep(0.05) # gentle rate-limit + except Exception as e: + errors += 1 + if errors <= 5: + log.warning("hex=%s failed: %s", hex_id, e) + + log.info( + "DONE: processed=%d/%d new_tags=%d errors=%d", + done, len(rows), new_tags_total, errors, + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/scripts/publish_update.py b/scripts/publish_update.py index a8edca0..8e47048 100644 --- a/scripts/publish_update.py +++ b/scripts/publish_update.py @@ -85,7 +85,11 @@ def main() -> int: created_at = datetime.now(UTC).isoformat().replace("+00:00", "Z") def asset_url(rel_path: str) -> str: - # rel_path = "_expo/static/js/android/abc.hbc" lub "assets/abc" + # rel_path = "_expo/static/js/android/abc.hbc" lub "assets/abc". + # Windows: Expo metadata.json używa os.sep (`\`) w assets[].path. Normalizujemy + # do `/` żeby URL był poprawny path-side (Linux backend nie traktuje `\` jako + # separatora — bez tego mobile dostaje 404 na każdy asset i odrzuca update). + rel_path = rel_path.replace("\\", "/") return f"{PUBLIC_BASE}?asset={update_id}/{rel_path}&runtimeVersion={args.runtime}&platform=android" launch_asset = {