User-facing bugs resolved (per bug_reports table, 2026-05-25):
- 40cd28aa (short-scene filter): mobile api.ts default min_duration_sec=60 hides 6519 sub-60s scenes across all list endpoints (Performer/Site/Tag/Browse). Callers may override it with an explicit 0.
- 5e89ef7e (porndoe needs cookies/play click): INJECTED_JS in PlayerScreen now auto-clicks the player-poster overlay (player-poster-play, big-play-button, vjs-big-play-button, jw-icon-display, btn-big-play, mejs__overlay-button, play-button, btn-play, videoPlayButton). It is triggered on the same interval as consent-dismiss + ad-iframe removal.
- b1b5e1a2 (Mixdrop "czarny ekran", i.e. black screen): re-enable the mixdrop direct stream via the VPS curl_cffi proxy (was: skip → WebView fallback → blank screen). The backend pipeline (mixdrop.py extract + stream_proxy._curl_cffi_stream with JA3 + auto-refetch on token expiry) was already complete; this change just removes the skip in app/api/playback.py.
Plus ongoing WIP: paradisehill multi-part extraction, stream_proxy refetch logic, gesture race fix for long-press 2x speed, anti-adblock INJECTED_JS defenses, scripts for freshporno backfill, new sources API.
104 lines
3.3 KiB
Python
104 lines
3.3 KiB
Python
"""One-shot: backfill `release_date` for freshporno scenes that were scraped before
the `itemprop="uploadDate"` regex was added.

Background: the 2026-05-20 bug report ("brak Brazzers Exxtra po 15-05", i.e. "no
Brazzers Exxtra after 15-05") forced adding `release_date` extraction from
`itemprop="uploadDate"` to the freshporno connector. Older scenes (from before that
patch) have `release_date = NULL`, so scene_resolver cannot compute the date-overlap
signal → score < 0.92 → the scene stays an orphan instead of being merged with the
TPDB canonical.

10468 orphan freshporno scenes (vs 4789 canonical) — 99% without release_date.
After the backfill the resolver auto-merges them on the next bulk-dedup tick.

Idempotent: a scene is updated only when its current `release_date IS NULL` and the
`uploadDate` extraction succeeds.
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import re
|
|
from datetime import UTC, date, datetime
|
|
|
|
import httpx
|
|
from sqlalchemy import select
|
|
|
|
from app.db import session_scope
|
|
from app.models import Scene
|
|
from app.models.playback_source import PlaybackSource
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/140.0.0.0"
|
|
_UPLOAD_DATE_RE = re.compile(
|
|
r'itemprop="uploadDate"[^>]+content="(\d{4}-\d{2}-\d{2})',
|
|
)
|
|
|
|
|
|
def _parse_upload_date(html: str) -> date | None:
    """Return the `uploadDate` found in *html*, or None if absent or not a real date."""
    match = _UPLOAD_DATE_RE.search(html)
    if match is None:
        return None
    try:
        return date.fromisoformat(match.group(1))
    except ValueError:
        # The pattern only checks the digit shape, so e.g. "2026-13-40" lands here.
        return None


def main() -> int:
    """Backfill `Scene.release_date` for freshporno scenes that have none.

    Selects every (scene id, page URL) pair whose playback source has origin
    "tube:freshpornoorg" and whose scene has a NULL `release_date`, fetches
    each page, extracts the `uploadDate`, and stores it on the scene.

    Each scene is written in its own session scope and re-checked right before
    the write, so re-running the script never overwrites an existing date.

    Counters reported in the log:
      * updated - a date was extracted and written;
      * skipped - nothing to do (page gone with 404/410, no usable date on the
        page, no page URL, or the scene was deleted / already dated);
      * errors  - any other HTTP status, or an exception while fetching or
        writing.

    Returns:
        Always 0 (process exit code); per-scene failures are only counted and
        logged.
    """
    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")

    # Load all candidates up front so no session stays open during the slow
    # HTTP phase; the rows carry plain column values only.
    with session_scope() as session:
        rows = session.execute(
            select(Scene.id, PlaybackSource.page_url)
            .join(PlaybackSource, PlaybackSource.scene_id == Scene.id)
            .where(PlaybackSource.origin == "tube:freshpornoorg")
            .where(Scene.release_date.is_(None))
        ).all()
    log.info("freshporno scenes without release_date: %d", len(rows))

    updated = 0
    skipped = 0
    errors = 0

    # Context manager guarantees the connection pool is released even when the
    # run is interrupted (Ctrl-C is not caught by `except Exception`).
    with httpx.Client(
        timeout=15.0,
        follow_redirects=True,
        headers={"User-Agent": USER_AGENT},
    ) as client:
        for scene_id, page_url in rows:
            try:
                if not page_url:
                    # Nothing to fetch; count it as a skip rather than an error.
                    skipped += 1
                    continue

                response = client.get(page_url)
                if response.status_code != 200:
                    if response.status_code in (404, 410):
                        # Page is permanently gone; retrying will not help.
                        skipped += 1
                    else:
                        errors += 1
                    continue

                release_date = _parse_upload_date(response.text)
                if release_date is None:
                    skipped += 1
                    continue

                with session_scope() as s:
                    scene = s.get(Scene, scene_id)
                    # Re-check inside the write scope: the scene may have been
                    # deleted or dated since the candidate query (the join can
                    # also yield the same scene more than once).
                    if scene is None or scene.release_date is not None:
                        skipped += 1
                        continue
                    scene.release_date = release_date

                # Counted only after the session scope exited without raising.
                updated += 1
                if updated % 100 == 0:
                    log.info(
                        "progress: updated=%d skipped=%d errors=%d (%d/%d)",
                        updated, skipped, errors,
                        updated + skipped + errors, len(rows),
                    )
            except Exception as e:
                # Best-effort backfill: one bad scene must not abort the run.
                errors += 1
                if errors <= 5:
                    # Cap the warnings so a systemic failure does not flood the log.
                    log.warning("scene=%s url=%s failed: %s", scene_id, page_url, e)

    log.info(
        "DONE: candidates=%d updated=%d skipped=%d errors=%d",
        len(rows), updated, skipped, errors,
    )
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
|
|
raise SystemExit(main())
|