From 32919d6a6cbd8bdefe24bab54fa535fff083776f Mon Sep 17 00:00:00 2001
From: jtrzupek
Date: Sat, 13 Jun 2026 19:04:10 +0200
Subject: [PATCH] feat(extractors): detect deleted porntrex videos and mark dead

Porntrex soft-deletes: a removed video returns HTTP 200 with a "this video
was deleted" message instead of a player, so extract returned [] (transient)
and the source was never marked dead, leaving users on a permanently broken
link (report 75dbf53e). Match the deletion message and raise HosterDead so
resolve marks the source dead. The match only counts when the page carries
no player flashvars, so a live page that merely mentions one of the phrases
is not marked dead.

Co-Authored-By: Claude Fable 5
---
 app/extractors/tubes/porntrex.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/app/extractors/tubes/porntrex.py b/app/extractors/tubes/porntrex.py
index 37aaa7a..2ecc0c5 100644
--- a/app/extractors/tubes/porntrex.py
+++ b/app/extractors/tubes/porntrex.py
@@ -30,12 +30,22 @@ import re
 import time
 
 from app.extractors._fetch import _DEFAULT_IMPERSONATE, _DEFAULT_UA, _HAS_CURL_CFFI, fetch_tube_html
-from app.extractors._models import StreamSource
+from app.extractors._models import HosterDead, StreamSource
 
 log = logging.getLogger(__name__)
 
 _BASE = "https://www.porntrex.com"
 
+# Porntrex soft-delete: a removed video returns HTTP 200 with a message page (e.g.
+# "this video was deleted per copyright owner request") instead of a player; without
+# this check extract returned [] = transient → the source was NEVER marked dead → users
+# kept clicking a dead link (report 75dbf53e). Match → raise HosterDead → resolve mark-dead.
+_DEAD_RE = re.compile(
+    r"this video (?:was|has been) deleted|video (?:was|has been) removed"
+    r"|no longer available|video is unavailable",
+    re.IGNORECASE,
+)
+
 # flashvars: `video_url: 'https://.../get_file/...mp4/'` + `video_url_text: '480p'`.
 # Warianty: video_url, video_alt_url, video_alt_url2, video_alt_url3...
 _URL_RE = re.compile(
@@ -107,6 +117,11 @@ def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | Non
     else:
         html = fetch_tube_html(page_url, timeout=timeout)
 
+    # Soft-delete: page is alive (200) but has no player flashvars and shows a deletion
+    # message → mark dead. Empty html = fetch failure = transient, NOT dead.
+    if html and not _URL_RE.search(html) and _DEAD_RE.search(html):
+        raise HosterDead(f"porntrex {page_url}: video deleted/removed")
+
     # Mapa → quality label (np. video_alt_url → "720p HD").
     quality_by_var: dict[str, str] = {}
     for m in _TEXT_RE.finditer(html):