feat(extractors): detect deleted porntrex videos and mark dead
Porntrex soft-deletes: a removed video returns HTTP 200 with a "this video was deleted" message instead of a player, so extract returned [] (transient) and the source was never marked dead, leaving users on a permanently broken link (report 75dbf53e). Match the deletion message and raise HosterDead so resolve marks the source dead. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
parent
9d4384cef3
commit
32919d6a6c
1 changed files with 16 additions and 1 deletions
|
|
@@ -30,12 +30,22 @@ import re
|
|||
import time
|
||||
|
||||
from app.extractors._fetch import _DEFAULT_IMPERSONATE, _DEFAULT_UA, _HAS_CURL_CFFI, fetch_tube_html
|
||||
from app.extractors._models import StreamSource
|
||||
from app.extractors._models import HosterDead, StreamSource
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
_BASE = "https://www.porntrex.com"
|
||||
|
||||
# Porntrex soft-delete: a removed video returns HTTP 200 with a message page (e.g.
|
||||
# "this video was deleted per copyright owner request") instead of a player. Without
|
||||
# this check extract returned [] (= transient), so the source was NEVER marked dead and
|
||||
# users kept clicking a dead link (report 75dbf53e). On match: raise HosterDead so resolve marks it dead.
|
||||
_DEAD_RE = re.compile(
|
||||
r"this video (?:was|has been) deleted|video (?:was|has been) removed"
|
||||
r"|no longer available|video is unavailable",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
# flashvars: `video_url: 'https://.../get_file/...mp4/'` + `video_url_text: '480p'`.
|
||||
# Variants: video_url, video_alt_url, video_alt_url2, video_alt_url3...
|
||||
_URL_RE = re.compile(
|
||||
|
|
@@ -107,6 +117,11 @@ def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | Non
|
|||
else:
|
||||
html = fetch_tube_html(page_url, timeout=timeout)
|
||||
|
||||
# Soft-delete: the page is alive (200) but the video was removed → mark dead (not transient).
|
||||
# Only when html is non-empty (empty = fetch failure = transient, NOT dead).
|
||||
if html and _DEAD_RE.search(html):
|
||||
raise HosterDead(f"porntrex {page_url}: video deleted/removed")
|
||||
|
||||
# Map <var_name> → quality label (e.g. video_alt_url → "720p HD").
|
||||
quality_by_var: dict[str, str] = {}
|
||||
for m in _TEXT_RE.finditer(html):
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue