feat(extractors): detect deleted porntrex videos and mark dead
Porntrex soft-deletes videos: a removed video returns HTTP 200 with a "this video was deleted" message instead of a player. As a result, extract returned [] (treated as transient) and the source was never marked dead, leaving users on a permanently broken link (report 75dbf53e). Match the deletion message and raise HosterDead so that resolve marks the source dead.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
parent
9d4384cef3
commit
32919d6a6c
1 changed file with 16 additions and 1 deletion
|
|
@@ -30,12 +30,22 @@ import re
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from app.extractors._fetch import _DEFAULT_IMPERSONATE, _DEFAULT_UA, _HAS_CURL_CFFI, fetch_tube_html
|
from app.extractors._fetch import _DEFAULT_IMPERSONATE, _DEFAULT_UA, _HAS_CURL_CFFI, fetch_tube_html
|
||||||
from app.extractors._models import StreamSource
|
from app.extractors._models import HosterDead, StreamSource
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
_BASE = "https://www.porntrex.com"
|
_BASE = "https://www.porntrex.com"
|
||||||
|
|
||||||
|
# Porntrex soft-delete: usunięte wideo zwraca HTTP 200 ze stroną z komunikatem (np.
|
||||||
|
# "this video was deleted per copyright owner request") zamiast playera → extract bez
|
||||||
|
# tego zwracał [] = transient → źródło NIGDY nie oznaczone dead → user wciąż klika
|
||||||
|
# martwy link (zgłoszenie 75dbf53e). Match → raise HosterDead → resolve mark-dead.
|
||||||
|
_DEAD_RE = re.compile(
|
||||||
|
r"this video (?:was|has been) deleted|video (?:was|has been) removed"
|
||||||
|
r"|no longer available|video is unavailable",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
|
||||||
# flashvars: `video_url: 'https://.../get_file/...mp4/'` + `video_url_text: '480p'`.
|
# flashvars: `video_url: 'https://.../get_file/...mp4/'` + `video_url_text: '480p'`.
|
||||||
# Warianty: video_url, video_alt_url, video_alt_url2, video_alt_url3...
|
# Warianty: video_url, video_alt_url, video_alt_url2, video_alt_url3...
|
||||||
_URL_RE = re.compile(
|
_URL_RE = re.compile(
|
||||||
|
|
@@ -107,6 +117,11 @@ def extract(page_url: str, *, timeout: float = 60.0) -> list[StreamSource] | Non
|
||||||
else:
|
else:
|
||||||
html = fetch_tube_html(page_url, timeout=timeout)
|
html = fetch_tube_html(page_url, timeout=timeout)
|
||||||
|
|
||||||
|
# Soft-delete: strona żyje (200) ale wideo skasowane → mark dead (nie transient).
|
||||||
|
# Tylko gdy html niepuste (puste = fetch fail = transient, NIE dead).
|
||||||
|
if html and _DEAD_RE.search(html):
|
||||||
|
raise HosterDead(f"porntrex {page_url}: video deleted/removed")
|
||||||
|
|
||||||
# Mapa <var_name> → quality label (np. video_alt_url → "720p HD").
|
# Mapa <var_name> → quality label (np. video_alt_url → "720p HD").
|
||||||
quality_by_var: dict[str, str] = {}
|
quality_by_var: dict[str, str] = {}
|
||||||
for m in _TEXT_RE.finditer(html):
|
for m in _TEXT_RE.finditer(html):
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue