"""PornTrex.com — direct HTML scrape search results.
Search URL: `https://www.porntrex.com/search/<query>/` (single page; there is no `?page=` parameter).
Scene URL: `https://www.porntrex.com/video/<id>/<slug>/`
Porntrex pagination is inconsistent between views, so `?from_videos=<page>` is appended to the search URL when page > 1.
"""
from __future__ import annotations
import re
from app.connectors.direct_scrapers._search_base import BaseSearchScraper
class PornTrexScraper(BaseSearchScraper):
    """Search scraper for porntrex.com built on ``BaseSearchScraper``.

    Supplies the site tag, the search URL template and the scene-link
    pattern, and overrides :meth:`search` so that pages after the first
    carry a ``from_videos`` query parameter.
    """

    sitetag = "porntrexcom"
    _search_url_template = "https://www.porntrex.com/search/{query}/"
    # Matches absolute scene links inside ``href="..."`` attributes.
    # NOTE(review): the pattern previously contained nameless ``(?P...)``
    # groups, which ``re.compile`` rejects. The names ``url`` and ``slug``
    # are assumed -- confirm them against what BaseSearchScraper reads.
    _scene_url_re = re.compile(
        r'href="(?P<url>https://www\.porntrex\.com/video/\d+/(?P<slug>[a-z0-9_\-]+))/?"',
        re.IGNORECASE,
    )

    def search(self, query, *, page=1, limit=None):
        """Yield the results of the base-class search for one result page.

        For ``page > 1`` the search URL template is temporarily extended
        with ``?from_videos=<page>`` before delegating to the base class.

        Args:
            query: Search terms, passed through to the base class.
            page: Result page number; values above 1 add ``from_videos``.
            limit: Passed through to the base class unchanged.

        Yields:
            Whatever ``BaseSearchScraper.search`` yields.
        """
        if page <= 1:
            yield from super().search(query, page=page, limit=limit)
            return

        original = self._search_url_template
        self._search_url_template = f"{original.rstrip('/')}/?from_videos={page}"
        try:
            # Collect the page eagerly so the template is restored before
            # control returns to the consumer. Restoring only after a lazy
            # ``yield from`` would leave the page-specific template on the
            # instance while this generator is suspended or abandoned.
            results = list(super().search(query, page=page, limit=limit))
        finally:
            self._search_url_template = original
        yield from results