"""XVideos.com — direct HTML scrape search results.
Search: `https://www.xvideos.com/?k=<query>&p=<page>` (XVideos uses 0-indexed pages)
Scene URL: `https://www.xvideos.com/video<id>/<slug>`
"""
from __future__ import annotations
import re
import urllib.parse
from app.connectors.direct_scrapers._search_base import BaseSearchScraper
class XVideosScraper(BaseSearchScraper):
    """Search scraper for XVideos.com result pages.

    XVideos numbers its result pages from 0, while ``search`` accepts a
    1-indexed ``page``; ``search`` is overridden here to translate between
    the two before delegating to the base class.
    """

    sitetag = "xvideoscom"
    # ``{page}`` is the site's 0-indexed page number. ``search`` substitutes
    # it itself, so the base class only has ``{query}`` left to fill in.
    _search_url_template = "https://www.xvideos.com/?k={query}&p={page}"
    # NOTE(review): the named-group identifiers were missing from this
    # pattern (``(?P/...`` is not valid regex syntax and raises re.error).
    # ``path`` and ``slug`` are a reconstruction — confirm them against the
    # group names BaseSearchScraper reads from the match.
    _scene_url_re = re.compile(
        r'href="(?P<path>/video[a-z0-9.\-]+/(?P<slug>[a-z0-9_\-]+))"',
        re.IGNORECASE,
    )

    def search(self, query, *, page=1, limit=None):
        """Yield search results for ``query`` from the given 1-indexed page.

        Args:
            query: Search phrase, forwarded unchanged to the base class.
            page: 1-indexed page number. It is sent to the site as
                ``p=page - 1``; values below 1 are clamped to the first page.
            limit: Forwarded unchanged to the base class.

        Yields:
            Whatever ``BaseSearchScraper.search`` produces for this page.
        """
        template = self._search_url_template
        # Clamp so that page <= 0 can never produce a negative ``p=`` value.
        site_page = max(page - 1, 0)
        # The base class is expected to build its URL from
        # ``self._search_url_template``, so the 0-indexed page number is baked
        # into the template for the duration of the base-class call.
        self._search_url_template = template.replace("{page}", str(site_page))
        try:
            # Collect the results while the patched template is active.
            # Yielding from inside this ``try`` would leave the instance
            # patched for as long as the caller keeps the generator suspended,
            # so a second search started in the meantime would reuse this
            # page number and could restore the wrong template afterwards.
            results = list(super().search(query, page=page, limit=limit))
        finally:
            self._search_url_template = template
        yield from results