Goon — self-hosted aggregator for adult-content scene metadata. Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites. Cross-source deduplication via perceptual hash + Levenshtein distance. FastAPI backend + APScheduler worker + React Native (Expo) mobile client. FOSS, ad-free, donation-funded. See README for details.
33 lines
1.3 KiB
Python
33 lines
1.3 KiB
Python
"""XVideos.com — direct HTML scrape search results.
|
|
|
|
Search: `https://www.xvideos.com/?k=<q>&p=<page-1>` (XVideos uses 0-indexed pages)
|
|
Scene URL: `https://www.xvideos.com/video<digits>/<slug>`
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
import urllib.parse
|
|
|
|
from app.connectors.direct_scrapers._search_base import BaseSearchScraper
|
|
|
|
|
|
class XVideosScraper(BaseSearchScraper):
    """Search scraper for xvideos.com result pages.

    XVideos numbers its result pages from 0 (``&p=0`` is the first page),
    while callers of :meth:`search` pass 1-indexed page numbers, so
    :meth:`search` shifts the page by one before delegating to the base class.
    """

    sitetag = "xvideoscom"

    # The ``{page}`` placeholder must end up 0-indexed in the final URL.
    # ``search`` pins it to ``page - 1`` before the base class uses the template.
    _search_url_template = "https://www.xvideos.com/?k={query}&p={page}"

    # Matches scene links of the form ``/video<id>/<slug>`` inside ``href``.
    _scene_url_re = re.compile(
        r'href="(?P<url>/video[a-z0-9.\-]+/(?P<slug>[a-z0-9_\-]+))"',
        re.IGNORECASE,
    )

    def search(self, query, *, page=1, limit=None):
        """Yield the base-class search results for a 1-indexed ``page``.

        The base class is expected to build the request URL from
        ``self._search_url_template`` (NOTE(review): confirm against
        ``BaseSearchScraper``). For the duration of the delegated search the
        instance therefore carries a copy of the template in which ``{page}``
        is already replaced by ``page - 1``.

        Args:
            query: Search phrase, passed through to the base class unchanged.
            page: 1-indexed result page; ``page=1`` becomes ``&p=0`` in the URL.
            limit: Passed through to the base class unchanged.

        Yields:
            Whatever ``BaseSearchScraper.search`` yields.

        Note:
            The temporary override lives on the instance, so this is still not
            safe for concurrent use of one instance from several threads. An
            instance-level ``_search_url_template`` set by other code is
            ignored and removed; the class attribute is the source of truth.
        """
        # Always derive from the class-level template, never from whatever is
        # currently on the instance. This is a generator, so another search on
        # the same instance may be suspended with its own pinned template still
        # installed; re-reading that copy would make ``replace`` a no-op and
        # silently request the other search's page.
        pristine = type(self)._search_url_template
        self._search_url_template = pristine.replace("{page}", str(page - 1))
        try:
            yield from super().search(query, page=page, limit=limit)
        finally:
            # Drop the instance override instead of writing back a saved value:
            # a saved value could itself be a pinned template from an
            # overlapping search, which would corrupt every later call.
            vars(self).pop("_search_url_template", None)
|