Goon — self-hosted aggregator for adult-content scene metadata. Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites. Cross-source deduplication via perceptual hash + Levenshtein distance. FastAPI backend + APScheduler worker + React Native (Expo) mobile client. FOSS, ad-free, donation-funded. See README for details.
24 lines
903 B
Python
24 lines
903 B
Python
"""PornHub.com — direct HTML scrape search results.
|
|
|
|
Search: `https://www.pornhub.com/video/search?search=<q>&page=<n>`
|
|
Scene URL: `https://www.pornhub.com/view_video.php?viewkey=<id>`
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
|
|
from app.connectors.direct_scrapers._search_base import BaseSearchScraper
|
|
|
|
|
|
class PornHubScraper(BaseSearchScraper):
    """Search-scraper configuration for pornhub.com result pages.

    Declares the site tag, the search URL template and the pattern that
    locates scene links. How these attributes are consumed is defined by
    ``BaseSearchScraper``, which is not part of this module.
    """

    sitetag = "pornhubcom"
    _search_url_template = "https://www.pornhub.com/video/search?search={query}&page={page}"
    # Captures the relative watch-page link (named group ``url``) from an
    # ``href="..."`` attribute.
    _scene_url_re = re.compile(r'href="(?P<url>/view_video\.php\?viewkey=[A-Za-z0-9]+)"')

    def _slug_from_match(self, m, scene_url) -> str:
        """Return the viewkey of the matched scene link, used as its slug.

        Pornhub scene URLs carry no human-readable slug, so the viewkey
        stands in for it during query-token filtering. Any title derived
        from it is only the short ID; per the original author's note, the
        real title is backfilled at resolve time from yt-dlp metadata.

        Args:
            m: Regex match exposing a ``url`` named group.
            scene_url: Not used by this implementation.

        Returns:
            The text following the last ``=`` in the matched link.
        """
        matched_link = m.group("url")
        # rpartition yields the whole string as the tail when no "=" is
        # present, which mirrors split("=")[-1].
        _, _, viewkey = matched_link.rpartition("=")
        return viewkey
|