goon/app/connectors/direct_scrapers/pornhub.py
goon-foss ad0284585b Initial commit
Goon — self-hosted aggregator for adult-content scene metadata.

Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites.
Cross-source deduplication via perceptual hash + Levenshtein distance.
FastAPI backend + APScheduler worker + React Native (Expo) mobile client.

FOSS, ad-free, donation-funded. See README for details.
2026-05-20 10:10:22 +02:00

24 lines
903 B
Python

"""PornHub.com — direct HTML scrape search results.
Search: `https://www.pornhub.com/video/search?search=<q>&page=<n>`
Scene URL: `https://www.pornhub.com/view_video.php?viewkey=<id>`
"""
from __future__ import annotations
import re
from app.connectors.direct_scrapers._search_base import BaseSearchScraper
class PornHubScraper(BaseSearchScraper):
    """Search-result scraper configuration for pornhub.com.

    Declares the site tag, the search URL template and the regex that picks
    scene links out of the search-results HTML, and overrides the slug hook.
    How these members are consumed is defined by ``BaseSearchScraper``, which
    is not visible in this file.
    """

    sitetag = "pornhubcom"
    _search_url_template = "https://www.pornhub.com/video/search?search={query}&page={page}"
    # Relative scene link inside an href attribute; the named group ``url``
    # captures the path together with its alphanumeric viewkey.
    _scene_url_re = re.compile(r'href="(?P<url>/view_video\.php\?viewkey=[A-Za-z0-9]+)"')

    def _slug_from_match(self, m, scene_url):
        """Return the viewkey of the matched scene link, used as its slug.

        Pornhub URLs carry no human-readable slug, so the viewkey stands in
        for it during query-token filtering. Any title derived from it is
        just the short ID; the real title is backfilled at resolve time
        (yt-dlp provides the metadata).

        Args:
            m: Match object exposing a ``url`` group (presumably produced by
                ``_scene_url_re`` — confirm against the base class).
            scene_url: Not used by this implementation.

        Returns:
            The text after the last ``=`` in the matched ``url`` group.
        """
        matched_path = m.group("url")
        # Everything after the final "=" is the viewkey.
        _, _, viewkey = matched_path.rpartition("=")
        return viewkey