Goon — self-hosted aggregator for adult-content scene metadata. Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites. Cross-source deduplication via perceptual hash + Levenshtein distance. FastAPI backend + APScheduler worker + React Native (Expo) mobile client. FOSS, ad-free, donation-funded. See README for details.
26 lines
1 KiB
Python
26 lines
1 KiB
Python
"""pornditt.com — direct HTML scrape.
|
|
|
|
KVS-style site (kt_player engine). Search URL: `/search/<slug>/?from=<page>` with a
slug-style query (spaces → `-`). Scenes are rendered on the subdomain
`v.pornditt.com/videos/<id>/<slug>/`, so the regex matches both forms (with and
without the `v.` prefix).

Sitetag `porndittcom` (legacy from porn-app DEFAULT_SITETAGS — suffix-stripped name).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
|
|
from app.connectors.direct_scrapers._search_base import BaseSearchScraper
|
|
|
|
|
|
class PornDittScraper(BaseSearchScraper):
    """Search scraper configuration for pornditt.com (KVS-style site).

    Scene links may point at either `pornditt.com` or the `v.pornditt.com`
    subdomain; `_scene_url_re` accepts both hosts.
    """

    sitetag = "porndittcom"
    _search_url_template = "https://pornditt.com/search/{query}/?from={page}"
    # Named groups: `url` (scene link without the trailing slash), `sid`
    # (numeric scene id) and `slug`. The `v.` host prefix is optional and
    # matching is case-insensitive.
    _scene_url_re = re.compile(
        r'href="(?P<url>https://(?:v\.)?pornditt\.com/videos/(?P<sid>\d+)/(?P<slug>[a-z0-9\-]+))/"',
        flags=re.IGNORECASE,
    )

    def _format_query_for_url(self, query: str) -> str:
        """Convert a free-text query into the dash-separated slug for the search URL.

        The query is lowercased, every run of characters outside `a-z0-9`
        collapses into a single `-`, and leading/trailing dashes are removed.
        """
        # KVS slug: lowercase, whitespace/punctuation → `-`.
        # A URL-encoded query (`+`) does not work here.
        lowered = query.lower()
        dashed = re.sub(r"[^a-z0-9]+", "-", lowered)
        return dashed.strip("-")
|