"""pornditt.com — direct HTML scrape.
KVS-style site (kt_player engine). Search URL: `/search/<query>/?from=<page>` with a
slug-style query (spaces → `-`). Scenes are rendered on the subdomain
`v.pornditt.com/videos/<id>/<slug>/`, so the regex matches both forms (with and without
the `v.` prefix).
Sitetag `porndittcom` (legacy from porn-app DEFAULT_SITETAGS — suffix-stripped name).
"""
from __future__ import annotations
import re
from app.connectors.direct_scrapers._search_base import BaseSearchScraper
class PornDittScraper(BaseSearchScraper):
    """Search scraper configuration for pornditt.com (KVS-style site).

    Provides the site tag, the search URL template, the regex that picks
    scene links out of the result HTML, and the query-to-slug formatter.
    How these attributes are consumed is defined by ``BaseSearchScraper``.
    """

    sitetag = "porndittcom"

    # Placeholders `{query}` and `{page}` are presumably filled in by the base
    # class (query via `_format_query_for_url`) — verify against BaseSearchScraper.
    _search_url_template = "https://pornditt.com/search/{query}/?from={page}"

    # Matches scene links on both `pornditt.com` and the `v.pornditt.com`
    # subdomain. The `url` group excludes the trailing slash.
    # NOTE(review): the group names (`url`, `id`, `slug`) must match what
    # BaseSearchScraper reads from the match object — confirm.
    _scene_url_re = re.compile(
        r'href="(?P<url>https://(?:v\.)?pornditt\.com/videos/(?P<id>\d+)/(?P<slug>[a-z0-9\-]+))/"',
        re.IGNORECASE,
    )

    def _format_query_for_url(self, query: str) -> str:
        """Return *query* converted to a KVS search slug.

        The query is lowercased, every run of characters outside ``[a-z0-9]``
        (spaces, punctuation, non-ASCII letters) becomes a single ``-``, and
        leading/trailing dashes are trimmed.
        """
        # The site expects a slug here; a URL-encoded query (`+` for spaces)
        # does not work.
        return re.sub(r"[^a-z0-9]+", "-", query.lower()).strip("-")