# goon/app/connectors/direct_scrapers/siska.py

"""siska.video — direct HTML scrape.

Search: `https://siska.video/page/<n>/?s=<q>` (still works).
Scene URL: `https://siska.video/video.php?videoID=<n>` (changed 2026-05+, formerly `/<slug>/`).

The new format has no title words in the URL (slug = videoID number), so for `slug`
(which `_search_base` uses for query token-filtering + title derivation) we take
`title='...'` from the same <a>. Fresh videos embed playmogo + luluvid → the phone
resolves them phone-side (_embed_iframe returns type='hoster'). Re-enabled 2026-06-20
(user fa4083a2).
"""
from __future__ import annotations

import re

from app.connectors.direct_scrapers._search_base import BaseSearchScraper


class SiskaScraper(BaseSearchScraper):
    """Search-scraper configuration for siska.video.

    This class only sets class-level attributes; everything else is inherited
    from ``BaseSearchScraper``.
    """

    sitetag = "siskavideo"

    # Search endpoint with ``{page}`` and ``{query}`` placeholders.
    _search_url_template = "https://siska.video/page/{page}/?s={query}"

    # Result anchors look like:
    #   <a title=' Scene Title ' href='https://siska.video/video.php?videoID=227110' ...>
    # The `slug` group deliberately captures the anchor's title: the token
    # filter and the title derivation operate on it, and the numeric videoID
    # in the URL carries no words.
    _scene_url_re = re.compile(
        r"<a\s+title='(?P<slug>[^']*)'\s+href='(?P<url>https://siska\.video/video\.php\?videoID=\d+)'",
        flags=re.IGNORECASE,
    )