Goon — self-hosted aggregator for adult-content scene metadata. Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites. Cross-source deduplication via perceptual hash + Levenshtein distance. FastAPI backend + APScheduler worker + React Native (Expo) mobile client. FOSS, ad-free, donation-funded. See README for details.
50 lines
1.4 KiB
Python
50 lines
1.4 KiB
Python
import pytest
|
|
|
|
from app.normalize.text import normalize, normalize_person, slugify, strip_accents
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("raw", "expected"),
    [
        ("Café Noir", "cafe noir"),
        ("naïve", "naive"),
        ("Łódź", "lodz"),
    ],
)
def test_strip_accents_and_lower(raw: str, expected: str) -> None:
    """Check that ``normalize`` maps each accented sample to its plain lowercase form."""
    result = normalize(raw)
    assert result == expected
|
|
|
|
|
|
def test_normalize_drops_leading_articles() -> None:
    """Check that ``normalize`` removes a leading "The", "A" or "An" from a title."""
    cases = (
        ("The Big Lebowski", "big lebowski"),
        ("A Quick One", "quick one"),
        ("An Honest Take", "honest take"),
    )
    for title, expected in cases:
        assert normalize(title) == expected
|
|
|
|
|
|
def test_normalize_collapses_whitespace_and_punct() -> None:
    """Check that punctuation and separator runs come out as single spaces."""
    cases = (
        ("Hello, world!!!", "hello world"),
        ("foo--bar__baz", "foo bar baz"),
    )
    for text, expected in cases:
        assert normalize(text) == expected
|
|
|
|
|
|
def test_normalize_handles_none_and_empty() -> None:
    """Check that ``None``, an empty string and a blank string all normalize to ``""``."""
    for blank in (None, "", " "):
        assert normalize(blank) == ""
|
|
|
|
|
|
def test_normalize_person_keeps_initials() -> None:
    """Check that ``normalize_person`` keeps a trailing initial as part of the name."""
    cases = (
        # "Mia M." -> "mia m": the period is removed as punctuation.
        ("Mia M.", "mia m"),
        ("Mia Malkova", "mia malkova"),
    )
    for name, expected in cases:
        assert normalize_person(name) == expected
|
|
|
|
|
|
def test_slugify_is_url_safe() -> None:
    """Check that ``slugify`` yields lowercase, hyphen-joined slugs and ``""`` for ``""``."""
    cases = (
        ("Brazzers Network", "brazzers-network"),
        ("Mia Malkova", "mia-malkova"),
        ("", ""),
    )
    for label, expected_slug in cases:
        assert slugify(label) == expected_slug
|
|
|
|
|
|
def test_strip_accents_idempotent() -> None:
    """Check that re-applying ``strip_accents`` to its own output changes nothing."""
    first_pass = strip_accents("naïve")
    second_pass = strip_accents(first_pass)
    # Same two comparisons as the chained form: first == second, second == "naive".
    assert first_pass == second_pass
    assert second_pass == "naive"
|