goon/tests/test_normalize.py
goon-foss ad0284585b Initial commit
Goon — self-hosted aggregator for adult-content scene metadata.

Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites.
Cross-source deduplication via perceptual hash + Levenshtein distance.
FastAPI backend + APScheduler worker + React Native (Expo) mobile client.

FOSS, ad-free, donation-funded. See README for details.
2026-05-20 10:10:22 +02:00

50 lines
1.4 KiB
Python

import pytest
from app.normalize.text import normalize, normalize_person, slugify, strip_accents
@pytest.mark.parametrize(
    ("raw", "expected"),
    [
        ("Café Noir", "cafe noir"),
        ("naïve", "naive"),
        ("Łódź", "lodz"),
    ],
)
def test_strip_accents_and_lower(raw: str, expected: str) -> None:
    """Check that normalize() yields the accent-free, lower-case form of each input."""
    actual = normalize(raw)
    assert actual == expected
def test_normalize_drops_leading_articles() -> None:
    """Check that a leading "The", "A" or "An" is absent from the normalized title."""
    # Input title -> expected normalized output, checked in declaration order.
    cases = (
        ("The Big Lebowski", "big lebowski"),
        ("A Quick One", "quick one"),
        ("An Honest Take", "honest take"),
    )
    for title, wanted in cases:
        assert normalize(title) == wanted
def test_normalize_collapses_whitespace_and_punct() -> None:
    """Check that punctuation runs and separators come out as single spaces (or nothing at the end)."""
    # Input text -> expected normalized output, checked in declaration order.
    cases = (
        ("Hello, world!!!", "hello world"),
        ("foo--bar__baz", "foo bar baz"),
    )
    for text, wanted in cases:
        assert normalize(text) == wanted
def test_normalize_handles_none_and_empty() -> None:
    """Check that None, an empty string and a blank string all normalize to ""."""
    for blank in (None, "", " "):
        assert normalize(blank) == ""
def test_normalize_person_keeps_initials() -> None:
    """Check that normalize_person() keeps a trailing initial and lower-cases full names."""
    # "Mia M." -> "mia m" (the period is removed as punctuation)
    abbreviated = normalize_person("Mia M.")
    assert abbreviated == "mia m"

    full_name = normalize_person("Mia Malkova")
    assert full_name == "mia malkova"
def test_slugify_is_url_safe() -> None:
    """Check that slugify() gives lower-case, hyphen-joined slugs and maps "" to ""."""
    # Input name -> expected slug, checked in declaration order.
    cases = (
        ("Brazzers Network", "brazzers-network"),
        ("Mia Malkova", "mia-malkova"),
        ("", ""),
    )
    for name, wanted in cases:
        assert slugify(name) == wanted
def test_strip_accents_idempotent() -> None:
    """Check that applying strip_accents() a second time leaves the result unchanged."""
    first_pass = strip_accents("naïve")
    second_pass = strip_accents(first_pass)
    # Together these two asserts equal the chained form: first == second == "naive".
    assert first_pass == second_pass
    assert second_pass == "naive"