goon/tests/test_normalize_scenes.py
goon-foss ad0284585b Initial commit
Goon — self-hosted aggregator for adult-content scene metadata.

Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites.
Cross-source deduplication via perceptual hash + Levenshtein distance.
FastAPI backend + APScheduler worker + React Native (Expo) mobile client.

FOSS, ad-free, donation-funded. See README for details.
2026-05-20 10:10:22 +02:00

60 lines
2.1 KiB
Python

"""Unit tests for RawScene → NormalizedScene normalization (no DB)."""
from __future__ import annotations
import json
from pathlib import Path
import pytest
from app.connectors.tpdb import _parse_scene
from app.normalize.scenes import normalize_performer, normalize_scene, normalize_studio
from app.normalize.text import normalize, normalize_person
@pytest.fixture
def tpdb_scene_raw() -> dict:
    """Return the decoded contents of ``fixtures/tpdb_scene.json``.

    The file is looked up relative to this test module, so the fixture works
    regardless of the directory pytest is started from.
    """
    fixture_path = Path(__file__).parent / "fixtures" / "tpdb_scene.json"
    # Decode explicitly as UTF-8: without it ``read_text`` falls back to the
    # locale's preferred encoding, which is not UTF-8 on every platform.
    return json.loads(fixture_path.read_text(encoding="utf-8"))
def test_normalize_scene_computes_indexable_fields(tpdb_scene_raw: dict) -> None:
    """Check title, normalized title, slug and release date of the normalized fixture scene."""
    parsed = _parse_scene(tpdb_scene_raw)
    assert parsed is not None

    scene = normalize_scene(parsed)

    assert scene.title == "The Great Heist"
    # Leading "the" is dropped and the remainder is lowercased.
    assert scene.title_normalized == "great heist"
    assert scene.slug == "the-great-heist"
    assert scene.release_date == parsed.release_date
def test_normalize_studio(tpdb_scene_raw: dict) -> None:
    """Check normalized name, slug and network of the fixture scene's studio."""
    parsed = _parse_scene(tpdb_scene_raw)
    assert parsed is not None

    raw_studio = parsed.studio
    assert raw_studio is not None

    studio = normalize_studio(raw_studio)

    assert studio.name_normalized == normalize("Brazzers Exxtra")
    assert studio.slug == "brazzers-exxtra"
    assert studio.network == "MindGeek"
def test_normalize_performer_aliases_dedup_and_lowercase(tpdb_scene_raw: dict) -> None:
    """Check canonical name, normalized name and alias handling for the first fixture performer."""
    parsed = _parse_scene(tpdb_scene_raw)
    assert parsed is not None

    performer = normalize_performer(parsed.performers[0])

    assert performer.canonical_name == "Mia Malkova"
    assert performer.name_normalized == normalize_person("Mia Malkova")

    # `aliases` keeps the original casing; `aliases_normalized` holds the
    # normalized form of the same alias.
    assert "Madison Clover" in performer.aliases
    assert normalize_person("Madison Clover") in performer.aliases_normalized

    # No alias appears twice: collapsing the list into a set keeps its size.
    assert len(performer.aliases) == len(set(performer.aliases))
def test_normalize_scene_preserves_performer_order(tpdb_scene_raw: dict) -> None:
    """Check that the normalized scene lists performers in the expected order."""
    parsed = _parse_scene(tpdb_scene_raw)
    assert parsed is not None

    scene = normalize_scene(parsed)
    performer_names = [performer.canonical_name for performer in scene.performers]

    assert performer_names == ["Mia Malkova", "Johnny Sins"]