"""Unit testy normalizacji RawScene → NormalizedScene (bez DB).""" from __future__ import annotations import json from pathlib import Path import pytest from app.connectors.tpdb import _parse_scene from app.normalize.scenes import normalize_performer, normalize_scene, normalize_studio from app.normalize.text import normalize, normalize_person @pytest.fixture def tpdb_scene_raw() -> dict: path = Path(__file__).parent / "fixtures" / "tpdb_scene.json" return json.loads(path.read_text()) def test_normalize_scene_computes_indexable_fields(tpdb_scene_raw: dict) -> None: raw = _parse_scene(tpdb_scene_raw) assert raw is not None norm = normalize_scene(raw) assert norm.title == "The Great Heist" assert norm.title_normalized == "great heist" # leading "the" dropped, lowercased assert norm.slug == "the-great-heist" assert norm.release_date == raw.release_date def test_normalize_studio(tpdb_scene_raw: dict) -> None: raw = _parse_scene(tpdb_scene_raw) assert raw is not None assert raw.studio is not None norm = normalize_studio(raw.studio) assert norm.name_normalized == normalize("Brazzers Exxtra") assert norm.slug == "brazzers-exxtra" assert norm.network == "MindGeek" def test_normalize_performer_aliases_dedup_and_lowercase(tpdb_scene_raw: dict) -> None: raw = _parse_scene(tpdb_scene_raw) assert raw is not None mia_raw = raw.performers[0] mia = normalize_performer(mia_raw) assert mia.canonical_name == "Mia Malkova" assert mia.name_normalized == normalize_person("Mia Malkova") # aliases preserve original case in `aliases`, normalized form in `aliases_normalized` assert "Madison Clover" in mia.aliases assert normalize_person("Madison Clover") in mia.aliases_normalized # de-dup preserves order — duplicate aliases would collapse assert len(mia.aliases) == len(set(mia.aliases)) def test_normalize_scene_preserves_performer_order(tpdb_scene_raw: dict) -> None: raw = _parse_scene(tpdb_scene_raw) assert raw is not None norm = normalize_scene(raw) assert [p.canonical_name for p in norm.performers] == ["Mia Malkova", "Johnny Sins"]