Goon — self-hosted aggregator for adult-content scene metadata. Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites. Cross-source deduplication via perceptual hash + Levenshtein distance. FastAPI backend + APScheduler worker + React Native (Expo) mobile client. FOSS, ad-free, donation-funded. See README for details.
60 lines
2.1 KiB
Python
60 lines
2.1 KiB
Python
"""Unit tests for RawScene → NormalizedScene normalization (no DB)."""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from app.connectors.tpdb import _parse_scene
|
|
from app.normalize.scenes import normalize_performer, normalize_scene, normalize_studio
|
|
from app.normalize.text import normalize, normalize_person
|
|
|
|
|
|
@pytest.fixture
def tpdb_scene_raw() -> dict:
    """Load the raw TPDB scene payload used by the tests in this module.

    The payload is read from ``fixtures/tpdb_scene.json``, resolved relative
    to this test file. The file is decoded explicitly as UTF-8 so the result
    does not depend on the platform's default locale encoding.

    Returns:
        The parsed JSON document.
    """
    path = Path(__file__).parent / "fixtures" / "tpdb_scene.json"
    return json.loads(path.read_text(encoding="utf-8"))
|
|
|
|
|
|
def test_normalize_scene_computes_indexable_fields(tpdb_scene_raw: dict) -> None:
    """Check the title, normalized title, slug and release date of a normalized scene."""
    parsed = _parse_scene(tpdb_scene_raw)
    assert parsed is not None

    scene = normalize_scene(parsed)

    assert scene.title == "The Great Heist"
    # Expected normalized form: lowercased, with the leading "the" removed.
    assert scene.title_normalized == "great heist"
    assert scene.slug == "the-great-heist"
    # The release date must equal the one carried by the parsed raw scene.
    assert scene.release_date == parsed.release_date
|
|
|
|
|
|
def test_normalize_studio(tpdb_scene_raw: dict) -> None:
    """Check the normalized name, slug and network of the fixture scene's studio."""
    parsed = _parse_scene(tpdb_scene_raw)
    assert parsed is not None
    assert parsed.studio is not None

    studio = normalize_studio(parsed.studio)

    # The normalized name must match what normalize() yields for the display name.
    assert studio.name_normalized == normalize("Brazzers Exxtra")
    assert studio.slug == "brazzers-exxtra"
    assert studio.network == "MindGeek"
|
|
|
|
|
|
def test_normalize_performer_aliases_dedup_and_lowercase(tpdb_scene_raw: dict) -> None:
    """Check the names and alias lists of the first performer in the fixture scene."""
    parsed = _parse_scene(tpdb_scene_raw)
    assert parsed is not None

    first_raw = parsed.performers[0]
    performer = normalize_performer(first_raw)

    assert performer.canonical_name == "Mia Malkova"
    assert performer.name_normalized == normalize_person("Mia Malkova")

    # `aliases` is expected to hold the original spelling, while
    # `aliases_normalized` holds the normalize_person() form of the same alias.
    assert "Madison Clover" in performer.aliases
    assert normalize_person("Madison Clover") in performer.aliases_normalized

    # No alias may occur more than once in the list.
    assert len(performer.aliases) == len(set(performer.aliases))
|
|
|
|
|
|
def test_normalize_scene_preserves_performer_order(tpdb_scene_raw: dict) -> None:
    """Check that the normalized scene lists its performers in the expected order."""
    parsed = _parse_scene(tpdb_scene_raw)
    assert parsed is not None

    scene = normalize_scene(parsed)
    names = [performer.canonical_name for performer in scene.performers]

    assert names == ["Mia Malkova", "Johnny Sins"]
|