Goon — self-hosted aggregator for adult-content scene metadata. Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites. Cross-source deduplication via perceptual hash + Levenshtein distance. FastAPI backend + APScheduler worker + React Native (Expo) mobile client. FOSS, ad-free, donation-funded. See README for details.
156 lines
4.8 KiB
Python
156 lines
4.8 KiB
Python
"""Unit testy scoring engine — czyste funkcje bez DB."""
|
||
from __future__ import annotations
|
||
|
||
import uuid
|
||
from datetime import date
|
||
|
||
import pytest
|
||
|
||
from app.resolve.scoring import (
|
||
composite_score,
|
||
date_proximity,
|
||
hamming_distance_hex,
|
||
performer_set_similarity,
|
||
phash_similarity,
|
||
title_similarity,
|
||
triage,
|
||
)
|
||
|
||
|
||
# ---- hamming / phash ------------------------------------------------------
|
||
|
||
def test_hamming_zero_on_identical() -> None:
    """Two equal hex digests are zero bits apart."""
    digest = "deadbeef"
    distance = hamming_distance_hex(digest, "deadbeef")
    assert distance == 0
|
||
|
||
|
||
def test_hamming_one_bit_difference() -> None:
    """The distance is counted in differing bits, not hex characters."""
    # 0x00 vs 0x01 differ in exactly one bit.
    single_bit = hamming_distance_hex("00", "01")
    assert single_bit == 1

    # 0xFF vs 0x00 differ in all 8 bits.
    whole_byte = hamming_distance_hex("ff", "00")
    assert whole_byte == 8
|
||
|
||
|
||
def test_hamming_raises_on_length_mismatch() -> None:
    """Hex digests of unequal length are rejected with ValueError."""
    shorter, longer = "abcd", "abcdef"
    with pytest.raises(ValueError):
        hamming_distance_hex(shorter, longer)
|
||
|
||
|
||
def test_phash_similarity_64bit() -> None:
    """One differing bit out of 64 yields a similarity of 1 - 1/64."""
    # 16 hex chars = 64 bits
    all_zero = "0000000000000000"
    one_bit_off = "0000000000000001"  # differs by a single bit
    similarity = phash_similarity(all_zero, one_bit_off)
    expected = 1.0 - 1 / 64
    assert similarity == pytest.approx(expected, abs=1e-6)
|
||
|
||
|
||
def test_phash_similarity_far() -> None:
    """Hashes that differ in every bit have zero similarity."""
    all_zero = "0000000000000000"
    all_one = "ffffffffffffffff"  # all 64 bits differ
    similarity = phash_similarity(all_zero, all_one)
    assert similarity == 0.0
|
||
|
||
|
||
# ---- title similarity -----------------------------------------------------
|
||
|
||
def test_title_similarity_identical() -> None:
    """A title compared with an equal title scores 1.0."""
    title = "the great heist"
    score = title_similarity(title, "the great heist")
    assert score == 1.0
|
||
|
||
|
||
def test_title_similarity_token_set_handles_extra_words() -> None:
    """Extra surrounding words must not push the score to 0.8 or below."""
    # token_set_ratio is robust to reordering and to additional tokens
    short_title = "great heist"
    long_title = "the great heist extended cut"
    assert title_similarity(short_title, long_title) > 0.8
|
||
|
||
|
||
def test_title_similarity_empty_inputs() -> None:
    """An empty title on either side scores 0.0."""
    pairs = (("", "anything"), ("anything", ""))
    for left, right in pairs:
        assert title_similarity(left, right) == 0.0
|
||
|
||
|
||
# ---- performer set Jaccard ------------------------------------------------
|
||
|
||
def test_performer_set_full_overlap() -> None:
    """Two lists holding the same performer ids score 1.0."""
    performer_ids = [uuid.uuid4() for _ in range(2)]
    # Pass two distinct list objects with equal contents, as separate inputs.
    left, right = list(performer_ids), list(performer_ids)
    assert performer_set_similarity(left, right) == 1.0
|
||
|
||
|
||
def test_performer_set_partial_overlap() -> None:
    """One shared performer out of three distinct ones scores 1/3."""
    shared = uuid.uuid4()
    only_left = uuid.uuid4()
    only_right = uuid.uuid4()
    # |intersection| = 1, |union| = 3 → 1/3
    similarity = performer_set_similarity([shared, only_left], [shared, only_right])
    assert similarity == pytest.approx(1 / 3)
|
||
|
||
|
||
def test_performer_set_empty_inputs() -> None:
    """Two empty performer lists score 0.0."""
    similarity = performer_set_similarity([], [])
    assert similarity == 0.0
|
||
|
||
|
||
# ---- date proximity -------------------------------------------------------
|
||
|
||
def test_date_proximity_same_day() -> None:
    """A date compared with itself scores 1.0."""
    release_day = date(2024, 1, 1)
    proximity = date_proximity(release_day, release_day)
    assert proximity == 1.0
|
||
|
||
|
||
def test_date_proximity_within_window() -> None:
    """Dates 3 days apart with a 7-day window score 1 - 3/7."""
    start = date(2024, 1, 1)
    three_days_later = date(2024, 1, 4)
    # +3 days, window 7 → 1 - 3/7
    proximity = date_proximity(start, three_days_later, window_days=7)
    assert proximity == pytest.approx(1 - 3 / 7)
|
||
|
||
|
||
def test_date_proximity_outside_window() -> None:
    """Dates a month apart score 0.0 with a 7-day window."""
    january_first = date(2024, 1, 1)
    february_first = date(2024, 2, 1)
    proximity = date_proximity(january_first, february_first, window_days=7)
    assert proximity == 0.0
|
||
|
||
|
||
def test_date_proximity_none_inputs() -> None:
    """A missing date on either side scores 0.0."""
    missing_first = date_proximity(None, date(2024, 1, 1))
    assert missing_first == 0.0

    missing_second = date_proximity(date(2024, 1, 1), None)
    assert missing_second == 0.0
|
||
|
||
|
||
# ---- composite -----------------------------------------------------------
|
||
|
||
def test_composite_studio_mismatch_hard_rejects() -> None:
    """Without a fingerprint, a studio mismatch zeroes the score and is reported."""
    signals = {
        "fp": None,
        "title": 0.9,
        "performers": 0.9,
        "date_score": 1.0,
        "studio_match": False,
    }
    score, reasons = composite_score(**signals)
    assert score == 0.0
    assert reasons.get("studio_mismatch")
|
||
|
||
|
||
def test_composite_strong_fp_overrides_studio_mismatch() -> None:
    """A 0.97 fingerprint score keeps the composite positive despite a studio mismatch."""
    # a strong pHash beats a studio mismatch (sub-studio mappings sometimes drift apart)
    signals = {
        "fp": 0.97,
        "title": 0.5,
        "performers": None,
        "date_score": 1.0,
        "studio_match": False,
    }
    score, reasons = composite_score(**signals)
    assert score > 0.0
    assert reasons.get("studio_mismatch_overridden_by_fp") is True
|
||
|
||
|
||
def test_composite_redistributes_weights_when_fp_missing() -> None:
    """Perfect signals give a composite of 1.0 with or without a fingerprint."""
    # everything at 1.0 → composite must be 1.0 whether or not fp is available
    perfect = {
        "title": 1.0,
        "performers": 1.0,
        "date_score": 1.0,
        "studio_match": True,
    }
    with_fp, _ = composite_score(fp=1.0, **perfect)
    without_fp, _ = composite_score(fp=None, **perfect)
    assert with_fp == pytest.approx(1.0)
    assert without_fp == pytest.approx(1.0)
|
||
|
||
|
||
def test_composite_no_signals() -> None:
    """With every signal set to None the score is 0.0 and the reason is reported."""
    signal_names = ("fp", "title", "performers", "date_score", "studio_match")
    # dict.fromkeys maps every name to None, i.e. no signal is available.
    absent = dict.fromkeys(signal_names)
    score, reasons = composite_score(**absent)
    assert score == 0.0
    assert reasons.get("no_signals")
|
||
|
||
|
||
def test_composite_clamps_to_unit() -> None:
    """Out-of-range component scores of 2.0 still yield a composite of 1.0."""
    too_high = 2.0
    score, _ = composite_score(
        fp=too_high,
        title=too_high,
        performers=too_high,
        date_score=too_high,
        studio_match=True,
    )
    assert score == 1.0
|
||
|
||
|
||
# ---- triage --------------------------------------------------------------
|
||
|
||
def test_triage_thresholds() -> None:
    """Sample scores map to the expected triage bucket."""
    expectations = (
        (0.95, "auto"),  # >= 0.92
        (0.92, "auto"),
        (0.85, "review"),
        (0.75, "review"),
        (0.5, "reject"),
    )
    for score, verdict in expectations:
        assert triage(score) == verdict
|