Mobile / OTA: - Enable Expo Updates (app.json + AndroidManifest) → api.goon-foss.org - Bump 0.1.6 → 0.1.9 (build.gradle, app.json, appVersion.ts, main.py /version) - backend.ts: default public backend auto-connect (no manual login) WebView fallback fix (PlayerScreen INJECTED_JS): - Auto-dismiss cookie/consent gates (hqporner et al. blocked kt_player init) - Context-scoped: only clicks consent buttons inside cookie/gdpr containers - Retry window for <source>.src polling raised 5→15 ticks (post-dismiss init) Resolver: - Series-position + modifier mismatch detector (Episode 2≠4, BTS/unedited) → composite_score hard-reject / cap; wired into scene_score + bulk_dedup - aggregator-mode candidate query: LIMIT 500 + title-match ordering Connectors: - porndoe.com browse scraper (JSON-LD VideoObject) — theporndude audit pilot landing: APK links → goon-v0.1.9.apk Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
265 lines
9.2 KiB
Python
265 lines
9.2 KiB
Python
"""Unit testy scoring engine — czyste funkcje bez DB."""
|
||
from __future__ import annotations
|
||
|
||
import uuid
|
||
from datetime import date
|
||
|
||
import pytest
|
||
|
||
from app.resolve.scoring import (
|
||
composite_score,
|
||
date_proximity,
|
||
detect_modifier_tags,
|
||
detect_series_positions,
|
||
hamming_distance_hex,
|
||
performer_set_similarity,
|
||
phash_similarity,
|
||
series_mismatch_strength,
|
||
title_similarity,
|
||
triage,
|
||
)
|
||
|
||
|
||
# ---- hamming / phash ------------------------------------------------------
|
||
|
||
def test_hamming_zero_on_identical() -> None:
    """A hex digest compared with itself is expected to be zero bits apart."""
    digest = "deadbeef"
    assert hamming_distance_hex(digest, digest) == 0
|
||
|
||
|
||
def test_hamming_one_bit_difference() -> None:
    """The distance is expected to equal the number of differing bits."""
    expected_bits = (
        ("00", "01", 1),  # 0x00 vs 0x01 differ in a single bit
        ("ff", "00", 8),  # 0xFF vs 0x00 differ in all eight bits
    )
    for left_hex, right_hex, bits in expected_bits:
        assert hamming_distance_hex(left_hex, right_hex) == bits
|
||
|
||
|
||
def test_hamming_raises_on_length_mismatch() -> None:
    """Digests of different lengths are expected to raise ValueError."""
    short_hex, long_hex = "abcd", "abcdef"
    with pytest.raises(ValueError):
        hamming_distance_hex(short_hex, long_hex)
|
||
|
||
|
||
def test_phash_similarity_64bit() -> None:
    """One differing bit out of 64 is expected to cost 1/64 of similarity."""
    # 16 hex characters encode 64 bits.
    zero_hash = "0000000000000000"
    one_bit_off = "0000000000000001"  # differs from zero_hash by one bit
    expected = 1.0 - 1 / 64
    assert phash_similarity(zero_hash, one_bit_off) == pytest.approx(expected, abs=1e-6)
|
||
|
||
|
||
def test_phash_similarity_far() -> None:
    """Hashes differing in every bit are expected to have zero similarity."""
    all_zero = "0000000000000000"
    all_one = "ffffffffffffffff"  # all 64 bits differ
    similarity = phash_similarity(all_zero, all_one)
    assert similarity == 0.0
|
||
|
||
|
||
# ---- title similarity -----------------------------------------------------
|
||
|
||
def test_title_similarity_identical() -> None:
    """Two equal titles are expected to score exactly 1.0."""
    title = "the great heist"
    assert title_similarity(title, title) == 1.0
|
||
|
||
|
||
def test_title_similarity_token_set_handles_extra_words() -> None:
    """Extra tokens around a shared core are expected to still score above 0.8."""
    # token_set_ratio is robust to reordering and to additional tokens.
    short_title = "great heist"
    long_title = "the great heist extended cut"
    assert title_similarity(short_title, long_title) > 0.8
|
||
|
||
|
||
def test_title_similarity_empty_inputs() -> None:
    """An empty title on either side is expected to score 0.0."""
    for left, right in (("", "anything"), ("anything", "")):
        assert title_similarity(left, right) == 0.0
|
||
|
||
|
||
# ---- performer set Jaccard ------------------------------------------------
|
||
|
||
def test_performer_set_full_overlap() -> None:
    """Identical performer lists are expected to have similarity 1.0."""
    performers = [uuid.uuid4(), uuid.uuid4()]
    # Pass a separate list object with the same members on the right-hand side.
    assert performer_set_similarity(performers, list(performers)) == 1.0
|
||
|
||
|
||
def test_performer_set_partial_overlap() -> None:
    """One shared performer among three distinct ones is expected to give 1/3."""
    shared = uuid.uuid4()
    only_left = uuid.uuid4()
    only_right = uuid.uuid4()
    # |intersection| = 1, |union| = 3 -> 1/3
    similarity = performer_set_similarity([shared, only_left], [shared, only_right])
    assert similarity == pytest.approx(1 / 3)
|
||
|
||
|
||
def test_performer_set_empty_inputs() -> None:
    """Two empty performer lists are expected to score 0.0."""
    similarity = performer_set_similarity([], [])
    assert similarity == 0.0
|
||
|
||
|
||
# ---- date proximity -------------------------------------------------------
|
||
|
||
def test_date_proximity_same_day() -> None:
    """The same date on both sides is expected to score 1.0."""
    same_day = date(2024, 1, 1)
    proximity = date_proximity(same_day, same_day)
    assert proximity == 1.0
|
||
|
||
|
||
def test_date_proximity_within_window() -> None:
    """Dates three days apart in a 7-day window are expected to score 1 - 3/7."""
    start = date(2024, 1, 1)
    three_days_later = date(2024, 1, 4)
    # +3 days with a 7-day window -> 1 - 3/7
    expected = 1 - 3 / 7
    assert date_proximity(start, three_days_later, window_days=7) == pytest.approx(expected)
|
||
|
||
|
||
def test_date_proximity_outside_window() -> None:
    """Dates a month apart with a 7-day window are expected to score 0.0."""
    new_year = date(2024, 1, 1)
    month_later = date(2024, 2, 1)
    assert date_proximity(new_year, month_later, window_days=7) == 0.0
|
||
|
||
|
||
def test_date_proximity_none_inputs() -> None:
    """A missing date on either side is expected to score 0.0."""
    known = date(2024, 1, 1)
    missing_left = date_proximity(None, known)
    assert missing_left == 0.0
    missing_right = date_proximity(known, None)
    assert missing_right == 0.0
|
||
|
||
|
||
# ---- composite -----------------------------------------------------------
|
||
|
||
def test_composite_studio_mismatch_hard_rejects() -> None:
    """Without a fingerprint, a studio mismatch is expected to zero the score."""
    signals = {
        "fp": None,
        "title": 0.9,
        "performers": 0.9,
        "date_score": 1.0,
        "studio_match": False,
    }
    score, reasons = composite_score(**signals)
    assert score == 0.0
    assert reasons.get("studio_mismatch")
|
||
|
||
|
||
def test_composite_strong_fp_overrides_studio_mismatch() -> None:
    """A strong fingerprint is expected to survive a studio mismatch."""
    # A strong pHash beats a studio mismatch (sub-studio mapping sometimes drifts).
    signals = {
        "fp": 0.97,
        "title": 0.5,
        "performers": None,
        "date_score": 1.0,
        "studio_match": False,
    }
    score, reasons = composite_score(**signals)
    assert score > 0.0
    assert reasons.get("studio_mismatch_overridden_by_fp") is True
|
||
|
||
|
||
def test_composite_redistributes_weights_when_fp_missing() -> None:
    """All-perfect signals are expected to score 1.0 with or without a fingerprint."""
    # Everything at 1.0 -> the composite must be 1.0 whether or not fp is present.
    perfect = {"title": 1.0, "performers": 1.0, "date_score": 1.0, "studio_match": True}
    with_fp, _ = composite_score(fp=1.0, **perfect)
    without_fp, _ = composite_score(fp=None, **perfect)
    for score in (with_fp, without_fp):
        assert score == pytest.approx(1.0)
|
||
|
||
|
||
def test_composite_no_signals() -> None:
    """With every signal absent the score is expected to be 0.0 and flagged."""
    signal_names = ("fp", "title", "performers", "date_score", "studio_match")
    absent = dict.fromkeys(signal_names)  # every signal set to None
    score, reasons = composite_score(**absent)
    assert score == 0.0
    assert reasons.get("no_signals")
|
||
|
||
|
||
def test_composite_clamps_to_unit() -> None:
    """Out-of-range inputs (2.0) are expected to be clamped to a score of 1.0."""
    oversized = 2.0
    score, _ = composite_score(
        fp=oversized,
        title=oversized,
        performers=oversized,
        date_score=oversized,
        studio_match=True,
    )
    assert score == 1.0
|
||
|
||
|
||
# ---- series position / modifier detector ---------------------------------
|
||
|
||
def test_detect_series_positions_episode() -> None:
    """An "episode N" suffix is expected to yield position N."""
    title = "pleasureville a dp xxx parody episode 4"
    assert detect_series_positions(title) == {4}
|
||
|
||
def test_detect_series_positions_part_with_dot() -> None:
    """The abbreviated "pt. N" form is expected to yield position N."""
    title = "neon moonlight pt. 2"
    assert detect_series_positions(title) == {2}
|
||
|
||
def test_detect_series_positions_hash_only() -> None:
    """A "#N" marker and a "scene N" marker are both expected to be collected."""
    title = "women seeking women #131 scene 2"
    positions = detect_series_positions(title)
    assert positions == {131, 2}
|
||
|
||
def test_detect_series_positions_volume() -> None:
    """A "volume N" marker and a "scene N" marker are both expected to be collected."""
    title = "women seeking women volume 140 scene 3"
    positions = detect_series_positions(title)
    assert positions == {140, 3}
|
||
|
||
def test_detect_series_positions_s_e_style() -> None:
    """Season/episode shorthand ("s9 e8") is expected to yield both numbers."""
    title = "can you handle a woman like me s9 e8"
    positions = detect_series_positions(title)
    assert positions == {9, 8}
|
||
|
||
def test_detect_series_positions_empty() -> None:
    """None and the empty string are both expected to yield no positions."""
    for blank_title in (None, ""):
        assert detect_series_positions(blank_title) == set()
|
||
|
||
def test_detect_modifier_tags_bts() -> None:
    """The "bts" abbreviation is expected to be reported as a modifier tag."""
    tags = detect_modifier_tags("training ravyn (bts - 1)")
    assert "bts" in tags
|
||
|
||
def test_detect_modifier_tags_behind_the_scenes() -> None:
    """The spelled-out "behind the scenes" phrase is expected to be reported as a tag."""
    title = "behind the scenes - two pairs of suckable melons"
    tags = detect_modifier_tags(title)
    assert "behind the scenes" in tags
|
||
|
||
def test_detect_modifier_tags_unedited() -> None:
    """A parenthesised "unedited" marker is expected to be reported as a tag."""
    tags = detect_modifier_tags("bad bella stinky feet prep (unedited)")
    assert "unedited" in tags
|
||
|
||
def test_series_mismatch_episode_2_vs_4_hard() -> None:
    """Different episode numbers of one series are expected to be a hard mismatch."""
    # Episode 2 vs. episode 4 -> hard mismatch (1.0).
    episode_two = "pleasureville a dp xxx parody episode 2"
    episode_four = "pleasureville a dp xxx parody episode 4"
    assert series_mismatch_strength(episode_two, episode_four) == 1.0
|
||
|
||
def test_series_mismatch_intersection_is_no_mismatch() -> None:
    """Same position plus a one-sided BTS tag is expected to give 0.7."""
    # Both titles carry {7} (Make'em Sweat #7) -> NO position mismatch,
    # but the BTS asymmetry -> 0.7.
    plain_title = "make'em sweat #7"
    bts_title = "make'em sweat #7 bts"
    strength = series_mismatch_strength(plain_title, bts_title)
    assert strength == pytest.approx(0.7)
|
||
|
||
def test_series_mismatch_partial_overlap_is_still_hard() -> None:
    """A shared volume with different scene numbers is expected to be a hard mismatch."""
    # "Volume 140 Scene 3" vs "Volume 140 Scene 4": 140 is shared but 3/4 differ;
    # these are separate scenes from one compilation -> hard split.
    scene_three = "women seeking women volume 140 scene 3"
    scene_four = "women seeking women volume 140 scene 4"
    assert series_mismatch_strength(scene_three, scene_four) == 1.0
|
||
|
||
def test_series_mismatch_no_year_false_positive() -> None:
    """A four-digit year in a title is expected not to be read as a series position."""
    # "scene from 2020" must not produce a false position from the year.
    # Could {2020} show up here? No: per the original note, \d{1,3} with an
    # anti-greedy boundary does not capture four-digit numbers.
    assert detect_series_positions("scene from 2020") == set()
|
||
|
||
def test_series_mismatch_bts_asymmetric() -> None:
    """A BTS tag on only one side is expected to give a soft mismatch of 0.7."""
    # Titles: Training Ravyn vs Training Ravyn (BTS - 1).
    # Positions: {} vs {1} -> no common position, but one side is empty,
    # so this is not a hard split.
    # BTS on one side only -> 0.7.
    plain_title = "training ravyn"
    bts_title = "training ravyn (bts - 1)"
    strength = series_mismatch_strength(plain_title, bts_title)
    assert strength == pytest.approx(0.7)
|
||
|
||
def test_series_mismatch_no_signal() -> None:
    """Titles with no position or modifier markers are expected to give 0.0."""
    title = "the great heist"
    assert series_mismatch_strength(title, title) == 0.0
|
||
|
||
def test_composite_series_position_hard_reject() -> None:
    """A series mismatch of 1.0 is expected to zero an otherwise perfect score."""
    # Even with every strong signal at 1.0 (fp/title/performers/date), a series
    # mismatch of 1.0 forces a hard reject. This guarantees that "Episode 2 vs
    # Episode 4" sharing a phash (studio reusing cover art) do NOT auto-merge.
    signals = {
        "fp": 1.0,
        "title": 1.0,
        "performers": 1.0,
        "date_score": 1.0,
        "studio_match": True,
        "series_mismatch": 1.0,
    }
    score, reasons = composite_score(**signals)
    assert score == 0.0
    assert reasons.get("series_position_mismatch")
|
||
|
||
def test_composite_series_modifier_cap_07() -> None:
    """A 0.7 modifier mismatch is expected to cap a perfect score at 0.3."""
    # Modifier mismatch (BTS on one side) -> cap = 1 - 0.7 = 0.3
    expected_cap = pytest.approx(0.3)
    signals = {
        "fp": 1.0,
        "title": 1.0,
        "performers": 1.0,
        "date_score": 1.0,
        "studio_match": True,
        "series_mismatch": 0.7,
    }
    score, reasons = composite_score(**signals)
    assert score == expected_cap
    assert reasons.get("series_modifier_cap") == expected_cap
|
||
|
||
def test_composite_series_zero_no_effect() -> None:
    """series_mismatch of 0.0 and of None are both expected to leave the score at 1.0."""
    perfect = {
        "fp": 1.0,
        "title": 1.0,
        "performers": 1.0,
        "date_score": 1.0,
        "studio_match": True,
    }
    zero_score, _ = composite_score(**perfect, series_mismatch=0.0)
    none_score, _ = composite_score(**perfect, series_mismatch=None)
    assert zero_score == none_score == pytest.approx(1.0)
|
||
|
||
|
||
# ---- triage --------------------------------------------------------------
|
||
|
||
def test_triage_thresholds() -> None:
    """Sample scores are expected to land in the auto / review / reject buckets."""
    expected_verdicts = (
        (0.95, "auto"),  # >= 0.92
        (0.92, "auto"),
        (0.85, "review"),
        (0.75, "review"),
        (0.5, "reject"),
    )
    for score, verdict in expected_verdicts:
        assert triage(score) == verdict
|