fix(ingest): cap code/director to column length (GOON-J)

Some sources (sexlikereal) build a giant `code`/`director` from a multi-performer
compilation title, overflowing scenes.code varchar(128) and raising
StringDataRightTruncation, so the scene was silently dropped from ingest. Cap both at
the column limit in _create_canonical and the fill path; code/director are stored
metadata, not match keys, so truncation is safe.

Fixes GOON-J

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
jtrzupek 2026-06-13 19:04:10 +02:00
parent 86b3e88f08
commit 9d4384cef3

View file

@ -394,6 +394,14 @@ def _effective_duration(norm: NormalizedScene) -> int | None:
return max(ps_durs) if ps_durs else None
def _cap(s: str | None, n: int) -> str | None:
"""Przytnij string do limitu VARCHAR kolumny. Niektóre źródła (sexlikereal) dają
monstrualne `code`/`director` wyliczone z tytułu kompilacji (12 performerek)
StringDataRightTruncation i scena cicho wypada z ingestu (GOON-J). code/director to
tylko metadata (NIE klucze matchu/dedupu), więc przycięcie jest bezpieczne."""
return s[:n] if s and len(s) > n else s
def _create_canonical(
session: Session, *, norm: NormalizedScene, studio_id: uuid.UUID | None
) -> Scene:
@ -405,8 +413,8 @@ def _create_canonical(
studio_id=studio_id,
duration_sec=_effective_duration(norm),
description=norm.description,
code=norm.code,
director=norm.director,
code=_cap(norm.code, 128),
director=_cap(norm.director, 256),
)
session.add(scene)
session.flush()
@ -462,9 +470,9 @@ def _update_scene_fields(
if norm.description and not scene.description:
scene.description = norm.description
if norm.code and not scene.code:
scene.code = norm.code
scene.code = _cap(norm.code, 128)
if norm.director and not scene.director:
scene.director = norm.director
scene.director = _cap(norm.director, 256)
def _has_canonical_external_ref(session: Session, *, scene_id: uuid.UUID) -> bool: