diff --git a/app/config.py b/app/config.py index f7e4c42..3bcea64 100644 --- a/app/config.py +++ b/app/config.py @@ -53,6 +53,10 @@ class Settings(BaseSettings): # False = wciągaj jak dawniej. Tube'y z clip-store studiem NIE są skipowane (mają playback). skip_clip_store: bool = Field(default=True, validation_alias="GOON_SKIP_CLIP_STORE") + # Minimum duration of a tube/scraper scene at ingest [NOTE(review): text looks truncated here; the setting definition and the following function's def line are not visible] — bool: + """True when a scraper/tube scene has a KNOWN duration < `min_ingest_duration_sec` + (trailer/teaser/preview — junk). Unknown duration → do NOT drop it (it could be a full + scene without metadata). Scraper sources only — canonical (TPDB/StashDB) is left alone. + porndoe/deep-crawl pull many trailers <3min from deep in the catalog (2026-06-03).""" + floor = getattr(get_settings(), "min_ingest_duration_sec", 0) + if not floor: + return False + dur = norm.duration_sec + if dur is None: + ps_durs = [ps.duration_sec for ps in norm.playback_sources if ps.duration_sec] + dur = max(ps_durs) if ps_durs else None + if dur is None or dur >= floor: + return False + src = session.get(Source, source_id) + return src is not None and src.kind == SourceKind.scraper + + def _canonical_json(payload: dict) -> bytes: return json.dumps(payload, sort_keys=True, separators=(",", ":"), default=str).encode() @@ -248,6 +266,10 @@ def _process_scene(*, source_id: uuid.UUID, raw_scene: RawScene, counters: dict[ counters["skipped"] += 1 return + if _skip_short_tube_scene(session, source_id=source_id, norm=norm): + counters["skipped"] += 1 + return + result = resolve_scene(session, norm=norm, source_id=source_id) if result.was_created: