Two observability additions to the worker scheduler (intertwined in the same files): (1) ingest-watchdog now also covers performer-driven search scrapers (ALL_DIRECT_SCRAPERS) with a separate 7d threshold, not just browse tubes at 48h — several search tubes (perverzija, fpoxxx, porndish, ...) had frozen silently for weeks. (2) New Hetzner Cloud bandwidth monitor (app/scheduler/hetzner_monitor.py): polls outgoing_traffic vs included_traffic and fires a Sentry message at info/warning/error % thresholds with a per-level fingerprint. The config fields existed for ages but the monitor was never implemented. No-op until HETZNER_API_TOKEN + HETZNER_SERVER_ID are set in .env (verified: returns {enabled: False}, job registers as 'hetzner-monitor every 6h', jobs=13).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
103 lines
3.8 KiB
Python
103 lines
3.8 KiB
Python
"""Hetzner Cloud bandwidth monitor — alert do Sentry zanim transfer przekroczy included.
|
|
|
|
Hetzner Cloud: ruch WYCHODZĄCY liczy się do `included_traffic` (przychodzący darmowy),
|
|
overage = €1/TB. Przy dystrybucji apki (fala instalacji + część playbacku przez
|
|
/proxy/<token>) transfer może rosnąć. Spec był w config.py od dawna, ale monitor nigdy
|
|
nie powstał — to jego implementacja.
|
|
|
|
Pyta Hetzner Cloud API o bieżący `outgoing_traffic` vs `included_traffic` i alarmuje do
|
|
Sentry przy progach (info/warning/error %, z config). Stabilny fingerprint per poziom →
|
|
jedno eskalujące issue, nie nowe co run. Bez `HETZNER_API_TOKEN`/`HETZNER_SERVER_ID` =
|
|
no-op (warning w logu raz). Wołane periodycznie przez scheduler (`_job_hetzner_monitor`).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from typing import Any
|
|
|
|
import httpx
|
|
|
|
from app.config import get_settings
|
|
|
|
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
|
|
|
|
# URL template for fetching a single server from the Hetzner Cloud API;
# `{id}` is filled in with the configured server id via `str.format`.
_API = "https://api.hetzner.cloud/v1/servers/{id}"
|
|
|
|
|
|
def _traffic_level(
    used_pct: float,
    info_pct: float,
    warning_pct: float,
    error_pct: float,
) -> str | None:
    """Return the most severe Sentry level whose threshold `used_pct` has reached, else None."""
    # Checked from most to least severe so the highest crossed threshold wins.
    for name, threshold in (
        ("error", error_pct),
        ("warning", warning_pct),
        ("info", info_pct),
    ):
        if used_pct >= threshold:
            return name
    return None


def _alert_sentry(
    level: str,
    server_id: Any,
    used_pct: float,
    out_gb: float,
    inc_gb: float,
) -> None:
    """Send one Sentry message at `level`; any failure is logged instead of raised."""
    try:
        # Imported lazily so the module still loads when sentry_sdk is unavailable.
        import sentry_sdk

        # NOTE(review): `push_scope` is reported as deprecated in sentry-sdk 2.x in
        # favour of `new_scope` — confirm against the installed SDK version.
        with sentry_sdk.push_scope() as scope:
            scope.level = level
            scope.set_tag("hetzner_server_id", str(server_id))
            scope.set_extra("used_pct", used_pct)
            scope.set_extra("outgoing_gb", out_gb)
            scope.set_extra("included_gb", inc_gb)
            # Fingerprint per LEVEL: escalating info -> warning -> error yields separate,
            # persistent issues instead of fragmenting on the changing percentage.
            scope.fingerprint = ["hetzner-traffic", level]
            sentry_sdk.capture_message(
                f"hetzner-monitor: transfer {used_pct}% included "
                f"({out_gb}GB / {inc_gb}GB) — poziom {level}"
            )
    except Exception:  # pragma: no cover - Sentry off / no DSN
        log.exception("hetzner-monitor: Sentry capture failed")


def run_hetzner_bandwidth_check() -> dict[str, Any]:
    """Check the used share of `included_traffic` and alert Sentry once a threshold is reached.

    Reads the API token, the server id and the three percentage thresholds from
    `get_settings()`, fetches the server object from the Hetzner Cloud API and compares
    `outgoing_traffic` against `included_traffic`.

    Returns:
        `{"enabled": False}` when the token or the server id is not configured.
        `{"enabled": True, "error": <text>}` when the request fails, the payload is
        malformed, or `included_traffic` is missing / not positive.
        Otherwise a dict with the keys `enabled`, `used_pct`, `outgoing_gb`,
        `included_gb` and `level`, where `level` is "info", "warning" or "error", or
        None below the lowest threshold.

    API, payload and Sentry failures are logged and reported through the result rather
    than raised, so a bad response cannot crash the calling scheduler job.
    """
    s = get_settings()
    token = s.hetzner_api_token
    server_id = s.hetzner_server_id
    if not token or not server_id:
        log.info("hetzner-monitor: wyłączony (brak HETZNER_API_TOKEN/HETZNER_SERVER_ID)")
        return {"enabled": False}

    try:
        r = httpx.get(
            _API.format(id=server_id),
            headers={"Authorization": f"Bearer {token}"},
            timeout=20.0,
        )
        r.raise_for_status()
        payload = r.json()
    except Exception as e:
        log.warning("hetzner-monitor: API fetch failed: %s", str(e)[:160])
        return {"enabled": True, "error": str(e)[:160]}

    # Payload parsing is guarded separately: a null "server" object or a non-numeric
    # traffic field would otherwise escape as AttributeError/TypeError.
    try:
        srv = payload["server"]
        # `or 0` maps a null/absent field to zero before the numeric coercion.
        included = float(srv.get("included_traffic") or 0)
        outgoing = float(srv.get("outgoing_traffic") or 0)
    except (AttributeError, KeyError, TypeError, ValueError) as e:
        detail = f"malformed payload: {e!r}"[:160]
        log.warning("hetzner-monitor: malformed API payload: %s", detail)
        return {"enabled": True, "error": detail}

    # NOTE(review): Hetzner's API changelog reportedly deprecated the server-level traffic
    # fields (returned as null) — confirm they are still populated; if they are not, this
    # branch fires on every run and the monitor never alerts.
    if included <= 0:
        log.warning("hetzner-monitor: included_traffic=0 — pomijam")
        return {"enabled": True, "error": "included_traffic=0"}

    used_pct = round(100.0 * outgoing / included, 1)
    # Division by 1e9 gives decimal gigabytes, presuming the API reports bytes.
    out_gb = round(outgoing / 1e9, 1)
    inc_gb = round(included / 1e9, 1)

    level = _traffic_level(
        used_pct,
        s.hetzner_alert_info_pct,
        s.hetzner_alert_warning_pct,
        s.hetzner_alert_error_pct,
    )

    result = {
        "enabled": True,
        "used_pct": used_pct,
        "outgoing_gb": out_gb,
        "included_gb": inc_gb,
        "level": level,
    }
    log.info(
        "hetzner-monitor: %s%% included (%sGB / %sGB out), level=%s",
        used_pct, out_gb, inc_gb, level or "ok",
    )

    if level:
        _alert_sentry(level, server_id, used_pct, out_gb, inc_gb)

    return result
|