goon/scripts/tpdb_backfill_status.py
goon-foss ad0284585b Initial commit
Goon — self-hosted aggregator for adult-content scene metadata.

Indexes scenes from TPDB, StashDB, and 30+ public adult tube sites.
Cross-source deduplication via perceptual hash + Levenshtein distance.
FastAPI backend + APScheduler worker + React Native (Expo) mobile client.

FOSS, ad-free, donation-funded. See README for details.
2026-05-20 10:10:22 +02:00

66 lines
2 KiB
Python

"""Status check for tpdb_backfill — run it from the worker container on the VPS:
docker compose exec -T worker python scripts/tpdb_backfill_status.py
"""
from __future__ import annotations

import os
import subprocess
from collections import deque
from datetime import date

from sqlalchemy import func, select

from app.db import session_scope
from app.models.scene import SceneExternalRef
from app.models.source import Source, SourceKind
# Read by main(): every non-blank line is counted as one completed day.
PROGRESS_FILE = "/tmp/tpdb_backfill_progress.txt"
# Read by main(): its last five lines are printed at the end of the report.
LOG_FILE = "/tmp/tpdb_backfill.log"
def _backfill_is_running(ps_output: str, own_pid: int) -> bool:
    """Return True if ``ps`` output lists a backfill process other than this script.

    Args:
        ps_output: Text produced by ``ps -eo pid,cmd`` (one ``PID CMD`` row per line).
        own_pid: PID of the current process; its row is never counted.
    """
    for row in ps_output.splitlines():
        pid_text, _, cmd = row.strip().partition(" ")
        if "tpdb_backfill" not in cmd:
            continue
        # The status script's own command line ("tpdb_backfill_status.py") contains
        # the marker as well; counting it would make the check say RUNNING every time.
        if pid_text == str(own_pid) or "tpdb_backfill_status" in cmd:
            continue
        return True
    return False


def _tail(path: str, count: int) -> tuple[int, list[str]]:
    """Return the total number of lines in *path* and its last *count* lines.

    The file is streamed line by line, so memory use does not grow with its size.

    Raises:
        FileNotFoundError: If *path* does not exist.
    """
    total = 0
    last_lines: deque[str] = deque(maxlen=count)
    # errors="replace": one undecodable byte in the log must not abort the report.
    with open(path, encoding="utf-8", errors="replace") as f:
        for total, line in enumerate(f, start=1):
            last_lines.append(line)
    return total, list(last_lines)


def main() -> None:
    """Print a status report for the TPDB backfill.

    The report contains, in order: the number of entries in PROGRESS_FILE, whether
    a backfill process shows up in ``ps``, the number of SceneExternalRef rows for
    the source named "tpdb", and the last five lines of LOG_FILE.

    Returns early, after a single message, when PROGRESS_FILE does not exist.
    """
    # Progress
    try:
        with open(PROGRESS_FILE, encoding="utf-8") as f:
            done_dates = [line.strip() for line in f if line.strip()]
    except FileNotFoundError:
        print("No progress file — backfill not started")
        return

    print("=== TPDB backfill status ===\n")
    print(f"Days completed: {len(done_dates)}")
    if done_dates:
        # NOTE(review): min/max compare strings; that is chronological only if the
        # entries are ISO-formatted dates — confirm against the backfill writer.
        print(f"Date range processed: {min(done_dates)} → {max(done_dates)}")
        print(f"Last completed day: {done_dates[-1]}")

    # Process check (best effort: a missing or failing `ps` must not stop the report)
    try:
        ps = subprocess.run(
            ["ps", "-eo", "pid,cmd"], capture_output=True, text=True, check=True
        )
    except (OSError, subprocess.SubprocessError, UnicodeDecodeError):
        print("Process check: failed")
    else:
        if _backfill_is_running(ps.stdout, os.getpid()):
            print("Process: RUNNING")
        else:
            print("Process: NOT RUNNING (completed or crashed)")

    # DB count
    with session_scope() as session:
        src = session.execute(
            select(Source).where(Source.name == "tpdb")
        ).scalar_one_or_none()
        if src:
            n = session.execute(
                select(func.count(SceneExternalRef.scene_id))
                .where(SceneExternalRef.source_id == src.id)
            ).scalar() or 0
            print(f"\nTPDB scene_external_refs in DB: {n:,}")
        else:
            print("\nTPDB source not found in DB")

    # Last log lines
    try:
        total_lines, tail = _tail(LOG_FILE, 5)
    except FileNotFoundError:
        return
    print(f"\n=== Last 5 log lines (of {total_lines}) ===")
    for line in tail:
        print(f" {line.rstrip()}")


if __name__ == "__main__":
    main()