perf(scenes): light list payload — drop tags/refs, slim playback to thumbnail

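The scene list returned the full SceneOut per item (nested tags/external_refs + all
playback_sources with page_url/embed/stream/quality), even though SceneTile only
reads the thumbnail + title/duration/performer/studio, and SceneDetail re-fetches
the full scene via /scenes/{id}. Added a keyword-only `light` flag (default False)
to _build_scenes_out_batch; list_scenes now passes light=True. In light mode the
tags + external_refs queries are skipped entirely and playback_sources collapses
to one slim entry per scene. Only thumbnail_url and animated_thumbnail_url carry
real data in that entry; the schema-required id/origin/page_url are placeholders
(nil UUID, empty strings) that the tile does not read.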

Result: default list payload 78KB->48KB (-38%), ~28ms cached, less DB work per
list. Verified on emulator: grid thumbnails/durations/titles render unchanged.
No mobile change is needed (the tile reads the same fields); the change is
server-side only, so no OTA app update is required.

NOTE: the separate slow path — lists filtered by a common tag (4-12s; the query
expands all matching scene_tags rows before sort/limit) — is structural: it needs
a denormalized (tag_id, created_at) index, so it is deferred. VACUUM ANALYZE and
raised tag_id stats were applied, but the planner still can't avoid the
materialization.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
jtrzupek 2026-06-07 21:03:26 +02:00
parent 9f46e8dea9
commit d52641774d

View file

@ -404,7 +404,7 @@ def list_scenes(
total = (page - 1) * per_page + len(rows) total = (page - 1) * per_page + len(rows)
total_capped = has_more total_capped = has_more
items = _build_scenes_out_batch(session, list(rows)) items = _build_scenes_out_batch(session, list(rows), light=True)
return SceneListOut( return SceneListOut(
items=items, items=items,
@ -447,12 +447,19 @@ def _wrap_image_proxy(url: str, referer: str) -> str:
return f"/proxy/img/{token}/img.{ext}" return f"/proxy/img/{token}/img.{ext}"
def _build_scenes_out_batch(session: Session, scenes: list[Scene]) -> list[SceneOut]: def _build_scenes_out_batch(
session: Session, scenes: list[Scene], *, light: bool = False
) -> list[SceneOut]:
"""Batch-fetch wszystkich relacji dla N scen w 7 zapytaniach (zamiast 7×N). """Batch-fetch wszystkich relacji dla N scen w 7 zapytaniach (zamiast 7×N).
Eliminuje N+1 z `_build_scene_out` w listach scen `/scenes?per_page=24` szło Eliminuje N+1 z `_build_scene_out` w listach scen `/scenes?per_page=24` szło
z ~9.6s do <500ms. Pojedyncza scena (`/scenes/{id}`) nadal używa `_build_scene_out` z ~9.6s do <500ms. Pojedyncza scena (`/scenes/{id}`) nadal używa `_build_scene_out`
bo overhead na batch nie ma sensu dla N=1. bo overhead na batch nie ma sensu dla N=1.
`light=True` (listy/grid): pomija `tags` i `external_refs` (kafelek SceneTile ich
NIE używa, a SceneDetail re-fetchuje pełną scenę osobno) i ślimaczy `playback_sources`
do 1 wpisu z samą miniaturką (kafelek czyta tylko thumbnail_url/animated_thumbnail_url).
Mniej DB + mniej payloadu + szybszy parse na kliencie (perf 2026-06-07).
""" """
from collections import defaultdict from collections import defaultdict
if not scenes: if not scenes:
@ -488,50 +495,95 @@ def _build_scenes_out_batch(session: Session, scenes: list[Scene]) -> list[Scene
) )
) )
# 3) Tags # 3) Tags + 4) External refs — kafelek listy ich nie używa; w light mode pomijamy
tag_rows = session.execute( # (SceneDetail re-fetchuje pełną scenę przez /scenes/{id}).
select(SceneTag.scene_id, Tag)
.join(Tag, Tag.id == SceneTag.tag_id)
.where(SceneTag.scene_id.in_(scene_ids))
).all()
tags_by_scene: dict = defaultdict(list) tags_by_scene: dict = defaultdict(list)
for sid, t in tag_rows:
tags_by_scene[sid].append(TagOut.model_validate(t))
# 4) External refs + sources
ref_rows = session.execute(
select(SceneExternalRef, Source)
.join(Source, Source.id == SceneExternalRef.source_id)
.where(SceneExternalRef.scene_id.in_(scene_ids))
).all()
refs_by_scene: dict = defaultdict(list) refs_by_scene: dict = defaultdict(list)
for ref, src in ref_rows: if not light:
refs_by_scene[ref.scene_id].append( tag_rows = session.execute(
ExternalRefOut( select(SceneTag.scene_id, Tag)
source=src.name, .join(Tag, Tag.id == SceneTag.tag_id)
external_id=ref.external_id, .where(SceneTag.scene_id.in_(scene_ids))
url=ref.url, ).all()
last_seen=ref.last_seen, for sid, t in tag_rows:
) tags_by_scene[sid].append(TagOut.model_validate(t))
)
# 5) Playback sources ref_rows = session.execute(
pb_rows = session.execute( select(SceneExternalRef, Source)
select(PlaybackSource) .join(Source, Source.id == SceneExternalRef.source_id)
.where( .where(SceneExternalRef.scene_id.in_(scene_ids))
PlaybackSource.scene_id.in_(scene_ids), ).all()
PlaybackSource.dead_at.is_(None), for ref, src in ref_rows:
) refs_by_scene[ref.scene_id].append(
.order_by(PlaybackSource.origin.asc()) ExternalRefOut(
).scalars().all() source=src.name,
external_id=ref.external_id,
url=ref.url,
last_seen=ref.last_seen,
)
)
# 5) Playback sources. Light mode: tylko miniaturka (jedna na scenę) — kafelek
# czyta wyłącznie playback_sources[].thumbnail_url / animated_thumbnail_url.
pb_by_scene: dict = defaultdict(list) pb_by_scene: dict = defaultdict(list)
for p in pb_rows: if light:
out = PlaybackSourceOut.model_validate(p) pb_light = session.execute(
if out.thumbnail_url and _needs_proxy(out.thumbnail_url): select(
out.thumbnail_url = _wrap_image_proxy(out.thumbnail_url, p.page_url) PlaybackSource.scene_id,
if out.animated_thumbnail_url and _needs_proxy(out.animated_thumbnail_url): PlaybackSource.thumbnail_url,
out.animated_thumbnail_url = _wrap_image_proxy(out.animated_thumbnail_url, p.page_url) PlaybackSource.animated_thumbnail_url,
pb_by_scene[p.scene_id].append(out) PlaybackSource.page_url,
)
.where(
PlaybackSource.scene_id.in_(scene_ids),
PlaybackSource.dead_at.is_(None),
)
.order_by(PlaybackSource.origin.asc())
).all()
# Pierwsza miniaturka + pierwszy animated per scena (1 slim wpis).
thumb_by_scene: dict = {}
anim_by_scene: dict = {}
for sid, thumb, anim, page_url in pb_light:
if sid not in thumb_by_scene and thumb:
thumb_by_scene[sid] = (thumb, page_url)
if sid not in anim_by_scene and anim:
anim_by_scene[sid] = (anim, page_url)
for sid in scene_ids:
t = thumb_by_scene.get(sid)
a = anim_by_scene.get(sid)
if not t and not a:
continue
t_url = t[0] if t else None
a_url = a[0] if a else None
ref = (t or a)[1]
if t_url and _needs_proxy(t_url):
t_url = _wrap_image_proxy(t_url, ref)
if a_url and _needs_proxy(a_url):
a_url = _wrap_image_proxy(a_url, ref)
# id/origin/page_url wymagane przez schemat ale nieużywane przez kafelek
# (SceneDetail re-fetchuje pełne źródła) — dummy sentinel.
pb_by_scene[sid].append(
PlaybackSourceOut(
id=uuid.UUID(int=0), origin="", page_url="",
thumbnail_url=t_url, animated_thumbnail_url=a_url,
)
)
else:
pb_rows = session.execute(
select(PlaybackSource)
.where(
PlaybackSource.scene_id.in_(scene_ids),
PlaybackSource.dead_at.is_(None),
)
.order_by(PlaybackSource.origin.asc())
).scalars().all()
for p in pb_rows:
out = PlaybackSourceOut.model_validate(p)
if out.thumbnail_url and _needs_proxy(out.thumbnail_url):
out.thumbnail_url = _wrap_image_proxy(out.thumbnail_url, p.page_url)
if out.animated_thumbnail_url and _needs_proxy(out.animated_thumbnail_url):
out.animated_thumbnail_url = _wrap_image_proxy(out.animated_thumbnail_url, p.page_url)
pb_by_scene[p.scene_id].append(out)
# 6) Progress # 6) Progress
progress_by_scene: dict = {} progress_by_scene: dict = {}