perf(scenes): light list payload — drop tags/refs, slim playback to thumbnail

Scene list returned the full SceneOut per item (nested tags/external_refs + all
playback_sources with page_url/embed/stream/quality) though SceneTile only reads
the thumbnail + title/duration/performer/studio, and SceneDetail re-fetches the
full scene via /scenes/{id}. Added light=True to _build_scenes_out_batch: skip the
tags + external_refs queries entirely and collapse playback_sources to one slim
entry per scene (thumbnail_url + animated_thumbnail_url; the schema-required
id/origin/page_url fields carry placeholder values).

Result: default list payload 78KB->48KB (-38%), ~28ms cached, less DB work per
list. Verified on emulator: grid thumbnails/durations/titles render unchanged.
No mobile change (the tile reads the same fields); the change is server-side
only, so no OTA update is needed.

NOTE: the separate slow path — common-tag-filtered lists (4-12s; query expands all
matching scene_tags before sort/limit) — is structural (needs a denormalized
(tag_id, created_at) index) and deferred. VACUUM ANALYZE + raised tag_id stats
applied but the planner still can't avoid the materialization.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
jtrzupek 2026-06-07 21:03:26 +02:00
parent 9f46e8dea9
commit d52641774d

View file

@ -404,7 +404,7 @@ def list_scenes(
total = (page - 1) * per_page + len(rows) total = (page - 1) * per_page + len(rows)
total_capped = has_more total_capped = has_more
items = _build_scenes_out_batch(session, list(rows)) items = _build_scenes_out_batch(session, list(rows), light=True)
return SceneListOut( return SceneListOut(
items=items, items=items,
@ -447,12 +447,19 @@ def _wrap_image_proxy(url: str, referer: str) -> str:
return f"/proxy/img/{token}/img.{ext}" return f"/proxy/img/{token}/img.{ext}"
def _build_scenes_out_batch(session: Session, scenes: list[Scene]) -> list[SceneOut]: def _build_scenes_out_batch(
session: Session, scenes: list[Scene], *, light: bool = False
) -> list[SceneOut]:
"""Batch-fetch wszystkich relacji dla N scen w 7 zapytaniach (zamiast 7×N). """Batch-fetch wszystkich relacji dla N scen w 7 zapytaniach (zamiast 7×N).
Eliminuje N+1 z `_build_scene_out` w listach scen `/scenes?per_page=24` szło Eliminuje N+1 z `_build_scene_out` w listach scen `/scenes?per_page=24` szło
z ~9.6s do <500ms. Pojedyncza scena (`/scenes/{id}`) nadal używa `_build_scene_out` z ~9.6s do <500ms. Pojedyncza scena (`/scenes/{id}`) nadal używa `_build_scene_out`
bo overhead na batch nie ma sensu dla N=1. bo overhead na batch nie ma sensu dla N=1.
`light=True` (listy/grid): pomija `tags` i `external_refs` (kafelek SceneTile ich
NIE używa, a SceneDetail re-fetchuje pełną scenę osobno) i odchudza `playback_sources`
do 1 wpisu z samą miniaturką (kafelek czyta tylko thumbnail_url/animated_thumbnail_url).
Mniej DB + mniej payloadu + szybszy parse na kliencie (perf 2026-06-07).
""" """
from collections import defaultdict from collections import defaultdict
if not scenes: if not scenes:
@ -488,23 +495,24 @@ def _build_scenes_out_batch(session: Session, scenes: list[Scene]) -> list[Scene
) )
) )
# 3) Tags # 3) Tags + 4) External refs — kafelek listy ich nie używa; w light mode pomijamy
# (SceneDetail re-fetchuje pełną scenę przez /scenes/{id}).
tags_by_scene: dict = defaultdict(list)
refs_by_scene: dict = defaultdict(list)
if not light:
tag_rows = session.execute( tag_rows = session.execute(
select(SceneTag.scene_id, Tag) select(SceneTag.scene_id, Tag)
.join(Tag, Tag.id == SceneTag.tag_id) .join(Tag, Tag.id == SceneTag.tag_id)
.where(SceneTag.scene_id.in_(scene_ids)) .where(SceneTag.scene_id.in_(scene_ids))
).all() ).all()
tags_by_scene: dict = defaultdict(list)
for sid, t in tag_rows: for sid, t in tag_rows:
tags_by_scene[sid].append(TagOut.model_validate(t)) tags_by_scene[sid].append(TagOut.model_validate(t))
# 4) External refs + sources
ref_rows = session.execute( ref_rows = session.execute(
select(SceneExternalRef, Source) select(SceneExternalRef, Source)
.join(Source, Source.id == SceneExternalRef.source_id) .join(Source, Source.id == SceneExternalRef.source_id)
.where(SceneExternalRef.scene_id.in_(scene_ids)) .where(SceneExternalRef.scene_id.in_(scene_ids))
).all() ).all()
refs_by_scene: dict = defaultdict(list)
for ref, src in ref_rows: for ref, src in ref_rows:
refs_by_scene[ref.scene_id].append( refs_by_scene[ref.scene_id].append(
ExternalRefOut( ExternalRefOut(
@ -515,7 +523,52 @@ def _build_scenes_out_batch(session: Session, scenes: list[Scene]) -> list[Scene
) )
) )
# 5) Playback sources # 5) Playback sources. Light mode: tylko miniaturka (jedna na scenę) — kafelek
# czyta wyłącznie playback_sources[].thumbnail_url / animated_thumbnail_url.
pb_by_scene: dict = defaultdict(list)
if light:
pb_light = session.execute(
select(
PlaybackSource.scene_id,
PlaybackSource.thumbnail_url,
PlaybackSource.animated_thumbnail_url,
PlaybackSource.page_url,
)
.where(
PlaybackSource.scene_id.in_(scene_ids),
PlaybackSource.dead_at.is_(None),
)
.order_by(PlaybackSource.origin.asc())
).all()
# Pierwsza miniaturka + pierwszy animated per scena (1 slim wpis).
thumb_by_scene: dict = {}
anim_by_scene: dict = {}
for sid, thumb, anim, page_url in pb_light:
if sid not in thumb_by_scene and thumb:
thumb_by_scene[sid] = (thumb, page_url)
if sid not in anim_by_scene and anim:
anim_by_scene[sid] = (anim, page_url)
for sid in scene_ids:
t = thumb_by_scene.get(sid)
a = anim_by_scene.get(sid)
if not t and not a:
continue
t_url = t[0] if t else None
a_url = a[0] if a else None
ref = (t or a)[1]
if t_url and _needs_proxy(t_url):
t_url = _wrap_image_proxy(t_url, ref)
if a_url and _needs_proxy(a_url):
a_url = _wrap_image_proxy(a_url, ref)
# id/origin/page_url wymagane przez schemat ale nieużywane przez kafelek
# (SceneDetail re-fetchuje pełne źródła) — dummy sentinel.
pb_by_scene[sid].append(
PlaybackSourceOut(
id=uuid.UUID(int=0), origin="", page_url="",
thumbnail_url=t_url, animated_thumbnail_url=a_url,
)
)
else:
pb_rows = session.execute( pb_rows = session.execute(
select(PlaybackSource) select(PlaybackSource)
.where( .where(
@ -524,7 +577,6 @@ def _build_scenes_out_batch(session: Session, scenes: list[Scene]) -> list[Scene
) )
.order_by(PlaybackSource.origin.asc()) .order_by(PlaybackSource.origin.asc())
).scalars().all() ).scalars().all()
pb_by_scene: dict = defaultdict(list)
for p in pb_rows: for p in pb_rows:
out = PlaybackSourceOut.model_validate(p) out = PlaybackSourceOut.model_validate(p)
if out.thumbnail_url and _needs_proxy(out.thumbnail_url): if out.thumbnail_url and _needs_proxy(out.thumbnail_url):