feat(trickplay): scan-time montage sprite for the web scrubber

Pre-generate ONE trickplay sprite (montage JPEG of frames sampled every
library.trickplay.interval, default 10s) + a JSON manifest per file during the
scan/auto-scan prewarm, cached in .unarr next to the media. The web scrubber
shows tiles from it instead of extracting frames live — removing the ffmpeg
contention with the active stream that broke seekbar previews (the original
'no thumbnail' report was the auto-scan prewarm decoding the same file the HLS
transcode was reading, not a seek-index fault).

- config: [library.trickplay] enabled/interval/width (default on, 10s, 240px),
  editable + a toggle; IntervalSeconds() with a 10s fallback.
- mediainfo: GenerateTrickplay (one ffmpeg fps=1/interval,scale,tile pass; idle
  I/O priority; ceil() frame count so no black trailing tile; a 16M-px budget,
  just under the ~16.7M-px (4096²) decode limit, coarsens the interval for long
  media so a single sprite stays decodable on iOS/Safari) + sprite/manifest
  sidecar cache helpers.
- engine: /trickplay endpoint (manifest JSON, ?kind=sprite JPEG); the agent owns
  the tile width so the web requests by path only; thumb:<sha256> token reused.
- prewarm: a trickplay job per item, gated; scan.go + daemon.go wire the config.

Tests: parseDims; synthetic 3x2 / exact-multiple / 1x1; real-file e2e smoke
(S02E08 → 143 tiles, 662KB sprite). Non-breaking: the existing 5-frame panel
prewarm + on-demand /thumbnail stay until the web migrates to the sprite.
This commit is contained in:
Deivid Soto 2026-06-03 20:30:29 +02:00
parent 7877e1de42
commit 8e37293b7d
7 changed files with 553 additions and 21 deletions

View file

@ -0,0 +1,245 @@
package mediainfo
import (
"context"
"encoding/json"
"fmt"
"math"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
)
// TrickplayManifest describes the montage sprite layout so a client can map a
// playback time to one tile: tileIndex = floor(timeSec / IntervalSec), then
// col = tileIndex % Cols, row = tileIndex / Cols, and the tile's pixel box is
// (col*TileWidth, row*TileHeight, TileWidth, TileHeight).
//
// It is serialized as JSON next to the sprite; the field order and the json
// tags below are the on-disk / on-wire format.
type TrickplayManifest struct {
	// Version is the schema version; GenerateTrickplay writes 1.
	Version int `json:"version"` // schema version (1)
	// IntervalSec is the effective number of seconds between two consecutive
	// tiles. It can be coarser than the requested interval when
	// GenerateTrickplay has to shrink the tile count to respect its pixel cap.
	IntervalSec float64 `json:"intervalSec"`
	// TileWidth is the width in pixels of one tile (sprite width / Cols).
	TileWidth int `json:"tileWidth"`
	// TileHeight is the height in pixels of one tile (sprite height / Rows).
	TileHeight int `json:"tileHeight"`
	// Cols is the number of tile columns in the sprite grid.
	Cols int `json:"cols"`
	// Rows is the number of tile rows in the sprite grid.
	Rows int `json:"rows"`
	// Count is the number of tiles that hold a real frame; cells past Count
	// are padding and must not be shown.
	Count int `json:"count"` // number of REAL frames (≤ Cols*Rows; the rest are padding)
	// DurationSec is the media duration, in seconds, that the grid was sized for.
	DurationSec float64 `json:"durationSec"`
}
// trickplaySpritePath returns where the montage JPEG for mediaPath lives inside
// the media's sidecar directory. The tile width is baked into the file name, so
// changing library.trickplay.width makes the lookup miss and regenerates
// cleanly instead of serving a stale sprite built for another width.
func trickplaySpritePath(mediaPath string, width int) string {
	dir := sidecarDir(mediaPath)
	name := fmt.Sprintf("%s.trickplay.w%d.jpg", filepath.Base(mediaPath), width)
	return filepath.Join(dir, name)
}
// trickplayManifestPath returns where the JSON manifest for mediaPath lives
// inside the media's sidecar directory. Like the sprite file name, it embeds the
// tile width so each width gets its own manifest.
func trickplayManifestPath(mediaPath string, width int) string {
	dir := sidecarDir(mediaPath)
	name := fmt.Sprintf("%s.trickplay.w%d.json", filepath.Base(mediaPath), width)
	return filepath.Join(dir, name)
}
// TrickplaySpritePath exposes the cached sprite JPEG location for (mediaPath,
// width) to other packages — the stream server uses it to find the file it
// serves. It only computes the path; it does not check that the file exists.
func TrickplaySpritePath(mediaPath string, width int) string {
	spritePath := trickplaySpritePath(mediaPath, width)
	return spritePath
}
// ReadCachedTrickplay returns the cached manifest for (mediaPath, width) when a
// usable sprite + manifest pair exists. ok=false means the caller should
// (re)generate.
//
// A pair is usable when:
//   - sidecarFresh accepts both the sprite and the manifest against the media
//     file (both must be at least as new as the media), and
//   - the manifest parses and describes a drawable grid: every geometry field
//     is positive, the interval is positive, and Count fits inside Cols*Rows.
//
// The geometry check matters because clients divide by IntervalSec and index
// tiles by Cols/Rows; a truncated or hand-edited manifest with zero fields must
// be treated as a cache miss rather than handed to the scrubber.
func ReadCachedTrickplay(mediaPath string, width int) (TrickplayManifest, bool) {
	spritePath := trickplaySpritePath(mediaPath, width)
	manifestPath := trickplayManifestPath(mediaPath, width)
	if !sidecarFresh(spritePath, mediaPath) || !sidecarFresh(manifestPath, mediaPath) {
		return TrickplayManifest{}, false
	}

	raw, err := os.ReadFile(manifestPath)
	if err != nil || len(raw) == 0 {
		return TrickplayManifest{}, false
	}

	var m TrickplayManifest
	if err := json.Unmarshal(raw, &m); err != nil {
		return TrickplayManifest{}, false
	}

	// Reject manifests that cannot describe a real sprite.
	if m.Cols <= 0 || m.Rows <= 0 || m.TileWidth <= 0 || m.TileHeight <= 0 {
		return TrickplayManifest{}, false
	}
	if m.IntervalSec <= 0 || m.Count <= 0 || m.Count > m.Cols*m.Rows {
		return TrickplayManifest{}, false
	}
	return m, true
}
// GenerateTrickplay builds the montage sprite + manifest for mediaPath and caches
// them in the sidecar dir. ONE ffmpeg pass samples a frame every intervalSec
// (fps=1/interval), scales each to width (scale=width:-2, i.e. an even height),
// and tiles them into a single JPEG. The whole file is decoded once — slow but a
// one-time, cached, scan-time cost — and it removes ALL live extraction during
// playback (no contention with the active stream).
//
// Parameters:
//   - ctx: bounds both the ffmpeg tile pass and the follow-up dimension probe;
//     the caller owns the deadline (generous at scan time).
//   - ffmpegPath: ffmpeg binary to run; empty is an error.
//   - mediaPath: the media file to sample.
//   - intervalSec: requested seconds between tiles; must be > 0.
//   - width: tile width in pixels; must be > 0.
//   - durationSec: probed media duration, drives the grid size; must be > 0
//     (0 → error, nothing to sample).
//
// Returns the manifest that was written. Its IntervalSec may be coarser than
// intervalSec when the sprite pixel budget forces fewer tiles. On any error no
// temp sprite is left behind and the zero manifest is returned.
func GenerateTrickplay(ctx context.Context, ffmpegPath, mediaPath string, intervalSec float64, width int, durationSec float64) (TrickplayManifest, error) {
	if ffmpegPath == "" {
		return TrickplayManifest{}, fmt.Errorf("trickplay: no ffmpeg")
	}
	if intervalSec <= 0 || width <= 0 {
		return TrickplayManifest{}, fmt.Errorf("trickplay: invalid interval=%v width=%d", intervalSec, width)
	}
	if durationSec <= 0 {
		return TrickplayManifest{}, fmt.Errorf("trickplay: unknown duration")
	}

	// fps=1/interval emits a frame at t=0, interval, 2*interval, … while t <
	// duration → ceil(duration/interval) frames. (An earlier floor(...)+1 put a
	// black padding tile at the very end of the scrubber on round-duration media.)
	effInterval := intervalSec
	count := int(math.Ceil(durationSec / effInterval))
	if count < 1 {
		count = 1
	}

	// Mobile decode cap: a single JPEG above ~16.7M px (4096²) fails to decode on
	// iOS/Safari; the budget below sits slightly under that. For long media,
	// sample fewer frames (coarser effective interval) so ONE sprite stays
	// renderable everywhere. tileH is unknown until probe, so estimate from 16:9
	// for the budget; the manifest reports effInterval so the client maps
	// time→tile correctly.
	const maxSpritePixels = 16_000_000
	estTileH := width * 9 / 16
	if estTileH < 1 {
		estTileH = 1
	}
	if maxTiles := maxSpritePixels / (width * estTileH); maxTiles >= 1 && count > maxTiles {
		// Round the coarsened interval UP to a whole millisecond. ffmpeg is given
		// the interval with 3 decimals (see fps below), so this makes the manifest
		// report exactly the interval ffmpeg samples at, and rounding up (never
		// down) cannot push the frame count above maxTiles.
		effInterval = math.Ceil(durationSec/float64(maxTiles)*1000) / 1000
		count = int(math.Ceil(durationSec / effInterval))
		if count > maxTiles {
			count = maxTiles // guard ceil rounding
		}
	}

	// Roughly-square grid. Cols*Rows ≥ count; trailing cells are ffmpeg padding,
	// and Count tells the client how many are real.
	cols := int(math.Ceil(math.Sqrt(float64(count))))
	if cols < 1 {
		cols = 1
	}
	rows := int(math.Ceil(float64(count) / float64(cols)))
	if rows < 1 {
		rows = 1
	}

	spritePath := trickplaySpritePath(mediaPath, width)
	manifestPath := trickplayManifestPath(mediaPath, width)
	if err := os.MkdirAll(filepath.Dir(spritePath), 0o755); err != nil {
		return TrickplayManifest{}, err
	}
	tmpSprite := spritePath + ".tmp"

	// fps filter wants a rational; format 1/effInterval with enough precision.
	fps := fmt.Sprintf("1/%s", strconv.FormatFloat(effInterval, 'f', 3, 64))
	vf := fmt.Sprintf("fps=%s,scale=%d:-2,tile=%dx%d", fps, width, cols, rows)
	args := []string{
		"-nostdin", "-loglevel", "error", "-y",
		"-i", mediaPath,
		"-frames:v", "1",
		"-vf", vf,
		"-an", "-sn",
		"-q:v", "5",
		// Force the muxer: the temp output ends in ".tmp", so ffmpeg can't infer
		// the format from the extension (it would error "Unable to choose an
		// output format"). mjpeg writes the single montage frame as a JPEG.
		"-f", "mjpeg",
		tmpSprite,
	}
	cmd := exec.CommandContext(ctx, ffmpegPath, args...)
	var stderr strings.Builder
	cmd.Stderr = &stderr

	// Start + idle I/O priority + Wait (matches the subtitle/thumbnail extractors):
	// this full-decode pass is the heaviest sidecar job and runs in the background
	// alongside live streaming on the same disk/NFS, so it must yield I/O.
	if err := cmd.Start(); err != nil {
		_ = os.Remove(tmpSprite) // best-effort cleanup; nothing useful to do on failure
		return TrickplayManifest{}, fmt.Errorf("ffmpeg tile start: %w", err)
	}
	setIdleIOPriority(cmd.Process.Pid)
	if err := cmd.Wait(); err != nil {
		_ = os.Remove(tmpSprite)
		return TrickplayManifest{}, fmt.Errorf("ffmpeg tile: %w: %s", err, strings.TrimSpace(stderr.String()))
	}
	if fi, err := os.Stat(tmpSprite); err != nil || fi.Size() == 0 {
		_ = os.Remove(tmpSprite)
		return TrickplayManifest{}, fmt.Errorf("trickplay: empty sprite")
	}

	// Probe the produced sprite for EXACT dimensions, so tile geometry is precise
	// (avoids ±1px aspect-rounding drift between our math and ffmpeg's scale=-2).
	spriteW, spriteH, err := probeImageDims(ctx, ffmpegPath, tmpSprite)
	if err != nil {
		_ = os.Remove(tmpSprite)
		return TrickplayManifest{}, fmt.Errorf("trickplay: probe sprite dims: %w", err)
	}
	// A sprite narrower/shorter than the grid would yield zero-sized tiles. This
	// is a separate failure from a probe error: there is no error to wrap here.
	if spriteW < cols || spriteH < rows {
		_ = os.Remove(tmpSprite)
		return TrickplayManifest{}, fmt.Errorf("trickplay: sprite %dx%d too small for %dx%d grid", spriteW, spriteH, cols, rows)
	}

	m := TrickplayManifest{
		Version:     1,
		IntervalSec: effInterval,
		TileWidth:   spriteW / cols,
		TileHeight:  spriteH / rows,
		Cols:        cols,
		Rows:        rows,
		Count:       count,
		DurationSec: durationSec,
	}
	mb, err := json.Marshal(m)
	if err != nil {
		_ = os.Remove(tmpSprite)
		return TrickplayManifest{}, err
	}

	// Publish sprite (rename) then manifest (atomic write). Order: sprite first so
	// a reader that sees a fresh manifest always finds the matching sprite.
	if err := os.Rename(tmpSprite, spritePath); err != nil {
		_ = os.Remove(tmpSprite)
		return TrickplayManifest{}, err
	}
	if err := writeSidecar(manifestPath, mb); err != nil {
		return TrickplayManifest{}, err
	}
	return m, nil
}
// probeImageDims reports the pixel width and height of the image at path. It
// runs `ffmpeg -hide_banner -i path` with no output file and hands the captured
// stderr to parseDims, which picks the dimensions out of the stream info.
// Reusing the already-resolved ffmpeg binary avoids a hard dependency on a
// separate ffprobe binary here.
func probeImageDims(ctx context.Context, ffmpegPath, path string) (int, int, error) {
	var probeOut strings.Builder
	probe := exec.CommandContext(ctx, ffmpegPath, "-hide_banner", "-i", path)
	probe.Stderr = &probeOut
	// The exit status is ignored on purpose: with no output file ffmpeg exits
	// non-zero, and only the probe text written to stderr is of interest.
	_ = probe.Run()
	w, h, err := parseDims(probeOut.String())
	return w, h, err
}
// parseDims extracts the first usable WxH (e.g. "3840x2160") from ffmpeg's
// stream info text.
//
// Only text from the first "Video:" marker onward is scanned. A token qualifies
// when it is "<digits>x<digits>" and both numbers parse as positive ints.
// Tokens that do not qualify are skipped and the scan continues: this covers
// hex codec tags such as "0x31637661" (width 0) and digit runs too long to fit
// an int (strconv.Atoi reports a range error, which must not be mistaken for a
// huge dimension).
//
// Returns an error when there is no "Video:" marker or no qualifying token.
func parseDims(s string) (int, int, error) {
	idx := strings.Index(s, "Video:")
	if idx < 0 {
		return 0, 0, fmt.Errorf("no video stream in probe output")
	}
	rest := s[idx:]
	isDigit := func(b byte) bool { return b >= '0' && b <= '9' }

	for i := 0; i < len(rest); {
		if !isDigit(rest[i]) {
			i++
			continue
		}
		// rest[i:wEnd] is the candidate width.
		wEnd := i
		for wEnd < len(rest) && isDigit(rest[wEnd]) {
			wEnd++
		}
		if wEnd < len(rest) && rest[wEnd] == 'x' {
			// rest[wEnd+1:hEnd] is the candidate height.
			hEnd := wEnd + 1
			for hEnd < len(rest) && isDigit(rest[hEnd]) {
				hEnd++
			}
			if hEnd > wEnd+1 {
				w, wErr := strconv.Atoi(rest[i:wEnd])
				h, hErr := strconv.Atoi(rest[wEnd+1 : hEnd])
				if wErr == nil && hErr == nil && w > 0 && h > 0 {
					return w, h, nil
				}
			}
		}
		// Not a usable token: resume right after the width digits so the
		// digits following an 'x' get their own chance to start a token.
		i = wEnd
	}
	return 0, 0, fmt.Errorf("no WxH token in probe output")
}

View file

@ -0,0 +1,107 @@
package mediainfo
import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"testing"
"time"
)
// TestParseDims feeds parseDims representative ffmpeg stream-info lines and
// checks both the extracted dimensions and the error cases (no "Video:" marker,
// empty input).
func TestParseDims(t *testing.T) {
	type probeCase struct {
		in   string
		w, h int
		ok   bool
	}
	table := []probeCase{
		{in: "Stream #0:0: Video: mjpeg, yuvj420p(pc), 720x270 [SAR 1:1 DAR 8:3]", w: 720, h: 270, ok: true},
		{in: " Stream #0:0: Video: h264 (High), yuv420p, 3840x2160, 23.98 fps", w: 3840, h: 2160, ok: true},
		{in: "Stream #0:1: Audio: aac, 48000 Hz, stereo"}, // no Video:
		{in: ""},
	}
	for _, tc := range table {
		gotW, gotH, err := parseDims(tc.in)
		switch {
		case tc.ok && (err != nil || gotW != tc.w || gotH != tc.h):
			t.Errorf("parseDims(%q) = %d,%d,%v; want %d,%d,nil", tc.in, gotW, gotH, err, tc.w, tc.h)
		case !tc.ok && err == nil:
			t.Errorf("parseDims(%q) expected error, got %dx%d", tc.in, gotW, gotH)
		}
	}
}
// makeClip renders a synthetic test clip of durSec seconds to path using
// ffmpeg's lavfi testsrc source (640x360, i.e. 16:9, at 10 fps, yuv420p). Any
// ffmpeg failure aborts the calling test with the combined output attached.
func makeClip(t *testing.T, ff, path string, durSec int) {
	t.Helper()
	source := fmt.Sprintf("testsrc=duration=%d:size=640x360:rate=10", durSec)
	args := []string{
		"-nostdin", "-loglevel", "error", "-y",
		"-f", "lavfi", "-i", source,
		"-pix_fmt", "yuv420p", path,
	}
	out, err := exec.Command(ff, args...).CombinedOutput()
	if err != nil {
		t.Fatalf("make test clip: %v: %s", err, out)
	}
}
// TestGenerateTrickplay renders short synthetic clips, runs GenerateTrickplay on
// each, and checks the resulting grid, tile geometry, sprite file, cached
// manifest round-trip, and cache invalidation when the media gets a newer
// mtime. It needs a real ffmpeg and is skipped when none is on PATH.
func TestGenerateTrickplay(t *testing.T) {
	ff, lookErr := exec.LookPath("ffmpeg")
	if lookErr != nil {
		t.Skip("ffmpeg not on PATH")
	}

	const (
		interval = 10
		tileW    = 240
	)
	type gridCase struct {
		name               string
		durSec             int
		wantCount          int
		wantCols, wantRows int
	}
	// fps=1/10 emits a frame at 0,10,20,… while t<dur → ceil(dur/10) frames.
	table := []gridCase{
		{name: "non_multiple_55s", durSec: 55, wantCount: 6, wantCols: 3, wantRows: 2},   // ceil(55/10)=6
		{name: "exact_multiple_60s", durSec: 60, wantCount: 6, wantCols: 3, wantRows: 2}, // ceil(60/10)=6 (NOT 7 — the off-by-one)
		{name: "short_clip_5s", durSec: 5, wantCount: 1, wantCols: 1, wantRows: 1},       // 1x1 grid
	}

	for _, tc := range table {
		t.Run(tc.name, func(t *testing.T) {
			clip := filepath.Join(t.TempDir(), "clip.mp4")
			makeClip(t, ff, clip, tc.durSec)

			ctx, cancel := context.WithTimeout(context.Background(), 90*time.Second)
			defer cancel()

			got, err := GenerateTrickplay(ctx, ff, clip, interval, tileW, float64(tc.durSec))
			if err != nil {
				t.Fatalf("GenerateTrickplay: %v", err)
			}

			// Grid shape.
			if got.Count != tc.wantCount || got.Cols != tc.wantCols || got.Rows != tc.wantRows {
				t.Errorf("grid: count=%d cols=%d rows=%d; want %d/%d/%d",
					got.Count, got.Cols, got.Rows, tc.wantCount, tc.wantCols, tc.wantRows)
			}

			// Tile geometry and sampling interval.
			if got.TileWidth != tileW {
				t.Errorf("tileWidth=%d; want 240", got.TileWidth)
			}
			if got.TileHeight < 130 || got.TileHeight > 140 {
				t.Errorf("tileHeight=%d; want ~135 (16:9)", got.TileHeight)
			}
			if got.IntervalSec != interval {
				t.Errorf("intervalSec=%v; want 10 (no cap at this size)", got.IntervalSec)
			}

			// Sprite must exist and be non-empty.
			info, statErr := os.Stat(TrickplaySpritePath(clip, tileW))
			if statErr != nil || info.Size() == 0 {
				t.Errorf("sprite not written: %v", statErr)
			}

			// Cached manifest must round-trip.
			cached, ok := ReadCachedTrickplay(clip, tileW)
			if !ok || cached.Count != got.Count || cached.TileHeight != got.TileHeight || cached.Cols != got.Cols {
				t.Errorf("ReadCachedTrickplay mismatch: ok=%v got=%+v want=%+v", ok, cached, got)
			}

			// Stale media (newer mtime) must invalidate the cache. If the mtime
			// cannot be bumped, this check is skipped.
			future := time.Now().Add(2 * time.Hour)
			if os.Chtimes(clip, future, future) == nil {
				if _, stillOK := ReadCachedTrickplay(clip, tileW); stillOK {
					t.Error("ReadCachedTrickplay returned stale sprite after media mtime bumped")
				}
			}
		})
	}
}