unarr/internal/library/prewarm.go
Deivid Soto c82826bf68
Some checks failed
CI / Test (push) Failing after 6m21s
CI / Build (push) Successful in 1m34s
CI / Build-1 (push) Successful in 2m0s
CI / Build-2 (push) Successful in 1m33s
CI / Build-3 (push) Successful in 1m38s
CI / Build-4 (push) Successful in 1m35s
CI / Build-5 (push) Successful in 1m38s
CI / Lint (push) Failing after 2m34s
CI / Coverage (push) Failing after 2m44s
CI / Vet (push) Successful in 2m3s
fix(trickplay): stop scan-time sprite generation from saturating the host
Trickplay sprite generation (one full-decode ffmpeg pass per file) could pin a
machine: multiple agents on the same library decoded the same 4K file at once, no
CPU throttling, and crashed/restarted agents orphaned ffmpeg to init (it ran the
full 45-min decode to completion). Stacked orphans spiked a box to load ~140.

- Single-flight lock: O_CREATE|O_EXCL .lock in the shared sidecar dir so two
  agents watching the same library never decode the same file twice (stale locks
  reclaimed after a TTL). Returns ErrTrickplayInProgress → prewarm skips, not fail.
- Load gate: defer the heavy decode until 1-min load ≤ max(ratio×NumCPU, 1.5),
  capped at 15 min so it throttles without ever becoming a permanent off-switch on
  busy / small hosts. New knob library.prewarm_max_load_ratio (default 0.7).
- Concurrency: trickSem caps trickplay to ONE decode at a time per agent.
- CPU priority: setLowCPUPriority (nice 19) alongside the existing idle ionice.
- No orphans: hardenCmd sets Setpgid + Pdeathsig=SIGKILL, with runtime.LockOSThread
  around the child so the kernel kills ffmpeg exactly when the agent dies (and not
  spuriously — golang/go#27505).

Tests: single-flight/stale-reclaim, load-gate immediate/cancel, and an e2e
Pdeathsig orphan-kill check.
2026-06-04 08:25:00 +02:00

345 lines
12 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package library
import (
"context"
"errors"
"log"
"math"
"runtime"
"sync"
"time"
"github.com/torrentclaw/unarr/internal/library/mediainfo"
)
// Thumbnail sampling parameters. These mirror THUMB_FRACTIONS,
// THUMB_FALLBACK_SECS and THUMB_WIDTH in the web's src/lib/stream/thumbnails.ts;
// keep both sides in sync so the frames pre-extracted at scan time are exactly
// the ones the "file characteristics" panel asks for.

// thumbFractions are the sample points expressed as fractions of the runtime.
var thumbFractions = []float64{0.1, 0.3, 0.5, 0.7, 0.9}

// thumbFallbackSec are the absolute offsets (seconds) used instead of the
// fractions when no positive runtime is available.
var thumbFallbackSec = []float64{30, 120, 300, 600, 1200}

// thumbWidth is the width passed for every pre-extracted thumbnail frame.
const thumbWidth = 320
// PrewarmOptions configures which sidecars PrewarmSidecars extracts at scan
// time and how aggressively it does so.
type PrewarmOptions struct {
	// FFmpegPath is the resolved ffmpeg binary; an empty value disables prewarm.
	FFmpegPath string
	// CacheSubtitles mirrors library.cache_subtitles.
	CacheSubtitles bool
	// CacheThumbnails mirrors library.cache_thumbnails.
	CacheThumbnails bool
	// Workers is the number of concurrent ffmpeg jobs (each one is heavy).
	// Values below 1 fall back to the default of 2.
	Workers int

	// Trickplay (library.trickplay) enables generating ONE montage sprite per
	// file, sampled every TrickplayIntervalSec at TrickplayWidth. It replaces
	// live scrubber extraction during playback, so there is no contention with
	// the active stream.
	Trickplay bool
	// TrickplayIntervalSec is the sampling interval in seconds; trickplay jobs
	// are only queued when it is > 0.
	TrickplayIntervalSec float64
	// TrickplayWidth is the tile width; values ≤ 0 fall back to 240.
	TrickplayWidth int

	// MaxLoadRatio gates the heavy trickplay decode on system load: a job only
	// starts while the 1-min load average is ≤ MaxLoadRatio×NumCPU, so sprite
	// generation never saturates the machine or the NAS. ≤0 → default 0.7. It
	// has no effect on platforms without a load reading (jobs proceed
	// unthrottled there).
	MaxLoadRatio float64
}
// prewarmJob describes a single unit of extraction work. Exactly one of three
// shapes is used: a thumbnail job (thumb=true, one frame), a trickplay job
// (trick=true, the montage sprite for a file), or — when neither flag is set —
// a subtitle job covering all of a file's text subtitles in one ffmpeg pass.
type prewarmJob struct {
	// path is the media file the job operates on.
	path string
	// thumb marks a single-frame thumbnail job.
	thumb bool
	// trick marks a trickplay sprite job.
	trick bool
	// subIdx lists the subtitle stream indices to extract in ONE pass
	// (subtitle job).
	subIdx []int
	// posSec is the frame position in seconds (thumbnail job).
	posSec float64
	// width is the frame/tile width (thumbnail and trickplay jobs).
	width int
	// duration is the runtime in seconds (trickplay job).
	duration float64
}
// PrewarmSidecars extracts text subtitles (→ WebVTT), the panel's sample
// thumbnail frames (→ JPEG) and, when enabled, the trickplay sprite for every
// scanned item into the hidden ".unarr" sidecar dir next to the media file, so
// the /sub and /thumbnail handlers serve them instantly. Subtitle extraction
// without the per-request HTTP timeout is what makes huge remuxes work.
//
// It returns immediately when cache is nil, opts.FFmpegPath is empty, or none
// of CacheSubtitles / CacheThumbnails / Trickplay is enabled. Otherwise it
// blocks until every queued job has been processed or ctx is cancelled.
//
// Best-effort and idempotent: fresh sidecars are skipped, errors are counted
// and the item moves on, and ctx cancellation (Ctrl-C / daemon shutdown) stops
// cleanly — a job interrupted by shutdown is NOT counted as a failure, so the
// "N failed" summary only reflects real extraction problems (including per-job
// timeouts). Safe to call after every scan — only missing/stale caches do work.
func PrewarmSidecars(ctx context.Context, cache *LibraryCache, opts PrewarmOptions) {
	if cache == nil || opts.FFmpegPath == "" || (!opts.CacheSubtitles && !opts.CacheThumbnails && !opts.Trickplay) {
		return
	}
	workers := opts.Workers
	if workers < 1 {
		workers = 2
	}
	maxLoadRatio := opts.MaxLoadRatio
	if maxLoadRatio <= 0 {
		maxLoadRatio = 0.7
	}

	// Trickplay is the heaviest job (full 4K decode). Cap it to ONE concurrent
	// decode across this agent's workers — the thumbnail/subtitle jobs (light /
	// I/O-bound) keep their `workers` parallelism. Cross-agent dup work is stopped
	// by the per-file lock inside GenerateTrickplay.
	trickSem := make(chan struct{}, 1)
	jobs := make(chan prewarmJob)

	var wg sync.WaitGroup
	var mu sync.Mutex // guards the counters and sampleErr below
	subCached, thumbCached, trickCached, failed := 0, 0, 0, 0
	// sampleErr is the first extraction error, surfaced in the summary so a
	// systemic ffmpeg failure (vs one corrupt file) is diagnosable from "N failed".
	var sampleErr string

	// cached bumps one of the success counters under the lock.
	cached := func(counter *int) {
		mu.Lock()
		*counter++
		mu.Unlock()
	}
	// fail records n failures; a non-nil err becomes the summary's sample error
	// if none has been captured yet.
	fail := func(n int, err error) {
		mu.Lock()
		failed += n
		if err != nil && sampleErr == "" {
			sampleErr = err.Error()
		}
		mu.Unlock()
	}

	// runThumb extracts and caches one thumbnail frame.
	runThumb := func(j prewarmJob) {
		if _, ok := mediainfo.ReadCachedThumbnail(j.path, j.posSec, j.width); ok {
			return
		}
		// A single keyframe decode is fast; 60s bounds a corrupt file.
		jctx, cancel := context.WithTimeout(ctx, 60*time.Second)
		img, err := mediainfo.ExtractThumbnailJPEG(jctx, opts.FFmpegPath, j.path, j.posSec, j.width)
		cancel()
		if err != nil { // seek past EOF / corrupt → skip
			if ctx.Err() != nil {
				return // interrupted by shutdown, not an extraction failure
			}
			fail(1, err)
			return
		}
		if werr := mediainfo.WriteCachedThumbnail(j.path, j.posSec, j.width, img); werr != nil {
			log.Printf("[prewarm] thumbnail write skipped (pos=%.0f path=%q): %v", j.posSec, j.path, werr)
			fail(1, nil) // already logged above; not an extraction error
			return
		}
		cached(&thumbCached)
	}

	// runTrick generates the trickplay sprite for one file, serialized through
	// trickSem and gated on system load.
	runTrick := func(j prewarmJob) {
		if _, ok := mediainfo.ReadCachedTrickplay(j.path, j.width); ok {
			return
		}
		// Serialize the heavy decode (1 at a time) and wait for the box to
		// be idle enough before starting — sprite generation must never
		// saturate the CPU or the NAS.
		select {
		case trickSem <- struct{}{}:
		case <-ctx.Done():
			return
		}
		defer func() { <-trickSem }() // released on every exit path
		waitForLowLoad(ctx, maxLoadRatio)
		if ctx.Err() != nil {
			return
		}
		// Full-decode pass (samples 1 frame per interval over the whole
		// file) — generous deadline like subtitles; idempotent + cached.
		// INVARIANT: this deadline MUST stay below mediainfo.trickplayLockTTL,
		// or another agent could reclaim a still-running job's lock and double
		// the decode. If you raise this, raise trickplayLockTTL too.
		jctx, cancel := context.WithTimeout(ctx, 45*time.Minute)
		_, err := mediainfo.GenerateTrickplay(jctx, opts.FFmpegPath, j.path, opts.TrickplayIntervalSec, j.width, j.duration)
		cancel()
		switch {
		case err == nil:
			cached(&trickCached)
		case errors.Is(err, mediainfo.ErrTrickplayInProgress):
			// another worker/agent owns this file — skip, not a failure.
		case ctx.Err() != nil:
			// interrupted by shutdown, not an extraction failure.
		default:
			fail(1, err)
		}
	}

	// runSubs extracts every not-yet-cached text subtitle of one file.
	runSubs := func(j prewarmJob) {
		// Extract only the indices not already fresh, and do them in ONE
		// ffmpeg pass — a multi-GB remux is demuxed once for all its text
		// tracks instead of once per track.
		todo := make([]int, 0, len(j.subIdx))
		for _, idx := range j.subIdx {
			if _, ok := mediainfo.ReadCachedSubtitle(j.path, idx); !ok {
				todo = append(todo, idx)
			}
		}
		if len(todo) == 0 {
			return
		}
		// Generous per-file deadline. Subtitle packets are interleaved across
		// the whole container, so extraction is I/O-bound: it must read the
		// entire file once (all text tracks share that single pass). A 60GB
		// remux over ~75 MB/s NFS is ~14 min, so 45 min covers files up to
		// ~200GB; bounded so one corrupt/stalled file can't wedge a worker.
		// This is background + idempotent — it only runs until the cache fills.
		jctx, cancel := context.WithTimeout(ctx, 45*time.Minute)
		res, err := mediainfo.ExtractSubtitlesVTTMulti(jctx, opts.FFmpegPath, j.path, todo)
		cancel()
		if err != nil {
			if ctx.Err() != nil {
				return // interrupted by shutdown, not an extraction failure
			}
			fail(len(todo), err)
			return
		}
		for idx, vtt := range res {
			if werr := mediainfo.WriteCachedSubtitle(j.path, idx, vtt); werr != nil {
				log.Printf("[prewarm] sidecar write skipped (i=%d path=%q): %v", idx, j.path, werr)
				fail(1, nil) // already logged above; not an extraction error
				continue
			}
			cached(&subCached)
		}
	}

	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := range jobs {
				// Stop picking up work once shutdown has been requested; the
				// producer closes jobs on cancellation, so no sender is stranded.
				if ctx.Err() != nil {
					return
				}
				switch {
				case j.thumb:
					runThumb(j)
				case j.trick:
					runTrick(j)
				default:
					runSubs(j)
				}
			}
		}()
	}

	// Producer: walks the cache and queues the jobs each item needs. It owns the
	// jobs channel and closes it when done or when ctx is cancelled.
	go func() {
		defer close(jobs)
		// send queues one job; it reports false once ctx is cancelled.
		send := func(j prewarmJob) bool {
			select {
			case jobs <- j:
				return true
			case <-ctx.Done():
				return false
			}
		}
		for _, item := range cache.Items {
			if item.MediaInfo == nil || item.FilePath == "" {
				continue
			}
			if opts.CacheSubtitles {
				var subIdx []int
				for idx, sub := range item.MediaInfo.Subtitles {
					if mediainfo.IsTextSubtitleCodec(sub.Codec) {
						subIdx = append(subIdx, idx) // bitmap → burned in, skipped
					}
				}
				if len(subIdx) > 0 && !send(prewarmJob{path: item.FilePath, subIdx: subIdx}) {
					return
				}
			}
			if opts.CacheThumbnails {
				for _, pos := range thumbPositions(item) {
					if !send(prewarmJob{path: item.FilePath, thumb: true, posSec: pos, width: thumbWidth}) {
						return
					}
				}
			}
			if opts.Trickplay && opts.TrickplayIntervalSec > 0 {
				dur := 0.0
				if item.MediaInfo.Video != nil {
					dur = item.MediaInfo.Video.Duration
				}
				if dur > 0 {
					w := opts.TrickplayWidth
					if w <= 0 {
						w = 240
					}
					if !send(prewarmJob{path: item.FilePath, trick: true, width: w, duration: dur}) {
						return
					}
				}
			}
		}
	}()

	wg.Wait()

	// All workers have exited, so the counters can be read without the lock.
	if subCached == 0 && thumbCached == 0 && trickCached == 0 && failed == 0 {
		return
	}
	if failed > 0 && sampleErr != "" {
		log.Printf("[prewarm] %d subtitles, %d thumbnails, %d trickplay cached, %d failed (e.g. %s)", subCached, thumbCached, trickCached, failed, sampleErr)
		return
	}
	log.Printf("[prewarm] %d subtitles, %d thumbnails, %d trickplay cached, %d failed", subCached, thumbCached, trickCached, failed)
}
// prewarmLoadWaitCap bounds how long the load gate DEFERS a trickplay job. It's a
// throttle, not an off-switch: on a host whose baseline load is permanently above
// the threshold (a shared prod box, or any 1-2 core machine), an unbounded wait
// would mean sprites NEVER generate. After the cap we proceed anyway — the other
// safeguards (single-flight lock, trickSem=1, nice 19 + idle I/O, Pdeathsig) keep
// one throttled decode from saturating the box.
const prewarmLoadWaitCap = 15 * time.Minute
// waitForLowLoad defers until the 1-minute system load is at or below
// max(maxRatio×NumCPU, 1.5), or ctx is cancelled, or prewarmLoadWaitCap elapses —
// so the heavy trickplay decode prefers an idle machine but never stalls forever.
// The 1.5 floor reserves ~one core so the gate can still open on 12 core hosts
// (without it, threshold 0.71.4 is below almost any active machine's load and the
// feature would be permanently off). No load reading (non-Linux) → returns at once.
func waitForLowLoad(ctx context.Context, maxRatio float64) {
threshold := maxRatio * float64(runtime.NumCPU())
if threshold < 1.5 {
threshold = 1.5
}
deadline := time.After(prewarmLoadWaitCap)
logged := false
for {
load, ok := mediainfo.LoadAverage1()
if !ok || load <= threshold {
return
}
if !logged {
log.Printf("[prewarm] system load %.1f > %.1f — deferring trickplay (≤ %s)", load, threshold, prewarmLoadWaitCap)
logged = true
}
select {
case <-ctx.Done():
return
case <-deadline:
log.Printf("[prewarm] load still high after %s — proceeding with throttled trickplay (nice + idle I/O + single-flight still apply)", prewarmLoadWaitCap)
return
case <-time.After(15 * time.Second):
}
}
}
// thumbPositions computes the sample frame offsets (whole seconds) for item, in
// the same way as the web panel: when the video has a positive duration each
// entry of thumbFractions is scaled by it and rounded to the nearest second;
// otherwise the fixed thumbFallbackSec offsets are used. Duplicate offsets are
// dropped while preserving first-seen order.
func thumbPositions(item LibraryItem) []float64 {
	candidates := thumbFallbackSec
	if mi := item.MediaInfo; mi != nil && mi.Video != nil && mi.Video.Duration > 0 {
		scaled := make([]float64, 0, len(thumbFractions))
		for _, frac := range thumbFractions {
			scaled = append(scaled, math.Round(mi.Video.Duration*frac))
		}
		candidates = scaled
	}

	// Short clips can round several fractions to the same second; keep only the
	// first occurrence of each offset.
	unique := make([]float64, 0, len(candidates))
	taken := make(map[float64]struct{}, len(candidates))
	for _, pos := range candidates {
		if _, dup := taken[pos]; !dup {
			taken[pos] = struct{}{}
			unique = append(unique, pos)
		}
	}
	return unique
}