fix(stream): functional libplacebo probe + benchmark hardening

Review (critico) caught a regression: the prod agent image ships a BtbN GPL
ffmpeg with libplacebo COMPILED IN but no Vulkan runtime (debian-slim, no
libvulkan1/mesa-vulkan-drivers/nvidia ICD). The presence probe (ffmpeg
-filters) would flip HasLibplacebo on, the filter's Vulkan device creation
would fail at runtime, and HDR sources that previously tonemapped via zscale
would break.

- FFmpegSupportsLibplacebo now RUNS the filter on one synthetic frame and
  requires a clean exit (forces Vulkan device init + filtergraph negotiation),
  so it is honest about THIS host: works on Vulkan-capable hosts, falls back to
  zscale where Vulkan is absent. Logs the real ffmpeg error on failure.
- Warm the libplacebo (Vulkan init ~1.7s) + zscale caches in a background
  goroutine at startup so the first stream session doesn't pay the probe and
  risk its setup timeout.
- Benchmark: margin 1.5x -> 2.0x (the probe measures encode only; real decode
  of HEVC/10-bit + busier content needs more headroom), per-probe timeout
  12s -> 6s + overall 45s -> 20s (it blocks registration on software hosts),
  and a 'no rung measured' case (missing lavfi/wedged ffmpeg) now keeps the
  1080 default instead of flooring at 480 — an infra failure isn't a slow host.

Verified e2e on the fixed binary: LOTR Two Towers (HEVC 3840x1608 10-bit
HDR10/PQ, 12GB) on desktop-Chrome caps -> hls, ffmpeg runs h264_nvenc with
-vf ...,libplacebo=...:format=yuv420p:tonemapping=bt.2390 (zscale chain
replaced), 45 fMP4 segments, ffprobe confirms output h264 yuv420p bt709
(tonemapped from bt2020/smpte2084), no ffmpeg errors.
This commit is contained in:
Deivid Soto 2026-06-03 09:57:48 +02:00
parent ef3b190e0b
commit cfaedb7f3b
4 changed files with 102 additions and 21 deletions

View file

@ -160,12 +160,27 @@ func runDaemonStart() error {
// HW encoders return 2160 instantly; a software-only host runs a bounded // HW encoders return 2160 instantly; a software-only host runs a bounded
// encode benchmark so a weak NAS/CPU reports the rung it can actually // encode benchmark so a weak NAS/CPU reports the rung it can actually
// sustain (720/480) and the web side routes oversized sources to an // sustain (720/480) and the web side routes oversized sources to an
// external player instead of a stuttering transcode. Own timeout — the 10 s // external player instead of a stuttering transcode. This blocks
// probeCtx above is sized for the quick diagnostic, not three encode rungs. // registration on a software host, so it's bounded tight (3 rungs × 6 s =
benchCtx, benchCancel := context.WithTimeout(context.Background(), 45*time.Second) // 18 s worst case; <1 s on a capable box that passes the first rung). Own
// timeout — the 10 s probeCtx above is sized for the quick diagnostic.
benchCtx, benchCancel := context.WithTimeout(context.Background(), 20*time.Second)
maxTranscodeHeight := engine.BenchmarkMaxTranscodeHeight(benchCtx, ffmpegResolved, hwAccelPick) maxTranscodeHeight := engine.BenchmarkMaxTranscodeHeight(benchCtx, ffmpegResolved, hwAccelPick)
benchCancel() benchCancel()
// Warm the tonemap capability caches off the hot path. The libplacebo probe
// actually RUNS the filter (Vulkan device init ~1.7 s), so doing it lazily
// in buildTranscodeRuntime would tax the FIRST stream session and risk its
// setup timeout. A real session arrives seconds-to-minutes after startup, so
// a background warm has finished by then; if one races in first, the cache's
// own mutex makes the concurrent cold call safe (both compute the same bool).
if cfg.Download.Transcode.Enabled && ffmpegResolved != "" {
go func() {
engine.FFmpegSupportsLibplacebo(ffmpegResolved)
engine.FFmpegSupportsZscale(ffmpegResolved)
}()
}
// Create daemon config // Create daemon config
daemonCfg := agent.DaemonConfig{ daemonCfg := agent.DaemonConfig{
AgentID: cfg.Agent.ID, AgentID: cfg.Agent.ID,

View file

@ -25,12 +25,15 @@ var softwareBenchmarkRungs = []benchmarkRung{
} }
// realtimeMarginSoftware is how much faster than realtime a synthetic encode // realtimeMarginSoftware is how much faster than realtime a synthetic encode
// must run before we call a rung "sustainable". 1.5× leaves headroom for two // must run before we call a rung "sustainable". 2.0× (not 1.5×) because the
// things the benchmark does NOT measure: (a) decoding the real source — // benchmark measures ONLY the encode of a low-entropy synthetic source and
// software HEVC / 10-bit decode is heavier than encoding the synthetic clip — // must cover two costs it never sees: (a) decoding the real source — software
// and (b) real content being busier than testsrc2 (which x264 compresses // HEVC / 10-bit decode can rival the encode cost on its own — and (b) real
// faster than film grain or motion). // content (film grain, motion) being far busier than testsrc2 for x264's
const realtimeMarginSoftware = 1.5 // rate-control + motion estimation. Erring high routes a borderline box's
// oversized sources to an external player (which works) instead of a
// stuttering transcode (which is the failure we're preventing).
const realtimeMarginSoftware = 2.0
// benchmarkClipSeconds is the synthetic clip length. Short enough that a // benchmarkClipSeconds is the synthetic clip length. Short enough that a
// capable host finishes the 1080p rung in well under a second, long enough to // capable host finishes the 1080p rung in well under a second, long enough to
@ -56,6 +59,7 @@ func BenchmarkMaxTranscodeHeight(ctx context.Context, ffmpegPath string, hw HWAc
if ffmpegPath == "" { if ffmpegPath == "" {
return 1080 // no benchmark possible; keep the historical default return 1080 // no benchmark possible; keep the historical default
} }
measuredAny := false
for _, rung := range softwareBenchmarkRungs { for _, rung := range softwareBenchmarkRungs {
factor, ok := measureEncodeRealtimeFactor(ctx, ffmpegPath, rung) factor, ok := measureEncodeRealtimeFactor(ctx, ffmpegPath, rung)
if !ok { if !ok {
@ -64,12 +68,21 @@ func BenchmarkMaxTranscodeHeight(ctx context.Context, ffmpegPath string, hw HWAc
log.Printf("[transcode] encode benchmark: %dp probe failed — trying lower", rung.height) log.Printf("[transcode] encode benchmark: %dp probe failed — trying lower", rung.height)
continue continue
} }
measuredAny = true
if factor >= realtimeMarginSoftware { if factor >= realtimeMarginSoftware {
log.Printf("[transcode] encode benchmark: software ceiling %dp (%.1f× realtime)", rung.height, factor) log.Printf("[transcode] encode benchmark: software ceiling %dp (%.1f× realtime)", rung.height, factor)
return rung.height return rung.height
} }
log.Printf("[transcode] encode benchmark: %dp only %.1f× realtime (<%.1f×) — trying lower", rung.height, factor, realtimeMarginSoftware) log.Printf("[transcode] encode benchmark: %dp only %.1f× realtime (<%.1f×) — trying lower", rung.height, factor, realtimeMarginSoftware)
} }
if !measuredAny {
// No rung produced a measurement at all — the benchmark infrastructure
// failed (missing lavfi/testsrc2, ffmpeg wedged), NOT a slow host. Don't
// punish a possibly-capable box by flooring at 480; keep the historical
// default so behaviour is no worse than before the benchmark existed.
log.Printf("[transcode] encode benchmark: no rung could be measured (lavfi/ffmpeg issue) — keeping default 1080 ceiling")
return 1080
}
log.Printf("[transcode] encode benchmark: host can't sustain 480p software encode — flooring ceiling at 480 (oversized sources route to external)") log.Printf("[transcode] encode benchmark: host can't sustain 480p software encode — flooring ceiling at 480 (oversized sources route to external)")
return 480 return 480
} }
@ -81,10 +94,11 @@ func BenchmarkMaxTranscodeHeight(ctx context.Context, ffmpegPath string, hw HWAc
// rather than treating the failure as a fast result. Each probe is bounded so // rather than treating the failure as a fast result. Each probe is bounded so
// a wedged ffmpeg can't stall daemon startup. // a wedged ffmpeg can't stall daemon startup.
func measureEncodeRealtimeFactor(ctx context.Context, ffmpegPath string, rung benchmarkRung) (float64, bool) { func measureEncodeRealtimeFactor(ctx context.Context, ffmpegPath string, rung benchmarkRung) (float64, bool) {
// A 3 s superfast encode that takes longer than 12 s is <0.25× realtime — // A 3 s superfast encode that takes longer than 6 s is <0.5× realtime —
// already far below the 1.5× bar — so capping here only kills genuinely // already far below the 2.0× bar — so capping here only kills genuinely
// hopeless rungs early and keeps worst-case startup bounded. // hopeless rungs early and bounds worst-case startup blocking (3 rungs ×
bctx, cancel := context.WithTimeout(ctx, 12*time.Second) // 6 s = 18 s) since this runs synchronously before the agent registers.
bctx, cancel := context.WithTimeout(ctx, 6*time.Second)
defer cancel() defer cancel()
size := strconv.Itoa(rung.width) + "x" + strconv.Itoa(rung.height) size := strconv.Itoa(rung.width) + "x" + strconv.Itoa(rung.height)

View file

@ -5,6 +5,7 @@ import (
"context" "context"
"log" "log"
"os/exec" "os/exec"
"strings"
"sync" "sync"
"time" "time"
) )
@ -46,10 +47,21 @@ var (
libplaceboCache = map[string]bool{} libplaceboCache = map[string]bool{}
) )
// FFmpegSupportsLibplacebo reports whether the ffmpeg binary has the libplacebo // FFmpegSupportsLibplacebo reports whether this host can ACTUALLY run the
// filter (Vulkan GPU HDR tonemap + colorspace). Preferred over zscale when both // libplacebo filter — not merely whether it is compiled in. libplacebo is a
// exist. Cached per path; a probe failure is treated as "no". Mirrors // Vulkan filter, so it needs a working Vulkan device + ICD at runtime, which a
// FFmpegSupportsZscale. // presence check (`ffmpeg -filters`) does NOT prove: the prod agent image
// ships a BtbN GPL ffmpeg with libplacebo built in but no Vulkan runtime
// (debian-slim, no libvulkan1 / mesa-vulkan-drivers / nvidia ICD), so a
// presence check would flip this on and break HDR playback that previously
// tonemapped fine via zscale.
//
// So we run the real filter on one synthetic frame and require a clean exit:
// that forces Vulkan device creation + filtergraph negotiation (the implicit
// hwupload/hwdownload around the GPU filter). Pass → libplacebo works here;
// fail → fall back to the zscale chain. Cached per path; a probe failure is
// treated as "no". The probe is bounded so a wedged ffmpeg can't stall the
// first session.
func FFmpegSupportsLibplacebo(ffmpegPath string) bool { func FFmpegSupportsLibplacebo(ffmpegPath string) bool {
if ffmpegPath == "" { if ffmpegPath == "" {
return false return false
@ -61,20 +73,43 @@ func FFmpegSupportsLibplacebo(ffmpegPath string) bool {
} }
libplaceboCacheMu.Unlock() libplaceboCacheMu.Unlock()
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) // 10 s: first-run Vulkan device creation alone can take ~1 s ("Spent
// ~1150ms creating vulkan device"), plus codec/filter init.
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel() defer cancel()
out, err := exec.CommandContext(ctx, ffmpegPath, "-hide_banner", "-filters").Output() // Run the EXACT filter we'd use, on a 1-frame synthetic source, discarding
supported := err == nil && bytes.Contains(out, []byte("libplacebo")) // output. testsrc2 is SDR so the tonemap is near-passthrough — the point is
// to exercise Vulkan device init + the filter, not the mapping quality.
out, err := exec.CommandContext(ctx, ffmpegPath,
"-hide_banner", "-loglevel", "error", "-nostats",
"-f", "lavfi", "-i", "testsrc2=size=128x128:rate=1:duration=1",
"-vf", libplaceboTonemapFilter, "-frames:v", "1", "-f", "null", "-",
).CombinedOutput()
supported := err == nil
libplaceboCacheMu.Lock() libplaceboCacheMu.Lock()
libplaceboCache[ffmpegPath] = supported libplaceboCache[ffmpegPath] = supported
libplaceboCacheMu.Unlock() libplaceboCacheMu.Unlock()
if supported { if supported {
log.Printf("[tonemap] ffmpeg has libplacebo — HDR sources tonemapped on the GPU (preferred)") log.Printf("[tonemap] ffmpeg libplacebo works (Vulkan OK) — HDR sources tonemapped on the GPU (preferred)")
} else {
log.Printf("[tonemap] ffmpeg libplacebo unavailable (no Vulkan runtime or filter absent) — HDR falls back to zscale/none: %v", strings.TrimSpace(lastLine(out)))
} }
return supported return supported
} }
// lastLine extracts the final line of ffmpeg output that contains something
// other than whitespace. ffmpeg prints the actual failure reason (for example
// "No VK_ICD..." or "Device creation failed") last, so this is what is worth
// logging instead of the whole transcript.
//
// The line is returned exactly as it appears between newlines (surrounding
// spaces or a trailing carriage return are kept; callers trim if they care).
// It returns "" when b is empty or holds only whitespace.
func lastLine(b []byte) string {
	rest := string(b)
	// Peel lines off the tail one at a time so we stop at the first usable
	// one instead of splitting the entire output up front.
	for rest != "" {
		var line string
		if cut := strings.LastIndexByte(rest, '\n'); cut >= 0 {
			line, rest = rest[cut+1:], rest[:cut]
		} else {
			// No newline left: whatever remains is the first line.
			line, rest = rest, ""
		}
		if strings.TrimSpace(line) != "" {
			return line
		}
	}
	return ""
}
// FFmpegSupportsZscale reports whether the ffmpeg binary at path was built with // FFmpegSupportsZscale reports whether the ffmpeg binary at path was built with
// the zscale filter (libzimg), required for HDR→SDR tonemapping. Cached per // the zscale filter (libzimg), required for HDR→SDR tonemapping. Cached per
// path. A detection failure (binary missing, exec error) is treated as "no" so // path. A detection failure (binary missing, exec error) is treated as "no" so

View file

@ -2,6 +2,7 @@ package engine
import ( import (
"os" "os"
"os/exec"
"path/filepath" "path/filepath"
"strings" "strings"
"testing" "testing"
@ -112,6 +113,22 @@ func TestTonemap_VAAPIInsertsBeforeHwupload(t *testing.T) {
} }
} }
// TestFFmpegSupportsLibplacebo_FunctionalProbe checks the probe's negative
// paths and, when an ffmpeg binary is on PATH, that a real probe run returns
// at all.
func TestFFmpegSupportsLibplacebo_FunctionalProbe(t *testing.T) {
	// Inputs that can never run a probe must report false — without
	// panicking or hanging on the way there.
	mustBeFalse := []struct {
		path string
		msg  string
	}{
		{path: "", msg: "empty path must be false"},
		{path: "/nonexistent/ffmpeg", msg: "nonexistent ffmpeg must be false"},
	}
	for _, tc := range mustBeFalse {
		if FFmpegSupportsLibplacebo(tc.path) {
			t.Error(tc.msg)
		}
	}

	// No ffmpeg installed: nothing further to exercise.
	if _, err := exec.LookPath("ffmpeg"); err != nil {
		return
	}
	// The answer depends on the environment (true only where a Vulkan runtime
	// is present), so the value is deliberately discarded: the only
	// requirement is that the probe finishes and hands back a bool for THIS
	// host.
	_ = FFmpegSupportsLibplacebo("ffmpeg")
}
func TestFFmpegSupportsZscale_Stub(t *testing.T) { func TestFFmpegSupportsZscale_Stub(t *testing.T) {
dir := t.TempDir() dir := t.TempDir()