From cfaedb7f3bfc41b782d6435b1f737d9a97ffd3a8 Mon Sep 17 00:00:00 2001 From: Deivid Soto Date: Wed, 3 Jun 2026 09:57:48 +0200 Subject: [PATCH] fix(stream): functional libplacebo probe + benchmark hardening MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review (critico) caught a regression: the prod agent image ships a BtbN GPL ffmpeg with libplacebo COMPILED IN but no Vulkan runtime (debian-slim, no libvulkan1/mesa-vulkan-drivers/nvidia ICD). The presence probe (ffmpeg -filters) would flip HasLibplacebo on, the filter's Vulkan device creation would fail at runtime, and HDR sources that previously tonemapped via zscale would break. - FFmpegSupportsLibplacebo now RUNS the filter on one synthetic frame and requires a clean exit (forces Vulkan device init + filtergraph negotiation), so it is honest about THIS host: works on Vulkan-capable hosts, falls back to zscale where Vulkan is absent. Logs the real ffmpeg error on failure. - Warm the libplacebo (Vulkan init ~1.7s) + zscale caches in a background goroutine at startup so the first stream session doesn't pay the probe and risk its setup timeout. - Benchmark: margin 1.5x -> 2.0x (the probe measures encode only; real decode of HEVC/10-bit + busier content needs more headroom), per-probe timeout 12s -> 6s + overall 45s -> 20s (it blocks registration on software hosts), and a 'no rung measured' case (missing lavfi/wedged ffmpeg) now keeps the 1080 default instead of flooring at 480 — an infra failure isn't a slow host. Verified e2e on the fixed binary: LOTR Two Towers (HEVC 3840x1608 10-bit HDR10/PQ, 12GB) on desktop-Chrome caps -> hls, ffmpeg runs h264_nvenc with -vf ...,libplacebo=...:format=yuv420p:tonemapping=bt.2390 (zscale chain replaced), 45 fMP4 segments, ffprobe confirms output h264 yuv420p bt709 (tonemapped from bt2020/smpte2084), no ffmpeg errors. --- internal/cmd/daemon.go | 21 ++++++++++-- internal/engine/encode_benchmark.go | 34 +++++++++++++------ internal/engine/tonemap.go | 51 ++++++++++++++++++++++++----- internal/engine/tonemap_test.go | 17 ++++++++++ 4 files changed, 102 insertions(+), 21 deletions(-) diff --git a/internal/cmd/daemon.go b/internal/cmd/daemon.go index 07d090c..eb6cbbf 100644 --- a/internal/cmd/daemon.go +++ b/internal/cmd/daemon.go @@ -160,12 +160,27 @@ func runDaemonStart() error { // HW encoders return 2160 instantly; a software-only host runs a bounded // encode benchmark so a weak NAS/CPU reports the rung it can actually // sustain (720/480) and the web side routes oversized sources to an - // external player instead of a stuttering transcode. Own timeout — the 10 s - // probeCtx above is sized for the quick diagnostic, not three encode rungs. - benchCtx, benchCancel := context.WithTimeout(context.Background(), 45*time.Second) + // external player instead of a stuttering transcode. This blocks + // registration on a software host, so it's bounded tight (3 rungs × 6 s = + // 18 s worst case; <1 s on a capable box that passes the first rung). Own + // timeout — the 10 s probeCtx above is sized for the quick diagnostic. + benchCtx, benchCancel := context.WithTimeout(context.Background(), 20*time.Second) maxTranscodeHeight := engine.BenchmarkMaxTranscodeHeight(benchCtx, ffmpegResolved, hwAccelPick) benchCancel() + // Warm the tonemap capability caches off the hot path. The libplacebo probe + // actually RUNS the filter (Vulkan device init ~1.7 s), so doing it lazily + // in buildTranscodeRuntime would tax the FIRST stream session and risk its + // setup timeout. A real session arrives seconds-to-minutes after startup, so + // a background warm has finished by then; if one races in first, the cache's + // own mutex makes the concurrent cold call safe (both compute the same bool). + if cfg.Download.Transcode.Enabled && ffmpegResolved != "" { + go func() { + engine.FFmpegSupportsLibplacebo(ffmpegResolved) + engine.FFmpegSupportsZscale(ffmpegResolved) + }() + } + // Create daemon config daemonCfg := agent.DaemonConfig{ AgentID: cfg.Agent.ID, diff --git a/internal/engine/encode_benchmark.go b/internal/engine/encode_benchmark.go index 385cd3b..6fd3740 100644 --- a/internal/engine/encode_benchmark.go +++ b/internal/engine/encode_benchmark.go @@ -25,12 +25,15 @@ var softwareBenchmarkRungs = []benchmarkRung{ } // realtimeMarginSoftware is how much faster than realtime a synthetic encode -// must run before we call a rung "sustainable". 1.5× leaves headroom for two -// things the benchmark does NOT measure: (a) decoding the real source — -// software HEVC / 10-bit decode is heavier than encoding the synthetic clip — -// and (b) real content being busier than testsrc2 (which x264 compresses -// faster than film grain or motion). -const realtimeMarginSoftware = 1.5 +// must run before we call a rung "sustainable". 2.0× (not 1.5×) because the +// benchmark measures ONLY the encode of a low-entropy synthetic source and +// must cover two costs it never sees: (a) decoding the real source — software +// HEVC / 10-bit decode can rival the encode cost on its own — and (b) real +// content (film grain, motion) being far busier than testsrc2 for x264's +// rate-control + motion estimation. Erring high routes a borderline box's +// oversized sources to an external player (which works) instead of a +// stuttering transcode (which is the failure we're preventing). +const realtimeMarginSoftware = 2.0 // benchmarkClipSeconds is the synthetic clip length. Short enough that a // capable host finishes the 1080p rung in well under a second, long enough to @@ -56,6 +59,7 @@ func BenchmarkMaxTranscodeHeight(ctx context.Context, ffmpegPath string, hw HWAc if ffmpegPath == "" { return 1080 // no benchmark possible; keep the historical default } + measuredAny := false for _, rung := range softwareBenchmarkRungs { factor, ok := measureEncodeRealtimeFactor(ctx, ffmpegPath, rung) if !ok { @@ -64,12 +68,21 @@ func BenchmarkMaxTranscodeHeight(ctx context.Context, ffmpegPath string, hw HWAc log.Printf("[transcode] encode benchmark: %dp probe failed — trying lower", rung.height) continue } + measuredAny = true if factor >= realtimeMarginSoftware { log.Printf("[transcode] encode benchmark: software ceiling %dp (%.1f× realtime)", rung.height, factor) return rung.height } log.Printf("[transcode] encode benchmark: %dp only %.1f× realtime (<%.1f×) — trying lower", rung.height, factor, realtimeMarginSoftware) } + if !measuredAny { + // No rung produced a measurement at all — the benchmark infrastructure + // failed (missing lavfi/testsrc2, ffmpeg wedged), NOT a slow host. Don't + // punish a possibly-capable box by flooring at 480; keep the historical + // default so behaviour is no worse than before the benchmark existed. + log.Printf("[transcode] encode benchmark: no rung could be measured (lavfi/ffmpeg issue) — keeping default 1080 ceiling") + return 1080 + } log.Printf("[transcode] encode benchmark: host can't sustain 480p software encode — flooring ceiling at 480 (oversized sources route to external)") return 480 } @@ -81,10 +94,11 @@ func BenchmarkMaxTranscodeHeight(ctx context.Context, ffmpegPath string, hw HWAc // rather than treating the failure as a fast result. Each probe is bounded so // a wedged ffmpeg can't stall daemon startup. func measureEncodeRealtimeFactor(ctx context.Context, ffmpegPath string, rung benchmarkRung) (float64, bool) { - // A 3 s superfast encode that takes longer than 12 s is <0.25× realtime — - // already far below the 1.5× bar — so capping here only kills genuinely - // hopeless rungs early and keeps worst-case startup bounded. - bctx, cancel := context.WithTimeout(ctx, 12*time.Second) + // A 3 s superfast encode that takes longer than 6 s is <0.5× realtime — + // already far below the 2.0× bar — so capping here only kills genuinely + // hopeless rungs early and bounds worst-case startup blocking (3 rungs × + // 6 s = 18 s) since this runs synchronously before the agent registers. + bctx, cancel := context.WithTimeout(ctx, 6*time.Second) defer cancel() size := strconv.Itoa(rung.width) + "x" + strconv.Itoa(rung.height) diff --git a/internal/engine/tonemap.go b/internal/engine/tonemap.go index b86e49f..192bc9b 100644 --- a/internal/engine/tonemap.go +++ b/internal/engine/tonemap.go @@ -5,6 +5,7 @@ import ( "context" "log" "os/exec" + "strings" "sync" "time" ) @@ -46,10 +47,21 @@ var ( libplaceboCache = map[string]bool{} ) -// FFmpegSupportsLibplacebo reports whether the ffmpeg binary has the libplacebo -// filter (Vulkan GPU HDR tonemap + colorspace). Preferred over zscale when both -// exist. Cached per path; a probe failure is treated as "no". Mirrors -// FFmpegSupportsZscale. +// FFmpegSupportsLibplacebo reports whether this host can ACTUALLY run the +// libplacebo filter — not merely whether it is compiled in. libplacebo is a +// Vulkan filter, so it needs a working Vulkan device + ICD at runtime, which a +// presence check (`ffmpeg -filters`) does NOT prove: the prod agent image +// ships a BtbN GPL ffmpeg with libplacebo built in but no Vulkan runtime +// (debian-slim, no libvulkan1 / mesa-vulkan-drivers / nvidia ICD), so a +// presence check would flip this on and break HDR playback that previously +// tonemapped fine via zscale. +// +// So we run the real filter on one synthetic frame and require a clean exit: +// that forces Vulkan device creation + filtergraph negotiation (the implicit +// hwupload/hwdownload around the GPU filter). Pass → libplacebo works here; +// fail → fall back to the zscale chain. Cached per path; a probe failure is +// treated as "no". The probe is bounded so a wedged ffmpeg can't stall the +// first session. func FFmpegSupportsLibplacebo(ffmpegPath string) bool { if ffmpegPath == "" { return false @@ -61,20 +73,43 @@ func FFmpegSupportsLibplacebo(ffmpegPath string) bool { } libplaceboCacheMu.Unlock() - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + // 10 s: first-run Vulkan device creation alone can take ~1 s ("Spent + // ~1150ms creating vulkan device"), plus codec/filter init. + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() - out, err := exec.CommandContext(ctx, ffmpegPath, "-hide_banner", "-filters").Output() - supported := err == nil && bytes.Contains(out, []byte("libplacebo")) + // Run the EXACT filter we'd use, on a 1-frame synthetic source, discarding + // output. testsrc2 is SDR so the tonemap is near-passthrough — the point is + // to exercise Vulkan device init + the filter, not the mapping quality. + out, err := exec.CommandContext(ctx, ffmpegPath, + "-hide_banner", "-loglevel", "error", "-nostats", + "-f", "lavfi", "-i", "testsrc2=size=128x128:rate=1:duration=1", + "-vf", libplaceboTonemapFilter, "-frames:v", "1", "-f", "null", "-", + ).CombinedOutput() + supported := err == nil libplaceboCacheMu.Lock() libplaceboCache[ffmpegPath] = supported libplaceboCacheMu.Unlock() if supported { - log.Printf("[tonemap] ffmpeg has libplacebo — HDR sources tonemapped on the GPU (preferred)") + log.Printf("[tonemap] ffmpeg libplacebo works (Vulkan OK) — HDR sources tonemapped on the GPU (preferred)") + } else { + log.Printf("[tonemap] ffmpeg libplacebo unavailable (no Vulkan runtime or filter absent) — HDR falls back to zscale/none: %v", strings.TrimSpace(lastLine(out))) } return supported } +// lastLine returns the last non-empty line of ffmpeg output — the actual error +// (e.g. "No VK_ICD..." / "Device creation failed") rather than the whole log. +func lastLine(b []byte) string { + lines := strings.Split(strings.TrimRight(string(b), "\n"), "\n") + for i := len(lines) - 1; i >= 0; i-- { + if strings.TrimSpace(lines[i]) != "" { + return lines[i] + } + } + return "" +} + // FFmpegSupportsZscale reports whether the ffmpeg binary at path was built with // the zscale filter (libzimg), required for HDR→SDR tonemapping. Cached per // path. A detection failure (binary missing, exec error) is treated as "no" so diff --git a/internal/engine/tonemap_test.go b/internal/engine/tonemap_test.go index a73e4f8..2cc0d5b 100644 --- a/internal/engine/tonemap_test.go +++ b/internal/engine/tonemap_test.go @@ -2,6 +2,7 @@ package engine import ( "os" + "os/exec" "path/filepath" "strings" "testing" @@ -112,6 +113,22 @@ func TestTonemap_VAAPIInsertsBeforeHwupload(t *testing.T) { } } +func TestFFmpegSupportsLibplacebo_FunctionalProbe(t *testing.T) { + if FFmpegSupportsLibplacebo("") { + t.Error("empty path must be false") + } + // A bogus path can't run → false (no panic, no hang). + if FFmpegSupportsLibplacebo("/nonexistent/ffmpeg") { + t.Error("nonexistent ffmpeg must be false") + } + // With a real ffmpeg the result is environment-dependent (true only when a + // Vulkan runtime is present), so we only assert the probe completes and + // returns a bool — its whole purpose is to be honest about THIS host. + if _, err := exec.LookPath("ffmpeg"); err == nil { + _ = FFmpegSupportsLibplacebo("ffmpeg") // must not hang or panic + } +} + func TestFFmpegSupportsZscale_Stub(t *testing.T) { dir := t.TempDir()