From ef3b190e0b546617084a5ca4cb784547d8d14ee5 Mon Sep 17 00:00:00 2001
From: Deivid Soto
Date: Wed, 3 Jun 2026 09:30:03 +0200
Subject: [PATCH] feat(stream): benchmark software encode ceiling at startup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the guessed transcode ceiling (CPU->1080, GPU->2160) with a measured
one. HW encoders still return 2160 instantly. A software-only host runs a
bounded encode benchmark — 3s testsrc2 through the real libx264 superfast
settings at 1080/720/480, top-down — and reports the rung it sustains at
>=1.5x realtime (margin for real decode + busier content).

Fixes risk 2: a weak NAS/old CPU that is ffmpeg-capable but can't keep up with
a 1080p software encode no longer advertises a 1080 ceiling, so
decideStreamPlan routes oversized sources to an external player instead of a
stuttering transcode. Floors at 480; each probe is timeout-bounded so a wedged
ffmpeg can't stall daemon startup.

Probe failures are not silent: the probe logs whether it hit its deadline or
ffmpeg itself failed (with ffmpeg's error output), and the final log line
distinguishes "nothing could be measured" from "measured and too slow".
---
 internal/cmd/daemon.go                   |  13 ++-
 internal/engine/encode_benchmark.go      | 127 +++++++++++++++++++++++
 internal/engine/encode_benchmark_test.go |  52 ++++++++++
 3 files changed, 188 insertions(+), 4 deletions(-)
 create mode 100644 internal/engine/encode_benchmark.go
 create mode 100644 internal/engine/encode_benchmark_test.go

diff --git a/internal/cmd/daemon.go b/internal/cmd/daemon.go
index 2ffc459..07d090c 100644
--- a/internal/cmd/daemon.go
+++ b/internal/cmd/daemon.go
@@ -156,10 +156,15 @@ func runDaemonStart() error {
 	hwDiag := engine.DetectHWAccelDiagnostic(probeCtx, ffmpegResolved)
 	log.Println(hwDiag.LogLine())
 	hwAccelPick := hwDiag.Pick
-	maxTranscodeHeight := 1080
-	if hwAccelPick != engine.HWAccelNone {
-		maxTranscodeHeight = 2160
-	}
+	// Measure the real transcode ceiling instead of guessing from the backend.
+	// HW encoders return 2160 instantly; a software-only host runs a bounded
+	// encode benchmark so a weak NAS/CPU reports the rung it can actually
+	// sustain (720/480) and the web side routes oversized sources to an
+	// external player instead of a stuttering transcode. Own timeout — the 10 s
+	// probeCtx above is sized for the quick diagnostic, not three encode rungs.
+	benchCtx, benchCancel := context.WithTimeout(context.Background(), 45*time.Second)
+	maxTranscodeHeight := engine.BenchmarkMaxTranscodeHeight(benchCtx, ffmpegResolved, hwAccelPick)
+	benchCancel()
 
 	// Create daemon config
 	daemonCfg := agent.DaemonConfig{
diff --git a/internal/engine/encode_benchmark.go b/internal/engine/encode_benchmark.go
new file mode 100644
index 0000000..385cd3b
--- /dev/null
+++ b/internal/engine/encode_benchmark.go
@@ -0,0 +1,127 @@
+package engine
+
+import (
+	"context"
+	"log"
+	"os/exec"
+	"strconv"
+	"time"
+)
+
+// benchmarkRung is a candidate transcode-height ceiling plus the 16:9 frame
+// size used to measure whether a software encoder sustains it.
+type benchmarkRung struct {
+	height int
+	width  int
+}
+
+// softwareBenchmarkRungs are tested high→low. The frame sizes match the real
+// streaming output tiers; the H.264 level / macroblock math in hls.go is
+// independent of what we measure here.
+var softwareBenchmarkRungs = []benchmarkRung{
+	{height: 1080, width: 1920},
+	{height: 720, width: 1280},
+	{height: 480, width: 854},
+}
+
+// realtimeMarginSoftware is how much faster than realtime a synthetic encode
+// must run before we call a rung "sustainable". 1.5× leaves headroom for two
+// things the benchmark does NOT measure: (a) decoding the real source —
+// software HEVC / 10-bit decode is heavier than encoding the synthetic clip —
+// and (b) real content being busier than testsrc2 (which x264 compresses
+// faster than film grain or motion).
+const realtimeMarginSoftware = 1.5
+
+// benchmarkClipSeconds is the synthetic clip length. Short enough that a
+// capable host finishes the 1080p rung in well under a second, long enough to
+// average out process spin-up.
+const benchmarkClipSeconds = 3
+
+// BenchmarkMaxTranscodeHeight returns the largest output height this host can
+// software-transcode in real time, one of {1080,720,480}. Hardware encoders
+// return 2160 WITHOUT benchmarking — NVENC/QSV/VAAPI/VideoToolbox all sustain
+// 4K and a probe would only add startup latency.
+//
+// The point is the weak end. A low-power NAS or an old CPU can be
+// ffmpeg-capable yet unable to keep up with a 1080p software encode, so the
+// historical static 1080 ceiling makes the web side attempt a transcode that
+// stutters. Measuring real throughput lets decideStreamPlan route oversized
+// sources to an external player instead. Floors at 480: a box that can't
+// sustain even that is barely functional, and 480-or-smaller sources transcode
+// cheaply regardless — anything larger is already gated out by the 480 ceiling.
+//
+// The 480 floor also applies when no probe completes at all (every rung timed
+// out or ffmpeg could not run the encode); that case is logged separately so
+// the log never claims a measurement that did not happen.
+func BenchmarkMaxTranscodeHeight(ctx context.Context, ffmpegPath string, hw HWAccel) int {
+	if hw != HWAccelNone {
+		return 2160
+	}
+	if ffmpegPath == "" {
+		return 1080 // no benchmark possible; keep the historical default
+	}
+	measured := false // true once at least one rung produced a real factor
+	for _, rung := range softwareBenchmarkRungs {
+		factor, ok := measureEncodeRealtimeFactor(ctx, ffmpegPath, rung)
+		if !ok {
+			// Probe couldn't run (timeout / exec error) — try a lighter rung
+			// rather than treat the failure as a measured "fast enough".
+			log.Printf("[transcode] encode benchmark: %dp probe failed — trying lower", rung.height)
+			continue
+		}
+		measured = true
+		if factor >= realtimeMarginSoftware {
+			log.Printf("[transcode] encode benchmark: software ceiling %dp (%.1f× realtime)", rung.height, factor)
+			return rung.height
+		}
+		log.Printf("[transcode] encode benchmark: %dp only %.1f× realtime (<%.1f×) — trying lower", rung.height, factor, realtimeMarginSoftware)
+	}
+	if !measured {
+		// Same conservative floor, but say honestly that nothing was measured.
+		log.Printf("[transcode] encode benchmark: no probe completed — flooring ceiling at 480 without a measurement (oversized sources route to external)")
+		return 480
+	}
+	log.Printf("[transcode] encode benchmark: host can't sustain 480p software encode — flooring ceiling at 480 (oversized sources route to external)")
+	return 480
+}
+
+// measureEncodeRealtimeFactor encodes benchmarkClipSeconds of synthetic video
+// at the rung's resolution using the real streaming encoder settings (libx264
+// superfast, no B-frames) to /dev/null and returns clipDuration/wallTime — the
+// realtime factor. ok=false when the probe couldn't run, so the caller skips
+// rather than treating the failure as a fast result; the cause (deadline hit
+// or ffmpeg's own error output) is logged here because the caller never sees
+// it. Each probe is bounded so a wedged ffmpeg can't stall daemon startup.
+func measureEncodeRealtimeFactor(ctx context.Context, ffmpegPath string, rung benchmarkRung) (float64, bool) {
+	// A 3 s superfast encode that takes longer than 12 s is <0.25× realtime —
+	// already far below the 1.5× bar — so capping here only kills genuinely
+	// hopeless rungs early and keeps worst-case startup bounded.
+	bctx, cancel := context.WithTimeout(ctx, 12*time.Second)
+	defer cancel()
+
+	size := strconv.Itoa(rung.width) + "x" + strconv.Itoa(rung.height)
+	args := []string{
+		"-hide_banner", "-nostats", "-loglevel", "error",
+		"-f", "lavfi",
+		"-i", "testsrc2=size=" + size + ":rate=24:duration=" + strconv.Itoa(benchmarkClipSeconds),
+		"-c:v", "libx264", "-preset", "superfast", "-threads", "0",
+		"-bf", "0", "-sc_threshold", "0",
+		"-f", "null", "-",
+	}
+	// The encoded stream is discarded (-f null), so the combined output is
+	// expected to hold only ffmpeg's error text; it is kept solely to explain
+	// a failed probe in the log.
+	start := time.Now()
+	out, err := exec.CommandContext(bctx, ffmpegPath, args...).CombinedOutput()
+	elapsed := time.Since(start)
+	if err != nil {
+		if ctxErr := bctx.Err(); ctxErr != nil {
+			log.Printf("[transcode] encode benchmark: %dp probe aborted after %s: %v", rung.height, elapsed.Round(time.Millisecond), ctxErr)
+		} else {
+			log.Printf("[transcode] encode benchmark: %dp probe could not run: %v: %q", rung.height, err, out)
+		}
+		return 0, false
+	}
+	if elapsed <= 0 {
+		return 0, false
+	}
+	return float64(benchmarkClipSeconds) / elapsed.Seconds(), true
+}
diff --git a/internal/engine/encode_benchmark_test.go b/internal/engine/encode_benchmark_test.go
new file mode 100644
index 0000000..20f2e64
--- /dev/null
+++ b/internal/engine/encode_benchmark_test.go
@@ -0,0 +1,52 @@
+package engine
+
+import (
+	"context"
+	"os/exec"
+	"testing"
+)
+
+func TestBenchmarkMaxTranscodeHeight_HardwareSkipsProbe(t *testing.T) {
+	// Hardware encoders return 2160 without touching ffmpeg — pass a bogus path
+	// to prove no subprocess runs.
+	for _, hw := range []HWAccel{HWAccelNVENC, HWAccelQSV, HWAccelVAAPI, HWAccelVideoToolbox} {
+		got := BenchmarkMaxTranscodeHeight(context.Background(), "/nonexistent/ffmpeg", hw)
+		if got != 2160 {
+			t.Errorf("hw=%s: got %d, want 2160", hw, got)
+		}
+	}
+}
+
+func TestBenchmarkMaxTranscodeHeight_NoFFmpegKeepsDefault(t *testing.T) {
+	if got := BenchmarkMaxTranscodeHeight(context.Background(), "", HWAccelNone); got != 1080 {
+		t.Errorf("empty ffmpeg path: got %d, want 1080 (historical default)", got)
+	}
+}
+
+func TestBenchmarkMaxTranscodeHeight_SoftwareReturnsValidRung(t *testing.T) {
+	ffmpeg, err := exec.LookPath("ffmpeg")
+	if err != nil {
+		t.Skip("ffmpeg not on PATH — software benchmark needs a real encoder")
+	}
+	got := BenchmarkMaxTranscodeHeight(context.Background(), ffmpeg, HWAccelNone)
+	switch got {
+	case 1080, 720, 480:
+		// any rung is valid; the exact one depends on the host's CPU.
+	default:
+		t.Errorf("software ceiling = %d, want one of {1080,720,480}", got)
+	}
+}
+
+func TestMeasureEncodeRealtimeFactor_RealEncoder(t *testing.T) {
+	ffmpeg, err := exec.LookPath("ffmpeg")
+	if err != nil {
+		t.Skip("ffmpeg not on PATH")
+	}
+	factor, ok := measureEncodeRealtimeFactor(context.Background(), ffmpeg, benchmarkRung{height: 480, width: 854})
+	if !ok {
+		t.Fatal("480p probe failed to run on a host with ffmpeg")
+	}
+	if factor <= 0 {
+		t.Errorf("realtime factor = %.2f, want > 0", factor)
+	}
+}