feat(stream): benchmark software encode ceiling at startup
Replace the guessed transcode ceiling (CPU->1080, GPU->2160) with a measured one. HW encoders still return 2160 instantly. A software-only host runs a bounded encode benchmark — 3s testsrc2 through the real libx264 superfast settings at 1080/720/480, top-down — and reports the rung it sustains at >=1.5x realtime (margin for real decode + busier content). Fixes risk 2: a weak NAS/old CPU that is ffmpeg-capable but can't keep up with a 1080p software encode no longer advertises a 1080 ceiling, so decideStreamPlan routes oversized sources to an external player instead of a stuttering transcode. Floors at 480; each probe is timeout-bounded so a wedged ffmpeg can't stall daemon startup.
This commit is contained in:
parent
005a4380dd
commit
ef3b190e0b
3 changed files with 167 additions and 4 deletions
|
|
@ -156,10 +156,15 @@ func runDaemonStart() error {
|
||||||
hwDiag := engine.DetectHWAccelDiagnostic(probeCtx, ffmpegResolved)
|
hwDiag := engine.DetectHWAccelDiagnostic(probeCtx, ffmpegResolved)
|
||||||
log.Println(hwDiag.LogLine())
|
log.Println(hwDiag.LogLine())
|
||||||
hwAccelPick := hwDiag.Pick
|
hwAccelPick := hwDiag.Pick
|
||||||
maxTranscodeHeight := 1080
|
// Measure the real transcode ceiling instead of guessing from the backend.
|
||||||
if hwAccelPick != engine.HWAccelNone {
|
// HW encoders return 2160 instantly; a software-only host runs a bounded
|
||||||
maxTranscodeHeight = 2160
|
// encode benchmark so a weak NAS/CPU reports the rung it can actually
|
||||||
}
|
// sustain (720/480) and the web side routes oversized sources to an
|
||||||
|
// external player instead of a stuttering transcode. Own timeout — the 10 s
|
||||||
|
// probeCtx above is sized for the quick diagnostic, not three encode rungs.
|
||||||
|
benchCtx, benchCancel := context.WithTimeout(context.Background(), 45*time.Second)
|
||||||
|
maxTranscodeHeight := engine.BenchmarkMaxTranscodeHeight(benchCtx, ffmpegResolved, hwAccelPick)
|
||||||
|
benchCancel()
|
||||||
|
|
||||||
// Create daemon config
|
// Create daemon config
|
||||||
daemonCfg := agent.DaemonConfig{
|
daemonCfg := agent.DaemonConfig{
|
||||||
|
|
|
||||||
106
internal/engine/encode_benchmark.go
Normal file
106
internal/engine/encode_benchmark.go
Normal file
|
|
@ -0,0 +1,106 @@
|
||||||
|
package engine
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"log"
|
||||||
|
"os/exec"
|
||||||
|
"strconv"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// benchmarkRung describes one step of the software-encode ladder: the output
// height that would be advertised as the transcode ceiling, together with the
// 16:9 frame width used when probing whether the encoder keeps up at that size.
type benchmarkRung struct {
	height, width int // frame dimensions in pixels
}
|
||||||
|
|
||||||
|
// softwareBenchmarkRungs are tested high→low. The frame sizes match the real
|
||||||
|
// streaming output tiers; the H.264 level / macroblock math in hls.go is
|
||||||
|
// independent of what we measure here.
|
||||||
|
var softwareBenchmarkRungs = []benchmarkRung{
|
||||||
|
{height: 1080, width: 1920},
|
||||||
|
{height: 720, width: 1280},
|
||||||
|
{height: 480, width: 854},
|
||||||
|
}
|
||||||
|
|
||||||
|
const (
	// realtimeMarginSoftware is the minimum realtime factor (clip duration
	// divided by wall-clock encode time) a synthetic encode must reach before
	// its rung counts as sustainable. The 1.5× headroom covers two costs the
	// benchmark leaves out: (a) decoding a real source — software HEVC /
	// 10-bit decode is heavier than producing the synthetic clip — and
	// (b) real footage (film grain, motion) being harder for x264 than
	// testsrc2.
	realtimeMarginSoftware = 1.5

	// benchmarkClipSeconds is the length of the synthetic clip, in seconds.
	// It is kept short so a capable host clears the 1080p rung quickly, yet
	// long enough that process spin-up does not dominate the measurement.
	benchmarkClipSeconds = 3
)
|
||||||
|
|
||||||
|
// BenchmarkMaxTranscodeHeight returns the largest output height this host can
|
||||||
|
// software-transcode in real time, one of {1080,720,480}. Hardware encoders
|
||||||
|
// return 2160 WITHOUT benchmarking — NVENC/QSV/VAAPI/VideoToolbox all sustain
|
||||||
|
// 4K and a probe would only add startup latency.
|
||||||
|
//
|
||||||
|
// The point is the weak end. A low-power NAS or an old CPU can be
|
||||||
|
// ffmpeg-capable yet unable to keep up with a 1080p software encode, so the
|
||||||
|
// historical static 1080 ceiling makes the web side attempt a transcode that
|
||||||
|
// stutters. Measuring real throughput lets decideStreamPlan route oversized
|
||||||
|
// sources to an external player instead. Floors at 480: a box that can't
|
||||||
|
// sustain even that is barely functional, and 480-or-smaller sources transcode
|
||||||
|
// cheaply regardless — anything larger is already gated out by the 480 ceiling.
|
||||||
|
func BenchmarkMaxTranscodeHeight(ctx context.Context, ffmpegPath string, hw HWAccel) int {
|
||||||
|
if hw != HWAccelNone {
|
||||||
|
return 2160
|
||||||
|
}
|
||||||
|
if ffmpegPath == "" {
|
||||||
|
return 1080 // no benchmark possible; keep the historical default
|
||||||
|
}
|
||||||
|
for _, rung := range softwareBenchmarkRungs {
|
||||||
|
factor, ok := measureEncodeRealtimeFactor(ctx, ffmpegPath, rung)
|
||||||
|
if !ok {
|
||||||
|
// Probe couldn't run (timeout / exec error) — try a lighter rung
|
||||||
|
// rather than treat the failure as a measured "fast enough".
|
||||||
|
log.Printf("[transcode] encode benchmark: %dp probe failed — trying lower", rung.height)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if factor >= realtimeMarginSoftware {
|
||||||
|
log.Printf("[transcode] encode benchmark: software ceiling %dp (%.1f× realtime)", rung.height, factor)
|
||||||
|
return rung.height
|
||||||
|
}
|
||||||
|
log.Printf("[transcode] encode benchmark: %dp only %.1f× realtime (<%.1f×) — trying lower", rung.height, factor, realtimeMarginSoftware)
|
||||||
|
}
|
||||||
|
log.Printf("[transcode] encode benchmark: host can't sustain 480p software encode — flooring ceiling at 480 (oversized sources route to external)")
|
||||||
|
return 480
|
||||||
|
}
|
||||||
|
|
||||||
|
// measureEncodeRealtimeFactor encodes benchmarkClipSeconds of synthetic video
|
||||||
|
// at the rung's resolution using the real streaming encoder settings (libx264
|
||||||
|
// superfast, no B-frames) to /dev/null and returns clipDuration/wallTime — the
|
||||||
|
// realtime factor. ok=false when the probe couldn't run, so the caller skips
|
||||||
|
// rather than treating the failure as a fast result. Each probe is bounded so
|
||||||
|
// a wedged ffmpeg can't stall daemon startup.
|
||||||
|
func measureEncodeRealtimeFactor(ctx context.Context, ffmpegPath string, rung benchmarkRung) (float64, bool) {
|
||||||
|
// A 3 s superfast encode that takes longer than 12 s is <0.25× realtime —
|
||||||
|
// already far below the 1.5× bar — so capping here only kills genuinely
|
||||||
|
// hopeless rungs early and keeps worst-case startup bounded.
|
||||||
|
bctx, cancel := context.WithTimeout(ctx, 12*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
size := strconv.Itoa(rung.width) + "x" + strconv.Itoa(rung.height)
|
||||||
|
args := []string{
|
||||||
|
"-hide_banner", "-nostats", "-loglevel", "error",
|
||||||
|
"-f", "lavfi",
|
||||||
|
"-i", "testsrc2=size=" + size + ":rate=24:duration=" + strconv.Itoa(benchmarkClipSeconds),
|
||||||
|
"-c:v", "libx264", "-preset", "superfast", "-threads", "0",
|
||||||
|
"-bf", "0", "-sc_threshold", "0",
|
||||||
|
"-f", "null", "-",
|
||||||
|
}
|
||||||
|
start := time.Now()
|
||||||
|
err := exec.CommandContext(bctx, ffmpegPath, args...).Run()
|
||||||
|
elapsed := time.Since(start)
|
||||||
|
if err != nil || elapsed <= 0 {
|
||||||
|
return 0, false
|
||||||
|
}
|
||||||
|
return float64(benchmarkClipSeconds) / elapsed.Seconds(), true
|
||||||
|
}
|
||||||
52
internal/engine/encode_benchmark_test.go
Normal file
52
internal/engine/encode_benchmark_test.go
Normal file
|
|
@ -0,0 +1,52 @@
|
||||||
|
package engine
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"os/exec"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestBenchmarkMaxTranscodeHeight_HardwareSkipsProbe(t *testing.T) {
|
||||||
|
// Hardware encoders return 2160 without touching ffmpeg — pass a bogus path
|
||||||
|
// to prove no subprocess runs.
|
||||||
|
for _, hw := range []HWAccel{HWAccelNVENC, HWAccelQSV, HWAccelVAAPI, HWAccelVideoToolbox} {
|
||||||
|
got := BenchmarkMaxTranscodeHeight(context.Background(), "/nonexistent/ffmpeg", hw)
|
||||||
|
if got != 2160 {
|
||||||
|
t.Errorf("hw=%s: got %d, want 2160", hw, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBenchmarkMaxTranscodeHeight_NoFFmpegKeepsDefault(t *testing.T) {
|
||||||
|
if got := BenchmarkMaxTranscodeHeight(context.Background(), "", HWAccelNone); got != 1080 {
|
||||||
|
t.Errorf("empty ffmpeg path: got %d, want 1080 (historical default)", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBenchmarkMaxTranscodeHeight_SoftwareReturnsValidRung(t *testing.T) {
|
||||||
|
ffmpeg, err := exec.LookPath("ffmpeg")
|
||||||
|
if err != nil {
|
||||||
|
t.Skip("ffmpeg not on PATH — software benchmark needs a real encoder")
|
||||||
|
}
|
||||||
|
got := BenchmarkMaxTranscodeHeight(context.Background(), ffmpeg, HWAccelNone)
|
||||||
|
switch got {
|
||||||
|
case 1080, 720, 480:
|
||||||
|
// any rung is valid; the exact one depends on the host's CPU.
|
||||||
|
default:
|
||||||
|
t.Errorf("software ceiling = %d, want one of {1080,720,480}", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMeasureEncodeRealtimeFactor_RealEncoder(t *testing.T) {
|
||||||
|
ffmpeg, err := exec.LookPath("ffmpeg")
|
||||||
|
if err != nil {
|
||||||
|
t.Skip("ffmpeg not on PATH")
|
||||||
|
}
|
||||||
|
factor, ok := measureEncodeRealtimeFactor(context.Background(), ffmpeg, benchmarkRung{height: 480, width: 854})
|
||||||
|
if !ok {
|
||||||
|
t.Fatal("480p probe failed to run on a host with ffmpeg")
|
||||||
|
}
|
||||||
|
if factor <= 0 {
|
||||||
|
t.Errorf("realtime factor = %.2f, want > 0", factor)
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue