From afd5856d0d52a8e33906df9fbfb01db0bdbc0cc1 Mon Sep 17 00:00:00 2001 From: Deivid Soto Date: Wed, 27 May 2026 15:45:55 +0200 Subject: [PATCH] feat(vaapi): hybrid CPU-scale + hwupload encode path (QW2, 0.9.14) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes QW2. Validated against the dev box's AMD Raphael iGPU (/dev/dri/renderD128, radeonsi/mesa 25.2.8). The "proper" full-GPU path via scale_vaapi triggers a known mesa 25 + Raphael bug ("Cannot allocate memory" per session start, encode still succeeds but logs are spammy) — hybrid CPU scale → format=nv12 → hwupload → h264_vaapi encode delivers GPU surfaces to the encoder without poking the broken scaler. Three concrete changes (the first two in buildHLSFFmpegArgsAt, the third in a new test file): 1. New `case "h264_vaapi"` adds `-vaapi_device /dev/dri/renderD128`. Multi-GPU hosts (this dev box has NVIDIA on renderD129 + AMD on renderD128) need it so the encoder doesn't bind to a non-VAAPI render node — without it the encoder fell back to NULL device in manual smoke testing. 2. Filter chain branches on codec: VAAPI uses `scale=…,format=nv12,hwupload` while libx264 / NVENC / QSV keep the existing `scale=…,format=yuv420p,setparams=…` shape. The setparams color metadata block is dropped on VAAPI because VAAPI surfaces don't expose VUI fields and the encoder writes its own. 3. Two new unit tests lock the argv shape so a future refactor doesn't accidentally merge the paths back together: TestBuildHLSFFmpegArgsVAAPI asserts the new flags + the ABSENCE of scale_vaapi; TestBuildHLSFFmpegArgsLibx264NoRegression verifies the software path keeps yuv420p + setparams + has none of the VAAPI extras. Manual ffmpeg validation on the dev box: hybrid encode of 5 s 4K → 720p: 0.66 s wall, 472 % CPU, 268 KB output — no errors logged. scale_vaapi variant in comparison spammed "Cannot allocate memory" while emitting valid output. 
--- CHANGELOG.md | 24 +++++++++++ internal/cmd/version.go | 2 +- internal/engine/hls.go | 35 +++++++++++++-- internal/engine/vaapi_args_test.go | 69 ++++++++++++++++++++++++++++++ 4 files changed, 126 insertions(+), 4 deletions(-) create mode 100644 internal/engine/vaapi_args_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index c8681bf..58b4053 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,30 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.9.14] - 2026-05-27 + +### Changed + +- **VAAPI encode path now ships proper GPU surfaces**. Adds + `-vaapi_device /dev/dri/renderD128` so the encoder doesn't fall + back to a NULL device on multi-GPU hosts (the dev box that + validated this has an NVIDIA dGPU on renderD129 + an AMD iGPU on + renderD128 — without the explicit device the encoder picked the + wrong node). Filter chain switches to `format=nv12,hwupload` + (was `format=yuv420p`) so frames arrive at the encoder as VAAPI + surfaces. Color-metadata `setparams=` block is dropped on the + VAAPI path because VAAPI surfaces don't expose VUI fields the + same way libx264 does — the encoder records its own. + Intentionally avoids `scale_vaapi`: mesa 25 + AMD Raphael iGPU + emit "Cannot allocate memory" per session start, polluting logs + even though encode succeeds. CPU scale + hwupload is the safe + hybrid that works across all VAAPI-capable hosts. +- **Unit tests** lock the argv shape: TestBuildHLSFFmpegArgsVAAPI + asserts the new VAAPI flags + absence of scale_vaapi / + format=yuv420p; TestBuildHLSFFmpegArgsLibx264NoRegression + ensures the libx264 path keeps its `setparams` + `yuv420p` and + doesn't accidentally inherit the VAAPI shape. 
+ ## [0.9.13] - 2026-05-27 ### Added diff --git a/internal/cmd/version.go b/internal/cmd/version.go index efb6b30..497c9a0 100644 --- a/internal/cmd/version.go +++ b/internal/cmd/version.go @@ -1,4 +1,4 @@ package cmd // Version is the CLI version. Overridden by goreleaser ldflags at release time. -var Version = "0.9.13" +var Version = "0.9.14" diff --git a/internal/engine/hls.go b/internal/engine/hls.go index 6acde30..86219d5 100644 --- a/internal/engine/hls.go +++ b/internal/engine/hls.go @@ -1168,6 +1168,17 @@ func buildHLSFFmpegArgsAt(cfg HLSSessionConfig, probe *StreamProbe, tmpDir strin // silently ignores `-q:v`, so the constant-quality knob never // took effect anyway. args = append(args, "-realtime", "1") + case "h264_vaapi": + // h264_vaapi has no preset knob. Bitrate args (set later) drive + // rate control. Add `-vaapi_device /dev/dri/renderD128` so the + // encoder doesn't fall back to a NULL device on multi-GPU hosts + // where the default render node is a non-VAAPI GPU (an Nvidia + // dGPU's render node, etc.). The filter chain below switches to + // `format=nv12,hwupload` so frames land on the right VAAPI + // surface before the encoder; we intentionally avoid scale_vaapi + // because mesa 25 + Raphael iGPU emits "Cannot allocate memory" + // per session start, polluting logs even though encode succeeds. + args = append(args, "-vaapi_device", "/dev/dri/renderD128") } // Derive H.264 level from the actual output height. A fixed "4.0" caps the // encoder at 1080p — anything taller (1440p, 4K source on quality=original) @@ -1218,14 +1229,32 @@ func buildHLSFFmpegArgsAt(cfg HLSSessionConfig, probe *StreamProbe, tmpDir strin if maxH == 0 { maxH = cfg.Transcode.MaxHeight } + // VAAPI needs frames as nv12 VAAPI surfaces before the encoder. 
We do + // scale + format conversion on CPU then `hwupload` once at the end — + // skips the mesa 25 + Raphael iGPU "Cannot allocate memory" log spam + // that scale_vaapi triggers per-session-start while still delivering + // the encoder a GPU surface. setparams is dropped because VAAPI + // surfaces don't expose VUI fields the way libx264 does; the encoder + // records its own color metadata via the source PTS chain. + pixFormat := "yuv420p" + hwUploadTail := "" + colorTail := ",setparams=colorspace=bt709:color_trc=bt709:color_primaries=bt709:range=tv" + if codec == "h264_vaapi" { + pixFormat = "nv12" + hwUploadTail = ",hwupload" + colorTail = "" + } var filterChain string if maxH > 0 && probe.Height > maxH { filterChain = fmt.Sprintf( - "scale=-2:%d:force_original_aspect_ratio=decrease,scale=trunc(iw/2)*2:trunc(ih/2)*2,format=yuv420p,setparams=colorspace=bt709:color_trc=bt709:color_primaries=bt709:range=tv", - maxH, + "scale=-2:%d:force_original_aspect_ratio=decrease,scale=trunc(iw/2)*2:trunc(ih/2)*2,format=%s%s%s", + maxH, pixFormat, colorTail, hwUploadTail, ) } else { - filterChain = "scale=trunc(iw/2)*2:trunc(ih/2)*2,format=yuv420p,setparams=colorspace=bt709:color_trc=bt709:color_primaries=bt709:range=tv" + filterChain = fmt.Sprintf( + "scale=trunc(iw/2)*2:trunc(ih/2)*2,format=%s%s%s", + pixFormat, colorTail, hwUploadTail, + ) } args = append(args, "-vf", filterChain) diff --git a/internal/engine/vaapi_args_test.go b/internal/engine/vaapi_args_test.go new file mode 100644 index 0000000..4bdf010 --- /dev/null +++ b/internal/engine/vaapi_args_test.go @@ -0,0 +1,69 @@ +package engine + +import ( + "strings" + "testing" +) + +func TestBuildHLSFFmpegArgsVAAPI(t *testing.T) { + cfg := HLSSessionConfig{ + SessionID: "test", + SourcePath: "/tmp/test.mkv", + Quality: "720p", + AudioIndex: 0, + Transcode: TranscodeRuntime{ + FFmpegPath: "/usr/bin/ffmpeg", + FFprobePath: "/usr/bin/ffprobe", + HWAccel: HWAccelVAAPI, + }, + } + probe := &StreamProbe{Width: 1920, Height: 
1080, DurationSec: 100} + args := buildHLSFFmpegArgsAt(cfg, probe, "/tmp/tmpdir", 0, 0) + got := strings.Join(args, " ") + + wants := []string{ + "-hwaccel vaapi", + "-vaapi_device /dev/dri/renderD128", + "-c:v h264_vaapi", + "format=nv12", + "hwupload", + } + for _, want := range wants { + if !strings.Contains(got, want) { + t.Errorf("argv missing %q\n%s", want, got) + } + } + if strings.Contains(got, "scale_vaapi") { + t.Errorf("argv unexpectedly contains scale_vaapi (mesa bug): %s", got) + } + if strings.Contains(got, "format=yuv420p") { + t.Errorf("argv contains format=yuv420p (libx264 path) for VAAPI codec: %s", got) + } +} + +func TestBuildHLSFFmpegArgsLibx264NoRegression(t *testing.T) { + cfg := HLSSessionConfig{ + SessionID: "test", + SourcePath: "/tmp/test.mkv", + Quality: "720p", + AudioIndex: 0, + Transcode: TranscodeRuntime{ + FFmpegPath: "/usr/bin/ffmpeg", + FFprobePath: "/usr/bin/ffprobe", + HWAccel: HWAccelNone, + }, + } + probe := &StreamProbe{Width: 1920, Height: 1080, DurationSec: 100} + args := buildHLSFFmpegArgsAt(cfg, probe, "/tmp/tmpdir", 0, 0) + got := strings.Join(args, " ") + for _, want := range []string{"-c:v libx264", "format=yuv420p", "setparams=colorspace=bt709"} { + if !strings.Contains(got, want) { + t.Errorf("libx264 argv missing %q: %s", want, got) + } + } + for _, bad := range []string{"-vaapi_device", "format=nv12", "hwupload"} { + if strings.Contains(got, bad) { + t.Errorf("libx264 argv unexpectedly contains %q: %s", bad, got) + } + } +}