feat(vaapi): hybrid CPU-scale + hwupload encode path (QW2, 0.9.14)
Closes QW2. Validated against the dev box's AMD Raphael iGPU
(/dev/dri/renderD128, radeonsi/mesa 25.2.8). The "proper" full-GPU
path via scale_vaapi triggers a known mesa 25 + Raphael bug
("Cannot allocate memory" per session start, encode still succeeds
but logs are spammy) — hybrid CPU scale → format=nv12 → hwupload
→ h264_vaapi encode delivers GPU surfaces to the encoder without
poking the broken scaler.
Three concrete changes in buildHLSFFmpegArgsAt:
1. New `case "h264_vaapi"` adds `-vaapi_device /dev/dri/renderD128`.
Multi-GPU hosts (this dev box has NVIDIA on renderD129 + AMD on
renderD128) need it so the encoder doesn't bind to a non-VAAPI
render node — without it the encoder fell back to a NULL device
in manual smoke testing.
2. Filter chain branches on codec: VAAPI uses
`scale=…,format=nv12,hwupload` while libx264 / NVENC / QSV
keep the existing `scale=…,format=yuv420p,setparams=…` shape.
The setparams color metadata block is dropped on VAAPI because
VAAPI surfaces don't expose VUI fields and the encoder writes
its own.
3. Two new unit tests lock the argv shape so a future refactor
doesn't accidentally merge the paths back together:
TestBuildHLSFFmpegArgsVAAPI asserts the new flags + the
ABSENCE of scale_vaapi; TestBuildHLSFFmpegArgsLibx264NoRegression
verifies the software path keeps yuv420p + setparams + has
none of the VAAPI extras.
Manual ffmpeg validation on the dev box:
hybrid encode of 5 s 4K → 720p: 0.66 s wall, 472 % CPU, 268 KB
output — no errors logged. By comparison, the scale_vaapi variant
spammed "Cannot allocate memory" while still emitting valid output.
This commit is contained in:
parent
cfd4666bb2
commit
afd5856d0d
4 changed files with 126 additions and 4 deletions
24
CHANGELOG.md
24
CHANGELOG.md
|
|
@ -5,6 +5,30 @@ All notable changes to this project will be documented in this file.
|
||||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||||
|
|
||||||
|
## [0.9.14] - 2026-05-27
|
||||||
|
|
||||||
|
### Changed
|
||||||
|
|
||||||
|
- **VAAPI encode path now ships proper GPU surfaces**. Adds
|
||||||
|
`-vaapi_device /dev/dri/renderD128` so the encoder doesn't fall
|
||||||
|
back to a NULL device on multi-GPU hosts (the dev box that
|
||||||
|
validated this has an NVIDIA dGPU on renderD129 + an AMD iGPU on
|
||||||
|
renderD128 — without the explicit device the encoder picked the
|
||||||
|
wrong node). Filter chain switches to `format=nv12,hwupload`
|
||||||
|
(was `format=yuv420p`) so frames arrive at the encoder as VAAPI
|
||||||
|
surfaces. Color-metadata `setparams=` block is dropped on the
|
||||||
|
VAAPI path because VAAPI surfaces don't expose VUI fields the
|
||||||
|
same way libx264 does — the encoder records its own.
|
||||||
|
Intentionally avoids `scale_vaapi`: mesa 25 + AMD Raphael iGPU
|
||||||
|
emit "Cannot allocate memory" per session start, polluting logs
|
||||||
|
even though encode succeeds. CPU scale + hwupload is the safe
|
||||||
|
hybrid that works across all VAAPI-capable hosts.
|
||||||
|
- **Unit tests** lock the argv shape: TestBuildHLSFFmpegArgsVAAPI
|
||||||
|
asserts the new VAAPI flags + absence of scale_vaapi /
|
||||||
|
format=yuv420p; TestBuildHLSFFmpegArgsLibx264NoRegression
|
||||||
|
ensures the libx264 path keeps its `setparams` + `yuv420p` and
|
||||||
|
doesn't accidentally inherit the VAAPI shape.
|
||||||
|
|
||||||
## [0.9.13] - 2026-05-27
|
## [0.9.13] - 2026-05-27
|
||||||
|
|
||||||
### Added
|
### Added
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
package cmd
|
package cmd
|
||||||
|
|
||||||
// Version is the CLI version. Overridden by goreleaser ldflags at release time.
|
// Version is the CLI version. Overridden by goreleaser ldflags at release time.
|
||||||
var Version = "0.9.13"
|
var Version = "0.9.14"
|
||||||
|
|
|
||||||
|
|
@ -1168,6 +1168,17 @@ func buildHLSFFmpegArgsAt(cfg HLSSessionConfig, probe *StreamProbe, tmpDir strin
|
||||||
// silently ignores `-q:v`, so the constant-quality knob never
|
// silently ignores `-q:v`, so the constant-quality knob never
|
||||||
// took effect anyway.
|
// took effect anyway.
|
||||||
args = append(args, "-realtime", "1")
|
args = append(args, "-realtime", "1")
|
||||||
|
case "h264_vaapi":
|
||||||
|
// h264_vaapi has no preset knob. Bitrate args (set later) drive
|
||||||
|
// rate control. Add `-vaapi_device /dev/dri/renderD128` so the
|
||||||
|
// encoder doesn't fall back to a NULL device on multi-GPU hosts
|
||||||
|
// where the default render node is a non-VAAPI GPU (an NVIDIA
|
||||||
|
// dGPU's render node, etc.). The filter chain below switches to
|
||||||
|
// `format=nv12,hwupload` so frames land on the right VAAPI
|
||||||
|
// surface before the encoder; we intentionally avoid scale_vaapi
|
||||||
|
// because mesa 25 + Raphael iGPU emits "Cannot allocate memory"
|
||||||
|
// per session start, polluting logs even though encode succeeds.
|
||||||
|
args = append(args, "-vaapi_device", "/dev/dri/renderD128")
|
||||||
}
|
}
|
||||||
// Derive H.264 level from the actual output height. A fixed "4.0" caps the
|
// Derive H.264 level from the actual output height. A fixed "4.0" caps the
|
||||||
// encoder at 1080p — anything taller (1440p, 4K source on quality=original)
|
// encoder at 1080p — anything taller (1440p, 4K source on quality=original)
|
||||||
|
|
@ -1218,14 +1229,32 @@ func buildHLSFFmpegArgsAt(cfg HLSSessionConfig, probe *StreamProbe, tmpDir strin
|
||||||
if maxH == 0 {
|
if maxH == 0 {
|
||||||
maxH = cfg.Transcode.MaxHeight
|
maxH = cfg.Transcode.MaxHeight
|
||||||
}
|
}
|
||||||
|
// VAAPI needs frames as nv12 VAAPI surfaces before the encoder. We do
|
||||||
|
// scale + format conversion on CPU then `hwupload` once at the end —
|
||||||
|
// skips the mesa 25 + Raphael iGPU "Cannot allocate memory" log spam
|
||||||
|
// that scale_vaapi triggers per-session-start while still delivering
|
||||||
|
// the encoder a GPU surface. setparams is dropped because VAAPI
|
||||||
|
// surfaces don't expose VUI fields the way libx264 does; the encoder
|
||||||
|
// records its own color metadata.
|
||||||
|
pixFormat := "yuv420p"
|
||||||
|
hwUploadTail := ""
|
||||||
|
colorTail := ",setparams=colorspace=bt709:color_trc=bt709:color_primaries=bt709:range=tv"
|
||||||
|
if codec == "h264_vaapi" {
|
||||||
|
pixFormat = "nv12"
|
||||||
|
hwUploadTail = ",hwupload"
|
||||||
|
colorTail = ""
|
||||||
|
}
|
||||||
var filterChain string
|
var filterChain string
|
||||||
if maxH > 0 && probe.Height > maxH {
|
if maxH > 0 && probe.Height > maxH {
|
||||||
filterChain = fmt.Sprintf(
|
filterChain = fmt.Sprintf(
|
||||||
"scale=-2:%d:force_original_aspect_ratio=decrease,scale=trunc(iw/2)*2:trunc(ih/2)*2,format=yuv420p,setparams=colorspace=bt709:color_trc=bt709:color_primaries=bt709:range=tv",
|
"scale=-2:%d:force_original_aspect_ratio=decrease,scale=trunc(iw/2)*2:trunc(ih/2)*2,format=%s%s%s",
|
||||||
maxH,
|
maxH, pixFormat, colorTail, hwUploadTail,
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
filterChain = "scale=trunc(iw/2)*2:trunc(ih/2)*2,format=yuv420p,setparams=colorspace=bt709:color_trc=bt709:color_primaries=bt709:range=tv"
|
filterChain = fmt.Sprintf(
|
||||||
|
"scale=trunc(iw/2)*2:trunc(ih/2)*2,format=%s%s%s",
|
||||||
|
pixFormat, colorTail, hwUploadTail,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
args = append(args, "-vf", filterChain)
|
args = append(args, "-vf", filterChain)
|
||||||
|
|
||||||
|
|
|
||||||
69
internal/engine/vaapi_args_test.go
Normal file
69
internal/engine/vaapi_args_test.go
Normal file
|
|
@ -0,0 +1,69 @@
|
||||||
|
package engine
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestBuildHLSFFmpegArgsVAAPI(t *testing.T) {
|
||||||
|
cfg := HLSSessionConfig{
|
||||||
|
SessionID: "test",
|
||||||
|
SourcePath: "/tmp/test.mkv",
|
||||||
|
Quality: "720p",
|
||||||
|
AudioIndex: 0,
|
||||||
|
Transcode: TranscodeRuntime{
|
||||||
|
FFmpegPath: "/usr/bin/ffmpeg",
|
||||||
|
FFprobePath: "/usr/bin/ffprobe",
|
||||||
|
HWAccel: HWAccelVAAPI,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
probe := &StreamProbe{Width: 1920, Height: 1080, DurationSec: 100}
|
||||||
|
args := buildHLSFFmpegArgsAt(cfg, probe, "/tmp/tmpdir", 0, 0)
|
||||||
|
got := strings.Join(args, " ")
|
||||||
|
|
||||||
|
wants := []string{
|
||||||
|
"-hwaccel vaapi",
|
||||||
|
"-vaapi_device /dev/dri/renderD128",
|
||||||
|
"-c:v h264_vaapi",
|
||||||
|
"format=nv12",
|
||||||
|
"hwupload",
|
||||||
|
}
|
||||||
|
for _, want := range wants {
|
||||||
|
if !strings.Contains(got, want) {
|
||||||
|
t.Errorf("argv missing %q\n%s", want, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if strings.Contains(got, "scale_vaapi") {
|
||||||
|
t.Errorf("argv unexpectedly contains scale_vaapi (mesa bug): %s", got)
|
||||||
|
}
|
||||||
|
if strings.Contains(got, "format=yuv420p") {
|
||||||
|
t.Errorf("argv contains format=yuv420p (libx264 path) for VAAPI codec: %s", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildHLSFFmpegArgsLibx264NoRegression(t *testing.T) {
|
||||||
|
cfg := HLSSessionConfig{
|
||||||
|
SessionID: "test",
|
||||||
|
SourcePath: "/tmp/test.mkv",
|
||||||
|
Quality: "720p",
|
||||||
|
AudioIndex: 0,
|
||||||
|
Transcode: TranscodeRuntime{
|
||||||
|
FFmpegPath: "/usr/bin/ffmpeg",
|
||||||
|
FFprobePath: "/usr/bin/ffprobe",
|
||||||
|
HWAccel: HWAccelNone,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
probe := &StreamProbe{Width: 1920, Height: 1080, DurationSec: 100}
|
||||||
|
args := buildHLSFFmpegArgsAt(cfg, probe, "/tmp/tmpdir", 0, 0)
|
||||||
|
got := strings.Join(args, " ")
|
||||||
|
for _, want := range []string{"-c:v libx264", "format=yuv420p", "setparams=colorspace=bt709"} {
|
||||||
|
if !strings.Contains(got, want) {
|
||||||
|
t.Errorf("libx264 argv missing %q: %s", want, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, bad := range []string{"-vaapi_device", "format=nv12", "hwupload"} {
|
||||||
|
if strings.Contains(got, bad) {
|
||||||
|
t.Errorf("libx264 argv unexpectedly contains %q: %s", bad, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue