fix(stream): don't cache transient libplacebo probe timeouts
Second critico pass on the functional probe.

- The probe does real Vulkan device init, which can transiently fail when the box is busy (notably the startup warm racing the encode benchmark). Caching that timeout as a permanent 'no' would pin HDR to the zscale CPU chain until daemon restart. Now a deadline is NOT cached — only a clean non-zero exit (filter absent / no ICD), which is a stable result. zscale stays cached as before (cheap deterministic grep, can't flake).
- Surface the exec error when ffmpeg never produced stderr (timeout / ENOENT): the fallback log now shows err.Error() instead of a blank tail, so 'no Vulkan' is distinguishable from 'ffmpeg never ran'.
- Dockerfile comment: clarify the Vulkan ICD (not GLX) is the load-bearing mount that 'graphics' adds; 'compute' alone doesn't mount it.

Probe still returns true on a Vulkan host (verified); engine tests green.
This commit is contained in:
parent
5e5a719f27
commit
e298ff6c05
2 changed files with 28 additions and 12 deletions
|
|
@ -95,9 +95,10 @@ ENV XDG_DATA_HOME=/data
|
||||||
|
|
||||||
# NVIDIA passthrough defaults. `--gpus all` alone only grants the "utility" +
|
# NVIDIA passthrough defaults. `--gpus all` alone only grants the "utility" +
|
||||||
# "compute" capabilities; nvenc needs "video", and "graphics" makes the runtime
|
# "compute" capabilities; nvenc needs "video", and "graphics" makes the runtime
|
||||||
# mount the NVIDIA Vulkan ICD (nvidia_icd.json + GLX libs) so ffmpeg's libplacebo
|
# mount the NVIDIA Vulkan ICD (nvidia_icd.json — the load-bearing piece — plus
|
||||||
# filter (GPU HDR tonemap, paired with libvulkan1 above) can create a Vulkan
|
# GLX/EGL libs) so ffmpeg's libplacebo filter (GPU HDR tonemap, paired with
|
||||||
# device. Baking these here means a plain `docker run --gpus all` (or the compose
|
# libvulkan1 above) can create a Vulkan device. "compute" alone does NOT mount
|
||||||
|
# the ICD. Baking these here means a plain `docker run --gpus all` (or the compose
|
||||||
# device reservation) lights up HW transcode + GPU tonemap with zero extra flags.
|
# device reservation) lights up HW transcode + GPU tonemap with zero extra flags.
|
||||||
# Harmless when no GPU is attached.
|
# Harmless when no GPU is attached.
|
||||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||||
|
|
|
||||||
|
|
@ -57,11 +57,12 @@ var (
|
||||||
// tonemapped fine via zscale.
|
// tonemapped fine via zscale.
|
||||||
//
|
//
|
||||||
// So we run the real filter on one synthetic frame and require a clean exit:
|
// So we run the real filter on one synthetic frame and require a clean exit:
|
||||||
// that forces Vulkan device creation + filtergraph negotiation (the implicit
|
// that forces Vulkan device creation + filtergraph negotiation (libplacebo
|
||||||
// hwupload/hwdownload around the GPU filter). Pass → libplacebo works here;
|
// auto-inserts the hwupload/hwdownload around itself). Pass → libplacebo works
|
||||||
// fail → fall back to the zscale chain. Cached per path; a probe failure is
|
// here; fail → fall back to the zscale chain. Cached per path — EXCEPT a
|
||||||
// treated as "no". The probe is bounded so a wedged ffmpeg can't stall the
|
// context timeout, which is transient (a busy box during the startup warm) and
|
||||||
// first session.
|
// must not pin HDR to zscale for the whole process. The probe is bounded so a
|
||||||
|
// wedged ffmpeg can't stall the first session.
|
||||||
func FFmpegSupportsLibplacebo(ffmpegPath string) bool {
|
func FFmpegSupportsLibplacebo(ffmpegPath string) bool {
|
||||||
if ffmpegPath == "" {
|
if ffmpegPath == "" {
|
||||||
return false
|
return false
|
||||||
|
|
@ -87,13 +88,27 @@ func FFmpegSupportsLibplacebo(ffmpegPath string) bool {
|
||||||
).CombinedOutput()
|
).CombinedOutput()
|
||||||
supported := err == nil
|
supported := err == nil
|
||||||
|
|
||||||
libplaceboCacheMu.Lock()
|
// Cache the result — but NOT a timeout. A clean non-zero exit (filter
|
||||||
libplaceboCache[ffmpegPath] = supported
|
// absent, no Vulkan ICD) is a stable "no" worth remembering; a deadline is
|
||||||
libplaceboCacheMu.Unlock()
|
// transient (the box was busy, e.g. the startup warm racing the encode
|
||||||
|
// benchmark) and caching it would force HDR onto the zscale CPU chain until
|
||||||
|
// restart. Worst case a perpetually-loaded box re-probes per session — rare,
|
||||||
|
// and it fails closed to zscale each time.
|
||||||
|
if supported || ctx.Err() != context.DeadlineExceeded {
|
||||||
|
libplaceboCacheMu.Lock()
|
||||||
|
libplaceboCache[ffmpegPath] = supported
|
||||||
|
libplaceboCacheMu.Unlock()
|
||||||
|
}
|
||||||
if supported {
|
if supported {
|
||||||
log.Printf("[tonemap] ffmpeg libplacebo works (Vulkan OK) — HDR sources tonemapped on the GPU (preferred)")
|
log.Printf("[tonemap] ffmpeg libplacebo works (Vulkan OK) — HDR sources tonemapped on the GPU (preferred)")
|
||||||
} else {
|
} else {
|
||||||
log.Printf("[tonemap] ffmpeg libplacebo unavailable (no Vulkan runtime or filter absent) — HDR falls back to zscale/none: %v", strings.TrimSpace(lastLine(out)))
|
// On an exec/timeout failure the stderr tail is empty — surface err
|
||||||
|
// itself so the log distinguishes "no Vulkan" from "ffmpeg never ran".
|
||||||
|
detail := strings.TrimSpace(lastLine(out))
|
||||||
|
if detail == "" {
|
||||||
|
detail = err.Error()
|
||||||
|
}
|
||||||
|
log.Printf("[tonemap] ffmpeg libplacebo unavailable (no Vulkan runtime or filter absent) — HDR falls back to zscale/none: %v", detail)
|
||||||
}
|
}
|
||||||
return supported
|
return supported
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue