Keep an NVENC downscale of an SDR source entirely on the GPU (decode -> scale_cuda -> h264_nvenc) instead of copying every frame to the CPU for `scale=` and back. That GPU->CPU->GPU round-trip is the wall on modest GPUs; even a strong box gains ~37% (scale_cuda 14.9x vs CPU 10.9x on a 4K SDR HEVC -> 1080p encode).

Strictly gated so every case that needs CPU frames is unchanged:
- HDR (libplacebo Vulkan / zscale CPU tonemap can't consume a CUDA surface),
- burn-in (the scale2ref+overlay composite runs on CPU frames),
- non-NVENC encoders, and no-op when not actually downscaling.

Changes:
- hwscale.go: FFmpegSupportsScaleCuda — a functional 1-frame probe mirroring the libplacebo probe (presence in -filters lies; needs a real CUDA device). Probes the worst-case real input (10-bit p010 -> 8-bit yuv420p) so a host whose scale_cuda can't do the 10->8-bit conversion fails closed to CPU.
- hls.go: useCudaScale gate + `-hwaccel_output_format cuda` + a `scale_cuda=-2:H:format=yuv420p` filter branch. Output is 8-bit (format=yuv420p + `-profile:v main`), browser-safe.
- transcode_quality.go / player_session_registry.go / daemon.go: HasScaleCuda flag, populated + warmed at startup like the other ffmpeg capability probes.

Fail-closed: probe absent/fails -> keep the CPU scale path, no regression.

Verified live (real 4K SDR HEVC Main10 session emitted scale_cuda, 5.54x realtime, nvenc at 100%) + 8 arg-builder unit tests for the gate.
106 lines
3.6 KiB
Go
106 lines
3.6 KiB
Go
package cmd
|
|
|
|
import (
|
|
"context"
|
|
"sync"
|
|
|
|
"github.com/torrentclaw/unarr/internal/config"
|
|
"github.com/torrentclaw/unarr/internal/engine"
|
|
"github.com/torrentclaw/unarr/internal/library/mediainfo"
|
|
)
|
|
|
|
// playerSessionRegistry tracks per-session cancel funcs for active in-browser
|
|
// HLS streaming sessions. Each session lives only as long as its ffmpeg
|
|
// process; the registry exists so duplicate sync responses don't double-spawn
|
|
// the same session and so daemon shutdown can drain.
|
|
var playerSessionRegistry = &playerSessionRegistryT{
|
|
cancels: make(map[string]context.CancelFunc),
|
|
}
|
|
|
|
// playerSessionRegistryT is a mutex-guarded map from session ID to the
// context.CancelFunc registered for that session.
//
// The zero value cannot accept add calls: cancels must be allocated with make
// before the first write. The struct contains a sync.Mutex by value, so it is
// used through a pointer and must not be copied.
type playerSessionRegistryT struct {
	mu      sync.Mutex                    // guards cancels
	cancels map[string]context.CancelFunc // session ID -> cancel func
}

// has reports whether a cancel func is currently registered for sessionID.
func (r *playerSessionRegistryT) has(sessionID string) bool {
	r.mu.Lock()
	defer r.mu.Unlock()

	_, ok := r.cancels[sessionID]
	return ok
}

// add registers cancel under sessionID. An entry already stored for the same
// ID is overwritten; the previous cancel func is not invoked.
func (r *playerSessionRegistryT) add(sessionID string, cancel context.CancelFunc) {
	r.mu.Lock()
	defer r.mu.Unlock()

	r.cancels[sessionID] = cancel
}

// remove drops the entry for sessionID without invoking its cancel func.
// Removing an ID that is not registered is a no-op.
func (r *playerSessionRegistryT) remove(sessionID string) {
	r.mu.Lock()
	defer r.mu.Unlock()

	delete(r.cancels, sessionID)
}
|
|
|
|
// cancelAllPlayerSessions cancels every running session. Called on daemon
|
|
// shutdown so the ffmpeg children and SSE consumers exit cleanly.
|
|
func cancelAllPlayerSessions() {
|
|
playerSessionRegistry.mu.Lock()
|
|
cancels := make([]context.CancelFunc, 0, len(playerSessionRegistry.cancels))
|
|
for _, c := range playerSessionRegistry.cancels {
|
|
cancels = append(cancels, c)
|
|
}
|
|
playerSessionRegistry.cancels = make(map[string]context.CancelFunc)
|
|
playerSessionRegistry.mu.Unlock()
|
|
for _, c := range cancels {
|
|
c()
|
|
}
|
|
}
|
|
|
|
// buildTranscodeRuntime resolves the ffmpeg/ffprobe binaries + config knobs
|
|
// for the HLS streaming pipeline. Failure to resolve a binary returns a
|
|
// runtime with empty paths so the caller can short-circuit instead of
|
|
// launching a transcoder that will immediately fail.
|
|
func buildTranscodeRuntime(ctx context.Context, cfg config.Config) engine.TranscodeRuntime {
|
|
if !cfg.Download.Transcode.Enabled {
|
|
return engine.TranscodeRuntime{Disabled: true}
|
|
}
|
|
ffmpegPath, errF := mediainfo.ResolveFFmpeg(cfg.Library.FFmpegPath)
|
|
ffprobePath, errP := mediainfo.ResolveFFprobe(cfg.Library.FFprobePath)
|
|
if errF != nil || errP != nil {
|
|
return engine.TranscodeRuntime{Disabled: true}
|
|
}
|
|
hw := engine.HWAccelNone
|
|
switch cfg.Download.Transcode.HWAccel {
|
|
case "auto":
|
|
hw = engine.DetectHWAccel(ctx, ffmpegPath)
|
|
case "nvenc":
|
|
hw = engine.HWAccelNVENC
|
|
case "qsv":
|
|
hw = engine.HWAccelQSV
|
|
case "vaapi":
|
|
hw = engine.HWAccelVAAPI
|
|
case "videotoolbox":
|
|
hw = engine.HWAccelVideoToolbox
|
|
case "none", "":
|
|
hw = engine.HWAccelNone
|
|
}
|
|
return engine.TranscodeRuntime{
|
|
FFmpegPath: ffmpegPath,
|
|
FFprobePath: ffprobePath,
|
|
HWAccel: hw,
|
|
Preset: cfg.Download.Transcode.Preset,
|
|
VideoBitrate: cfg.Download.Transcode.VideoBitrate,
|
|
AudioBitrate: cfg.Download.Transcode.AudioBitrate,
|
|
MaxHeight: cfg.Download.Transcode.MaxHeight,
|
|
// Tonemap HDR→SDR only when this ffmpeg build has zscale; otherwise the
|
|
// filter would error and break playback, so HDR plays untonemapped.
|
|
TonemapHDR: engine.FFmpegSupportsZscale(ffmpegPath),
|
|
// libplacebo (GPU) is preferred over zscale when present — checked here so
|
|
// the per-session arg builder can pick it for HDR sources.
|
|
HasLibplacebo: engine.FFmpegSupportsLibplacebo(ffmpegPath),
|
|
// scale_cuda lets an NVENC SDR downscale stay fully on the GPU. Probed
|
|
// unconditionally (like libplacebo); fails closed to false on non-CUDA
|
|
// hosts, where the arg builder keeps the CPU scale path anyway.
|
|
HasScaleCuda: engine.FFmpegSupportsScaleCuda(ffmpegPath),
|
|
}
|
|
}
|