unarr/internal/streaming/ffmpeg_args.go
Deivid Soto 75dcc0f1cb feat(streaming): ffmpeg transcoding pipeline (direct play / fMP4 / HW accel)
The browser-side WebRTC player needs MP4 / H.264 / AAC / yuv420p to
keep MSE happy. This package decides per request whether to:

  • direct-play  — input already MSE-compatible, just remux to fMP4
  • transcode    — re-encode video (libx264 / NVENC / QSV / VAAPI /
                   VideoToolbox) + audio (AAC), fragment to fMP4

Pieces:

- internal/streaming/transcoder.go — AnalyzeCompatibility decides the
  recipe from a parsed mediainfo. CompatibilityReport carries the reasons
  so the player UI can show "transcoding video: HEVC → H.264".

- internal/streaming/ffmpeg_args.go — BuildFFmpegArgs assembles the argv
  for ffmpeg. Direct play uses `-c copy`; transcode uses libx264 or the
  selected HW encoder. Output is always fragmented MP4 piped to stdout
  (-movflags frag_keyframe+empty_moov+default_base_moof) so the HTTP
  handler can stream straight to the browser without disk I/O.

  Quality ladder: 480p (1.5Mb), 720p (3.5Mb), 1080p (6Mb), 2160p (25Mb).
  Default 1080p when unset / unknown. -ss seek for resume / scrubbing.

- internal/streaming/hwaccel.go — DetectHWAccel runs `ffmpeg -encoders`
  once per process and caches the best available. Order: NVENC → QSV →
  VAAPI → VideoToolbox → libx264. VAAPI is the only family that wires up
  HW decode too (`-hwaccel vaapi`); the others software-decode and HW-
  encode (works fine and avoids /dev/dri permission rabbit holes).

- internal/streaming/stream.go — Transcoder facade wires Analyze + Stream
  together for the API handler in Phase 4. Captures the last 8 KiB of
  ffmpeg stderr for diagnosable errors without unbounded memory.

Tests (20 unit, all green):
- AnalyzeCompatibility: h264+aac direct, video-only direct, HEVC →
  transcode, 10-bit HDR → transcode, EAC3 audio → transcode, nil guards
- ResolveQuality: empty + unknown fallback to 1080p, 4-step ladder
- BuildFFmpegArgs: direct play -c copy, transcode libx264 + bitrate +
  scale, NVENC swaps encoder & drops preset, VAAPI injects -hwaccel +
  scale_vaapi, -ss timestamp formatting
- HWAccel: encoder-name table, VAAPI is the only one with HW decode
- formatDuration: zero, sub-second, HH:MM:SS, negative-clamped
- cappedBuffer: tail retention through multi-write and large-write paths
- NewTranscoder: rejects empty paths
2026-05-06 11:34:57 +02:00

173 lines
5.5 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package streaming
import (
"fmt"
"strconv"
"time"
)
// StreamOptions carries the per-request knobs that BuildFFmpegArgs turns
// into ffmpeg flags for a single transcode/remux invocation.
type StreamOptions struct {
// Quality is the label looked up in the quality ladder when transcoding
// ("2160p", "1080p", "720p", "480p"). Empty or unknown labels resolve to
// "1080p". It is only consulted on the transcode path; direct play
// copies the streams and never reads it.
Quality string
// StartOffset is emitted as an input-side `-ss` (placed before `-i`)
// for resume / scrubbing. Zero or negative values add no seek flag, so
// playback starts from the beginning.
StartOffset time.Duration
// HW selects the video encoder (via HW.VideoEncoder) and, when
// HW.HasDecoder reports true, contributes extra decoder flags ahead of
// the input. HWAccelNone and HWAccelUnset take the software path and
// get the -preset/-tune flags.
HW HWAccel
// AudioTrackIndex is the 0-based index among the input's audio streams;
// it is formatted into the `0:a:N?` map specifier, whose trailing `?`
// makes the mapping optional. Zero selects the first audio track.
AudioTrackIndex int
}
// QualityProfile holds the encoder constraints attached to one Quality
// label: the output height cap plus the video and audio bitrates.
type QualityProfile struct {
Label string // ladder key, e.g. "1080p"
MaxHeight int // height in pixels used by the scale filter, e.g. 1080
VideoBitrate int // bits/s passed to -b:v; -maxrate is 2x and -bufsize 4x this value
AudioBitrate int // bits/s passed to -b:a for the AAC encoder
}
// qualityProfiles is the full quality ladder, keyed by label. ResolveQuality
// reads it and falls back to the "1080p" entry for empty or unknown labels,
// so that entry must always be present.
var qualityProfiles = map[string]QualityProfile{
"2160p": {Label: "2160p", MaxHeight: 2160, VideoBitrate: 25_000_000, AudioBitrate: 192_000},
"1080p": {Label: "1080p", MaxHeight: 1080, VideoBitrate: 6_000_000, AudioBitrate: 160_000},
"720p": {Label: "720p", MaxHeight: 720, VideoBitrate: 3_500_000, AudioBitrate: 128_000},
"480p": {Label: "480p", MaxHeight: 480, VideoBitrate: 1_500_000, AudioBitrate: 96_000},
}
// ResolveQuality looks label up in the quality ladder and returns the
// matching QualityProfile. Any label that is not a key of the ladder,
// including the empty string, yields the 1080p profile.
func ResolveQuality(label string) QualityProfile {
	profile, known := qualityProfiles[label]
	if !known {
		profile = qualityProfiles["1080p"]
	}
	return profile
}
// fragmentedMP4Movflags is the value BuildFFmpegArgs passes to `-movflags`.
// It asks the MP4 muxer for fragmented output so MSE can consume the ffmpeg
// pipe as it is produced, instead of the moov atom being written at the end
// of the file (which would force buffering the whole stream).
const fragmentedMP4Movflags = "frag_keyframe+empty_moov+default_base_moof"
// BuildFFmpegArgs assembles the ffmpeg argument vector (the binary name is
// not included) for streaming inputPath as fragmented MP4 on stdout.
//
// The argv is laid out as:
//
//  1. global flags (quiet banner, warning-level logging, no stdin),
//  2. optional hardware decoder flags when opts.HW.HasDecoder() is true,
//  3. an optional input-side `-ss` when opts.StartOffset is positive,
//  4. the input and the stream mapping: first video stream plus the audio
//     stream at opts.AudioTrackIndex (optional mapping),
//  5. the codec recipe: `-c copy` when report.DirectPlay is set, otherwise
//     the transcode flags for the resolved quality profile and opts.HW,
//  6. the fMP4 muxer flags and the `pipe:1` output.
//
// Subtitle streams are never mapped.
func BuildFFmpegArgs(inputPath string, report CompatibilityReport, opts StreamOptions) []string {
	argv := []string{"-hide_banner", "-loglevel", "warning", "-nostdin"}

	// Input-side options must precede -i.
	if opts.HW.HasDecoder() {
		argv = append(argv, opts.HW.DecoderArgs()...)
	}
	if opts.StartOffset > 0 {
		argv = append(argv, "-ss", formatDuration(opts.StartOffset))
	}

	// First video stream plus the requested audio stream; the trailing
	// `?` keeps the audio mapping optional.
	audioSpec := fmt.Sprintf("0:a:%d?", opts.AudioTrackIndex)
	argv = append(argv,
		"-i", inputPath,
		"-map", "0:v:0",
		"-map", audioSpec,
	)

	var codecArgs []string
	switch {
	case report.DirectPlay:
		// Remux only: every mapped stream is copied as-is.
		codecArgs = []string{"-c", "copy"}
	default:
		// Re-encode using the profile for the requested quality.
		codecArgs = transcodeArgs(ResolveQuality(opts.Quality), opts.HW)
	}
	argv = append(argv, codecArgs...)

	return append(argv,
		"-movflags", fragmentedMP4Movflags,
		"-f", "mp4",
		"pipe:1",
	)
}
// transcodeArgs returns the ffmpeg output flags for the transcode recipe,
// in this order: video encoder, scale filter, bitrate ceiling, software
// encoder tuning (software path only), pixel format, and AAC audio flags.
//
// profile supplies the height cap and the video/audio bitrates. hw selects
// the encoder through hw.VideoEncoder() and switches the filter chain to
// the VAAPI variant when it equals HWAccelVAAPI.
func transcodeArgs(profile QualityProfile, hw HWAccel) []string {
	// Cap the output height at MaxHeight without ever upscaling. A bare
	// `scale=-2:N` always resizes to height N, so the target height is
	// `min(ih,N)` instead. The expression is single-quoted so the filter
	// graph parser does not read its comma as a filter separator. Width -2
	// follows the source aspect ratio and is kept divisible by 2.
	height := fmt.Sprintf("'min(ih,%d)'", profile.MaxHeight)

	// force_divisible_by only takes effect together with
	// force_original_aspect_ratio; the pair rounds an odd source height
	// down to an even one, which yuv420p output requires.
	filter := "scale=-2:" + height + ":force_original_aspect_ratio=decrease:force_divisible_by=2"
	if hw == HWAccelVAAPI {
		// VAAPI scales on the GPU: convert to nv12, upload the frame to a
		// hardware surface, then resize with scale_vaapi.
		filter = "format=nv12,hwupload,scale_vaapi=-2:" + height
	}

	// 22 is the maximum number of arguments emitted below.
	args := make([]string, 0, 22)
	args = append(args,
		"-c:v", hw.VideoEncoder(),
		"-vf", filter,
		// Bitrate ceiling: target rate, 2x burst, 4x rate-control buffer.
		"-b:v", strconv.Itoa(profile.VideoBitrate),
		"-maxrate", strconv.Itoa(profile.VideoBitrate*2),
		"-bufsize", strconv.Itoa(profile.VideoBitrate*4),
	)

	// Software path only: favour encode speed and low latency, since the
	// output feeds live playback.
	if hw == HWAccelNone || hw == HWAccelUnset {
		args = append(args,
			"-preset", "veryfast",
			"-tune", "zerolatency",
		)
	}

	// Force yuv420p so MSE reliably plays the result.
	// NOTE(review): on the VAAPI path the filter chain ends on a hardware
	// surface; confirm that requesting yuv420p here does not conflict with
	// the VAAPI encoder's input format.
	args = append(args, "-pix_fmt", "yuv420p")

	// Audio is always re-encoded to stereo AAC at the profile's bitrate.
	args = append(args,
		"-c:a", "aac",
		"-b:a", strconv.Itoa(profile.AudioBitrate),
		"-ac", "2",
	)
	return args
}
// formatDuration renders d as an ffmpeg time value in the form
// `HH:MM:SS.mmm`, suitable for `-ss`.
//
// Negative durations are clamped to zero. The value is rounded to the
// nearest millisecond before it is split into fields, so a fraction that
// rounds up carries into seconds/minutes/hours instead of yielding an
// out-of-range seconds field such as "00:00:60.000". The hours field is
// at least two digits wide and grows as needed.
func formatDuration(d time.Duration) string {
	if d < 0 {
		d = 0
	}
	// Work in whole milliseconds so every field is an exact integer.
	totalMillis := int64(d.Round(time.Millisecond) / time.Millisecond)
	millis := totalMillis % 1000
	totalSeconds := totalMillis / 1000

	hours := totalSeconds / 3600
	minutes := totalSeconds / 60 % 60
	seconds := totalSeconds % 60
	return fmt.Sprintf("%02d:%02d:%02d.%03d", hours, minutes, seconds, millis)
}