feat(streaming): ffmpeg transcoding pipeline (direct play / fMP4 / HW accel)
The browser-side WebRTC reproductor needs MP4 / H.264 / AAC / yuv420p to
keep MSE happy. This package decides per request whether to:
• direct-play — input already MSE-compatible, just remux to fMP4
• transcode — re-encode video (libx264 / NVENC / QSV / VAAPI /
VideoToolbox) + audio (AAC), fragment to fMP4
Pieces:
- internal/streaming/transcoder.go — AnalyzeCompatibility decides the
recipe from a parsed mediainfo. CompatibilityReport carries the reasons
so the player UI can show "transcoding video: HEVC → H.264".
- internal/streaming/ffmpeg_args.go — BuildFFmpegArgs assembles the argv
for ffmpeg. Direct play uses `-c copy`; transcode uses libx264 or the
selected HW encoder. Output is always fragmented MP4 piped to stdout
(-movflags frag_keyframe+empty_moov+default_base_moof) so the HTTP
handler can stream straight to the browser without disk I/O.
Quality ladder: 480p (1.5Mb), 720p (3.5Mb), 1080p (6Mb), 2160p (25Mb).
Default 1080p when unset / unknown. -ss seek for resume / scrubbing.
- internal/streaming/hwaccel.go — DetectHWAccel runs `ffmpeg -encoders`
once per process and caches the best available. Order: NVENC → QSV →
VAAPI → VideoToolbox → libx264. VAAPI is the only family that wires up
HW decode too (`-hwaccel vaapi`); the others software-decode and HW-
encode (works fine and avoids /dev/dri permission rabbit holes).
- internal/streaming/stream.go — Transcoder facade wires Analyze + Stream
together for the API handler in Fase 4. Captures the last 8 KiB of
ffmpeg stderr for diagnosable errors without unbounded memory.
Tests (20 unit, all green):
- AnalyzeCompatibility: h264+aac direct, video-only direct, HEVC →
transcode, 10-bit HDR → transcode, EAC3 audio → transcode, nil guards
- ResolveQuality: empty + unknown fallback to 1080p, 4-step ladder
- BuildFFmpegArgs: direct play -c copy, transcode libx264 + bitrate +
scale, NVENC swaps encoder & drops preset, VAAPI injects -hwaccel +
scale_vaapi, -ss timestamp formatting
- HWAccel: encoder-name table, VAAPI is the only one with HW decode
- formatDuration: zero, sub-second, HH:MM:SS, negative-clamped
- cappedBuffer: tail retention through multi-write and large-write paths
- NewTranscoder: rejects empty paths
This commit is contained in:
parent
e68b127acc
commit
75dcc0f1cb
5 changed files with 850 additions and 0 deletions
173
internal/streaming/ffmpeg_args.go
Normal file
173
internal/streaming/ffmpeg_args.go
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
package streaming
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"time"
|
||||
)
|
||||
|
||||
// StreamOptions controls a single transcode/remux invocation.
|
||||
type StreamOptions struct {
|
||||
// Quality caps the output resolution and bitrate when transcoding.
|
||||
// Direct play ignores it (the source bitrate wins). One of:
|
||||
// "2160p", "1080p", "720p", "480p", "" (= "1080p").
|
||||
Quality string
|
||||
|
||||
// StartOffset seeks the input N seconds in before transcoding. Useful
|
||||
// for resume / scrubbing. Zero means start from the beginning.
|
||||
StartOffset time.Duration
|
||||
|
||||
// HW selects the hardware encoder. "" (or "none") means software libx264.
|
||||
HW HWAccel
|
||||
|
||||
// AudioTrackIndex selects which audio track to keep (0-based, before
|
||||
// the video stream is excluded). Zero is the default track.
|
||||
AudioTrackIndex int
|
||||
}
|
||||
|
||||
// QualityProfile maps a Quality label to encoder constraints.
|
||||
type QualityProfile struct {
|
||||
Label string // "1080p"
|
||||
MaxHeight int // 1080
|
||||
VideoBitrate int // bits/s for libx264 -b:v
|
||||
AudioBitrate int // bits/s for AAC
|
||||
}
|
||||
|
||||
// qualityProfiles is the full ladder. We default to 1080p when unset.
|
||||
var qualityProfiles = map[string]QualityProfile{
|
||||
"2160p": {Label: "2160p", MaxHeight: 2160, VideoBitrate: 25_000_000, AudioBitrate: 192_000},
|
||||
"1080p": {Label: "1080p", MaxHeight: 1080, VideoBitrate: 6_000_000, AudioBitrate: 160_000},
|
||||
"720p": {Label: "720p", MaxHeight: 720, VideoBitrate: 3_500_000, AudioBitrate: 128_000},
|
||||
"480p": {Label: "480p", MaxHeight: 480, VideoBitrate: 1_500_000, AudioBitrate: 96_000},
|
||||
}
|
||||
|
||||
// ResolveQuality returns the QualityProfile for a label, falling back to
|
||||
// 1080p when the label is empty / unknown.
|
||||
func ResolveQuality(label string) QualityProfile {
|
||||
if p, ok := qualityProfiles[label]; ok {
|
||||
return p
|
||||
}
|
||||
return qualityProfiles["1080p"]
|
||||
}
|
||||
|
||||
// fragmentedMP4Movflags are the magic flags MSE needs to consume an
|
||||
// ffmpeg pipe as it's produced — avoids the moov atom being written at the
|
||||
// end of the file (which would force buffering the whole stream).
|
||||
const fragmentedMP4Movflags = "frag_keyframe+empty_moov+default_base_moof"
|
||||
|
||||
// BuildFFmpegArgs returns the argv (without the binary itself) for
|
||||
// ffmpeg given the input file, stream options, and a compatibility report.
|
||||
//
|
||||
// Two recipes:
|
||||
//
|
||||
// - Direct play: -c copy on every selected stream + remux to fMP4.
|
||||
// - Transcode: re-encode video (libx264 / hwaccel) + audio (aac).
|
||||
//
|
||||
// The result writes fMP4 fragments to stdout (`pipe:1`) so the HTTP
|
||||
// handler can stream them directly to the browser without touching disk.
|
||||
func BuildFFmpegArgs(inputPath string, report CompatibilityReport, opts StreamOptions) []string {
|
||||
args := []string{
|
||||
"-hide_banner",
|
||||
"-loglevel", "warning",
|
||||
"-nostdin",
|
||||
}
|
||||
|
||||
if opts.HW.HasDecoder() {
|
||||
args = append(args, opts.HW.DecoderArgs()...)
|
||||
}
|
||||
|
||||
if opts.StartOffset > 0 {
|
||||
args = append(args, "-ss", formatDuration(opts.StartOffset))
|
||||
}
|
||||
|
||||
args = append(args, "-i", inputPath)
|
||||
|
||||
// Map first video + selected audio. Drop subtitles (browser handles
|
||||
// them out-of-band; baking them in is a Phase 4.x decision).
|
||||
args = append(args,
|
||||
"-map", "0:v:0",
|
||||
"-map", fmt.Sprintf("0:a:%d?", opts.AudioTrackIndex),
|
||||
)
|
||||
|
||||
if report.DirectPlay {
|
||||
// Cheap path: copy streams, just remux container.
|
||||
args = append(args, "-c", "copy")
|
||||
} else {
|
||||
// Transcode path: pick encoder per HW.
|
||||
profile := ResolveQuality(opts.Quality)
|
||||
args = append(args, transcodeArgs(profile, opts.HW)...)
|
||||
}
|
||||
|
||||
args = append(args,
|
||||
"-movflags", fragmentedMP4Movflags,
|
||||
"-f", "mp4",
|
||||
"pipe:1",
|
||||
)
|
||||
return args
|
||||
}
|
||||
|
||||
// transcodeArgs returns the encoder + bitrate flags. Keeps the function
|
||||
// flat so the BuildFFmpegArgs reader can scan the recipe top to bottom.
|
||||
func transcodeArgs(profile QualityProfile, hw HWAccel) []string {
|
||||
args := []string{}
|
||||
|
||||
// Video encoder.
|
||||
args = append(args, "-c:v", hw.VideoEncoder())
|
||||
|
||||
// Scale filter caps the long edge to MaxHeight, preserving aspect.
|
||||
// `force_original_aspect_ratio=decrease` keeps it ≤ MaxHeight when
|
||||
// the source is taller and leaves smaller sources untouched. The
|
||||
// `force_divisible_by=2` keeps libx264 happy.
|
||||
scale := fmt.Sprintf(
|
||||
"scale=-2:%d:force_original_aspect_ratio=decrease:force_divisible_by=2",
|
||||
profile.MaxHeight,
|
||||
)
|
||||
if hw == HWAccelVAAPI {
|
||||
// VAAPI needs frames in the GPU surface, scaling is done with
|
||||
// scale_vaapi. We still upload via format=nv12.
|
||||
scale = fmt.Sprintf("format=nv12,hwupload,scale_vaapi=-2:%d", profile.MaxHeight)
|
||||
}
|
||||
args = append(args, "-vf", scale)
|
||||
|
||||
// Bitrate ceiling (variable bitrate with 2× burst).
|
||||
args = append(args,
|
||||
"-b:v", strconv.Itoa(profile.VideoBitrate),
|
||||
"-maxrate", strconv.Itoa(profile.VideoBitrate*2),
|
||||
"-bufsize", strconv.Itoa(profile.VideoBitrate*4),
|
||||
)
|
||||
|
||||
// SW-only: tune for low latency + don't waste cycles on the deepest
|
||||
// preset when we're feeding live playback.
|
||||
if hw == HWAccelNone || hw == HWAccelUnset {
|
||||
args = append(args,
|
||||
"-preset", "veryfast",
|
||||
"-tune", "zerolatency",
|
||||
)
|
||||
}
|
||||
|
||||
// Force yuv420p so MSE reliably plays the result (some libx264
|
||||
// configurations otherwise emit yuv422p for SD content).
|
||||
args = append(args, "-pix_fmt", "yuv420p")
|
||||
|
||||
// Audio: re-encode to AAC stereo. Mono / 5.1 sources are downmixed.
|
||||
args = append(args,
|
||||
"-c:a", "aac",
|
||||
"-b:a", strconv.Itoa(profile.AudioBitrate),
|
||||
"-ac", "2",
|
||||
)
|
||||
|
||||
return args
|
||||
}
|
||||
|
||||
// formatDuration prints a Go Duration as ffmpeg's `-ss HH:MM:SS.mmm`.
|
||||
func formatDuration(d time.Duration) string {
|
||||
if d < 0 {
|
||||
d = 0
|
||||
}
|
||||
h := int(d / time.Hour)
|
||||
d -= time.Duration(h) * time.Hour
|
||||
m := int(d / time.Minute)
|
||||
d -= time.Duration(m) * time.Minute
|
||||
s := float64(d) / float64(time.Second)
|
||||
return fmt.Sprintf("%02d:%02d:%06.3f", h, m, s)
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue