The browser-side WebRTC player needs MP4 / H.264 / AAC / yuv420p to
keep MSE happy. This package decides per request whether to:
• direct-play — input already MSE-compatible, just remux to fMP4
• transcode — re-encode video (libx264 / NVENC / QSV / VAAPI /
VideoToolbox) + audio (AAC), fragment to fMP4
Pieces:
- internal/streaming/transcoder.go — AnalyzeCompatibility decides the
recipe from a parsed mediainfo. CompatibilityReport carries the reasons
so the player UI can show "transcoding video: HEVC → H.264".
- internal/streaming/ffmpeg_args.go — BuildFFmpegArgs assembles the argv
for ffmpeg. Direct play uses `-c copy`; transcode uses libx264 or the
selected HW encoder. Output is always fragmented MP4 piped to stdout
(-movflags frag_keyframe+empty_moov+default_base_moof) so the HTTP
handler can stream straight to the browser without disk I/O.
Quality ladder (video bitrate): 480p (1.5 Mbit/s), 720p (3.5 Mbit/s),
1080p (6 Mbit/s), 2160p (25 Mbit/s). Defaults to 1080p when the label is
unset / unknown. -ss seeks the input for resume / scrubbing.
- internal/streaming/hwaccel.go — DetectHWAccel runs `ffmpeg -encoders`
once per process and caches the best available. Order: NVENC → QSV →
VAAPI → VideoToolbox → libx264. VAAPI is the only family that wires up
HW decode too (`-hwaccel vaapi`); the others software-decode and HW-
encode (works fine and avoids /dev/dri permission rabbit holes).
- internal/streaming/stream.go — Transcoder facade wires Analyze + Stream
together for the API handler in Phase 4. Captures the last 8 KiB of
ffmpeg stderr so errors stay diagnosable without unbounded memory.
Tests (20 unit, all green):
- AnalyzeCompatibility: h264+aac direct, video-only direct, HEVC →
transcode, 10-bit HDR → transcode, EAC3 audio → transcode, nil guards
- ResolveQuality: empty + unknown fallback to 1080p, 4-step ladder
- BuildFFmpegArgs: direct play -c copy, transcode libx264 + bitrate +
scale, NVENC swaps encoder & drops preset, VAAPI injects -hwaccel +
scale_vaapi, -ss timestamp formatting
- HWAccel: encoder-name table, VAAPI is the only one with HW decode
- formatDuration: zero, sub-second, HH:MM:SS, negative-clamped
- cappedBuffer: tail retention through multi-write and large-write paths
- NewTranscoder: rejects empty paths
173 lines
5.5 KiB
Go
173 lines
5.5 KiB
Go
package streaming
|
||
|
||
import (
|
||
"fmt"
|
||
"strconv"
|
||
"time"
|
||
)
|
||
|
||
// StreamOptions controls a single transcode/remux invocation.
|
||
type StreamOptions struct {
|
||
// Quality caps the output resolution and bitrate when transcoding.
|
||
// Direct play ignores it (the source bitrate wins). One of:
|
||
// "2160p", "1080p", "720p", "480p", "" (= "1080p").
|
||
Quality string
|
||
|
||
// StartOffset seeks the input N seconds in before transcoding. Useful
|
||
// for resume / scrubbing. Zero means start from the beginning.
|
||
StartOffset time.Duration
|
||
|
||
// HW selects the hardware encoder. "" (or "none") means software libx264.
|
||
HW HWAccel
|
||
|
||
// AudioTrackIndex selects which audio track to keep (0-based, before
|
||
// the video stream is excluded). Zero is the default track.
|
||
AudioTrackIndex int
|
||
}
|
||
|
||
// QualityProfile is one rung of the quality ladder: the encoder limits
// that a Quality label stands for.
type QualityProfile struct {
	// Label is the ladder name, e.g. "1080p".
	Label string
	// MaxHeight is the output height cap in pixels, e.g. 1080.
	MaxHeight int
	// VideoBitrate is the video target in bits/s (passed as -b:v).
	VideoBitrate int
	// AudioBitrate is the AAC target in bits/s (passed as -b:a).
	AudioBitrate int
}
|
||
|
||
// qualityProfiles is the full ladder. We default to 1080p when unset.
|
||
var qualityProfiles = map[string]QualityProfile{
|
||
"2160p": {Label: "2160p", MaxHeight: 2160, VideoBitrate: 25_000_000, AudioBitrate: 192_000},
|
||
"1080p": {Label: "1080p", MaxHeight: 1080, VideoBitrate: 6_000_000, AudioBitrate: 160_000},
|
||
"720p": {Label: "720p", MaxHeight: 720, VideoBitrate: 3_500_000, AudioBitrate: 128_000},
|
||
"480p": {Label: "480p", MaxHeight: 480, VideoBitrate: 1_500_000, AudioBitrate: 96_000},
|
||
}
|
||
|
||
// ResolveQuality returns the QualityProfile for a label, falling back to
|
||
// 1080p when the label is empty / unknown.
|
||
func ResolveQuality(label string) QualityProfile {
|
||
if p, ok := qualityProfiles[label]; ok {
|
||
return p
|
||
}
|
||
return qualityProfiles["1080p"]
|
||
}
|
||
|
||
// fragmentedMP4Movflags is the -movflags value used for every output.
// It makes ffmpeg emit a fragmented MP4 that MSE can consume while the
// pipe is still being written, instead of a regular MP4 whose moov atom
// only arrives at the end (which would force buffering the whole stream).
const fragmentedMP4Movflags = "frag_keyframe+empty_moov+default_base_moof"
|
||
|
||
// BuildFFmpegArgs returns the argv (without the binary itself) for
|
||
// ffmpeg given the input file, stream options, and a compatibility report.
|
||
//
|
||
// Two recipes:
|
||
//
|
||
// - Direct play: -c copy on every selected stream + remux to fMP4.
|
||
// - Transcode: re-encode video (libx264 / hwaccel) + audio (aac).
|
||
//
|
||
// The result writes fMP4 fragments to stdout (`pipe:1`) so the HTTP
|
||
// handler can stream them directly to the browser without touching disk.
|
||
func BuildFFmpegArgs(inputPath string, report CompatibilityReport, opts StreamOptions) []string {
|
||
args := []string{
|
||
"-hide_banner",
|
||
"-loglevel", "warning",
|
||
"-nostdin",
|
||
}
|
||
|
||
if opts.HW.HasDecoder() {
|
||
args = append(args, opts.HW.DecoderArgs()...)
|
||
}
|
||
|
||
if opts.StartOffset > 0 {
|
||
args = append(args, "-ss", formatDuration(opts.StartOffset))
|
||
}
|
||
|
||
args = append(args, "-i", inputPath)
|
||
|
||
// Map first video + selected audio. Drop subtitles (browser handles
|
||
// them out-of-band; baking them in is a Phase 4.x decision).
|
||
args = append(args,
|
||
"-map", "0:v:0",
|
||
"-map", fmt.Sprintf("0:a:%d?", opts.AudioTrackIndex),
|
||
)
|
||
|
||
if report.DirectPlay {
|
||
// Cheap path: copy streams, just remux container.
|
||
args = append(args, "-c", "copy")
|
||
} else {
|
||
// Transcode path: pick encoder per HW.
|
||
profile := ResolveQuality(opts.Quality)
|
||
args = append(args, transcodeArgs(profile, opts.HW)...)
|
||
}
|
||
|
||
args = append(args,
|
||
"-movflags", fragmentedMP4Movflags,
|
||
"-f", "mp4",
|
||
"pipe:1",
|
||
)
|
||
return args
|
||
}
|
||
|
||
// transcodeArgs returns the encoder + bitrate flags. Keeps the function
|
||
// flat so the BuildFFmpegArgs reader can scan the recipe top to bottom.
|
||
func transcodeArgs(profile QualityProfile, hw HWAccel) []string {
|
||
args := []string{}
|
||
|
||
// Video encoder.
|
||
args = append(args, "-c:v", hw.VideoEncoder())
|
||
|
||
// Scale filter caps the long edge to MaxHeight, preserving aspect.
|
||
// `force_original_aspect_ratio=decrease` keeps it ≤ MaxHeight when
|
||
// the source is taller and leaves smaller sources untouched. The
|
||
// `force_divisible_by=2` keeps libx264 happy.
|
||
scale := fmt.Sprintf(
|
||
"scale=-2:%d:force_original_aspect_ratio=decrease:force_divisible_by=2",
|
||
profile.MaxHeight,
|
||
)
|
||
if hw == HWAccelVAAPI {
|
||
// VAAPI needs frames in the GPU surface, scaling is done with
|
||
// scale_vaapi. We still upload via format=nv12.
|
||
scale = fmt.Sprintf("format=nv12,hwupload,scale_vaapi=-2:%d", profile.MaxHeight)
|
||
}
|
||
args = append(args, "-vf", scale)
|
||
|
||
// Bitrate ceiling (variable bitrate with 2× burst).
|
||
args = append(args,
|
||
"-b:v", strconv.Itoa(profile.VideoBitrate),
|
||
"-maxrate", strconv.Itoa(profile.VideoBitrate*2),
|
||
"-bufsize", strconv.Itoa(profile.VideoBitrate*4),
|
||
)
|
||
|
||
// SW-only: tune for low latency + don't waste cycles on the deepest
|
||
// preset when we're feeding live playback.
|
||
if hw == HWAccelNone || hw == HWAccelUnset {
|
||
args = append(args,
|
||
"-preset", "veryfast",
|
||
"-tune", "zerolatency",
|
||
)
|
||
}
|
||
|
||
// Force yuv420p so MSE reliably plays the result (some libx264
|
||
// configurations otherwise emit yuv422p for SD content).
|
||
args = append(args, "-pix_fmt", "yuv420p")
|
||
|
||
// Audio: re-encode to AAC stereo. Mono / 5.1 sources are downmixed.
|
||
args = append(args,
|
||
"-c:a", "aac",
|
||
"-b:a", strconv.Itoa(profile.AudioBitrate),
|
||
"-ac", "2",
|
||
)
|
||
|
||
return args
|
||
}
|
||
|
||
// formatDuration prints a Go Duration as ffmpeg's `-ss HH:MM:SS.mmm`.
//
// Negative durations are clamped to zero. The value is rounded to the
// nearest millisecond before it is split into fields, so a carry
// propagates into minutes and hours: 59.9996s becomes "00:01:00.000",
// never "00:00:60.000". The hours field is at least two digits and grows
// as needed.
func formatDuration(d time.Duration) string {
	if d < 0 {
		d = 0
	}

	// Round first, then decompose with integer arithmetic. Rounding the
	// seconds field on its own (e.g. via %.3f) could print "60.000".
	d = d.Round(time.Millisecond)

	hours := d / time.Hour
	d -= hours * time.Hour
	minutes := d / time.Minute
	d -= minutes * time.Minute
	seconds := d / time.Second
	d -= seconds * time.Second
	millis := d / time.Millisecond

	return fmt.Sprintf("%02d:%02d:%02d.%03d",
		int64(hours), int64(minutes), int64(seconds), int64(millis))
}
|