feat(stream): real-time transcoding for non-browser-decodable codecs
Source files in HEVC, AV1, AC3, DTS, EAC3, etc. now transcode through ffmpeg
to fragmented MP4 (h264 + aac) on-the-fly when the browser would otherwise
play silent black. Decision matrix lives in engine.DecideAction:
passthrough → remux → audio-transcode → full video-transcode.
Architecture — temp file + growing-size source:
- engine.streamSource interface abstracts byte source. Two impls:
* diskFileSource: passthrough when codecs are already browser-friendly.
* transcodeSource: spawns ffmpeg writing to a /tmp/tc-stream-*.mp4 file.
A ticker polls file size and wakes blocked ReadAt callers as ffmpeg
produces output. Estimate of final size (bitrate × duration) is
announced over the wire so the browser's scrubber has something to
anchor on.
- dataChannelPump now reads from streamSource instead of *os.File. HELLO
carries Transcoding=true + an estimated total size; Seekable=true (we
read random-access from the temp file even while writing).
- Transcoder runtime resolved per session by buildTranscodeRuntime in
cmd/daemon: ffmpeg/ffprobe path lookup + HWAccel auto-detection
(NVENC/QSV/VAAPI/VideoToolbox).
- New [downloads.transcode] TOML section: enabled (default true), hw_accel
(auto), preset (veryfast), video_bitrate (5M), audio_bitrate (192k),
max_height (optional downscale), max_concurrent (safety cap).
Falls back to passthrough if ffprobe is missing, fails, or codecs are
already browser-friendly. tmp file is cleaned up on session shutdown.
This commit is contained in:
parent
4314c06c5c
commit
66ac79664b
6 changed files with 583 additions and 51 deletions
|
|
@ -24,7 +24,6 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
|
|
@ -62,6 +61,25 @@ type WebRTCStreamConfig struct {
|
|||
Signal *agent.Client
|
||||
// Logger receives diagnostic events; a nil logger swallows everything.
|
||||
Logger StreamLogger
|
||||
// Transcode steers on-the-fly transcoding when source codecs are not
|
||||
// browser-decodable (HEVC/AV1/AC3/DTS). Empty FFmpegPath disables it.
|
||||
Transcode TranscodeRuntime
|
||||
}
|
||||
|
||||
// TranscodeRuntime carries the resolved ffmpeg/ffprobe paths + tunables so
|
||||
// each session can decide whether to passthrough or pipe through ffmpeg.
|
||||
type TranscodeRuntime struct {
|
||||
FFmpegPath string
|
||||
FFprobePath string
|
||||
HWAccel HWAccel
|
||||
Preset string
|
||||
VideoBitrate string
|
||||
AudioBitrate string
|
||||
MaxHeight int
|
||||
// Disabled forces passthrough for every file even when codecs are not
|
||||
// browser-friendly. Useful when the user explicitly turns transcoding
|
||||
// off in config.
|
||||
Disabled bool
|
||||
}
|
||||
|
||||
// StreamLogger is an injectable logger so tests can capture events.
|
||||
|
|
@ -84,6 +102,48 @@ func logger(l StreamLogger) StreamLogger {
|
|||
return l
|
||||
}
|
||||
|
||||
// buildStreamSource picks between passthrough and transcoded source. ffprobe
|
||||
// failure or missing ffmpeg falls back to passthrough — the browser surfaces
|
||||
// a clearer codec error than us refusing to start.
|
||||
func buildStreamSource(
|
||||
ctx context.Context,
|
||||
abs string,
|
||||
displayName string,
|
||||
cfg WebRTCStreamConfig,
|
||||
log StreamLogger,
|
||||
) (streamSource, error) {
|
||||
tc := cfg.Transcode
|
||||
if tc.Disabled || tc.FFmpegPath == "" || tc.FFprobePath == "" {
|
||||
return newDiskFileSource(abs)
|
||||
}
|
||||
|
||||
probe, err := ProbeFile(ctx, tc.FFprobePath, abs)
|
||||
if err != nil {
|
||||
log.Warnf("[wrtc %s] probe failed (%v) — passthrough", agent.ShortID(cfg.SessionID), err)
|
||||
return newDiskFileSource(abs)
|
||||
}
|
||||
action := DecideAction(probe)
|
||||
if action == ActionPassthrough {
|
||||
log.Infof("[wrtc %s] codec passthrough (%s + %s in %s)",
|
||||
agent.ShortID(cfg.SessionID), probe.VideoCodec, probe.AudioCodec, probe.Container)
|
||||
return newDiskFileSource(abs)
|
||||
}
|
||||
|
||||
log.Infof("[wrtc %s] transcoding %s/%s/%s → h264+aac (%s)",
|
||||
agent.ShortID(cfg.SessionID), probe.Container, probe.VideoCodec, probe.AudioCodec, action)
|
||||
|
||||
opts := TranscodeOpts{
|
||||
Action: action,
|
||||
HWAccel: tc.HWAccel,
|
||||
Preset: tc.Preset,
|
||||
VideoBitrate: tc.VideoBitrate,
|
||||
AudioBitrate: tc.AudioBitrate,
|
||||
MaxHeight: tc.MaxHeight,
|
||||
FFmpegPath: tc.FFmpegPath,
|
||||
}
|
||||
return newTranscodeSource(ctx, abs, probe, action, opts, displayName)
|
||||
}
|
||||
|
||||
// RunWebRTCStream blocks until the session ends — either the DataChannel
|
||||
// closes, the peer connection drops, or ctx is cancelled. Always returns a
|
||||
// non-nil error explaining the termination reason.
|
||||
|
|
@ -101,24 +161,20 @@ func RunWebRTCStream(ctx context.Context, cfg WebRTCStreamConfig) error {
|
|||
if err != nil {
|
||||
return fmt.Errorf("webrtc_stream: resolve path: %w", err)
|
||||
}
|
||||
file, err := os.Open(abs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("webrtc_stream: open file: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
stat, err := file.Stat()
|
||||
displayName := cfg.FileName
|
||||
if displayName == "" {
|
||||
displayName = filepath.Base(abs)
|
||||
}
|
||||
|
||||
// Decide passthrough vs transcoding. Probe is best-effort: if ffprobe
|
||||
// is missing or fails we fall back to passthrough (the browser will
|
||||
// surface a clearer error than us guessing wrong).
|
||||
source, err := buildStreamSource(ctx, abs, displayName, cfg, log)
|
||||
if err != nil {
|
||||
return fmt.Errorf("webrtc_stream: stat: %w", err)
|
||||
}
|
||||
fileSize := stat.Size()
|
||||
if cfg.FileSize > 0 && cfg.FileSize != fileSize {
|
||||
log.Warnf("webrtc_stream: declared size %d != actual %d", cfg.FileSize, fileSize)
|
||||
}
|
||||
fileName := cfg.FileName
|
||||
if fileName == "" {
|
||||
fileName = filepath.Base(abs)
|
||||
return fmt.Errorf("webrtc_stream: build source: %w", err)
|
||||
}
|
||||
defer source.Close()
|
||||
|
||||
// 1. Build PeerConnection.
|
||||
api := webrtc.NewAPI()
|
||||
|
|
@ -188,10 +244,17 @@ func RunWebRTCStream(ctx context.Context, cfg WebRTCStreamConfig) error {
|
|||
}
|
||||
})
|
||||
|
||||
// 2. Drive the SDP exchange.
|
||||
// 2. Drive the SDP exchange. Any error from the loop (browser sent
|
||||
// "bye", signal stream closed, etc.) cancels the session so we don't
|
||||
// dangle on the DC waiting for a peer that's already gone.
|
||||
sdpDone := make(chan error, 1)
|
||||
go func() {
|
||||
sdpDone <- runSDPExchange(sessionCtx, pc, cfg)
|
||||
err := runSDPExchange(sessionCtx, pc, cfg)
|
||||
sdpDone <- err
|
||||
if err != nil && sessionCtx.Err() == nil {
|
||||
log.Infof("[wrtc %s] signal loop ended: %v", agent.ShortID(cfg.SessionID), err)
|
||||
cancelSession()
|
||||
}
|
||||
}()
|
||||
|
||||
// 3. Wait for either SDP error or DataChannel open.
|
||||
|
|
@ -217,7 +280,7 @@ func RunWebRTCStream(ctx context.Context, cfg WebRTCStreamConfig) error {
|
|||
}
|
||||
|
||||
// 4. Wire up the data channel pump.
|
||||
pump := newDataChannelPump(dc, file, fileSize, fileName, log, cancelSession)
|
||||
pump := newDataChannelPump(dc, source, log, cancelSession)
|
||||
dc.OnOpen(pump.onOpen)
|
||||
dc.OnMessage(pump.onMessage)
|
||||
dc.OnClose(func() {
|
||||
|
|
@ -346,14 +409,12 @@ func handleSignal(
|
|||
return nil
|
||||
}
|
||||
|
||||
// dataChannelPump owns the DC + file handle and serves wire-protocol frames.
|
||||
// dataChannelPump owns the DC + stream source and serves wire-protocol frames.
|
||||
type dataChannelPump struct {
|
||||
dc *webrtc.DataChannel
|
||||
file *os.File
|
||||
fileSize int64
|
||||
fileName string
|
||||
log StreamLogger
|
||||
cancel context.CancelFunc
|
||||
dc *webrtc.DataChannel
|
||||
source streamSource
|
||||
log StreamLogger
|
||||
cancel context.CancelFunc
|
||||
|
||||
// Flow control: writers wait on resumeCh when bufferedAmount goes high.
|
||||
paused atomic.Bool
|
||||
|
|
@ -372,17 +433,13 @@ type dataChannelPump struct {
|
|||
|
||||
func newDataChannelPump(
|
||||
dc *webrtc.DataChannel,
|
||||
file *os.File,
|
||||
fileSize int64,
|
||||
fileName string,
|
||||
source streamSource,
|
||||
log StreamLogger,
|
||||
cancel context.CancelFunc,
|
||||
) *dataChannelPump {
|
||||
p := &dataChannelPump{
|
||||
dc: dc,
|
||||
file: file,
|
||||
fileSize: fileSize,
|
||||
fileName: fileName,
|
||||
source: source,
|
||||
log: log,
|
||||
cancel: cancel,
|
||||
resumeCh: make(chan struct{}, 1),
|
||||
|
|
@ -395,16 +452,25 @@ func newDataChannelPump(
|
|||
}
|
||||
|
||||
func (p *dataChannelPump) onOpen() {
|
||||
// Use estimated size for transcoded streams so the browser scrubber has
|
||||
// something to anchor on. Real size is reflected by Range responses as
|
||||
// ffmpeg writes more bytes; the estimate just bootstraps the UI.
|
||||
announceSize := p.source.EstimatedSize()
|
||||
transcoding := p.source.Transcoded()
|
||||
// Seekable=true even for transcoded sources because we read from a tmp
|
||||
// file (random access). Seek backwards just works; seek forward beyond
|
||||
// what ffmpeg has produced will block briefly inside ReadAt.
|
||||
seekable := true
|
||||
hello := wire.HelloPayload{
|
||||
FileSize: uint64(p.fileSize),
|
||||
Transcoding: false,
|
||||
Seekable: true,
|
||||
FileName: p.fileName,
|
||||
FileSize: uint64(announceSize),
|
||||
Transcoding: transcoding,
|
||||
Seekable: seekable,
|
||||
FileName: p.source.FileName(),
|
||||
}
|
||||
payload := wire.EncodeHello(hello)
|
||||
frame := wire.EncodeFrame(wire.Header{
|
||||
Type: wire.FrameHello,
|
||||
Flags: wire.HelloFlags(false, true),
|
||||
Flags: wire.HelloFlags(transcoding, seekable),
|
||||
StreamID: 0,
|
||||
Length: uint32(len(payload)),
|
||||
}, payload)
|
||||
|
|
@ -487,19 +553,42 @@ func (p *dataChannelPump) serveRange(streamID uint32, req wire.RangeReqPayload)
|
|||
// Reject offsets above MaxInt64 — uint64→int64 narrowing would wrap to a
|
||||
// negative value and bypass the bounds check, then ReadAt would be called
|
||||
// with a negative offset.
|
||||
if req.Offset > math.MaxInt64 || int64(req.Offset) >= p.fileSize {
|
||||
currentSize := p.source.Size()
|
||||
finalSize := p.source.EstimatedSize()
|
||||
if req.Offset > math.MaxInt64 {
|
||||
p.sendRangeEnd(streamID, 2) // out of range
|
||||
return
|
||||
}
|
||||
// For transcoded streams `currentSize` grows over time; only reject when
|
||||
// the offset is past the *estimated* final size.
|
||||
if int64(req.Offset) >= finalSize && p.source.Final() {
|
||||
p.sendRangeEnd(streamID, 2)
|
||||
return
|
||||
}
|
||||
|
||||
want := int64(req.Length)
|
||||
if req.Length > math.MaxInt64 {
|
||||
want = 0 // treat absurd length as "remainder of file"
|
||||
}
|
||||
remaining := p.fileSize - int64(req.Offset)
|
||||
// "Remainder" target: prefer current known size, fall back to estimate
|
||||
// for transcoded streams so the browser can keep scrolling forward as
|
||||
// ffmpeg produces output.
|
||||
knownEnd := currentSize
|
||||
if p.source.Final() {
|
||||
knownEnd = finalSize
|
||||
}
|
||||
if knownEnd < int64(req.Offset) {
|
||||
knownEnd = int64(req.Offset)
|
||||
}
|
||||
remaining := knownEnd - int64(req.Offset)
|
||||
if want <= 0 || want > remaining {
|
||||
want = remaining
|
||||
}
|
||||
if want <= 0 {
|
||||
// Nothing to serve right now (transcoder hasn't reached this offset).
|
||||
p.sendRangeEnd(streamID, 0)
|
||||
return
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
p.activeMu.Lock()
|
||||
|
|
@ -527,7 +616,7 @@ func (p *dataChannelPump) serveRange(streamID uint32, req wire.RangeReqPayload)
|
|||
if end-offset < chunkLen {
|
||||
chunkLen = end - offset
|
||||
}
|
||||
n, rerr := p.file.ReadAt(buf[:chunkLen], offset)
|
||||
n, rerr := p.source.ReadAt(buf[:chunkLen], offset)
|
||||
if n > 0 {
|
||||
// EOF on a short read means this is the final chunk — flag it so the
|
||||
// browser doesn't wait for more data before processing RangeEnd.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue