unarr/internal/engine/stream_source.go
Deivid Soto 66ac79664b feat(stream): real-time transcoding for non-browser-decodable codecs
Source files in HEVC, AV1, AC3, DTS, EAC3, etc. now transcode through ffmpeg
to fragmented MP4 (h264 + aac) on-the-fly when the browser would otherwise
play silent black. Decision matrix lives in engine.DecideAction:
passthrough → remux → audio-transcode → full video-transcode.

Architecture — temp file + growing-size source:
- engine.streamSource interface abstracts byte source. Two impls:
  * diskFileSource: passthrough when codecs are already browser-friendly.
  * transcodeSource: spawns ffmpeg writing to a /tmp/tc-stream-*.mp4 file.
    A ticker polls file size and wakes blocked ReadAt callers as ffmpeg
    produces output. Estimate of final size (bitrate × duration) is
    announced over the wire so the browser's scrubber has something to
    anchor on.
- dataChannelPump now reads from streamSource instead of *os.File. HELLO
  carries Transcoding=true + an estimated total size; Seekable=true (we
  read random-access from the temp file even while writing).
- Transcoder runtime resolved per session by buildTranscodeRuntime in
  cmd/daemon: ffmpeg/ffprobe path lookup + HWAccel auto-detection
  (NVENC/QSV/VAAPI/VideoToolbox).
- New [downloads.transcode] TOML section: enabled (default true), hw_accel
  (auto), preset (veryfast), video_bitrate (5M), audio_bitrate (192k),
  max_height (optional downscale), max_concurrent (safety cap).

Falls back to passthrough if ffprobe is missing, fails, or codecs are
already browser-friendly. tmp file is cleaned up on session shutdown.
2026-05-07 09:26:05 +02:00

354 lines
9.5 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package engine
import (
"context"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"sync"
"sync/atomic"
"time"
)
// streamSource abstracts the byte source served over the WebRTC DataChannel.
// Two implementations:
// - diskFileSource — direct passthrough of the on-disk file.
// - transcodeSource — ffmpeg writes a fragmented MP4 to a temp file in
// real time; reads block briefly when callers ask for bytes ahead of
// the writer.
type streamSource interface {
ReadAt(p []byte, off int64) (int, error)
// Size returns the currently known size. For transcoded sources this
// grows as ffmpeg produces output; on Final() it's the final size.
Size() int64
// Final reports whether the source size is now stable (passthrough is
// always final, transcoder becomes final when ffmpeg exits).
Final() bool
// EstimatedSize returns the final size we expect to converge on. For
// passthrough it's the same as Size(). For transcoder it's a bitrate
// × duration estimate so the browser scrubber has something to anchor
// on; the real size will differ ±20%.
EstimatedSize() int64
FileName() string
Transcoded() bool
Close() error
}
// ─────────────────────────────────────────────────────────────────────────────
// disk passthrough
// ─────────────────────────────────────────────────────────────────────────────
type diskFileSource struct {
f *os.File
size int64
name string
}
func newDiskFileSource(path string) (*diskFileSource, error) {
f, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("stream source: open %s: %w", path, err)
}
stat, err := f.Stat()
if err != nil {
f.Close()
return nil, fmt.Errorf("stream source: stat %s: %w", path, err)
}
return &diskFileSource{f: f, size: stat.Size(), name: filepath.Base(path)}, nil
}
func (d *diskFileSource) ReadAt(p []byte, off int64) (int, error) {
return d.f.ReadAt(p, off)
}
func (d *diskFileSource) Size() int64 { return d.size }
func (d *diskFileSource) Final() bool { return true }
func (d *diskFileSource) EstimatedSize() int64 { return d.size }
func (d *diskFileSource) FileName() string { return d.name }
func (d *diskFileSource) Transcoded() bool { return false }
func (d *diskFileSource) Close() error { return d.f.Close() }
// ─────────────────────────────────────────────────────────────────────────────
// transcode source — ffmpeg → tmp file
// ─────────────────────────────────────────────────────────────────────────────
type transcodeSource struct {
tmpPath string
tmpFile *os.File
cmd *Transcoder
name string
estimate int64
mu sync.Mutex
cond *sync.Cond
size atomic.Int64
final atomic.Bool
failure error
startedAt time.Time
}
const (
// readBlockTimeout caps how long ReadAt waits for bytes that haven't
// been transcoded yet before returning EOF/io.ErrUnexpectedEOF. The
// pump treats EOF as "respond with whatever we have so far + RangeEnd"
// so the browser can re-request once more bytes appear.
readBlockTimeout = 30 * time.Second
)
func newTranscodeSource(
ctx context.Context,
srcPath string,
probe *StreamProbe,
action TranscodeAction,
opts TranscodeOpts,
displayName string,
) (*transcodeSource, error) {
tmpFile, err := os.CreateTemp("", "tc-stream-*.mp4")
if err != nil {
return nil, fmt.Errorf("transcode source: tmp file: %w", err)
}
tmpPath := tmpFile.Name()
tmpFile.Close()
args := buildFFmpegArgs(srcPath, opts)
// Override -f mp4 pipe:1 with output to our tmp file path (last 3 args).
if len(args) >= 3 && args[len(args)-1] == "pipe:1" {
args[len(args)-1] = tmpPath
}
// Spawn ffmpeg directly (not via NewTranscoder pipe) so it writes to
// disk in real time. We re-use the rest of TranscodeOpts wiring.
cmd := &Transcoder{}
cmd, err = startTranscoderToFile(ctx, opts.FFmpegPath, args, cmd)
if err != nil {
os.Remove(tmpPath)
return nil, err
}
estimate := estimateOutputSize(probe, opts)
t := &transcodeSource{
tmpPath: tmpPath,
cmd: cmd,
name: displayName,
estimate: estimate,
startedAt: time.Now(),
}
t.cond = sync.NewCond(&t.mu)
// Re-open the tmp file for reading; ffmpeg keeps writing to it.
rf, err := os.Open(tmpPath)
if err != nil {
_ = cmd.Close()
os.Remove(tmpPath)
return nil, fmt.Errorf("transcode source: reopen tmp: %w", err)
}
t.tmpFile = rf
go t.watchSize(ctx)
go t.watchExit()
return t, nil
}
// watchSize polls the temp file size every 200 ms and wakes any blocked
// ReadAt callers once new bytes arrive.
func (t *transcodeSource) watchSize(ctx context.Context) {
ticker := time.NewTicker(200 * time.Millisecond)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
t.mu.Lock()
t.cond.Broadcast()
t.mu.Unlock()
return
case <-ticker.C:
}
if t.final.Load() {
t.mu.Lock()
t.cond.Broadcast()
t.mu.Unlock()
return
}
stat, err := os.Stat(t.tmpPath)
if err != nil {
continue
}
current := stat.Size()
if current > t.size.Load() {
t.size.Store(current)
t.mu.Lock()
t.cond.Broadcast()
t.mu.Unlock()
}
}
}
// watchExit waits for ffmpeg to exit and locks in the final size.
func (t *transcodeSource) watchExit() {
err := t.cmd.cmd.Wait()
if err != nil && !isExpectedExit(err) {
t.mu.Lock()
t.failure = fmt.Errorf("ffmpeg exited: %w (%s)", err, t.cmd.Stderr())
t.mu.Unlock()
}
if stat, err := os.Stat(t.tmpPath); err == nil {
t.size.Store(stat.Size())
}
t.final.Store(true)
t.mu.Lock()
t.cond.Broadcast()
t.mu.Unlock()
}
func isExpectedExit(err error) bool {
// Killed by Close() — pion DC closed, that's fine.
if err == nil {
return true
}
return false
}
func (t *transcodeSource) ReadAt(p []byte, off int64) (int, error) {
if t.failure != nil {
return 0, t.failure
}
if int64(len(p)) == 0 {
return 0, nil
}
deadline := time.Now().Add(readBlockTimeout)
for {
size := t.size.Load()
if off+int64(len(p)) <= size || t.final.Load() {
break
}
// Need to wait for ffmpeg to write more.
t.mu.Lock()
// Check again under lock to avoid lost wakeup.
size = t.size.Load()
if off+int64(len(p)) <= size || t.final.Load() {
t.mu.Unlock()
break
}
// Wait with timeout via a small sleep loop — sync.Cond doesn't
// support timed wait, and a goroutine-per-sleep pattern works fine
// for our scale.
waited := time.NewTimer(500 * time.Millisecond)
done := make(chan struct{})
go func() {
t.cond.Wait()
close(done)
}()
t.mu.Unlock()
select {
case <-done:
case <-waited.C:
t.mu.Lock()
t.cond.Broadcast() // wake the goroutine so it can return
t.mu.Unlock()
<-done
}
if time.Now().After(deadline) {
break
}
}
if t.failure != nil {
return 0, t.failure
}
n, err := t.tmpFile.ReadAt(p, off)
// On growing file ReadAt returns io.EOF when reading past current size.
// Convert to io.ErrUnexpectedEOF only when we actually exceeded the
// final size; otherwise return n, nil so the pump sends what we have.
if err == io.EOF && !t.final.Load() {
if n > 0 {
return n, nil
}
return 0, errors.New("transcode source: read timed out waiting for ffmpeg output")
}
return n, err
}
func (t *transcodeSource) Size() int64 { return t.size.Load() }
func (t *transcodeSource) Final() bool { return t.final.Load() }
func (t *transcodeSource) EstimatedSize() int64 {
if t.final.Load() {
return t.size.Load()
}
return t.estimate
}
func (t *transcodeSource) FileName() string {
// Keep the original extension stripped — output is always fragmented MP4.
base := t.name
if i := lastIndexByte(base, '.'); i >= 0 {
base = base[:i]
}
return base + ".mp4"
}
func (t *transcodeSource) Transcoded() bool { return true }
func (t *transcodeSource) Close() error {
_ = t.cmd.Close()
if t.tmpFile != nil {
_ = t.tmpFile.Close()
}
if t.tmpPath != "" {
_ = os.Remove(t.tmpPath)
}
return nil
}
// estimateOutputSize converts probed bitrate × duration into a byte estimate
// so the browser scrubber has something to anchor on while transcoding.
func estimateOutputSize(probe *StreamProbe, opts TranscodeOpts) int64 {
if probe == nil || probe.DurationSec <= 0 {
return 0
}
videoKbps := parseBitrateKbps(opts.VideoBitrate, 5000)
audioKbps := parseBitrateKbps(opts.AudioBitrate, 192)
totalKbps := videoKbps + audioKbps
bytesPerSec := int64(totalKbps) * 1000 / 8
return int64(probe.DurationSec) * bytesPerSec
}
// parseBitrateKbps converts ffmpeg-style bitrate strings ("5M", "192k") to
// kilobits per second. Unknown formats fall back to fallback.
func parseBitrateKbps(s string, fallback int) int {
if s == "" {
return fallback
}
last := s[len(s)-1]
num := s
mult := 1
switch last {
case 'k', 'K':
num = s[:len(s)-1]
case 'M', 'm':
num = s[:len(s)-1]
mult = 1000
default:
// already in bps? treat as kbps
}
v := 0
for _, c := range num {
if c < '0' || c > '9' {
return fallback
}
v = v*10 + int(c-'0')
}
if v == 0 {
return fallback
}
return v * mult
}
func lastIndexByte(s string, c byte) int {
for i := len(s) - 1; i >= 0; i-- {
if s[i] == c {
return i
}
}
return -1
}