feat(stream): live transcode telemetry from ffmpeg speed=
Parse ffmpeg's -stats progress line (speed=Yx, fps=) from the HLS encoder's stderr into a per-session EWMA, and report a health snapshot to the web side a few seconds after seg-0. Lets the player name a too-slow transcode from a direct measurement (~5-7s) instead of inferring it from stall shape (~15-30s). - hls.go: add -stats; rewrite hlsStderrCapture.Write to frame on \r and \n, parse speed=/fps= (telemetry only, never logged), flag input-bound on source read errors. EWMA on HLSSession + GetTranscodeStats(); warmup-skip the first cold-start frames so a healthy encoder isn't reported as struggling. - client.go: MarkSessionReady takes an optional *SessionHealth. - daemon.go: watcher reports one health snapshot once >=4 post-warmup samples settle; classifyAgentHealth maps the speed ratio to ok/marginal/struggling. Additive: old web replicas ignore the extra field; cache-hit/direct-play sessions and short encodes report nil (the web keeps its stall heuristic).
This commit is contained in:
parent
2b47cb0656
commit
f14aee0b93
4 changed files with 335 additions and 24 deletions
|
|
@ -27,6 +27,7 @@ import (
|
|||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
|
@ -254,6 +255,21 @@ type HLSSession struct {
|
|||
cacheKey string
|
||||
fromCache bool
|
||||
writerLockHeld bool
|
||||
|
||||
// Live transcode telemetry (F3). ffmpeg's -stats progress line is parsed
|
||||
// in hlsStderrCapture.Write into an EWMA of speed= (×realtime) + fps=, plus
|
||||
// an input-bound hint set when the SOURCE read errors (slow/broken pull vs a
|
||||
// too-slow encode). GetTranscodeStats() snapshots this so the ready-watcher
|
||||
// can report a real measurement to the web side — letting the player name a
|
||||
// too-slow transcode honestly in ~4s instead of inferring it from stall
|
||||
// shape over 15-30s. Guarded by statsMu (the stderr goroutine writes; the
|
||||
// watcher goroutine reads).
|
||||
statsMu sync.Mutex
|
||||
speedEWMA float64
|
||||
fpsEWMA float64
|
||||
speedSamples int
|
||||
warmupSeen int // cold-start frames discarded before the EWMA is trusted
|
||||
inputBound bool
|
||||
}
|
||||
|
||||
// hlsSeekAhead is how many segments past the writer's current position the
|
||||
|
|
@ -595,6 +611,68 @@ func (s *HLSSession) ReadyCount() int {
|
|||
// circuit polling — a cache HIT is ready the moment we return.
|
||||
func (s *HLSSession) FromCache() bool { return s.fromCache }
|
||||
|
||||
// TranscodeStats is a point-in-time snapshot of live ffmpeg progress for one
|
||||
// HLS session (F3). SpeedX < 1.0 means the encode runs slower than realtime —
|
||||
// the player can't sustain playback without buffering. Samples==0 means no
|
||||
// -stats line has been parsed yet (the watcher keeps waiting before reporting).
|
||||
type TranscodeStats struct {
|
||||
SpeedX float64 // EWMA of ffmpeg speed= (×realtime; 1.0 = exactly realtime)
|
||||
Fps float64 // EWMA of ffmpeg fps=
|
||||
Samples int // progress lines parsed so far (0 = no telemetry yet)
|
||||
InputBound bool // source read hit I/O errors (slow/broken pull, not encode)
|
||||
FromCache bool // replayed from cache → no live encode, stats meaningless
|
||||
}
|
||||
|
||||
// GetTranscodeStats returns a snapshot of the parsed ffmpeg progress EWMAs.
|
||||
func (s *HLSSession) GetTranscodeStats() TranscodeStats {
|
||||
s.statsMu.Lock()
|
||||
defer s.statsMu.Unlock()
|
||||
return TranscodeStats{
|
||||
SpeedX: s.speedEWMA,
|
||||
Fps: s.fpsEWMA,
|
||||
Samples: s.speedSamples,
|
||||
InputBound: s.inputBound,
|
||||
FromCache: s.fromCache,
|
||||
}
|
||||
}
|
||||
|
||||
// hlsStatsWarmupSkip is how many leading -stats frames to discard before
|
||||
// trusting the EWMA. ffmpeg's first readings reflect the pipeline filling
|
||||
// (often speed=0.0x) and would otherwise drag a healthy encoder into a false
|
||||
// "struggling" verdict that pauses a stream which plays fine once warmed up.
|
||||
const hlsStatsWarmupSkip = 2
|
||||
|
||||
// recordProgress folds one parsed ffmpeg -stats sample into the session EWMAs.
|
||||
// alpha=0.3 smooths the noisy per-line numbers while still tracking a sustained
|
||||
// slowdown within a few samples (~2s of encoding).
|
||||
func (s *HLSSession) recordProgress(speedX, fps float64) {
|
||||
s.statsMu.Lock()
|
||||
defer s.statsMu.Unlock()
|
||||
// Drop the cold-start frames so a steady-state slowdown — not the encoder
|
||||
// spin-up — is what the watcher reports.
|
||||
if s.warmupSeen < hlsStatsWarmupSkip {
|
||||
s.warmupSeen++
|
||||
return
|
||||
}
|
||||
const alpha = 0.3
|
||||
if s.speedSamples == 0 {
|
||||
s.speedEWMA = speedX
|
||||
s.fpsEWMA = fps
|
||||
} else {
|
||||
s.speedEWMA = alpha*speedX + (1-alpha)*s.speedEWMA
|
||||
s.fpsEWMA = alpha*fps + (1-alpha)*s.fpsEWMA
|
||||
}
|
||||
s.speedSamples++
|
||||
}
|
||||
|
||||
// markInputBound flags that ffmpeg reported a source-read error — the wall is
|
||||
// the input pull (slow debrid link / dropped torrent peer), not the encoder.
|
||||
func (s *HLSSession) markInputBound() {
|
||||
s.statsMu.Lock()
|
||||
s.inputBound = true
|
||||
s.statsMu.Unlock()
|
||||
}
|
||||
|
||||
// IsClosed reports whether Close() has been invoked. Exposed (vs the
|
||||
// internal isClosed) so external watchers — the ready-webhook
|
||||
// goroutine in cmd/daemon.go — can short-circuit polling on a session
|
||||
|
|
@ -1140,7 +1218,10 @@ func ResolveEncoderProfile(hw HWAccel, configuredPreset string) EncoderProfile {
|
|||
// `-output_ts_offset` keeps the segment PTS aligned with manifest timeline.
|
||||
func buildHLSFFmpegArgsAt(cfg HLSSessionConfig, probe *StreamProbe, tmpDir string, startIdx int, startSec float64) []string {
|
||||
profile := ResolveEncoderProfile(cfg.Transcode.HWAccel, cfg.Transcode.Preset)
|
||||
args := []string{"-y", "-hide_banner", "-loglevel", "warning"}
|
||||
// -stats forces ffmpeg to emit the frame=/fps=/speed= progress line to
|
||||
// stderr even at -loglevel warning; hlsStderrCapture parses it for live
|
||||
// transcode telemetry (F3) without logging it.
|
||||
args := []string{"-y", "-hide_banner", "-loglevel", "warning", "-stats"}
|
||||
|
||||
// Demuxer-side HW-decode hint. Sourced from the profile so a future
|
||||
// codec/hint mismatch is impossible — the encoder + decode hint are
|
||||
|
|
@ -1581,6 +1662,46 @@ type hlsStderrCapture struct {
|
|||
|
||||
const maxStderrBuf = 64 * 1024
|
||||
|
||||
// ffmpeg -stats progress lines look like:
|
||||
//
|
||||
// frame= 123 fps= 30 q=28.0 size= 456kB time=00:00:08.00 speed=1.05x
|
||||
//
|
||||
// emitted with a trailing \r (overwrite-in-place), once per ~0.5s. We parse
|
||||
// speed=/fps= out of them for live transcode telemetry (F3) and DON'T log them
|
||||
// (one per 0.5s would drown the daemon log) — only \n-terminated warning/error
|
||||
// lines reach log.Printf below.
|
||||
var (
|
||||
reFFmpegSpeed = regexp.MustCompile(`speed=\s*([0-9.]+)x`)
|
||||
reFFmpegFps = regexp.MustCompile(`fps=\s*([0-9.]+)`)
|
||||
)
|
||||
|
||||
func parseFFmpegProgress(line string) (speedX, fps float64, ok bool) {
|
||||
m := reFFmpegSpeed.FindStringSubmatch(line)
|
||||
if m == nil {
|
||||
return 0, 0, false
|
||||
}
|
||||
v, err := strconv.ParseFloat(m[1], 64)
|
||||
if err != nil {
|
||||
return 0, 0, false
|
||||
}
|
||||
if fm := reFFmpegFps.FindStringSubmatch(line); fm != nil {
|
||||
fps, _ = strconv.ParseFloat(fm[1], 64)
|
||||
}
|
||||
return v, fps, true
|
||||
}
|
||||
|
||||
// isInputBoundLine spots ffmpeg stderr that means the SOURCE read failed (slow
|
||||
// debrid link, dropped torrent peer, network timeout) rather than the encoder
|
||||
// being too slow — so the player names the bottleneck as the link, not the GPU.
|
||||
func isInputBoundLine(line string) bool {
|
||||
l := strings.ToLower(line)
|
||||
return strings.Contains(l, "i/o error") ||
|
||||
strings.Contains(l, "connection reset") ||
|
||||
strings.Contains(l, "rw_timeout") ||
|
||||
strings.Contains(l, "error in the pull function") ||
|
||||
strings.Contains(l, "connection timed out")
|
||||
}
|
||||
|
||||
func (c *hlsStderrCapture) Write(p []byte) (int, error) {
|
||||
// If the incoming chunk alone exceeds the cap (very long unterminated
|
||||
// line), drop the buffered prefix AND truncate p so a single multi-MB
|
||||
|
|
@ -1589,20 +1710,33 @@ func (c *hlsStderrCapture) Write(p []byte) (int, error) {
|
|||
c.buf.Reset()
|
||||
p = p[len(p)-maxStderrBuf:]
|
||||
} else if c.buf.Len()+len(p) > maxStderrBuf {
|
||||
// Drop the unterminated partial line; we'll resync on the next \n.
|
||||
// Drop the unterminated partial line; we'll resync on the next \r/\n.
|
||||
c.buf.Reset()
|
||||
}
|
||||
c.buf.Write(p)
|
||||
// Frame on \r OR \n: ffmpeg's progress line is \r-terminated, warnings are
|
||||
// \n-terminated. Parsing progress per-frame keeps the EWMA fresh; logging
|
||||
// only the \n lines keeps the log readable.
|
||||
for {
|
||||
line, rest, ok := strings.Cut(c.buf.String(), "\n")
|
||||
if !ok {
|
||||
s := c.buf.String()
|
||||
idx := strings.IndexAny(s, "\r\n")
|
||||
if idx < 0 {
|
||||
break
|
||||
}
|
||||
line := strings.TrimSpace(s[:idx])
|
||||
c.buf.Reset()
|
||||
c.buf.WriteString(rest)
|
||||
if line = strings.TrimSpace(line); line != "" {
|
||||
log.Printf("[hls %s] ffmpeg: %s", shortHLSID(c.owner.cfg.SessionID), line)
|
||||
c.buf.WriteString(s[idx+1:])
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
if speedX, fps, ok := parseFFmpegProgress(line); ok {
|
||||
c.owner.recordProgress(speedX, fps)
|
||||
continue // progress line — telemetry only, never logged
|
||||
}
|
||||
if isInputBoundLine(line) {
|
||||
c.owner.markInputBound()
|
||||
}
|
||||
log.Printf("[hls %s] ffmpeg: %s", shortHLSID(c.owner.cfg.SessionID), line)
|
||||
}
|
||||
return len(p), nil
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue