feat(stream): live transcode telemetry from ffmpeg speed=

Parse ffmpeg's -stats progress line (speed=Yx, fps=) from the HLS encoder's
stderr into a per-session EWMA, and report a health snapshot to the web side a
few seconds after seg-0. Lets the player name a too-slow transcode from a
direct measurement (~5-7s) instead of inferring it from stall shape (~15-30s).

- hls.go: add -stats; rewrite hlsStderrCapture.Write to frame on \r and \n,
  parse speed=/fps= (telemetry only, never logged), flag input-bound on source
  read errors. EWMA on HLSSession + GetTranscodeStats(); warmup-skip the first
  cold-start frames so a healthy encoder isn't reported as struggling.
- client.go: MarkSessionReady takes an optional *SessionHealth.
- daemon.go: watcher reports one health snapshot once >=4 post-warmup samples
  settle; classifyAgentHealth maps the speed ratio to ok/marginal/struggling.

Additive: old web replicas ignore the extra field; cache-hit/direct-play
sessions and short encodes report nil (the web keeps its stall heuristic).
This commit is contained in:
Deivid Soto 2026-06-06 00:37:03 +02:00
parent 2b47cb0656
commit f14aee0b93
4 changed files with 335 additions and 24 deletions

View file

@@ -119,10 +119,11 @@ func (c *Client) ReportUpgradeResult(ctx context.Context, agentID string, succes
// will reach the same conclusion via HEAD probes anyway if this call
// fails. We log the error in the caller but don't retry — by the time
// a retry would land the user is likely already playing.
func (c *Client) MarkSessionReady(ctx context.Context, sessionID string) error {
func (c *Client) MarkSessionReady(ctx context.Context, sessionID string, health *SessionHealth) error {
req := struct {
SessionID string `json:"sessionId"`
}{SessionID: sessionID}
SessionID string `json:"sessionId"`
Health *SessionHealth `json:"health,omitempty"`
}{SessionID: sessionID, Health: health}
var resp StatusResponse
if err := c.doPost(ctx, "/api/internal/agent/session-ready", req, &resp); err != nil {
return fmt.Errorf("mark session ready: %w", err)
@@ -130,6 +131,20 @@ func (c *Client) MarkSessionReady(ctx context.Context, sessionID string) error {
return nil
}
// SessionHealth carries an optional snapshot of live-transcode health that
// rides along with a session-ready report (F3). Passing a nil *SessionHealth
// signals that the agent has nothing to report (cache hit, direct-play, or
// progress that has not stabilised yet), in which case the web side keeps
// using its stall-shape heuristic. The field is additive on the wire: web
// replicas that predate it ignore it, and agents that predate it never send it.
type SessionHealth struct {
	// Health is the coarse verdict: "ok" (at or above realtime), "marginal"
	// (barely keeping up) or "struggling" (cannot keep up).
	Health string `json:"health"`
	// RealtimeRatio is the EWMA of ffmpeg's speed= value. 1.0 means exactly
	// realtime; anything below 1.0 is slower than playback.
	RealtimeRatio float64 `json:"realtimeRatio"`
	// Reason qualifies the verdict: "realtime", "transcode" (the encoder is
	// the wall) or "input_bound" (source read).
	Reason string `json:"reason"`
}
// RefreshStreamURL re-resolves a fresh debrid direct URL for a live streaming
// session (hueco #2 / 2c). Called by the daemon when a debrid source expires
// mid-stream (the link is time-limited; the content is still cached). Returns