From 9b97aedfe40ac8cf4134d2798afb6d585d6c9be8 Mon Sep 17 00:00:00 2001 From: Deivid Soto Date: Wed, 10 Jun 2026 00:21:15 +0200 Subject: [PATCH] feat(hls): resume-aware first spawn + capped-CRF/CQ rate control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - HLSSessionConfig.StartSec (sync StreamSession.startSec): el primer ffmpeg arranca ya seekeado en el punto de resume (-ss + -output_ts_offset + -start_number, misma maquinaria que el seek-restart) en vez de encodear desde seg-0 para morir en el seek-restart inmediato del player (doble spawn, resume lento). readyMax se pre-siembra al índice de arranque; el ready-watcher compara ReadyCount() > WriterStartIdx() para no marcar "ready" antes del primer segmento real. startSec >= duración → arranque desde 0 (resume obsoleto de un fichero reemplazado). - Rate control: capped constant-quality donde el encoder lo hace bien — libx264 -crf 23, NVENC -cq 23 -b:v 0 — con el mismo -maxrate de siempre y -bufsize 2x (antes 1x estrangulaba picos). Escenas fáciles emiten muchos menos bits (menos stalls vía funnel/LTE); el peor caso no cambia. QSV/VideoToolbox/VAAPI conservan el triple de bitrate fijo probado (sus knobs de calidad tienen gotchas de vendor). - Limpieza: wrapper buildHLSFFmpegArgs y guard startIdx<0 muertos. --- internal/agent/types.go | 7 ++ internal/cmd/daemon.go | 11 ++- internal/engine/hls.go | 112 +++++++++++++++++++++--- internal/engine/hls_ratecontrol_test.go | 111 +++++++++++++++++++++++ internal/engine/transcode_quality.go | 34 +++++++ 5 files changed, 259 insertions(+), 16 deletions(-) create mode 100644 internal/engine/hls_ratecontrol_test.go diff --git a/internal/agent/types.go b/internal/agent/types.go index 345df4e..4086186 100644 --- a/internal/agent/types.go +++ b/internal/agent/types.go @@ -478,6 +478,13 @@ type StreamSession struct { // omitted. 
Forces a full video re-encode (the overlay can't ride a copy // path), so the web only sends it when the user picks a bitmap sub. BurnSubtitleIndex *int `json:"burnSubtitleIndex,omitempty"` + // StartSec is the playback position (seconds) the viewer opens at — the + // saved resume point, or the current position on a quality/audio switch. + // HLS sessions spawn the FIRST ffmpeg already seeked there instead of + // encoding from segment 0 and immediately seek-restarting (double spawn, + // slow resume). 0/omitted = start at the beginning. Older daemons simply + // don't decode the field and keep the old start-at-0 behaviour. + StartSec float64 `json:"startSec,omitempty"` // PlayMethod is how the daemon should serve this session: // "" — default (HLS transcode); also what legacy servers send. // "direct" — the source is already browser-native (the web decided this diff --git a/internal/cmd/daemon.go b/internal/cmd/daemon.go index 3bbcc41..ee91dc4 100644 --- a/internal/cmd/daemon.go +++ b/internal/cmd/daemon.go @@ -790,6 +790,7 @@ func runDaemonStart() error { Quality: sess.Quality, AudioIndex: sess.AudioIndex, BurnSubtitleIndex: sess.BurnSubtitleIndex, + StartSec: sess.StartSec, Transcode: tcRuntime, Cache: hlsCache, // 2c: refresh the debrid link if it expires mid-transcode; the @@ -925,6 +926,7 @@ func runDaemonStart() error { Quality: sess.Quality, AudioIndex: sess.AudioIndex, BurnSubtitleIndex: sess.BurnSubtitleIndex, + StartSec: sess.StartSec, Transcode: tcRuntime, Cache: hlsCache, }, hlsCtx, hlsCancel) @@ -1449,8 +1451,13 @@ func watchSessionReady(ctx context.Context, client *agent.Client, hsess *engine. if hsess.IsClosed() { return } - // Phase 1: cache HIT or seg-0 ready → flip the "Preparando…" UI now. - if !readyPosted && (hsess.FromCache() || hsess.ReadyCount() >= 1) { + // Phase 1: cache HIT or first segment ready → flip the "Preparando…" + // UI now. 
Compare against WriterStartIdx, not `>= 1`: a resume + // session (StartSec) pre-seeds readyMax to the start index, so + // ReadyCount() is ≥ 1 before ffmpeg has written a single byte — + // `>= 1` would fire "ready" instantly and freeze the player waiting + // on a segment that doesn't exist yet. + if !readyPosted && (hsess.FromCache() || hsess.ReadyCount() > hsess.WriterStartIdx()) { postReady(nil) readyPosted = true // Cache replay has no live encode → no telemetry to report, done. diff --git a/internal/engine/hls.go b/internal/engine/hls.go index 1eab7e0..f5e1c30 100644 --- a/internal/engine/hls.go +++ b/internal/engine/hls.go @@ -66,6 +66,17 @@ func segmentStartSec(idx int) float64 { return float64(idx * hlsSegmentDuration) } +// segmentIdxForTime returns the index of the segment containing second `sec` +// of the timeline — the inverse of segmentStartSec. Used to translate a +// session's StartSec (resume position) into the segment the FIRST ffmpeg +// should start writing from. +func segmentIdxForTime(sec float64) int { + if sec <= 0 { + return 0 + } + return int(sec / float64(hlsSegmentDuration)) +} + // segmentCountForDuration returns how many segments cover a source of the // given duration. Always returns at least 1. func segmentCountForDuration(dur float64) int { @@ -160,7 +171,16 @@ type HLSSessionConfig struct { // with the clean one. Forces the video re-encode the HLS path already does // to also composite the subtitle overlay. BurnSubtitleIndex *int - Transcode TranscodeRuntime + // StartSec is the playback position (seconds) the viewer will start at — + // the saved resume point, or the current position on a quality/audio + // switch. When > 0 the FIRST ffmpeg spawns already seeked there + // (`-ss` + `-output_ts_offset` + `-start_number`, the same flags as a + // seek-restart), instead of encoding from segment 0 only to be + // killed by an immediate seek-restart when the player asks for the resume + // segment (double spawn, slow resume). 
0 = start at the beginning. + // Ignored on a cache HIT (every segment is already on disk). + StartSec float64 + Transcode TranscodeRuntime // Cache is an optional persistent segment cache keyed by (source, quality, // audio). When set, completed encodes are kept across sessions so re-plays // of the same file at the same quality skip ffmpeg entirely. nil disables @@ -503,11 +523,38 @@ func StartHLSSession(ctx context.Context, cfg HLSSessionConfig) (*HLSSession, er return s, nil } + // Resume-aware first spawn: when the session carries a StartSec (resume + // point / position on a quality switch), launch ffmpeg already seeked at + // the segment containing it. The web player opens playback at the same + // position (hls.js startPosition), so segment 0 would never be requested — + // encoding from 0 just to seek-restart milliseconds later wasted a full + // ffmpeg spawn and doubled the resume latency. Earlier segments simply + // don't exist on disk; ServeSegment's `idx < segStart` branch restarts the + // encoder if the user later scrubs back before the resume point. A partial + // encode never seals the cache (allSegmentsPresent checks 0..N), matching + // today's post-seek behaviour. + startIdx := 0 + if cfg.StartSec > 0 && cfg.StartSec < probe.DurationSec { + startIdx = segmentIdxForTime(cfg.StartSec) + if startIdx > segCount-1 { + startIdx = segCount - 1 + } + } else if cfg.StartSec >= probe.DurationSec && cfg.StartSec > 0 { + // Stale resume beyond this source's duration (the file was replaced by + // a shorter cut, or progress was saved against another release). Start + // from the beginning instead of encoding only the final segment, which + // would "end" the video seconds after it starts. 
+ log.Printf("[hls %s] startSec %.0f ≥ duration %.0f — starting from 0", + shortHLSID(cfg.SessionID), cfg.StartSec, probe.DurationSec) + } + s.ffmpegSegStart = startIdx + s.readyMax = startIdx + // Spawn ffmpeg under a dedicated context so Close() can kill it without // touching the parent ctx. ffCtx, cancel := context.WithCancel(context.Background()) s.cancel = cancel - args := buildHLSFFmpegArgs(cfg, probe, tmpDir) + args := buildHLSFFmpegArgsAt(cfg, probe, tmpDir, startIdx, segmentStartSec(startIdx)) cmd := exec.CommandContext(ffCtx, cfg.Transcode.FFmpegPath, args...) cmd.Stderr = &hlsStderrCapture{owner: s} if err := cmd.Start(); err != nil { @@ -540,10 +587,14 @@ func StartHLSSession(ctx context.Context, cfg HLSSessionConfig) (*HLSSession, er if profile.Preset != "" { presetNote = " preset=" + profile.Preset } - log.Printf("[hls %s] started: %s, %.1fs, %d segs (quality=%s, encoder=%s accel=%s%s)%s", + startNote := "" + if startIdx > 0 { + startNote = fmt.Sprintf(" start=seg-%d@%.0fs", startIdx, segmentStartSec(startIdx)) + } + log.Printf("[hls %s] started: %s, %.1fs, %d segs (quality=%s, encoder=%s accel=%s%s)%s%s", shortHLSID(cfg.SessionID), cfg.logName(), probe.DurationSec, segCount, coalesce(cfg.Quality, "auto"), - profile.Codec, string(cfg.Transcode.HWAccel), presetNote, cachedNote) + profile.Codec, string(cfg.Transcode.HWAccel), presetNote, cachedNote, startNote) return s, nil } @@ -601,16 +652,30 @@ func (s *HLSSession) ProbeInfo() map[string]any { } } -// ReadyCount returns how many segments are currently fully on disk. -// Caller can `>= 1` it to check whether seg-0 has landed (and so the -// player can be told to attach). For cache-HIT sessions this is always -// `segmentCount` from the moment StartHLSSession returns. +// ReadyCount returns the session's readyMax watermark: segment idx is on disk +// iff idx < ReadyCount() AND idx >= WriterStartIdx(). 
For a from-zero encode +// this is simply "how many segments are on disk"; for a resume session +// (StartSec > 0) readyMax is pre-seeded to the start index, so the FIRST real +// segment has landed only once ReadyCount() > WriterStartIdx() — use that +// comparison, not `>= 1`, to flip the player's "Preparando…" UI. For +// cache-HIT sessions this is always `segmentCount` from the moment +// StartHLSSession returns. func (s *HLSSession) ReadyCount() int { s.readyMu.Lock() defer s.readyMu.Unlock() return s.readyMax } +// WriterStartIdx returns the segment index the CURRENT ffmpeg writer started +// at: 0 for a from-the-beginning encode, the resume segment for a StartSec +// session, the seek target after a seek-restart. See ReadyCount for the +// "first segment landed" comparison. +func (s *HLSSession) WriterStartIdx() int { + s.mu.Lock() + defer s.mu.Unlock() + return s.ffmpegSegStart +} + // FromCache reports whether this session was served from the HLS cache // (no ffmpeg subprocess spawned). Used by ready-watcher logic to short- // circuit polling — a cache HIT is ready the moment we return. @@ -1159,11 +1224,6 @@ func (s *HLSSession) restartFromSegment(targetIdx int) error { // ---- ffmpeg argument builders ---- -// buildHLSFFmpegArgs returns the argv for the initial HLS encode (start at 0). -func buildHLSFFmpegArgs(cfg HLSSessionConfig, probe *StreamProbe, tmpDir string) []string { - return buildHLSFFmpegArgsAt(cfg, probe, tmpDir, 0, 0) -} - // EncoderProfile names the codec + preset + decoder hint combination the HLS // pipeline picks for the given hardware backend + transcode config. 
Exposed // so callers can log the chosen encoder before ffmpeg launches and so both @@ -1418,7 +1478,31 @@ func buildHLSFFmpegArgsAt(cfg HLSSessionConfig, probe *StreamProbe, tmpDir strin if bitrate == "" { bitrate = "5M" } - args = append(args, "-b:v", bitrate, "-maxrate", bitrate, "-bufsize", bitrate) + // Rate control: capped constant-quality where the encoder supports it well + // (libx264 CRF, NVENC CQ), plain CBR-ish elsewhere. Constant quality is the + // on-the-fly analogue of per-title encoding: easy scenes (dialogue, anime + // flats) emit FAR fewer bits than the fixed target — which is what keeps a + // funnel/LTE link from stalling — while complex scenes can still use up to + // `-maxrate` (the same ceiling as before, so worst-case quality and the + // level-derived VBV pair are unchanged). `-bufsize 2×maxrate` gives the VBV + // a standard one-segment window to absorb spikes; the old 1× window forced + // the encoder to flatline at the cap. CPB stays far below every H.264 + // level's limit (level 3.1 allows 14 Mbps CPB vs our 3M at 480p). + switch codec { + case "libx264": + // Capped CRF: no -b:v (CRF drives quality), -maxrate/-bufsize cap it. + args = append(args, "-crf", "23", "-maxrate", bitrate, "-bufsize", doubleBitrate(bitrate)) + case "h264_nvenc": + // NVENC constant-quality VBR: -cq targets quality, -b:v 0 disables the + // default 2M average-bitrate target that would otherwise fight it. + args = append(args, "-cq", "23", "-b:v", "0", "-maxrate", bitrate, "-bufsize", doubleBitrate(bitrate)) + default: + // QSV / VideoToolbox / VAAPI: keep the proven fixed-bitrate triple — + // their constant-quality knobs (ICQ, -q:v) have vendor-specific gotchas + // (VideoToolbox ignores -q:v when -b:v is set; QSV ICQ conflicts with + // look_ahead=0) and we can't regression-test them here. + args = append(args, "-b:v", bitrate, "-maxrate", bitrate, "-bufsize", bitrate) + } // Force keyframe alignment with segment boundaries. 
args = append(args, "-force_key_frames", fmt.Sprintf("expr:gte(t,n_forced*%d)", hlsSegmentDuration)) diff --git a/internal/engine/hls_ratecontrol_test.go b/internal/engine/hls_ratecontrol_test.go new file mode 100644 index 0000000..612f391 --- /dev/null +++ b/internal/engine/hls_ratecontrol_test.go @@ -0,0 +1,111 @@ +package engine + +import ( + "strings" + "testing" +) + +func TestDoubleBitrate(t *testing.T) { + cases := map[string]string{ + "6000k": "12000k", + "25000k": "50000k", + "1500k": "3000k", + "5M": "10M", + "1.5M": "3M", + "2.5m": "5m", + "800000": "1600000", + "": "", + "garbage": "garbage", // unparseable → unchanged (1× bufsize fallback) + "-5M": "-5M", // non-positive → unchanged + } + for in, want := range cases { + if got := doubleBitrate(in); got != want { + t.Errorf("doubleBitrate(%q) = %q, want %q", in, got, want) + } + } +} + +// segmentIdxForTime must be the exact inverse of segmentStartSec so the +// resume-aware first spawn (HLSSessionConfig.StartSec) lands on the same +// segment the player's hls.js startPosition will request. +func TestSegmentIdxForTime(t *testing.T) { + cases := map[float64]int{ + 0: 0, + -3: 0, + 0.5: 0, + 1.99: 0, + 2: 1, + 3.9: 1, + 60: 30, + 3599.9: 1799, + } + for sec, want := range cases { + if got := segmentIdxForTime(sec); got != want { + t.Errorf("segmentIdxForTime(%v) = %d, want %d", sec, got, want) + } + } + // Round-trip: the start time of the segment we resolve must never be + // AFTER the requested position (the player would miss its first frames). + for _, sec := range []float64{0, 1, 2, 7.3, 119.9, 4321} { + idx := segmentIdxForTime(sec) + if start := segmentStartSec(idx); start > sec { + t.Errorf("segmentStartSec(segmentIdxForTime(%v)) = %v > %v", sec, start, sec) + } + } +} + +// Capped constant-quality rate control: libx264 gets -crf (no -b:v), NVENC +// gets -cq with -b:v 0, both keep -maxrate at the level-coherent cap and a +// 2× -bufsize. 
VAAPI (and the other vendor encoders) keep the proven +// fixed-bitrate triple untouched. +func TestBuildHLSFFmpegArgsRateControl(t *testing.T) { + probe := &StreamProbe{Width: 1920, Height: 1080, DurationSec: 100} + base := HLSSessionConfig{ + SessionID: "test", + SourcePath: "/media/Movie.mkv", + Quality: "1080p", + Transcode: TranscodeRuntime{ + FFmpegPath: "/usr/bin/ffmpeg", + FFprobePath: "/usr/bin/ffprobe", + }, + } + + t.Run("libx264 capped CRF", func(t *testing.T) { + cfg := base + cfg.Transcode.HWAccel = HWAccelNone + got := strings.Join(buildHLSFFmpegArgsAt(cfg, probe, "/tmp/tmpdir", 0, 0), " ") + for _, want := range []string{"-crf 23", "-maxrate 6000k", "-bufsize 12000k"} { + if !strings.Contains(got, want) { + t.Errorf("libx264 argv missing %q\n%s", want, got) + } + } + if strings.Contains(got, "-b:v 6000k") { + t.Errorf("libx264 argv must not carry -b:v alongside -crf\n%s", got) + } + }) + + t.Run("nvenc constant-quality VBR", func(t *testing.T) { + cfg := base + cfg.Transcode.HWAccel = HWAccelNVENC + got := strings.Join(buildHLSFFmpegArgsAt(cfg, probe, "/tmp/tmpdir", 0, 0), " ") + for _, want := range []string{"-rc vbr", "-cq 23", "-b:v 0", "-maxrate 6000k", "-bufsize 12000k"} { + if !strings.Contains(got, want) { + t.Errorf("nvenc argv missing %q\n%s", want, got) + } + } + }) + + t.Run("vaapi keeps fixed-bitrate triple", func(t *testing.T) { + cfg := base + cfg.Transcode.HWAccel = HWAccelVAAPI + got := strings.Join(buildHLSFFmpegArgsAt(cfg, probe, "/tmp/tmpdir", 0, 0), " ") + for _, want := range []string{"-b:v 6000k", "-maxrate 6000k", "-bufsize 6000k"} { + if !strings.Contains(got, want) { + t.Errorf("vaapi argv missing %q\n%s", want, got) + } + } + if strings.Contains(got, "-crf") || strings.Contains(got, "-cq") { + t.Errorf("vaapi argv must not carry constant-quality flags\n%s", got) + } + }) +} diff --git a/internal/engine/transcode_quality.go b/internal/engine/transcode_quality.go index 40a9fb7..64bbae4 100644 --- 
// doubleBitrate returns an ffmpeg bitrate string carrying twice the value of
// b, keeping a trailing k/K/m/M unit suffix as written ("6000k" → "12000k",
// "1.5M" → "3M", "800000" → "1600000"). It is used to size `-bufsize` at 2×
// `-maxrate` for the capped-CRF/CQ rate-control paths.
//
// b is returned unchanged — so the caller ends up with a 1× bufsize — when it
// is empty, its numeric part does not parse, the value is not strictly
// positive, or the value is non-finite or overflows when doubled.
// strconv.ParseFloat accepts "NaN", "Inf" and "Infinity" (any case, optionally
// signed); without the explicit checks below those inputs would be re-emitted
// as "NaN" / "+Inf" instead of falling back to the input.
func doubleBitrate(b string) string {
	if b == "" {
		return b
	}
	// Split off a single-byte unit suffix, if any; everything before it must
	// parse as a number.
	num, suffix := b, ""
	switch b[len(b)-1] {
	case 'k', 'K', 'm', 'M':
		num, suffix = b[:len(b)-1], b[len(b)-1:]
	}
	v, err := strconv.ParseFloat(num, 64)
	if err != nil || math.IsNaN(v) || v <= 0 {
		return b
	}
	d := v * 2
	if math.IsInf(d, 0) {
		// v was +Inf, or finite but too large to double.
		return b
	}
	// Precision -1 prints the shortest representation that round-trips, so
	// integral results come out without a decimal point ("12000") and
	// fractional ones keep only the digits they need.
	return strconv.FormatFloat(d, 'f', -1, 64) + suffix
}