feat(hls): resume-aware first spawn + capped-CRF/CQ rate control
- HLSSessionConfig.StartSec (sync StreamSession.startSec): el primer ffmpeg arranca ya seekeado en el punto de resume (-ss + -output_ts_offset + -start_number, misma maquinaria que el seek-restart) en vez de encodear desde seg-0 para morir en el seek-restart inmediato del player (doble spawn, resume lento). readyMax se pre-siembra al índice de arranque; el ready-watcher compara ReadyCount() > WriterStartIdx() para no marcar "ready" antes del primer segmento real. startSec >= duración → arranque desde 0 (resume obsoleto de un fichero reemplazado).
- Rate control: capped constant-quality donde el encoder lo hace bien — libx264 -crf 23, NVENC -cq 23 -b:v 0 — con el mismo -maxrate de siempre y -bufsize 2x (antes 1x estrangulaba picos). Escenas fáciles emiten muchos menos bits (menos stalls vía funnel/LTE); el peor caso no cambia. QSV/VideoToolbox/VAAPI conservan el triple de bitrate fijo probado (sus knobs de calidad tienen gotchas de vendor).
- Limpieza: wrapper buildHLSFFmpegArgs y guard startIdx<0 muertos.
This commit is contained in:
parent
f7ca282ca0
commit
9b97aedfe4
5 changed files with 259 additions and 16 deletions
|
|
@ -478,6 +478,13 @@ type StreamSession struct {
|
|||
// omitted. Forces a full video re-encode (the overlay can't ride a copy
|
||||
// path), so the web only sends it when the user picks a bitmap sub.
|
||||
BurnSubtitleIndex *int `json:"burnSubtitleIndex,omitempty"`
|
||||
// StartSec is the playback position (seconds) the viewer opens at — the
|
||||
// saved resume point, or the current position on a quality/audio switch.
|
||||
// HLS sessions spawn the FIRST ffmpeg already seeked there instead of
|
||||
// encoding from segment 0 and immediately seek-restarting (double spawn,
|
||||
// slow resume). 0/omitted = start at the beginning. Older daemons simply
|
||||
// don't decode the field and keep the old start-at-0 behaviour.
|
||||
StartSec float64 `json:"startSec,omitempty"`
|
||||
// PlayMethod is how the daemon should serve this session:
|
||||
// "" — default (HLS transcode); also what legacy servers send.
|
||||
// "direct" — the source is already browser-native (the web decided this
|
||||
|
|
|
|||
|
|
@ -790,6 +790,7 @@ func runDaemonStart() error {
|
|||
Quality: sess.Quality,
|
||||
AudioIndex: sess.AudioIndex,
|
||||
BurnSubtitleIndex: sess.BurnSubtitleIndex,
|
||||
StartSec: sess.StartSec,
|
||||
Transcode: tcRuntime,
|
||||
Cache: hlsCache,
|
||||
// 2c: refresh the debrid link if it expires mid-transcode; the
|
||||
|
|
@ -925,6 +926,7 @@ func runDaemonStart() error {
|
|||
Quality: sess.Quality,
|
||||
AudioIndex: sess.AudioIndex,
|
||||
BurnSubtitleIndex: sess.BurnSubtitleIndex,
|
||||
StartSec: sess.StartSec,
|
||||
Transcode: tcRuntime,
|
||||
Cache: hlsCache,
|
||||
}, hlsCtx, hlsCancel)
|
||||
|
|
@ -1449,8 +1451,13 @@ func watchSessionReady(ctx context.Context, client *agent.Client, hsess *engine.
|
|||
if hsess.IsClosed() {
|
||||
return
|
||||
}
|
||||
// Phase 1: cache HIT or seg-0 ready → flip the "Preparando…" UI now.
|
||||
if !readyPosted && (hsess.FromCache() || hsess.ReadyCount() >= 1) {
|
||||
// Phase 1: cache HIT or first segment ready → flip the "Preparando…"
|
||||
// UI now. Compare against WriterStartIdx, not `>= 1`: a resume
|
||||
// session (StartSec) pre-seeds readyMax to the start index, so
|
||||
// ReadyCount() is ≥ 1 before ffmpeg has written a single byte —
|
||||
// `>= 1` would fire "ready" instantly and freeze the player waiting
|
||||
// on a segment that doesn't exist yet.
|
||||
if !readyPosted && (hsess.FromCache() || hsess.ReadyCount() > hsess.WriterStartIdx()) {
|
||||
postReady(nil)
|
||||
readyPosted = true
|
||||
// Cache replay has no live encode → no telemetry to report, done.
|
||||
|
|
|
|||
|
|
@ -66,6 +66,17 @@ func segmentStartSec(idx int) float64 {
|
|||
return float64(idx * hlsSegmentDuration)
|
||||
}
|
||||
|
||||
// segmentIdxForTime returns the index of the segment containing second `sec`
|
||||
// of the timeline — the inverse of segmentStartSec. Used to translate a
|
||||
// session's StartSec (resume position) into the segment the FIRST ffmpeg
|
||||
// should start writing from.
|
||||
func segmentIdxForTime(sec float64) int {
|
||||
if sec <= 0 {
|
||||
return 0
|
||||
}
|
||||
return int(sec / float64(hlsSegmentDuration))
|
||||
}
|
||||
|
||||
// segmentCountForDuration returns how many segments cover a source of the
|
||||
// given duration. Always returns at least 1.
|
||||
func segmentCountForDuration(dur float64) int {
|
||||
|
|
@ -160,7 +171,16 @@ type HLSSessionConfig struct {
|
|||
// with the clean one. Forces the video re-encode the HLS path already does
|
||||
// to also composite the subtitle overlay.
|
||||
BurnSubtitleIndex *int
|
||||
Transcode TranscodeRuntime
|
||||
// StartSec is the playback position (seconds) the viewer will start at —
|
||||
// the saved resume point, or the current position on a quality/audio
|
||||
// switch. When > 0 the FIRST ffmpeg spawns already seeked there
|
||||
// (`-ss` + `-output_ts_offset` + `-start_number`, the same flags as a
|
||||
// seek-restart), instead of encoding from segment 0 only to be
|
||||
// killed by an immediate seek-restart when the player asks for the resume
|
||||
// segment (double spawn, slow resume). 0 = start at the beginning.
|
||||
// Ignored on a cache HIT (every segment is already on disk).
|
||||
StartSec float64
|
||||
Transcode TranscodeRuntime
|
||||
// Cache is an optional persistent segment cache keyed by (source, quality,
|
||||
// audio). When set, completed encodes are kept across sessions so re-plays
|
||||
// of the same file at the same quality skip ffmpeg entirely. nil disables
|
||||
|
|
@ -503,11 +523,38 @@ func StartHLSSession(ctx context.Context, cfg HLSSessionConfig) (*HLSSession, er
|
|||
return s, nil
|
||||
}
|
||||
|
||||
// Resume-aware first spawn: when the session carries a StartSec (resume
|
||||
// point / position on a quality switch), launch ffmpeg already seeked at
|
||||
// the segment containing it. The web player opens playback at the same
|
||||
// position (hls.js startPosition), so segment 0 would never be requested —
|
||||
// encoding from 0 just to seek-restart milliseconds later wasted a full
|
||||
// ffmpeg spawn and doubled the resume latency. Earlier segments simply
|
||||
// don't exist on disk; ServeSegment's `idx < segStart` branch restarts the
|
||||
// encoder if the user later scrubs back before the resume point. A partial
|
||||
// encode never seals the cache (allSegmentsPresent checks 0..N), matching
|
||||
// today's post-seek behaviour.
|
||||
startIdx := 0
|
||||
if cfg.StartSec > 0 && cfg.StartSec < probe.DurationSec {
|
||||
startIdx = segmentIdxForTime(cfg.StartSec)
|
||||
if startIdx > segCount-1 {
|
||||
startIdx = segCount - 1
|
||||
}
|
||||
} else if cfg.StartSec >= probe.DurationSec && cfg.StartSec > 0 {
|
||||
// Stale resume beyond this source's duration (the file was replaced by
|
||||
// a shorter cut, or progress was saved against another release). Start
|
||||
// from the beginning instead of encoding only the final segment, which
|
||||
// would "end" the video seconds after it starts.
|
||||
log.Printf("[hls %s] startSec %.0f ≥ duration %.0f — starting from 0",
|
||||
shortHLSID(cfg.SessionID), cfg.StartSec, probe.DurationSec)
|
||||
}
|
||||
s.ffmpegSegStart = startIdx
|
||||
s.readyMax = startIdx
|
||||
|
||||
// Spawn ffmpeg under a dedicated context so Close() can kill it without
|
||||
// touching the parent ctx.
|
||||
ffCtx, cancel := context.WithCancel(context.Background())
|
||||
s.cancel = cancel
|
||||
args := buildHLSFFmpegArgs(cfg, probe, tmpDir)
|
||||
args := buildHLSFFmpegArgsAt(cfg, probe, tmpDir, startIdx, segmentStartSec(startIdx))
|
||||
cmd := exec.CommandContext(ffCtx, cfg.Transcode.FFmpegPath, args...)
|
||||
cmd.Stderr = &hlsStderrCapture{owner: s}
|
||||
if err := cmd.Start(); err != nil {
|
||||
|
|
@ -540,10 +587,14 @@ func StartHLSSession(ctx context.Context, cfg HLSSessionConfig) (*HLSSession, er
|
|||
if profile.Preset != "" {
|
||||
presetNote = " preset=" + profile.Preset
|
||||
}
|
||||
log.Printf("[hls %s] started: %s, %.1fs, %d segs (quality=%s, encoder=%s accel=%s%s)%s",
|
||||
startNote := ""
|
||||
if startIdx > 0 {
|
||||
startNote = fmt.Sprintf(" start=seg-%d@%.0fs", startIdx, segmentStartSec(startIdx))
|
||||
}
|
||||
log.Printf("[hls %s] started: %s, %.1fs, %d segs (quality=%s, encoder=%s accel=%s%s)%s%s",
|
||||
shortHLSID(cfg.SessionID), cfg.logName(),
|
||||
probe.DurationSec, segCount, coalesce(cfg.Quality, "auto"),
|
||||
profile.Codec, string(cfg.Transcode.HWAccel), presetNote, cachedNote)
|
||||
profile.Codec, string(cfg.Transcode.HWAccel), presetNote, cachedNote, startNote)
|
||||
return s, nil
|
||||
}
|
||||
|
||||
|
|
@ -601,16 +652,30 @@ func (s *HLSSession) ProbeInfo() map[string]any {
|
|||
}
|
||||
}
|
||||
|
||||
// ReadyCount returns how many segments are currently fully on disk.
|
||||
// Caller can `>= 1` it to check whether seg-0 has landed (and so the
|
||||
// player can be told to attach). For cache-HIT sessions this is always
|
||||
// `segmentCount` from the moment StartHLSSession returns.
|
||||
// ReadyCount returns the session's readyMax watermark: segment idx is on disk
|
||||
// iff idx < ReadyCount() AND idx >= WriterStartIdx(). For a from-zero encode
|
||||
// this is simply "how many segments are on disk"; for a resume session
|
||||
// (StartSec > 0) readyMax is pre-seeded to the start index, so the FIRST real
|
||||
// segment has landed only once ReadyCount() > WriterStartIdx() — use that
|
||||
// comparison, not `>= 1`, to flip the player's "Preparando…" UI. For
|
||||
// cache-HIT sessions this is always `segmentCount` from the moment
|
||||
// StartHLSSession returns.
|
||||
func (s *HLSSession) ReadyCount() int {
|
||||
s.readyMu.Lock()
|
||||
defer s.readyMu.Unlock()
|
||||
return s.readyMax
|
||||
}
|
||||
|
||||
// WriterStartIdx returns the segment index the CURRENT ffmpeg writer started
|
||||
// at: 0 for a from-the-beginning encode, the resume segment for a StartSec
|
||||
// session, the seek target after a seek-restart. See ReadyCount for the
|
||||
// "first segment landed" comparison.
|
||||
func (s *HLSSession) WriterStartIdx() int {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
return s.ffmpegSegStart
|
||||
}
|
||||
|
||||
// FromCache reports whether this session was served from the HLS cache
|
||||
// (no ffmpeg subprocess spawned). Used by ready-watcher logic to short-
|
||||
// circuit polling — a cache HIT is ready the moment we return.
|
||||
|
|
@ -1159,11 +1224,6 @@ func (s *HLSSession) restartFromSegment(targetIdx int) error {
|
|||
|
||||
// ---- ffmpeg argument builders ----
|
||||
|
||||
// buildHLSFFmpegArgs returns the argv for the initial HLS encode (start at 0).
|
||||
func buildHLSFFmpegArgs(cfg HLSSessionConfig, probe *StreamProbe, tmpDir string) []string {
|
||||
return buildHLSFFmpegArgsAt(cfg, probe, tmpDir, 0, 0)
|
||||
}
|
||||
|
||||
// EncoderProfile names the codec + preset + decoder hint combination the HLS
|
||||
// pipeline picks for the given hardware backend + transcode config. Exposed
|
||||
// so callers can log the chosen encoder before ffmpeg launches and so both
|
||||
|
|
@ -1418,7 +1478,31 @@ func buildHLSFFmpegArgsAt(cfg HLSSessionConfig, probe *StreamProbe, tmpDir strin
|
|||
if bitrate == "" {
|
||||
bitrate = "5M"
|
||||
}
|
||||
args = append(args, "-b:v", bitrate, "-maxrate", bitrate, "-bufsize", bitrate)
|
||||
// Rate control: capped constant-quality where the encoder supports it well
|
||||
// (libx264 CRF, NVENC CQ), plain CBR-ish elsewhere. Constant quality is the
|
||||
// on-the-fly analogue of per-title encoding: easy scenes (dialogue, anime
|
||||
// flats) emit FAR fewer bits than the fixed target — which is what keeps a
|
||||
// funnel/LTE link from stalling — while complex scenes can still use up to
|
||||
// `-maxrate` (the same ceiling as before, so worst-case quality and the
|
||||
// level-derived VBV pair are unchanged). `-bufsize 2×maxrate` gives the VBV
|
||||
// a standard one-segment window to absorb spikes; the old 1× window forced
|
||||
// the encoder to flatline at the cap. CPB stays far below every H.264
|
||||
// level's limit (level 3.1 allows 14 Mbps CPB vs our 3M at 480p).
|
||||
switch codec {
|
||||
case "libx264":
|
||||
// Capped CRF: no -b:v (CRF drives quality), -maxrate/-bufsize cap it.
|
||||
args = append(args, "-crf", "23", "-maxrate", bitrate, "-bufsize", doubleBitrate(bitrate))
|
||||
case "h264_nvenc":
|
||||
// NVENC constant-quality VBR: -cq targets quality, -b:v 0 disables the
|
||||
// default 2M average-bitrate target that would otherwise fight it.
|
||||
args = append(args, "-cq", "23", "-b:v", "0", "-maxrate", bitrate, "-bufsize", doubleBitrate(bitrate))
|
||||
default:
|
||||
// QSV / VideoToolbox / VAAPI: keep the proven fixed-bitrate triple —
|
||||
// their constant-quality knobs (ICQ, -q:v) have vendor-specific gotchas
|
||||
// (VideoToolbox ignores -q:v when -b:v is set; QSV ICQ conflicts with
|
||||
// look_ahead=0) and we can't regression-test them here.
|
||||
args = append(args, "-b:v", bitrate, "-maxrate", bitrate, "-bufsize", bitrate)
|
||||
}
|
||||
|
||||
// Force keyframe alignment with segment boundaries.
|
||||
args = append(args, "-force_key_frames", fmt.Sprintf("expr:gte(t,n_forced*%d)", hlsSegmentDuration))
|
||||
|
|
|
|||
111
internal/engine/hls_ratecontrol_test.go
Normal file
111
internal/engine/hls_ratecontrol_test.go
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
package engine
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDoubleBitrate(t *testing.T) {
|
||||
cases := map[string]string{
|
||||
"6000k": "12000k",
|
||||
"25000k": "50000k",
|
||||
"1500k": "3000k",
|
||||
"5M": "10M",
|
||||
"1.5M": "3M",
|
||||
"2.5m": "5m",
|
||||
"800000": "1600000",
|
||||
"": "",
|
||||
"garbage": "garbage", // unparseable → unchanged (1× bufsize fallback)
|
||||
"-5M": "-5M", // non-positive → unchanged
|
||||
}
|
||||
for in, want := range cases {
|
||||
if got := doubleBitrate(in); got != want {
|
||||
t.Errorf("doubleBitrate(%q) = %q, want %q", in, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// segmentIdxForTime must be the exact inverse of segmentStartSec so the
|
||||
// resume-aware first spawn (HLSSessionConfig.StartSec) lands on the same
|
||||
// segment the player's hls.js startPosition will request.
|
||||
func TestSegmentIdxForTime(t *testing.T) {
|
||||
cases := map[float64]int{
|
||||
0: 0,
|
||||
-3: 0,
|
||||
0.5: 0,
|
||||
1.99: 0,
|
||||
2: 1,
|
||||
3.9: 1,
|
||||
60: 30,
|
||||
3599.9: 1799,
|
||||
}
|
||||
for sec, want := range cases {
|
||||
if got := segmentIdxForTime(sec); got != want {
|
||||
t.Errorf("segmentIdxForTime(%v) = %d, want %d", sec, got, want)
|
||||
}
|
||||
}
|
||||
// Round-trip: the start time of the segment we resolve must never be
|
||||
// AFTER the requested position (the player would miss its first frames).
|
||||
for _, sec := range []float64{0, 1, 2, 7.3, 119.9, 4321} {
|
||||
idx := segmentIdxForTime(sec)
|
||||
if start := segmentStartSec(idx); start > sec {
|
||||
t.Errorf("segmentStartSec(segmentIdxForTime(%v)) = %v > %v", sec, start, sec)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Capped constant-quality rate control: libx264 gets -crf (no -b:v), NVENC
|
||||
// gets -cq with -b:v 0, both keep -maxrate at the level-coherent cap and a
|
||||
// 2× -bufsize. VAAPI (and the other vendor encoders) keep the proven
|
||||
// fixed-bitrate triple untouched.
|
||||
func TestBuildHLSFFmpegArgsRateControl(t *testing.T) {
|
||||
probe := &StreamProbe{Width: 1920, Height: 1080, DurationSec: 100}
|
||||
base := HLSSessionConfig{
|
||||
SessionID: "test",
|
||||
SourcePath: "/media/Movie.mkv",
|
||||
Quality: "1080p",
|
||||
Transcode: TranscodeRuntime{
|
||||
FFmpegPath: "/usr/bin/ffmpeg",
|
||||
FFprobePath: "/usr/bin/ffprobe",
|
||||
},
|
||||
}
|
||||
|
||||
t.Run("libx264 capped CRF", func(t *testing.T) {
|
||||
cfg := base
|
||||
cfg.Transcode.HWAccel = HWAccelNone
|
||||
got := strings.Join(buildHLSFFmpegArgsAt(cfg, probe, "/tmp/tmpdir", 0, 0), " ")
|
||||
for _, want := range []string{"-crf 23", "-maxrate 6000k", "-bufsize 12000k"} {
|
||||
if !strings.Contains(got, want) {
|
||||
t.Errorf("libx264 argv missing %q\n%s", want, got)
|
||||
}
|
||||
}
|
||||
if strings.Contains(got, "-b:v 6000k") {
|
||||
t.Errorf("libx264 argv must not carry -b:v alongside -crf\n%s", got)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("nvenc constant-quality VBR", func(t *testing.T) {
|
||||
cfg := base
|
||||
cfg.Transcode.HWAccel = HWAccelNVENC
|
||||
got := strings.Join(buildHLSFFmpegArgsAt(cfg, probe, "/tmp/tmpdir", 0, 0), " ")
|
||||
for _, want := range []string{"-rc vbr", "-cq 23", "-b:v 0", "-maxrate 6000k", "-bufsize 12000k"} {
|
||||
if !strings.Contains(got, want) {
|
||||
t.Errorf("nvenc argv missing %q\n%s", want, got)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("vaapi keeps fixed-bitrate triple", func(t *testing.T) {
|
||||
cfg := base
|
||||
cfg.Transcode.HWAccel = HWAccelVAAPI
|
||||
got := strings.Join(buildHLSFFmpegArgsAt(cfg, probe, "/tmp/tmpdir", 0, 0), " ")
|
||||
for _, want := range []string{"-b:v 6000k", "-maxrate 6000k", "-bufsize 6000k"} {
|
||||
if !strings.Contains(got, want) {
|
||||
t.Errorf("vaapi argv missing %q\n%s", want, got)
|
||||
}
|
||||
}
|
||||
if strings.Contains(got, "-crf") || strings.Contains(got, "-cq") {
|
||||
t.Errorf("vaapi argv must not carry constant-quality flags\n%s", got)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
|
@ -1,5 +1,10 @@
|
|||
package engine
|
||||
|
||||
import (
|
||||
"math"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// TranscodeRuntime carries the resolved ffmpeg/ffprobe paths + tunables so
|
||||
// each session can decide whether to passthrough or pipe through ffmpeg.
|
||||
type TranscodeRuntime struct {
|
||||
|
|
@ -48,6 +53,35 @@ func resolveQualityCap(label string) qualityCap {
|
|||
}
|
||||
}
|
||||
|
||||
// doubleBitrate returns an ffmpeg bitrate string with twice the value of the
// input ("6000k" → "12000k", "1.5M" → "3M", "5M" → "10M"). It is used to size
// `-bufsize` at 2× `-maxrate` for capped-CRF/CQ rate control.
//
// A single trailing k/K/m/M suffix is preserved verbatim; the numeric part is
// parsed as a float, doubled, and printed without a fractional part when the
// result is a whole number.
//
// The input is returned unchanged (1× bufsize — the pre-CRF behaviour, safe
// just suboptimal) when it cannot be doubled into a valid bitrate:
//   - empty string,
//   - numeric part that does not parse,
//   - non-positive value,
//   - NaN or infinity (strconv.ParseFloat accepts "NaN"/"Inf"/"Infinity"
//     without error, which would otherwise leak "+Inf"/"NaN" into the argv),
//   - a value whose double overflows float64.
//
// NOTE(review): the original author states the doubled CPB stays well below
// the H.264 level limits for the (level, maxrate) pairs this package emits;
// that table is not visible here, so confirm when adding new quality tiers.
func doubleBitrate(b string) string {
	if b == "" {
		return b
	}

	// Split off a single recognised magnitude suffix, if present.
	num, suffix := b, ""
	switch last := b[len(b)-1]; last {
	case 'k', 'K', 'm', 'M':
		num, suffix = b[:len(b)-1], string(last)
	}

	v, err := strconv.ParseFloat(num, 64)
	// NaN compares false against everything, so it needs its own check.
	if err != nil || math.IsNaN(v) || v <= 0 {
		return b
	}

	d := v * 2
	// Covers both an infinite input and a finite one that overflows on doubling.
	if math.IsInf(d, 0) {
		return b
	}

	if d == math.Trunc(d) {
		return strconv.FormatFloat(d, 'f', 0, 64) + suffix
	}
	return strconv.FormatFloat(d, 'f', -1, 64) + suffix
}
|
||||
|
||||
// capForHeight returns the bitrate-cap pair appropriate for an effective
|
||||
// output height. Used after clamping outputHeight to the source's resolution:
|
||||
// asking ffmpeg for "2160p" bitrate (25 Mbps) on a 1080p source overshoots
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue