unarr/internal/engine/hls.go
Deivid Soto a4a6e2f2d6 fix(stream): no copiar AAC multicanal en modo copy (WebKit lo rechaza igual)
El downmix estéreo del re-encode (f89396c) dejaba un agujero simétrico: una
fuente cuyo audio YA es AAC 5.1 se copiaba tal cual, y WebKit rechaza el
AAC multicanal en el primer segmento exactamente igual que el re-encodeado.
Copy de audio ahora solo cuando la pista es AAC con ≤2 canales; cualquier
otra cosa (no-AAC, AAC 5.1+, o canales desconocidos en el probe — fail-safe)
re-encodea a AAC estéreo 48k. La pista multicanal original queda intacta
para reproductor externo. Test smoke nuevo: fuente AAC 5.1 → re-encode.
2026-06-11 00:05:50 +02:00

2183 lines
85 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Package engine — hls.go implements the HLS streaming pipeline.
//
// Browser ↔ daemon over plain HTTP (LAN / Tailscale / UPnP). The daemon runs
// ffmpeg in `-f hls` mode, writing fragmented MP4 segments to a per-session
// tmpdir. Master + media playlists are pre-rendered from the probed source
// duration so the player knows the full timeline before any segment exists.
//
// One HLSSession == one browser playback. Sessions are registered in a
// process-wide map keyed by session ID; the StreamServer routes
// GET /hls/<id>/master.m3u8
// GET /hls/<id>/video/index.m3u8
// GET /hls/<id>/video/init.mp4
// GET /hls/<id>/video/seg-<n>.m4s
// GET /hls/<id>/subs/<lang>.vtt
// to the matching session.
package engine
import (
"context"
"errors"
"fmt"
"io"
"log"
"math"
"net/http"
"os"
"os/exec"
"path/filepath"
"regexp"
"strconv"
"strings"
"sync"
"time"
)
// hlsSegmentDuration is the target length of one HLS fragment, in whole
// seconds.
//
// We use 2 seconds (not the more common 4-6 s). Trade-off: 2× more segments
// per source (a 2 h movie produces 3600 segments instead of 1800), but the
// player's first-frame wait drops to ~half — ffmpeg only needs to encode
// 2 s before seg-0 lands. For software encodes on 4K this is ~1 s instead
// of ~3 s of cold-cache wait. Well within HLS spec (Apple recommends 6 s,
// but 2-6 s is acceptable; Low-Latency HLS uses 1-2 s segments).
//
// Caveat for existing cached encodes: cache entries from 0.9.9 used 4 s
// segments. After this bump, VerifyComplete (which checks the highest
// expected segment index) returns false for those entries — they're
// invalidated + re-encoded with 2 s segments on next play. Self-healing.
//
// The segment-index helpers in this file (segmentStartSec, segmentIdxForTime,
// segmentCountForDuration) all derive their arithmetic from this constant.
const hlsSegmentDuration = 2
// segmentDurationFor reports the target length, in whole seconds, of the
// segment at position idx. Segments are uniform today, so idx is not
// consulted and the answer is always hlsSegmentDuration; keeping the lookup
// behind one function means a variable-length scheme (e.g. a shorter first
// segment) only has to change this body.
func segmentDurationFor(idx int) int {
	_ = idx // every segment currently shares the same target length
	const uniform = hlsSegmentDuration
	return uniform
}
// segmentStartSec maps a segment index to the timeline offset, in seconds,
// at which that segment begins (idx × hlsSegmentDuration). Zero and negative
// indices both map to 0. The result feeds ffmpeg's `-ss` flag when the
// encoder is (re)started at a mid-file segment.
func segmentStartSec(idx int) float64 {
	if idx > 0 {
		return float64(idx * hlsSegmentDuration)
	}
	return 0
}
// segmentIdxForTime is the inverse of segmentStartSec: given a position on
// the timeline (seconds) it yields the index of the segment that contains
// it. Non-positive positions map to segment 0. StartHLSSession uses it to
// turn a resume position (StartSec) into the first segment ffmpeg writes.
func segmentIdxForTime(sec float64) int {
	if sec <= 0 {
		return 0
	}
	slot := sec / float64(hlsSegmentDuration)
	return int(slot) // truncation == floor for positive values
}
// segmentCountForDuration returns how many hlsSegmentDuration-long segments
// are needed to cover a source of the given duration (seconds), i.e.
// ceil(dur / hlsSegmentDuration). Always returns at least 1.
//
// The ceiling is taken on the float value. The integer idiom
// `(dur + d - 1) / d` is wrong for fractional durations: it yielded 0 for
// sources shorter than 1 s and dropped the trailing partial segment whenever
// the fractional tail spilled just past a segment boundary (4.5 s → 2
// instead of 3).
func segmentCountForDuration(dur float64) int {
	// `!(dur > 0)` also catches NaN, which `dur <= 0` would let through.
	if !(dur > 0) {
		return 1
	}
	n := int(math.Ceil(dur / float64(hlsSegmentDuration)))
	if n < 1 {
		return 1
	}
	return n
}
// hlsSessionTTL is how long a session can sit idle (no segment requests)
// before the manager kills ffmpeg + cleans the tmpdir. SweepIdle compares
// the time elapsed since a session's lastTouch against this value.
const hlsSessionTTL = 30 * time.Minute
// hlsTmpDirRoot resolves the directory under which every per-session HLS
// tmpdir is created: <user cache dir>/unarr/hls-sessions.
//
// Typical locations:
//
//	Linux:   ~/.cache/unarr/hls-sessions
//	macOS:   ~/Library/Caches/unarr/hls-sessions
//	Windows: %LOCALAPPDATA%/unarr/hls-sessions
//
// When the user cache dir cannot be resolved, the root becomes
// <os.TempDir()>/unarr-hls-sessions instead.
func hlsTmpDirRoot() string {
	cacheDir, err := os.UserCacheDir()
	if err != nil {
		return filepath.Join(os.TempDir(), "unarr-hls-sessions")
	}
	return filepath.Join(cacheDir, "unarr", "hls-sessions")
}
// CleanupHLSOrphanDirs garbage-collects per-session tmpdirs left under
// hlsTmpDirRoot by a previous daemon run. A restart drops the in-memory
// session registry but not the directories, so without this sweep disk usage
// would grow across restarts.
//
// Only directories whose modification time is more than 1 h old are removed;
// younger ones may still belong to the daemon that is booting (quick-restart
// race) and are left alone. Removal is best-effort: an entry that cannot be
// stat'ed or deleted is skipped silently.
//
// Returns a non-nil error only when the root exists but cannot be listed. A
// missing root is not an error.
func CleanupHLSOrphanDirs() error {
	root := hlsTmpDirRoot()
	entries, err := os.ReadDir(root)
	switch {
	case err == nil:
		// proceed with the sweep
	case os.IsNotExist(err):
		return nil
	default:
		return err
	}

	threshold := time.Now().Add(-time.Hour)
	var reaped int
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		info, statErr := entry.Info()
		if statErr != nil || !info.ModTime().Before(threshold) {
			continue
		}
		if os.RemoveAll(filepath.Join(root, entry.Name())) == nil {
			reaped++
		}
	}
	if reaped > 0 {
		log.Printf("[hls] cleaned %d orphan tmpdir(s) at startup", reaped)
	}
	return nil
}
// HLSSessionConfig describes a single browser playback session driven by HLS.
// It is the sole input to StartHLSSession, which copies it into the session.
type HLSSessionConfig struct {
// SessionID identifies the session; StartHLSSession rejects an empty or
// malformed ID, and the registry keys sessions by it.
SessionID string
// Exactly one of SourcePath / SourceURL identifies the input. SourcePath is
// a local file; SourceURL is a remote HTTP(S) URL ffmpeg reads directly
// (gap #2 / 2b — transcoding a debrid source that isn't browser-native).
SourcePath string
// SourceURL, when set, is fed to ffmpeg/ffprobe as the input (-i <url>) with
// network-resilience flags. Takes priority over SourcePath.
SourceURL string
// CacheID overrides the cache key identity. Empty → key by SourcePath (local
// files). Set to a stable id (the torrent info_hash) for SourceURL sessions
// so re-plays cache-hit even though the debrid URL changes each resolution.
CacheID string
// RefreshURL, when set (debrid URL sessions only), re-resolves a fresh
// SourceURL when the current link expires mid-transcode (gap #2 / 2c).
// The auto-restart supervisor calls it before relaunching ffmpeg so the
// restart uses a live link instead of retrying the dead one. nil = no refresh.
RefreshURL func(context.Context) (string, error)
// FileName is a display name for the source; logName prefers it over the
// raw URL when no SourcePath is set.
FileName string
Quality string // "2160p"|"1080p"|"720p"|"480p"|"original"|""
AudioIndex int // 0-based ffmpeg audio stream selection (-map 0:a:N). -1 = default.
// BurnSubtitleIndex burns a BITMAP subtitle (PGS/DVB) at this 0-based
// subtitle stream index into the video. nil = no burn (text subs are served
// as separate WebVTT). A pointer (not int) so the zero value 0 — a valid
// stream index — can't be mistaken for a burn request when a caller leaves
// the field unset. Part of the cache key so a burned encode never collides
// with the clean one. Forces the video re-encode the HLS path already does
// to also composite the subtitle overlay.
BurnSubtitleIndex *int
// StartSec is the playback position (seconds) the viewer will start at —
// the saved resume point, or the current position on a quality/audio
// switch. When > 0 the FIRST ffmpeg spawns already seeked there
// (`-ss` + `-output_ts_offset` + `-start_number`, the same flags as a
// seek-restart), instead of encoding from segment 0 only to be
// killed by an immediate seek-restart when the player asks for the resume
// segment (double spawn, slow resume). 0 = start at the beginning.
// Ignored on a cache HIT (every segment is already on disk).
StartSec float64
// Prewarm marks a background cache-fill session. The daemon defers its
// encode until no live encode runs and registers it via RegisterKeep
// (never evicting the viewer). It also lets a REAL session close stale
// prewarms up front so the cache writer-lock is free for the viewer.
Prewarm bool
// Transcode carries the ffmpeg/ffprobe binary paths and the encoder settings
// (hardware acceleration, preset) that StartHLSSession reads.
Transcode TranscodeRuntime
// Cache is an optional persistent segment cache keyed by (source, quality,
// audio). When set, completed encodes are kept across sessions so re-plays
// of the same file at the same quality skip ffmpeg entirely. nil disables
// caching (per-session tmpdir, deleted on Close — original behavior).
Cache *HLSCache
// VideoCopy switches the session to HLS-copy mode: ffmpeg `-c:v copy`
// (NEVER re-encodes video — I/O-bound, works on a GPU-less NAS), audio
// copied when already AAC or re-encoded to AAC otherwise. This replaces
// the fragile progressive-remux path (growing fMP4 over manual HTTP
// Range) with the robust segmented transport every player handles
// (hls.js + native iOS HLS). Differences from the encode mode, all
// driven by "segments cut at the SOURCE's keyframes, so their durations
// are unknown upfront":
// - the media playlist is ffmpeg's own (EVENT → ENDLIST), served from
// disk — not the pre-rendered uniform-2s VOD manifest;
// - no seek-restart / auto-restart (copy outruns any viewer: the whole
// file is remuxed at I/O speed, minutes at worst on a weak NAS);
// - no HLS cache (re-generating costs no encode — caching would only
// burn disk);
// - StartSec is ignored: copy produces from 0 (outruns playback at I/O
// speed); an offset EVENT playlist breaks iOS's native HLS parser.
// See docs/plans/hls-copy-remux-replacement.md (web repo).
VideoCopy bool
}
// copyPlaylistName is the file name of the on-disk media playlist that
// ffmpeg owns in VideoCopy mode, located under <tmpDir>/video/. It is
// distinct from the encode mode's in-memory manifest so the two can never
// be confused.
const copyPlaylistName = "copy.m3u8"
// sourceRef picks the string handed to ffmpeg/ffprobe as their input: the
// remote SourceURL when one is configured, the local SourcePath otherwise.
// Routing every `-i` argument and probe target through here keeps the
// local-file and debrid-URL flows on a single code path.
func (cfg HLSSessionConfig) sourceRef() string {
	ref := cfg.SourcePath
	if cfg.SourceURL != "" {
		ref = cfg.SourceURL // URL takes priority over the path
	}
	return ref
}
// burnSubtitleIndexOrNone flattens the optional BurnSubtitleIndex pointer
// into a plain int: the pointed-to stream index when a burn-in was requested,
// or the sentinel -1 ("no burn") when the pointer is nil. The cache-key calls
// in StartHLSSession pass this value.
func (cfg HLSSessionConfig) burnSubtitleIndexOrNone() int {
	if idx := cfg.BurnSubtitleIndex; idx != nil {
		return *idx
	}
	return -1
}
// logName produces a compact label for log lines. Precedence:
//  1. the base name of SourcePath, for local files;
//  2. FileName, for URL sources that carry one;
//  3. the fixed placeholder "debrid-url".
//
// The raw SourceURL is never used, since its query string could leak an
// access token into the logs.
func (cfg HLSSessionConfig) logName() string {
	switch {
	case cfg.SourcePath != "":
		return filepath.Base(cfg.SourcePath)
	case cfg.FileName != "":
		return cfg.FileName
	default:
		return "debrid-url"
	}
}
// HLSSession owns a tmpdir + ffmpeg subprocess producing HLS fragments.
//
// Seek behaviour: ffmpeg writes segments sequentially from `ffmpegSegStart`.
// When a handler asks for a segment far ahead of the writer, the daemon
// kills the current ffmpeg and restarts it with `-ss <targetSec>
// -output_ts_offset <targetSec> -start_number <idx>` so the next segments
// it emits land at the requested timeline position. Segments already on
// disk before the seek stay there; the new ffmpeg only writes from the
// target index forward.
//
// Locking: three independent mutexes guard three groups of fields — mu
// (process/lifecycle state), readyMu (segment-readiness watermark) and
// statsMu (transcode telemetry).
type HLSSession struct {
// Set once by StartHLSSession and not reassigned in the code visible here.
cfg HLSSessionConfig
probe *StreamProbe
tmpDir string
durationSec float64
segmentCount int
manifestVideo string // pre-rendered video media playlist
manifestRoot string // pre-rendered master playlist
// mu guards the lifecycle fields below (SweepIdle reads lastTouch and
// WriterStartIdx reads ffmpegSegStart under it).
mu sync.Mutex
cmd *exec.Cmd
cancel context.CancelFunc
closed bool
startedAt time.Time
lastTouch time.Time
ffmpegSegStart int // index of the first segment the current ffmpeg writes
restartCount int // bounded auto-restart counter (resets on Close)
lastRestartAt time.Time
// liveURL is the mutable debrid source URL (gap #2 / 2c). Initialised to
// cfg.SourceURL; refreshed in place by waitFFmpeg when the link expires.
// Guarded by mu because restartFromSegment reads it from BOTH the supervisor
// goroutine (auto-restart) AND the HTTP handler goroutine (seek-restart),
// while waitFFmpeg writes it. Empty for local-file sessions. cfg itself is
// treated as immutable after construction so copying it stays race-free.
liveURL string
// readyCh + readyMax track how many segments ffmpeg has finished writing.
// readyMax is a COUNT (not an index): readyMax=N means seg-0 … seg-(N-1)
// are fully on disk. A handler waiting on `idx` blocks until
// `idx < readyMax` (segment idx is present). The pollSegments goroutine
// advances readyMax and re-creates readyCh on every step.
readyMu sync.Mutex
readyMax int
exitErr error
exited bool
readyCh chan struct{} // closed + replaced each time readyMax advances
// Persistent cache state. cache==nil means caching disabled for this session.
// fromCache=true means the session is replaying a completed encode and no
// ffmpeg subprocess was spawned. writerLockHeld=true means this session
// owns the per-key TryAcquireWriter claim — Close must ReleaseWriter.
cache *HLSCache
cacheKey string
fromCache bool
writerLockHeld bool
// Live transcode telemetry (F3). ffmpeg's -stats progress line is parsed
// in hlsStderrCapture.Write into an EWMA of speed= (×realtime) + fps=, plus
// an input-bound hint set when the SOURCE read errors (slow/broken pull vs a
// too-slow encode). GetTranscodeStats() snapshots this so the ready-watcher
// can report a real measurement to the web side — letting the player name a
// too-slow transcode honestly in ~4s instead of inferring it from stall
// shape over 15-30s. Guarded by statsMu (the stderr goroutine writes; the
// watcher goroutine reads).
statsMu sync.Mutex
speedEWMA float64
fpsEWMA float64
speedSamples int
warmupSeen int // cold-start frames discarded before the EWMA is trusted
inputBound bool
}
// hlsSeekAhead is how many segments past the writer's current position the
// browser is allowed to request before we restart ffmpeg from the requested
// segment. 8 segments * 2 s (hlsSegmentDuration) = 16 s of "warm" buffer;
// further seeks trigger a restart instead of waiting through real-time
// encode.
const hlsSeekAhead = 8
// HLSSessionRegistry tracks active sessions keyed by ID.
// Its methods take mu before touching the sessions map and release it
// before calling Close on any session.
type HLSSessionRegistry struct {
// mu guards sessions.
mu sync.RWMutex
// sessions maps HLSSessionConfig.SessionID to the live session.
sessions map[string]*HLSSession
}
// NewHLSSessionRegistry builds a registry with an initialised, empty session
// map, ready for Register / RegisterKeep.
func NewHLSSessionRegistry() *HLSSessionRegistry {
	reg := new(HLSSessionRegistry)
	reg.sessions = map[string]*HLSSession{}
	return reg
}
// Get looks up the session registered under id. The result is nil when no
// session with that ID is registered. The map read happens under the
// registry's read lock.
func (r *HLSSessionRegistry) Get(id string) *HLSSession {
	r.mu.RLock()
	found := r.sessions[id]
	r.mu.RUnlock()
	return found
}
// Register adds a session under its ID. Replaces any previous session with
// the same ID (which is closed first to release ffmpeg + tmpdir).
//
// Also closes EVERY OTHER active session, since one daemon == one viewer ==
// one stream at a time. Without this, repeatedly opening the player (or
// changing quality) leaves orphan ffmpegs running until the 30 min idle
// sweeper reaps them, and N concurrent transcodes saturate the CPU.
//
// Registering a session that is already registered is a no-op for that
// session: it is never put on the close list (mirroring the `prev != s`
// guard in RegisterKeep), so it cannot be closed while staying in the map.
//
// Evicted sessions are closed after the registry lock is released, so a
// slow Close never blocks lookups.
func (r *HLSSessionRegistry) Register(s *HLSSession) {
	r.mu.Lock()
	stale := make([]*HLSSession, 0, len(r.sessions))
	for id, prev := range r.sessions {
		if prev != s {
			stale = append(stale, prev)
		}
		// Deleting while ranging is safe in Go; s is re-inserted below.
		delete(r.sessions, id)
	}
	r.sessions[s.cfg.SessionID] = s
	r.mu.Unlock()

	for _, prev := range stale {
		_ = prev.Close() // best-effort teardown; the session is already unregistered
	}
}
// CloseWhere unregisters and closes every session for which pred returns
// true, and reports how many were closed. pred is evaluated while the
// registry lock is held; the Close calls run after it is released.
//
// The REAL-session path uses this to reap stale prewarm encodes BEFORE its
// own StartHLSSession runs — that frees the per-key cache writer-lock, so
// the viewer's encode lands in the persistent cache instead of falling back
// to an uncached per-session tmpdir (and a SEALED prewarm survives as a
// cache HIT: closing a from-cache reader never invalidates the entry).
func (r *HLSSessionRegistry) CloseWhere(pred func(*HLSSession) bool) int {
	r.mu.Lock()
	matched := make([]*HLSSession, 0, len(r.sessions))
	for key, sess := range r.sessions {
		if !pred(sess) {
			continue
		}
		matched = append(matched, sess)
		delete(r.sessions, key)
	}
	r.mu.Unlock()

	for i := range matched {
		_ = matched[i].Close()
	}
	return len(matched)
}
// IsPrewarm tells whether the session was created as a background cache-fill
// (HLSSessionConfig.Prewarm). It reads cfg without locking because cfg is
// treated as immutable once the session is constructed.
func (s *HLSSession) IsPrewarm() bool {
	prewarm := s.cfg.Prewarm
	return prewarm
}
// RegisterKeep registers s while leaving every other session untouched. This
// is the prewarm path: a background cache-fill encode must not evict the
// viewer's live session (Register's eviction killed the stream being watched
// when the next-episode prewarm got claimed mid-playback).
//
// A previous session stored under the SAME ID is still replaced and closed,
// unless it is s itself. A later Register() of a real viewer session evicts
// prewarms like any other session — a completed (sealed) prewarm survives in
// the segment cache either way.
func (r *HLSSessionRegistry) RegisterKeep(s *HLSSession) {
	id := s.cfg.SessionID

	r.mu.Lock()
	displaced := r.sessions[id]
	r.sessions[id] = s
	r.mu.Unlock()

	if displaced == nil || displaced == s {
		return
	}
	_ = displaced.Close()
}
// HasLiveEncode answers "is any registered session still encoding?" — true
// as soon as one session's EncodeExited() is false. The daemon consults it
// to defer prewarm encodes so they never compete with the viewer's live
// transcode for the encoder.
func (r *HLSSessionRegistry) HasLiveEncode() bool {
	r.mu.RLock()
	defer r.mu.RUnlock()

	live := false
	for _, sess := range r.sessions {
		if sess.EncodeExited() {
			continue
		}
		live = true
		break
	}
	return live
}
// Remove unregisters the session stored under id, if any. The session itself
// is NOT closed — its ffmpeg and tmpdir are left to the caller.
func (r *HLSSessionRegistry) Remove(id string) {
	r.mu.Lock()
	delete(r.sessions, id)
	r.mu.Unlock()
}
// CloseAll empties the registry and closes every session that was in it.
// Intended for daemon shutdown. The map is swapped for a fresh one under the
// lock; the detached sessions are closed after the lock is released.
func (r *HLSSessionRegistry) CloseAll() {
	r.mu.Lock()
	detached := r.sessions
	r.sessions = map[string]*HLSSession{}
	r.mu.Unlock()

	// detached is no longer reachable through the registry, so it can be
	// walked without holding the lock.
	for _, sess := range detached {
		_ = sess.Close()
	}
}
// SweepIdle reaps sessions whose lastTouch is older than hlsSessionTTL: each
// one is removed from the registry and then closed. The return value is the
// number of sessions reaped.
//
// lastTouch is read under the session's own mutex while the registry lock is
// held; the Close calls happen only after the registry lock is released.
func (r *HLSSessionRegistry) SweepIdle() int {
	var expired []*HLSSession

	r.mu.Lock()
	for key, sess := range r.sessions {
		sess.mu.Lock()
		touched := sess.lastTouch
		sess.mu.Unlock()

		if time.Since(touched) <= hlsSessionTTL {
			continue
		}
		expired = append(expired, sess)
		delete(r.sessions, key)
	}
	r.mu.Unlock()

	for _, sess := range expired {
		_ = sess.Close()
	}
	return len(expired)
}
// StartHLSSession probes the source, builds the playlists, spawns ffmpeg,
// and returns a HLSSession ready to serve HTTP requests. Caller must register
// the session with a HLSSessionRegistry so the server can route to it.
//
// ctx bounds only the initial ffprobe call (with an additional 15 s ceiling);
// the ffmpeg subprocess runs under its own context derived from
// context.Background so that Close() — not the caller's ctx — ends it.
//
// On a cache HIT no ffmpeg is spawned: the session is returned with every
// segment already marked ready. Otherwise two goroutines are started
// (waitFFmpeg, pollSegments).
//
// Errors: invalid/empty session ID, missing source, missing ffmpeg/ffprobe
// paths, probe failure (wrapped), a source with no duration, failure to
// create the output directory (wrapped), or failure to start ffmpeg
// (wrapped). On the last two, any cache pin / writer lock taken here is
// released and the private tmpdir removed before returning.
func StartHLSSession(ctx context.Context, cfg HLSSessionConfig) (*HLSSession, error) {
	if cfg.SessionID == "" {
		return nil, errors.New("hls: empty session id")
	}
	if !validSessionID.MatchString(cfg.SessionID) {
		return nil, errors.New("hls: invalid session id")
	}
	if cfg.SourcePath == "" && cfg.SourceURL == "" {
		return nil, errors.New("hls: no source (neither path nor URL)")
	}
	if cfg.Transcode.FFmpegPath == "" || cfg.Transcode.FFprobePath == "" {
		return nil, errors.New("hls: ffmpeg/ffprobe not available")
	}

	// Probe gets a 15 s ceiling. ffprobe on a 50 GB MKV over a slow remote
	// fs can hang indefinitely; without a deadline the daemon would block
	// the goroutine that started the session forever and the user would
	// see the player phase stuck on "Preparando sesión".
	probeCtx, cancelProbe := context.WithTimeout(ctx, 15*time.Second)
	probe, err := ProbeFile(probeCtx, cfg.Transcode.FFprobePath, cfg.sourceRef())
	cancelProbe()
	if err != nil {
		return nil, fmt.Errorf("hls: probe: %w", err)
	}
	if probe.DurationSec <= 0 {
		return nil, errors.New("hls: source has no duration")
	}

	// Computed once: used for the cache integrity check, the pre-rendered
	// playlist, and the ready watermark.
	segCount := segmentCountForDuration(probe.DurationSec)

	// Resolve tmpDir + cache placement. Four states:
	//  1. cache disabled              → per-session tmpdir, deleted on Close.
	//  2. cache HIT (.complete found) → read from cache dir, no ffmpeg, Pin.
	//  3. cache MISS, writer-lock OK  → ffmpeg writes to cache dir, Pin + writer-lock.
	//  4. cache MISS, writer-lock NO  → another session already writing this
	//     key; fall back to private per-session tmpdir (no caching for this
	//     session — second-writer would corrupt the first one's segments).
	var (
		tmpDir         string
		cacheKey       string
		fromCache      bool
		writerLockHeld bool
	)
	if cfg.VideoCopy && cfg.Cache != nil {
		// HLS-copy never caches: re-generating costs no encode (I/O-bound), so
		// persisting segments would only burn cache budget that real transcodes
		// need. Private per-session tmpdir, deleted on Close.
		cfg.Cache = nil
	}
	if cfg.Cache != nil {
		// Debrid URL sessions key by CacheID (info_hash) so re-plays hit cache
		// despite the URL changing each resolution; local files key by path.
		if cfg.CacheID != "" {
			cacheKey = cfg.Cache.KeyForID(cfg.CacheID, cfg.Quality, cfg.AudioIndex, cfg.burnSubtitleIndexOrNone())
		} else {
			cacheKey = cfg.Cache.KeyFor(cfg.SourcePath, cfg.Quality, cfg.AudioIndex, cfg.burnSubtitleIndexOrNone())
		}
		// Integrity gate: HasComplete just stats the marker. If init.mp4 or
		// the last segment vanished (external rm, partial-disk failure), we
		// can't actually serve a HIT — drop the dir and re-encode.
		if cfg.Cache.HasComplete(cacheKey) && !cfg.Cache.VerifyComplete(cacheKey, segCount) {
			log.Printf("[hls %s] cache %s sealed but failed integrity check — re-encoding",
				shortHLSID(cfg.SessionID), cacheKey)
			_ = cfg.Cache.Invalidate(cacheKey)
		}
		if cfg.Cache.HasComplete(cacheKey) {
			// HIT: read-only replay — many concurrent HITs are fine.
			tmpDir = cfg.Cache.DirFor(cacheKey)
			cfg.Cache.Pin(cacheKey)
			fromCache = true
			cfg.Cache.RecordHit()
			_ = cfg.Cache.Touch(cacheKey)
		} else if cfg.Cache.TryAcquireWriter(cacheKey) {
			tmpDir = cfg.Cache.DirFor(cacheKey)
			cfg.Cache.Pin(cacheKey)
			writerLockHeld = true
			cfg.Cache.RecordMiss()
		} else {
			// Another session is writing this key — fall back to private
			// dir so we don't trample its segments.
			log.Printf("[hls %s] cache key %s busy, falling back to per-session tmpdir",
				shortHLSID(cfg.SessionID), cacheKey)
			tmpDir = filepath.Join(hlsTmpDirRoot(), cfg.SessionID)
			cacheKey = "" // disable caching for this session
			cfg.Cache.RecordMiss()
		}
	} else {
		tmpDir = filepath.Join(hlsTmpDirRoot(), cfg.SessionID)
	}

	// cleanupOnError undoes whatever placement was chosen above: cached
	// sessions drop their pin (and, as writer, the lock + the partial
	// entry); private sessions delete their tmpdir.
	cleanupOnError := func() {
		if cfg.Cache != nil && cacheKey != "" {
			cfg.Cache.Unpin(cacheKey)
			if writerLockHeld {
				cfg.Cache.ReleaseWriter(cacheKey)
				_ = cfg.Cache.Invalidate(cacheKey)
			}
		} else {
			_ = os.RemoveAll(tmpDir)
		}
	}
	if err := os.MkdirAll(filepath.Join(tmpDir, "video"), 0o755); err != nil {
		cleanupOnError()
		return nil, fmt.Errorf("hls: mkdir video: %w", err)
	}

	s := &HLSSession{
		cfg:            cfg,
		probe:          probe,
		tmpDir:         tmpDir,
		durationSec:    probe.DurationSec,
		segmentCount:   segCount,
		startedAt:      time.Now(),
		lastTouch:      time.Now(),
		readyCh:        make(chan struct{}),
		cache:          cfg.Cache,
		cacheKey:       cacheKey,
		fromCache:      fromCache,
		writerLockHeld: writerLockHeld,
		liveURL:        cfg.SourceURL, // mutable copy; cfg stays immutable
	}
	if cfg.VideoCopy {
		// Copy mode: ffmpeg owns the media playlist (segments cut at the
		// source's keyframes → durations unknown upfront, the uniform-2s
		// pre-render would lie). ServeVideoPlaylist reads it from disk.
		s.manifestVideo = ""
		s.manifestRoot = renderMasterPlaylistCopy(probe)
	} else {
		s.manifestVideo = renderVideoPlaylist(probe.DurationSec, segCount)
		s.manifestRoot = renderMasterPlaylist(probe, cfg.Quality)
	}

	// Cache HIT: every segment + init.mp4 is already on disk. Skip ffmpeg
	// entirely and mark readyMax so handlers don't wait. Background subtitle
	// extraction is also unnecessary — subs were extracted on the original run.
	if fromCache {
		s.readyMu.Lock()
		// readyMax is a COUNT: segment idx is ready iff idx < readyMax. All
		// segCount segments are on disk, so the watermark is segCount itself —
		// segCount-1 would leave the final segment permanently "not ready".
		s.readyMax = segCount
		s.exited = true
		close(s.readyCh)
		// NOTE(review): a receive on a nil channel blocks forever, so waiters
		// presumably check `exited` before selecting on readyCh — confirm
		// against the segment-wait path, which is not visible here.
		s.readyCh = nil
		s.readyMu.Unlock()
		log.Printf("[hls %s] cache HIT %s: %s, %.1fs, %d segs (quality=%s)",
			shortHLSID(cfg.SessionID), cacheKey, cfg.logName(),
			probe.DurationSec, segCount, coalesce(cfg.Quality, "auto"))
		return s, nil
	}

	// Resume-aware first spawn: when the session carries a StartSec (resume
	// point / position on a quality switch), launch ffmpeg already seeked at
	// the segment containing it. The web player opens playback at the same
	// position (hls.js startPosition), so segment 0 would never be requested —
	// encoding from 0 just to seek-restart milliseconds later wasted a full
	// ffmpeg spawn and doubled the resume latency. Earlier segments simply
	// don't exist on disk; ServeSegment's `idx < segStart` branch restarts the
	// encoder if the user later scrubs back before the resume point. A partial
	// encode never seals the cache (allSegmentsPresent checks 0..N), matching
	// today's post-seek behaviour.
	startIdx := 0
	if cfg.VideoCopy {
		// Copy mode always numbers from 0: segment indices don't map to
		// uniform 2s slots, so a StartSec-derived index would be wrong. The
		// resume seek itself is handled inside buildHLSCopyArgs via `-ss`
		// (keyframe-snapped) + `-output_ts_offset`.
	} else if cfg.StartSec > 0 && cfg.StartSec < probe.DurationSec {
		startIdx = segmentIdxForTime(cfg.StartSec)
		if startIdx > segCount-1 {
			startIdx = segCount - 1
		}
	} else if cfg.StartSec >= probe.DurationSec && cfg.StartSec > 0 {
		// Stale resume beyond this source's duration (the file was replaced by
		// a shorter cut, or progress was saved against another release). Start
		// from the beginning instead of encoding only the final segment, which
		// would "end" the video seconds after it starts.
		log.Printf("[hls %s] startSec %.0f ≥ duration %.0f — starting from 0",
			shortHLSID(cfg.SessionID), cfg.StartSec, probe.DurationSec)
	}
	s.ffmpegSegStart = startIdx
	s.readyMax = startIdx

	// Spawn ffmpeg under a dedicated context so Close() can kill it without
	// touching the parent ctx.
	ffCtx, cancel := context.WithCancel(context.Background())
	s.cancel = cancel
	var args []string
	if cfg.VideoCopy {
		args = buildHLSCopyArgs(cfg, probe, tmpDir)
	} else {
		args = buildHLSFFmpegArgsAt(cfg, probe, tmpDir, startIdx, segmentStartSec(startIdx))
	}
	cmd := exec.CommandContext(ffCtx, cfg.Transcode.FFmpegPath, args...)
	cmd.Stderr = &hlsStderrCapture{owner: s}
	if err := cmd.Start(); err != nil {
		cancel()
		cleanupOnError()
		return nil, fmt.Errorf("hls: start ffmpeg: %w", err)
	}
	s.cmd = cmd
	go s.waitFFmpeg()
	go s.pollSegments(ffCtx)

	// Subtitles are no longer extracted per-session: the web player fetches each
	// text track on demand as WebVTT from the /sub endpoint (subtitleHandler).
	// The old per-session extraction wrote subs/sub-N.vtt that nothing requests
	// anymore (the master playlist no longer advertises a SUBTITLES group), so
	// it was pure wasted ffmpeg work — and its Close() wait could block HLS cache
	// persistence on a slow extract. Removed.
	cachedNote := ""
	if cfg.Cache != nil {
		cachedNote = fmt.Sprintf(" (cache-miss %s)", cacheKey)
	}
	// Surface the encoder profile so a "first-start was slow" report can be
	// triaged from the agent log alone — `encoder=libx264 accel=none` means
	// the user's ffmpeg has no HW encoders compiled in, which is the most
	// common root cause (linuxbrew, default brew formula on macOS).
	encoderNote := ""
	if cfg.VideoCopy {
		encoderNote = "encoder=copy (no video re-encode)"
	} else {
		profile := ResolveEncoderProfile(cfg.Transcode.HWAccel, cfg.Transcode.Preset)
		presetNote := ""
		if profile.Preset != "" {
			presetNote = " preset=" + profile.Preset
		}
		encoderNote = fmt.Sprintf("encoder=%s accel=%s%s", profile.Codec, string(cfg.Transcode.HWAccel), presetNote)
	}
	startNote := ""
	if cfg.VideoCopy && cfg.StartSec > 0 {
		startNote = fmt.Sprintf(" start=%.0fs", cfg.StartSec)
	} else if startIdx > 0 {
		startNote = fmt.Sprintf(" start=seg-%d@%.0fs", startIdx, segmentStartSec(startIdx))
	}
	log.Printf("[hls %s] started: %s, %.1fs, %d segs (quality=%s, %s)%s%s",
		shortHLSID(cfg.SessionID), cfg.logName(),
		probe.DurationSec, segCount, coalesce(cfg.Quality, "auto"),
		encoderNote, cachedNote, startNote)
	return s, nil
}
// shortHLSID abbreviates a session ID to its first 8 bytes for use as a log
// prefix. IDs of 8 bytes or fewer are returned unchanged.
func shortHLSID(id string) string {
	const maxLen = 8
	if len(id) <= maxLen {
		return id
	}
	return id[:maxLen]
}
// ProbeInfo flattens the session's probe result into plain maps and slices
// so the web player can render quality / codec / track info without probing
// the source again. When the session has no probe, an empty (non-nil) map is
// returned.
//
// Top-level keys: videoCodec, width, height, bitDepth, hdr, durationSec,
// container, audio, subtitles. "audio" and "subtitles" are always non-nil
// slices holding one map per track.
func (s *HLSSession) ProbeInfo() map[string]any {
	p := s.probe
	if p == nil {
		return map[string]any{}
	}

	audioOut := make([]map[string]any, len(p.AudioTracks))
	for i, track := range p.AudioTracks {
		audioOut[i] = map[string]any{
			"index":    track.Index,
			"lang":     track.Lang,
			"codec":    track.Codec,
			"channels": track.Channels,
			"title":    track.Title,
			"default":  track.Default,
		}
	}

	subOut := make([]map[string]any, len(p.SubtitleTracks))
	for i, track := range p.SubtitleTracks {
		// `external`/`path` let the stream server attach a tokened /sub vttUrl
		// (path-addressed for sidecars, index-addressed for embedded). `path` is
		// stripped after the URL is built so the raw path isn't doubled in JSON.
		subOut[i] = map[string]any{
			"index":    track.Index,
			"lang":     track.Lang,
			"codec":    track.Codec,
			"title":    track.Title,
			"forced":   track.Forced,
			"text":     track.IsTextSubtitle(),
			"external": track.External,
			"path":     track.Path,
		}
	}

	info := map[string]any{
		"videoCodec":  p.VideoCodec,
		"width":       p.Width,
		"height":      p.Height,
		"bitDepth":    p.BitDepth,
		"hdr":         p.HDR,
		"durationSec": p.DurationSec,
		"container":   p.Container,
	}
	info["audio"] = audioOut
	info["subtitles"] = subOut
	return info
}
// ReadyCount reports the session's readyMax watermark, read under readyMu.
// Segment idx is on disk iff idx < ReadyCount() AND idx >= WriterStartIdx().
//
// For a from-zero encode this is simply "how many segments are on disk". For
// a resume session (StartSec > 0) the watermark is pre-seeded to the start
// index, so the FIRST real segment has landed only once
// ReadyCount() > WriterStartIdx() — use that comparison, not `>= 1`, to flip
// the player's "Preparando…" UI. For cache-HIT sessions the watermark is
// seeded by StartHLSSession before it returns and is intended to equal
// segmentCount.
func (s *HLSSession) ReadyCount() int {
	s.readyMu.Lock()
	watermark := s.readyMax
	s.readyMu.Unlock()
	return watermark
}
// EncodeExited returns the session's `exited` flag, read under readyMu. The
// flag is true once ffmpeg has finished (cleanly or by crashing) and also for
// cache-HIT sessions, where ffmpeg never ran; it is false while an encode is
// still producing segments. HasLiveEncode relies on it to defer prewarm work.
func (s *HLSSession) EncodeExited() bool {
	s.readyMu.Lock()
	done := s.exited
	s.readyMu.Unlock()
	return done
}
// WriterStartIdx reports the index of the first segment the CURRENT ffmpeg
// writer was started at (ffmpegSegStart, read under mu): 0 for an encode from
// the beginning, the resume segment for a StartSec session, or the seek
// target after a seek-restart. Pair it with ReadyCount to decide whether the
// first real segment has landed.
func (s *HLSSession) WriterStartIdx() int {
	s.mu.Lock()
	first := s.ffmpegSegStart
	s.mu.Unlock()
	return first
}
// FromCache tells whether the session replays a completed encode from the
// HLS cache, i.e. no ffmpeg subprocess was spawned for it. Ready-watcher
// logic uses this to skip polling: a cache HIT is ready the moment
// StartHLSSession returns.
func (s *HLSSession) FromCache() bool {
	hit := s.fromCache
	return hit
}
// TranscodeStats is a point-in-time snapshot of live ffmpeg progress for one
// HLS session (F3). SpeedX < 1.0 means the encode runs slower than realtime —
// the player can't sustain playback without buffering. Samples==0 means no
// -stats line has been parsed yet (the watcher keeps waiting before reporting).
//
// Values are produced by HLSSession.GetTranscodeStats; the struct holds plain
// copies, so a snapshot does not change after it is returned.
type TranscodeStats struct {
SpeedX float64 // EWMA of ffmpeg speed= (×realtime; 1.0 = exactly realtime)
Fps float64 // EWMA of ffmpeg fps=
Samples int // progress lines parsed so far (0 = no telemetry yet)
InputBound bool // source read hit I/O errors (slow/broken pull, not encode)
FromCache bool // replayed from cache → no live encode, stats meaningless
}
// GetTranscodeStats copies the session's telemetry — the speed/fps EWMAs, the
// sample count, the input-bound hint and the from-cache flag — into a
// TranscodeStats value. The copy is taken while holding statsMu.
func (s *HLSSession) GetTranscodeStats() TranscodeStats {
	var snap TranscodeStats

	s.statsMu.Lock()
	snap.SpeedX = s.speedEWMA
	snap.Fps = s.fpsEWMA
	snap.Samples = s.speedSamples
	snap.InputBound = s.inputBound
	snap.FromCache = s.fromCache
	s.statsMu.Unlock()

	return snap
}
// hlsStatsWarmupSkip is the number of leading ffmpeg -stats frames that are
// thrown away before the EWMA is trusted. The first readings reflect the
// pipeline filling up (often speed=0.0x); counting them would push a healthy
// encoder into a false "struggling" verdict and pause a stream that plays fine
// once it has warmed up.
const hlsStatsWarmupSkip = 2
// recordProgress folds one parsed ffmpeg -stats sample (speedX, fps) into the
// session's exponentially weighted moving averages.
//
// The first hlsStatsWarmupSkip calls are counted and discarded so that the
// encoder spin-up is not mistaken for a steady-state slowdown. The first kept
// sample seeds the averages; later ones are blended with alpha=0.3, which
// smooths the noisy per-line numbers while still tracking a sustained slowdown
// within a few samples (~2s of encoding).
func (s *HLSSession) recordProgress(speedX, fps float64) {
	const alpha = 0.3

	s.statsMu.Lock()
	defer s.statsMu.Unlock()

	// Still inside the cold-start window: count the frame and ignore it.
	if s.warmupSeen < hlsStatsWarmupSkip {
		s.warmupSeen++
		return
	}

	seed := s.speedSamples == 0
	s.speedSamples++
	if seed {
		s.speedEWMA, s.fpsEWMA = speedX, fps
		return
	}
	s.speedEWMA = alpha*speedX + (1-alpha)*s.speedEWMA
	s.fpsEWMA = alpha*fps + (1-alpha)*s.fpsEWMA
}
// markInputBound records that ffmpeg reported a source-read error, meaning the
// bottleneck is the input pull (slow debrid link / dropped torrent peer) and
// not the encoder.
func (s *HLSSession) markInputBound() {
	s.statsMu.Lock()
	defer s.statsMu.Unlock()
	s.inputBound = true
}
// IsClosed reports whether Close() has been invoked. It is the exported twin
// of isClosed so that external watchers (the ready-webhook goroutine in
// cmd/daemon.go) can stop polling a session that was torn down through another
// code path (registry replace, idle sweep) without racing on the unexported
// helper.
func (s *HLSSession) IsClosed() bool {
	closed := s.isClosed()
	return closed
}
// MasterPlaylist returns the rendered master.m3u8 text held by the session.
func (s *HLSSession) MasterPlaylist() string {
	return s.manifestRoot
}
// VideoPlaylist returns the rendered video media playlist text held by the
// session.
func (s *HLSSession) VideoPlaylist() string {
	return s.manifestVideo
}
// DurationSeconds returns the source duration, in seconds.
func (s *HLSSession) DurationSeconds() float64 {
	return s.durationSec
}
// Probe returns the probe metadata the session was started with.
func (s *HLSSession) Probe() *StreamProbe {
	return s.probe
}
// Touch stamps the session's last-activity time with the current time. The
// registry sweeper compares that timestamp against hlsSessionTTL.
func (s *HLSSession) Touch() {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.lastTouch = time.Now()
}
// Close stops ffmpeg and releases every handler blocked on segment readiness.
// It is idempotent: a second call returns nil without doing anything. The
// returned error is always nil.
//
// What happens to the on-disk segments depends on the cache:
//   - no cache for this session → the tmpDir is removed.
//   - cache HIT session → the directory is kept and only unpinned.
//   - cache write session → when ffmpeg recorded no exit error and every
//     segment is present, the entry is marked complete and kept; otherwise the
//     entry is invalidated so a half-written cache never serves a later play.
func (s *HLSSession) Close() error {
	s.mu.Lock()
	if s.closed {
		s.mu.Unlock()
		return nil
	}
	s.closed = true
	stopEncode, workDir := s.cancel, s.tmpDir
	s.mu.Unlock()

	if stopEncode != nil {
		stopEncode()
	}

	// Wake every handler parked on readyCh and mark the encode as finished.
	s.readyMu.Lock()
	if waiters := s.readyCh; waiters != nil {
		close(waiters)
		s.readyCh = nil
	}
	s.exited = true
	encodeErr := s.exitErr
	s.readyMu.Unlock()

	id := shortHLSID(s.cfg.SessionID)

	// No cache involved: the tmpdir is ours to delete.
	if s.cache == nil || s.cacheKey == "" {
		if workDir != "" {
			_ = os.RemoveAll(workDir) // best-effort cleanup
		}
		log.Printf("[hls %s] closed", id)
		return nil
	}

	// Deferred calls run last-in-first-out: the writer lock (if held) is
	// released before the entry is unpinned.
	defer s.cache.Unpin(s.cacheKey)
	if s.writerLockHeld {
		defer s.cache.ReleaseWriter(s.cacheKey)
	}

	if s.fromCache {
		log.Printf("[hls %s] closed (cache reuse)", id)
		return nil
	}

	if encodeErr == nil && s.allSegmentsPresent() {
		err := s.cache.MarkComplete(s.cacheKey)
		if err == nil {
			log.Printf("[hls %s] cache persisted %s", id, s.cacheKey)
			return nil
		}
		log.Printf("[hls %s] cache persist failed: %v", id, err)
	}

	// Partial or failed encode: drop the entry so the next play re-encodes.
	if err := s.cache.Invalidate(s.cacheKey); err != nil {
		log.Printf("[hls %s] cache invalidate failed: %v", id, err)
	}
	log.Printf("[hls %s] closed (cache discarded)", id)
	return nil
}
// allSegmentsPresent decides whether a finished session is cacheable: init.mp4
// and every expected segment must exist on disk with a non-zero size, and the
// segment poller must have advanced readyMax to at least segmentCount-1.
//
// readyMax is consulted in addition to the file sizes because pollSegments
// only advances it once a segment's successor exists (proof the predecessor is
// fully closed), whereas a bare size check could accept a file that is still
// being written.
func (s *HLSSession) allSegmentsPresent() bool {
	videoDir := filepath.Join(s.tmpDir, "video")
	nonEmpty := func(name string) bool {
		info, err := os.Stat(filepath.Join(videoDir, name))
		return err == nil && info.Size() != 0
	}

	if !nonEmpty("init.mp4") {
		return false
	}

	s.readyMu.Lock()
	validated := s.readyMax
	s.readyMu.Unlock()
	if validated < s.segmentCount-1 {
		return false
	}

	for idx := 0; idx < s.segmentCount; idx++ {
		if !nonEmpty(fmt.Sprintf("seg-%d.m4s", idx)) {
			return false
		}
	}
	return true
}
// waitFFmpeg reaps the ffmpeg process and records its exit error for handlers.
// It blocks in cmd.Wait, so it is meant to run on its own goroutine, one per
// ffmpeg launch.
//
// Auto-restart supervisor: if ffmpeg exits with an error and the session is
// still open, it attempts to restart the encode from the last known good
// segment. Restarts are bounded to maxRestarts within restartWindow to avoid
// thrashing on a permanently broken source. Copy-mode sessions are never
// restarted.
//
// NOTE(review): restartFromSegment kills ffmpeg for a seek-restart without
// closing the session, so this supervisor path also appears reachable for a
// deliberately killed process — confirm that case is intended or guarded
// elsewhere.
func (s *HLSSession) waitFFmpeg() {
	err := s.cmd.Wait()

	// Publish the exit and wake every handler parked on readyCh.
	s.readyMu.Lock()
	s.exitErr = err
	s.exited = true
	if s.readyCh != nil {
		close(s.readyCh)
		s.readyCh = nil
	}
	readyMax := s.readyMax
	s.readyMu.Unlock()

	if err == nil || s.isClosed() {
		return
	}
	log.Printf("[hls %s] ffmpeg exited: %v", shortHLSID(s.cfg.SessionID), err)

	// Copy mode: no auto-restart. restartFromSegment's `-ss segmentStartSec(N)`
	// math assumes uniform 2s segments, which copy mode doesn't have — a
	// restart would corrupt the timeline. A failed copy surfaces through the
	// player's probe deadline / fallback chain instead.
	if s.cfg.VideoCopy {
		log.Printf("[hls %s] copy session failed — not restarting (player falls back)", shortHLSID(s.cfg.SessionID))
		return
	}

	// Decide whether to attempt an auto-restart. We don't restart when:
	//   - the session was closed externally (kill on quality change etc.)
	//   - we've already retried maxRestarts times within restartWindow
	//     (broken file)
	const maxRestarts = 3
	const restartWindow = 60 * time.Second

	s.mu.Lock()
	if s.closed {
		s.mu.Unlock()
		return
	}
	// Reset the counter when the previous restart was outside the window;
	// no IsZero check is needed because the zero time is well in the past and
	// also satisfies the "outside window" branch.
	if time.Since(s.lastRestartAt) > restartWindow {
		s.restartCount = 0
	}
	if s.restartCount >= maxRestarts {
		s.mu.Unlock()
		log.Printf("[hls %s] giving up after %d auto-restarts", shortHLSID(s.cfg.SessionID), maxRestarts)
		return
	}
	s.restartCount++
	s.lastRestartAt = time.Now()
	// Snapshot the attempt number while s.mu is still held: restartCount is
	// guarded by s.mu, so reading it later for the log line would be a data
	// race with another supervisor goroutine.
	attempt := s.restartCount
	s.mu.Unlock()

	// Debrid URL session (hueco #2 / 2c): the likeliest cause of an ffmpeg
	// network exit is the debrid link expiring. Re-resolve a fresh one before
	// restarting, else the restart just retries the dead URL and burns the
	// retry budget. The network call runs lock-free; the result is stored in
	// s.liveURL under s.mu because restartFromSegment reads it from the HTTP
	// handler goroutine too (seek-restart), not just this supervisor goroutine.
	if s.cfg.SourceURL != "" && s.cfg.RefreshURL != nil {
		rctx, cancel := context.WithTimeout(context.Background(), 20*time.Second)
		newURL, rerr := s.cfg.RefreshURL(rctx)
		cancel()
		if rerr != nil {
			log.Printf("[hls %s] URL refresh before restart failed: %v", shortHLSID(s.cfg.SessionID), rerr)
		} else {
			s.mu.Lock()
			s.liveURL = newURL
			s.mu.Unlock()
			log.Printf("[hls %s] debrid URL refreshed before restart", shortHLSID(s.cfg.SessionID))
		}
	}

	// Restart from the readyMax value captured when ffmpeg exited. The clamp
	// keeps the target non-negative; a session that never produced anything
	// therefore retries from segment 0, which covers initial startup failures
	// on transient errors.
	target := readyMax
	if target < 0 {
		target = 0
	}
	log.Printf("[hls %s] auto-restarting from segment %d (attempt %d/%d)",
		shortHLSID(s.cfg.SessionID), target, attempt, maxRestarts)
	if rerr := s.restartFromSegment(target); rerr != nil {
		log.Printf("[hls %s] auto-restart failed: %v", shortHLSID(s.cfg.SessionID), rerr)
	}
}
// pollSegments watches the video tmpdir for newly finished .m4s files and
// advances readyMax, waking waiters each time it moves. It ticks every 250 ms
// and returns when ctx is cancelled, or once ffmpeg has exited and every
// segment has been accounted for.
//
// ffmpeg writes a segment by first creating an empty file, then
// closing+renaming on completion (atomic-replace), so a segment counts as done
// when its size is > 0 AND the *next* segment exists. For the last segment no
// successor can appear, so ffmpeg's exit is the proof instead.
func (s *HLSSession) pollSegments(ctx context.Context) {
	ticker := time.NewTicker(250 * time.Millisecond)
	defer ticker.Stop()
	videoDir := filepath.Join(s.tmpDir, "video")
	for {
		select {
		case <-ctx.Done():
			return
		case <-ticker.C:
		}

		// Snapshot the state, then walk the disk without holding the lock.
		s.readyMu.Lock()
		start := s.readyMax
		exited := s.exited
		s.readyMu.Unlock()

		// Find the highest contiguous index whose successor exists (which
		// proves the segment is fully closed).
		highest := start
		for i := start; i < s.segmentCount; i++ {
			cur := filepath.Join(videoDir, fmt.Sprintf("seg-%d.m4s", i))
			next := filepath.Join(videoDir, fmt.Sprintf("seg-%d.m4s", i+1))
			ci, err := os.Stat(cur)
			if err != nil || ci.Size() == 0 {
				break
			}
			// Last segment is "ready" only when ffmpeg has exited (no successor
			// can ever appear) or when a later segment exists.
			if i == s.segmentCount-1 {
				if !exited {
					break
				}
				highest = i + 1
				break
			}
			if _, err := os.Stat(next); err != nil {
				break
			}
			highest = i + 1
		}

		if highest > start {
			s.readyMu.Lock()
			if s.readyMax != start {
				// readyMax moved while we were walking the disk (a restart
				// re-based it for a new writer). Our result was computed from
				// a stale snapshot; publishing it would clobber the new
				// writer's starting index. Drop it and re-evaluate next tick.
				s.readyMu.Unlock()
				continue
			}
			s.readyMax = highest
			ch := s.readyCh
			s.readyCh = make(chan struct{})
			s.readyMu.Unlock()
			if ch != nil {
				close(ch)
			}
		}

		if exited && highest >= s.segmentCount {
			return
		}
	}
}
// waitForSegment blocks until segment idx has been marked ready, ffmpeg has
// exited, ctx is cancelled, or 60 seconds have elapsed.
//
// It returns nil only when idx is below readyMax. Otherwise it returns ctx's
// error on cancellation, an error wrapping ffmpeg's exit error (or a generic
// one when ffmpeg exited without an error) if the encode ended first, or a
// timeout error.
func (s *HLSSession) waitForSegment(ctx context.Context, idx int) error {
	// One timer for the whole wait. readyCh is replaced every time a segment
	// lands, so the loop can spin many times; arming a fresh time.After on
	// each pass would allocate a timer per wake-up.
	timeout := time.NewTimer(60 * time.Second)
	defer timeout.Stop()

	for {
		s.readyMu.Lock()
		ready := idx < s.readyMax
		exited := s.exited
		ch := s.readyCh
		exitErr := s.exitErr
		s.readyMu.Unlock()

		if ready {
			return nil
		}
		if exited {
			if exitErr != nil {
				return fmt.Errorf("hls: ffmpeg exited: %w", exitErr)
			}
			return errors.New("hls: ffmpeg exited before segment ready")
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-ch:
			// Progress was published: loop and re-check. Readiness is
			// evaluated before the timeout, so a segment that landed right at
			// the deadline is still served.
		case <-timeout.C:
			return errors.New("hls: timeout waiting for segment")
		}
	}
}
// isClosed reports, under s.mu, whether Close() has been invoked.
func (s *HLSSession) isClosed() bool {
	s.mu.Lock()
	closed := s.closed
	s.mu.Unlock()
	return closed
}
// ---- HTTP handlers ----
// ServeMaster refreshes the session's activity timestamp and writes the
// rendered master.m3u8 to w as an uncached HLS playlist response.
func (s *HLSSession) ServeMaster(w http.ResponseWriter, r *http.Request) {
	s.Touch()

	hdr := w.Header()
	hdr.Set("Content-Type", "application/vnd.apple.mpegurl")
	hdr.Set("Cache-Control", "no-cache")

	_, _ = io.WriteString(w, s.manifestRoot) // write errors mean the client left
}
// ServeVideoPlaylist refreshes the session's activity timestamp and writes the
// video media playlist (index.m3u8) to w. Copy-mode sessions are delegated to
// serveCopyPlaylist; every other session gets the pre-rendered playlist text.
func (s *HLSSession) ServeVideoPlaylist(w http.ResponseWriter, r *http.Request) {
	s.Touch()

	hdr := w.Header()
	hdr.Set("Content-Type", "application/vnd.apple.mpegurl")
	hdr.Set("Cache-Control", "no-cache")

	if !s.cfg.VideoCopy {
		_, _ = io.WriteString(w, s.manifestVideo) // write errors mean the client left
		return
	}
	s.serveCopyPlaylist(w, r)
}
// serveCopyPlaylist serves ffmpeg's own media playlist (VideoCopy mode).
//
// The file appears within ~1 s of spawn (copy is I/O-bound), but the player's
// first fetch can race it, so the handler polls every 100 ms for up to 10 s
// rather than returning a 404 that hls.js would surface as a manifest error.
// If the playlist is still missing or empty at the deadline, or the request is
// cancelled, it answers 503. The file is re-read on every request because the
// playlist GROWS (EVENT) until ffmpeg appends ENDLIST, and players re-poll
// growing playlists by design.
func (s *HLSSession) serveCopyPlaylist(w http.ResponseWriter, r *http.Request) {
	const (
		versionTag = "#EXT-X-VERSION:7\n"
		startTag   = "#EXT-X-START:TIME-OFFSET=0,PRECISE=YES\n"
	)
	playlistPath := filepath.Join(s.tmpDir, "video", copyPlaylistName)
	giveUpAt := time.Now().Add(10 * time.Second)

	for {
		raw, err := os.ReadFile(playlistPath)
		if err == nil && len(raw) > 0 {
			// Until ENDLIST lands a copy session is a growing EVENT playlist,
			// and some native players (iOS) treat any not-yet-ended playlist
			// like LIVE and join at the live edge instead of position 0.
			// EXT-X-START pins the start to 0 explicitly (RFC 8216 §4.3.5.2);
			// harmless once the playlist is final.
			text := string(raw)
			if !strings.Contains(text, "#EXT-X-START") {
				text = strings.Replace(text, versionTag, versionTag+startTag, 1)
			}
			_, _ = w.Write([]byte(text)) // write errors mean the client left
			return
		}

		if r.Context().Err() != nil || time.Now().After(giveUpAt) {
			http.Error(w, "playlist not ready", http.StatusServiceUnavailable)
			return
		}

		select {
		case <-r.Context().Done():
		case <-time.After(100 * time.Millisecond):
		}
	}
}
// ServeInit writes init.mp4 (the fMP4 init segment) to w.
//
// The init segment is the first thing ffmpeg writes, so the handler waits for
// it to appear with a non-zero size, re-checking every 150 ms for up to 30 s.
// It answers 503 if the session is closed, the request is cancelled (client
// went away), or the deadline passes first.
func (s *HLSSession) ServeInit(w http.ResponseWriter, r *http.Request) {
	s.Touch()
	path := filepath.Join(s.tmpDir, "video", "init.mp4")
	ctx := r.Context()

	deadline := time.Now().Add(30 * time.Second)
	for {
		if fi, err := os.Stat(path); err == nil && fi.Size() > 0 {
			break
		}
		if s.isClosed() || ctx.Err() != nil || time.Now().After(deadline) {
			http.Error(w, "init segment unavailable", http.StatusServiceUnavailable)
			return
		}
		// Wait for the next check, but wake immediately when the request is
		// cancelled so a disconnected client does not keep this handler
		// polling until the deadline.
		select {
		case <-ctx.Done():
		case <-time.After(150 * time.Millisecond):
		}
	}

	w.Header().Set("Content-Type", "video/mp4")
	w.Header().Set("Cache-Control", "max-age=3600")
	http.ServeFile(w, r, path)
}
// ServeSegment writes video segment idx to w, blocking until ffmpeg has
// produced it (bounded by waitForSegment's timeout). An out-of-range idx is
// answered with 404; a failed restart or wait is answered with 503.
//
// Seek-restart: when the requested segment is not on disk and is either far
// ahead of what has been produced or behind the current writer's start, ffmpeg
// is killed and restarted at that segment. Without this, a user dragging the
// scrubber to minute 30 would block until the encoder reaches minute 30 in
// real time (~25 minutes wait at 1080p software encode).
func (s *HLSSession) ServeSegment(w http.ResponseWriter, r *http.Request, idx int) {
	s.Touch()

	if idx < 0 || idx >= s.segmentCount {
		http.Error(w, "segment out of range", http.StatusNotFound)
		return
	}

	segPath := filepath.Join(s.tmpDir, "video", fmt.Sprintf("seg-%d.m4s", idx))
	sendFile := func() {
		hdr := w.Header()
		hdr.Set("Content-Type", "video/mp4")
		hdr.Set("Cache-Control", "max-age=3600")
		http.ServeFile(w, r, segPath)
	}

	// Fast path: the file is already on disk (either the current writer
	// reached it, or an earlier writer left it there before a seek-restart).
	if info, err := os.Stat(segPath); err == nil && info.Size() > 0 {
		sendFile()
		return
	}

	s.mu.Lock()
	writerStart := s.ffmpegSegStart
	s.mu.Unlock()

	s.readyMu.Lock()
	produced := s.readyMax
	s.readyMu.Unlock()

	// Copy mode never seek-restarts: ffmpeg outruns playback (I/O-bound), the
	// playlist only lists fully-written segments (temp_file), and segment
	// indices don't map to uniform 2s slots anyway. Just wait for the writer.
	// Otherwise restart when the request is hlsSeekAhead or more beyond what
	// has been produced, or lies before the writer's starting segment.
	outOfReach := idx >= produced+hlsSeekAhead || idx < writerStart
	if !s.cfg.VideoCopy && outOfReach {
		if err := s.restartFromSegment(idx); err != nil {
			http.Error(w, err.Error(), http.StatusServiceUnavailable)
			return
		}
	}

	if err := s.waitForSegment(r.Context(), idx); err != nil {
		http.Error(w, err.Error(), http.StatusServiceUnavailable)
		return
	}
	sendFile()
}
// restartFromSegment kills the current ffmpeg, then spawns a new one whose
// `-ss` offset corresponds to segment `targetIdx`. The caller must NOT hold
// s.mu when calling — the function takes both s.mu and s.readyMu.
//
// It returns an error when the session is in copy mode, when the session is
// already closed, or when the new ffmpeg process fails to start. It returns
// nil without doing anything when a live writer already started at targetIdx.
// On success the session's cmd/cancel/ffmpegSegStart and the ready-state
// fields are re-based on the new process, and fresh waitFFmpeg and
// pollSegments goroutines are launched for it.
func (s *HLSSession) restartFromSegment(targetIdx int) error {
	if s.cfg.VideoCopy {
		// Defensive: callers already gate on VideoCopy, but the `-ss
		// segmentStartSec(N)` math below assumes uniform 2s segments and
		// would corrupt a copy session's keyframe-cut timeline.
		return errors.New("hls: seek-restart not supported in copy mode")
	}
	s.mu.Lock()
	if s.closed {
		s.mu.Unlock()
		return errors.New("hls: session closed")
	}
	// `s.exited` lives under s.readyMu (see field comment near declaration);
	// take that lock briefly (nested inside s.mu) so the composite check below
	// reads a value consistent with the `pollSegments` / `waitFFmpeg` writers.
	s.readyMu.Lock()
	exited := s.exited
	s.readyMu.Unlock()
	if targetIdx == s.ffmpegSegStart && !exited {
		// Already writing from this point — nothing to do.
		s.mu.Unlock()
		return nil
	}
	// Capture the previous process handles under the lock, then act on them
	// after releasing it.
	prevCancel := s.cancel
	prevCmd := s.cmd
	s.mu.Unlock()
	if prevCancel != nil {
		prevCancel()
	}
	if prevCmd != nil && prevCmd.Process != nil {
		// Best-effort: the error is ignored (e.g. the process may already be
		// gone).
		_ = prevCmd.Process.Kill()
	}
	// Wait for old ffmpeg to exit so its file handles release. waitFFmpeg
	// (the original goroutine) sets s.exited = true; poll every 50 ms until it
	// does, for at most 5 s.
	deadline := time.Now().Add(5 * time.Second)
	for {
		s.readyMu.Lock()
		exited := s.exited
		s.readyMu.Unlock()
		if exited {
			break
		}
		if time.Now().After(deadline) {
			break // proceed anyway; new ffmpeg will overwrite
		}
		time.Sleep(50 * time.Millisecond)
	}
	// Build args for the new ffmpeg with -ss offset. Segments are non-uniform
	// (seg-0 is hlsInitSegmentDuration s, the rest are hlsSegmentDuration s),
	// so use segmentStartSec for the seek time instead of multiplying.
	// Use a local cfg copy carrying the live (possibly-refreshed) debrid URL,
	// read under s.mu — this runs from the HTTP handler goroutine too, so it
	// can't read s.liveURL unsynchronised while waitFFmpeg writes it (2c).
	startSec := segmentStartSec(targetIdx)
	cfg := s.cfg
	s.mu.Lock()
	cfg.SourceURL = s.liveURL // "" for local-file sessions — no-op, sourceRef falls back to SourcePath
	s.mu.Unlock()
	args := buildHLSFFmpegArgsAt(cfg, s.probe, s.tmpDir, targetIdx, startSec)
	// The new process gets its own cancellable context; cancel is stored on
	// the session below so later Close/restart calls can stop it.
	ffCtx, cancel := context.WithCancel(context.Background())
	cmd := exec.CommandContext(ffCtx, s.cfg.Transcode.FFmpegPath, args...)
	cmd.Stderr = &hlsStderrCapture{owner: s}
	if err := cmd.Start(); err != nil {
		cancel()
		return fmt.Errorf("hls: restart ffmpeg: %w", err)
	}
	// Reset session state so the poll + wait machinery picks up the new run.
	s.mu.Lock()
	s.cmd = cmd
	s.cancel = cancel
	s.ffmpegSegStart = targetIdx
	s.mu.Unlock()
	s.readyMu.Lock()
	s.readyMax = targetIdx // new writer's segments start at targetIdx
	s.exited = false
	s.exitErr = nil
	s.readyCh = make(chan struct{})
	s.readyMu.Unlock()
	// One reaper and one segment poller per ffmpeg run; the poller stops when
	// ffCtx is cancelled.
	go s.waitFFmpeg()
	go s.pollSegments(ffCtx)
	log.Printf("[hls %s] restarted ffmpeg at segment %d (%.1fs)",
		shortHLSID(s.cfg.SessionID), targetIdx, startSec)
	return nil
}
// ---- ffmpeg argument builders ----
// EncoderProfile names the codec + preset + decoder hint combination the HLS
// pipeline picks for a given hardware backend and transcode config.
//
// It is exposed so callers can log the chosen encoder before ffmpeg launches,
// and so the demuxer-side `-hwaccel` flag and the encoder-side argv stay in
// sync (otherwise the two switches in buildHLSFFmpegArgsAt could silently
// drift when a new backend is added).
type EncoderProfile struct {
	// Codec is the ffmpeg encoder name (e.g. "h264_nvenc", "libx264").
	Codec string
	// Preset is the preset string, or "" when the codec has no preset knob.
	Preset string
	// DecodeHwAccel is the ffmpeg `-hwaccel` value (e.g. "cuda", "qsv",
	// "vaapi"), or "" when no decode hint is passed.
	DecodeHwAccel string
}
// ResolveEncoderProfile mirrors the codec + preset selection inside
// buildHLSFFmpegArgsAt so callers (registry, log lines, diagnostic endpoints)
// can know what ffmpeg will be told to do without parsing argv.
//
// configuredPreset uses libx264's vocabulary (ultrafast…veryslow), so it is
// honoured only for libx264, where an empty value falls back to "superfast".
// Passing it to NVENC / QSV would have ffmpeg reject the argv (NVENC uses
// p1-p7, QSV its own subset), so those encoders always get their hardcoded
// vendor preset. VAAPI and VideoToolbox get no preset, and any codec not
// listed here gets neither a preset nor a decode hint.
//
// DecodeHwAccel follows the encoder family: "cuda" for NVENC, "qsv" for QSV,
// "vaapi" for VAAPI. `-hwaccel_output_format vaapi` is intentionally NOT part
// of the profile: it would pin decoded frames to GPU memory, but the filter
// chain (scale/format/setparams) runs on CPU and can't consume VAAPI surfaces.
// Keeping output frames on CPU makes the filter chain work while the input
// side still benefits from HW-accelerated decode.
func ResolveEncoderProfile(hw HWAccel, configuredPreset string) EncoderProfile {
	// Start from "codec only"; each known encoder fills in what it needs.
	// VideoToolbox (speed/quality dial is `-q:v`, per-encoder flags instead of
	// a demuxer hint) and unknown / future codecs keep the empty preset and
	// decode hint.
	profile := EncoderProfile{Codec: hw.FFmpegVideoCodec("h264")}

	switch profile.Codec {
	case "libx264":
		profile.Preset = configuredPreset
		if profile.Preset == "" {
			profile.Preset = "superfast"
		}
	case "h264_nvenc":
		profile.Preset, profile.DecodeHwAccel = "p3", "cuda"
	case "h264_qsv":
		profile.Preset, profile.DecodeHwAccel = "veryfast", "qsv"
	case "h264_vaapi":
		profile.DecodeHwAccel = "vaapi"
	}
	return profile
}
// buildHLSFFmpegArgsAt returns the argv for an HLS encode that starts at the
// given segment index (`-ss <startSec>`) and writes segments numbered from
// startIdx so they slot into the existing manifest at the correct position.
// `-output_ts_offset` keeps the segment PTS aligned with manifest timeline.
func buildHLSFFmpegArgsAt(cfg HLSSessionConfig, probe *StreamProbe, tmpDir string, startIdx int, startSec float64) []string {
profile := ResolveEncoderProfile(cfg.Transcode.HWAccel, cfg.Transcode.Preset)
// -stats forces ffmpeg to emit the frame=/fps=/speed= progress line to
// stderr even at -loglevel warning; hlsStderrCapture parses it for live
// transcode telemetry (F3) without logging it.
args := []string{"-y", "-hide_banner", "-loglevel", "warning", "-stats"}
// F4 — full-GPU NVENC downscale. When we're downscaling an SDR source with
// NVENC on a host whose ffmpeg can run scale_cuda, and NO subtitle is burned
// in, keep the decoded frame on the GPU through scale + encode (scale_cuda →
// h264_nvenc) instead of copying every frame to the CPU for `scale=`. That
// CPU round-trip is the wall on modest GPUs (a strong box still gains ~37%).
// Strictly gated — the cases that need CPU frames stay on the CPU path:
// - HDR (the libplacebo Vulkan / zscale CPU tonemap can't consume a CUDA
// surface, and mixing CUDA scale with the Vulkan pass is fragile),
// - burn-in (the scale2ref+overlay composite runs on CPU frames),
// - non-NVENC encoders, and no-op when not actually downscaling.
// Output height cap for this session — resolved once here so the F4 gate and
// the filter chain below share ONE value (a drift between them would emit
// scale_cuda for a height that isn't actually a downscale).
qcap := resolveQualityCap(cfg.Quality)
maxH := qcap.MaxHeight
if maxH == 0 {
maxH = cfg.Transcode.MaxHeight
}
useCudaScale := profile.Codec == "h264_nvenc" &&
profile.DecodeHwAccel == "cuda" &&
cfg.Transcode.HasScaleCuda &&
probe.HDR == "" &&
cfg.burnSubtitleIndexOrNone() < 0 &&
maxH > 0 && probe.Height > maxH
// Demuxer-side HW-decode hint. Sourced from the profile so a future
// codec/hint mismatch is impossible — the encoder + decode hint are
// computed once and stay coherent. Notably we do NOT add
// `-hwaccel_output_format vaapi` on the VAAPI path: that pins decoded
// frames to GPU memory but our CPU filter chain (scale, format,
// setparams) can't consume VAAPI surfaces. Letting frames flow on CPU
// keeps the filter chain working; the encoder still gets HW-accelerated
// decode on the input side.
if profile.DecodeHwAccel != "" {
args = append(args, "-hwaccel", profile.DecodeHwAccel)
// F4: pin decoded frames as CUDA surfaces ONLY on the gated scale_cuda
// path, so scale_cuda + h264_nvenc avoid the CPU copy. Off otherwise —
// the CPU filter chain can't consume CUDA surfaces.
if useCudaScale {
args = append(args, "-hwaccel_output_format", "cuda")
}
}
// Seek before -i for fast keyframe-aligned start. The new ffmpeg writes
// segments with PTS shifted via -output_ts_offset so the manifest's
// pre-computed segment numbering still matches the timeline.
if startSec > 0 {
args = append(args, "-ss", strconv.FormatFloat(startSec, 'f', 3, 64))
}
// Remote (debrid) input: make the HTTP read resilient. -reconnect* recovers
// from a dropped/idle connection (debrid CDNs close long-idle sockets);
// -rw_timeout (µs) bounds a stalled read so a hung CDN surfaces as a restart
// instead of a frozen player. A seek (-ss before -i) re-opens the URL with a
// Range request, which debrid supports. Flags are no-ops for local files, so
// only add them for a URL source.
if cfg.SourceURL != "" {
args = append(args,
"-reconnect", "1",
"-reconnect_streamed", "1",
"-reconnect_delay_max", "5",
"-rw_timeout", "30000000",
)
}
args = append(args, "-i", cfg.sourceRef())
if startSec > 0 {
args = append(args, "-output_ts_offset", strconv.FormatFloat(startSec, 'f', 3, 64))
}
// Burn a bitmap subtitle (PGS/DVB) into the video when requested. Validate
// the index points at a real bitmap track in range — text subs are served as
// separate WebVTT and never burned, and a stale/out-of-range index falls
// back to a clean encode rather than failing the session.
burnIdx := -1
if reqBurn := cfg.burnSubtitleIndexOrNone(); reqBurn >= 0 {
if reqBurn < len(probe.SubtitleTracks) &&
!probe.SubtitleTracks[reqBurn].IsTextSubtitle() {
burnIdx = reqBurn
} else {
log.Printf("[hls %s] burn subtitle %d ignored — not a bitmap track in range",
shortHLSID(cfg.SessionID), reqBurn)
}
}
// Map video + selected audio. With burn-in the video comes from the
// filter_complex graph ([vout], built below); otherwise map the source video
// stream directly. ffmpeg resolves the [vout] label from -filter_complex
// regardless of argv order, so mapping it here (before audio) keeps video as
// output stream 0.
if burnIdx >= 0 {
args = append(args, "-map", "[vout]")
} else {
args = append(args, "-map", "0:v:0")
}
audioIdx := cfg.AudioIndex
if audioIdx < 0 {
audioIdx = 0
for i, a := range probe.AudioTracks {
if a.Default {
audioIdx = i
break
}
}
}
// Clamp to an audio track that actually exists. The web persists the chosen
// audioIndex globally, so a value from a multi-track file can arrive for a
// file with fewer tracks; `-map 0:a:N?` would then match nothing and the
// optional `?` silently yields a VIDEO-ONLY stream (no sound — 2026-06-03,
// Wistoria S02E08 had one audio track but the session carried audioIndex=2).
// Fall back to the first track so audio is never silently dropped.
if n := len(probe.AudioTracks); n > 0 && audioIdx >= n {
log.Printf("[hls %s] audioIndex %d out of range (%d audio track(s)) — using 0:a:0",
shortHLSID(cfg.SessionID), audioIdx, n)
audioIdx = 0
}
args = append(args, "-map", fmt.Sprintf("0:a:%d?", audioIdx))
// Video encode. Codec + preset come from the EncoderProfile resolved at
// the top of this function so the demuxer hint, the encoder, and the
// per-session log line all stay consistent.
//
// Defaults are biased for FIRST-START LATENCY over quality — the player
// blocks on seg-0 before the first frame paints, and a slow seg-0 is
// what users notice ("preparando sesión" stuck). Users who want better
// quality can override via `download.transcode.preset` in config.toml.
codec := profile.Codec
args = append(args, "-c:v", codec)
switch codec {
case "libx264":
// superfast = ~15-20% faster than veryfast at marginal quality loss
// for the bitrates we target (5-25 Mbps). For 4K software encodes
// this is the difference between ~3 s and ~2.5 s per segment on a
// recent x86 CPU. `-threads 0` is libx264's default but explicit
// helps when the user has set GOMAXPROCS.
// -bf 0 (no B-frames) + -sc_threshold 0 (no scene-cut keyframes): both
// remove the timestamp irregularities that make ffmpeg's HLS muxer emit
// "Packet duration is out of range" on slightly-VFR / B-frame sources and
// produce uneven segment lengths the player stutters on. Keyframe cadence
// is driven by -force_key_frames below, so disabling scene-cut keeps every
// segment exactly hls_time long.
args = append(args, "-preset", profile.Preset, "-threads", "0", "-bf", "0", "-sc_threshold", "0")
case "h264_nvenc":
// p3 + vbr keeps NVENC fast (~1.5 s seg-0) without the segmentation
// breakage `-tune ll` introduced in 0.9.9: with -tune=ll the NVENC
// rate control emits long IDR-less GOPs that ignore -force_key_frames,
// so ffmpeg's HLS muxer never closes seg-0 and the player stalls at
// "preparando sesión" until the 60 s mark-ready timeout. Verified on
// ffmpeg 6.1.1 + driver 580 / RTX-class GPUs: dropping -tune ll
// restores per-segment cuts at 27x real-time vs 28x with -tune ll.
// -bf 0 + -no-scenecut: same rationale as libx264 (NVENC's own flag for
// scene-cut). No B-frame reorder → monotonic DTS → uniform segments, no
// "Packet duration is out of range" flood. Safe with -force_key_frames
// (unlike -tune ll, which broke per-segment cuts — see note above).
// -forced-idr 1 is LOAD-BEARING: NVENC emits -force_key_frames frames
// as plain (non-IDR) I-frames on current ffmpeg/driver combos, the HLS
// muxer only cuts on IDR, and every segment silently stretches to the
// default GOP (250 frames ≈ 10.4 s @24fps) while the server-rendered
// playlist still promises hlsSegmentDuration. The PTS↔playlist mismatch
// breaks seeks and desyncs subtitles (measured 2026-06-10: 3 segments
// per 30 s instead of 15; with -forced-idr exactly 15).
args = append(args, "-preset", profile.Preset, "-rc", "vbr", "-bf", "0", "-no-scenecut", "1", "-forced-idr", "1")
case "h264_qsv":
// veryfast is the fastest realistic QSV preset; medium was too
// conservative for first-start. look_ahead=0 keeps the encoder
// truly low-latency (no rate-control look-ahead window).
// -forced_idr: same non-IDR forced-keyframe failure mode as NVENC (see
// above) — QSV's AVOption spells it with an underscore.
args = append(args, "-preset", profile.Preset, "-look_ahead", "0", "-forced_idr", "1")
case "h264_videotoolbox":
// VideoToolbox has no "preset" knob; `-realtime` flips into the
// low-latency path used by FaceTime. We let the `-b:v / -maxrate
// / -bufsize` block (added later in this function) drive rate
// control — adding `-q:v` here would conflict because ffmpeg's
// videotoolbox encoder treats `-b:v` as authoritative and
// silently ignores `-q:v`, so the constant-quality knob never
// took effect anyway.
args = append(args, "-realtime", "1")
case "h264_vaapi":
// h264_vaapi has no preset knob. Bitrate args (set later) drive
// rate control. Add `-vaapi_device /dev/dri/renderD128` so the
// encoder doesn't fall back to a NULL device on multi-GPU hosts
// where the default render node is a non-VAAPI GPU (an Nvidia
// dGPU's render node, etc.). The filter chain below switches to
// `format=nv12,hwupload` so frames land on the right VAAPI
// surface before the encoder; we intentionally avoid scale_vaapi
// because mesa 25 + Raphael iGPU emits "Cannot allocate memory"
// per session start, polluting logs even though encode succeeds.
args = append(args, "-vaapi_device", "/dev/dri/renderD128")
}
// Derive H.264 level from the actual output FRAME (width × height), not just
// height. A fixed "4.0" caps the encoder at 1080p; deriving by height alone
// still under-levels anamorphic content — a 2.39:1 source scaled to 1080
// height is ~2586×1080 = 11016 MBs, busting level 4.1's 8192-MB cap, which
// fails the encode ("Invalid Level" on nvenc, "frame MB size > level limit"
// on libx264) and stalls the session. The output height matches qcap.MaxHeight
// when the source is downscaled, otherwise probe.Height; the output width is
// the source width scaled by the same factor (the filter chain preserves AR).
// qcap + maxH were resolved once at the top (shared with the F4 gate).
outputHeight := qcap.MaxHeight
if outputHeight == 0 {
outputHeight = cfg.Transcode.MaxHeight
}
if outputHeight == 0 || (probe.Height > 0 && probe.Height < outputHeight) {
outputHeight = probe.Height
}
outputWidth := probe.Width
if probe.Height > 0 && outputHeight != probe.Height {
outputWidth = int(math.Round(float64(probe.Width) * float64(outputHeight) / float64(probe.Height)))
}
args = append(args, "-profile:v", "main", "-level:v", H264LevelForFrame(outputWidth, outputHeight))
// Bitrate must match the level libx264 actually picks for outputHeight,
// not the qcap target for the user's requested label. If a user asks for
// "2160p" on a 1080p source, qcap.VideoBitrate is 25 Mbps but the level
// (derived from outputHeight=1080) is 4.0, which rejects bitrates >20 Mbps
// with "VBV bitrate (25000) > level limit (20000)". Re-derive the cap
// from the effective height so the (level, bitrate) pair stays coherent.
effectiveCap := capForHeight(outputHeight)
bitrate := effectiveCap.VideoBitrate
if bitrate == "" {
bitrate = qcap.VideoBitrate
}
if bitrate == "" {
bitrate = cfg.Transcode.VideoBitrate
}
if bitrate == "" {
bitrate = "5M"
}
// Rate control: capped constant-quality where the encoder supports it well
// (libx264 CRF, NVENC CQ), plain CBR-ish elsewhere. Constant quality is the
// on-the-fly analogue of per-title encoding: easy scenes (dialogue, anime
// flats) emit FAR fewer bits than the fixed target — which is what keeps a
// funnel/LTE link from stalling — while complex scenes can still use up to
// `-maxrate` (the same ceiling as before, so worst-case quality and the
// level-derived VBV pair are unchanged). `-bufsize 2×maxrate` gives the VBV
// a standard one-segment window to absorb spikes; the old 1× window forced
// the encoder to flatline at the cap. CPB stays far below every H.264
// level's limit (level 3.1 allows 14 Mbps CPB vs our 3M at 480p).
switch codec {
case "libx264":
// Capped CRF: no -b:v (CRF drives quality), -maxrate/-bufsize cap it.
args = append(args, "-crf", "23", "-maxrate", bitrate, "-bufsize", doubleBitrate(bitrate))
case "h264_nvenc":
// NVENC constant-quality VBR: -cq targets quality, -b:v 0 disables the
// default 2M average-bitrate target that would otherwise fight it.
args = append(args, "-cq", "23", "-b:v", "0", "-maxrate", bitrate, "-bufsize", doubleBitrate(bitrate))
default:
// QSV / VideoToolbox / VAAPI: keep the proven fixed-bitrate triple —
// their constant-quality knobs (ICQ, -q:v) have vendor-specific gotchas
// (VideoToolbox ignores -q:v when -b:v is set; QSV ICQ conflicts with
// look_ahead=0) and we can't regression-test them here.
args = append(args, "-b:v", bitrate, "-maxrate", bitrate, "-bufsize", bitrate)
}
// Force keyframe alignment with segment boundaries.
args = append(args, "-force_key_frames", fmt.Sprintf("expr:gte(t,n_forced*%d)", hlsSegmentDuration))
// Filter chain: optional scale, force 8-bit yuv420p, normalise color metadata.
//
// Width-rounding pitfall: `scale=-2:H` alone derives width from `H * dar` and
// rounds to the nearest multiple of 2, which is correct. But adding
// `force_original_aspect_ratio=decrease` makes ffmpeg ignore the `-2` and
// emit the exact computed width — which can be odd (e.g. 853×480) and
// libx264 then refuses to open. We chain a second `scale=trunc(iw/2)*2:...`
// after the cap to guarantee even dimensions before format/setparams.
// (maxH was resolved once at the top, shared with the F4 cuda-scale gate.)
// VAAPI needs frames as nv12 VAAPI surfaces before the encoder. We do
// scale + format conversion on CPU then `hwupload` once at the end —
// skips the mesa 25 + Raphael iGPU "Cannot allocate memory" log spam
// that scale_vaapi triggers per-session-start while still delivering
// the encoder a GPU surface. setparams is dropped because VAAPI
// surfaces don't expose VUI fields the way libx264 does; the encoder
// records its own color metadata via the source PTS chain.
pixFormat := "yuv420p"
hwUploadTail := ""
colorTail := ",setparams=colorspace=bt709:color_trc=bt709:color_primaries=bt709:range=tv"
if codec == "h264_vaapi" {
pixFormat = "nv12"
hwUploadTail = ",hwupload"
colorTail = ""
}
// HDR→SDR tonemap, after the scale (downscale-first = fewer pixels to map).
// Prefer libplacebo (GPU, ONE pass — it also emits the BT.709 colorspace +
// 8-bit format, so it REPLACES the format/setparams tail); else the zscale
// CPU chain; else play untonemapped (desaturated, last resort). Skip
// libplacebo on VAAPI: its Vulkan surface flow doesn't compose with our
// nv12+hwupload path, so VAAPI keeps the zscale-or-none behaviour.
//
// Gate on a real HW encoder (HWAccel != none): only then is the Vulkan
// device a genuine GPU. A software-only host with mesa would expose lavapipe
// (CPU Vulkan), which the functional probe accepts but whose tonemap is
// SLOWER than the zscale CPU chain — so on those hosts libplacebo would be a
// regression. No HW encoder ⇒ stay on zscale.
useLibplacebo := probe.HDR != "" && cfg.Transcode.HasLibplacebo &&
codec != "h264_vaapi" && cfg.Transcode.HWAccel != HWAccelNone
tonemap := ""
if probe.HDR != "" && cfg.Transcode.TonemapHDR && !useLibplacebo {
tonemap = hdrTonemapChain
}
// videoTail = everything after the scale: either libplacebo (tonemap +
// colorspace + format in one) or the (optional zscale) tonemap then the
// format + color-metadata tail. No leading comma — the scale chain ends in one.
videoTail := tonemap + "format=" + pixFormat + colorTail
if useLibplacebo {
videoTail = libplaceboTonemapFilter
}
// Core video chain (scale + tonemap/format tail), WITHOUT the optional
// hwUploadTail — that has to run last, after any subtitle overlay, so it's
// appended separately below.
var vchain string
switch {
case useCudaScale:
// F4: scale on the CUDA surface and hand h264_nvenc a yuv420p CUDA frame
// directly — no CPU `format`/`setparams` tail (the frame never leaves the
// GPU; nvenc records BT.709 SDR metadata from the source). scale_cuda's
// `-2` already yields an even width, so the second even-rounding pass the
// CPU path needs is unnecessary. useCudaScale already implies a real
// downscale (probe.Height > cudaCap) on an SDR, non-burn-in NVENC source.
vchain = fmt.Sprintf("scale_cuda=-2:%d:format=yuv420p", maxH)
case maxH > 0 && probe.Height > maxH:
vchain = fmt.Sprintf(
"scale=-2:%d:force_original_aspect_ratio=decrease,scale=trunc(iw/2)*2:trunc(ih/2)*2,%s",
maxH, videoTail,
)
default:
vchain = fmt.Sprintf(
"scale=trunc(iw/2)*2:trunc(ih/2)*2,%s",
videoTail,
)
}
if burnIdx >= 0 {
// Burn-in: process the video to its final size + SDR colorspace FIRST,
// then composite the subtitle. Overlaying SDR PGS/DVB graphics onto a
// still-HDR (PQ) frame and tonemapping afterwards would crush the
// subtitle brightness, so the overlay must come after the tonemap. The
// subtitle canvas is scaled to the processed frame via scale2ref so a
// PGS/DVB stream authored at any resolution lines up. hwUploadTail
// (VAAPI) runs last, on the composited frame.
filterComplex := fmt.Sprintf(
"[0:v:0]%s[base];[0:s:%d][base]scale2ref[sub][base2];[base2][sub]overlay%s[vout]",
vchain, burnIdx, hwUploadTail,
)
args = append(args, "-filter_complex", filterComplex)
} else {
args = append(args, "-vf", vchain+hwUploadTail)
}
// Audio: AAC stereo 48 kHz — broadest browser compatibility.
audioBitrate := cfg.Transcode.AudioBitrate
if audioBitrate == "" {
audioBitrate = "192k"
}
args = append(args,
"-c:a", "aac",
"-b:a", audioBitrate,
"-ar", "48000",
"-ac", "2",
)
// Force constant frame rate. Many MKV rips are slightly variable-frame-rate
// (or carry irregular PTS); muxed to fMP4 that produces non-monotonic packet
// durations ("Packet duration is out of range") and uneven segment lengths
// the player stutters on. CFR resamples to a steady cadence → uniform
// segments. Near-CFR sources (23.976/24/25) are essentially untouched.
args = append(args, "-fps_mode", "cfr")
// HLS muxer — fmp4 segments with pre-computed segment count.
// `-start_number` slots seg-N.m4s where N matches the segment index in
// the pre-rendered manifest. Each ffmpeg writes its own ffmpeg.m3u8 but
// we never serve it — the rendered VOD manifest already knows everything.
videoDir := filepath.Join(tmpDir, "video")
manifestName := fmt.Sprintf("ffmpeg-%d.m3u8", startIdx)
args = append(args,
"-f", "hls",
"-hls_time", strconv.Itoa(hlsSegmentDuration),
"-hls_playlist_type", "vod",
"-hls_segment_type", "fmp4",
"-hls_list_size", "0",
"-hls_flags", "independent_segments",
"-start_number", strconv.Itoa(startIdx),
"-hls_fmp4_init_filename", "init.mp4",
"-hls_segment_filename", filepath.Join(videoDir, "seg-%d.m4s"),
filepath.Join(videoDir, manifestName),
)
return args
}
// ---- Manifest rendering ----
// renderVideoPlaylist renders the pre-computed VOD media playlist for the
// video stream. Every entry is derived from the source duration, so the player
// learns the whole timeline from the manifest before any segment is fetched.
//
// seg-0 is the short init segment (hlsInitSegmentDuration s); seg-1 onward are
// hlsSegmentDuration s each. Entry i advertises segmentDurationFor(i) seconds,
// clamped to whatever part of durationSec is still unaccounted for — which is
// why the last segment may be shorter than nominal when the duration does not
// divide evenly.
func renderVideoPlaylist(durationSec float64, segCount int) string {
	var out strings.Builder
	out.WriteString("#EXTM3U\n")
	out.WriteString("#EXT-X-VERSION:7\n")
	out.WriteString("#EXT-X-PLAYLIST-TYPE:VOD\n")
	fmt.Fprintf(&out, "#EXT-X-TARGETDURATION:%d\n", hlsSegmentDuration+1)
	out.WriteString("#EXT-X-MEDIA-SEQUENCE:0\n")
	out.WriteString(`#EXT-X-MAP:URI="init.mp4"` + "\n")

	// left tracks the part of the timeline not yet assigned to a segment.
	left := durationSec
	for seg := 0; seg < segCount; seg++ {
		dur := float64(segmentDurationFor(seg))
		if left < dur {
			dur = left
		}
		fmt.Fprintf(&out, "#EXTINF:%.3f,\n", dur)
		fmt.Fprintf(&out, "seg-%d.m4s\n", seg)
		left -= dur
	}

	out.WriteString("#EXT-X-ENDLIST\n")
	return out.String()
}
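// (Describes renderMasterPlaylist, which is defined further down, after
// renderMasterPlaylistCopy.) It builds the top-level master playlist for the
// encode path with the single video variant. Text subtitles are not listed as
// EXT-X-MEDIA renditions; the web player attaches them as external tracks.
// Audio is muxed into the video segments for the MVP — separate audio
// renditions can come later (they require a second ffmpeg pipeline producing
// audio-only segments).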
// buildHLSCopyArgs assembles the ffmpeg command line for VideoCopy mode: the
// video stream is copied bit-exact (`-c:v copy`, segments cut at the source's
// own keyframes), the audio is copied only when it is already stereo-or-less
// AAC and re-encoded to AAC 192k stereo otherwise, and the result is muxed to
// fMP4 HLS with ffmpeg writing its OWN media playlist (EVENT while running,
// ENDLIST on completion) with byte-exact EXTINF durations. Validated
// empirically on the incident source (HEVC Main10 + EAC3 MKV): seg-0 TTFB
// ~510 ms, valid hvc1+mp4a stream.
//
// Deliberate differences from the encode path:
//   - no encoder/preset/bitrate/keyframe flags (nothing is encoded);
//   - `+temp_file` so segments land atomically (write .tmp → rename) and a
//     listed segment is always complete on disk;
//   - playlist type EVENT: the timeline grows as ffmpeg outruns playback
//     (I/O-bound) and players treat it as live-DVR until ENDLIST.
//
// The returned slice holds the arguments only (no "ffmpeg" argv[0]).
func buildHLSCopyArgs(cfg HLSSessionConfig, probe *StreamProbe, tmpDir string) []string {
	args := []string{"-y", "-hide_banner", "-loglevel", "warning", "-stats"}

	// StartSec is INTENTIONALLY ignored in copy mode: an EVENT playlist whose
	// entries start at position 0 while the fragments carry an offset tfdt
	// (-ss + -output_ts_offset) is exactly the shape iOS's native HLS parser
	// chokes on (observed 2026-06-10: resume at 368s → player error + session
	// re-bootstrap loop on iPhone). Copy always produces from 0 with true
	// absolute PTS — it outruns playback at I/O speed, so the resume point
	// appears in the growing timeline within seconds and the player's own
	// startPosition seek lands normally.

	// URL sources get the reconnect / read-timeout input options; they are
	// input options, so they must precede -i.
	if cfg.SourceURL != "" {
		args = append(args,
			"-reconnect", "1",
			"-reconnect_streamed", "1",
			"-reconnect_delay_max", "5",
			"-rw_timeout", "30000000",
		)
	}
	args = append(args, "-i", cfg.sourceRef(), "-map", "0:v:0")

	// Pick the audio track (same clamping rules as the encode path): a
	// negative index means "no explicit choice", which resolves to the first
	// track flagged default, or track 0 when none is.
	trackCount := len(probe.AudioTracks)
	audioIdx := cfg.AudioIndex
	if audioIdx < 0 {
		audioIdx = 0
		for i := range probe.AudioTracks {
			if probe.AudioTracks[i].Default {
				audioIdx = i
				break
			}
		}
	}
	if trackCount > 0 && audioIdx >= trackCount {
		log.Printf("[hls %s] audioIndex %d out of range (%d audio track(s)) — using 0:a:0",
			shortHLSID(cfg.SessionID), audioIdx, trackCount)
		audioIdx = 0
	}

	// Video: bit-exact copy. HEVC needs the hvc1 tag or Safari/Apple refuses
	// the track (mkv extracts default to hev1).
	args = append(args,
		"-map", fmt.Sprintf("0:a:%d?", audioIdx),
		"-c:v", "copy",
	)
	if strings.EqualFold(probe.VideoCodec, "hevc") {
		args = append(args, "-tag:v", "hvc1")
	}

	// Audio: copy ONLY when the selected track is AAC with ≤2 channels —
	// WebKit/Apple HLS rejects multichannel AAC at the first media segment
	// (observed via the Safari access log: master → index → init → seg-0
	// fetched twice, then silence — every 5.1 movie failed on iPhone while
	// stereo-AAC episodes played). Anything else (non-AAC, AAC 5.1+, or an
	// unknown channel count, which stays 0 here) is re-encoded to AAC stereo
	// 48k. The original multichannel track stays intact for external players.
	codecName, channels := probe.AudioCodec, 0
	if audioIdx < trackCount {
		codecName = probe.AudioTracks[audioIdx].Codec
		channels = probe.AudioTracks[audioIdx].Channels
	}
	stereoAAC := strings.EqualFold(codecName, "aac") && channels >= 1 && channels <= 2
	if stereoAAC {
		args = append(args, "-c:a", "copy")
	} else {
		args = append(args, "-c:a", "aac", "-b:a", "192k", "-ar", "48000", "-ac", "2")
	}

	// HLS muxer: fMP4 segments plus ffmpeg's own EVENT playlist, all under
	// <tmpDir>/video.
	videoDir := filepath.Join(tmpDir, "video")
	return append(args,
		"-f", "hls",
		"-hls_time", strconv.Itoa(hlsSegmentDuration),
		"-hls_playlist_type", "event",
		"-hls_segment_type", "fmp4",
		"-hls_list_size", "0",
		"-hls_flags", "independent_segments+temp_file",
		"-hls_fmp4_init_filename", "init.mp4",
		"-hls_segment_filename", filepath.Join(videoDir, "seg-%d.m4s"),
		filepath.Join(videoDir, copyPlaylistName),
	)
}
// renderMasterPlaylistCopy builds the master playlist for VideoCopy mode.
// Unlike the encode master it deliberately OMITS the CODECS attribute: the
// stream carries the source's codec verbatim (hvc1/avc1/av01 at whatever
// profile/level the file has) and a wrong hardcoded string makes iOS reject
// the variant outright, while omission is legal and universally tolerated.
//
// RESOLUTION follows the same "omit rather than lie" rule: it carries the
// probed source dimensions, and is left out entirely when the probe did not
// yield a positive width and height (the attribute is optional, whereas a
// literal "0x0" would advertise a nonsensical frame size).
//
// The returned playlist always references the single variant at
// video/index.m3u8.
func renderMasterPlaylistCopy(probe *StreamProbe) string {
	var b strings.Builder
	b.WriteString("#EXTM3U\n")
	b.WriteString("#EXT-X-VERSION:7\n")

	// BANDWIDTH is advisory (single variant, no ABR) — a height-based
	// estimate of typical source bitrates is plenty.
	// NOTE(review): heights below 700 (including an unknown height of 0) fall
	// through to the 8 Mbps default, which is higher than the 720p tier —
	// confirm that is intended for SD sources.
	bw := 8_000_000
	switch {
	case probe.Height >= 2000:
		bw = 25_000_000
	case probe.Height >= 1000:
		bw = 10_000_000
	case probe.Height >= 700:
		bw = 5_000_000
	}

	fmt.Fprintf(&b, "#EXT-X-STREAM-INF:BANDWIDTH=%d", bw)
	if probe.Width > 0 && probe.Height > 0 {
		fmt.Fprintf(&b, ",RESOLUTION=%dx%d", probe.Width, probe.Height)
	}
	b.WriteString("\n")
	b.WriteString("video/index.m3u8\n")
	return b.String()
}
// renderMasterPlaylist builds the top-level master playlist for the encode
// path: one video variant pointing at video/index.m3u8, advertised with a
// bandwidth looked up from qualityLabel, the probed dimensions scaled to that
// label's height cap, and a fixed H.264 Main + AAC-LC CODECS string.
func renderMasterPlaylist(probe *StreamProbe, qualityLabel string) string {
	// Fixed CODECS value advertised for the transcoded variant.
	const variantCodecs = `avc1.4D4028,mp4a.40.2`

	var out strings.Builder
	out.WriteString("#EXTM3U\n")
	out.WriteString("#EXT-X-VERSION:7\n")

	// Subtitles are no longer embedded as HLS renditions. The web player attaches
	// every TEXT subtitle as an external <track> served on demand by the /sub
	// endpoint (subtitleHandler) — ONE source for direct-play AND HLS that works
	// under native playback and hls.js alike. Embedding them here too would
	// double the captions menu under hls.js, and the native-HLS path (Chrome's
	// "maybe" canPlayType) never surfaced in-manifest SUBTITLES renditions
	// anyway, which is what made subtitles inconsistent. Bitmap subs (PGS/DVB)
	// remain burn-in (no WebVTT form).

	// Video variant. Bandwidth + resolution are best-effort estimates from probe.
	bandwidth := bitrateForQuality(qualityLabel)
	width, height := scaledDimensions(probe.Width, probe.Height, qualityHeight(qualityLabel))
	fmt.Fprintf(&out, "#EXT-X-STREAM-INF:BANDWIDTH=%d,RESOLUTION=%dx%d,CODECS=%q\n",
		bandwidth, width, height, variantCodecs)
	out.WriteString("video/index.m3u8\n")
	return out.String()
}
// bitrateForQuality maps a quality label to the synthetic BANDWIDTH value (in
// bits per second) advertised in the master playlist's STREAM-INF. The figure
// only matters to ABR logic, which we don't run yet. Labels that are not
// listed get the same value as "1080p".
func bitrateForQuality(q string) int {
	const fallback = 6_000_000 // also the "1080p" figure

	switch q {
	case "2160p":
		return 25_000_000
	case "720p":
		return 3_500_000
	case "480p":
		return 1_500_000
	default:
		return fallback
	}
}
// qualityHeight returns the pixel height for a known quality label ("2160p",
// "1080p", "720p", "480p"). Any other label yields 0, which callers such as
// scaledDimensions treat as "no height cap".
func qualityHeight(q string) int {
	known := [...]struct {
		label  string
		height int
	}{
		{"2160p", 2160},
		{"1080p", 1080},
		{"720p", 720},
		{"480p", 480},
	}
	for _, entry := range known {
		if entry.label == q {
			return entry.height
		}
	}
	return 0
}
// scaledDimensions returns (width, height) after applying a height cap that
// preserves the source aspect ratio.
//
//   - Unknown source dimensions (srcW or srcH <= 0) fall back to 1920x1080.
//   - capH <= 0 means "no cap", and a source already at or below the cap is
//     never upscaled: both return the original dims.
//   - Otherwise the height becomes capH and the width is scaled by the same
//     factor, bumped up to an even number and never below 2, so the result is
//     always a positive frame size.
func scaledDimensions(srcW, srcH, capH int) (int, int) {
	if srcW <= 0 || srcH <= 0 {
		return 1920, 1080
	}
	// A non-positive cap is treated as "uncapped": scaling by it would yield
	// zero or negative dimensions.
	if capH <= 0 || srcH <= capH {
		return srcW, srcH
	}
	w := srcW * capH / srcH
	if w%2 != 0 {
		w++
	}
	// Extremely narrow sources can truncate to 0; keep a valid even width.
	if w < 2 {
		w = 2
	}
	return w, capH
}
// ---- Logger plumbing ----
// hlsStderrCapture forwards ffmpeg stderr lines to the daemon log prefixed by
// the session ID, so failures are visible without spelunking tmpdirs. It is
// fed through its Write method.
//
// The internal buffer accumulates partial bytes between writes (a single line
// can span multiple Write calls). Capped at maxStderrBuf so a misbehaving
// ffmpeg that emits megabytes without newlines can't grow daemon memory
// unbounded; on overflow we discard the partial line and keep going.
type hlsStderrCapture struct {
	// owner is the session this stderr stream belongs to: it receives the
	// parsed progress samples and input-bound marks, and supplies the session
	// ID used as the log prefix.
	owner *HLSSession
	// buf holds the bytes of the current line that has not been terminated by
	// \r or \n yet.
	buf strings.Builder
}

// maxStderrBuf is the upper bound, in bytes (64 KiB), on the unterminated
// stderr data hlsStderrCapture keeps buffered.
const maxStderrBuf = 64 * 1024
// ffmpeg -stats progress lines look like:
//
//	frame= 123 fps= 30 q=28.0 size= 456kB time=00:00:08.00 speed=1.05x
//
// emitted with a trailing \r (overwrite-in-place), once per ~0.5s. We parse
// speed=/fps= out of them for live transcode telemetry (F3) and DON'T log them
// (one per 0.5s would drown the daemon log) — only \n-terminated warning/error
// lines reach log.Printf below.
//
// ffmpeg formats the speed with a %g-style conversion at three significant
// digits, so anything at or above 1000x (routine in copy mode, which runs at
// I/O speed) is printed in exponent form, e.g. "speed=1.23e+03x". The speed
// pattern therefore accepts an optional exponent; without it those lines
// would not be recognised as progress and would be logged instead.
var (
	reFFmpegSpeed = regexp.MustCompile(`speed=\s*([0-9.]+(?:[eE][+-]?[0-9]+)?)x`)
	reFFmpegFps   = regexp.MustCompile(`fps=\s*([0-9.]+)`)
)

// parseFFmpegProgress extracts the transcode speed multiplier and the frame
// rate from one ffmpeg progress line.
//
// ok is true only when the line carries a parsable numeric "speed=<n>x" field;
// lines without one (warnings, errors, or early "speed=N/A" reports) yield
// (0, 0, false). fps is best-effort: it is 0 when the field is missing or
// malformed, which does not affect ok.
func parseFFmpegProgress(line string) (speedX, fps float64, ok bool) {
	m := reFFmpegSpeed.FindStringSubmatch(line)
	if m == nil {
		return 0, 0, false
	}
	v, err := strconv.ParseFloat(m[1], 64)
	if err != nil {
		return 0, 0, false
	}
	if fm := reFFmpegFps.FindStringSubmatch(line); fm != nil {
		// Deliberately ignore the error: fps is advisory telemetry and an
		// unparsable value just leaves it at 0.
		fps, _ = strconv.ParseFloat(fm[1], 64)
	}
	return v, fps, true
}
// isInputBoundLine reports whether an ffmpeg stderr line points at a failed
// SOURCE read (slow debrid link, dropped torrent peer, network timeout) rather
// than at an encoder that cannot keep up — so the player names the bottleneck
// as the link, not the GPU. Matching is a case-insensitive substring search
// for a fixed set of markers.
func isInputBoundLine(line string) bool {
	markers := [...]string{
		"i/o error",
		"connection reset",
		"rw_timeout",
		"error in the pull function",
		"connection timed out",
	}
	lowered := strings.ToLower(line)
	for _, marker := range markers {
		if strings.Contains(lowered, marker) {
			return true
		}
	}
	return false
}
// Write implements io.Writer for ffmpeg's stderr. It appends p to the pending
// line buffer, splits the buffer into lines on \r or \n, and handles each
// complete, non-blank line:
//
//   - progress lines (recognised by parseFFmpegProgress) feed the owner's
//     telemetry and are never logged;
//   - every other line is logged with the session prefix, after marking the
//     owner input-bound when isInputBoundLine matches.
//
// Whatever follows the last terminator stays buffered for the next call. The
// buffer is capped at maxStderrBuf: an oversized chunk keeps only its final
// maxStderrBuf bytes, and a chunk that would overflow the cap drops the
// partial line buffered so far.
//
// Write always reports all of p as consumed (len(p), nil), including the
// bytes it chose to drop: io.Writer requires a non-nil error whenever
// n < len(p), and io.Copy treats such a short write as fatal, which would
// stop the stderr pump.
func (c *hlsStderrCapture) Write(p []byte) (int, error) {
	// Capture the length before p is possibly truncated below.
	n := len(p)

	// If the incoming chunk alone exceeds the cap (very long unterminated
	// line), drop the buffered prefix AND truncate p so a single multi-MB
	// write can't grow memory.
	if len(p) > maxStderrBuf {
		c.buf.Reset()
		p = p[len(p)-maxStderrBuf:]
	} else if c.buf.Len()+len(p) > maxStderrBuf {
		// Drop the unterminated partial line; we'll resync on the next \r/\n.
		c.buf.Reset()
	}
	c.buf.Write(p)

	// Frame on \r OR \n: ffmpeg's progress line is \r-terminated, warnings are
	// \n-terminated. Walk the buffered text once and write the unterminated
	// tail back a single time at the end, instead of rebuilding the buffer
	// after every line.
	pending := c.buf.String()
	framed := false
	for {
		idx := strings.IndexAny(pending, "\r\n")
		if idx < 0 {
			break
		}
		line := strings.TrimSpace(pending[:idx])
		pending = pending[idx+1:]
		framed = true
		if line == "" {
			continue
		}
		if speedX, fps, ok := parseFFmpegProgress(line); ok {
			c.owner.recordProgress(speedX, fps)
			continue // progress line — telemetry only, never logged
		}
		if isInputBoundLine(line) {
			c.owner.markInputBound()
		}
		log.Printf("[hls %s] ffmpeg: %s", shortHLSID(c.owner.cfg.SessionID), line)
	}
	if framed {
		// Keep only the trailing partial line for the next Write.
		c.buf.Reset()
		c.buf.WriteString(pending)
	}
	return n, nil
}