diff --git a/internal/cmd/daemon.go b/internal/cmd/daemon.go index d129896..bad8ece 100644 --- a/internal/cmd/daemon.go +++ b/internal/cmd/daemon.go @@ -346,6 +346,10 @@ func runDaemonStart() error { // Wire ffmpeg so /thumbnail can extract single frames for the web's "file // characteristics" panel (frames on demand). Empty = thumbnails 503. streamSrv.SetFFmpegPath(ffmpegResolved) + // Write-through cache extracted WebVTT into the hidden ".unarr" sidecar dir so + // /sub serves instantly (and giant remuxes that exceed the on-demand timeout + // work once the scan prewarm has filled the cache). Default true. + streamSrv.SetCacheSubtitles(cfg.Library.CacheSubtitles) streamSrv.SetRequireStreamToken(cfg.Download.RequireStreamToken) // Report the stream-token signing key ONLY when enforcing, so the web's // "secret present → mint HLS token" signal accurately means "this agent @@ -995,6 +999,18 @@ func runAutoScan(ctx context.Context, cfg config.Config, interval time.Duration, Incremental: existing != nil, } + // Resolve ffmpeg once for the subtitle-sidecar prewarm (extracts text subs + // to the hidden ".unarr" cache so /sub is instant + huge remuxes work). + // Empty/err = prewarm is skipped silently (on-demand extraction still runs). + prewarmFFmpeg := "" + if cfg.Library.CacheSubtitles { + if ff, err := mediainfo.ResolveFFmpeg(cfg.Library.FFmpegPath); err == nil { + prewarmFFmpeg = ff + } else { + log.Printf("[auto-scan] subtitle prewarm disabled: ffmpeg unavailable: %v", err) + } + } + // Scan each path independently and sync per path so the server can // scope stale-item deletion to the correct directory prefix. const batchSize = 100 @@ -1009,6 +1025,14 @@ func runAutoScan(ctx context.Context, cfg config.Config, interval time.Duration, } mergedItems = append(mergedItems, cache.Items...) 
+ if prewarmFFmpeg != "" { + library.PrewarmSidecars(ctx, cache, library.PrewarmOptions{ + FFmpegPath: prewarmFFmpeg, + CacheSubtitles: true, + Workers: 2, + }) + } + items := library.BuildSyncItems(cache) if len(items) == 0 { log.Printf("[auto-scan] no items under %s", scanPath) diff --git a/internal/cmd/scan.go b/internal/cmd/scan.go index d05ae29..4ef58aa 100644 --- a/internal/cmd/scan.go +++ b/internal/cmd/scan.go @@ -16,6 +16,7 @@ import ( "github.com/torrentclaw/unarr/internal/agent" "github.com/torrentclaw/unarr/internal/config" "github.com/torrentclaw/unarr/internal/library" + "github.com/torrentclaw/unarr/internal/library/mediainfo" ) func newScanCmd() *cobra.Command { @@ -139,6 +140,20 @@ func runScan(dirPath string, workers int, ffprobePath string, noSync bool) error return enc.Encode(cache) } + // Pre-extract subtitle sidecars (text subs → WebVTT in a hidden ".unarr" dir) + // so playback gets instant subtitles and huge remuxes never hit the on-demand + // timeout. Best-effort + Ctrl-C interruptible (the scan itself is already saved). + if cfg.Library.CacheSubtitles { + if ff, err := mediainfo.ResolveFFmpeg(cfg.Library.FFmpegPath); err == nil { + fmt.Fprintf(os.Stderr, " Pre-extracting subtitles to cache… (Ctrl-C to skip)\n") + library.PrewarmSidecars(ctx, cache, library.PrewarmOptions{ + FFmpegPath: ff, + CacheSubtitles: true, + Workers: 2, + }) + } + } + // Sync to server if !noSync { return syncToServer(ctx, cfg, cache) diff --git a/internal/config/config.go b/internal/config/config.go index f5ac09b..cffe12a 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -189,6 +189,14 @@ type LibraryConfig struct { AutoScan bool `toml:"auto_scan"` // enable daily auto-scan in daemon (default true) ScanInterval string `toml:"scan_interval"` // e.g. 
"24h", "12h", "6h" (default "24h") AllowDelete bool `toml:"allow_delete"` // allow web UI to request file deletion from disk + + // Sidecar caching: extract text subtitles (WebVTT) and thumbnail frames once + // during the library scan and store them in a hidden ".unarr" dir next to the + // media file, so the stream handlers serve them instantly instead of running + // ffmpeg per request (and so huge remuxes don't hit the on-demand HTTP + // timeout). Both default true; disable to save the disk/CPU of pre-extraction. + CacheSubtitles bool `toml:"cache_subtitles"` // default true + CacheThumbnails bool `toml:"cache_thumbnails"` // default true } // Default returns a Config with sensible defaults. Used both for fresh @@ -255,9 +263,11 @@ func Default() Config { Locale: "en", }, Library: LibraryConfig{ - AutoScan: true, - ScanInterval: "24h", - Workers: 8, + AutoScan: true, + ScanInterval: "24h", + Workers: 8, + CacheSubtitles: true, + CacheThumbnails: true, }, } } @@ -321,6 +331,16 @@ func applyDefaults(cfg *Config, meta toml.MetaData) { cfg.General.Country = "US" } + // Sidecar caching defaults ON for existing configs that predate these keys — + // it only adds small hidden files next to media and makes subs/thumbnails + // instant. Power users can set them false explicitly to opt out. + if !meta.IsDefined("library", "cache_subtitles") { + cfg.Library.CacheSubtitles = true + } + if !meta.IsDefined("library", "cache_thumbnails") { + cfg.Library.CacheThumbnails = true + } + if !meta.IsDefined("downloads", "transcode", "enabled") { cfg.Download.Transcode.Enabled = true } diff --git a/internal/engine/stream_server.go b/internal/engine/stream_server.go index f785d38..f21cacd 100644 --- a/internal/engine/stream_server.go +++ b/internal/engine/stream_server.go @@ -21,6 +21,7 @@ import ( "time" "github.com/anacrolix/torrent" + "github.com/torrentclaw/unarr/internal/library/mediainfo" ) // StreamURLs holds all available stream URLs keyed by network type. 
@@ -105,6 +106,12 @@ type StreamServer struct { // Listen() via SetFFmpegPath; read-only thereafter so the handler needs no lock. ffmpegPath string + // cacheSubtitles enables write-through caching of extracted WebVTT to the + // hidden ".unarr" sidecar dir next to the media (mirrors the scan-time + // prewarm). Set once before Listen() via SetCacheSubtitles; default false here, + // flipped on from config (default true) by the daemon. Read-only thereafter. + cacheSubtitles bool + lastActivity atomic.Int64 maxByteOffset atomic.Int64 // highest sequential read position (main playback connection) totalFileSize atomic.Int64 @@ -204,6 +211,13 @@ func (ss *StreamServer) SetFFmpegPath(path string) { ss.ffmpegPath = path } +// SetCacheSubtitles toggles write-through caching of extracted WebVTT into the +// hidden ".unarr" sidecar dir next to the media file (library.cache_subtitles, +// default true). Only the cache write is gated: subtitleHandler still serves an existing fresh sidecar when this is off. The flag is stored without locking, so call before Listen(); read-only thereafter. +func (ss *StreamServer) SetCacheSubtitles(on bool) { + ss.cacheSubtitles = on +} + // SetCORSAllowedOrigins replaces the operator-supplied extra origins. The // default allowlist (torrentclaw.com / app.torrentclaw.com / localhost dev // ports) is always merged in. Call before Listen(). @@ -1003,45 +1017,51 @@ func (ss *StreamServer) subtitleHandler(w http.ResponseWriter, r *http.Request) return } - // A full subtitle track is small (KBs–low MBs); 60s is ample even for a - // long movie's text track and bounds a hung/corrupt ffmpeg. - ctx, cancel := context.WithTimeout(r.Context(), 60*time.Second) - defer cancel() - - // -map 0:s:<N> selects the Nth subtitle stream (same ordering as the - // library scan / probe.json / burn-in si=N). `-c:s webvtt -f webvtt` converts - // srt/ass/mov_text/etc. to WebVTT on stdout. `?` makes the map non-fatal if - // the stream is absent (yields empty output rather than a hard error). 
- args := []string{ - "-nostdin", - "-loglevel", "error", - "-i", rawPath, - "-map", fmt.Sprintf("0:s:%d?", index), - "-c:s", "webvtt", - "-f", "webvtt", - "-", - } - cmd := exec.CommandContext(ctx, ss.ffmpegPath, args...) - var stderr strings.Builder - cmd.Stderr = &stderr - out, err := cmd.Output() - if err != nil || len(out) == 0 { - log.Printf("[sub] extract failed (i=%d path=%q): err=%v %s", - index, rawPath, err, strings.TrimSpace(stderr.String())) - http.Error(w, "subtitle extract failed", http.StatusInternalServerError) + // Cache hit: serve a fresh sidecar (written by the scan-time prewarm or a + // prior request) instantly, skipping ffmpeg. This is also what makes huge + // remuxes work — the prewarm extracts without the on-demand HTTP timeout + // below, so by play time the hit avoids the 60s ceiling that was returning + // 500s on 50GB+ files. + if vtt, ok := mediainfo.ReadCachedSubtitle(rawPath, index); ok { + ss.writeVTT(w, vtt) return } + // A full subtitle track is small (KBs–low MBs); 60s is ample for a normal + // movie's text track and bounds a hung/corrupt ffmpeg. Giant remuxes can + // exceed this on first play — the prewarm pre-fills the cache so this + // on-demand path is the fallback, not the steady state. + ctx, cancel := context.WithTimeout(r.Context(), 60*time.Second) + defer cancel() + + out, err := mediainfo.ExtractSubtitleVTT(ctx, ss.ffmpegPath, rawPath, index) + if err != nil { + log.Printf("[sub] extract failed (i=%d path=%q): %v", index, rawPath, err) + http.Error(w, "subtitle extract failed", http.StatusInternalServerError) + return + } + // Write-through so the next request is a cache hit. Best-effort: a read-only + // media mount just logs and serves the in-memory bytes. 
+ if ss.cacheSubtitles { + if werr := mediainfo.WriteCachedSubtitle(rawPath, index, out); werr != nil { + log.Printf("[sub] cache write skipped (i=%d path=%q): %v", index, rawPath, werr) + } + } + ss.writeVTT(w, out) +} + +// writeVTT writes the standard WebVTT response headers + body for both the +// cache-hit and freshly-extracted paths of subtitleHandler. A failed body write is only logged. +func (ss *StreamServer) writeVTT(w http.ResponseWriter, vtt []byte) { w.Header().Set("Content-Type", "text/vtt; charset=utf-8") // path+index is stable content for the daemon's lifetime; let the browser - // cache so re-selecting a track doesn't re-run ffmpeg. private — the user's - // own file. + // cache so re-selecting a track doesn't re-fetch. private — the user's file. w.Header().Set("Cache-Control", "private, max-age=3600") - w.Header().Set("Content-Length", strconv.Itoa(len(out))) + w.Header().Set("Content-Length", strconv.Itoa(len(vtt))) //nolint:gosec // G705: WebVTT served as text/vtt to a <track> element — not // HTML, so cue text can't execute; the path is token-scoped + stat'd as a // regular file, and ffmpeg only emits well-formed WebVTT. - if _, err := w.Write(out); err != nil { + if _, err := w.Write(vtt); err != nil { log.Printf("[sub] write failed: %v", err) } } diff --git a/internal/library/mediainfo/sidecar.go b/internal/library/mediainfo/sidecar.go new file mode 100644 index 0000000..0574ac9 --- /dev/null +++ b/internal/library/mediainfo/sidecar.go @@ -0,0 +1,135 @@ +package mediainfo + +import ( + "context" + "errors" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" +) + +// Sidecar cache: unarr stores extracted artifacts (WebVTT subtitles, thumbnail +// frames) in a hidden ".unarr" directory NEXT TO the media file, not in the XDG +// cache. 
// Keeping them beside the content means they travel with the file and survive
// a cache-dir wipe. The scan-time prewarm and the on-demand stream handlers
// also share the exact same path scheme, so a subtitle/thumbnail extracted
// during a library scan is reused verbatim at play time (no re-extraction, no
// 60s-HTTP-timeout failures on huge remuxes).
//
// Everything here is best-effort: a read-only media mount just means no cache
// (the on-demand path still works), and a stale cache (media replaced) is
// detected by mtime and ignored.

// sidecarDirName is the name of the hidden cache directory created beside the
// media file.
const sidecarDirName = ".unarr"

// IsTextSubtitleCodec reports whether codec names a text-based subtitle format
// that can be converted to WebVTT. The name is trimmed and lower-cased before
// it is compared against the whitelist. Bitmap formats (PGS/DVB/VOBSUB) are
// deliberately absent: they are burned in, not extracted. The list is meant to
// mirror engine.ProbeSubtitleTrack.IsTextSubtitle and the web's
// isTextSubtitleCodec; it lives in this leaf media package so the stream
// handlers and the scan-time prewarm classify codecs identically.
func IsTextSubtitleCodec(codec string) bool {
	normalized := strings.ToLower(strings.TrimSpace(codec))
	for _, name := range [...]string{"subrip", "srt", "ass", "ssa", "webvtt", "mov_text", "text"} {
		if normalized == name {
			return true
		}
	}
	return false
}

// SidecarDir returns the hidden cache directory that sits in the same folder as
// mediaPath.
func SidecarDir(mediaPath string) string {
	parent := filepath.Dir(mediaPath)
	return filepath.Join(parent, sidecarDirName)
}

// SubtitleCachePath returns where the WebVTT for subtitle stream index of
// mediaPath is cached: "<media file name>.s<index>.vtt" inside SidecarDir.
// index is 0-based and follows ffmpeg's 0:s:N ordering.
func SubtitleCachePath(mediaPath string, index int) string {
	fileName := fmt.Sprintf("%s.s%d.vtt", filepath.Base(mediaPath), index)
	return filepath.Join(SidecarDir(mediaPath), fileName)
}

// sidecarFresh reports whether a cache file exists and is at least as new as the
// media file. A re-download/replace bumps the media mtime and invalidates the
// stale sidecar so we re-extract.
+func sidecarFresh(cachePath, mediaPath string) bool { + cfi, err := os.Stat(cachePath) + if err != nil { + return false + } + mfi, err := os.Stat(mediaPath) + if err != nil { + return false + } + return !cfi.ModTime().Before(mfi.ModTime()) +} + +// writeSidecar atomically writes data to a sidecar path (temp + rename), creating +// the hidden dir if needed. Returns an error the caller logs and continues on +// (e.g. a read-only mount) — caching is never required for correctness. +func writeSidecar(path string, data []byte) error { + if len(data) == 0 { + return errors.New("refusing to cache empty artifact") + } + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + return err + } + tmp := path + ".tmp" + if err := os.WriteFile(tmp, data, 0o644); err != nil { + return err + } + if err := os.Rename(tmp, path); err != nil { + _ = os.Remove(tmp) + return err + } + return nil +} + +// ReadCachedSubtitle returns the cached WebVTT for (mediaPath, index) when a +// fresh sidecar exists. ok=false means the caller should extract on demand. +func ReadCachedSubtitle(mediaPath string, index int) ([]byte, bool) { + p := SubtitleCachePath(mediaPath, index) + if !sidecarFresh(p, mediaPath) { + return nil, false + } + b, err := os.ReadFile(p) + if err != nil || len(b) == 0 { + return nil, false + } + return b, true +} + +// WriteCachedSubtitle stores extracted WebVTT next to the media. Best-effort. +func WriteCachedSubtitle(mediaPath string, index int, vtt []byte) error { + return writeSidecar(SubtitleCachePath(mediaPath, index), vtt) +} + +// ExtractSubtitleVTT runs ffmpeg to convert subtitle stream `index` of mediaPath +// to WebVTT bytes. Shared by the on-demand /sub handler and the scan-time prewarm +// so both produce identical output. The caller owns the ctx deadline: the handler +// uses a short HTTP-bound timeout; the prewarm uses a generous one (a full text +// track on a multi-GB remux can take minutes to demux). 
//
// A negative index is rejected before ffmpeg is started. When the run fails
// because ctx expired or was cancelled, the returned error wraps ctx.Err(), so
// callers can test it with errors.Is(err, context.DeadlineExceeded) or
// errors.Is(err, context.Canceled) instead of parsing "signal: killed". Other
// failures wrap the exec error and carry ffmpeg's trimmed stderr. Output of
// zero bytes is reported as an error, never returned as an empty track.
func ExtractSubtitleVTT(ctx context.Context, ffmpegPath, mediaPath string, index int) ([]byte, error) {
	if index < 0 {
		// "0:s:-1?" is not a valid stream specifier; fail fast and clearly.
		return nil, fmt.Errorf("ffmpeg subtitle extract: invalid stream index %d", index)
	}

	// -map 0:s:<N>? selects the Nth subtitle stream (the trailing "?" makes
	// the map non-fatal if the stream is absent); -c:s webvtt converts
	// srt/ass/mov_text/etc. to WebVTT, written to stdout ("-").
	args := []string{
		"-nostdin",
		"-loglevel", "error",
		"-i", mediaPath,
		"-map", fmt.Sprintf("0:s:%d?", index),
		"-c:s", "webvtt",
		"-f", "webvtt",
		"-",
	}
	cmd := exec.CommandContext(ctx, ffmpegPath, args...)
	var stderr strings.Builder
	cmd.Stderr = &stderr
	out, err := cmd.Output()
	if err != nil {
		detail := strings.TrimSpace(stderr.String())
		// A killed child only reports "signal: killed"; surface the context
		// error as the wrapped cause so the reason is inspectable.
		if ctxErr := ctx.Err(); ctxErr != nil && !errors.Is(err, ctxErr) {
			return nil, fmt.Errorf("ffmpeg subtitle extract: %w (%v): %s", ctxErr, err, detail)
		}
		return nil, fmt.Errorf("ffmpeg subtitle extract: %w: %s", err, detail)
	}
	if len(out) == 0 {
		return nil, errors.New("ffmpeg produced no subtitle output")
	}
	return out, nil
}

// ---- internal/library/prewarm.go (package library) ----

// PrewarmOptions controls scan-time sidecar extraction.
type PrewarmOptions struct {
	FFmpegPath     string // resolved ffmpeg binary; empty disables prewarm
	CacheSubtitles bool   // library.cache_subtitles; false disables prewarm
	Workers        int    // concurrent ffmpeg jobs (each is heavy); values < 1 fall back to 2
}

// PrewarmSidecars extracts every text subtitle of every scanned item into the
// hidden ".unarr" sidecar dir next to the media file, so the /sub handler serves
// it instantly at play time (instead of re-running ffmpeg, which on a 50GB+
// remux exceeds the on-demand HTTP timeout). Without the per-request 60s ceiling
// here, even huge files complete (generous per-file timeout).
//
// Best-effort and idempotent: an already-fresh sidecar is skipped, errors are
// logged and the item moves on, and ctx cancellation (Ctrl-C / daemon shutdown)
// stops cleanly.
Safe to call after every scan — only missing/stale caches do work. +func PrewarmSidecars(ctx context.Context, cache *LibraryCache, opts PrewarmOptions) { + if cache == nil || opts.FFmpegPath == "" || !opts.CacheSubtitles { + return + } + workers := opts.Workers + if workers < 1 { + workers = 2 + } + + type job struct { + path string + index int + } + jobs := make(chan job) + var wg sync.WaitGroup + var mu sync.Mutex + cached, failed := 0, 0 + + for i := 0; i < workers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := range jobs { + if ctx.Err() != nil { + return + } + if _, ok := mediainfo.ReadCachedSubtitle(j.path, j.index); ok { + continue // already fresh + } + // Generous per-file deadline: a full text track on a multi-GB + // remux can take minutes to demux. Bounded so one corrupt file + // can't wedge a worker forever. + jctx, cancel := context.WithTimeout(ctx, 10*time.Minute) + vtt, err := mediainfo.ExtractSubtitleVTT(jctx, opts.FFmpegPath, j.path, j.index) + cancel() + if err != nil { + mu.Lock() + failed++ + mu.Unlock() + continue + } + if werr := mediainfo.WriteCachedSubtitle(j.path, j.index, vtt); werr != nil { + log.Printf("[prewarm] sidecar write skipped (i=%d path=%q): %v", j.index, j.path, werr) + mu.Lock() + failed++ + mu.Unlock() + continue + } + mu.Lock() + cached++ + mu.Unlock() + } + }() + } + + go func() { + defer close(jobs) + for _, item := range cache.Items { + if item.MediaInfo == nil || item.FilePath == "" { + continue + } + for idx, sub := range item.MediaInfo.Subtitles { + if !mediainfo.IsTextSubtitleCodec(sub.Codec) { + continue // bitmap → burned in, not extractable to WebVTT + } + select { + case jobs <- job{path: item.FilePath, index: idx}: + case <-ctx.Done(): + return + } + } + } + }() + + wg.Wait() + if cached > 0 || failed > 0 { + log.Printf("[prewarm] subtitles: %d cached, %d failed", cached, failed) + } +}