feat(stream): cache extracted subtitles to a hidden .unarr sidecar
On-demand WebVTT extraction re-ran ffmpeg on every /sub request and, for 50GB+ remuxes, couldn't finish a full text track within the 60s HTTP timeout, so the web player got a 500 and no subtitles.

Extract each text subtitle ONCE — during the library scan (no HTTP deadline, generous per-file timeout) and write-through on the first on-demand request — into a hidden ".unarr/<name>.s<index>.vtt" sidecar next to the media file. The /sub handler serves a fresh sidecar immediately (mtime-invalidated when the media is replaced), so playback subtitles are instant and huge files work.

- mediainfo.sidecar: cache paths, mtime freshness, atomic write, ExtractSubtitleVTT, IsTextSubtitleCodec (shared classifier, mirrors engine + web whitelists).
- library.PrewarmSidecars: bounded, idempotent, ctx-cancellable background pass run after every scan (manual + daemon auto-scan).
- subtitleHandler: cache-read → hit; miss → extract → write-through.
- config: library.cache_subtitles (default true), wired via SetCacheSubtitles.

Local-only by design: nothing extracted is uploaded — the sidecar is the user's own content, private to their disk.
This commit is contained in:
parent
7417fad45f
commit
178c16f458
6 changed files with 353 additions and 33 deletions
|
|
@ -346,6 +346,10 @@ func runDaemonStart() error {
|
||||||
// Wire ffmpeg so /thumbnail can extract single frames for the web's "file
|
// Wire ffmpeg so /thumbnail can extract single frames for the web's "file
|
||||||
// characteristics" panel (frames on demand). Empty = thumbnails 503.
|
// characteristics" panel (frames on demand). Empty = thumbnails 503.
|
||||||
streamSrv.SetFFmpegPath(ffmpegResolved)
|
streamSrv.SetFFmpegPath(ffmpegResolved)
|
||||||
|
// Write-through cache extracted WebVTT into the hidden ".unarr" sidecar dir so
|
||||||
|
// /sub serves instantly (and giant remuxes that exceed the on-demand timeout
|
||||||
|
// work once the scan prewarm has filled the cache). Default true.
|
||||||
|
streamSrv.SetCacheSubtitles(cfg.Library.CacheSubtitles)
|
||||||
streamSrv.SetRequireStreamToken(cfg.Download.RequireStreamToken)
|
streamSrv.SetRequireStreamToken(cfg.Download.RequireStreamToken)
|
||||||
// Report the stream-token signing key ONLY when enforcing, so the web's
|
// Report the stream-token signing key ONLY when enforcing, so the web's
|
||||||
// "secret present → mint HLS token" signal accurately means "this agent
|
// "secret present → mint HLS token" signal accurately means "this agent
|
||||||
|
|
@ -995,6 +999,18 @@ func runAutoScan(ctx context.Context, cfg config.Config, interval time.Duration,
|
||||||
Incremental: existing != nil,
|
Incremental: existing != nil,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Resolve ffmpeg once for the subtitle-sidecar prewarm (extracts text subs
|
||||||
|
// to the hidden ".unarr" cache so /sub is instant + huge remuxes work).
|
||||||
|
// Empty/err = prewarm is skipped silently (on-demand extraction still runs).
|
||||||
|
prewarmFFmpeg := ""
|
||||||
|
if cfg.Library.CacheSubtitles {
|
||||||
|
if ff, err := mediainfo.ResolveFFmpeg(cfg.Library.FFmpegPath); err == nil {
|
||||||
|
prewarmFFmpeg = ff
|
||||||
|
} else {
|
||||||
|
log.Printf("[auto-scan] subtitle prewarm disabled: ffmpeg unavailable: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Scan each path independently and sync per path so the server can
|
// Scan each path independently and sync per path so the server can
|
||||||
// scope stale-item deletion to the correct directory prefix.
|
// scope stale-item deletion to the correct directory prefix.
|
||||||
const batchSize = 100
|
const batchSize = 100
|
||||||
|
|
@ -1009,6 +1025,14 @@ func runAutoScan(ctx context.Context, cfg config.Config, interval time.Duration,
|
||||||
}
|
}
|
||||||
mergedItems = append(mergedItems, cache.Items...)
|
mergedItems = append(mergedItems, cache.Items...)
|
||||||
|
|
||||||
|
if prewarmFFmpeg != "" {
|
||||||
|
library.PrewarmSidecars(ctx, cache, library.PrewarmOptions{
|
||||||
|
FFmpegPath: prewarmFFmpeg,
|
||||||
|
CacheSubtitles: true,
|
||||||
|
Workers: 2,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
items := library.BuildSyncItems(cache)
|
items := library.BuildSyncItems(cache)
|
||||||
if len(items) == 0 {
|
if len(items) == 0 {
|
||||||
log.Printf("[auto-scan] no items under %s", scanPath)
|
log.Printf("[auto-scan] no items under %s", scanPath)
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ import (
|
||||||
"github.com/torrentclaw/unarr/internal/agent"
|
"github.com/torrentclaw/unarr/internal/agent"
|
||||||
"github.com/torrentclaw/unarr/internal/config"
|
"github.com/torrentclaw/unarr/internal/config"
|
||||||
"github.com/torrentclaw/unarr/internal/library"
|
"github.com/torrentclaw/unarr/internal/library"
|
||||||
|
"github.com/torrentclaw/unarr/internal/library/mediainfo"
|
||||||
)
|
)
|
||||||
|
|
||||||
func newScanCmd() *cobra.Command {
|
func newScanCmd() *cobra.Command {
|
||||||
|
|
@ -139,6 +140,20 @@ func runScan(dirPath string, workers int, ffprobePath string, noSync bool) error
|
||||||
return enc.Encode(cache)
|
return enc.Encode(cache)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Pre-extract subtitle sidecars (text subs → WebVTT in a hidden ".unarr" dir)
|
||||||
|
// so playback gets instant subtitles and huge remuxes never hit the on-demand
|
||||||
|
// timeout. Best-effort + Ctrl-C interruptible (the scan itself is already saved).
|
||||||
|
if cfg.Library.CacheSubtitles {
|
||||||
|
if ff, err := mediainfo.ResolveFFmpeg(cfg.Library.FFmpegPath); err == nil {
|
||||||
|
fmt.Fprintf(os.Stderr, " Pre-extracting subtitles to cache… (Ctrl-C to skip)\n")
|
||||||
|
library.PrewarmSidecars(ctx, cache, library.PrewarmOptions{
|
||||||
|
FFmpegPath: ff,
|
||||||
|
CacheSubtitles: true,
|
||||||
|
Workers: 2,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Sync to server
|
// Sync to server
|
||||||
if !noSync {
|
if !noSync {
|
||||||
return syncToServer(ctx, cfg, cache)
|
return syncToServer(ctx, cfg, cache)
|
||||||
|
|
|
||||||
|
|
@ -189,6 +189,14 @@ type LibraryConfig struct {
|
||||||
AutoScan bool `toml:"auto_scan"` // enable daily auto-scan in daemon (default true)
|
AutoScan bool `toml:"auto_scan"` // enable daily auto-scan in daemon (default true)
|
||||||
ScanInterval string `toml:"scan_interval"` // e.g. "24h", "12h", "6h" (default "24h")
|
ScanInterval string `toml:"scan_interval"` // e.g. "24h", "12h", "6h" (default "24h")
|
||||||
AllowDelete bool `toml:"allow_delete"` // allow web UI to request file deletion from disk
|
AllowDelete bool `toml:"allow_delete"` // allow web UI to request file deletion from disk
|
||||||
|
|
||||||
|
// Sidecar caching: extract text subtitles (WebVTT) and thumbnail frames once
|
||||||
|
// during the library scan and store them in a hidden ".unarr" dir next to the
|
||||||
|
// media file, so the stream handlers serve them instantly instead of running
|
||||||
|
// ffmpeg per request (and so huge remuxes don't hit the on-demand HTTP
|
||||||
|
// timeout). Both default true; disable to save the disk/CPU of pre-extraction.
|
||||||
|
CacheSubtitles bool `toml:"cache_subtitles"` // default true
|
||||||
|
CacheThumbnails bool `toml:"cache_thumbnails"` // default true
|
||||||
}
|
}
|
||||||
|
|
||||||
// Default returns a Config with sensible defaults. Used both for fresh
|
// Default returns a Config with sensible defaults. Used both for fresh
|
||||||
|
|
@ -258,6 +266,8 @@ func Default() Config {
|
||||||
AutoScan: true,
|
AutoScan: true,
|
||||||
ScanInterval: "24h",
|
ScanInterval: "24h",
|
||||||
Workers: 8,
|
Workers: 8,
|
||||||
|
CacheSubtitles: true,
|
||||||
|
CacheThumbnails: true,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -321,6 +331,16 @@ func applyDefaults(cfg *Config, meta toml.MetaData) {
|
||||||
cfg.General.Country = "US"
|
cfg.General.Country = "US"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sidecar caching defaults ON for existing configs that predate these keys —
|
||||||
|
// it only adds small hidden files next to media and makes subs/thumbnails
|
||||||
|
// instant. Power users can set them false explicitly to opt out.
|
||||||
|
if !meta.IsDefined("library", "cache_subtitles") {
|
||||||
|
cfg.Library.CacheSubtitles = true
|
||||||
|
}
|
||||||
|
if !meta.IsDefined("library", "cache_thumbnails") {
|
||||||
|
cfg.Library.CacheThumbnails = true
|
||||||
|
}
|
||||||
|
|
||||||
if !meta.IsDefined("downloads", "transcode", "enabled") {
|
if !meta.IsDefined("downloads", "transcode", "enabled") {
|
||||||
cfg.Download.Transcode.Enabled = true
|
cfg.Download.Transcode.Enabled = true
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/anacrolix/torrent"
|
"github.com/anacrolix/torrent"
|
||||||
|
"github.com/torrentclaw/unarr/internal/library/mediainfo"
|
||||||
)
|
)
|
||||||
|
|
||||||
// StreamURLs holds all available stream URLs keyed by network type.
|
// StreamURLs holds all available stream URLs keyed by network type.
|
||||||
|
|
@ -105,6 +106,12 @@ type StreamServer struct {
|
||||||
// Listen() via SetFFmpegPath; read-only thereafter so the handler needs no lock.
|
// Listen() via SetFFmpegPath; read-only thereafter so the handler needs no lock.
|
||||||
ffmpegPath string
|
ffmpegPath string
|
||||||
|
|
||||||
|
// cacheSubtitles enables write-through caching of extracted WebVTT to the
|
||||||
|
// hidden ".unarr" sidecar dir next to the media (mirrors the scan-time
|
||||||
|
// prewarm). Set once before Listen() via SetCacheSubtitles; default false here,
|
||||||
|
// flipped on from config (default true) by the daemon. read-only thereafter.
|
||||||
|
cacheSubtitles bool
|
||||||
|
|
||||||
lastActivity atomic.Int64
|
lastActivity atomic.Int64
|
||||||
maxByteOffset atomic.Int64 // highest sequential read position (main playback connection)
|
maxByteOffset atomic.Int64 // highest sequential read position (main playback connection)
|
||||||
totalFileSize atomic.Int64
|
totalFileSize atomic.Int64
|
||||||
|
|
@ -204,6 +211,13 @@ func (ss *StreamServer) SetFFmpegPath(path string) {
|
||||||
ss.ffmpegPath = path
|
ss.ffmpegPath = path
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetCacheSubtitles toggles write-through caching of extracted WebVTT into the
|
||||||
|
// hidden ".unarr" sidecar dir next to the media file (library.cache_subtitles,
|
||||||
|
// default true). Call before Listen(); read-only thereafter.
|
||||||
|
func (ss *StreamServer) SetCacheSubtitles(on bool) {
|
||||||
|
ss.cacheSubtitles = on
|
||||||
|
}
|
||||||
|
|
||||||
// SetCORSAllowedOrigins replaces the operator-supplied extra origins. The
|
// SetCORSAllowedOrigins replaces the operator-supplied extra origins. The
|
||||||
// default allowlist (torrentclaw.com / app.torrentclaw.com / localhost dev
|
// default allowlist (torrentclaw.com / app.torrentclaw.com / localhost dev
|
||||||
// ports) is always merged in. Call before Listen().
|
// ports) is always merged in. Call before Listen().
|
||||||
|
|
@ -1003,45 +1017,51 @@ func (ss *StreamServer) subtitleHandler(w http.ResponseWriter, r *http.Request)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// A full subtitle track is small (KBs–low MBs); 60s is ample even for a
|
// Cache hit: serve a fresh sidecar (written by the scan-time prewarm or a
|
||||||
// long movie's text track and bounds a hung/corrupt ffmpeg.
|
// prior request) instantly, skipping ffmpeg. This is also what makes huge
|
||||||
ctx, cancel := context.WithTimeout(r.Context(), 60*time.Second)
|
// remuxes work — the prewarm extracts without the on-demand HTTP timeout
|
||||||
defer cancel()
|
// below, so by play time the hit avoids the 60s ceiling that was returning
|
||||||
|
// 500s on 50GB+ files.
|
||||||
// -map 0:s:<index> selects the Nth subtitle stream (same ordering as the
|
if vtt, ok := mediainfo.ReadCachedSubtitle(rawPath, index); ok {
|
||||||
// library scan / probe.json / burn-in si=N). `-c:s webvtt -f webvtt` converts
|
ss.writeVTT(w, vtt)
|
||||||
// srt/ass/mov_text/etc. to WebVTT on stdout. `?` makes the map non-fatal if
|
|
||||||
// the stream is absent (yields empty output rather than a hard error).
|
|
||||||
args := []string{
|
|
||||||
"-nostdin",
|
|
||||||
"-loglevel", "error",
|
|
||||||
"-i", rawPath,
|
|
||||||
"-map", fmt.Sprintf("0:s:%d?", index),
|
|
||||||
"-c:s", "webvtt",
|
|
||||||
"-f", "webvtt",
|
|
||||||
"-",
|
|
||||||
}
|
|
||||||
cmd := exec.CommandContext(ctx, ss.ffmpegPath, args...)
|
|
||||||
var stderr strings.Builder
|
|
||||||
cmd.Stderr = &stderr
|
|
||||||
out, err := cmd.Output()
|
|
||||||
if err != nil || len(out) == 0 {
|
|
||||||
log.Printf("[sub] extract failed (i=%d path=%q): err=%v %s",
|
|
||||||
index, rawPath, err, strings.TrimSpace(stderr.String()))
|
|
||||||
http.Error(w, "subtitle extract failed", http.StatusInternalServerError)
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A full subtitle track is small (KBs–low MBs); 60s is ample for a normal
|
||||||
|
// movie's text track and bounds a hung/corrupt ffmpeg. Giant remuxes can
|
||||||
|
// exceed this on first play — the prewarm pre-fills the cache so this
|
||||||
|
// on-demand path is the fallback, not the steady state.
|
||||||
|
ctx, cancel := context.WithTimeout(r.Context(), 60*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
out, err := mediainfo.ExtractSubtitleVTT(ctx, ss.ffmpegPath, rawPath, index)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("[sub] extract failed (i=%d path=%q): %v", index, rawPath, err)
|
||||||
|
http.Error(w, "subtitle extract failed", http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// Write-through so the next request is a cache hit. Best-effort: a read-only
|
||||||
|
// media mount just logs and serves the in-memory bytes.
|
||||||
|
if ss.cacheSubtitles {
|
||||||
|
if werr := mediainfo.WriteCachedSubtitle(rawPath, index, out); werr != nil {
|
||||||
|
log.Printf("[sub] cache write skipped (i=%d path=%q): %v", index, rawPath, werr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ss.writeVTT(w, out)
|
||||||
|
}
|
||||||
|
|
||||||
|
// writeVTT writes the standard WebVTT response headers + body for both the
|
||||||
|
// cache-hit and freshly-extracted paths of subtitleHandler.
|
||||||
|
func (ss *StreamServer) writeVTT(w http.ResponseWriter, vtt []byte) {
|
||||||
w.Header().Set("Content-Type", "text/vtt; charset=utf-8")
|
w.Header().Set("Content-Type", "text/vtt; charset=utf-8")
|
||||||
// path+index is stable content for the daemon's lifetime; let the browser
|
// path+index is stable content for the daemon's lifetime; let the browser
|
||||||
// cache so re-selecting a track doesn't re-run ffmpeg. private — the user's
|
// cache so re-selecting a track doesn't re-fetch. private — the user's file.
|
||||||
// own file.
|
|
||||||
w.Header().Set("Cache-Control", "private, max-age=3600")
|
w.Header().Set("Cache-Control", "private, max-age=3600")
|
||||||
w.Header().Set("Content-Length", strconv.Itoa(len(out)))
|
w.Header().Set("Content-Length", strconv.Itoa(len(vtt)))
|
||||||
//nolint:gosec // G705: WebVTT served as text/vtt to a <track> element — not
|
//nolint:gosec // G705: WebVTT served as text/vtt to a <track> element — not
|
||||||
// HTML, so cue text can't execute; the path is token-scoped + stat'd as a
|
// HTML, so cue text can't execute; the path is token-scoped + stat'd as a
|
||||||
// regular file, and ffmpeg only emits well-formed WebVTT.
|
// regular file, and ffmpeg only emits well-formed WebVTT.
|
||||||
if _, err := w.Write(out); err != nil {
|
if _, err := w.Write(vtt); err != nil {
|
||||||
log.Printf("[sub] write failed: %v", err)
|
log.Printf("[sub] write failed: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
135
internal/library/mediainfo/sidecar.go
Normal file
135
internal/library/mediainfo/sidecar.go
Normal file
|
|
@ -0,0 +1,135 @@
|
||||||
|
package mediainfo
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Sidecar cache: unarr stores extracted artifacts (WebVTT subtitles, thumbnail
|
||||||
|
// frames) in a hidden ".unarr" directory NEXT TO the media file, not in the XDG
|
||||||
|
// cache. Keeping them beside the content means they travel with the file and
|
||||||
|
// survive a cache-dir wipe, and the scan-time prewarm and the on-demand stream
|
||||||
|
// handlers share the exact same path scheme — so a subtitle/thumbnail extracted
|
||||||
|
// during a library scan is reused verbatim at play time (no re-extraction, no
|
||||||
|
// 60s-HTTP-timeout failures on huge remuxes).
|
||||||
|
//
|
||||||
|
// Everything here is best-effort: a read-only media mount just means no cache
|
||||||
|
// (the on-demand path still works), and a stale cache (media replaced) is
|
||||||
|
// detected by mtime and ignored.
|
||||||
|
|
||||||
|
const sidecarDirName = ".unarr"
|
||||||
|
|
||||||
|
// IsTextSubtitleCodec reports whether codec names a text-based subtitle format
// that can be converted to WebVTT. The comparison ignores ASCII case and
// surrounding whitespace. Anything not on the list — including bitmap formats
// such as PGS/DVB/VOBSUB, which are burned in rather than extracted — yields
// false.
//
// The list is intended to mirror engine.ProbeSubtitleTrack.IsTextSubtitle and
// the web's isTextSubtitleCodec whitelist; it lives in this leaf media package
// so the stream handlers and the scan-time prewarm classify codecs the same way.
func IsTextSubtitleCodec(codec string) bool {
	name := strings.ToLower(strings.TrimSpace(codec))
	for _, textCodec := range [...]string{"subrip", "srt", "ass", "ssa", "webvtt", "mov_text", "text"} {
		if name == textCodec {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// SidecarDir returns the hidden per-folder cache directory for a media file.
|
||||||
|
func SidecarDir(mediaPath string) string {
|
||||||
|
return filepath.Join(filepath.Dir(mediaPath), sidecarDirName)
|
||||||
|
}
|
||||||
|
|
||||||
|
// SubtitleCachePath is the cached WebVTT path for subtitle stream `index`
|
||||||
|
// (0-based, matching ffmpeg's 0:s:N ordering) of mediaPath.
|
||||||
|
func SubtitleCachePath(mediaPath string, index int) string {
|
||||||
|
return filepath.Join(SidecarDir(mediaPath), fmt.Sprintf("%s.s%d.vtt", filepath.Base(mediaPath), index))
|
||||||
|
}
|
||||||
|
|
||||||
|
// sidecarFresh reports whether cachePath exists and its modification time is
// not older than mediaPath's. Replacing or re-downloading the media bumps its
// mtime, which makes an existing sidecar stale so the caller re-extracts. Any
// stat failure on either path counts as "not fresh".
func sidecarFresh(cachePath, mediaPath string) bool {
	cacheInfo, cacheErr := os.Stat(cachePath)
	if cacheErr != nil {
		return false
	}
	mediaInfo, mediaErr := os.Stat(mediaPath)
	if mediaErr != nil {
		return false
	}
	cachedAt, mediaAt := cacheInfo.ModTime(), mediaInfo.ModTime()
	// Equal timestamps are treated as fresh.
	return cachedAt.After(mediaAt) || cachedAt.Equal(mediaAt)
}
|
||||||
|
|
||||||
|
// writeSidecar atomically writes data to the sidecar file at path, creating the
// hidden parent directory if needed.
//
// The bytes go to a uniquely named temp file in the same directory, which is
// then renamed over path, so a reader never sees a partial file. A unique temp
// name (rather than a fixed "<path>.tmp") keeps two writers for the same track
// — e.g. a prewarm worker and an on-demand write-through — from clobbering each
// other's in-progress file. The temp file is removed on every failure path.
//
// Empty data is rejected so an empty artifact is never cached. The returned
// error is meant to be logged and ignored by the caller (e.g. a read-only
// mount): caching is never required for correctness.
func writeSidecar(path string, data []byte) (err error) {
	if len(data) == 0 {
		return errors.New("refusing to cache empty artifact")
	}
	dir := filepath.Dir(path)
	if err = os.MkdirAll(dir, 0o755); err != nil {
		return err
	}

	tmpFile, err := os.CreateTemp(dir, filepath.Base(path)+".*.tmp")
	if err != nil {
		return err
	}
	tmp := tmpFile.Name()
	defer func() {
		// Never leave a partial or orphaned temp file behind on failure.
		if err != nil {
			_ = os.Remove(tmp)
		}
	}()

	if _, err = tmpFile.Write(data); err != nil {
		_ = tmpFile.Close() // the write error is the one worth reporting
		return err
	}
	if err = tmpFile.Close(); err != nil {
		return err
	}
	// CreateTemp creates the file 0600; widen to the 0644 the sidecar has
	// always been written with so other local readers of the media can use it.
	if err = os.Chmod(tmp, 0o644); err != nil {
		return err
	}
	return os.Rename(tmp, path)
}
|
||||||
|
|
||||||
|
// ReadCachedSubtitle returns the cached WebVTT for (mediaPath, index) when a
|
||||||
|
// fresh sidecar exists. ok=false means the caller should extract on demand.
|
||||||
|
func ReadCachedSubtitle(mediaPath string, index int) ([]byte, bool) {
|
||||||
|
p := SubtitleCachePath(mediaPath, index)
|
||||||
|
if !sidecarFresh(p, mediaPath) {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
b, err := os.ReadFile(p)
|
||||||
|
if err != nil || len(b) == 0 {
|
||||||
|
return nil, false
|
||||||
|
}
|
||||||
|
return b, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// WriteCachedSubtitle stores extracted WebVTT next to the media. Best-effort.
|
||||||
|
func WriteCachedSubtitle(mediaPath string, index int, vtt []byte) error {
|
||||||
|
return writeSidecar(SubtitleCachePath(mediaPath, index), vtt)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ExtractSubtitleVTT runs ffmpeg to convert subtitle stream index (0-based,
// ffmpeg's 0:s:N ordering) of mediaPath to WebVTT and returns the bytes ffmpeg
// wrote to stdout. It is shared by the on-demand /sub handler and the scan-time
// prewarm so both produce identical output.
//
// The caller owns the ctx deadline: the handler uses a short HTTP-bound
// timeout, the prewarm a generous one (a full text track on a multi-GB remux
// can take minutes to demux).
//
// It returns an error when index is negative, when ffmpeg cannot be started or
// exits non-zero, or when ffmpeg produced no output. The ffmpeg error is
// wrapped with %w; when ctx has ended, the context error is added to the
// message so a timeout is not reported as a bare "signal: killed".
func ExtractSubtitleVTT(ctx context.Context, ffmpegPath, mediaPath string, index int) ([]byte, error) {
	if index < 0 {
		return nil, fmt.Errorf("ffmpeg subtitle extract: invalid subtitle index %d", index)
	}

	// -map 0:s:<index>? selects the Nth subtitle stream (non-fatal if absent);
	// -c:s webvtt converts srt/ass/mov_text/etc. to WebVTT on stdout.
	args := []string{
		"-nostdin",
		"-loglevel", "error",
		"-i", mediaPath,
		"-map", fmt.Sprintf("0:s:%d?", index),
		"-c:s", "webvtt",
		"-f", "webvtt",
		"-",
	}
	cmd := exec.CommandContext(ctx, ffmpegPath, args...)
	var stderr strings.Builder
	cmd.Stderr = &stderr

	out, err := cmd.Output()
	if err != nil {
		var details []string
		if ctxErr := ctx.Err(); ctxErr != nil {
			details = append(details, ctxErr.Error())
		}
		if msg := strings.TrimSpace(stderr.String()); msg != "" {
			details = append(details, msg)
		}
		if len(details) == 0 {
			return nil, fmt.Errorf("ffmpeg subtitle extract: %w", err)
		}
		return nil, fmt.Errorf("ffmpeg subtitle extract: %w: %s", err, strings.Join(details, "; "))
	}
	// Because of the trailing "?" an absent stream is not an ffmpeg error, so
	// empty output is treated as the failure signal.
	if len(out) == 0 {
		return nil, errors.New("ffmpeg produced no subtitle output")
	}
	return out, nil
}
|
||||||
106
internal/library/prewarm.go
Normal file
106
internal/library/prewarm.go
Normal file
|
|
@ -0,0 +1,106 @@
|
||||||
|
package library
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"log"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/torrentclaw/unarr/internal/library/mediainfo"
|
||||||
|
)
|
||||||
|
|
||||||
|
// PrewarmOptions configures a PrewarmSidecars pass (scan-time sidecar
// extraction).
type PrewarmOptions struct {
	// FFmpegPath is the resolved ffmpeg binary; empty disables prewarm.
	FFmpegPath string
	// CacheSubtitles carries the library.cache_subtitles setting.
	CacheSubtitles bool
	// Workers is the number of concurrent ffmpeg jobs (each is heavy);
	// default 2.
	Workers int
}
|
||||||
|
|
||||||
|
// PrewarmSidecars extracts every text subtitle of every scanned item into the
|
||||||
|
// hidden ".unarr" sidecar dir next to the media file, so the /sub handler serves
|
||||||
|
// it instantly at play time (instead of re-running ffmpeg, which on a 50GB+
|
||||||
|
// remux exceeds the on-demand HTTP timeout). Without the per-request 60s ceiling
|
||||||
|
// here, even huge files complete (generous per-file timeout).
|
||||||
|
//
|
||||||
|
// Best-effort and idempotent: an already-fresh sidecar is skipped, errors are
|
||||||
|
// logged and the item moves on, and ctx cancellation (Ctrl-C / daemon shutdown)
|
||||||
|
// stops cleanly. Safe to call after every scan — only missing/stale caches do work.
|
||||||
|
func PrewarmSidecars(ctx context.Context, cache *LibraryCache, opts PrewarmOptions) {
|
||||||
|
if cache == nil || opts.FFmpegPath == "" || !opts.CacheSubtitles {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
workers := opts.Workers
|
||||||
|
if workers < 1 {
|
||||||
|
workers = 2
|
||||||
|
}
|
||||||
|
|
||||||
|
type job struct {
|
||||||
|
path string
|
||||||
|
index int
|
||||||
|
}
|
||||||
|
jobs := make(chan job)
|
||||||
|
var wg sync.WaitGroup
|
||||||
|
var mu sync.Mutex
|
||||||
|
cached, failed := 0, 0
|
||||||
|
|
||||||
|
for i := 0; i < workers; i++ {
|
||||||
|
wg.Add(1)
|
||||||
|
go func() {
|
||||||
|
defer wg.Done()
|
||||||
|
for j := range jobs {
|
||||||
|
if ctx.Err() != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if _, ok := mediainfo.ReadCachedSubtitle(j.path, j.index); ok {
|
||||||
|
continue // already fresh
|
||||||
|
}
|
||||||
|
// Generous per-file deadline: a full text track on a multi-GB
|
||||||
|
// remux can take minutes to demux. Bounded so one corrupt file
|
||||||
|
// can't wedge a worker forever.
|
||||||
|
jctx, cancel := context.WithTimeout(ctx, 10*time.Minute)
|
||||||
|
vtt, err := mediainfo.ExtractSubtitleVTT(jctx, opts.FFmpegPath, j.path, j.index)
|
||||||
|
cancel()
|
||||||
|
if err != nil {
|
||||||
|
mu.Lock()
|
||||||
|
failed++
|
||||||
|
mu.Unlock()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if werr := mediainfo.WriteCachedSubtitle(j.path, j.index, vtt); werr != nil {
|
||||||
|
log.Printf("[prewarm] sidecar write skipped (i=%d path=%q): %v", j.index, j.path, werr)
|
||||||
|
mu.Lock()
|
||||||
|
failed++
|
||||||
|
mu.Unlock()
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
mu.Lock()
|
||||||
|
cached++
|
||||||
|
mu.Unlock()
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer close(jobs)
|
||||||
|
for _, item := range cache.Items {
|
||||||
|
if item.MediaInfo == nil || item.FilePath == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
for idx, sub := range item.MediaInfo.Subtitles {
|
||||||
|
if !mediainfo.IsTextSubtitleCodec(sub.Codec) {
|
||||||
|
continue // bitmap → burned in, not extractable to WebVTT
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case jobs <- job{path: item.FilePath, index: idx}:
|
||||||
|
case <-ctx.Done():
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
wg.Wait()
|
||||||
|
if cached > 0 || failed > 0 {
|
||||||
|
log.Printf("[prewarm] subtitles: %d cached, %d failed", cached, failed)
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue