feat(stream): cache extracted subtitles to a hidden .unarr sidecar
On-demand WebVTT extraction re-ran ffmpeg on every /sub request and, for 50GB+ remuxes, couldn't finish a full text track within the 60s HTTP timeout → the web player got a 500 and no subtitles.

Extract each text subtitle ONCE — during the library scan (no HTTP deadline, generous per-file timeout) and write-through on the first on-demand request — into a hidden ".unarr/<name>.s<index>.vtt" sidecar next to the media file. The /sub handler serves a fresh sidecar instantly (mtime-invalidated when the media is replaced), so playback subtitles are instant and huge files work.

- mediainfo.sidecar: cache paths, mtime freshness, atomic write, ExtractSubtitleVTT, IsTextSubtitleCodec (shared classifier, mirrors engine + web whitelists).
- library.PrewarmSidecars: bounded, idempotent, ctx-cancellable background pass run after every scan (manual + daemon auto-scan).
- subtitleHandler: cache-read → hit; miss → extract → write-through.
- config: library.cache_subtitles (default true), wired via SetCacheSubtitles.

Local-only by design: nothing extracted is uploaded — the sidecar is the user's own content, private to their disk.
This commit is contained in:
parent
7417fad45f
commit
178c16f458
6 changed files with 353 additions and 33 deletions
135
internal/library/mediainfo/sidecar.go
Normal file
135
internal/library/mediainfo/sidecar.go
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
package mediainfo
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Sidecar cache: unarr stores extracted artifacts (WebVTT subtitles, thumbnail
|
||||
// frames) in a hidden ".unarr" directory NEXT TO the media file, not in the XDG
|
||||
// cache. Keeping them beside the content means they travel with the file and
|
||||
// survive a cache-dir wipe, and the scan-time prewarm and the on-demand stream
|
||||
// handlers share the exact same path scheme — so a subtitle/thumbnail extracted
|
||||
// during a library scan is reused verbatim at play time (no re-extraction, no
|
||||
// 60s-HTTP-timeout failures on huge remuxes).
|
||||
//
|
||||
// Everything here is best-effort: a read-only media mount just means no cache
|
||||
// (the on-demand path still works), and a stale cache (media replaced) is
|
||||
// detected by mtime and ignored.
|
||||
|
||||
// sidecarDirName is the name of the hidden cache directory that SidecarDir
// places in the same folder as a media file.
const sidecarDirName = ".unarr"
|
||||
|
||||
// IsTextSubtitleCodec reports whether codec names a text-based subtitle format
// that can be converted to WebVTT. The comparison ignores letter case and
// surrounding whitespace. Anything not on the whitelist (including bitmap
// formats such as PGS/DVB/VOBSUB, and the empty string) yields false.
//
// It lives in this leaf media package so the stream handlers and the scan-time
// prewarm classify codecs the same way.
func IsTextSubtitleCodec(codec string) bool {
	normalized := strings.ToLower(strings.TrimSpace(codec))
	// Whitelist of codec names treated as extractable text subtitles.
	for _, textCodec := range [...]string{"subrip", "srt", "ass", "ssa", "webvtt", "mov_text", "text"} {
		if normalized == textCodec {
			return true
		}
	}
	return false
}
|
||||
|
||||
// SidecarDir returns the hidden per-folder cache directory for a media file.
|
||||
func SidecarDir(mediaPath string) string {
|
||||
return filepath.Join(filepath.Dir(mediaPath), sidecarDirName)
|
||||
}
|
||||
|
||||
// SubtitleCachePath is the cached WebVTT path for subtitle stream `index`
|
||||
// (0-based, matching ffmpeg's 0:s:N ordering) of mediaPath.
|
||||
func SubtitleCachePath(mediaPath string, index int) string {
|
||||
return filepath.Join(SidecarDir(mediaPath), fmt.Sprintf("%s.s%d.vtt", filepath.Base(mediaPath), index))
|
||||
}
|
||||
|
||||
// sidecarFresh reports whether cachePath exists and its modification time is
// not older than that of mediaPath. If either file cannot be stat'ed the cache
// is treated as not fresh. Replacing the media file bumps its mtime, which makes
// an older sidecar stale so the caller re-extracts.
func sidecarFresh(cachePath, mediaPath string) bool {
	cacheInfo, cacheErr := os.Stat(cachePath)
	if cacheErr != nil {
		return false
	}
	mediaInfo, mediaErr := os.Stat(mediaPath)
	if mediaErr != nil {
		return false
	}
	cacheTime, mediaTime := cacheInfo.ModTime(), mediaInfo.ModTime()
	// Fresh means "cache mtime >= media mtime".
	return cacheTime.After(mediaTime) || cacheTime.Equal(mediaTime)
}
|
||||
|
||||
// writeSidecar atomically writes data to the sidecar file at path, creating the
// parent (hidden) directory if needed.
//
// The data is first written to a uniquely named temp file in the same directory
// and then renamed over path. A unique temp name (rather than a fixed
// "<path>.tmp") keeps concurrent writers of the same sidecar — e.g. a prewarm
// pass and an on-demand request — from clobbering each other's half-written
// file. The temp file is removed on every failure path so no debris is left in
// the media folder.
//
// Empty data is rejected before anything touches the disk. Any returned error
// is meant to be logged and ignored by the caller (e.g. a read-only mount):
// caching is never required for correctness.
func writeSidecar(path string, data []byte) error {
	if len(data) == 0 {
		return errors.New("refusing to cache empty artifact")
	}
	dir := filepath.Dir(path)
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return err
	}

	// The last "*" in the pattern is replaced by a random string, so every
	// writer gets its own temp file on the same filesystem as the target.
	tmp, err := os.CreateTemp(dir, filepath.Base(path)+".*.tmp")
	if err != nil {
		return err
	}
	tmpName := tmp.Name()
	committed := false
	defer func() {
		if !committed {
			_ = os.Remove(tmpName) // best-effort cleanup; the original error is what matters
		}
	}()

	if _, err := tmp.Write(data); err != nil {
		_ = tmp.Close() // the write error is the one worth reporting
		return err
	}
	if err := tmp.Close(); err != nil {
		return err
	}
	// CreateTemp creates the file with mode 0600; restore the 0644 mode the
	// sidecar is expected to have.
	if err := os.Chmod(tmpName, 0o644); err != nil {
		return err
	}
	if err := os.Rename(tmpName, path); err != nil {
		return err
	}
	committed = true
	return nil
}
|
||||
|
||||
// ReadCachedSubtitle returns the cached WebVTT for (mediaPath, index) when a
|
||||
// fresh sidecar exists. ok=false means the caller should extract on demand.
|
||||
func ReadCachedSubtitle(mediaPath string, index int) ([]byte, bool) {
|
||||
p := SubtitleCachePath(mediaPath, index)
|
||||
if !sidecarFresh(p, mediaPath) {
|
||||
return nil, false
|
||||
}
|
||||
b, err := os.ReadFile(p)
|
||||
if err != nil || len(b) == 0 {
|
||||
return nil, false
|
||||
}
|
||||
return b, true
|
||||
}
|
||||
|
||||
// WriteCachedSubtitle stores extracted WebVTT next to the media. Best-effort.
|
||||
func WriteCachedSubtitle(mediaPath string, index int, vtt []byte) error {
|
||||
return writeSidecar(SubtitleCachePath(mediaPath, index), vtt)
|
||||
}
|
||||
|
||||
// ExtractSubtitleVTT runs ffmpeg to convert subtitle stream `index` of mediaPath
// to WebVTT and returns the bytes ffmpeg wrote to stdout. It is shared by the
// on-demand /sub handler and the scan-time prewarm so both produce identical
// output.
//
// The caller owns the ctx deadline: the handler uses a short HTTP-bound timeout,
// the prewarm a generous one (a full text track on a multi-GB remux can take
// minutes to demux).
//
// Errors:
//   - a negative index is rejected without starting ffmpeg;
//   - if ctx was cancelled or timed out, the returned error wraps ctx.Err(), so
//     callers can test it with errors.Is(err, context.DeadlineExceeded) or
//     context.Canceled instead of seeing only "signal: killed";
//   - any other ffmpeg failure wraps the process error and appends ffmpeg's
//     stderr when there is any;
//   - a successful run that produced no bytes is reported as an error.
func ExtractSubtitleVTT(ctx context.Context, ffmpegPath, mediaPath string, index int) ([]byte, error) {
	if index < 0 {
		return nil, fmt.Errorf("ffmpeg subtitle extract: invalid subtitle index %d", index)
	}

	// -map 0:s:<index>? selects the Nth subtitle stream (non-fatal if absent);
	// -c:s webvtt converts srt/ass/mov_text/etc. to WebVTT on stdout.
	args := []string{
		"-nostdin",
		"-loglevel", "error",
		"-i", mediaPath,
		"-map", fmt.Sprintf("0:s:%d?", index),
		"-c:s", "webvtt",
		"-f", "webvtt",
		"-",
	}
	cmd := exec.CommandContext(ctx, ffmpegPath, args...)
	var stderr strings.Builder
	cmd.Stderr = &stderr

	out, err := cmd.Output()
	if err != nil {
		// A process killed by the context surfaces as a plain exit error;
		// put the context error in the chain so the cause is identifiable.
		if ctxErr := ctx.Err(); ctxErr != nil && !errors.Is(err, ctxErr) {
			err = fmt.Errorf("%w (%v)", ctxErr, err)
		}
		if msg := strings.TrimSpace(stderr.String()); msg != "" {
			return nil, fmt.Errorf("ffmpeg subtitle extract: %w: %s", err, msg)
		}
		return nil, fmt.Errorf("ffmpeg subtitle extract: %w", err)
	}
	if len(out) == 0 {
		return nil, errors.New("ffmpeg produced no subtitle output")
	}
	return out, nil
}
|
||||
106
internal/library/prewarm.go
Normal file
106
internal/library/prewarm.go
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
package library
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/torrentclaw/unarr/internal/library/mediainfo"
|
||||
)
|
||||
|
||||
// PrewarmOptions configures the scan-time sidecar extraction performed by
// PrewarmSidecars.
type PrewarmOptions struct {
	// FFmpegPath is the resolved ffmpeg binary; an empty value disables prewarm.
	FFmpegPath string
	// CacheSubtitles carries the library.cache_subtitles setting; false disables prewarm.
	CacheSubtitles bool
	// Workers is the number of concurrent ffmpeg jobs (each is heavy).
	// Values below 1 fall back to the default of 2.
	Workers int
}
|
||||
|
||||
// PrewarmSidecars extracts every text subtitle of every scanned item into the
|
||||
// hidden ".unarr" sidecar dir next to the media file, so the /sub handler serves
|
||||
// it instantly at play time (instead of re-running ffmpeg, which on a 50GB+
|
||||
// remux exceeds the on-demand HTTP timeout). Without the per-request 60s ceiling
|
||||
// here, even huge files complete (generous per-file timeout).
|
||||
//
|
||||
// Best-effort and idempotent: an already-fresh sidecar is skipped, errors are
|
||||
// logged and the item moves on, and ctx cancellation (Ctrl-C / daemon shutdown)
|
||||
// stops cleanly. Safe to call after every scan — only missing/stale caches do work.
|
||||
func PrewarmSidecars(ctx context.Context, cache *LibraryCache, opts PrewarmOptions) {
|
||||
if cache == nil || opts.FFmpegPath == "" || !opts.CacheSubtitles {
|
||||
return
|
||||
}
|
||||
workers := opts.Workers
|
||||
if workers < 1 {
|
||||
workers = 2
|
||||
}
|
||||
|
||||
type job struct {
|
||||
path string
|
||||
index int
|
||||
}
|
||||
jobs := make(chan job)
|
||||
var wg sync.WaitGroup
|
||||
var mu sync.Mutex
|
||||
cached, failed := 0, 0
|
||||
|
||||
for i := 0; i < workers; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for j := range jobs {
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
if _, ok := mediainfo.ReadCachedSubtitle(j.path, j.index); ok {
|
||||
continue // already fresh
|
||||
}
|
||||
// Generous per-file deadline: a full text track on a multi-GB
|
||||
// remux can take minutes to demux. Bounded so one corrupt file
|
||||
// can't wedge a worker forever.
|
||||
jctx, cancel := context.WithTimeout(ctx, 10*time.Minute)
|
||||
vtt, err := mediainfo.ExtractSubtitleVTT(jctx, opts.FFmpegPath, j.path, j.index)
|
||||
cancel()
|
||||
if err != nil {
|
||||
mu.Lock()
|
||||
failed++
|
||||
mu.Unlock()
|
||||
continue
|
||||
}
|
||||
if werr := mediainfo.WriteCachedSubtitle(j.path, j.index, vtt); werr != nil {
|
||||
log.Printf("[prewarm] sidecar write skipped (i=%d path=%q): %v", j.index, j.path, werr)
|
||||
mu.Lock()
|
||||
failed++
|
||||
mu.Unlock()
|
||||
continue
|
||||
}
|
||||
mu.Lock()
|
||||
cached++
|
||||
mu.Unlock()
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
go func() {
|
||||
defer close(jobs)
|
||||
for _, item := range cache.Items {
|
||||
if item.MediaInfo == nil || item.FilePath == "" {
|
||||
continue
|
||||
}
|
||||
for idx, sub := range item.MediaInfo.Subtitles {
|
||||
if !mediainfo.IsTextSubtitleCodec(sub.Codec) {
|
||||
continue // bitmap → burned in, not extractable to WebVTT
|
||||
}
|
||||
select {
|
||||
case jobs <- job{path: item.FilePath, index: idx}:
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
wg.Wait()
|
||||
if cached > 0 || failed > 0 {
|
||||
log.Printf("[prewarm] subtitles: %d cached, %d failed", cached, failed)
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue