diff --git a/internal/library/mediainfo/sidecar.go b/internal/library/mediainfo/sidecar.go
index 8aaabbd..f5afc3f 100644
--- a/internal/library/mediainfo/sidecar.go
+++ b/internal/library/mediainfo/sidecar.go
@@ -147,6 +147,60 @@ func ExtractSubtitleVTT(ctx context.Context, ffmpegPath, mediaPath string, index
 	return out, nil
 }
 
+// ExtractSubtitlesVTTMulti extracts several text subtitle streams in a SINGLE
+// ffmpeg pass. The expensive part of subtitle extraction is demuxing the whole
+// container (subtitle packets are interleaved across the runtime), so a 60GB
+// remux with N text tracks costs N full reads when done one index at a time —
+// here it's one read for all of them. Returns index→WebVTT for the streams that
+// produced output (an empty stream is simply absent, not an error). ffmpeg can't
+// multiplex several outputs onto stdout, so it writes per-track temp files which
+// are read back; callers cache them via WriteCachedSubtitle.
+// If ctx is cancelled or its deadline passes while ffmpeg is running, every
+// file on disk may be truncated, so nothing is returned except the error.
+func ExtractSubtitlesVTTMulti(ctx context.Context, ffmpegPath, mediaPath string, indices []int) (map[int][]byte, error) {
+	if len(indices) == 0 {
+		return nil, nil
+	}
+	tmpDir, err := os.MkdirTemp("", "unarr-subs-")
+	if err != nil {
+		return nil, err
+	}
+	defer func() { _ = os.RemoveAll(tmpDir) }()
+
+	args := []string{"-nostdin", "-loglevel", "error", "-i", mediaPath}
+	tmp := make(map[int]string, len(indices))
+	for _, idx := range indices {
+		f := filepath.Join(tmpDir, fmt.Sprintf("s%d.vtt", idx))
+		tmp[idx] = f
+		// One output file per stream; output options precede each output path.
+		args = append(args, "-map", fmt.Sprintf("0:s:%d?", idx), "-c:s", "webvtt", "-f", "webvtt", "-y", f)
+	}
+
+	cmd := exec.CommandContext(ctx, ffmpegPath, args...)
+	var stderr strings.Builder
+	cmd.Stderr = &stderr
+	// A non-zero exit can still leave good per-track files (e.g. one corrupt
+	// stream), so don't bail on err — read whatever landed and judge by that.
+	runErr := cmd.Run()
+	// The exception is cancellation: ffmpeg was killed mid-read, so the files
+	// that did land are cut short. Returning them would let callers cache
+	// truncated subtitles as if they were complete.
+	if cerr := ctx.Err(); runErr != nil && cerr != nil {
+		return nil, fmt.Errorf("ffmpeg multi-subtitle extract: aborted, discarding partial output: %w", cerr)
+	}
+
+	out := make(map[int][]byte, len(indices))
+	for idx, f := range tmp {
+		if b, rerr := os.ReadFile(f); rerr == nil && len(b) > 0 {
+			out[idx] = b
+		}
+	}
+	if len(out) == 0 {
+		return nil, fmt.Errorf("ffmpeg multi-subtitle extract: no output (err=%v): %s", runErr, strings.TrimSpace(stderr.String()))
+	}
+	return out, nil
+}
+
 // ReadCachedThumbnail returns the cached JPEG for (mediaPath, posSec, width) when
 // a fresh sidecar exists. ok=false means extract on demand.
 func ReadCachedThumbnail(mediaPath string, posSec float64, width int) ([]byte, bool) {
diff --git a/internal/library/prewarm.go b/internal/library/prewarm.go
index cac44a1..ea3273b 100644
--- a/internal/library/prewarm.go
+++ b/internal/library/prewarm.go
@@ -28,12 +28,12 @@ type PrewarmOptions struct {
 	Workers int // concurrent ffmpeg jobs (each is heavy); default 2
 }
 
-// prewarmJob is one extraction unit: a text subtitle (thumb=false) or a single
-// thumbnail frame (thumb=true).
+// prewarmJob is one extraction unit: all text subtitles of a file in one ffmpeg
+// pass (thumb=false) or a single thumbnail frame (thumb=true).
 type prewarmJob struct {
 	path   string
 	thumb  bool
-	index  int     // subtitle stream index (subtitle job)
+	subIdx []int   // subtitle stream indices to extract in ONE pass (subtitle job)
 	posSec float64 // frame position in seconds (thumbnail job)
 	width  int     // frame width (thumbnail job)
 }
@@ -96,31 +96,45 @@ func PrewarmSidecars(ctx context.Context, cache *LibraryCache, opts PrewarmOptio
 					continue
 				}
 
-				if _, ok := mediainfo.ReadCachedSubtitle(j.path, j.index); ok {
-					continue // already fresh
+				// Extract only the indices not already fresh, and do them in ONE
+				// ffmpeg pass — a multi-GB remux is demuxed once for all its text
+				// tracks instead of once per track.
+				todo := make([]int, 0, len(j.subIdx))
+				for _, idx := range j.subIdx {
+					if _, ok := mediainfo.ReadCachedSubtitle(j.path, idx); !ok {
+						todo = append(todo, idx)
+					}
 				}
-				// Generous per-file deadline: a full text track on a multi-GB
-				// remux can take minutes to demux. Bounded so one corrupt file
-				// can't wedge a worker forever.
-				jctx, cancel := context.WithTimeout(ctx, 10*time.Minute)
-				vtt, err := mediainfo.ExtractSubtitleVTT(jctx, opts.FFmpegPath, j.path, j.index)
+				if len(todo) == 0 {
+					continue
+				}
+				// Generous per-file deadline. Subtitle packets are interleaved across
+				// the whole container, so extraction is I/O-bound: it must read the
+				// entire file once (all text tracks share that single pass). A 60GB
+				// remux over ~75 MB/s NFS is ~14 min, so 45 min covers files up to
+				// ~200GB; bounded so one corrupt/stalled file can't wedge a worker.
+				// This is background + idempotent — it only runs until the cache fills.
+				jctx, cancel := context.WithTimeout(ctx, 45*time.Minute)
+				res, err := mediainfo.ExtractSubtitlesVTTMulti(jctx, opts.FFmpegPath, j.path, todo)
 				cancel()
 				if err != nil {
 					mu.Lock()
-					failed++
+					failed += len(todo)
 					mu.Unlock()
 					continue
 				}
-				if werr := mediainfo.WriteCachedSubtitle(j.path, j.index, vtt); werr != nil {
-					log.Printf("[prewarm] sidecar write skipped (i=%d path=%q): %v", j.index, j.path, werr)
+				for idx, vtt := range res {
+					if werr := mediainfo.WriteCachedSubtitle(j.path, idx, vtt); werr != nil {
+						log.Printf("[prewarm] sidecar write skipped (i=%d path=%q): %v", idx, j.path, werr)
+						mu.Lock()
+						failed++
+						mu.Unlock()
+						continue
+					}
 					mu.Lock()
-					failed++
+					subCached++
 					mu.Unlock()
-					continue
 				}
-				mu.Lock()
-				subCached++
-				mu.Unlock()
 			}
 		}()
 	}
@@ -132,12 +146,15 @@ func PrewarmSidecars(ctx context.Context, cache *LibraryCache, opts PrewarmOptio
 			continue
 		}
 		if opts.CacheSubtitles {
+			var subIdx []int
 			for idx, sub := range item.MediaInfo.Subtitles {
-				if !mediainfo.IsTextSubtitleCodec(sub.Codec) {
-					continue // bitmap → burned in, not extractable to WebVTT
+				if mediainfo.IsTextSubtitleCodec(sub.Codec) {
+					subIdx = append(subIdx, idx) // text codec → extractable to WebVTT (bitmap subs are burned in, so left out)
 				}
+			}
+			if len(subIdx) > 0 {
 				select {
-				case jobs <- prewarmJob{path: item.FilePath, index: idx}:
+				case jobs <- prewarmJob{path: item.FilePath, subIdx: subIdx}:
 				case <-ctx.Done():
 					return
 				}