perf(stream): extract all text subtitles of a file in one ffmpeg pass
Subtitle extraction is I/O-bound: subtitle packets are interleaved across the whole container, so ffmpeg must read the entire file to demux a complete track (measured ~57 MB/s reading a 60GB remux over ~75 MB/s NFS → ~14 min for the full read). Doing that once per track meant N full reads of a huge file. ExtractSubtitlesVTTMulti demuxes the container ONCE and routes every text track to its own WebVTT output, so an N-text-track file costs one read instead of N. The prewarm now enqueues one job per file (all its text indices) and raises the per-file deadline to 45 min so even ~200GB remuxes finish the single read in the background (idempotent; the on-demand /sub keeps its 60s fallback). Thumbnails are unaffected — a keyframe seek reads a tiny slice (~0.7s even on 60GB).
This commit is contained in:
parent
1e5de874cf
commit
8a47132f15
2 changed files with 84 additions and 21 deletions
|
|
@ -147,6 +147,52 @@ func ExtractSubtitleVTT(ctx context.Context, ffmpegPath, mediaPath string, index
|
|||
return out, nil
|
||||
}
|
||||
|
||||
// ExtractSubtitlesVTTMulti extracts several text subtitle streams in a SINGLE
// ffmpeg pass. The expensive part of subtitle extraction is demuxing the whole
// container (subtitle packets are interleaved across the runtime), so a 60GB
// remux with N text tracks costs N full reads when done one index at a time —
// here it's one read for all of them.
//
// indices are subtitle-relative stream positions (each is passed to ffmpeg as
// "-map 0:s:N?"). Duplicate entries are collapsed so two outputs never target
// the same temp file.
//
// Returns index→WebVTT for the streams that produced output (an empty stream is
// simply absent, not an error). An empty indices slice yields (nil, nil). An
// error is returned when no stream produced any output, or when ctx was
// cancelled / timed out before ffmpeg finished; the latter wraps ctx.Err() so
// callers can test it with errors.Is, and never returns the partial files a
// killed ffmpeg leaves behind (they would otherwise look like complete tracks).
//
// ffmpeg can't multiplex several outputs onto stdout, so it writes per-track
// temp files which are read back; callers cache them via WriteCachedSubtitle.
func ExtractSubtitlesVTTMulti(ctx context.Context, ffmpegPath, mediaPath string, indices []int) (map[int][]byte, error) {
	if len(indices) == 0 {
		return nil, nil
	}
	// Don't create temp state or spawn ffmpeg for an already-dead context.
	if err := ctx.Err(); err != nil {
		return nil, fmt.Errorf("ffmpeg multi-subtitle extract: %w", err)
	}

	tmpDir, err := os.MkdirTemp("", "unarr-subs-")
	if err != nil {
		return nil, fmt.Errorf("ffmpeg multi-subtitle extract: create temp dir: %w", err)
	}
	// Best-effort cleanup: a leftover temp dir is harmless and there is no
	// useful recovery if removal fails.
	defer func() { _ = os.RemoveAll(tmpDir) }()

	args := []string{"-nostdin", "-loglevel", "error", "-i", mediaPath}
	tmp := make(map[int]string, len(indices))
	for _, idx := range indices {
		if _, dup := tmp[idx]; dup {
			// Same stream requested twice: one output is enough, and two
			// outputs on one path would clobber each other.
			continue
		}
		f := filepath.Join(tmpDir, fmt.Sprintf("s%d.vtt", idx))
		tmp[idx] = f
		// One output file per stream; output options precede each output path.
		args = append(args, "-map", fmt.Sprintf("0:s:%d?", idx), "-c:s", "webvtt", "-f", "webvtt", "-y", f)
	}

	cmd := exec.CommandContext(ctx, ffmpegPath, args...)
	var stderr strings.Builder
	cmd.Stderr = &stderr
	// A non-zero exit can still leave good per-track files (e.g. one corrupt
	// stream), so don't bail on err alone — read whatever landed and judge by
	// that.
	runErr := cmd.Run()
	if runErr != nil {
		// Exception: if the run failed because ctx ended, ffmpeg was killed
		// mid-read and every file on disk is a truncated track. Surface the
		// context error instead of handing back partial subtitles.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return nil, fmt.Errorf("ffmpeg multi-subtitle extract interrupted: %w", ctxErr)
		}
	}

	out := make(map[int][]byte, len(tmp))
	for idx, f := range tmp {
		// A missing or empty file just means that stream yielded nothing.
		if b, rerr := os.ReadFile(f); rerr == nil && len(b) > 0 {
			out[idx] = b
		}
	}
	if len(out) == 0 {
		return nil, fmt.Errorf("ffmpeg multi-subtitle extract: no output (err=%v): %s", runErr, strings.TrimSpace(stderr.String()))
	}
	return out, nil
}
|
||||
|
||||
// ReadCachedThumbnail returns the cached JPEG for (mediaPath, posSec, width) when
|
||||
// a fresh sidecar exists. ok=false means extract on demand.
|
||||
func ReadCachedThumbnail(mediaPath string, posSec float64, width int) ([]byte, bool) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue