perf(stream): extract all text subtitles of a file in one ffmpeg pass
Subtitle extraction is I/O-bound: subtitle packets are interleaved across the whole container, so ffmpeg must read the entire file to demux a complete track (measured ~57 MB/s reading a 60GB remux over ~75 MB/s NFS → ~14 min for the full read). Extracting one track per ffmpeg run therefore meant N full reads of a huge file. ExtractSubtitlesVTTMulti demuxes the container ONCE and routes every text track to its own WebVTT output, so a file with N text tracks costs one read instead of N. The prewarm now enqueues one job per file (carrying all of its text-subtitle indices) and raises the per-file deadline from 10 min to 45 min, so even ~200GB remuxes can finish the single read in the background. The prewarm is idempotent (tracks that are already cached are skipped), and the on-demand /sub keeps its 60s fallback. Thumbnails are unaffected — a keyframe seek reads only a tiny slice (~0.7s even on 60GB).
This commit is contained in:
parent
1e5de874cf
commit
8a47132f15
2 changed files with 84 additions and 21 deletions
|
|
@ -28,12 +28,12 @@ type PrewarmOptions struct {
|
|||
Workers int // concurrent ffmpeg jobs (each is heavy); default 2
|
||||
}
|
||||
|
||||
// prewarmJob is one extraction unit: a text subtitle (thumb=false) or a single
|
||||
// thumbnail frame (thumb=true).
|
||||
// prewarmJob is one extraction unit: all text subtitles of a file in one ffmpeg
|
||||
// pass (thumb=false) or a single thumbnail frame (thumb=true).
|
||||
type prewarmJob struct {
|
||||
path string
|
||||
thumb bool
|
||||
index int // subtitle stream index (subtitle job)
|
||||
subIdx []int // subtitle stream indices to extract in ONE pass (subtitle job)
|
||||
posSec float64 // frame position in seconds (thumbnail job)
|
||||
width int // frame width (thumbnail job)
|
||||
}
|
||||
|
|
@ -96,31 +96,45 @@ func PrewarmSidecars(ctx context.Context, cache *LibraryCache, opts PrewarmOptio
|
|||
continue
|
||||
}
|
||||
|
||||
if _, ok := mediainfo.ReadCachedSubtitle(j.path, j.index); ok {
|
||||
continue // already fresh
|
||||
// Extract only the indices not already fresh, and do them in ONE
|
||||
// ffmpeg pass — a multi-GB remux is demuxed once for all its text
|
||||
// tracks instead of once per track.
|
||||
todo := make([]int, 0, len(j.subIdx))
|
||||
for _, idx := range j.subIdx {
|
||||
if _, ok := mediainfo.ReadCachedSubtitle(j.path, idx); !ok {
|
||||
todo = append(todo, idx)
|
||||
}
|
||||
}
|
||||
// Generous per-file deadline: a full text track on a multi-GB
|
||||
// remux can take minutes to demux. Bounded so one corrupt file
|
||||
// can't wedge a worker forever.
|
||||
jctx, cancel := context.WithTimeout(ctx, 10*time.Minute)
|
||||
vtt, err := mediainfo.ExtractSubtitleVTT(jctx, opts.FFmpegPath, j.path, j.index)
|
||||
if len(todo) == 0 {
|
||||
continue
|
||||
}
|
||||
// Generous per-file deadline. Subtitle packets are interleaved across
|
||||
// the whole container, so extraction is I/O-bound: it must read the
|
||||
// entire file once (all text tracks share that single pass). A 60GB
|
||||
// remux over ~75 MB/s NFS is ~14 min, so 45 min covers files up to
|
||||
// ~200GB; bounded so one corrupt/stalled file can't wedge a worker.
|
||||
// This is background + idempotent — it only runs until the cache fills.
|
||||
jctx, cancel := context.WithTimeout(ctx, 45*time.Minute)
|
||||
res, err := mediainfo.ExtractSubtitlesVTTMulti(jctx, opts.FFmpegPath, j.path, todo)
|
||||
cancel()
|
||||
if err != nil {
|
||||
mu.Lock()
|
||||
failed++
|
||||
failed += len(todo)
|
||||
mu.Unlock()
|
||||
continue
|
||||
}
|
||||
if werr := mediainfo.WriteCachedSubtitle(j.path, j.index, vtt); werr != nil {
|
||||
log.Printf("[prewarm] sidecar write skipped (i=%d path=%q): %v", j.index, j.path, werr)
|
||||
for idx, vtt := range res {
|
||||
if werr := mediainfo.WriteCachedSubtitle(j.path, idx, vtt); werr != nil {
|
||||
log.Printf("[prewarm] sidecar write skipped (i=%d path=%q): %v", idx, j.path, werr)
|
||||
mu.Lock()
|
||||
failed++
|
||||
mu.Unlock()
|
||||
continue
|
||||
}
|
||||
mu.Lock()
|
||||
failed++
|
||||
subCached++
|
||||
mu.Unlock()
|
||||
continue
|
||||
}
|
||||
mu.Lock()
|
||||
subCached++
|
||||
mu.Unlock()
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
|
@ -132,12 +146,15 @@ func PrewarmSidecars(ctx context.Context, cache *LibraryCache, opts PrewarmOptio
|
|||
continue
|
||||
}
|
||||
if opts.CacheSubtitles {
|
||||
var subIdx []int
|
||||
for idx, sub := range item.MediaInfo.Subtitles {
|
||||
if !mediainfo.IsTextSubtitleCodec(sub.Codec) {
|
||||
continue // bitmap → burned in, not extractable to WebVTT
|
||||
if mediainfo.IsTextSubtitleCodec(sub.Codec) {
|
||||
subIdx = append(subIdx, idx) // text track → extractable; bitmap tracks are burned in, so they are skipped
|
||||
}
|
||||
}
|
||||
if len(subIdx) > 0 {
|
||||
select {
|
||||
case jobs <- prewarmJob{path: item.FilePath, index: idx}:
|
||||
case jobs <- prewarmJob{path: item.FilePath, subIdx: subIdx}:
|
||||
case <-ctx.Done():
|
||||
return
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue