feat(subs): resilient subtitle extraction — sidecars, charset, torrent/debrid

Close the recurring "video has subtitles but the web player shows none" gap
with a source-agnostic pipeline:

- Discover EXTERNAL sidecar subs in the scan (Video.es.ass siblings + a Subs/
  bundle), parse lang/forced/SDH from the filename, skip VobSub (.sub+.idx).
  ffprobe-only scanning ignored these (ToonsHub/anime "MSubs" releases).
- Transcode sidecar charset -> UTF-8 before WebVTT (BOM/UTF-16/code-page by
  language). Chinese SCRIPT matters: chs/sc -> GBK, cht/tc/big5 -> Big5
  (decoding one as the other is mojibake).
- /sub now serves a standalone sidecar file (i=-1, p=file, &l=lang hint) and a
  remote debrid URL (ffmpeg reads http, no local stat) — not just embedded
  streams of a local file.
- probe.json emits a tokened vttUrl per TEXT track so torrent/debrid HLS streams
  (never library-scanned) get subtitles too. Embedded index is counted among
  embedded streams only, so -map 0:s:N stays aligned when sidecars are appended.

Tested against a real 347-file gallery: 26/26 sidecars and embedded ass/srt/
mov_text all extract to valid WebVTT; bitmap (pgs/dvd_subtitle) correctly stays
burn-in. Manual harness gated behind GALLERY_DIR.
This commit is contained in:
Deivid Soto 2026-06-08 13:04:09 +02:00
parent 22081cf106
commit d708ea2360
13 changed files with 957 additions and 39 deletions

View file

@ -5,6 +5,7 @@ import (
"context"
"errors"
"fmt"
"log"
"math"
"os"
"os/exec"
@ -148,6 +149,66 @@ func ExtractSubtitleVTT(ctx context.Context, ffmpegPath, mediaPath string, index
return out, nil
}
// ExtractExternalSubtitleVTT converts a STANDALONE sidecar subtitle file (a
// .srt/.ass/.ssa/.vtt sitting next to the media) to WebVTT. Unlike the embedded
// path it has no stream index — the whole file is the track.
//
// The file is first transcoded to UTF-8 via DecodeSubtitleToUTF8 (legacy code
// pages → mojibake otherwise; see charset.go), using langHint as the detection
// hint, and then handed to ffmpeg to emit WebVTT on stdout. The UTF-8 bytes go
// through a temp file that keeps the ORIGINAL extension so ffmpeg selects the
// right demuxer (.srt→subrip, .ass→ass, .vtt→webvtt), and `-sub_charenc UTF-8`
// stops ffmpeg from re-guessing what we already decoded.
//
// Parameters:
//   - ctx bounds the ffmpeg run (it is passed to exec.CommandContext).
//   - ffmpegPath is the ffmpeg executable to invoke.
//   - subPath is the sidecar subtitle file to convert.
//   - langHint is the track language forwarded to the charset detection.
//
// It returns the WebVTT bytes, or an error when the sidecar cannot be read or
// is empty, when the temp copy cannot be created, when ffmpeg fails (its
// trimmed stderr is appended to the error), or when ffmpeg emits no output.
// Every underlying error is wrapped with %w, so errors.Is/errors.As still work.
func ExtractExternalSubtitleVTT(ctx context.Context, ffmpegPath, subPath, langHint string) ([]byte, error) {
	raw, err := os.ReadFile(subPath)
	if err != nil {
		return nil, fmt.Errorf("read sidecar subtitle: %w", err)
	}
	if len(raw) == 0 {
		return nil, errors.New("sidecar subtitle is empty")
	}

	utf8Bytes, encName := DecodeSubtitleToUTF8(raw, langHint)
	// A "(raw)" suffix means the legacy transcode failed and we're passing the
	// original bytes through — the likeliest cause of user-visible mojibake, so
	// leave a trail to diagnose it in the field.
	if strings.HasSuffix(encName, "(raw)") {
		log.Printf("[sub] external charset transcode fell back to raw bytes (%s, lang=%q): possible mojibake", filepath.Base(subPath), langHint)
	}

	// ffmpeg picks the demuxer from the extension; with none to go on, fall
	// back to .srt.
	ext := strings.ToLower(filepath.Ext(subPath))
	if ext == "" {
		ext = ".srt"
	}

	// A private temp dir (rather than a lone temp file) lets the input keep a
	// fixed "in<ext>" name and makes cleanup a single RemoveAll.
	tmpDir, err := os.MkdirTemp("", "unarr-extsub-")
	if err != nil {
		return nil, fmt.Errorf("create sidecar subtitle temp dir: %w", err)
	}
	// Best-effort cleanup: a leftover temp dir must not fail the extraction.
	defer func() { _ = os.RemoveAll(tmpDir) }()

	tmpIn := filepath.Join(tmpDir, "in"+ext)
	if err := os.WriteFile(tmpIn, utf8Bytes, 0o600); err != nil {
		return nil, fmt.Errorf("write sidecar subtitle temp file: %w", err)
	}

	args := []string{
		"-nostdin",
		"-loglevel", "error",
		"-sub_charenc", "UTF-8",
		"-i", tmpIn,
		"-c:s", "webvtt",
		"-f", "webvtt",
		"-",
	}
	cmd := exec.CommandContext(ctx, ffmpegPath, args...)
	// Capture stderr separately so ffmpeg's diagnostics can be attached to the
	// returned error; stdout carries the WebVTT payload.
	var stderr strings.Builder
	cmd.Stderr = &stderr
	out, err := cmd.Output()
	if err != nil {
		return nil, fmt.Errorf("ffmpeg external subtitle extract: %w: %s", err, strings.TrimSpace(stderr.String()))
	}
	if len(out) == 0 {
		return nil, errors.New("ffmpeg produced no subtitle output")
	}
	return out, nil
}
// ExtractSubtitlesVTTMulti extracts several text subtitle streams in a SINGLE
// ffmpeg pass. The expensive part of subtitle extraction is demuxing the whole
// container (subtitle packets are interleaved across the runtime), so a 60GB