Close the recurring "video has subtitles but the web player shows none" gap with a source-agnostic pipeline:
- Discover EXTERNAL sidecar subs in the scan (Video.es.ass siblings + a Subs/ bundle), parse lang/forced/SDH from the filename, skip VobSub (.sub+.idx). ffprobe-only scanning ignored these (ToonsHub/anime "MSubs" releases).
- Transcode sidecar charset -> UTF-8 before WebVTT (BOM/UTF-16/code-page by language). Chinese SCRIPT matters: chs/sc -> GBK, cht/tc/big5 -> Big5 (decoding one as the other is mojibake).
- /sub now serves a standalone sidecar file (i=-1, p=file, &l=lang hint) and a remote debrid URL (ffmpeg reads http, no local stat) — not just embedded streams of a local file.
- probe.json emits a tokened vttUrl per TEXT track so torrent/debrid HLS streams (never library-scanned) get subtitles too. Embedded index is counted among embedded streams only, so -map 0:s:N stays aligned when sidecars are appended.
Tested against a real 347-file gallery: 26/26 sidecars and embedded ass/srt/mov_text all extract to valid WebVTT; bitmap (pgs/dvd_subtitle) correctly stays burn-in. Manual harness gated behind GALLERY_DIR.
64 lines
1.8 KiB
Go
64 lines
1.8 KiB
Go
package mediainfo
|
||
|
||
import (
|
||
"testing"
|
||
|
||
"golang.org/x/text/encoding/charmap"
|
||
"golang.org/x/text/transform"
|
||
)
|
||
|
||
func TestDecodeSubtitleToUTF8_PlainASCII(t *testing.T) {
|
||
in := []byte("Hello world")
|
||
out, name := DecodeSubtitleToUTF8(in, "en")
|
||
if string(out) != "Hello world" || name != "utf-8" {
|
||
t.Fatalf("ASCII passthrough failed: %q %s", out, name)
|
||
}
|
||
}
|
||
|
||
func TestDecodeSubtitleToUTF8_BOMStripped(t *testing.T) {
|
||
in := append([]byte{0xEF, 0xBB, 0xBF}, []byte("café")...)
|
||
out, name := DecodeSubtitleToUTF8(in, "fr")
|
||
if string(out) != "café" || name != "bom-utf8" {
|
||
t.Fatalf("UTF-8 BOM strip failed: %q %s", out, name)
|
||
}
|
||
}
|
||
|
||
func TestDecodeSubtitleToUTF8_Windows1252(t *testing.T) {
|
||
// "café" encoded in Windows-1252 (é = 0xE9) is NOT valid UTF-8.
|
||
enc1252, _, err := transform.Bytes(charmap.Windows1252.NewEncoder(), []byte("café"))
|
||
if err != nil {
|
||
t.Fatal(err)
|
||
}
|
||
out, name := DecodeSubtitleToUTF8(enc1252, "fr")
|
||
if string(out) != "café" {
|
||
t.Fatalf("Windows-1252 decode failed: got %q (%s)", out, name)
|
||
}
|
||
if name != "windows-1252" {
|
||
t.Fatalf("expected windows-1252, got %s", name)
|
||
}
|
||
}
|
||
|
||
func TestDecodeSubtitleToUTF8_TraditionalChineseBig5(t *testing.T) {
|
||
// 繁 (U+7E41) in Big5 is 0xC1 0x63. Decoding it as GBK would be mojibake, so
|
||
// the zh-Hant hint must route to Big5.
|
||
in := []byte{0xC1, 0x63}
|
||
out, name := DecodeSubtitleToUTF8(in, "zh-Hant")
|
||
if name != "big5" {
|
||
t.Fatalf("expected big5 for zh-Hant, got %s", name)
|
||
}
|
||
if string(out) != "繁" {
|
||
t.Fatalf("Big5 decode failed: got %q", out)
|
||
}
|
||
}
|
||
|
||
func TestDecodeSubtitleToUTF8_ArabicByLang(t *testing.T) {
|
||
// Arabic letter ا (U+0627) is 0xC7 in Windows-1256.
|
||
in := []byte{0xC7}
|
||
out, name := DecodeSubtitleToUTF8(in, "ar")
|
||
if name != "windows-1256" {
|
||
t.Fatalf("expected windows-1256 for Arabic, got %s", name)
|
||
}
|
||
if string(out) != "ا" {
|
||
t.Fatalf("Arabic decode failed: got %q", out)
|
||
}
|
||
}
|