feat(library): detect corrupt/incomplete files during scan
ffprobe already runs on every scanned file; now we capture its stderr and
assess integrity from it. assessIntegrity flags a file "damaged" on the
markers that mean the container/bitstream is unusable: invalid_data,
ebml_corrupt, moov_missing, bitstream_corrupt, plus no_duration (a video
stream with non-positive duration = a truncated/incomplete download).
The verdict rides on MediaInfo.Integrity (IntegrityInfo{Damaged,Reason}),
maps onto LibrarySyncItem.{Integrity,IntegrityReason}, and syncs to the web
so a damaged file can be surfaced at rest instead of only blowing up at
playback.
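Bumps the scan cache version (1 → 2) so existing entries re-probe once, and
the scanner always re-probes a cached entry that already carries a damaged
verdict, so a file re-downloaded to the same path with an identical size+mtime
is not pinned as broken.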
This commit is contained in:
parent
c86e50245e
commit
f0ac905fdb
7 changed files with 122 additions and 4 deletions
|
|
@ -338,6 +338,11 @@ type LibrarySyncItem struct {
|
||||||
AudioTracks any `json:"audioTracks,omitempty"`
|
AudioTracks any `json:"audioTracks,omitempty"`
|
||||||
SubtitleTracks any `json:"subtitleTracks,omitempty"`
|
SubtitleTracks any `json:"subtitleTracks,omitempty"`
|
||||||
VideoInfo any `json:"videoInfo,omitempty"`
|
VideoInfo any `json:"videoInfo,omitempty"`
|
||||||
|
// Integrity flags a damaged / incompletely-downloaded file ("damaged" or
|
||||||
|
// empty). IntegrityReason is a stable code (ebml_corrupt, moov_missing,
|
||||||
|
// no_duration, …) the web maps to a localized "re-download" message.
|
||||||
|
Integrity string `json:"integrity,omitempty"`
|
||||||
|
IntegrityReason string `json:"integrityReason,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// LibrarySyncResponse is returned after syncing library items.
|
// LibrarySyncResponse is returned after syncing library items.
|
||||||
|
|
|
||||||
|
|
@ -84,7 +84,55 @@ func ExtractMediaInfo(ctx context.Context, ffprobePath, filePath string) (*Media
|
||||||
return nil, fmt.Errorf("ffprobe JSON parse failed: %w", err)
|
return nil, fmt.Errorf("ffprobe JSON parse failed: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return parseFFprobeOutput(data)
|
mi, perr := parseFFprobeOutput(data)
|
||||||
|
if perr != nil {
|
||||||
|
return nil, perr
|
||||||
|
}
|
||||||
|
// A corrupt-but-parseable file (e.g. a half-downloaded MKV) returns valid
|
||||||
|
// stream JSON and a zero exit, yet ffprobe still logs structural errors to
|
||||||
|
// stderr (captured above). Flag it so the library can warn instead of
|
||||||
|
// silently shipping a file that won't play.
|
||||||
|
if integ := assessIntegrity(stderr.String(), mi); integ != nil {
|
||||||
|
mi.Integrity = integ
|
||||||
|
}
|
||||||
|
return mi, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// corruptionMarkers are high-confidence ffprobe stderr substrings (lowercased)
// that indicate a structurally damaged / incompletely-downloaded file, each
// paired with a STABLE code the web maps to localized copy. Kept conservative
// so healthy files are never flagged — each appears only on real container/
// bitstream damage, not benign warnings (ffprobe runs at -v error).
//
// Order matters: assessIntegrity reports the code of the FIRST entry that
// matches, so entries run from most specific to most generic. The catch-all
// "invalid data found when processing input" is commonly printed as a trailing
// summary after a more specific demuxer/decoder message, and therefore sits
// last so it cannot mask ebml_corrupt / moov_missing / bitstream_corrupt.
var corruptionMarkers = []struct{ sub, code string }{
	// Container-level damage.
	{"as first byte of an ebml number", "ebml_corrupt"}, // truncated/corrupt MKV
	{"moov atom not found", "moov_missing"},             // truncated MP4
	// Elementary-stream damage.
	{"invalid nal unit size", "bitstream_corrupt"},
	{"non-existing pps", "bitstream_corrupt"},
	// Generic fallback — keep last (see the ordering note above).
	{"invalid data found when processing input", "invalid_data"},
	// NOTE: deliberately NOT matching "error reading header" (ffprobe emits it
	// on transient NFS/network read hiccups — a genuinely unreadable header
	// also exits non-zero → ScanError → item skipped) nor "truncating packet"
	// (printed for healthy MKV/TS with oversized subtitle/PGS packets). Both
	// false-positive on good files; the markers above are structural.
}
|
||||||
|
|
||||||
|
// assessIntegrity inspects ffprobe's stderr plus the parsed result and returns
|
||||||
|
// a damaged verdict on a high-confidence corruption signal, else nil. The
|
||||||
|
// Reason is a stable code (see corruptionMarkers) the web localizes.
|
||||||
|
func assessIntegrity(stderr string, mi *MediaInfo) *IntegrityInfo {
|
||||||
|
low := strings.ToLower(stderr)
|
||||||
|
for _, m := range corruptionMarkers {
|
||||||
|
if strings.Contains(low, m.sub) {
|
||||||
|
return &IntegrityInfo{Damaged: true, Reason: m.code}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// A file that carries a video stream but no determinable duration is almost
|
||||||
|
// always truncated (the moov/cues holding duration sit at the end of the
|
||||||
|
// file). Audio-only items legitimately omit it, so gate on having video.
|
||||||
|
if mi != nil && mi.Video != nil && mi.Video.Duration <= 0 {
|
||||||
|
return &IntegrityInfo{Damaged: true, Reason: "no_duration"}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseFFprobeOutput converts parsed ffprobe JSON into MediaInfo.
|
// parseFFprobeOutput converts parsed ffprobe JSON into MediaInfo.
|
||||||
|
|
|
||||||
|
|
@ -428,3 +428,42 @@ func TestParseFFprobeOutput_FrameRateNoSlash(t *testing.T) {
|
||||||
t.Errorf("frameRate = %v, want 0 (no slash)", mi.Video.FrameRate)
|
t.Errorf("frameRate = %v, want 0 (no slash)", mi.Video.FrameRate)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestAssessIntegrity(t *testing.T) {
|
||||||
|
healthy := &MediaInfo{Video: &VideoInfo{Codec: "h264", Width: 1920, Height: 1080, Duration: 5477}}
|
||||||
|
|
||||||
|
// Healthy file with no stderr → nil (not damaged).
|
||||||
|
if got := assessIntegrity("", healthy); got != nil {
|
||||||
|
t.Errorf("healthy file flagged damaged: %+v", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
// MKV EBML corruption (the real "In the Grey" case): ffprobe exits 0 but
|
||||||
|
// logs EBML errors → damaged with the ebml_corrupt code.
|
||||||
|
ebml := "[matroska,webm @ 0x60e7] 0x00 at pos 2144995 invalid as first byte of an EBML number\n"
|
||||||
|
got := assessIntegrity(ebml, healthy)
|
||||||
|
if got == nil || !got.Damaged || got.Reason != "ebml_corrupt" {
|
||||||
|
t.Errorf("EBML corruption not flagged correctly: %+v", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Truncated MP4.
|
||||||
|
if got := assessIntegrity("moov atom not found\n", healthy); got == nil || got.Reason != "moov_missing" {
|
||||||
|
t.Errorf("moov-missing not flagged: %+v", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Invalid data.
|
||||||
|
if got := assessIntegrity("Invalid data found when processing input\n", healthy); got == nil || got.Reason != "invalid_data" {
|
||||||
|
t.Errorf("invalid-data not flagged: %+v", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
// No duration on a video stream → truncated.
|
||||||
|
noDur := &MediaInfo{Video: &VideoInfo{Codec: "h264", Width: 1920, Height: 1080, Duration: 0}}
|
||||||
|
if got := assessIntegrity("", noDur); got == nil || got.Reason != "no_duration" {
|
||||||
|
t.Errorf("no-duration not flagged: %+v", got)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Audio-only file with no duration is NOT flagged (legitimately omits it).
|
||||||
|
audioOnly := &MediaInfo{Audio: []AudioTrack{{Lang: "en", Codec: "aac"}}}
|
||||||
|
if got := assessIntegrity("", audioOnly); got != nil {
|
||||||
|
t.Errorf("audio-only file wrongly flagged: %+v", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,19 @@ type MediaInfo struct {
|
||||||
Audio []AudioTrack `json:"audio"`
|
Audio []AudioTrack `json:"audio"`
|
||||||
Subtitles []SubtitleTrack `json:"subtitles"`
|
Subtitles []SubtitleTrack `json:"subtitles"`
|
||||||
Languages []string `json:"languages"` // derived from audio tracks
|
Languages []string `json:"languages"` // derived from audio tracks
|
||||||
|
// Integrity is non-nil only when the scan found signs of corruption / an
|
||||||
|
// incomplete download. Surfaced in the web library as a "damaged" warning
|
||||||
|
// so the user re-downloads instead of hitting a file that won't play.
|
||||||
|
Integrity *IntegrityInfo `json:"integrity,omitempty"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// IntegrityInfo flags a file whose metadata probed OK enough to land in the
|
||||||
|
// library but that shows structural damage (ffprobe emitted EBML / "invalid
|
||||||
|
// data" errors, a missing moov atom, or a video stream with no usable duration) — the
|
||||||
|
// hallmark of an incomplete or corrupt download.
|
||||||
|
type IntegrityInfo struct {
|
||||||
|
// Damaged is true on every verdict assessIntegrity produces; a file with no
// corruption signal gets a nil *IntegrityInfo rather than Damaged=false.
Damaged bool `json:"damaged"`
|
||||||
|
// Reason is a stable machine-readable code (e.g. ebml_corrupt, moov_missing,
// no_duration) identifying which signal triggered the verdict.
Reason string `json:"reason,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// VideoInfo represents the primary video stream metadata.
|
// VideoInfo represents the primary video stream metadata.
|
||||||
|
|
|
||||||
|
|
@ -145,11 +145,16 @@ func scanSingleFile(ctx context.Context, ffprobePath, filePath string, cacheIdx
|
||||||
// Parse season/episode
|
// Parse season/episode
|
||||||
item.Season, item.Episode = ParseSeasonEpisode(item.FileName)
|
item.Season, item.Episode = ParseSeasonEpisode(item.FileName)
|
||||||
|
|
||||||
// Incremental: skip if file hasn't changed
|
// Incremental: skip if file hasn't changed. EXCEPT a previously-damaged
|
||||||
|
// file is always re-probed — a re-download to the same path can land with
|
||||||
|
// an identical size+mtime (some torrent clients preserve the torrent's
|
||||||
|
// mtime), so trusting the cached "damaged" verdict would pin a now-healthy
|
||||||
|
// file as broken forever. Re-probing damaged items is cheap (they're few).
|
||||||
if incremental && existing != nil {
|
if incremental && existing != nil {
|
||||||
if idx, ok := cacheIdx[filePath]; ok {
|
if idx, ok := cacheIdx[filePath]; ok {
|
||||||
cached := existing.Items[idx]
|
cached := existing.Items[idx]
|
||||||
if cached.FileSize == item.FileSize && cached.ModTime == item.ModTime && cached.MediaInfo != nil {
|
if cached.FileSize == item.FileSize && cached.ModTime == item.ModTime &&
|
||||||
|
cached.MediaInfo != nil && cached.MediaInfo.Integrity == nil {
|
||||||
item.MediaInfo = cached.MediaInfo
|
item.MediaInfo = cached.MediaInfo
|
||||||
return item
|
return item
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -36,6 +36,10 @@ func BuildSyncItems(cache *LibraryCache) []agent.LibrarySyncItem {
|
||||||
si.AudioTracks = item.MediaInfo.Audio
|
si.AudioTracks = item.MediaInfo.Audio
|
||||||
si.SubtitleTracks = item.MediaInfo.Subtitles
|
si.SubtitleTracks = item.MediaInfo.Subtitles
|
||||||
si.VideoInfo = item.MediaInfo.Video
|
si.VideoInfo = item.MediaInfo.Video
|
||||||
|
if integ := item.MediaInfo.Integrity; integ != nil && integ.Damaged {
|
||||||
|
si.Integrity = "damaged"
|
||||||
|
si.IntegrityReason = integ.Reason
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
items = append(items, si)
|
items = append(items, si)
|
||||||
|
|
|
||||||
|
|
@ -26,4 +26,8 @@ type LibraryCache struct {
|
||||||
Items []LibraryItem `json:"items"`
|
Items []LibraryItem `json:"items"`
|
||||||
}
|
}
|
||||||
|
|
||||||
const cacheVersion = 1
|
// cacheVersion is the scan-logic generation number. Bump it whenever the scan
|
||||||
|
// logic changes in a way that should re-probe an existing library on the next
|
||||||
|
// scan (incremental reuse keys off mtime+size, so a pure logic change is
|
||||||
|
// invisible without this). v2: file-integrity detection (ffprobe corruption / incomplete-download flag).
|
||||||
|
const cacheVersion = 2
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue