feat(library): detect corrupt/incomplete files during scan
ffprobe already runs on every scanned file; now we capture its stderr and
assess integrity from it. assessIntegrity flags a file "damaged" on the
markers that mean the container/bitstream is unusable: invalid_data,
ebml_corrupt, moov_missing, bitstream_corrupt, plus no_duration (a video
stream with non-positive duration = a truncated/incomplete download).
The verdict rides on MediaInfo.Integrity (IntegrityInfo{Damaged,Reason}),
maps onto LibrarySyncItem.{Integrity,IntegrityReason}, and syncs to the web
so a damaged file can be surfaced at rest instead of only blowing up at
playback.
Bumps the scan cache version (1 → 2) so existing entries re-probe once, and
the scanner always re-probes any cached entry previously flagged as damaged
(a re-download can land with an identical size+mtime).
This commit is contained in:
parent
c86e50245e
commit
f0ac905fdb
7 changed files with 122 additions and 4 deletions
|
|
@ -338,6 +338,11 @@ type LibrarySyncItem struct {
|
|||
AudioTracks any `json:"audioTracks,omitempty"`
|
||||
SubtitleTracks any `json:"subtitleTracks,omitempty"`
|
||||
VideoInfo any `json:"videoInfo,omitempty"`
|
||||
// Integrity flags a damaged / incompletely-downloaded file ("damaged" or
|
||||
// empty). IntegrityReason is a stable code (ebml_corrupt, moov_missing,
|
||||
// no_duration, …) the web maps to a localized "re-download" message.
|
||||
Integrity string `json:"integrity,omitempty"`
|
||||
IntegrityReason string `json:"integrityReason,omitempty"`
|
||||
}
|
||||
|
||||
// LibrarySyncResponse is returned after syncing library items.
|
||||
|
|
|
|||
|
|
@ -84,7 +84,55 @@ func ExtractMediaInfo(ctx context.Context, ffprobePath, filePath string) (*Media
|
|||
return nil, fmt.Errorf("ffprobe JSON parse failed: %w", err)
|
||||
}
|
||||
|
||||
return parseFFprobeOutput(data)
|
||||
mi, perr := parseFFprobeOutput(data)
|
||||
if perr != nil {
|
||||
return nil, perr
|
||||
}
|
||||
// A corrupt-but-parseable file (e.g. a half-downloaded MKV) returns valid
|
||||
// stream JSON and a zero exit, yet ffprobe still logs structural errors to
|
||||
// stderr (captured above). Flag it so the library can warn instead of
|
||||
// silently shipping a file that won't play.
|
||||
if integ := assessIntegrity(stderr.String(), mi); integ != nil {
|
||||
mi.Integrity = integ
|
||||
}
|
||||
return mi, nil
|
||||
}
|
||||
|
||||
// corruptionMarkers pairs a lowercased ffprobe stderr substring (sub) with the
// STABLE reason code (code) reported when that substring is seen; the web maps
// the code to localized copy. assessIntegrity walks the list in order and the
// first matching entry wins.
//
// The list is intentionally conservative so healthy files are never flagged:
// every entry is meant to appear only on real container/bitstream damage (a
// structurally broken or incompletely-downloaded file), not on benign warnings
// (ffprobe runs at -v error).
var corruptionMarkers = []struct{ sub, code string }{
	{sub: "invalid data found when processing input", code: "invalid_data"},
	// Truncated/corrupt MKV.
	{sub: "as first byte of an ebml number", code: "ebml_corrupt"},
	// Truncated MP4.
	{sub: "moov atom not found", code: "moov_missing"},
	{sub: "invalid nal unit size", code: "bitstream_corrupt"},
	{sub: "non-existing pps", code: "bitstream_corrupt"},
	// NOTE: two tempting markers are deliberately NOT listed because both
	// false-positive on good files, whereas the entries above are structural:
	//   - "error reading header": ffprobe emits it on transient NFS/network
	//     read hiccups, and a genuinely unreadable header also exits non-zero
	//     → ScanError → item skipped.
	//   - "truncating packet": printed for healthy MKV/TS with oversized
	//     subtitle/PGS packets.
}
|
||||
|
||||
// assessIntegrity inspects ffprobe's stderr plus the parsed result and returns
|
||||
// a damaged verdict on a high-confidence corruption signal, else nil. The
|
||||
// Reason is a stable code (see corruptionMarkers) the web localizes.
|
||||
func assessIntegrity(stderr string, mi *MediaInfo) *IntegrityInfo {
|
||||
low := strings.ToLower(stderr)
|
||||
for _, m := range corruptionMarkers {
|
||||
if strings.Contains(low, m.sub) {
|
||||
return &IntegrityInfo{Damaged: true, Reason: m.code}
|
||||
}
|
||||
}
|
||||
// A file that carries a video stream but no determinable duration is almost
|
||||
// always truncated (the moov/cues holding duration sit at the end of the
|
||||
// file). Audio-only items legitimately omit it, so gate on having video.
|
||||
if mi != nil && mi.Video != nil && mi.Video.Duration <= 0 {
|
||||
return &IntegrityInfo{Damaged: true, Reason: "no_duration"}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// parseFFprobeOutput converts parsed ffprobe JSON into MediaInfo.
|
||||
|
|
|
|||
|
|
@ -428,3 +428,42 @@ func TestParseFFprobeOutput_FrameRateNoSlash(t *testing.T) {
|
|||
t.Errorf("frameRate = %v, want 0 (no slash)", mi.Video.FrameRate)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAssessIntegrity(t *testing.T) {
|
||||
healthy := &MediaInfo{Video: &VideoInfo{Codec: "h264", Width: 1920, Height: 1080, Duration: 5477}}
|
||||
|
||||
// Healthy file with no stderr → nil (not damaged).
|
||||
if got := assessIntegrity("", healthy); got != nil {
|
||||
t.Errorf("healthy file flagged damaged: %+v", got)
|
||||
}
|
||||
|
||||
// MKV EBML corruption (the real "In the Grey" case): ffprobe exits 0 but
|
||||
// logs EBML errors → damaged with the ebml_corrupt code.
|
||||
ebml := "[matroska,webm @ 0x60e7] 0x00 at pos 2144995 invalid as first byte of an EBML number\n"
|
||||
got := assessIntegrity(ebml, healthy)
|
||||
if got == nil || !got.Damaged || got.Reason != "ebml_corrupt" {
|
||||
t.Errorf("EBML corruption not flagged correctly: %+v", got)
|
||||
}
|
||||
|
||||
// Truncated MP4.
|
||||
if got := assessIntegrity("moov atom not found\n", healthy); got == nil || got.Reason != "moov_missing" {
|
||||
t.Errorf("moov-missing not flagged: %+v", got)
|
||||
}
|
||||
|
||||
// Invalid data.
|
||||
if got := assessIntegrity("Invalid data found when processing input\n", healthy); got == nil || got.Reason != "invalid_data" {
|
||||
t.Errorf("invalid-data not flagged: %+v", got)
|
||||
}
|
||||
|
||||
// No duration on a video stream → truncated.
|
||||
noDur := &MediaInfo{Video: &VideoInfo{Codec: "h264", Width: 1920, Height: 1080, Duration: 0}}
|
||||
if got := assessIntegrity("", noDur); got == nil || got.Reason != "no_duration" {
|
||||
t.Errorf("no-duration not flagged: %+v", got)
|
||||
}
|
||||
|
||||
// Audio-only file with no duration is NOT flagged (legitimately omits it).
|
||||
audioOnly := &MediaInfo{Audio: []AudioTrack{{Lang: "en", Codec: "aac"}}}
|
||||
if got := assessIntegrity("", audioOnly); got != nil {
|
||||
t.Errorf("audio-only file wrongly flagged: %+v", got)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,19 @@ type MediaInfo struct {
|
|||
Audio []AudioTrack `json:"audio"`
|
||||
Subtitles []SubtitleTrack `json:"subtitles"`
|
||||
Languages []string `json:"languages"` // derived from audio tracks
|
||||
// Integrity is non-nil only when the scan found signs of corruption / an
|
||||
// incomplete download. Surfaced in the web library as a "damaged" warning
|
||||
// so the user re-downloads instead of hitting a file that won't play.
|
||||
Integrity *IntegrityInfo `json:"integrity,omitempty"`
|
||||
}
|
||||
|
||||
// IntegrityInfo is the damage verdict attached to a MediaInfo. It describes a
// file whose metadata probed OK enough to land in the library but that shows
// structural damage — ffprobe emitted EBML / "invalid data" / bitstream
// errors, the moov atom is missing, or a video stream has no positive
// duration — the hallmark of an incomplete or corrupt download.
type IntegrityInfo struct {
	// Damaged reports whether the file was flagged as damaged.
	Damaged bool `json:"damaged"`
	// Reason is the stable code of the signal behind the verdict (for
	// example ebml_corrupt, moov_missing or no_duration).
	Reason string `json:"reason,omitempty"`
}
|
||||
|
||||
// VideoInfo represents the primary video stream metadata.
|
||||
|
|
|
|||
|
|
@ -145,11 +145,16 @@ func scanSingleFile(ctx context.Context, ffprobePath, filePath string, cacheIdx
|
|||
// Parse season/episode
|
||||
item.Season, item.Episode = ParseSeasonEpisode(item.FileName)
|
||||
|
||||
// Incremental: skip if file hasn't changed
|
||||
// Incremental: skip if file hasn't changed. EXCEPT a previously-damaged
|
||||
// file is always re-probed — a re-download to the same path can land with
|
||||
// an identical size+mtime (some torrent clients preserve the torrent's
|
||||
// mtime), so trusting the cached "damaged" verdict would pin a now-healthy
|
||||
// file as broken forever. Re-probing damaged items is cheap (they're few).
|
||||
if incremental && existing != nil {
|
||||
if idx, ok := cacheIdx[filePath]; ok {
|
||||
cached := existing.Items[idx]
|
||||
if cached.FileSize == item.FileSize && cached.ModTime == item.ModTime && cached.MediaInfo != nil {
|
||||
if cached.FileSize == item.FileSize && cached.ModTime == item.ModTime &&
|
||||
cached.MediaInfo != nil && cached.MediaInfo.Integrity == nil {
|
||||
item.MediaInfo = cached.MediaInfo
|
||||
return item
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,6 +36,10 @@ func BuildSyncItems(cache *LibraryCache) []agent.LibrarySyncItem {
|
|||
si.AudioTracks = item.MediaInfo.Audio
|
||||
si.SubtitleTracks = item.MediaInfo.Subtitles
|
||||
si.VideoInfo = item.MediaInfo.Video
|
||||
if integ := item.MediaInfo.Integrity; integ != nil && integ.Damaged {
|
||||
si.Integrity = "damaged"
|
||||
si.IntegrityReason = integ.Reason
|
||||
}
|
||||
}
|
||||
|
||||
items = append(items, si)
|
||||
|
|
|
|||
|
|
@ -26,4 +26,8 @@ type LibraryCache struct {
|
|||
Items []LibraryItem `json:"items"`
|
||||
}
|
||||
|
||||
// cacheVersion is the scan cache version.
//
// Bump whenever the scan logic changes in a way that should re-probe an
// existing library on next scan (incremental reuse keys off mtime+size, so a
// pure logic change is invisible without this).
//
// v2: file-integrity detection (ffprobe corruption / incomplete-download flag).
const cacheVersion = 2
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue