unarr/internal/library/scanner.go
Deivid Soto b6ddeea129 feat(library): content fingerprint + path-resilient sync + stream self-heal
Stop treating the absolute path as a file's identity so a base-path change
(host binary→docker remap, moved media folder, remount) no longer makes the
server duplicate and orphan library rows.

- fingerprint.go: ComputeFingerprint = sha256(size ‖ first 1MiB ‖ last 1MiB),
  a stable content identity that survives rename/move/base-path change. Cached
  in LibraryItem and reused on incremental scans when size+mtime are unchanged.
- sync: send fingerprint + rel_path (relative to the scan root) + agent_id in
  the library-sync request, so the server can move a row in place and scope
  stale-cleanup per agent.
- daemon: force a FULL re-scan (with a user-facing WARNING) when the scan root
  changed since the last cache, so the server re-maps by fingerprint instead of
  duplicating. basePathChanged compares filepath.Clean'd roots.
- daemon: relocateUnreachable self-heals a stream request whose path is under an
  old root but whose file still exists under a current allowed root, so playback
  works immediately without waiting for the re-scan. Conservative: requires a
  3-segment tail and re-checks containment after resolving symlinks so it can
  neither serve the wrong file nor escape the allowed dirs.

See docs/plans/unarr-path-resilience.md in the web repo.
2026-06-03 12:08:58 +02:00

230 lines
5.8 KiB
Go

package library
import (
"context"
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/torrentclaw/unarr/internal/library/mediainfo"
"github.com/torrentclaw/unarr/internal/parser"
)
// videoExts is the set of file extensions treated as video files. Keys are
// lowercase and include the leading dot; discoverFiles lowercases a file's
// extension before looking it up here.
var videoExts = map[string]bool{
	".mkv":  true,
	".mp4":  true,
	".avi":  true,
	".m4v":  true,
	".ts":   true,
	".wmv":  true,
	".mov":  true,
	".webm": true,
	".flv":  true,
	".mpg":  true,
	".mpeg": true,
	".vob":  true,
}
// excludePatterns lists lowercase substrings that mark a path as non-content
// (samples, trailers, bonus material, ...). discoverFiles drops any file whose
// lowercased path contains one of them.
var excludePatterns = []string{
	"sample",
	"trailer",
	"featurette",
	"extras",
	"bonus",
	"behind the scenes",
	"deleted scenes",
	"interview",
}
// minFileSize is the smallest file size, in bytes, that discoverFiles accepts
// as a library item (100 MiB); smaller video files are skipped.
const minFileSize = 100 << 20
// ScanOptions configures a library scan performed by Scan.
type ScanOptions struct {
	// Workers is the maximum number of files probed concurrently (one ffprobe
	// run each). Values <= 0 are replaced by the default of 8 in Scan.
	Workers int // concurrent ffprobe processes (default 8)
	// FFprobePath is passed to mediainfo.ResolveFFprobe to locate the binary.
	FFprobePath string // explicit path, or auto-resolve
	// Incremental reuses the cached media info for files whose size and mtime
	// match the existing cache instead of probing them again.
	Incremental bool // skip unchanged files (mtime+size match cache)
	// OnProgress, when non-nil, is invoked after each file is processed with
	// the number of files finished so far, the total number of files, and the
	// base name of the file just finished. It is called from the worker
	// goroutines, so calls may arrive concurrently.
	OnProgress func(scanned, total int, current string)
}
// Scan walks dirPath recursively, finds video files, and probes each of them
// with ffprobe using at most opts.Workers concurrent workers.
//
// existing may be nil. When provided it is indexed once and consulted per file
// so cached fingerprints (and, with opts.Incremental, cached media info) can be
// reused for files that have not changed.
//
// The returned cache holds one item per discovered file; per-file failures are
// recorded on the item itself rather than aborting the scan. Items are appended
// in completion order, which is not deterministic.
//
// An error is returned when ffprobe cannot be resolved, when file discovery
// fails, or when ctx is cancelled before the scan finishes. In the cancelled
// case no cache is returned, so callers never persist or sync a truncated
// library.
func Scan(ctx context.Context, dirPath string, existing *LibraryCache, opts ScanOptions) (*LibraryCache, error) {
	if opts.Workers <= 0 {
		opts.Workers = 8
	}

	// Resolve ffprobe
	ffprobePath, err := mediainfo.ResolveFFprobe(opts.FFprobePath)
	if err != nil {
		return nil, fmt.Errorf("ffprobe: %w", err)
	}

	// Discover video files
	files, err := discoverFiles(dirPath)
	if err != nil {
		return nil, fmt.Errorf("discover files: %w", err)
	}
	if len(files) == 0 {
		return &LibraryCache{
			Version:   cacheVersion,
			ScannedAt: time.Now().UTC().Format(time.RFC3339),
			Path:      dirPath,
		}, nil
	}

	// Build cache index for incremental mode
	cacheIdx := BuildCacheIndex(existing)

	// Scan files concurrently
	var (
		scanned atomic.Int32
		total   = len(files)
		mu      sync.Mutex
		items   = make([]LibraryItem, 0, total)
		wg      sync.WaitGroup
	)
	// sem bounds the number of in-flight workers: a token is taken before a
	// worker starts and given back when it finishes.
	sem := make(chan struct{}, opts.Workers)

dispatch:
	for _, filePath := range files {
		select {
		case <-ctx.Done():
			// A bare break would only leave the select and still start a
			// worker that holds no token; its deferred release would then
			// steal another worker's token or block forever, hanging
			// wg.Wait. Leave the loop itself.
			break dispatch
		case sem <- struct{}{}:
		}

		wg.Add(1)
		go func(fp string) {
			defer wg.Done()
			defer func() { <-sem }()

			item := scanSingleFile(ctx, ffprobePath, fp, cacheIdx, existing, opts.Incremental)

			mu.Lock()
			items = append(items, item)
			mu.Unlock()

			n := int(scanned.Add(1))
			if opts.OnProgress != nil {
				opts.OnProgress(n, total, filepath.Base(fp))
			}
		}(filePath)
	}
	wg.Wait()

	// A cancelled scan is incomplete (files never dispatched, or probes that
	// ran against a cancelled context), so report it instead of returning a
	// partial cache as if it were the full library.
	if err := ctx.Err(); err != nil {
		return nil, fmt.Errorf("scan %q: %w", dirPath, err)
	}

	return &LibraryCache{
		Version:   cacheVersion,
		ScannedAt: time.Now().UTC().Format(time.RFC3339),
		Path:      dirPath,
		Items:     items,
	}, nil
}
// scanSingleFile builds the LibraryItem for one file: stat data, content
// fingerprint, filename-derived metadata and ffprobe media info.
//
// cacheIdx maps a file path to its index in existing.Items; it is only
// consulted when existing is non-nil. Failures never abort the scan: a stat or
// ffprobe error is stored in the item's ScanError, and a fingerprint error
// simply leaves Fingerprint empty.
func scanSingleFile(ctx context.Context, ffprobePath, filePath string, cacheIdx map[string]int, existing *LibraryCache, incremental bool) LibraryItem {
	baseName := filepath.Base(filePath)

	stat, statErr := os.Stat(filePath)
	if statErr != nil {
		return LibraryItem{
			FilePath:  filePath,
			FileName:  baseName,
			ScanError: statErr.Error(),
		}
	}

	item := LibraryItem{
		FilePath: filePath,
		FileName: baseName,
		FileSize: stat.Size(),
		ModTime:  stat.ModTime().UTC().Format(time.RFC3339),
	}

	// Find the previous scan's entry for this path, if any. It feeds both the
	// fingerprint reuse and the incremental ffprobe shortcut further down.
	var prev *LibraryItem
	if existing != nil {
		if i, found := cacheIdx[filePath]; found {
			prev = &existing.Items[i]
		}
	}
	sameOnDisk := prev != nil &&
		prev.FileSize == item.FileSize &&
		prev.ModTime == item.ModTime

	// Fingerprint: recompute unless the file is unchanged and the cache already
	// carries one. This happens regardless of incremental mode so every item
	// gets a stable identity; a computation error is tolerated (best effort).
	if !sameOnDisk || prev.Fingerprint == "" {
		if sum, sumErr := ComputeFingerprint(filePath, item.FileSize); sumErr == nil {
			item.Fingerprint = sum
		}
	} else {
		item.Fingerprint = prev.Fingerprint
	}

	// Metadata derived purely from the file name: quality, codec, year.
	meta := parser.Parse(item.FileName)
	item.Quality, item.Codec, item.Year = meta.Quality, meta.Codec, meta.Year

	// Title, falling back to the raw file name when cleaning leaves nothing.
	if item.Title = CleanTitle(item.FileName); item.Title == "" {
		item.Title = item.FileName
	}

	// Season/episode numbers.
	item.Season, item.Episode = ParseSeasonEpisode(item.FileName)

	// Incremental shortcut: an unchanged file keeps its cached media info and
	// skips ffprobe. Entries whose cached info carries an Integrity record
	// (previously flagged as damaged) are deliberately excluded: a re-download
	// to the same path can end up with identical size and mtime, and trusting
	// the old verdict would keep a now-healthy file marked broken forever.
	reuseProbe := incremental && sameOnDisk &&
		prev.MediaInfo != nil && prev.MediaInfo.Integrity == nil
	if reuseProbe {
		item.MediaInfo = prev.MediaInfo
		return item
	}

	// Probe the file with ffprobe.
	probed, probeErr := mediainfo.ExtractMediaInfo(ctx, ffprobePath, filePath)
	if probeErr != nil {
		item.ScanError = probeErr.Error()
		return item
	}
	item.MediaInfo = probed
	return item
}
// discoverFiles walks root recursively and returns the paths of the files that
// qualify as library content: the extension is in videoExts, the size is at
// least minFileSize, and the lowercased path contains none of the
// excludePatterns.
//
// Errors on individual entries below root (unreadable subdirectory, failed
// stat) are skipped so one bad entry does not abort the walk. An error on root
// itself is returned: a missing or unreadable scan root must not be reported
// as a valid, empty library.
func discoverFiles(root string) ([]string, error) {
	var files []string

	err := filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			if path == root {
				// The root could not be stat'ed or listed; nothing was
				// scanned, so surface the failure to the caller.
				return err
			}
			return nil // skip errors below root, continue walking
		}
		if d.IsDir() {
			return nil
		}

		ext := strings.ToLower(filepath.Ext(path))
		if !videoExts[ext] {
			return nil
		}

		// Check file size (stat is lazy on some systems)
		info, err := d.Info()
		if err != nil {
			return nil
		}
		if info.Size() < minFileSize {
			return nil
		}

		// Exclude non-content files
		lower := strings.ToLower(path)
		for _, pattern := range excludePatterns {
			if strings.Contains(lower, pattern) {
				return nil
			}
		}

		files = append(files, path)
		return nil
	})
	if err != nil {
		return nil, err
	}
	return files, nil
}