diff --git a/internal/agent/types.go b/internal/agent/types.go index 58ecd38..ea7a51b 100644 --- a/internal/agent/types.go +++ b/internal/agent/types.go @@ -318,6 +318,7 @@ type DebridAccount struct { type LibrarySyncRequest struct { Items []LibrarySyncItem `json:"items"` ScanPath string `json:"scanPath"` + AgentID string `json:"agentId,omitempty"` // lets the server scope stale-cleanup per agent IsLastBatch bool `json:"isLastBatch"` SyncStartedAt string `json:"syncStartedAt,omitempty"` // ISO-8601; same for all batches in a session } @@ -346,8 +347,14 @@ type LibrarySyncItem struct { // Integrity flags a damaged / incompletely-downloaded file ("damaged" or // empty). IntegrityReason is a stable code (ebml_corrupt, moov_missing, // no_duration, …) the web maps to a localized "re-download" message. - Integrity string `json:"integrity,omitempty"` - IntegrityReason string `json:"integrityReason,omitempty"` + Integrity string `json:"integrity,omitempty"` + IntegrityReason string `json:"integrityReason,omitempty"` + // Path resilience: a stable content identity + the file's location relative + // to its library root, so the server can move a row in place on a rename / + // base-path change instead of duplicating it. + Fingerprint string `json:"fingerprint,omitempty"` + RelPath string `json:"relPath,omitempty"` + LibraryRootKey string `json:"libraryRootKey,omitempty"` } // LibrarySyncResponse is returned after syncing library items. 
diff --git a/internal/cmd/daemon.go b/internal/cmd/daemon.go index eb6cbbf..8fd8938 100644 --- a/internal/cmd/daemon.go +++ b/internal/cmd/daemon.go @@ -598,12 +598,25 @@ func runDaemonStart() error { }() } + allowedRoots := []string{cfg.Download.Dir, cfg.Library.ScanPath, + cfg.Organize.MoviesDir, cfg.Organize.TVShowsDir} + filePath := filepath.Clean(sr.FilePath) - if !isAllowedStreamPath(filePath, cfg.Download.Dir, cfg.Library.ScanPath, - cfg.Organize.MoviesDir, cfg.Organize.TVShowsDir) { - log.Printf("[%s] stream request rejected: path outside allowed dirs: %s", agent.ShortID(sr.TaskID), filePath) - reportStreamError(fmt.Sprintf("path outside allowed dirs: %s", filePath)) - return + // Self-heal a base-path mismatch: the web may hand us a path under an old + // root (e.g. /mnt/nas/peliculas/… from before a binary→docker move) that + // is now outside our allowed dirs but whose file still exists under a + // current root (/downloads/…). Remap the path's tail onto an allowed root + // so playback works immediately; the next re-scan persists the fix to the + // DB. See docs/plans/unarr-path-resilience.md. + if !isAllowedStreamPath(filePath, allowedRoots...) { + if remapped := relocateUnreachable(filePath, allowedRoots); remapped != "" { + log.Printf("[%s] stream self-heal: remapped %s → %s", agent.ShortID(sr.TaskID), filePath, remapped) + filePath = remapped + } else { + log.Printf("[%s] stream request rejected: path outside allowed dirs: %s", agent.ShortID(sr.TaskID), filePath) + reportStreamError(fmt.Sprintf("path outside allowed dirs: %s", filePath)) + return + } } // os.Stat over NFS can transiently fail (ESTALE/EAGAIN/timeout) right // after a remount or under load. Retry a few times before giving up so @@ -619,6 +632,15 @@ func runDaemonStart() error { time.Sleep(300 * time.Millisecond) } } + if statErr != nil { + // Last resort before failing: the file may simply have moved within + // an allowed root — try to relocate it by path tail. 
+ if remapped := relocateUnreachable(filePath, allowedRoots); remapped != "" { + log.Printf("[%s] stream self-heal: relocated missing %s → %s", agent.ShortID(sr.TaskID), filePath, remapped) + filePath = remapped + info, statErr = os.Stat(filePath) + } + } if statErr != nil { log.Printf("[%s] stream request: file not found after retries: %s (%v)", agent.ShortID(sr.TaskID), filePath, statErr) reportStreamError(fmt.Sprintf("file not found: %s", filePath)) @@ -977,6 +999,53 @@ func isAllowedStreamPath(filePath string, allowedDirs ...string) bool { return false } +// relocateUnreachable tries to find a file the web asked us to stream under a +// path we can't serve (e.g. an old base path) by joining the longest suffix of +// that path onto each current allowed root and checking it exists. Returns the +// found absolute path or "". +// +// Conservative by design — it must never serve the WRONG file: +// - Requires a tail of at least three segments (collection/season/file), so a +// generic "Season 01/Episode.mkv" can't match a different show by accident. +// Flat single-file-at-root layouts simply aren't self-healed here; the next +// re-scan re-maps them instead. +// - Re-checks containment AFTER resolving symlinks, so a symlink inside a root +// pointing outside it can't be used to escape the allowed dirs (isAllowed‑ +// StreamPath alone is a lexical check that os.Stat would happily follow out). +func relocateUnreachable(filePath string, allowedRoots []string) string { + segs := strings.Split(filepath.ToSlash(filePath), "/") + // Longest tail first (most specific match wins). Stop before 3-segment tails + // so a short, ambiguous suffix can't match the wrong file. + for start := 0; start <= len(segs)-3; start++ { + tail := filepath.Join(segs[start:]...) 
+ if tail == "" { + continue + } + for _, root := range allowedRoots { + if root == "" { + continue + } + cand := filepath.Join(root, tail) + if !isAllowedStreamPath(cand, root) { + continue + } + fi, err := os.Stat(cand) + if err != nil || fi.IsDir() { + continue + } + // Re-validate containment against the symlink-resolved real paths so + // a symlink under the root can't point the stream outside it. + realCand, e1 := filepath.EvalSymlinks(cand) + realRoot, e2 := filepath.EvalSymlinks(root) + if e1 != nil || e2 != nil || !isAllowedStreamPath(realCand, realRoot) { + continue + } + return cand + } + } + return "" +} + func formatSpeedLog(bps int64) string { switch { case bps >= 1024*1024*1024: @@ -993,6 +1062,23 @@ func formatSpeedLog(bps int64) string { // runAutoScan runs a library scan + sync on a timer or on-demand via scanNow channel. // It scans all provided paths and syncs each independently so stale-item cleanup // is scoped to the correct directory prefix on the server. +// basePathChanged reports whether the library's scan root moved since the last +// saved cache — i.e. the previously-scanned root is no longer one of the current +// scan paths. Used to force a full (non-incremental) re-scan so the server can +// re-map paths by fingerprint and reap the old prefix. 
+func basePathChanged(existing *library.LibraryCache, scanPaths []string) bool { + if existing == nil || len(existing.Items) == 0 || existing.Path == "" { + return false + } + prev := filepath.Clean(existing.Path) + for _, p := range scanPaths { + if filepath.Clean(p) == prev { + return false + } + } + return true +} + func runAutoScan(ctx context.Context, cfg config.Config, interval time.Duration, ac *agent.Client, scanNow <-chan struct{}, scanPaths []string) { log.Printf("[auto-scan] enabled: every %s, paths: %v", interval, scanPaths) @@ -1018,10 +1104,23 @@ func runAutoScan(ctx context.Context, cfg config.Config, interval time.Duration, workers = 8 } + // If the library base path changed (e.g. the agent moved from the host + // binary to docker, remapping /mnt/nas/peliculas → /downloads, or the + // user moved their media folder), force a FULL re-scan instead of an + // incremental one. The fingerprint merge on the server then relocates + // existing rows in place rather than duplicating, and per-agent cleanup + // reaps the old prefix. See docs/plans/unarr-path-resilience.md. + forceFull := basePathChanged(existing, scanPaths) + if forceFull { + log.Printf("[auto-scan] WARNING: library base path changed (was %q, now %v) — "+ + "running a FULL re-scan. 
This can take a while on large libraries; "+ + "playback and matches are preserved.", existing.Path, scanPaths) + } + scanOpts := library.ScanOptions{ Workers: workers, FFprobePath: cfg.Library.FFprobePath, - Incremental: existing != nil, + Incremental: existing != nil && !forceFull, } // Resolve ffmpeg once for the sidecar prewarm (extracts text subs → WebVTT @@ -1077,6 +1176,7 @@ func runAutoScan(ctx context.Context, cfg config.Config, interval time.Duration, _, err := ac.SyncLibrary(ctx, agent.LibrarySyncRequest{ Items: items[i:end], ScanPath: scanPath, + AgentID: cfg.Agent.ID, IsLastBatch: isLast, SyncStartedAt: syncStartedAt, }) diff --git a/internal/cmd/relocate_test.go b/internal/cmd/relocate_test.go new file mode 100644 index 0000000..07ea7c5 --- /dev/null +++ b/internal/cmd/relocate_test.go @@ -0,0 +1,74 @@ +package cmd + +import ( + "os" + "path/filepath" + "runtime" + "testing" +) + +func mkfile(t *testing.T, path string) { + t.Helper() + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(path, []byte("x"), 0o644); err != nil { + t.Fatal(err) + } +} + +func TestRelocateUnreachable(t *testing.T) { + root := t.TempDir() + // A 3-segment-deep file under the current root. + mkfile(t, filepath.Join(root, "Acme Show", "Season 01", "ep.mkv")) + // A 2-segment-deep file (too shallow to be matched by a short tail). + mkfile(t, filepath.Join(root, "Season 01", "lonely.mkv")) + + roots := []string{root} + + // Base-path change: an old-root path whose 3-seg tail exists under the new + // root → relocates to the real file. + got := relocateUnreachable("/old/base/Acme Show/Season 01/ep.mkv", roots) + want := filepath.Join(root, "Acme Show", "Season 01", "ep.mkv") + if got != want { + t.Errorf("relocate moved file: got %q want %q", got, want) + } + + // Only a 2-segment tail would match → must NOT relocate (ambiguous). 
+ if got := relocateUnreachable("/old/Season 01/lonely.mkv", roots); got != "" { + t.Errorf("2-segment tail should not match, got %q", got) + } + + // Nonexistent file → no relocation. + if got := relocateUnreachable("/old/base/Acme Show/Season 01/missing.mkv", roots); got != "" { + t.Errorf("missing file should not relocate, got %q", got) + } + + // Traversal attempt: ".." segments are cleaned by filepath.Join and the + // result is re-validated, so it can't escape. + if got := relocateUnreachable("/old/../../../etc/passwd", roots); got != "" { + t.Errorf("traversal should not match, got %q", got) + } +} + +func TestRelocateUnreachableSymlinkEscape(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("symlink semantics differ on windows") + } + root := t.TempDir() + outside := t.TempDir() + // A real file living OUTSIDE any allowed root. + mkfile(t, filepath.Join(outside, "sub", "secret.mkv")) + // A symlink inside the root pointing at the outside tree. + if err := os.Symlink(outside, filepath.Join(root, "link")); err != nil { + t.Skipf("symlink unsupported: %v", err) + } + + // The lexical candidate root/link/sub/secret.mkv exists (os.Stat follows the + // symlink), but after resolving symlinks it's outside the root → must be + // rejected so the stream can't escape the allowed dirs. 
// fpChunk is how many bytes are hashed from the head and the tail of a file.
const fpChunk = 1 << 20 // 1 MiB

// ComputeFingerprint returns a stable content identity for a media file:
// hex(sha256(fileSize ‖ first 1 MiB ‖ last 1 MiB)), where fileSize is encoded
// as 8 little-endian bytes. Files of at most 2 MiB are hashed whole, since the
// head and tail windows would overlap anyway. The value depends only on size
// and content, so it is the same for identical bytes at a different path.
//
// size is the length the caller observed (normally from os.Stat). The function
// never reads more than size bytes in total, so a file that is still growing
// cannot turn the "small file" path into a full read of a huge file.
//
// Errors: any open/seek/read error is returned as-is; a negative size yields an
// *os.PathError wrapping os.ErrInvalid; a file shorter than size yields
// io.ErrUnexpectedEOF (or io.EOF when a large file yields no bytes at all).
// The returned string is empty whenever the error is non-nil.
func ComputeFingerprint(path string, size int64) (string, error) {
	if size < 0 {
		return "", &os.PathError{Op: "fingerprint", Path: path, Err: os.ErrInvalid}
	}

	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	h := sha256.New()
	var sizeBuf [8]byte
	binary.LittleEndian.PutUint64(sizeBuf[:], uint64(size))
	h.Write(sizeBuf[:]) // hash.Hash.Write never returns an error

	if size <= 2*fpChunk {
		// Small file: hash exactly size bytes. The limit keeps the hashed
		// content consistent with the size mixed in above even if the file
		// has grown since the caller stat'ed it.
		n, err := io.Copy(h, io.LimitReader(f, size))
		if err != nil {
			return "", err
		}
		if n != size {
			// File is shorter than advertised; mirror the large-file branch,
			// where io.ReadFull reports a short read.
			return "", io.ErrUnexpectedEOF
		}
		return hex.EncodeToString(h.Sum(nil)), nil
	}

	// Large file: one scratch buffer serves both the head and the tail read.
	buf := make([]byte, fpChunk)
	if _, err := io.ReadFull(f, buf); err != nil {
		return "", err
	}
	h.Write(buf)

	if _, err := f.Seek(size-fpChunk, io.SeekStart); err != nil {
		return "", err
	}
	if _, err := io.ReadFull(f, buf); err != nil {
		return "", err
	}
	h.Write(buf)

	return hex.EncodeToString(h.Sum(nil)), nil
}
+ tailMut[len(tailMut)-1] ^= 0xFF + if c := fp(t, writeFile(t, dir, "tail.bin", tailMut)); c == a { + t.Error("tail mutation did not change fingerprint") + } + + // Head sensitivity. + headMut := append([]byte(nil), big...) + headMut[0] ^= 0xFF + if c := fp(t, writeFile(t, dir, "head.bin", headMut)); c == a { + t.Error("head mutation did not change fingerprint") + } + + // Size is mixed in: a small file and a large file never collide trivially. + small := fp(t, writeFile(t, dir, "small.bin", []byte("hello world"))) + if small == a { + t.Error("small and big fingerprints collided") + } +} + +func TestRelToRoot(t *testing.T) { + cases := []struct{ root, full, want string }{ + {"/downloads", "/downloads/TV Shows/X/S01E09.mkv", "TV Shows/X/S01E09.mkv"}, + {"/downloads", "/mnt/other/file.mkv", ""}, // outside root + {"/downloads", "/downloads", ""}, // equal → "." + {"", "/x/y.mkv", ""}, // no root + } + for _, c := range cases { + if got := relToRoot(c.root, c.full); got != c.want { + t.Errorf("relToRoot(%q,%q)=%q want %q", c.root, c.full, got, c.want) + } + } +} diff --git a/internal/library/scanner.go b/internal/library/scanner.go index 3d6b5fe..7feb746 100644 --- a/internal/library/scanner.go +++ b/internal/library/scanner.go @@ -130,6 +130,26 @@ func scanSingleFile(ctx context.Context, ffprobePath, filePath string, cacheIdx ModTime: info.ModTime().UTC().Format(time.RFC3339), } + // Look up the cached entry once — reused for both fingerprint reuse and the + // incremental ffprobe skip below. + var cached *LibraryItem + if existing != nil { + if idx, ok := cacheIdx[filePath]; ok { + cached = &existing.Items[idx] + } + } + unchanged := cached != nil && + cached.FileSize == item.FileSize && cached.ModTime == item.ModTime + + // Fingerprint: reuse the cached value when the file is unchanged and already + // has one; otherwise compute it (cheap, two bounded reads). Computed even on + // the incremental path so every synced item carries a stable identity. 
// relToRoot returns full's path relative to root, forward-slashed, or "" when
// root is empty, full is the root itself, or full does not live under root.
// The comparison is purely lexical (filepath.Rel); symlinks are not resolved.
//
// Only a relative path that IS ".." or starts with a ".." path element counts
// as "outside": a first component that merely begins with two dots (for
// example "...And Justice for All (1979)" or "..hidden.mkv") is a legitimate
// name inside root and is kept.
func relToRoot(root, full string) string {
	if root == "" {
		return ""
	}
	rel, err := filepath.Rel(root, full)
	if err != nil {
		return ""
	}
	if rel == "." || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return ""
	}
	return filepath.ToSlash(rel)
}
// Shared between unarr scan (cmd/scan.go) and auto-scan (cmd/daemon.go). @@ -11,14 +30,17 @@ func BuildSyncItems(cache *LibraryCache) []agent.LibrarySyncItem { continue } si := agent.LibrarySyncItem{ - FilePath: item.FilePath, - FileName: item.FileName, - FileSize: item.FileSize, - Title: item.Title, - Year: item.Year, - ContentType: DeriveContentType(item), - Season: item.Season, - Episode: item.Episode, + FilePath: item.FilePath, + FileName: item.FileName, + FileSize: item.FileSize, + Title: item.Title, + Year: item.Year, + ContentType: DeriveContentType(item), + Season: item.Season, + Episode: item.Episode, + Fingerprint: item.Fingerprint, + RelPath: relToRoot(cache.Path, item.FilePath), + LibraryRootKey: "library", } if item.MediaInfo != nil { diff --git a/internal/library/types.go b/internal/library/types.go index bd2591b..6346461 100644 --- a/internal/library/types.go +++ b/internal/library/types.go @@ -4,18 +4,21 @@ import "github.com/torrentclaw/unarr/internal/library/mediainfo" // LibraryItem represents a single scanned media file. type LibraryItem struct { - FilePath string `json:"filePath"` - FileName string `json:"fileName"` - FileSize int64 `json:"fileSize"` - ModTime string `json:"modTime"` // ISO 8601 - Title string `json:"title"` - Year string `json:"year,omitempty"` - Season int `json:"season,omitempty"` - Episode int `json:"episode,omitempty"` - Quality string `json:"quality,omitempty"` // "1080p" etc (from filename) - Codec string `json:"codec,omitempty"` // "x265" etc (from filename) - MediaInfo *mediainfo.MediaInfo `json:"mediaInfo,omitempty"` - ScanError string `json:"scanError,omitempty"` + FilePath string `json:"filePath"` + FileName string `json:"fileName"` + FileSize int64 `json:"fileSize"` + ModTime string `json:"modTime"` // ISO 8601 + // Fingerprint is a stable content identity (see fingerprint.go). Cached so + // incremental scans reuse it when size+mtime are unchanged. 
+ Fingerprint string `json:"fingerprint,omitempty"` + Title string `json:"title"` + Year string `json:"year,omitempty"` + Season int `json:"season,omitempty"` + Episode int `json:"episode,omitempty"` + Quality string `json:"quality,omitempty"` // "1080p" etc (from filename) + Codec string `json:"codec,omitempty"` // "x265" etc (from filename) + MediaInfo *mediainfo.MediaInfo `json:"mediaInfo,omitempty"` + ScanError string `json:"scanError,omitempty"` } // LibraryCache is the on-disk cache of scanned library items.