feat(library): content fingerprint + path-resilient sync + stream self-heal

Stop treating the absolute path as a file's identity so a base-path change
(host binary→docker remap, moved media folder, remount) no longer makes the
server duplicate and orphan library rows.

- fingerprint.go: ComputeFingerprint = sha256(size ‖ first 1MiB ‖ last 1MiB),
  a stable content identity that survives rename/move/base-path change. Cached
  in LibraryItem and reused on incremental scans when size+mtime are unchanged.
- sync: send fingerprint + rel_path (relative to the scan root) + agent_id in
  the library-sync request, so the server can move a row in place and scope
  stale-cleanup per agent.
- daemon: force a FULL re-scan (with a user-facing WARNING) when the scan root
  changed since the last cache, so the server re-maps by fingerprint instead of
  duplicating. basePathChanged compares filepath.Clean'd roots.
- daemon: relocateUnreachable self-heals a stream request whose path is under an
  old root but whose file still exists under a current allowed root, so playback
  works immediately without waiting for the re-scan. Conservative: requires a
  3-segment tail and re-checks containment after resolving symlinks so it can
  neither serve the wrong file nor escape the allowed dirs.

See docs/plans/unarr-path-resilience.md in the web repo.
This commit is contained in:
Deivid Soto 2026-06-03 12:04:04 +02:00
parent e298ff6c05
commit b6ddeea129
9 changed files with 396 additions and 38 deletions

View file

@ -598,12 +598,25 @@ func runDaemonStart() error {
}()
}
allowedRoots := []string{cfg.Download.Dir, cfg.Library.ScanPath,
cfg.Organize.MoviesDir, cfg.Organize.TVShowsDir}
filePath := filepath.Clean(sr.FilePath)
if !isAllowedStreamPath(filePath, cfg.Download.Dir, cfg.Library.ScanPath,
cfg.Organize.MoviesDir, cfg.Organize.TVShowsDir) {
log.Printf("[%s] stream request rejected: path outside allowed dirs: %s", agent.ShortID(sr.TaskID), filePath)
reportStreamError(fmt.Sprintf("path outside allowed dirs: %s", filePath))
return
// Self-heal a base-path mismatch: the web may hand us a path under an old
// root (e.g. /mnt/nas/peliculas/… from before a binary→docker move) that
// is now outside our allowed dirs but whose file still exists under a
// current root (/downloads/…). Remap the path's tail onto an allowed root
// so playback works immediately; the next re-scan persists the fix to the
// DB. See docs/plans/unarr-path-resilience.md.
if !isAllowedStreamPath(filePath, allowedRoots...) {
if remapped := relocateUnreachable(filePath, allowedRoots); remapped != "" {
log.Printf("[%s] stream self-heal: remapped %s → %s", agent.ShortID(sr.TaskID), filePath, remapped)
filePath = remapped
} else {
log.Printf("[%s] stream request rejected: path outside allowed dirs: %s", agent.ShortID(sr.TaskID), filePath)
reportStreamError(fmt.Sprintf("path outside allowed dirs: %s", filePath))
return
}
}
// os.Stat over NFS can transiently fail (ESTALE/EAGAIN/timeout) right
// after a remount or under load. Retry a few times before giving up so
@ -619,6 +632,15 @@ func runDaemonStart() error {
time.Sleep(300 * time.Millisecond)
}
}
if statErr != nil {
// Last resort before failing: the file may simply have moved within
// an allowed root — try to relocate it by path tail.
if remapped := relocateUnreachable(filePath, allowedRoots); remapped != "" {
log.Printf("[%s] stream self-heal: relocated missing %s → %s", agent.ShortID(sr.TaskID), filePath, remapped)
filePath = remapped
info, statErr = os.Stat(filePath)
}
}
if statErr != nil {
log.Printf("[%s] stream request: file not found after retries: %s (%v)", agent.ShortID(sr.TaskID), filePath, statErr)
reportStreamError(fmt.Sprintf("file not found: %s", filePath))
@ -977,6 +999,53 @@ func isAllowedStreamPath(filePath string, allowedDirs ...string) bool {
return false
}
// relocateUnreachable tries to find a file the web asked us to stream under a
// path we can't serve (e.g. an old base path) by joining successively shorter
// tails of that path onto each current allowed root and checking that the
// result exists and is not a directory. It returns the first matching
// candidate (root joined with the tail), or "" when nothing matches.
//
// Conservative by design — it must never serve the WRONG file:
//   - Requires a tail of at least three real path segments
//     (collection/season/file), so a generic "Season 01/Episode.mkv" can't
//     match a different show by accident. Empty and "." segments (leading
//     slash, doubled separators) do not count towards that minimum, and any
//     ".." that survives cleaning is never part of a tail. Flat
//     single-file-at-root layouts simply aren't self-healed here; the next
//     re-scan re-maps them instead.
//   - Re-checks containment AFTER resolving symlinks, so a symlink inside a
//     root pointing outside it can't be used to escape the allowed dirs
//     (isAllowedStreamPath alone is a lexical check that os.Stat would happily
//     follow out).
func relocateUnreachable(filePath string, allowedRoots []string) string {
	// Shortest tail we are willing to match; anything shorter is too ambiguous.
	const minTailSegments = 3

	// Normalise first so "." / ".." / doubled separators are collapsed, then
	// drop any volume prefix ("C:", UNC host/share) — it can never be part of
	// a tail under a different root.
	cleaned := filepath.Clean(filePath)
	cleaned = cleaned[len(filepath.VolumeName(cleaned)):]

	// Collect only real name segments. filepath.Join silently drops empty
	// elements, so counting them (as a plain Split would, for the leading "/"
	// of an absolute path) would let a 2-segment tail slip past the minimum.
	var segs []string
	for _, s := range strings.Split(filepath.ToSlash(cleaned), "/") {
		switch s {
		case "", ".":
			continue
		case "..":
			// After Clean, ".." only survives at the front of a relative
			// path. A tail containing it would point above the root, so
			// discard everything up to and including it.
			segs = segs[:0]
		default:
			segs = append(segs, s)
		}
	}

	// Longest tail first (most specific match wins). The loop ends at the last
	// tail that still has minTailSegments segments, so a short, ambiguous
	// suffix can't match the wrong file.
	for start := 0; start+minTailSegments <= len(segs); start++ {
		tail := filepath.Join(segs[start:]...)
		for _, root := range allowedRoots {
			if root == "" {
				continue
			}
			cand := filepath.Join(root, tail)
			if !isAllowedStreamPath(cand, root) {
				continue
			}
			fi, err := os.Stat(cand)
			if err != nil || fi.IsDir() {
				continue
			}
			// Re-validate containment against the symlink-resolved real paths so
			// a symlink under the root can't point the stream outside it.
			realCand, e1 := filepath.EvalSymlinks(cand)
			realRoot, e2 := filepath.EvalSymlinks(root)
			if e1 != nil || e2 != nil || !isAllowedStreamPath(realCand, realRoot) {
				continue
			}
			return cand
		}
	}
	return ""
}
func formatSpeedLog(bps int64) string {
switch {
case bps >= 1024*1024*1024:
@ -993,6 +1062,23 @@ func formatSpeedLog(bps int64) string {
// basePathChanged reports whether the library's scan root moved since the last
// saved cache — i.e. the previously-scanned root is no longer one of the current
// scan paths. Used to force a full (non-incremental) re-scan so the server can
// re-map paths by fingerprint and reap the old prefix.
//
// It returns false when there is nothing to compare against: no cache, a cache
// with no items, or a cache that recorded no root path. Roots are compared
// after filepath.Clean, so trailing separators and redundant "." elements do
// not count as a change.
func basePathChanged(existing *library.LibraryCache, scanPaths []string) bool {
	if existing == nil || len(existing.Items) == 0 || existing.Path == "" {
		return false
	}
	prev := filepath.Clean(existing.Path)
	for _, p := range scanPaths {
		if filepath.Clean(p) == prev {
			// The cached root is still being scanned: no base-path change.
			return false
		}
	}
	return true
}

// runAutoScan runs a library scan + sync on a timer or on-demand via scanNow channel.
// It scans all provided paths and syncs each independently so stale-item cleanup
// is scoped to the correct directory prefix on the server.
func runAutoScan(ctx context.Context, cfg config.Config, interval time.Duration, ac *agent.Client, scanNow <-chan struct{}, scanPaths []string) {
log.Printf("[auto-scan] enabled: every %s, paths: %v", interval, scanPaths)
@ -1018,10 +1104,23 @@ func runAutoScan(ctx context.Context, cfg config.Config, interval time.Duration,
workers = 8
}
// If the library base path changed (e.g. the agent moved from the host
// binary to docker, remapping /mnt/nas/peliculas → /downloads, or the
// user moved their media folder), force a FULL re-scan instead of an
// incremental one. The fingerprint merge on the server then relocates
// existing rows in place rather than duplicating, and per-agent cleanup
// reaps the old prefix. See docs/plans/unarr-path-resilience.md.
forceFull := basePathChanged(existing, scanPaths)
if forceFull {
log.Printf("[auto-scan] WARNING: library base path changed (was %q, now %v) — "+
"running a FULL re-scan. This can take a while on large libraries; "+
"playback and matches are preserved.", existing.Path, scanPaths)
}
scanOpts := library.ScanOptions{
Workers: workers,
FFprobePath: cfg.Library.FFprobePath,
Incremental: existing != nil,
Incremental: existing != nil && !forceFull,
}
// Resolve ffmpeg once for the sidecar prewarm (extracts text subs → WebVTT
@ -1077,6 +1176,7 @@ func runAutoScan(ctx context.Context, cfg config.Config, interval time.Duration,
_, err := ac.SyncLibrary(ctx, agent.LibrarySyncRequest{
Items: items[i:end],
ScanPath: scanPath,
AgentID: cfg.Agent.ID,
IsLastBatch: isLast,
SyncStartedAt: syncStartedAt,
})

View file

@ -0,0 +1,74 @@
package cmd
import (
"os"
"path/filepath"
"runtime"
"testing"
)
// mkfile is a test helper that creates a one-byte file at path, creating any
// missing parent directories first. Any filesystem error aborts the calling
// test via t.Fatal.
func mkfile(t *testing.T, path string) {
	t.Helper()
	parent := filepath.Dir(path)
	err := os.MkdirAll(parent, 0o755)
	if err == nil {
		// Parents are in place; write the placeholder content.
		err = os.WriteFile(path, []byte("x"), 0o644)
	}
	if err != nil {
		t.Fatal(err)
	}
}
// TestRelocateUnreachable covers the base-path-change happy path plus the
// requests that must never be relocated: a too-short tail, a missing file and
// a ".." traversal attempt.
func TestRelocateUnreachable(t *testing.T) {
	base := t.TempDir()

	// A 3-segment-deep file under the current root.
	deep := filepath.Join(base, "Acme Show", "Season 01", "ep.mkv")
	mkfile(t, deep)
	// A 2-segment-deep file (too shallow to be matched by a short tail).
	mkfile(t, filepath.Join(base, "Season 01", "lonely.mkv"))

	allowed := []string{base}

	// Base-path change: an old-root path whose 3-seg tail exists under the new
	// root → relocates to the real file.
	if got := relocateUnreachable("/old/base/Acme Show/Season 01/ep.mkv", allowed); got != deep {
		t.Errorf("relocate moved file: got %q want %q", got, deep)
	}

	// Requests that must all come back empty.
	rejected := []struct {
		request string
		failFmt string
	}{
		// Only a 2-segment tail would match → must NOT relocate (ambiguous).
		{"/old/Season 01/lonely.mkv", "2-segment tail should not match, got %q"},
		// Nonexistent file → no relocation.
		{"/old/base/Acme Show/Season 01/missing.mkv", "missing file should not relocate, got %q"},
		// Traversal attempt: ".." segments are cleaned by filepath.Join and the
		// result is re-validated, so it can't escape.
		{"/old/../../../etc/passwd", "traversal should not match, got %q"},
	}
	for _, tc := range rejected {
		if got := relocateUnreachable(tc.request, allowed); got != "" {
			t.Errorf(tc.failFmt, got)
		}
	}
}
// TestRelocateUnreachableSymlinkEscape checks that a symlink placed inside an
// allowed root cannot be used to relocate a stream onto a file that really
// lives outside that root.
func TestRelocateUnreachableSymlinkEscape(t *testing.T) {
	if runtime.GOOS == "windows" {
		t.Skip("symlink semantics differ on windows")
	}

	allowed := t.TempDir()
	foreign := t.TempDir()

	// A real file living OUTSIDE any allowed root.
	mkfile(t, filepath.Join(foreign, "sub", "secret.mkv"))

	// A symlink inside the root pointing at the outside tree.
	link := filepath.Join(allowed, "link")
	err := os.Symlink(foreign, link)
	if err != nil {
		t.Skipf("symlink unsupported: %v", err)
	}

	// The lexical candidate <root>/link/sub/secret.mkv exists (os.Stat follows
	// the symlink), but after resolving symlinks it's outside the root → must
	// be rejected so the stream can't escape the allowed dirs.
	if got := relocateUnreachable("/old/link/sub/secret.mkv", []string{allowed}); got != "" {
		t.Errorf("symlink escape must be rejected, got %q", got)
	}
}

View file

@ -205,6 +205,7 @@ func syncToServer(ctx context.Context, cfg config.Config, cache *library.Library
resp, err := ac.SyncLibrary(ctx, agent.LibrarySyncRequest{
Items: batch,
ScanPath: cache.Path,
AgentID: cfg.Agent.ID,
IsLastBatch: isLast,
SyncStartedAt: syncStartedAt,
})