unarr/internal/library/fingerprint.go
Deivid Soto b6ddeea129 feat(library): content fingerprint + path-resilient sync + stream self-heal
Stop treating the absolute path as a file's identity so a base-path change
(host binary→docker remap, moved media folder, remount) no longer makes the
server duplicate and orphan library rows.

- fingerprint.go: ComputeFingerprint = sha256(size ‖ first 1MiB ‖ last 1MiB),
  a stable content identity that survives rename/move/base-path change. Cached
  in LibraryItem and reused on incremental scans when size+mtime are unchanged.
- sync: send fingerprint + rel_path (relative to the scan root) + agent_id in
  the library-sync request, so the server can move a row in place and scope
  stale-cleanup per agent.
- daemon: force a FULL re-scan (with a user-facing WARNING) when the scan root
  changed since the last cache, so the server re-maps by fingerprint instead of
  duplicating. basePathChanged compares filepath.Clean'd roots.
- daemon: relocateUnreachable self-heals a stream request whose path is under an
  old root but whose file still exists under a current allowed root, so playback
  works immediately without waiting for the re-scan. Conservative: requires a
  3-segment tail and re-checks containment after resolving symlinks so it can
  neither serve the wrong file nor escape the allowed dirs.

See docs/plans/unarr-path-resilience.md in the web repo.
2026-06-03 12:08:58 +02:00

55 lines
1.4 KiB
Go

package library
import (
	"crypto/sha256"
	"encoding/binary"
	"encoding/hex"
	"fmt"
	"io"
	"os"
)
// fpChunk is how many bytes are hashed from the head and from the tail of a
// file that is too large to be hashed whole.
const fpChunk = 1 << 20 // 1 MiB

// ComputeFingerprint returns a stable content identity for a media file as a
// lowercase hex string:
//
//	sha256(size ‖ first 1 MiB ‖ last 1 MiB)
//
// where size is encoded as an 8-byte little-endian integer. The result depends
// only on the file's length and content, never on its path, so it survives
// renames, moves, and base-path changes; this lets the server recognise the
// same file at a new location and move its library row in place instead of
// duplicating it. See docs/plans/unarr-path-resilience.md in the web repo.
//
// size is the caller's view of the file length. It is mixed into the hash and
// selects the strategy:
//   - size <= 2*fpChunk: the whole file content is hashed (head and tail
//     would overlap anyway);
//   - otherwise: only the first and the last fpChunk bytes are read.
//
// On failure it returns "" and a non-nil error. Read and seek failures are
// wrapped with the path and the failing stage; the underlying cause stays
// reachable through errors.Is / errors.As. In particular, when the file is
// shorter than size claims, the cause is io.EOF or io.ErrUnexpectedEOF.
func ComputeFingerprint(path string, size int64) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		// The *os.PathError from Open already names the operation and path.
		return "", err
	}
	defer f.Close()

	// hash.Hash.Write is documented to never return an error, so its results
	// are intentionally not checked below.
	h := sha256.New()
	var sizeBuf [8]byte
	binary.LittleEndian.PutUint64(sizeBuf[:], uint64(size))
	h.Write(sizeBuf[:])

	if size <= 2*fpChunk {
		// Small file: hash it whole — head+tail would overlap anyway.
		if _, err := io.Copy(h, f); err != nil {
			return "", fmt.Errorf("fingerprint %q: hashing content: %w", path, err)
		}
		return hex.EncodeToString(h.Sum(nil)), nil
	}

	// One buffer serves both reads: the head bytes are already folded into the
	// hash before the tail overwrites them.
	buf := make([]byte, fpChunk)
	if _, err := io.ReadFull(f, buf); err != nil {
		return "", fmt.Errorf("fingerprint %q: reading head: %w", path, err)
	}
	h.Write(buf)

	if _, err := f.Seek(size-fpChunk, io.SeekStart); err != nil {
		return "", fmt.Errorf("fingerprint %q: seeking to tail: %w", path, err)
	}
	if _, err := io.ReadFull(f, buf); err != nil {
		return "", fmt.Errorf("fingerprint %q: reading tail: %w", path, err)
	}
	h.Write(buf)

	return hex.EncodeToString(h.Sum(nil)), nil
}