feat(library): content fingerprint + path-resilient sync + stream self-heal
Stop treating the absolute path as a file's identity, so that a base-path change (host binary → Docker remap, moved media folder, remount) no longer makes the server duplicate and orphan library rows.

- fingerprint.go: ComputeFingerprint = sha256(size ‖ first 1 MiB ‖ last 1 MiB), a stable content identity that survives a rename, move, or base-path change. It is cached in LibraryItem and reused on incremental scans when size+mtime are unchanged.
- sync: send fingerprint + rel_path (relative to the scan root) + agent_id in the library-sync request, so the server can move a row in place and scope stale-cleanup per agent.
- daemon: force a FULL re-scan (with a user-facing WARNING) when the scan root has changed since the last cache, so the server re-maps by fingerprint instead of duplicating. basePathChanged compares filepath.Clean'd roots.
- daemon: relocateUnreachable self-heals a stream request whose path is under an old root but whose file still exists under a current allowed root, so playback works immediately without waiting for the re-scan. It is conservative: it requires a 3-segment tail and re-checks containment after resolving symlinks, so it can neither serve the wrong file nor escape the allowed dirs.

See docs/plans/unarr-path-resilience.md in the web repo.
This commit is contained in:
parent
e298ff6c05
commit
b6ddeea129
9 changed files with 396 additions and 38 deletions
55
internal/library/fingerprint.go
Normal file
55
internal/library/fingerprint.go
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
package library
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/binary"
|
||||
"encoding/hex"
|
||||
"io"
|
||||
"os"
|
||||
)
|
||||
|
||||
// fpChunk is how many bytes are hashed from the head and from the tail of a
// file.
const fpChunk = 1 << 20 // 1 MiB

// ComputeFingerprint returns a stable content identity for a media file: the
// hex-encoded sha256(size ‖ first 1 MiB ‖ last 1 MiB), where size is the
// caller-supplied value written as an 8-byte little-endian integer.
//
// The fingerprint depends only on content and size, never on the path, so it
// survives renames, moves, and base-path changes. That lets the server
// recognise the same file at a new location and move its library row in place
// instead of duplicating it. See docs/plans/unarr-path-resilience.md in the
// web repo.
//
// Files of at most 2*fpChunk bytes are hashed whole (head and tail would
// overlap anyway); larger files cost two bounded reads. size is trusted as
// given and is not re-checked against the file: if a large file turns out to
// be shorter than size, the bounded reads fail and an error is returned.
//
// It returns any error from opening, reading, or seeking the file; the
// returned string is empty in that case.
func ComputeFingerprint(path string, size int64) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	h := sha256.New()

	// Mix the size in first so files that share a head and tail but differ in
	// length get different fingerprints. hash.Hash.Write never returns an
	// error, so its result is deliberately not checked.
	var sizeBuf [8]byte
	binary.LittleEndian.PutUint64(sizeBuf[:], uint64(size))
	h.Write(sizeBuf[:])

	if size <= 2*fpChunk {
		// Small file: hash it whole — head+tail would overlap anyway.
		if _, err := io.Copy(h, f); err != nil {
			return "", err
		}
		return hex.EncodeToString(h.Sum(nil)), nil
	}

	// One scratch buffer serves both reads; the head has already been fed to
	// the hash by the time the tail overwrites it.
	buf := make([]byte, fpChunk)

	if _, err := io.ReadFull(f, buf); err != nil {
		return "", err
	}
	h.Write(buf)

	if _, err := f.Seek(size-fpChunk, io.SeekStart); err != nil {
		return "", err
	}
	if _, err := io.ReadFull(f, buf); err != nil {
		return "", err
	}
	h.Write(buf)

	return hex.EncodeToString(h.Sum(nil)), nil
}
|
||||
81
internal/library/fingerprint_test.go
Normal file
81
internal/library/fingerprint_test.go
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
package library
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// writeFile creates the file name inside dir with the given contents (mode
// 0o644) and returns its full path. A write failure aborts the calling test.
func writeFile(t *testing.T, dir, name string, data []byte) string {
	t.Helper()

	target := filepath.Join(dir, name)
	err := os.WriteFile(target, data, 0o644)
	if err != nil {
		t.Fatalf("write %s: %v", target, err)
	}
	return target
}
|
||||
|
||||
func fp(t *testing.T, path string) string {
|
||||
t.Helper()
|
||||
fi, err := os.Stat(path)
|
||||
if err != nil {
|
||||
t.Fatalf("stat %s: %v", path, err)
|
||||
}
|
||||
s, err := ComputeFingerprint(path, fi.Size())
|
||||
if err != nil {
|
||||
t.Fatalf("fingerprint %s: %v", path, err)
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
func TestComputeFingerprint(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
big := make([]byte, 5<<20) // 5 MiB > 2*chunk
|
||||
for i := range big {
|
||||
big[i] = byte(i * 7)
|
||||
}
|
||||
|
||||
a := fp(t, writeFile(t, dir, "a.bin", big))
|
||||
if len(a) != 64 {
|
||||
t.Fatalf("want 64-hex, got %d", len(a))
|
||||
}
|
||||
|
||||
// Move-invariance: identical bytes at a different path → same fingerprint.
|
||||
if b := fp(t, writeFile(t, dir, "moved.bin", big)); b != a {
|
||||
t.Errorf("move changed fingerprint: %s != %s", a, b)
|
||||
}
|
||||
|
||||
// Tail sensitivity: flipping the last byte must change the fingerprint.
|
||||
tailMut := append([]byte(nil), big...)
|
||||
tailMut[len(tailMut)-1] ^= 0xFF
|
||||
if c := fp(t, writeFile(t, dir, "tail.bin", tailMut)); c == a {
|
||||
t.Error("tail mutation did not change fingerprint")
|
||||
}
|
||||
|
||||
// Head sensitivity.
|
||||
headMut := append([]byte(nil), big...)
|
||||
headMut[0] ^= 0xFF
|
||||
if c := fp(t, writeFile(t, dir, "head.bin", headMut)); c == a {
|
||||
t.Error("head mutation did not change fingerprint")
|
||||
}
|
||||
|
||||
// Size is mixed in: a small file and a large file never collide trivially.
|
||||
small := fp(t, writeFile(t, dir, "small.bin", []byte("hello world")))
|
||||
if small == a {
|
||||
t.Error("small and big fingerprints collided")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRelToRoot(t *testing.T) {
|
||||
cases := []struct{ root, full, want string }{
|
||||
{"/downloads", "/downloads/TV Shows/X/S01E09.mkv", "TV Shows/X/S01E09.mkv"},
|
||||
{"/downloads", "/mnt/other/file.mkv", ""}, // outside root
|
||||
{"/downloads", "/downloads", ""}, // equal → "."
|
||||
{"", "/x/y.mkv", ""}, // no root
|
||||
}
|
||||
for _, c := range cases {
|
||||
if got := relToRoot(c.root, c.full); got != c.want {
|
||||
t.Errorf("relToRoot(%q,%q)=%q want %q", c.root, c.full, got, c.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -130,6 +130,26 @@ func scanSingleFile(ctx context.Context, ffprobePath, filePath string, cacheIdx
|
|||
ModTime: info.ModTime().UTC().Format(time.RFC3339),
|
||||
}
|
||||
|
||||
// Look up the cached entry once — reused for both fingerprint reuse and the
|
||||
// incremental ffprobe skip below.
|
||||
var cached *LibraryItem
|
||||
if existing != nil {
|
||||
if idx, ok := cacheIdx[filePath]; ok {
|
||||
cached = &existing.Items[idx]
|
||||
}
|
||||
}
|
||||
unchanged := cached != nil &&
|
||||
cached.FileSize == item.FileSize && cached.ModTime == item.ModTime
|
||||
|
||||
// Fingerprint: reuse the cached value when the file is unchanged and already
|
||||
// has one; otherwise compute it (cheap, two bounded reads). Computed even on
|
||||
// the incremental path so every synced item carries a stable identity.
|
||||
if unchanged && cached.Fingerprint != "" {
|
||||
item.Fingerprint = cached.Fingerprint
|
||||
} else if fp, fpErr := ComputeFingerprint(filePath, item.FileSize); fpErr == nil {
|
||||
item.Fingerprint = fp
|
||||
}
|
||||
|
||||
// Parse filename for title, year, quality, codec
|
||||
parsed := parser.Parse(item.FileName)
|
||||
item.Quality = parsed.Quality
|
||||
|
|
@ -150,15 +170,10 @@ func scanSingleFile(ctx context.Context, ffprobePath, filePath string, cacheIdx
|
|||
// an identical size+mtime (some torrent clients preserve the torrent's
|
||||
// mtime), so trusting the cached "damaged" verdict would pin a now-healthy
|
||||
// file as broken forever. Re-probing damaged items is cheap (they're few).
|
||||
if incremental && existing != nil {
|
||||
if idx, ok := cacheIdx[filePath]; ok {
|
||||
cached := existing.Items[idx]
|
||||
if cached.FileSize == item.FileSize && cached.ModTime == item.ModTime &&
|
||||
cached.MediaInfo != nil && cached.MediaInfo.Integrity == nil {
|
||||
item.MediaInfo = cached.MediaInfo
|
||||
return item
|
||||
}
|
||||
}
|
||||
if incremental && unchanged &&
|
||||
cached.MediaInfo != nil && cached.MediaInfo.Integrity == nil {
|
||||
item.MediaInfo = cached.MediaInfo
|
||||
return item
|
||||
}
|
||||
|
||||
// Run ffprobe
|
||||
|
|
|
|||
|
|
@ -1,6 +1,25 @@
|
|||
package library
|
||||
|
||||
import "github.com/torrentclaw/unarr/internal/agent"
|
||||
import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/torrentclaw/unarr/internal/agent"
|
||||
)
|
||||
|
||||
// relToRoot returns the file's path relative to the scan root (forward-slashed),
// or "" when it doesn't live under root. The server stores this so streaming can
// later reconstruct the absolute path from the agent's *current* root.
//
// "" is returned when root is empty, when full equals root, when
// filepath.Rel fails (e.g. one path is absolute and the other relative), or
// when full lies outside root.
func relToRoot(root, full string) string {
	if root == "" {
		return ""
	}
	rel, err := filepath.Rel(root, full)
	if err != nil || rel == "." {
		return ""
	}
	// filepath.Rel returns a cleaned path, so a result that escapes root is
	// exactly ".." or starts with a ".." segment. Compare against the whole
	// segment, not the bare prefix: entries such as "..hidden.mkv" or
	// "...And Justice for All (1979)" are legitimately inside root.
	if rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return ""
	}
	return filepath.ToSlash(rel)
}
|
||||
|
||||
// BuildSyncItems converts cached library items to sync request items.
|
||||
// Shared between unarr scan (cmd/scan.go) and auto-scan (cmd/daemon.go).
|
||||
|
|
@ -11,14 +30,17 @@ func BuildSyncItems(cache *LibraryCache) []agent.LibrarySyncItem {
|
|||
continue
|
||||
}
|
||||
si := agent.LibrarySyncItem{
|
||||
FilePath: item.FilePath,
|
||||
FileName: item.FileName,
|
||||
FileSize: item.FileSize,
|
||||
Title: item.Title,
|
||||
Year: item.Year,
|
||||
ContentType: DeriveContentType(item),
|
||||
Season: item.Season,
|
||||
Episode: item.Episode,
|
||||
FilePath: item.FilePath,
|
||||
FileName: item.FileName,
|
||||
FileSize: item.FileSize,
|
||||
Title: item.Title,
|
||||
Year: item.Year,
|
||||
ContentType: DeriveContentType(item),
|
||||
Season: item.Season,
|
||||
Episode: item.Episode,
|
||||
Fingerprint: item.Fingerprint,
|
||||
RelPath: relToRoot(cache.Path, item.FilePath),
|
||||
LibraryRootKey: "library",
|
||||
}
|
||||
|
||||
if item.MediaInfo != nil {
|
||||
|
|
|
|||
|
|
@ -4,18 +4,21 @@ import "github.com/torrentclaw/unarr/internal/library/mediainfo"
|
|||
|
||||
// LibraryItem represents a single scanned media file.
|
||||
type LibraryItem struct {
|
||||
FilePath string `json:"filePath"`
|
||||
FileName string `json:"fileName"`
|
||||
FileSize int64 `json:"fileSize"`
|
||||
ModTime string `json:"modTime"` // ISO 8601
|
||||
Title string `json:"title"`
|
||||
Year string `json:"year,omitempty"`
|
||||
Season int `json:"season,omitempty"`
|
||||
Episode int `json:"episode,omitempty"`
|
||||
Quality string `json:"quality,omitempty"` // "1080p" etc (from filename)
|
||||
Codec string `json:"codec,omitempty"` // "x265" etc (from filename)
|
||||
MediaInfo *mediainfo.MediaInfo `json:"mediaInfo,omitempty"`
|
||||
ScanError string `json:"scanError,omitempty"`
|
||||
FilePath string `json:"filePath"`
|
||||
FileName string `json:"fileName"`
|
||||
FileSize int64 `json:"fileSize"`
|
||||
ModTime string `json:"modTime"` // ISO 8601
|
||||
// Fingerprint is a stable content identity (see fingerprint.go). Cached so
|
||||
// incremental scans reuse it when size+mtime are unchanged.
|
||||
Fingerprint string `json:"fingerprint,omitempty"`
|
||||
Title string `json:"title"`
|
||||
Year string `json:"year,omitempty"`
|
||||
Season int `json:"season,omitempty"`
|
||||
Episode int `json:"episode,omitempty"`
|
||||
Quality string `json:"quality,omitempty"` // "1080p" etc (from filename)
|
||||
Codec string `json:"codec,omitempty"` // "x265" etc (from filename)
|
||||
MediaInfo *mediainfo.MediaInfo `json:"mediaInfo,omitempty"`
|
||||
ScanError string `json:"scanError,omitempty"`
|
||||
}
|
||||
|
||||
// LibraryCache is the on-disk cache of scanned library items.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue