fix(engine): cross-backend integrity guard with retry-then-damaged
A truncated debrid download (in-memory byte counter hit 100% while the
NFS write-back silently dropped most of the bytes) was marked completed.
The 1.1.6 fsync fix closed the debrid-specific hole; this generalizes the
guarantee so "completed" never means a corrupt file on ANY backend.
- IntegrityError + bounded retry: on a corrupt/short result the manager
re-downloads the same source up to 3x (clean start), then surfaces the
task as damaged ("corrupt download:" prefix) instead of completing it.
- verify (size mismatch / empty), debrid (incomplete / post-write / flush),
torrent (BytesMissing), usenet (par2 unrepairable / repair-failed) all
classify integrity failures so they route through the retry/damaged path.
- scanner: a file ffprobe can't read is emitted as a damaged library_item
(reason "unreadable") instead of being silently dropped from the sync.
- tests: manager retry-then-success + retry-exhausted-then-damaged,
verifying->resolving transition, damaged sync item.
This commit is contained in:
parent
271413e0f9
commit
a5f3f0914a
13 changed files with 400 additions and 91 deletions
|
|
@ -276,7 +276,11 @@ func (d *DebridDownloader) Download(ctx context.Context, task *Task, outputDir s
|
||||||
// and we read fewer bytes, the transfer was truncated (e.g. a debrid CDN edge
|
// and we read fewer bytes, the transfer was truncated (e.g. a debrid CDN edge
|
||||||
// closing the connection). Don't hand a short file to verify as if complete.
|
// closing the connection). Don't hand a short file to verify as if complete.
|
||||||
if totalBytes > 0 && downloaded < totalBytes {
|
if totalBytes > 0 && downloaded < totalBytes {
|
||||||
return nil, fmt.Errorf("incomplete download: got %s of %s", formatBytes(downloaded), formatBytes(totalBytes))
|
// Integrity, not transport — the manager re-downloads. Keep the partial
|
||||||
|
// (NOT removed): the bytes written so far are sequentially correct, so the
|
||||||
|
// retry resumes via HTTP Range from where the stream was cut instead of
|
||||||
|
// re-fetching the whole file.
|
||||||
|
return nil, integrityErr("truncated", "incomplete download: got %s of %s", formatBytes(downloaded), formatBytes(totalBytes))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Force the OS to flush the file to durable storage BEFORE we report success.
|
// Force the OS to flush the file to durable storage BEFORE we report success.
|
||||||
|
|
@ -286,10 +290,13 @@ func (d *DebridDownloader) Download(ctx context.Context, task *Task, outputDir s
|
||||||
// and rejects it ("size mismatch"). fsync surfaces a write-back error here,
|
// and rejects it ("size mismatch"). fsync surfaces a write-back error here,
|
||||||
// where it's actionable, instead of silently truncating the file.
|
// where it's actionable, instead of silently truncating the file.
|
||||||
if err := file.Sync(); err != nil {
|
if err := file.Sync(); err != nil {
|
||||||
return nil, fmt.Errorf("flush to disk failed (write-back/network-mount error): %w", err)
|
_ = closeFile()
|
||||||
|
_ = os.Remove(destPath) // uncertain on-disk state — drop it so the retry starts clean
|
||||||
|
return nil, integrityErr("flush_failed", "flush to disk failed (write-back/network-mount error): %v", err)
|
||||||
}
|
}
|
||||||
if err := closeFile(); err != nil {
|
if err := closeFile(); err != nil {
|
||||||
return nil, fmt.Errorf("close file failed (write-back/network-mount error): %w", err)
|
_ = os.Remove(destPath)
|
||||||
|
return nil, integrityErr("flush_failed", "close file failed (write-back/network-mount error): %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Safety net: after a durable flush, the on-disk size must match what we wrote.
|
// Safety net: after a durable flush, the on-disk size must match what we wrote.
|
||||||
|
|
@ -302,7 +309,7 @@ func (d *DebridDownloader) Download(ctx context.Context, task *Task, outputDir s
|
||||||
if rmErr := os.Remove(destPath); rmErr != nil {
|
if rmErr := os.Remove(destPath); rmErr != nil {
|
||||||
log.Printf("[%s] failed to remove corrupt partial %s: %v", agent.ShortID(task.ID), destPath, rmErr)
|
log.Printf("[%s] failed to remove corrupt partial %s: %v", agent.ShortID(task.ID), destPath, rmErr)
|
||||||
}
|
}
|
||||||
return nil, fmt.Errorf("post-write size mismatch: wrote %s but file is %s on disk — likely a stalled or failing storage mount (%s)",
|
return nil, integrityErr("truncated", "post-write size mismatch: wrote %s but file is %s on disk — likely a stalled or failing storage mount (%s)",
|
||||||
formatBytes(downloaded), formatBytes(fi.Size()), outputDir)
|
formatBytes(downloaded), formatBytes(fi.Size()), outputDir)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
41
internal/engine/integrity.go
Normal file
41
internal/engine/integrity.go
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
package engine
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// IntegrityError marks a finished download whose bytes don't match what was
|
||||||
|
// expected: a truncated / short file, a checksum/par2 failure, or an on-disk
|
||||||
|
// size below the advertised length. It is DISTINCT from a transport failure
|
||||||
|
// (network drop, dead tracker) — on an IntegrityError the manager re-downloads
|
||||||
|
// the SAME source a bounded number of times (a fresh, clean-start attempt), and
|
||||||
|
// only after exhausting the retries surfaces the task as damaged. This is the
|
||||||
|
// cross-backend safety net that guarantees "completed" never means a corrupt
|
||||||
|
// file (incident: 2026-06-15 debrid NFS write-back truncation — a 20 MB stub of
|
||||||
|
// a 394 MB file was marked completed because nothing re-checked the on-disk size
|
||||||
|
// after the page-cache write-back silently dropped ~374 MB).
|
||||||
|
type IntegrityError struct {
|
||||||
|
// Reason is a stable short code surfaced to the web / logs:
|
||||||
|
// "truncated", "size_mismatch", "empty", "par2_failed", "flush_failed".
|
||||||
|
Reason string
|
||||||
|
Message string // human-readable detail
|
||||||
|
}
|
||||||
|
|
||||||
|
func (e *IntegrityError) Error() string {
|
||||||
|
if e.Message != "" {
|
||||||
|
return e.Message
|
||||||
|
}
|
||||||
|
return "integrity check failed: " + e.Reason
|
||||||
|
}
|
||||||
|
|
||||||
|
// IsIntegrity reports whether err is (or wraps) an IntegrityError.
|
||||||
|
func IsIntegrity(err error) bool {
|
||||||
|
var ie *IntegrityError
|
||||||
|
return errors.As(err, &ie)
|
||||||
|
}
|
||||||
|
|
||||||
|
// integrityErr builds an IntegrityError with a printf-style message.
|
||||||
|
func integrityErr(reason, format string, args ...any) *IntegrityError {
|
||||||
|
return &IntegrityError{Reason: reason, Message: fmt.Sprintf(format, args...)}
|
||||||
|
}
|
||||||
|
|
@ -2,7 +2,9 @@ package engine
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
|
"os"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
|
|
||||||
|
|
@ -379,110 +381,144 @@ func (m *Manager) processTask(ctx context.Context, task *Task) {
|
||||||
m.activeMu.Unlock()
|
m.activeMu.Unlock()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// 1. Resolve method
|
// On a corrupt/truncated result (a downloader's own integrity guard, or the
|
||||||
if err := task.Transition(StatusResolving); err != nil {
|
// shared on-disk verify below), re-download the SAME source a bounded number
|
||||||
m.fail(ctx, task, "transition error: "+err.Error())
|
// of times — a fresh clean-start attempt usually lands intact (the 2026-06-15
|
||||||
return
|
// debrid NFS write-back truncation was transient). Only after exhausting the
|
||||||
}
|
// retries is the task surfaced as damaged, so "completed" NEVER means a corrupt
|
||||||
|
// file. (User-chosen "both" policy: auto-retry, then visible-damaged.)
|
||||||
|
const maxIntegrityAttempts = 3
|
||||||
|
|
||||||
method, err := resolveMethod(ctx, task, m.downloaders, m.cfg.PreferredMethods)
|
for attempt := 1; ; attempt++ {
|
||||||
if err != nil {
|
result, err := m.attemptDownload(ctx, task)
|
||||||
m.fail(ctx, task, "no method available: "+err.Error())
|
if err != nil {
|
||||||
return
|
if IsInsufficientDisk(err) {
|
||||||
}
|
// Terminal — another source would fill the same disk.
|
||||||
|
m.fail(ctx, task, err.Error())
|
||||||
task.ResolvedMethod = method
|
return
|
||||||
log.Printf("[%s] resolved method: %s", agent.ShortID(task.ID), method)
|
}
|
||||||
|
if IsIntegrity(err) {
|
||||||
// 2. Download
|
if attempt < maxIntegrityAttempts {
|
||||||
if err := task.Transition(StatusDownloading); err != nil {
|
log.Printf("[%s] integrity check failed (attempt %d/%d), re-downloading clean: %v",
|
||||||
m.fail(ctx, task, "transition error: "+err.Error())
|
agent.ShortID(task.ID), attempt, maxIntegrityAttempts, err)
|
||||||
return
|
continue
|
||||||
}
|
}
|
||||||
|
m.failDamaged(ctx, task, err)
|
||||||
progressCh := make(chan Progress, 16)
|
return
|
||||||
|
}
|
||||||
// Drain progress channel (just for logging; reporter reads directly from task)
|
|
||||||
go func() {
|
|
||||||
for range progressCh {
|
|
||||||
// Progress already applied via task.UpdateProgress in the downloader
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
dl := m.downloaders[method]
|
|
||||||
result, err := dl.Download(ctx, task, m.cfg.OutputDir, progressCh)
|
|
||||||
close(progressCh)
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
// A full disk is terminal — another source would fill the same disk, so
|
|
||||||
// skip the fallback and surface the clear message immediately.
|
|
||||||
if IsInsufficientDisk(err) {
|
|
||||||
m.fail(ctx, task, err.Error())
|
m.fail(ctx, task, err.Error())
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
// Try fallback
|
|
||||||
if tryFallback(task, m.downloaders, m.cfg.PreferredMethods) {
|
// Shared on-disk safety net across every backend — the last line of defense
|
||||||
log.Printf("[%s] %s failed, trying fallback: %v", agent.ShortID(task.ID), method, err)
|
// against a truncated/short file slipping past a downloader's own checks.
|
||||||
if err := task.Transition(StatusResolving); err == nil {
|
if err := task.Transition(StatusVerifying); err != nil {
|
||||||
m.processTaskRetry(ctx, task)
|
m.fail(ctx, task, "transition error: "+err.Error())
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if verr := verify(result); verr != nil {
|
||||||
|
if IsIntegrity(verr) {
|
||||||
|
removeBrokenResult(task.ID, result) // clean start so a resume doesn't append to a short file
|
||||||
|
if attempt < maxIntegrityAttempts {
|
||||||
|
log.Printf("[%s] verify failed (attempt %d/%d), re-downloading clean: %v",
|
||||||
|
agent.ShortID(task.ID), attempt, maxIntegrityAttempts, verr)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
m.failDamaged(ctx, task, verr)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
m.fail(ctx, task, "verification failed: "+verr.Error())
|
||||||
|
return
|
||||||
}
|
}
|
||||||
m.fail(ctx, task, err.Error())
|
|
||||||
|
m.finalizeVerified(ctx, task, result)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
m.finalize(ctx, task, result)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// processTaskRetry handles fallback after a method failure.
|
// attemptDownload resolves a method and downloads once, falling back to the next
|
||||||
func (m *Manager) processTaskRetry(ctx context.Context, task *Task) {
|
// configured method on a plain transport failure (NOT on disk-full or integrity
|
||||||
|
// failures — those are the caller's to handle). Returns the download Result.
|
||||||
|
func (m *Manager) attemptDownload(ctx context.Context, task *Task) (*Result, error) {
|
||||||
|
if err := task.Transition(StatusResolving); err != nil {
|
||||||
|
return nil, fmt.Errorf("transition error: %w", err)
|
||||||
|
}
|
||||||
method, err := resolveMethod(ctx, task, m.downloaders, m.cfg.PreferredMethods)
|
method, err := resolveMethod(ctx, task, m.downloaders, m.cfg.PreferredMethods)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
m.fail(ctx, task, "fallback failed: "+err.Error())
|
return nil, fmt.Errorf("no method available: %w", err)
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
task.ResolvedMethod = method
|
task.ResolvedMethod = method
|
||||||
log.Printf("[%s] fallback to: %s", agent.ShortID(task.ID), method)
|
log.Printf("[%s] resolved method: %s", agent.ShortID(task.ID), method)
|
||||||
|
|
||||||
if err := task.Transition(StatusDownloading); err != nil {
|
if err := task.Transition(StatusDownloading); err != nil {
|
||||||
m.fail(ctx, task, "transition error: "+err.Error())
|
return nil, fmt.Errorf("transition error: %w", err)
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
result, err := m.runDownload(ctx, task, method)
|
||||||
|
if err != nil {
|
||||||
|
// Disk-full is terminal; an integrity failure is retried in-place by the
|
||||||
|
// caller (same source, clean start) — don't burn the method fallback on
|
||||||
|
// either. Only a plain transport failure tries the next method.
|
||||||
|
if IsInsufficientDisk(err) || IsIntegrity(err) {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if tryFallback(task, m.downloaders, m.cfg.PreferredMethods) {
|
||||||
|
log.Printf("[%s] %s failed, trying fallback: %v", agent.ShortID(task.ID), method, err)
|
||||||
|
if terr := task.Transition(StatusResolving); terr != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return m.attemptFallback(ctx, task)
|
||||||
|
}
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// attemptFallback runs the next available method after a transport failure.
|
||||||
|
func (m *Manager) attemptFallback(ctx context.Context, task *Task) (*Result, error) {
|
||||||
|
method, err := resolveMethod(ctx, task, m.downloaders, m.cfg.PreferredMethods)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("fallback failed: %w", err)
|
||||||
|
}
|
||||||
|
task.ResolvedMethod = method
|
||||||
|
log.Printf("[%s] fallback to: %s", agent.ShortID(task.ID), method)
|
||||||
|
if err := task.Transition(StatusDownloading); err != nil {
|
||||||
|
return nil, fmt.Errorf("transition error: %w", err)
|
||||||
|
}
|
||||||
|
return m.runDownload(ctx, task, method)
|
||||||
|
}
|
||||||
|
|
||||||
|
// runDownload invokes a single downloader, draining its progress channel.
|
||||||
|
func (m *Manager) runDownload(ctx context.Context, task *Task, method DownloadMethod) (*Result, error) {
|
||||||
progressCh := make(chan Progress, 16)
|
progressCh := make(chan Progress, 16)
|
||||||
|
// Drain progress channel (reporter reads progress directly from the task).
|
||||||
go func() {
|
go func() {
|
||||||
for range progressCh {
|
for range progressCh {
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
dl := m.downloaders[method]
|
dl := m.downloaders[method]
|
||||||
result, err := dl.Download(ctx, task, m.cfg.OutputDir, progressCh)
|
result, err := dl.Download(ctx, task, m.cfg.OutputDir, progressCh)
|
||||||
close(progressCh)
|
close(progressCh)
|
||||||
|
return result, err
|
||||||
if err != nil {
|
|
||||||
// No further fallback here — same disk, same outcome — so an
|
|
||||||
// InsufficientDiskError on the fallback surfaces its message directly.
|
|
||||||
m.fail(ctx, task, err.Error())
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
m.finalize(ctx, task, result)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// finalize runs verify → organize → upgrade replacement → complete for a downloaded task.
|
// removeBrokenResult deletes a single-file result that failed the on-disk verify
|
||||||
func (m *Manager) finalize(ctx context.Context, task *Task, result *Result) {
|
// so the retry's downloader starts clean (debrid resumes from a partial via HTTP
|
||||||
// Verify
|
// Range — appending to a truncated stub would compound the corruption). Multi-file
|
||||||
if err := task.Transition(StatusVerifying); err != nil {
|
// (directory) results are left for the downloader/anacrolix to re-verify in place.
|
||||||
m.fail(ctx, task, "transition error: "+err.Error())
|
func removeBrokenResult(taskID string, result *Result) {
|
||||||
|
if result == nil || result.FilePath == "" {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if err := verify(result); err != nil {
|
if fi, err := os.Stat(result.FilePath); err == nil && !fi.IsDir() {
|
||||||
m.fail(ctx, task, "verification failed: "+err.Error())
|
if rmErr := os.Remove(result.FilePath); rmErr != nil {
|
||||||
return
|
log.Printf("[%s] failed to remove broken file %s: %v", agent.ShortID(taskID), result.FilePath, rmErr)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// finalizeVerified runs organize → upgrade replacement → complete for a download
|
||||||
|
// that already passed verify.
|
||||||
|
func (m *Manager) finalizeVerified(ctx context.Context, task *Task, result *Result) {
|
||||||
// Organize
|
// Organize
|
||||||
if err := task.Transition(StatusOrganizing); err != nil {
|
if err := task.Transition(StatusOrganizing); err != nil {
|
||||||
m.fail(ctx, task, "transition error: "+err.Error())
|
m.fail(ctx, task, "transition error: "+err.Error())
|
||||||
|
|
@ -538,3 +574,16 @@ func (m *Manager) fail(ctx context.Context, task *Task, msg string) {
|
||||||
m.recordFinished(task.ToStatusUpdate())
|
m.recordFinished(task.ToStatusUpdate())
|
||||||
m.reporter.ReportFinal(ctx, task)
|
m.reporter.ReportFinal(ctx, task)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// damagedErrorPrefix is a STABLE marker the web matches on (download_task.error_message)
|
||||||
|
// to render a "corrupt — re-download" affordance instead of a generic failure. Keep
|
||||||
|
// in sync with the web's detection (src/lib/services/agent.ts / downloads UI).
|
||||||
|
const damagedErrorPrefix = "corrupt download: "
|
||||||
|
|
||||||
|
// failDamaged marks a task failed after its bytes repeatedly failed the integrity
|
||||||
|
// check (truncated/short file, checksum/par2 failure). Same terminal path as fail,
|
||||||
|
// but with the damagedErrorPrefix so the web can surface a re-download CTA — the
|
||||||
|
// download_task table has no integrity column, so the message IS the signal.
|
||||||
|
func (m *Manager) failDamaged(ctx context.Context, task *Task, err error) {
|
||||||
|
m.fail(ctx, task, damagedErrorPrefix+err.Error())
|
||||||
|
}
|
||||||
|
|
|
||||||
128
internal/engine/manager_integrity_test.go
Normal file
128
internal/engine/manager_integrity_test.go
Normal file
|
|
@ -0,0 +1,128 @@
|
||||||
|
package engine
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"sync/atomic"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/torrentclaw/unarr/internal/agent"
|
||||||
|
)
|
||||||
|
|
||||||
|
// truncatingMockDownloader writes a SHORT file (failing the on-disk verify) until
|
||||||
|
// goodOnAttempt, then writes a full file. reportedSize is what each Result claims,
|
||||||
|
// so verify() compares the advertised size against the (initially truncated) bytes
|
||||||
|
// on disk — the exact shape of the 2026-06-15 debrid NFS truncation.
|
||||||
|
type truncatingMockDownloader struct {
|
||||||
|
dir string
|
||||||
|
reportedSize int64
|
||||||
|
goodOnAttempt int // 1-based attempt that finally writes a full file; 0 = never
|
||||||
|
callCount atomic.Int32
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *truncatingMockDownloader) Method() DownloadMethod { return MethodTorrent }
|
||||||
|
func (m *truncatingMockDownloader) Available(_ context.Context, _ *Task) (bool, error) {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
|
func (m *truncatingMockDownloader) Download(_ context.Context, _ *Task, _ string, _ chan<- Progress) (*Result, error) {
|
||||||
|
n := int(m.callCount.Add(1))
|
||||||
|
path := filepath.Join(m.dir, "movie.mkv")
|
||||||
|
size := int64(10) // truncated stub
|
||||||
|
if m.goodOnAttempt > 0 && n >= m.goodOnAttempt {
|
||||||
|
size = m.reportedSize
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(path, make([]byte, size), 0o644); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &Result{FilePath: path, FileName: "movie.mkv", Method: MethodTorrent, Size: m.reportedSize}, nil
|
||||||
|
}
|
||||||
|
func (m *truncatingMockDownloader) Pause(_ string) error { return nil }
|
||||||
|
func (m *truncatingMockDownloader) Cancel(_ string) error { return nil }
|
||||||
|
func (m *truncatingMockDownloader) Shutdown(_ context.Context) error { return nil }
|
||||||
|
|
||||||
|
// captureReporter builds a ProgressReporter over a mockStatusReporter we keep a
|
||||||
|
// handle to, so the test can read the final reported StatusUpdate.
|
||||||
|
func captureReporter() (*ProgressReporter, *mockStatusReporter) {
|
||||||
|
reporter := &mockStatusReporter{}
|
||||||
|
return &ProgressReporter{
|
||||||
|
reporter: reporter,
|
||||||
|
interval: 50 * time.Millisecond,
|
||||||
|
latest: make(map[string]*Task),
|
||||||
|
lastReported: make(map[string]TaskStatus),
|
||||||
|
}, reporter
|
||||||
|
}
|
||||||
|
|
||||||
|
func terminalUpdate(t *testing.T, r *mockStatusReporter, taskID string) agent.StatusUpdate {
|
||||||
|
t.Helper()
|
||||||
|
r.mu.Lock()
|
||||||
|
defer r.mu.Unlock()
|
||||||
|
for i := len(r.calls) - 1; i >= 0; i-- {
|
||||||
|
c := r.calls[i]
|
||||||
|
if c.TaskID == taskID && (c.Status == "completed" || c.Status == "failed") {
|
||||||
|
return c
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t.Fatalf("no terminal (completed/failed) status update for %s", taskID)
|
||||||
|
return agent.StatusUpdate{}
|
||||||
|
}
|
||||||
|
|
||||||
|
// A truncated download is re-tried clean and, once it lands intact, completes —
|
||||||
|
// "completed" is never reported for the corrupt attempt.
|
||||||
|
func TestManagerPipeline_IntegrityRetry_ThenSucceeds(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
pr, reporter := captureReporter()
|
||||||
|
dl := &truncatingMockDownloader{dir: dir, reportedSize: 10000, goodOnAttempt: 2}
|
||||||
|
|
||||||
|
mgr := NewManager(ManagerConfig{MaxConcurrent: 1, OutputDir: dir}, pr, dl)
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
go pr.Run(ctx)
|
||||||
|
|
||||||
|
const taskID = "integrity-retry-ok-123456"
|
||||||
|
mgr.Submit(ctx, agent.Task{
|
||||||
|
ID: taskID, InfoHash: "abc123def456abc123def456abc123def456abc1",
|
||||||
|
Title: "Retry Test", PreferredMethod: "torrent",
|
||||||
|
})
|
||||||
|
mgr.Wait()
|
||||||
|
|
||||||
|
if got := dl.callCount.Load(); got != 2 {
|
||||||
|
t.Errorf("download attempts = %d, want 2 (1 truncated + 1 clean)", got)
|
||||||
|
}
|
||||||
|
if u := terminalUpdate(t, reporter, taskID); u.Status != "completed" {
|
||||||
|
t.Errorf("final status = %q (%s), want completed", u.Status, u.ErrorMessage)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// A persistently-truncated download exhausts the bounded retries and is surfaced
|
||||||
|
// as damaged (failed + the stable corrupt-download marker), never completed.
|
||||||
|
func TestManagerPipeline_IntegrityRetry_ExhaustsThenDamaged(t *testing.T) {
|
||||||
|
dir := t.TempDir()
|
||||||
|
pr, reporter := captureReporter()
|
||||||
|
dl := &truncatingMockDownloader{dir: dir, reportedSize: 10000, goodOnAttempt: 0}
|
||||||
|
|
||||||
|
mgr := NewManager(ManagerConfig{MaxConcurrent: 1, OutputDir: dir}, pr, dl)
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
go pr.Run(ctx)
|
||||||
|
|
||||||
|
const taskID = "integrity-retry-bad-123456"
|
||||||
|
mgr.Submit(ctx, agent.Task{
|
||||||
|
ID: taskID, InfoHash: "abc123def456abc123def456abc123def456abc1",
|
||||||
|
Title: "Damaged Test", PreferredMethod: "torrent",
|
||||||
|
})
|
||||||
|
mgr.Wait()
|
||||||
|
|
||||||
|
if got := dl.callCount.Load(); got != 3 {
|
||||||
|
t.Errorf("download attempts = %d, want 3 (bounded retries)", got)
|
||||||
|
}
|
||||||
|
u := terminalUpdate(t, reporter, taskID)
|
||||||
|
if u.Status != "failed" {
|
||||||
|
t.Fatalf("final status = %q, want failed", u.Status)
|
||||||
|
}
|
||||||
|
if !strings.HasPrefix(u.ErrorMessage, damagedErrorPrefix) {
|
||||||
|
t.Errorf("error message = %q, want prefix %q", u.ErrorMessage, damagedErrorPrefix)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -31,9 +31,11 @@ var validTransitions = map[TaskStatus][]TaskStatus{
|
||||||
StatusClaimed: {StatusResolving, StatusCancelled},
|
StatusClaimed: {StatusResolving, StatusCancelled},
|
||||||
StatusResolving: {StatusDownloading, StatusFailed, StatusCancelled},
|
StatusResolving: {StatusDownloading, StatusFailed, StatusCancelled},
|
||||||
StatusDownloading: {StatusVerifying, StatusFailed, StatusResolving, StatusCancelled},
|
StatusDownloading: {StatusVerifying, StatusFailed, StatusResolving, StatusCancelled},
|
||||||
StatusVerifying: {StatusOrganizing, StatusFailed},
|
// Verifying → Resolving: the on-disk verify found a truncated/corrupt file and
|
||||||
StatusOrganizing: {StatusSeeding, StatusCompleted},
|
// the manager is re-downloading the same source (bounded integrity retry).
|
||||||
StatusSeeding: {StatusCompleted},
|
StatusVerifying: {StatusOrganizing, StatusFailed, StatusResolving},
|
||||||
|
StatusOrganizing: {StatusSeeding, StatusCompleted},
|
||||||
|
StatusSeeding: {StatusCompleted},
|
||||||
}
|
}
|
||||||
|
|
||||||
// Task represents a download task with its full lifecycle state.
|
// Task represents a download task with its full lifecycle state.
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,7 @@ func TestTransitionValid(t *testing.T) {
|
||||||
{StatusResolving, StatusDownloading},
|
{StatusResolving, StatusDownloading},
|
||||||
{StatusDownloading, StatusVerifying},
|
{StatusDownloading, StatusVerifying},
|
||||||
{StatusVerifying, StatusOrganizing},
|
{StatusVerifying, StatusOrganizing},
|
||||||
|
{StatusVerifying, StatusResolving}, // integrity retry: re-download after a failed on-disk verify
|
||||||
{StatusOrganizing, StatusCompleted},
|
{StatusOrganizing, StatusCompleted},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -60,7 +61,6 @@ func TestTransitionInvalid(t *testing.T) {
|
||||||
{StatusClaimed, StatusCompleted},
|
{StatusClaimed, StatusCompleted},
|
||||||
{StatusCompleted, StatusDownloading},
|
{StatusCompleted, StatusDownloading},
|
||||||
{StatusFailed, StatusCompleted},
|
{StatusFailed, StatusCompleted},
|
||||||
{StatusVerifying, StatusResolving},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tt := range invalid {
|
for _, tt := range invalid {
|
||||||
|
|
|
||||||
|
|
@ -534,11 +534,18 @@ func (d *TorrentDownloader) pollDownload(ctx context.Context, t *torrent.Torrent
|
||||||
default: // don't block if channel full
|
default: // don't block if channel full
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check completion
|
// Check completion. BytesCompleted counts only SHA1-VERIFIED pieces, so
|
||||||
|
// torrent content can't be silently truncated — but assert nothing is
|
||||||
|
// still missing (selective-file accounting, a piece that failed its last
|
||||||
|
// hash) before declaring done. A non-zero remainder is an integrity
|
||||||
|
// failure → the manager re-downloads (anacrolix re-checks pieces).
|
||||||
if downloaded >= totalBytes {
|
if downloaded >= totalBytes {
|
||||||
if isTTY {
|
if isTTY {
|
||||||
fmt.Fprintln(os.Stderr) // newline after \r progress
|
fmt.Fprintln(os.Stderr) // newline after \r progress
|
||||||
}
|
}
|
||||||
|
if missing := t.BytesMissing(); missing > 0 {
|
||||||
|
return nil, integrityErr("truncated", "torrent reported complete but %s of verified pieces are still missing", formatBytes(missing))
|
||||||
|
}
|
||||||
log.Printf("[%s] download complete: %s", task.ID[:8], fileName)
|
log.Printf("[%s] download complete: %s", task.ID[:8], fileName)
|
||||||
return &Result{}, nil
|
return &Result{}, nil
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -277,10 +277,16 @@ func (u *UsenetDownloader) Download(ctx context.Context, task *Task, outputDir s
|
||||||
log.Printf("[%s] extracted archive", shortID)
|
log.Printf("[%s] extracted archive", shortID)
|
||||||
}
|
}
|
||||||
if ppResult.VerifyNote != "" {
|
if ppResult.VerifyNote != "" {
|
||||||
// Degraded verification (par2 missing / repair failed): surface it loudly
|
// Degraded verification (par2 missing / transient probe error): surface it
|
||||||
// so the delivered file isn't silently assumed good.
|
// loudly so the delivered file isn't silently assumed good.
|
||||||
log.Printf("[%s] WARNING: %s", shortID, ppResult.VerifyNote)
|
log.Printf("[%s] WARNING: %s", shortID, ppResult.VerifyNote)
|
||||||
}
|
}
|
||||||
|
if ppResult.Corrupt {
|
||||||
|
// par2 DEFINITIVELY confirmed unrepairable damage — fail as an integrity
|
||||||
|
// error so the manager re-downloads clean instead of completing a corrupt
|
||||||
|
// release (symmetric with the debrid/torrent guards).
|
||||||
|
return nil, integrityErr("par2_failed", "usenet delivery is corrupt: %s", ppResult.VerifyNote)
|
||||||
|
}
|
||||||
|
|
||||||
finalPath := ppResult.FinalPath
|
finalPath := ppResult.FinalPath
|
||||||
if finalPath == "" {
|
if finalPath == "" {
|
||||||
|
|
|
||||||
|
|
@ -29,14 +29,19 @@ func verify(result *Result) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
if actualSize == 0 {
|
if actualSize == 0 {
|
||||||
return fmt.Errorf("download is empty: %s", result.FilePath)
|
// Integrity, not transport: a zero-byte result is corrupt — let the manager
|
||||||
|
// re-download clean rather than surface an empty file as completed.
|
||||||
|
return integrityErr("empty", "download is empty: %s", result.FilePath)
|
||||||
}
|
}
|
||||||
|
|
||||||
// If we know the expected size, check within 2% tolerance
|
// If we know the expected size, check within 2% tolerance (container/muxing
|
||||||
|
// overhead). A shortfall beyond that is a truncated/corrupt file — classify it
|
||||||
|
// as an IntegrityError so the manager re-downloads clean instead of completing
|
||||||
|
// a half file (the last line of defense across every backend).
|
||||||
if result.Size > 0 {
|
if result.Size > 0 {
|
||||||
tolerance := int64(float64(result.Size) * 0.02)
|
tolerance := int64(float64(result.Size) * 0.02)
|
||||||
if actualSize < result.Size-tolerance {
|
if actualSize < result.Size-tolerance {
|
||||||
return fmt.Errorf("size mismatch: expected %d, got %d", result.Size, actualSize)
|
return integrityErr("size_mismatch", "size mismatch: expected %d, got %d", result.Size, actualSize)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -89,6 +89,29 @@ func BuildSyncItems(cache *LibraryCache) []agent.LibrarySyncItem {
|
||||||
items := make([]agent.LibrarySyncItem, 0, len(cache.Items))
|
items := make([]agent.LibrarySyncItem, 0, len(cache.Items))
|
||||||
for _, item := range cache.Items {
|
for _, item := range cache.Items {
|
||||||
if item.ScanError != "" {
|
if item.ScanError != "" {
|
||||||
|
// A file ffprobe can't read is almost always a truncated/corrupt
|
||||||
|
// download (2026-06-15 NFS write-back truncation). Previously these were
|
||||||
|
// silently dropped — the file vanished from the library with no trace.
|
||||||
|
// Emit a minimal DAMAGED row instead so the web flags it (badge +
|
||||||
|
// blocked playback + re-download) rather than hiding it. All fields below
|
||||||
|
// are populated before ffprobe runs, so they're valid even on scan error.
|
||||||
|
// The scanner re-probes damaged items every scan, so a clean re-download
|
||||||
|
// to the same path self-heals the verdict.
|
||||||
|
items = append(items, agent.LibrarySyncItem{
|
||||||
|
FilePath: item.FilePath,
|
||||||
|
FileName: item.FileName,
|
||||||
|
FileSize: item.FileSize,
|
||||||
|
Title: item.Title,
|
||||||
|
Year: item.Year,
|
||||||
|
ContentType: DeriveContentType(item),
|
||||||
|
Season: item.Season,
|
||||||
|
Episode: item.Episode,
|
||||||
|
Fingerprint: item.Fingerprint,
|
||||||
|
RelPath: relToRoot(cache.Path, item.FilePath),
|
||||||
|
LibraryRootKey: "library",
|
||||||
|
Integrity: "damaged",
|
||||||
|
IntegrityReason: "unreadable",
|
||||||
|
})
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
si := agent.LibrarySyncItem{
|
si := agent.LibrarySyncItem{
|
||||||
|
|
|
||||||
|
|
@ -52,8 +52,10 @@ func TestBuildSyncItems(t *testing.T) {
|
||||||
|
|
||||||
items := BuildSyncItems(cache)
|
items := BuildSyncItems(cache)
|
||||||
|
|
||||||
if len(items) != 2 {
|
// 3 items: the movie, the show, and the scan-error file surfaced as DAMAGED
|
||||||
t.Fatalf("expected 2 items (1 skipped), got %d", len(items))
|
// (no longer silently dropped — the web flags it for re-download).
|
||||||
|
if len(items) != 3 {
|
||||||
|
t.Fatalf("expected 3 items (1 damaged), got %d", len(items))
|
||||||
}
|
}
|
||||||
|
|
||||||
// First item: movie with full media info
|
// First item: movie with full media info
|
||||||
|
|
@ -97,6 +99,18 @@ func TestBuildSyncItems(t *testing.T) {
|
||||||
if show.Resolution != "" {
|
if show.Resolution != "" {
|
||||||
t.Errorf("resolution should be empty, got %q", show.Resolution)
|
t.Errorf("resolution should be empty, got %q", show.Resolution)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Third item: scan-error file surfaced as damaged (unreadable), not skipped.
|
||||||
|
damaged := items[2]
|
||||||
|
if damaged.FilePath != "/media/bad.mkv" {
|
||||||
|
t.Errorf("damaged filePath = %q, want /media/bad.mkv", damaged.FilePath)
|
||||||
|
}
|
||||||
|
if damaged.Integrity != "damaged" {
|
||||||
|
t.Errorf("integrity = %q, want damaged", damaged.Integrity)
|
||||||
|
}
|
||||||
|
if damaged.IntegrityReason != "unreadable" {
|
||||||
|
t.Errorf("integrityReason = %q, want unreadable", damaged.IntegrityReason)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestBuildSyncItemsEmpty(t *testing.T) {
|
func TestBuildSyncItemsEmpty(t *testing.T) {
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,13 @@ import (
|
||||||
// be checked is delivered UNVERIFIED, not verified.
|
// be checked is delivered UNVERIFIED, not verified.
|
||||||
var ErrPar2NotInstalled = errors.New("par2 not installed")
|
var ErrPar2NotInstalled = errors.New("par2 not installed")
|
||||||
|
|
||||||
|
// ErrPar2Unrepairable is returned by Par2Verify when parity confirms the data is
|
||||||
|
// damaged AND par2 reports repair is not possible — the file is definitively
|
||||||
|
// corrupt (distinct from a transient par2 probe error). The pipeline marks the
|
||||||
|
// delivery Corrupt so the engine treats it as an integrity failure and
|
||||||
|
// re-downloads, rather than shipping a broken file with a soft warning.
|
||||||
|
var ErrPar2Unrepairable = errors.New("par2: verification failed and repair not possible")
|
||||||
|
|
||||||
// par2Lookup probes whether the par2 binary is on PATH. It's a package var so
|
// par2Lookup probes whether the par2 binary is on PATH. It's a package var so
|
||||||
// tests can simulate a missing binary without touching the real PATH.
|
// tests can simulate a missing binary without touching the real PATH.
|
||||||
var par2Lookup = func() bool {
|
var par2Lookup = func() bool {
|
||||||
|
|
@ -42,7 +49,7 @@ func Par2Verify(par2File string) error {
|
||||||
return &Par2RepairableError{Par2File: par2File}
|
return &Par2RepairableError{Par2File: par2File}
|
||||||
}
|
}
|
||||||
if strings.Contains(outStr, "Repair is not possible") {
|
if strings.Contains(outStr, "Repair is not possible") {
|
||||||
return fmt.Errorf("par2: verification failed and repair not possible:\n%s", outStr)
|
return fmt.Errorf("%w:\n%s", ErrPar2Unrepairable, outStr)
|
||||||
}
|
}
|
||||||
return fmt.Errorf("par2 verify: %w\n%s", err, outStr)
|
return fmt.Errorf("par2 verify: %w\n%s", err, outStr)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,12 @@ type Result struct {
|
||||||
// the file is unverified rather than silently assuming it's good. Empty means
|
// the file is unverified rather than silently assuming it's good. Empty means
|
||||||
// either "verified OK" or "no parity shipped" — both are non-degraded.
|
// either "verified OK" or "no parity shipped" — both are non-degraded.
|
||||||
VerifyNote string
|
VerifyNote string
|
||||||
|
// Corrupt is true when par2 DEFINITIVELY confirmed the data is damaged and it
|
||||||
|
// could not be repaired (repair failed, or corruption detected with no par2
|
||||||
|
// binary to fix it). The engine treats this as an integrity failure and
|
||||||
|
// re-downloads — distinct from VerifyNote's softer "unverified but delivered"
|
||||||
|
// (e.g. no parity shipped, or a transient probe error).
|
||||||
|
Corrupt bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// Options configures post-processing behavior.
|
// Options configures post-processing behavior.
|
||||||
|
|
@ -58,14 +64,28 @@ func Process(dir string, downloadedFiles map[string]string, opts Options) (*Resu
|
||||||
result.Repaired = true
|
result.Repaired = true
|
||||||
log.Printf("[usenet] par2: repair successful")
|
log.Printf("[usenet] par2: repair successful")
|
||||||
case errors.Is(repairErr, ErrPar2NotInstalled):
|
case errors.Is(repairErr, ErrPar2NotInstalled):
|
||||||
result.VerifyNote = "par2 corruption detected but `par2` is not installed — cannot repair, delivered POSSIBLY CORRUPT"
|
// Damage confirmed by parity, but no binary to repair it — the
|
||||||
|
// delivered file IS corrupt. Mark it so the engine re-downloads.
|
||||||
|
result.Corrupt = true
|
||||||
|
result.VerifyNote = "par2 corruption detected but `par2` is not installed — cannot repair, file is CORRUPT"
|
||||||
log.Printf("[usenet] WARNING: %s", result.VerifyNote)
|
log.Printf("[usenet] WARNING: %s", result.VerifyNote)
|
||||||
default:
|
default:
|
||||||
result.VerifyNote = fmt.Sprintf("par2 repair failed — file may be corrupt: %v", repairErr)
|
// Repair attempted and failed — the data is damaged beyond recovery.
|
||||||
|
result.Corrupt = true
|
||||||
|
result.VerifyNote = fmt.Sprintf("par2 repair failed — file is corrupt: %v", repairErr)
|
||||||
log.Printf("[usenet] WARNING: %s", result.VerifyNote)
|
log.Printf("[usenet] WARNING: %s", result.VerifyNote)
|
||||||
}
|
}
|
||||||
|
case errors.Is(err, ErrPar2Unrepairable):
|
||||||
|
// Parity confirmed the data is damaged and unrepairable — definitively
|
||||||
|
// corrupt (NOT a transient probe error). Engine re-downloads.
|
||||||
|
result.Corrupt = true
|
||||||
|
result.VerifyNote = fmt.Sprintf("par2: file is corrupt and cannot be repaired: %v", err)
|
||||||
|
log.Printf("[usenet] WARNING: %s", result.VerifyNote)
|
||||||
default:
|
default:
|
||||||
result.VerifyNote = fmt.Sprintf("par2 verification error — file may be corrupt: %v", err)
|
// A transient par2 probe/exec error (binary crash, I/O hiccup) — can't
|
||||||
|
// confirm corruption, so deliver UNVERIFIED with a loud note rather than
|
||||||
|
// nuking a possibly-good file.
|
||||||
|
result.VerifyNote = fmt.Sprintf("par2 verification error — file unverified: %v", err)
|
||||||
log.Printf("[usenet] WARNING: %s", result.VerifyNote)
|
log.Printf("[usenet] WARNING: %s", result.VerifyNote)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue