fix(engine): cross-backend integrity guard with retry-then-damaged
A truncated debrid download (in-memory byte counter hit 100% while the
NFS write-back silently dropped most of the bytes) was marked completed.
The 1.1.6 fsync fix closed the debrid-specific hole; this generalizes the
guarantee so "completed" never means a corrupt file on ANY backend.
- IntegrityError + bounded retry: on a corrupt/short result the manager
re-downloads the same source up to 3x (clean start), then surfaces the
task as damaged ("corrupt download:" prefix) instead of completing it.
- verify (size mismatch / empty), debrid (incomplete / post-write / flush),
torrent (BytesMissing), usenet (par2 unrepairable / repair-failed) all
classify integrity failures so they route through the retry/damaged path.
- scanner: a file ffprobe can't read is emitted as a damaged library_item
(reason "unreadable") instead of being silently dropped from the sync.
- tests: manager retry-then-success + retry-exhausted-then-damaged,
verifying->resolving transition, damaged sync item.
This commit is contained in:
parent
271413e0f9
commit
a5f3f0914a
13 changed files with 400 additions and 91 deletions
|
|
@ -276,7 +276,11 @@ func (d *DebridDownloader) Download(ctx context.Context, task *Task, outputDir s
|
|||
// and we read fewer bytes, the transfer was truncated (e.g. a debrid CDN edge
|
||||
// closing the connection). Don't hand a short file to verify as if complete.
|
||||
if totalBytes > 0 && downloaded < totalBytes {
|
||||
return nil, fmt.Errorf("incomplete download: got %s of %s", formatBytes(downloaded), formatBytes(totalBytes))
|
||||
// Integrity, not transport — the manager re-downloads. Keep the partial
|
||||
// (NOT removed): the bytes written so far are sequentially correct, so the
|
||||
// retry resumes via HTTP Range from where the stream was cut instead of
|
||||
// re-fetching the whole file.
|
||||
return nil, integrityErr("truncated", "incomplete download: got %s of %s", formatBytes(downloaded), formatBytes(totalBytes))
|
||||
}
|
||||
|
||||
// Force the OS to flush the file to durable storage BEFORE we report success.
|
||||
|
|
@ -286,10 +290,13 @@ func (d *DebridDownloader) Download(ctx context.Context, task *Task, outputDir s
|
|||
// and rejects it ("size mismatch"). fsync surfaces a write-back error here,
|
||||
// where it's actionable, instead of silently truncating the file.
|
||||
if err := file.Sync(); err != nil {
|
||||
return nil, fmt.Errorf("flush to disk failed (write-back/network-mount error): %w", err)
|
||||
_ = closeFile()
|
||||
_ = os.Remove(destPath) // uncertain on-disk state — drop it so the retry starts clean
|
||||
return nil, integrityErr("flush_failed", "flush to disk failed (write-back/network-mount error): %v", err)
|
||||
}
|
||||
if err := closeFile(); err != nil {
|
||||
return nil, fmt.Errorf("close file failed (write-back/network-mount error): %w", err)
|
||||
_ = os.Remove(destPath)
|
||||
return nil, integrityErr("flush_failed", "close file failed (write-back/network-mount error): %v", err)
|
||||
}
|
||||
|
||||
// Safety net: after a durable flush, the on-disk size must match what we wrote.
|
||||
|
|
@ -302,7 +309,7 @@ func (d *DebridDownloader) Download(ctx context.Context, task *Task, outputDir s
|
|||
if rmErr := os.Remove(destPath); rmErr != nil {
|
||||
log.Printf("[%s] failed to remove corrupt partial %s: %v", agent.ShortID(task.ID), destPath, rmErr)
|
||||
}
|
||||
return nil, fmt.Errorf("post-write size mismatch: wrote %s but file is %s on disk — likely a stalled or failing storage mount (%s)",
|
||||
return nil, integrityErr("truncated", "post-write size mismatch: wrote %s but file is %s on disk — likely a stalled or failing storage mount (%s)",
|
||||
formatBytes(downloaded), formatBytes(fi.Size()), outputDir)
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue