unarr/internal/cmd/scan.go
Deivid Soto c82826bf68
Some checks failed
CI / Test (push) Failing after 6m21s
CI / Build (push) Successful in 1m34s
CI / Build-1 (push) Successful in 2m0s
CI / Build-2 (push) Successful in 1m33s
CI / Build-3 (push) Successful in 1m38s
CI / Build-4 (push) Successful in 1m35s
CI / Build-5 (push) Successful in 1m38s
CI / Lint (push) Failing after 2m34s
CI / Coverage (push) Failing after 2m44s
CI / Vet (push) Successful in 2m3s
fix(trickplay): stop scan-time sprite generation from saturating the host
Trickplay sprite generation (one full-decode ffmpeg pass per file) could pin a
machine: multiple agents on the same library decoded the same 4K file at once, no
CPU throttling, and crashed/restarted agents orphaned ffmpeg to init (it ran the
full 45-min decode to completion). Stacked orphans spiked a box to load ~140.

- Single-flight lock: O_CREATE|O_EXCL .lock in the shared sidecar dir so two
  agents watching the same library never decode the same file twice (stale locks
  reclaimed after a TTL). Returns ErrTrickplayInProgress → prewarm skips, not fail.
- Load gate: defer the heavy decode until 1-min load ≤ max(ratio×NumCPU, 1.5),
  capped at 15 min so it throttles without ever becoming a permanent off-switch on
  busy / small hosts. New knob library.prewarm_max_load_ratio (default 0.7).
- Concurrency: trickSem caps trickplay to ONE decode at a time per agent.
- CPU priority: setLowCPUPriority (nice 19) alongside the existing idle ionice.
- No orphans: hardenCmd sets Setpgid + Pdeathsig=SIGKILL, with runtime.LockOSThread
  around the child so the kernel kills ffmpeg exactly when the agent dies (and not
  spuriously — golang/go#27505).

Tests: single-flight/stale-reclaim, load-gate immediate/cancel, and an e2e
Pdeathsig orphan-kill check.
2026-06-04 08:25:00 +02:00

337 lines
8.8 KiB
Go

package cmd
import (
"context"
"encoding/json"
"fmt"
"os"
"os/signal"
"sort"
"strings"
"syscall"
"time"
"github.com/fatih/color"
"github.com/spf13/cobra"
"github.com/torrentclaw/unarr/internal/agent"
"github.com/torrentclaw/unarr/internal/config"
"github.com/torrentclaw/unarr/internal/library"
"github.com/torrentclaw/unarr/internal/library/mediainfo"
)
// newScanCmd builds the "scan" subcommand.
//
// With --status it only prints the summary of the last scan. With an explicit
// <path> argument it scans that directory. Without an argument it scans every
// path returned by library.ResolveScanPaths for the current configuration,
// stopping at the first failure.
func newScanCmd() *cobra.Command {
	// Flag values, filled in by cobra before the run function executes.
	var opts struct {
		workers    int
		ffprobe    string
		showStatus bool
		noSync     bool
	}

	run := func(_ *cobra.Command, args []string) error {
		switch {
		case opts.showStatus:
			return runScanStatus()
		case len(args) > 0:
			return runScan(args[0], opts.workers, opts.ffprobe, opts.noSync)
		}

		// No explicit path: fall back to the paths derived from the config.
		cfg := loadConfig()
		targets := library.ResolveScanPaths(
			cfg.Download.Dir,
			cfg.Organize.MoviesDir,
			cfg.Organize.TVShowsDir,
			cfg.Library.ScanPath,
		)
		if len(targets) == 0 {
			return fmt.Errorf("usage: unarr scan <path>\n\nNo scan paths configured. Provide a path or set up downloads.dir via 'unarr init'")
		}
		for _, target := range targets {
			if scanErr := runScan(target, opts.workers, opts.ffprobe, opts.noSync); scanErr != nil {
				return scanErr
			}
		}
		return nil
	}

	scanCmd := &cobra.Command{
		Use:   "scan <path>",
		Short: "Scan your media library for quality analysis",
		Long: `Walk a folder recursively, analyze each video file with ffprobe,
and sync the results to your TorrentClaw account.
After scanning, visit your Library page at torrentclaw.com/library
to see available quality upgrades.`,
		Args: cobra.MaximumNArgs(1),
		RunE: run,
	}

	flags := scanCmd.Flags()
	flags.IntVar(&opts.workers, "workers", 0, "concurrent ffprobe workers (default: config or 8)")
	flags.StringVar(&opts.ffprobe, "ffprobe", "", "path to ffprobe binary")
	flags.BoolVar(&opts.showStatus, "status", false, "show summary of last scan")
	flags.BoolVar(&opts.noSync, "no-sync", false, "scan only, don't upload to server")
	return scanCmd
}
// runScan analyzes the videos under dirPath, stores the result in the local
// library cache, prints a summary and, unless noSync is set, uploads the items
// to the server.
//
// workers and ffprobePath fall back to the configured values when left at
// their zero value; workers finally defaults to 8. When the global JSON output
// flag is set, the cache is encoded to stdout and the function returns without
// prewarming sidecars or syncing.
//
// Signal handling: SIGTERM cancels the context of whichever phase is running.
// Ctrl-C is scoped to the current phase, so interrupting the sidecar prewarm
// (advertised as "Ctrl-C to skip") cancels only the prewarm and the sync that
// follows still gets a live context.
//
// It returns an error when dirPath is missing, inaccessible or not a
// directory, when the scan or the cache save fails, or when the sync fails.
func runScan(dirPath string, workers int, ffprobePath string, noSync bool) error {
	// Validate path. Only a genuinely missing path is reported as "not found";
	// any other stat failure (e.g. permission denied) keeps its cause.
	info, err := os.Stat(dirPath)
	switch {
	case os.IsNotExist(err):
		return fmt.Errorf("path not found: %s", dirPath)
	case err != nil:
		return fmt.Errorf("cannot access path: %w", err)
	case !info.IsDir():
		return fmt.Errorf("not a directory: %s", dirPath)
	}

	cfg := loadConfig()

	// Resolve workers: flag → config → default 8
	if workers == 0 {
		workers = cfg.Library.Workers
	}
	if workers == 0 {
		workers = 8
	}

	// Resolve ffprobe path from flag → config
	if ffprobePath == "" {
		ffprobePath = cfg.Library.FFprobePath
	}

	// Load existing cache for incremental scanning. The error is deliberately
	// ignored (best-effort): a nil cache simply disables incremental mode below.
	existing, _ := library.LoadCache()

	// termCtx is cancelled by SIGTERM and is the parent of every phase context.
	termCtx, stopTerm := signal.NotifyContext(context.Background(), syscall.SIGTERM)
	defer stopTerm()

	// ctx governs the scan phase only. Its Ctrl-C registration stays active
	// until return, so an interrupt never falls back to the default
	// process-kill behaviour between phases.
	ctx, stop := signal.NotifyContext(termCtx, os.Interrupt)
	defer stop()

	bold := color.New(color.Bold)
	bold.Printf("\n Scanning %s...\n\n", dirPath)

	// Longest name, in characters, shown on the progress line.
	const progressNameMax = 50

	// Scan
	cache, err := library.Scan(ctx, dirPath, existing, library.ScanOptions{
		Workers:     workers,
		FFprobePath: ffprobePath,
		Incremental: existing != nil,
		OnProgress: func(scanned, total int, current string) {
			// Keep only the tail of long names. Cut on rune boundaries so a
			// multi-byte character is never split into invalid UTF-8.
			if runes := []rune(current); len(runes) > progressNameMax {
				current = "..." + string(runes[len(runes)-(progressNameMax-3):])
			}
			fmt.Fprintf(os.Stderr, "\r Scanning %d/%d — %s\033[K", scanned, total, current)
		},
	})
	// Clear the progress line first so an error is not printed onto it.
	fmt.Fprintf(os.Stderr, "\r\033[K")
	if err != nil {
		return fmt.Errorf("scan failed: %w", err)
	}

	// Save cache
	if err := library.SaveCache(cache); err != nil {
		return fmt.Errorf("save cache: %w", err)
	}

	// Remember scan path in config. Failing to persist it is not fatal for
	// this run, but it is reported instead of being silently dropped.
	if cfg.Library.ScanPath != dirPath {
		cfg.Library.ScanPath = dirPath
		if err := config.Save(cfg, cfgFile); err != nil {
			fmt.Fprintf(os.Stderr, " Warning: could not save scan path to config: %v\n", err)
		}
	}

	// Print summary
	printScanSummary(cache)

	// JSON output mode
	// NOTE(review): the summary above is also written to stdout, so the JSON
	// document is preceded by human-readable text — confirm this is intended.
	if jsonOut {
		enc := json.NewEncoder(os.Stdout)
		enc.SetIndent("", " ")
		return enc.Encode(cache)
	}

	// Pre-extract sidecars (text subs → WebVTT, panel frames → JPEG) into a hidden
	// ".unarr" dir so playback gets instant subtitles/thumbnails and huge remuxes
	// never hit the on-demand timeout. Best-effort + Ctrl-C interruptible (the scan
	// itself is already saved).
	if cfg.Library.CacheSubtitles || cfg.Library.CacheThumbnails || cfg.Library.Trickplay.Enabled {
		if ff, err := mediainfo.ResolveFFmpeg(cfg.Library.FFmpegPath); err == nil {
			fmt.Fprintf(os.Stderr, " Pre-extracting subtitles + thumbnails to cache… (Ctrl-C to skip)\n")
			// Fresh Ctrl-C scope: an interrupt here must not poison the sync.
			prewarmCtx, stopPrewarm := signal.NotifyContext(termCtx, os.Interrupt)
			library.PrewarmSidecars(prewarmCtx, cache, library.PrewarmOptions{
				FFmpegPath:           ff,
				CacheSubtitles:       cfg.Library.CacheSubtitles,
				CacheThumbnails:      cfg.Library.CacheThumbnails,
				Workers:              2,
				Trickplay:            cfg.Library.Trickplay.Enabled,
				TrickplayIntervalSec: cfg.Library.Trickplay.IntervalSeconds(),
				TrickplayWidth:       cfg.Library.Trickplay.Width,
				MaxLoadRatio:         cfg.Library.PrewarmMaxLoadRatio,
			})
			stopPrewarm()
		} else {
			fmt.Fprintf(os.Stderr, " Skipping sidecar prewarm: ffmpeg unavailable: %v\n", err)
		}
	}

	if noSync {
		return nil
	}

	// Sync to server under its own Ctrl-C scope (still a child of termCtx).
	syncCtx, stopSync := signal.NotifyContext(termCtx, os.Interrupt)
	defer stopSync()
	return syncToServer(syncCtx, cfg, cache)
}
// syncToServer uploads the scanned library to the server in batches of 100.
//
// The API key comes from the command-line flag and falls back to the
// configured one. When no key is available, or when the cache yields no sync
// items, a warning is printed and nil is returned. Every batch carries the
// same SyncStartedAt timestamp, and the final batch is flagged with
// IsLastBatch (presumably so the server can reconcile removals — confirm
// against the API). The first failing batch aborts the sync and its error is
// returned wrapped.
func syncToServer(ctx context.Context, cfg config.Config, cache *library.LibraryCache) error {
	apiKey := apiKeyFlag
	if apiKey == "" {
		apiKey = cfg.Auth.APIKey
	}
	if apiKey == "" {
		color.Yellow("\n ⚠ No API key configured. Run 'unarr init' to set up, or use --no-sync.")
		return nil
	}

	ac := agent.NewClient(cfg.Auth.APIURL, apiKey, "unarr/"+Version)

	items := library.BuildSyncItems(cache)
	if len(items) == 0 {
		color.Yellow("\n No valid items to sync.")
		return nil
	}

	// Send in batches of 100
	const batchSize = 100
	var totalSynced, totalMatched, totalRemoved int
	syncStartedAt := time.Now().UTC().Format(time.RFC3339)

	for i := 0; i < len(items); i += batchSize {
		end := i + batchSize
		if end > len(items) {
			end = len(items)
		}

		fmt.Fprintf(os.Stderr, "\r Syncing %d/%d items...\033[K", end, len(items))
		resp, err := ac.SyncLibrary(ctx, agent.LibrarySyncRequest{
			Items:         items[i:end],
			ScanPath:      cache.Path,
			AgentID:       cfg.Agent.ID,
			IsLastBatch:   end >= len(items),
			SyncStartedAt: syncStartedAt,
		})
		if err != nil {
			// Clear the progress line so the error is not printed onto it.
			fmt.Fprintf(os.Stderr, "\r\033[K")
			return fmt.Errorf("sync failed: %w", err)
		}
		totalSynced += resp.Synced
		totalMatched += resp.Matched
		totalRemoved += resp.Removed
	}
	fmt.Fprintf(os.Stderr, "\r\033[K") // clear progress line

	green := color.New(color.FgGreen)
	green.Printf("\n ✓ Synced %d items (%d matched, %d removed)\n", totalSynced, totalMatched, totalRemoved)

	apiURL := strings.TrimSuffix(cfg.Auth.APIURL, "/")
	fmt.Printf(" → View upgrades at %s/library\n\n", apiURL)
	return nil
}
// runScanStatus prints the summary of the cached library scan.
//
// It returns an error when the cache cannot be loaded, or when loading
// succeeds but yields no cache.
func runScanStatus() error {
	switch cached, loadErr := library.LoadCache(); {
	case loadErr != nil:
		return fmt.Errorf("load cache: %w", loadErr)
	case cached == nil:
		return fmt.Errorf("no library scan found. Run 'unarr scan <path>' first")
	default:
		printScanSummary(cached)
		return nil
	}
}
// printScanSummary writes a human-readable overview of cache to stdout: the
// file total, per-resolution and per-HDR counts, the five most frequent
// languages, and the number of files whose scan failed.
//
// Items with a scan error are counted as errors only. Items without video
// media info are included in the total but in none of the tables.
func printScanSummary(cache *library.LibraryCache) {
	bold := color.New(color.Bold)
	dim := color.New(color.Faint)

	total := len(cache.Items)
	failed := 0
	resCount := map[string]int{}
	hdrCount := map[string]int{}
	langCount := map[string]int{}

	for _, item := range cache.Items {
		if item.ScanError != "" {
			failed++
			continue
		}
		if item.MediaInfo == nil || item.MediaInfo.Video == nil {
			continue
		}

		res := library.ResolveResolution(item.MediaInfo.Video.Width, item.MediaInfo.Video.Height)
		if res == "" {
			res = "other"
		}
		resCount[res]++

		hdr := item.MediaInfo.Video.HDR
		if hdr == "" {
			hdr = "SDR"
		}
		hdrCount[hdr]++

		for _, lang := range item.MediaInfo.Languages {
			langCount[lang]++
		}
	}

	// printCounts prints one row per bucket: the preferred keys first, in the
	// given order, then any remaining keys alphabetically so that a bucket
	// missing from the preferred list is not silently dropped.
	printCounts := func(counts map[string]int, preferred []string) {
		listed := make(map[string]bool, len(preferred))
		for _, key := range preferred {
			listed[key] = true
			if n, ok := counts[key]; ok {
				fmt.Printf(" %-14s%d\n", key, n)
			}
		}
		var rest []string
		for key := range counts {
			if !listed[key] {
				rest = append(rest, key)
			}
		}
		sort.Strings(rest)
		for _, key := range rest {
			fmt.Printf(" %-14s%d\n", key, counts[key])
		}
	}

	bold.Printf("\n Library scan complete — %d files in %s\n", total, cache.Path)
	dim.Printf(" Scanned at: %s\n\n", cache.ScannedAt)

	// Resolution table
	bold.Println(" Resolution Files")
	dim.Println(" ─────────────────────")
	printCounts(resCount, []string{"2160p", "1080p", "720p", "480p", "other"})

	// HDR table
	fmt.Println()
	bold.Println(" HDR Files")
	dim.Println(" ─────────────────────")
	printCounts(hdrCount, []string{"DV+HDR10", "DV", "HDR10", "HLG", "SDR"})

	// Top languages
	if len(langCount) > 0 {
		fmt.Println()
		type langEntry struct {
			lang  string
			count int
		}
		langs := make([]langEntry, 0, len(langCount))
		for l, c := range langCount {
			langs = append(langs, langEntry{l, c})
		}
		// Most frequent first. Ties are broken by language code: map iteration
		// order is random and sort.Slice is not stable, so without the
		// tie-break the output would differ from run to run.
		sort.Slice(langs, func(i, j int) bool {
			if langs[i].count != langs[j].count {
				return langs[i].count > langs[j].count
			}
			return langs[i].lang < langs[j].lang
		})

		top := langs
		if len(top) > 5 {
			top = top[:5]
		}
		parts := make([]string, len(top))
		for i, l := range top {
			parts[i] = fmt.Sprintf("%s (%d)", strings.ToUpper(l.lang), l.count)
		}
		bold.Print(" Top languages: ")
		fmt.Println(strings.Join(parts, ", "))
	}

	if failed > 0 {
		fmt.Println()
		color.Yellow(" Scan errors: %d files (run with --verbose for details)", failed)
	}
	fmt.Println()
}