Some checks failed
CI / Test (push) Failing after 6m21s
CI / Build (push) Successful in 1m34s
CI / Build-1 (push) Successful in 2m0s
CI / Build-2 (push) Successful in 1m33s
CI / Build-3 (push) Successful in 1m38s
CI / Build-4 (push) Successful in 1m35s
CI / Build-5 (push) Successful in 1m38s
CI / Lint (push) Failing after 2m34s
CI / Coverage (push) Failing after 2m44s
CI / Vet (push) Successful in 2m3s
Trickplay sprite generation (one full-decode ffmpeg pass per file) could pin a machine: multiple agents on the same library decoded the same 4K file at once, there was no CPU throttling, and crashed/restarted agents orphaned ffmpeg to init (where it ran the full 45-min decode to completion). Stacked orphans spiked a box to load ~140.

- Single-flight lock: O_CREATE|O_EXCL .lock in the shared sidecar dir so two agents watching the same library never decode the same file twice (stale locks are reclaimed after a TTL). Returns ErrTrickplayInProgress → prewarm skips rather than fails.
- Load gate: defer the heavy decode until the 1-min load ≤ max(ratio×NumCPU, 1.5), capped at 15 min so it throttles without ever becoming a permanent off-switch on busy / small hosts. New knob library.prewarm_max_load_ratio (default 0.7).
- Concurrency: trickSem caps trickplay to ONE decode at a time per agent.
- CPU priority: setLowCPUPriority (nice 19) alongside the existing idle ionice.
- No orphans: hardenCmd sets Setpgid + Pdeathsig=SIGKILL, with runtime.LockOSThread around the child so the kernel kills ffmpeg exactly when the agent dies (and not spuriously — golang/go#27505).

Tests: single-flight/stale-reclaim, load-gate immediate/cancel, and an e2e Pdeathsig orphan-kill check.
67 lines
1.9 KiB
Go
67 lines
1.9 KiB
Go
package mediainfo
|
|
|
|
import (
|
|
"errors"
|
|
"os"
|
|
"path/filepath"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
func TestAcquireTrickplayLock_SingleFlight(t *testing.T) {
|
|
lock := filepath.Join(t.TempDir(), "sprite.jpg.lock")
|
|
|
|
release, err := acquireTrickplayLock(lock)
|
|
if err != nil {
|
|
t.Fatalf("first acquire: %v", err)
|
|
}
|
|
if _, statErr := os.Stat(lock); statErr != nil {
|
|
t.Fatalf("lock file not created: %v", statErr)
|
|
}
|
|
|
|
// Second acquire while the first is held → skip sentinel, not a real error.
|
|
if _, err := acquireTrickplayLock(lock); !errors.Is(err, ErrTrickplayInProgress) {
|
|
t.Fatalf("expected ErrTrickplayInProgress, got %v", err)
|
|
}
|
|
|
|
// After release the lock file is gone and it can be re-acquired.
|
|
release()
|
|
if _, statErr := os.Stat(lock); !os.IsNotExist(statErr) {
|
|
t.Fatalf("lock file should be removed after release, stat err = %v", statErr)
|
|
}
|
|
release2, err := acquireTrickplayLock(lock)
|
|
if err != nil {
|
|
t.Fatalf("re-acquire after release: %v", err)
|
|
}
|
|
release2()
|
|
}
|
|
|
|
func TestAcquireTrickplayLock_ReclaimsStale(t *testing.T) {
|
|
lock := filepath.Join(t.TempDir(), "sprite.jpg.lock")
|
|
|
|
// Simulate a crashed worker: a lock file older than the TTL with no live owner.
|
|
if err := os.WriteFile(lock, []byte("deadhost pid=999 t=0\n"), 0o644); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
old := time.Now().Add(-trickplayLockTTL - time.Minute)
|
|
if err := os.Chtimes(lock, old, old); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
release, err := acquireTrickplayLock(lock)
|
|
if err != nil {
|
|
t.Fatalf("stale lock should be reclaimed, got %v", err)
|
|
}
|
|
release()
|
|
}
|
|
|
|
func TestAcquireTrickplayLock_FreshNotReclaimed(t *testing.T) {
|
|
lock := filepath.Join(t.TempDir(), "sprite.jpg.lock")
|
|
if err := os.WriteFile(lock, []byte("livehost pid=123 t=now\n"), 0o644); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
// Fresh mtime (just written) → a live owner is assumed; must NOT be stolen.
|
|
if _, err := acquireTrickplayLock(lock); !errors.Is(err, ErrTrickplayInProgress) {
|
|
t.Fatalf("fresh lock must not be reclaimed, got %v", err)
|
|
}
|
|
}
|