unarr/internal/library/mediainfo/chromaprint.go
Deivid Soto a710bc1626
Some checks failed
CI / Test (push) Failing after 6m18s
CI / Build (push) Successful in 1m32s
CI / Build-1 (push) Successful in 1m55s
CI / Build-2 (push) Successful in 1m33s
CI / Build-3 (push) Successful in 1m32s
CI / Build-4 (push) Successful in 1m35s
CI / Build-5 (push) Successful in 1m33s
CI / Lint (push) Failing after 2m50s
CI / Coverage (push) Successful in 2m58s
CI / Vet (push) Successful in 2m7s
feat(library): detección de intro/créditos post-scan (skip segments)
Tras cada scan, localiza la intro (OP) y los créditos (ED) comparando
fingerprints chromaprint entre episodios de la misma temporada —
reimplementación limpia del enfoque de Intro Skipper: índice invertido
de uint32, alineamiento por shifts, Hamming ≤6/32, región contigua más
larga (15-120s intro / 15-450s créditos). Películas: inicio de créditos
por rachas de blackframe (solo keyframes, -skip_frame nokey) que llegan
al final del fichero.

- fpcalc se auto-descarga de las releases estáticas de acoustid
  (linux/macos/windows, ~2MB) con el mismo patrón que ffmpeg/ffprobe.
- Resultados cacheados como sidecar .skipseg.json (mtime + versión de
  algoritmo); solo los ficheros nuevos trabajan.
- Submit a /api/internal/agent/skip-segments DESPUÉS del library-sync,
  en dos fases (episodios primero, películas después) para que la
  fase rápida no espere a los blackframe lentos sobre NAS.
- Agrupación por (dir + título-pre-SxxEyy + season): los títulos
  parseados arrastran nombre de episodio y tags de release.
- Gotcha cazado en vivo: fpcalc -length sale sin drenar el pipe; hay
  que cerrar nuestro read-end o ffmpeg queda bloqueado para siempre.
- config: library.skip_detect (default true, backfill) y scan_interval
  default 24h → 1h (estilo Plex).
2026-06-12 19:46:07 +02:00

279 lines
8.9 KiB
Go

package mediainfo
import (
"bufio"
"context"
"encoding/json"
"fmt"
"math/bits"
"os"
"os/exec"
"path/filepath"
"regexp"
"strconv"
"strings"
)
// Chromaprint-based shared-audio detection. Episodes of the same season share
// an identical intro (OP) and credits (ED) audio track; fingerprinting a window
// of each episode and finding the longest aligned low-hamming-distance region
// between two episodes localizes those segments. Clean-room implementation of
// the approach popularized by Jellyfin's Intro Skipper plugin.
//
// Fingerprint stream: chromaprint emits one uint32 per ~0.1238s of audio
// (11025 Hz mono, FFT 4096, 2/3 overlap → ~8.08 points/second).
const (
// ChromaprintSampleDur is the duration, in seconds, of audio represented by
// one fingerprint point; multiplying a point index by it yields a timestamp.
ChromaprintSampleDur = 0.1238
// maxHammingBits is the largest XOR popcount (Hamming distance, out of 32
// bits) at which two points still count as "similar". The comparison is
// inclusive: a distance of exactly maxHammingBits is still a match.
maxHammingBits = 6
// maxTimeSkipSec is the gap tolerance, in seconds, when growing a contiguous
// similar region: matching points further apart than this start a new region.
maxTimeSkipSec = 3.5
)
// SkipSegmentRange is one detected skippable range inside a media file. It is
// serialized to JSON with the field names given in the struct tags.
type SkipSegmentRange struct {
// Category names the kind of segment.
Category string `json:"category"` // "intro" | "credits"
// StartSec is where the range begins, in seconds.
// NOTE(review): presumably measured from the start of the file — confirm against callers.
StartSec float64 `json:"startSec"`
// EndSec is where the range ends, in seconds (same reference as StartSec).
EndSec float64 `json:"endSec"`
}
// FingerprintAudioWindow decodes [startSec, startSec+lengthSec] of the first
// audio track of mediaPath with ffmpeg and pipes the WAV stream into
// `fpcalc -raw`, returning the chromaprint point stream.
//
// ffmpegPath and fpcalcPath are the executables to run; both are started under
// ctx. fpcalc's -length is passed as whole seconds, so lengthSec is truncated
// for fpcalc while ffmpeg receives it with millisecond precision.
//
// It returns an error when either process cannot be started, when fpcalc exits
// unsuccessfully, or when its output carries no usable FINGERPRINT line
// (missing, empty, or not a comma-separated list of integers).
func FingerprintAudioWindow(ctx context.Context, ffmpegPath, fpcalcPath, mediaPath string, startSec, lengthSec float64) ([]uint32, error) {
	ff := exec.CommandContext(ctx, ffmpegPath,
		"-nostdin", "-loglevel", "error",
		"-ss", strconv.FormatFloat(startSec, 'f', 3, 64),
		"-i", mediaPath,
		"-t", strconv.FormatFloat(lengthSec, 'f', 3, 64),
		"-map", "0:a:0",
		"-ac", "2",
		"-f", "wav", "-",
	)
	fp := exec.CommandContext(ctx, fpcalcPath,
		"-raw", "-length", strconv.Itoa(int(lengthSec)), "-")

	pipe, err := ff.StdoutPipe()
	if err != nil {
		return nil, fmt.Errorf("ffmpeg pipe: %w", err)
	}
	fp.Stdin = pipe

	// Keep ffmpeg's diagnostics so a fpcalc failure can report the root cause.
	var ffErr strings.Builder
	ff.Stderr = &ffErr

	if err := ff.Start(); err != nil {
		return nil, fmt.Errorf("ffmpeg start: %w", err)
	}

	out, err := fp.Output()

	// fpcalc stops reading once it has processed -length seconds and may exit
	// WITHOUT draining the last buffered bytes. Close our read end so ffmpeg
	// gets EPIPE and exits — otherwise it blocks forever on a full pipe whose
	// only remaining reader is us (caught live: 5-min ctx kills, per file).
	_ = pipe.Close()

	// Always reap ffmpeg; the early pipe close makes it exit non-zero, which is
	// fine as long as fpcalc produced output, so the error is ignored on purpose.
	_ = ff.Wait()

	if err != nil {
		return nil, fmt.Errorf("fpcalc: %w (ffmpeg: %s)", err, strings.TrimSpace(ffErr.String()))
	}

	for _, line := range strings.Split(string(out), "\n") {
		rest, ok := strings.CutPrefix(strings.TrimSpace(line), "FINGERPRINT=")
		if !ok {
			continue
		}

		// strings.Split never returns an empty slice, so an empty fingerprint
		// must be detected before splitting; otherwise it would surface as a
		// misleading integer-parse error.
		rest = strings.TrimSpace(rest)
		if rest == "" {
			return nil, fmt.Errorf("fpcalc produced an empty fingerprint")
		}

		parts := strings.Split(rest, ",")
		points := make([]uint32, 0, len(parts))
		for _, p := range parts {
			// fpcalc may print signed ints; parse wide and truncate.
			v, perr := strconv.ParseInt(strings.TrimSpace(p), 10, 64)
			if perr != nil {
				return nil, fmt.Errorf("fpcalc output parse: %w", perr)
			}
			points = append(points, uint32(v))
		}
		return points, nil
	}
	return nil, fmt.Errorf("no FINGERPRINT line in fpcalc output")
}
// SharedRegion is the longest aligned similar-audio region between two
// fingerprint streams, in seconds relative to each stream's start.
type SharedRegion struct {
// AStart and AEnd are the timestamps of the first and last matching
// fingerprint points of the region inside stream a.
AStart, AEnd float64
// BStart and BEnd are the same two points located in stream b, i.e. the
// a-side timestamps displaced by the alignment shift between the streams.
BStart, BEnd float64
// Duration is AEnd-AStart (equivalently BEnd-BStart), in seconds.
Duration float64
}
// FindSharedRegion locates the longest contiguous region (bounded by
// minDur/maxDur seconds, both inclusive) where streams a and b carry
// near-identical audio at some alignment. Returns nil when either stream is
// empty or no qualifying region exists.
//
// Two points are near-identical when their Hamming distance is at most
// maxHammingBits; a region may bridge gaps of up to maxTimeSkipSec seconds
// between matching points. A region's duration is measured from its first to
// its last matching point, and a run longer than maxDur is discarded rather
// than trimmed.
//
// The result is deterministic: when several regions share the longest
// duration, the one starting earliest in a wins, and among those the one with
// the smallest shift (earliest position in b).
func FindSharedRegion(a, b []uint32, minDur, maxDur float64) *SharedRegion {
	if len(a) == 0 || len(b) == 0 {
		return nil
	}

	// Inverted index of b: point value → last index seen.
	indexB := make(map[uint32]int, len(b))
	for i, v := range b {
		indexB[v] = i
	}

	// Candidate alignments: exact value matches (±2 on the value tolerates
	// quantization noise between encodes). uint32(d) wraps for negative d, so
	// v+uint32(d) is v+d modulo 2^32.
	shifts := make(map[int]struct{})
	for i, v := range a {
		for d := -2; d <= 2; d++ {
			if j, ok := indexB[v+uint32(d)]; ok {
				shifts[j-i] = struct{}{}
			}
		}
	}

	minPoints := int(minDur / ChromaprintSampleDur)
	// Go through a variable: a constant float expression cannot be truncated
	// to int at compile time.
	gapSec := float64(maxTimeSkipSec)
	gapPoints := int(gapSec / ChromaprintSampleDur)

	// Best run so far, tracked as point indices so that ties are compared
	// exactly and independently of map iteration order.
	found := false
	var bestStart, bestEnd, bestShift int

	// consider records the run [start, end] (indices into a, end inclusive)
	// at the given shift if it is within bounds and beats the current best.
	consider := func(start, end, shift int) {
		dur := float64(end-start) * ChromaprintSampleDur
		inBounds := dur >= minDur && dur <= maxDur
		if !inBounds {
			return
		}
		if found {
			n, bestN := end-start, bestEnd-bestStart
			if n < bestN {
				return
			}
			if n == bestN && (start > bestStart || (start == bestStart && shift >= bestShift)) {
				return
			}
		}
		found = true
		bestStart, bestEnd, bestShift = start, end, shift
	}

	for shift := range shifts {
		// Overlap of a and b at this alignment: a[i] is compared to b[i+shift].
		lo := 0
		if shift < 0 {
			lo = -shift
		}
		hi := len(a)
		if len(b)-shift < hi {
			hi = len(b) - shift
		}
		if hi-lo < minPoints {
			continue
		}

		runStart, prev := -1, -1
		for i := lo; i < hi; i++ {
			if bits.OnesCount32(a[i]^b[i+shift]) > maxHammingBits {
				continue
			}
			switch {
			case runStart < 0:
				// First matching point at this alignment.
				runStart = i
			case i-prev > gapPoints:
				// Gap too wide: close the current run and start a new one.
				consider(runStart, prev, shift)
				runStart = i
			}
			prev = i
		}
		if runStart >= 0 {
			consider(runStart, prev, shift)
		}
	}

	if !found {
		return nil
	}
	return &SharedRegion{
		AStart:   float64(bestStart) * ChromaprintSampleDur,
		AEnd:     float64(bestEnd) * ChromaprintSampleDur,
		BStart:   float64(bestStart+bestShift) * ChromaprintSampleDur,
		BEnd:     float64(bestEnd+bestShift) * ChromaprintSampleDur,
		Duration: float64(bestEnd-bestStart) * ChromaprintSampleDur,
	}
}
// --- Black-frame credits detection (movies: no sibling episode to compare) ---

// blackframeRe matches one log line of ffmpeg's blackframe filter and captures
// the frame timestamp in seconds (the t: field). The pos: field is optional
// because only some ffmpeg versions print it between pblack: and pts:
// (its value may be -1 when the position is unknown).
var blackframeRe = regexp.MustCompile(`frame:\d+\s+pblack:\d+\s+(?:pos:-?\d+\s+)?pts:\d+\s+t:([\d.]+)`)
// DetectBlackFrameRuns scans [startSec, startSec+lengthSec] of mediaPath with
// ffmpeg's blackframe filter and returns the timestamps (absolute seconds,
// i.e. startSec plus the filter-reported time) of frames that are
// ≥minBlackPct black. Used to find the start of end credits in movies
// (classic credits roll on black).
//
// ffmpeg runs under ctx. An error is returned when ffmpeg cannot be started,
// when its log output cannot be read, or when it exits unsuccessfully; in all
// of those cases no timestamps are returned.
func DetectBlackFrameRuns(ctx context.Context, ffmpegPath, mediaPath string, startSec, lengthSec float64, minBlackPct int) ([]float64, error) {
	// Keyframe-only decode: credits-on-black lasts minutes, so sampling one
	// frame every keyframe interval (~2-10s) finds the run at ~2% of the cost
	// of a full decode — the difference between seconds and minutes per 4K film.
	//
	// -nostats suppresses the periodic progress line, which is terminated by
	// '\r' rather than '\n' and would otherwise accumulate into one ever-growing
	// "line" for the scanner below.
	cmd := exec.CommandContext(ctx, ffmpegPath,
		"-nostdin", "-nostats", "-loglevel", "info",
		"-skip_frame", "nokey",
		"-ss", strconv.FormatFloat(startSec, 'f', 3, 64),
		"-i", mediaPath,
		"-t", strconv.FormatFloat(lengthSec, 'f', 3, 64),
		"-an", "-sn",
		"-vf", fmt.Sprintf("blackframe=amount=%d:threshold=32", minBlackPct),
		"-f", "null", "-",
	)

	// The blackframe filter reports through ffmpeg's log, i.e. on stderr.
	stderr, err := cmd.StderrPipe()
	if err != nil {
		return nil, fmt.Errorf("ffmpeg blackframe pipe: %w", err)
	}
	if err := cmd.Start(); err != nil {
		return nil, fmt.Errorf("ffmpeg blackframe start: %w", err)
	}

	var times []float64
	sc := bufio.NewScanner(stderr)
	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	for sc.Scan() {
		m := blackframeRe.FindStringSubmatch(sc.Text())
		if m == nil {
			continue
		}
		if t, perr := strconv.ParseFloat(m[1], 64); perr == nil {
			times = append(times, startSec+t)
		}
	}
	if err := sc.Err(); err != nil {
		// The scanner gave up before EOF, so nobody drains stderr any more and
		// ffmpeg would block on a full pipe. Stop it and reap it; the kill and
		// wait errors are irrelevant next to the read failure being reported.
		_ = cmd.Process.Kill()
		_ = cmd.Wait()
		return nil, fmt.Errorf("ffmpeg blackframe read: %w", err)
	}

	if err := cmd.Wait(); err != nil {
		return nil, fmt.Errorf("ffmpeg blackframe: %w", err)
	}
	return times, nil
}
// --- Sidecar cache for detected segments ---
// skipSegmentsSidecarVersion is stamped into every sidecar written by
// WriteCachedSkipSegments and checked by ReadCachedSkipSegments, which treats
// any other version as a cache miss. Bump it when the detection algorithm
// changes enough that cached results should be recomputed.
const skipSegmentsSidecarVersion = 1
// SkipSegmentsSidecar is the cached detection result for one media file, as
// stored in its JSON sidecar.
type SkipSegmentsSidecar struct {
// Version is the algorithm version the result was produced with; readers
// compare it against skipSegmentsSidecarVersion.
Version int `json:"version"`
// DurationSec is the duration value the caller supplied when the result
// was written.
DurationSec float64 `json:"durationSec"`
// Segments lists the detected ranges. WriteCachedSkipSegments stores an
// empty (non-nil) slice when nothing was detected.
Segments []SkipSegmentRange `json:"segments"` // empty = analyzed, nothing found
}
// skipSegmentsCachePath returns the sidecar location for mediaPath: a file
// named "<base name of mediaPath>.skipseg.json" inside sidecarDir(mediaPath).
func skipSegmentsCachePath(mediaPath string) string {
	dir := sidecarDir(mediaPath)
	name := filepath.Base(mediaPath) + ".skipseg.json"
	return filepath.Join(dir, name)
}
// ReadCachedSkipSegments loads the cached detection result for mediaPath.
// The second return value reports a cache hit; it is false (with a nil
// result) when sidecarFresh rejects the sidecar, when the file cannot be read
// or decoded as JSON, or when it was written by a different algorithm version.
func ReadCachedSkipSegments(mediaPath string) (*SkipSegmentsSidecar, bool) {
	cachePath := skipSegmentsCachePath(mediaPath)
	if !sidecarFresh(cachePath, mediaPath) {
		return nil, false
	}

	raw, readErr := os.ReadFile(cachePath)
	if readErr != nil {
		return nil, false
	}

	cached := new(SkipSegmentsSidecar)
	if json.Unmarshal(raw, cached) != nil {
		return nil, false
	}
	// Results from another algorithm version must be recomputed.
	if cached.Version != skipSegmentsSidecarVersion {
		return nil, false
	}
	return cached, true
}
// WriteCachedSkipSegments persists a detection result for mediaPath as a JSON
// sidecar at skipSegmentsCachePath(mediaPath), creating the sidecar directory
// if needed. The record is stamped with the current algorithm version and the
// given durationSec. It returns the first error hit while encoding, creating
// the directory, or writing the file.
func WriteCachedSkipSegments(mediaPath string, durationSec float64, segs []SkipSegmentRange) error {
	// Normalize nil to an empty slice so the JSON carries "segments": []
	// rather than null.
	if segs == nil {
		segs = make([]SkipSegmentRange, 0)
	}

	payload, err := json.Marshal(SkipSegmentsSidecar{
		Version:     skipSegmentsSidecarVersion,
		DurationSec: durationSec,
		Segments:    segs,
	})
	if err != nil {
		return err
	}

	if err = os.MkdirAll(sidecarDir(mediaPath), 0o755); err != nil {
		return err
	}
	return os.WriteFile(skipSegmentsCachePath(mediaPath), payload, 0o644)
}