Some checks failed
CI / Test (push) Failing after 6m18s
CI / Build (push) Successful in 1m32s
CI / Build-1 (push) Successful in 1m55s
CI / Build-2 (push) Successful in 1m33s
CI / Build-3 (push) Successful in 1m32s
CI / Build-4 (push) Successful in 1m35s
CI / Build-5 (push) Successful in 1m33s
CI / Lint (push) Failing after 2m50s
CI / Coverage (push) Successful in 2m58s
CI / Vet (push) Successful in 2m7s
Tras cada scan, localiza la intro (OP) y los créditos (ED) comparando fingerprints chromaprint entre episodios de la misma temporada — reimplementación limpia del enfoque de Intro Skipper: índice invertido de uint32, alineamiento por shifts, Hamming ≤6/32, región contigua más larga (15-120s intro / 15-450s créditos). Películas: inicio de créditos por rachas de blackframe (solo keyframes, -skip_frame nokey) que llegan al final del fichero. - fpcalc se auto-descarga de las releases estáticas de acoustid (linux/macos/windows, ~2MB) con el mismo patrón que ffmpeg/ffprobe. - Resultados cacheados como sidecar .skipseg.json (mtime + versión de algoritmo); solo los ficheros nuevos trabajan. - Submit a /api/internal/agent/skip-segments DESPUÉS del library-sync, en dos fases (episodios primero, películas después) para que la fase rápida no espere a los blackframe lentos sobre NAS. - Agrupación por (dir + título-pre-SxxEyy + season): los títulos parseados arrastran nombre de episodio y tags de release. - Gotcha cazado en vivo: fpcalc -length sale sin drenar el pipe; hay que cerrar nuestro read-end o ffmpeg queda bloqueado para siempre. - config: library.skip_detect (default true, backfill) y scan_interval default 24h → 1h (estilo Plex).
279 lines
8.9 KiB
Go
279 lines
8.9 KiB
Go
package mediainfo
|
|
|
|
import (
|
|
"bufio"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"math/bits"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// Chromaprint-based shared-audio detection. Episodes of the same season share
|
|
// an identical intro (OP) and credits (ED) audio track; fingerprinting a window
|
|
// of each episode and finding the longest aligned low-hamming-distance region
|
|
// between two episodes localizes those segments. Clean-room implementation of
|
|
// the approach popularized by Jellyfin's Intro Skipper plugin.
|
|
//
|
|
// Fingerprint stream: chromaprint emits one uint32 per ~0.1238s of audio
|
|
// (11025 Hz mono, FFT 4096, 2/3 overlap → ~8.08 points/second).
|
|
|
|
const (
	// ChromaprintSampleDur is the amount of audio, in seconds, that a single
	// fingerprint point stands for.
	ChromaprintSampleDur = 0.1238

	// maxTimeSkipSec is the widest gap, in seconds, tolerated between two
	// similar points while a contiguous region is being grown.
	maxTimeSkipSec = 3.5

	// maxHammingBits is the largest XOR popcount at which two points still
	// count as "similar" (the bound is inclusive).
	maxHammingBits = 6
)
|
|
|
|
// SkipSegmentRange describes a single skippable span detected inside a media
// file: what kind of segment it is and where it starts and ends.
type SkipSegmentRange struct {
	// Category names the kind of segment: "intro" or "credits".
	Category string `json:"category"`
	// StartSec is where the segment begins, in seconds.
	StartSec float64 `json:"startSec"`
	// EndSec is where the segment ends, in seconds.
	EndSec float64 `json:"endSec"`
}
|
|
|
|
// FingerprintAudioWindow fingerprints one window of a media file's audio.
//
// It runs ffmpeg to decode [startSec, startSec+lengthSec] of the first audio
// track ("0:a:0") as stereo WAV on stdout, feeds that stream into
// `fpcalc -raw`, and parses the FINGERPRINT= line of fpcalc's output into the
// chromaprint point stream. Both processes are bound to ctx.
//
// lengthSec is truncated to whole seconds for fpcalc's -length flag. An error
// is returned when either process cannot be started, when fpcalc fails, or
// when its output carries no usable fingerprint.
func FingerprintAudioWindow(ctx context.Context, ffmpegPath, fpcalcPath, mediaPath string, startSec, lengthSec float64) ([]uint32, error) {
	ff := exec.CommandContext(ctx, ffmpegPath,
		"-nostdin", "-loglevel", "error",
		"-ss", strconv.FormatFloat(startSec, 'f', 3, 64),
		"-i", mediaPath,
		"-t", strconv.FormatFloat(lengthSec, 'f', 3, 64),
		"-map", "0:a:0",
		"-ac", "2",
		"-f", "wav", "-",
	)
	fp := exec.CommandContext(ctx, fpcalcPath,
		"-raw", "-length", strconv.Itoa(int(lengthSec)), "-")

	pipe, err := ff.StdoutPipe()
	if err != nil {
		return nil, fmt.Errorf("ffmpeg pipe: %w", err)
	}
	fp.Stdin = pipe
	var ffErr strings.Builder
	ff.Stderr = &ffErr

	if err := ff.Start(); err != nil {
		return nil, fmt.Errorf("ffmpeg start: %w", err)
	}
	out, err := fp.Output()
	// fpcalc stops reading once it has processed -length seconds and may exit
	// WITHOUT draining the last buffered bytes. Close our read end so ffmpeg
	// gets EPIPE and exits — otherwise it blocks forever on a full pipe whose
	// only remaining reader is us (caught live: 5-min ctx kills, per file).
	_ = pipe.Close()
	// Always reap ffmpeg; early pipe close makes it exit non-zero — fine as
	// long as fpcalc produced output.
	_ = ff.Wait()
	if err != nil {
		return nil, fmt.Errorf("fpcalc: %w (ffmpeg: %s)", err, strings.TrimSpace(ffErr.String()))
	}
	return parseFpcalcFingerprint(string(out))
}

// parseFpcalcFingerprint extracts the point stream from the first
// FINGERPRINT= line of `fpcalc -raw` output.
func parseFpcalcFingerprint(out string) ([]uint32, error) {
	for _, line := range strings.Split(out, "\n") {
		rest, ok := strings.CutPrefix(strings.TrimSpace(line), "FINGERPRINT=")
		if !ok {
			continue
		}
		// strings.Split never returns an empty slice, so an empty value must be
		// caught here; otherwise it surfaces as a confusing integer parse error.
		if rest == "" {
			return nil, fmt.Errorf("fpcalc produced an empty fingerprint")
		}
		parts := strings.Split(rest, ",")
		points := make([]uint32, 0, len(parts))
		for _, p := range parts {
			// fpcalc may print signed ints; parse wide and truncate.
			v, perr := strconv.ParseInt(strings.TrimSpace(p), 10, 64)
			if perr != nil {
				return nil, fmt.Errorf("fpcalc output parse: %w", perr)
			}
			points = append(points, uint32(v))
		}
		return points, nil
	}
	return nil, fmt.Errorf("no FINGERPRINT line in fpcalc output")
}
|
|
|
|
// SharedRegion describes the longest aligned stretch of similar audio found
// between two fingerprint streams. Every value is in seconds, measured from
// the start of the respective stream.
type SharedRegion struct {
	// AStart and AEnd bound the region within the first stream.
	AStart float64
	AEnd   float64
	// BStart and BEnd bound the same region within the second stream.
	BStart float64
	BEnd   float64
	// Duration is the length of the region.
	Duration float64
}
|
|
|
|
// FindSharedRegion locates the longest contiguous region (bounded by
|
|
// minDur/maxDur seconds) where streams a and b carry near-identical audio at
|
|
// some alignment. Returns nil when no qualifying region exists.
|
|
func FindSharedRegion(a, b []uint32, minDur, maxDur float64) *SharedRegion {
|
|
if len(a) == 0 || len(b) == 0 {
|
|
return nil
|
|
}
|
|
// Inverted index of b: point value → last index seen.
|
|
indexB := make(map[uint32]int, len(b))
|
|
for i, v := range b {
|
|
indexB[v] = i
|
|
}
|
|
// Candidate alignments: exact value matches (±2 on the value tolerates
|
|
// quantization noise between encodes).
|
|
shifts := make(map[int]struct{})
|
|
for i, v := range a {
|
|
for d := -2; d <= 2; d++ {
|
|
if j, ok := indexB[v+uint32(d)]; ok {
|
|
shifts[j-i] = struct{}{}
|
|
}
|
|
}
|
|
}
|
|
|
|
minPoints := int(minDur / ChromaprintSampleDur)
|
|
gapSec := float64(maxTimeSkipSec)
|
|
gapPoints := int(gapSec / ChromaprintSampleDur)
|
|
var best *SharedRegion
|
|
|
|
for shift := range shifts {
|
|
i0 := 0
|
|
if shift < 0 {
|
|
i0 = -shift
|
|
}
|
|
i1 := len(a)
|
|
if len(b)-shift < i1 {
|
|
i1 = len(b) - shift
|
|
}
|
|
if i1-i0 < minPoints {
|
|
continue
|
|
}
|
|
runStart, prev := -1, -1
|
|
flush := func(end int) {
|
|
if runStart < 0 {
|
|
return
|
|
}
|
|
dur := float64(end-runStart) * ChromaprintSampleDur
|
|
if dur >= minDur && dur <= maxDur && (best == nil || dur > best.Duration) {
|
|
best = &SharedRegion{
|
|
AStart: float64(runStart) * ChromaprintSampleDur,
|
|
AEnd: float64(end) * ChromaprintSampleDur,
|
|
BStart: float64(runStart+shift) * ChromaprintSampleDur,
|
|
BEnd: float64(end+shift) * ChromaprintSampleDur,
|
|
Duration: dur,
|
|
}
|
|
}
|
|
}
|
|
for i := i0; i < i1; i++ {
|
|
if bits.OnesCount32(a[i]^b[i+shift]) > maxHammingBits {
|
|
continue
|
|
}
|
|
if prev >= 0 && i-prev > gapPoints {
|
|
flush(prev)
|
|
runStart = i
|
|
} else if runStart < 0 {
|
|
runStart = i
|
|
}
|
|
prev = i
|
|
}
|
|
flush(prev)
|
|
}
|
|
return best
|
|
}
|
|
|
|
// --- Black-frame credits detection (movies: no sibling episode to compare) ---
|
|
|
|
// blackframeRe matches one log entry of ffmpeg's blackframe filter and
// captures the value of its "t:" field (the frame timestamp in seconds).
var blackframeRe = regexp.MustCompile(`frame:\d+\s+pblack:\d+\s+pts:\d+\s+t:([\d.]+)`)

// DetectBlackFrameRuns scans [startSec, startSec+lengthSec] of mediaPath with
// ffmpeg's blackframe filter and returns the timestamps (absolute seconds:
// startSec plus the timestamp ffmpeg logs) of the frames the filter reports,
// with minBlackPct passed as the filter's amount. Used to find the start of
// end credits in movies (classic credits roll on black).
//
// An error is returned when ffmpeg cannot be started, exits unsuccessfully, or
// when its log output cannot be read to the end; no timestamps are returned in
// those cases.
func DetectBlackFrameRuns(ctx context.Context, ffmpegPath, mediaPath string, startSec, lengthSec float64, minBlackPct int) ([]float64, error) {
	// Keyframe-only decode: credits-on-black lasts minutes, so sampling one
	// frame every keyframe interval (~2-10s) finds the run at ~2% of the cost
	// of a full decode — the difference between seconds and minutes per 4K film.
	cmd := exec.CommandContext(ctx, ffmpegPath,
		"-nostdin", "-loglevel", "info",
		"-skip_frame", "nokey",
		"-ss", strconv.FormatFloat(startSec, 'f', 3, 64),
		"-i", mediaPath,
		"-t", strconv.FormatFloat(lengthSec, 'f', 3, 64),
		"-an", "-sn",
		"-vf", fmt.Sprintf("blackframe=amount=%d:threshold=32", minBlackPct),
		"-f", "null", "-",
	)
	stderr, err := cmd.StderrPipe()
	if err != nil {
		return nil, fmt.Errorf("ffmpeg blackframe pipe: %w", err)
	}
	if err := cmd.Start(); err != nil {
		return nil, fmt.Errorf("ffmpeg blackframe start: %w", err)
	}

	var times []float64
	sc := bufio.NewScanner(stderr)
	sc.Buffer(make([]byte, 0, 64*1024), 1024*1024)
	for sc.Scan() {
		m := blackframeRe.FindStringSubmatch(sc.Text())
		if m == nil {
			continue
		}
		if t, perr := strconv.ParseFloat(m[1], 64); perr == nil {
			times = append(times, startSec+t)
		}
	}
	if scanErr := sc.Err(); scanErr != nil {
		// The scanner stopped early (e.g. a line beyond its 1 MiB limit), so
		// nothing drains stderr any more. ffmpeg would block on the full pipe
		// and Wait would stall until ctx expires; kill and reap it instead.
		_ = cmd.Process.Kill()
		_ = cmd.Wait()
		return nil, fmt.Errorf("ffmpeg blackframe: reading log: %w", scanErr)
	}
	if err := cmd.Wait(); err != nil {
		return nil, fmt.Errorf("ffmpeg blackframe: %w", err)
	}
	return times, nil
}
|
|
|
|
// --- Sidecar cache for detected segments ---
|
|
|
|
// skipSegmentsSidecarVersion is the algorithm revision recorded in every
// sidecar. Bump it whenever detection changes enough that previously cached
// results should be recomputed.
const skipSegmentsSidecarVersion = 1
|
|
|
|
// SkipSegmentsSidecar is the cached detection result for one media file.
|
|
type SkipSegmentsSidecar struct {
|
|
Version int `json:"version"`
|
|
DurationSec float64 `json:"durationSec"`
|
|
Segments []SkipSegmentRange `json:"segments"` // empty = analyzed, nothing found
|
|
}
|
|
|
|
func skipSegmentsCachePath(mediaPath string) string {
|
|
return filepath.Join(sidecarDir(mediaPath), filepath.Base(mediaPath)+".skipseg.json")
|
|
}
|
|
|
|
// ReadCachedSkipSegments returns the cached detection result for mediaPath if
|
|
// fresh (newer than the media file) and of the current algorithm version.
|
|
func ReadCachedSkipSegments(mediaPath string) (*SkipSegmentsSidecar, bool) {
|
|
p := skipSegmentsCachePath(mediaPath)
|
|
if !sidecarFresh(p, mediaPath) {
|
|
return nil, false
|
|
}
|
|
data, err := os.ReadFile(p)
|
|
if err != nil {
|
|
return nil, false
|
|
}
|
|
var sc SkipSegmentsSidecar
|
|
if err := json.Unmarshal(data, &sc); err != nil || sc.Version != skipSegmentsSidecarVersion {
|
|
return nil, false
|
|
}
|
|
return &sc, true
|
|
}
|
|
|
|
// WriteCachedSkipSegments persists a detection result next to the media file.
|
|
func WriteCachedSkipSegments(mediaPath string, durationSec float64, segs []SkipSegmentRange) error {
|
|
if segs == nil {
|
|
segs = []SkipSegmentRange{}
|
|
}
|
|
sc := SkipSegmentsSidecar{Version: skipSegmentsSidecarVersion, DurationSec: durationSec, Segments: segs}
|
|
data, err := json.Marshal(sc)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
dir := sidecarDir(mediaPath)
|
|
if err := os.MkdirAll(dir, 0o755); err != nil {
|
|
return err
|
|
}
|
|
return os.WriteFile(skipSegmentsCachePath(mediaPath), data, 0o644)
|
|
}
|