feat(library): detección de intro/créditos post-scan (skip segments)
Some checks failed
CI / Test (push) Failing after 6m18s
CI / Build (push) Successful in 1m32s
CI / Build-1 (push) Successful in 1m55s
CI / Build-2 (push) Successful in 1m33s
CI / Build-3 (push) Successful in 1m32s
CI / Build-4 (push) Successful in 1m35s
CI / Build-5 (push) Successful in 1m33s
CI / Lint (push) Failing after 2m50s
CI / Coverage (push) Successful in 2m58s
CI / Vet (push) Successful in 2m7s

Tras cada scan, localiza la intro (OP) y los créditos (ED) comparando
fingerprints chromaprint entre episodios de la misma temporada —
reimplementación limpia del enfoque de Intro Skipper: índice invertido
de uint32, alineamiento por shifts, Hamming ≤6/32, región contigua más
larga (15-120s intro / 15-450s créditos). Películas: inicio de créditos
por rachas de blackframe (solo keyframes, -skip_frame nokey) que llegan
al final del fichero.

- fpcalc se auto-descarga de las releases estáticas de acoustid
  (linux/macos/windows, ~2MB) con el mismo patrón que ffmpeg/ffprobe.
- Resultados cacheados como sidecar .skipseg.json (mtime + versión de
  algoritmo); solo los ficheros nuevos trabajan.
- Submit a /api/internal/agent/skip-segments DESPUÉS del library-sync,
  en dos fases (episodios primero, películas después) para que la
  fase rápida no espere a los blackframe lentos sobre NAS.
- Agrupación por (dir + título-pre-SxxEyy + season): los títulos
  parseados arrastran nombre de episodio y tags de release.
- Gotcha cazado en vivo: fpcalc -length sale sin drenar el pipe; hay
  que cerrar nuestro read-end o ffmpeg queda bloqueado para siempre.
- config: library.skip_detect (default true, backfill) y scan_interval
  default 24h → 1h (estilo Plex).
This commit is contained in:
Deivid Soto 2026-06-12 19:46:07 +02:00
parent 59da949a53
commit a710bc1626
11 changed files with 1223 additions and 5 deletions

View file

@ -0,0 +1,148 @@
package mediainfo
import (
"archive/tar"
"compress/gzip"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"time"
)
// Skip-segment detection relies on fpcalc (chromaprint). The static ffmpeg
// builds downloaded from ffbinaries do NOT ship the chromaprint muxer, so audio
// fingerprinting pipes decoded WAV from our ffmpeg into a standalone fpcalc
// binary instead. acoustid publishes small (~2MB) static builds per platform.
const (
	// fpcalcVersion is the chromaprint release whose fpcalc build is downloaded.
	fpcalcVersion = "1.6.0"

	// maxFpcalcArchiveSize is the upper bound on bytes read from the
	// downloaded archive and from the binary extracted out of it.
	maxFpcalcArchiveSize = 50 * 1024 * 1024 // 50MB
)

// fpcalcDLClient is the HTTP client used for the fpcalc download; its timeout
// bounds the whole request, body read included.
var fpcalcDLClient = &http.Client{Timeout: 5 * time.Minute}
// fpcalcDownloadURL builds the GitHub release asset URL matching the running
// platform. isZip is true when the asset is a zip archive (the Windows build)
// and false for the tar.gz assets. An error is returned for any GOOS/GOARCH
// combination that has no entry below.
func fpcalcDownloadURL() (url string, isZip bool, err error) {
	goos, arch := runtime.GOOS, runtime.GOARCH

	var asset string
	switch {
	case goos == "linux" && arch == "amd64":
		asset = "linux-x86_64.tar.gz"
	case goos == "linux" && arch == "arm64":
		asset = "linux-arm64.tar.gz"
	case goos == "darwin":
		// One universal asset is used for every macOS architecture.
		asset = "macos-universal.tar.gz"
	case goos == "windows" && arch == "amd64":
		asset, isZip = "windows-x86_64.zip", true
	default:
		return "", false, fmt.Errorf("no fpcalc build for platform %s/%s", runtime.GOOS, runtime.GOARCH)
	}

	prefix := fmt.Sprintf("https://github.com/acoustid/chromaprint/releases/download/v%s/chromaprint-fpcalc-%s-", fpcalcVersion, fpcalcVersion)
	return prefix + asset, isZip, nil
}
// FpcalcCachePath reports where the cached fpcalc binary lives: inside the
// directory returned by FFprobeCacheDir (the bin dir shared with the downloaded
// ffmpeg/ffprobe), named "fpcalc", or "fpcalc.exe" on Windows. Any error from
// FFprobeCacheDir is returned unchanged.
func FpcalcCachePath() (string, error) {
	binDir, err := FFprobeCacheDir()
	if err != nil {
		return "", err
	}
	if runtime.GOOS == "windows" {
		return filepath.Join(binDir, "fpcalc.exe"), nil
	}
	return filepath.Join(binDir, "fpcalc"), nil
}
// ResolveFpcalc returns the path of an fpcalc binary to run. Sources are tried
// in order: an fpcalc found on PATH, then the file at FpcalcCachePath if it
// exists, and finally a fresh download into that cache path.
func ResolveFpcalc() (string, error) {
	// 1. An fpcalc already on PATH wins.
	onPath, lookErr := exec.LookPath("fpcalc")
	if lookErr == nil {
		return onPath, nil
	}

	// 2. Otherwise fall back to the cache location.
	cached, err := FpcalcCachePath()
	if err != nil {
		return "", err
	}

	// 3. Any Stat failure on the cached path triggers a download to it.
	_, statErr := os.Stat(cached)
	if statErr != nil {
		return downloadFpcalc(cached)
	}
	return cached, nil
}
// downloadFpcalc fetches the chromaprint release archive for the current
// platform, extracts the fpcalc executable from it and installs it at dest.
// It returns dest on success.
//
// The binary is written to a temporary file in dest's directory and renamed
// into place only once it is complete, so a failed or interrupted write never
// leaves a partial executable at dest (ResolveFpcalc treats any existing file
// at that path as a usable binary).
//
// Errors are returned when the platform has no known build, the HTTP request
// fails or answers with a non-200 status, the archive is larger than
// maxFpcalcArchiveSize, the executable is missing from (or empty in) the
// archive, or the cache directory/file cannot be written.
func downloadFpcalc(dest string) (string, error) {
	url, isZip, err := fpcalcDownloadURL()
	if err != nil {
		return "", err
	}

	fmt.Fprintf(os.Stderr, "fpcalc not found — downloading chromaprint %s...\n", fpcalcVersion)
	resp, err := fpcalcDLClient.Get(url)
	if err != nil {
		return "", fmt.Errorf("fpcalc download failed: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("fpcalc download failed: HTTP %d", resp.StatusCode)
	}

	// Read one byte past the cap so an oversized archive is reported as such
	// instead of being silently truncated and failing later during extraction.
	data, err := io.ReadAll(io.LimitReader(resp.Body, maxFpcalcArchiveSize+1))
	if err != nil {
		return "", fmt.Errorf("fpcalc download read failed: %w", err)
	}
	if len(data) > maxFpcalcArchiveSize {
		return "", fmt.Errorf("fpcalc download failed: archive exceeds %d bytes", maxFpcalcArchiveSize)
	}

	name := "fpcalc"
	if runtime.GOOS == "windows" {
		name = "fpcalc.exe"
	}
	var binary []byte
	if isZip {
		binary, err = extractFromZip(data, name)
	} else {
		binary, err = extractFromTarGz(data, name)
	}
	if err != nil {
		return "", err
	}
	if len(binary) == 0 {
		// An empty file would be cached and then picked up as a valid binary.
		return "", fmt.Errorf("%s in downloaded archive is empty", name)
	}

	dir := filepath.Dir(dest)
	if err := os.MkdirAll(dir, 0o755); err != nil {
		return "", fmt.Errorf("cannot create cache directory: %w", err)
	}

	// Stage the binary next to dest so the final rename stays on one filesystem.
	tmp, err := os.CreateTemp(dir, name+".tmp-*")
	if err != nil {
		return "", fmt.Errorf("cannot write fpcalc binary: %w", err)
	}
	tmpPath := tmp.Name()
	// Best-effort cleanup: after a successful rename the temp path is gone and
	// the resulting error is irrelevant.
	defer func() { _ = os.Remove(tmpPath) }()

	_, writeErr := tmp.Write(binary)
	closeErr := tmp.Close()
	if writeErr != nil {
		return "", fmt.Errorf("cannot write fpcalc binary: %w", writeErr)
	}
	if closeErr != nil {
		return "", fmt.Errorf("cannot write fpcalc binary: %w", closeErr)
	}
	// CreateTemp creates the file with mode 0600; make it executable.
	if err := os.Chmod(tmpPath, 0o755); err != nil {
		return "", fmt.Errorf("cannot write fpcalc binary: %w", err)
	}
	if err := os.Rename(tmpPath, dest); err != nil {
		return "", fmt.Errorf("cannot write fpcalc binary: %w", err)
	}

	fmt.Fprintf(os.Stderr, "fpcalc installed to %s\n", dest)
	return dest, nil
}
// extractFromTarGz scans the gzip-compressed tar archive held in data and
// returns the contents of the first regular file whose base name equals target
// (directories inside the archive are ignored for matching). At most
// maxFpcalcArchiveSize bytes of that entry are read. An error is returned when
// data is not a readable gzip/tar stream or no matching entry exists.
func extractFromTarGz(data []byte, target string) ([]byte, error) {
	// NOTE(review): string(data) copies the whole archive; bytes.NewReader
	// would avoid the copy but needs the bytes package imported in this file.
	unzipped, err := gzip.NewReader(strings.NewReader(string(data)))
	if err != nil {
		return nil, fmt.Errorf("cannot open downloaded archive: %w", err)
	}
	defer unzipped.Close()

	archive := tar.NewReader(unzipped)
	for {
		entry, nextErr := archive.Next()
		switch {
		case nextErr == io.EOF:
			// Walked every entry without a match.
			return nil, fmt.Errorf("%s not found in downloaded archive", target)
		case nextErr != nil:
			return nil, fmt.Errorf("cannot read archive: %w", nextErr)
		}

		if entry.Typeflag != tar.TypeReg || filepath.Base(entry.Name) != target {
			continue
		}
		return io.ReadAll(io.LimitReader(archive, maxFpcalcArchiveSize))
	}
}