feat(library): detección de intro/créditos post-scan (skip segments)
Some checks failed
CI / Test (push) Failing after 6m18s
CI / Build (push) Successful in 1m32s
CI / Build-1 (push) Successful in 1m55s
CI / Build-2 (push) Successful in 1m33s
CI / Build-3 (push) Successful in 1m32s
CI / Build-4 (push) Successful in 1m35s
CI / Build-5 (push) Successful in 1m33s
CI / Lint (push) Failing after 2m50s
CI / Coverage (push) Successful in 2m58s
CI / Vet (push) Successful in 2m7s
Some checks failed
CI / Test (push) Failing after 6m18s
CI / Build (push) Successful in 1m32s
CI / Build-1 (push) Successful in 1m55s
CI / Build-2 (push) Successful in 1m33s
CI / Build-3 (push) Successful in 1m32s
CI / Build-4 (push) Successful in 1m35s
CI / Build-5 (push) Successful in 1m33s
CI / Lint (push) Failing after 2m50s
CI / Coverage (push) Successful in 2m58s
CI / Vet (push) Successful in 2m7s
Tras cada scan, localiza la intro (OP) y los créditos (ED) comparando fingerprints chromaprint entre episodios de la misma temporada — reimplementación limpia del enfoque de Intro Skipper: índice invertido de uint32, alineamiento por shifts, Hamming ≤6/32, región contigua más larga (15-120s intro / 15-450s créditos). Películas: inicio de créditos por rachas de blackframe (solo keyframes, -skip_frame nokey) que llegan al final del fichero. - fpcalc se auto-descarga de las releases estáticas de acoustid (linux/macos/windows, ~2MB) con el mismo patrón que ffmpeg/ffprobe. - Resultados cacheados como sidecar .skipseg.json (mtime + versión de algoritmo); solo los ficheros nuevos trabajan. - Submit a /api/internal/agent/skip-segments DESPUÉS del library-sync, en dos fases (episodios primero, películas después) para que la fase rápida no espere a los blackframe lentos sobre NAS. - Agrupación por (dir + título-pre-SxxEyy + season): los títulos parseados arrastran nombre de episodio y tags de release. - Gotcha cazado en vivo: fpcalc -length sale sin drenar el pipe; hay que cerrar nuestro read-end o ffmpeg queda bloqueado para siempre. - config: library.skip_detect (default true, backfill) y scan_interval default 24h → 1h (estilo Plex).
This commit is contained in:
parent
59da949a53
commit
a710bc1626
11 changed files with 1223 additions and 5 deletions
420
internal/library/skipdetect.go
Normal file
420
internal/library/skipdetect.go
Normal file
|
|
@ -0,0 +1,420 @@
|
|||
package library
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"math"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/torrentclaw/unarr/internal/library/mediainfo"
|
||||
)
|
||||
|
||||
// Skip-segment detection: find intro (OP) and credits (ED) ranges by comparing
|
||||
// chromaprint audio fingerprints between episodes of the same season (episodes
|
||||
// share identical intro/credits audio), plus black-frame credits detection for
|
||||
// movies (no sibling to compare). Results are cached as ".unarr" sidecars and
|
||||
// submitted to the web, which shares them across all users by content identity.
|
||||
|
||||
const (
|
||||
skipMinIntroSec = 15
|
||||
skipMaxIntroSec = 120
|
||||
skipMinCreditsSec = 15
|
||||
skipMaxCreditsSec = 450
|
||||
skipCreditsWindow = 450 // episodes: fingerprint the last N seconds
|
||||
skipIntroWindowCap = 600 // episodes: fingerprint at most the first N seconds
|
||||
skipMinRuntimeSec = 300 // ignore shorts/extras
|
||||
|
||||
movieCreditsWindow = 900 // movies: black-frame scan over the last N seconds
|
||||
movieMinCreditsSec = 60
|
||||
movieMinRuntimeSec = 3600
|
||||
)
|
||||
|
||||
// SkipDetectOptions configures DetectSkipSegments.
|
||||
type SkipDetectOptions struct {
|
||||
FFmpegPath string
|
||||
FpcalcPath string // empty disables episode (chromaprint) detection
|
||||
Workers int // concurrent ffmpeg+fpcalc jobs; default 2
|
||||
Movies bool // also detect movie end credits via black frames
|
||||
}
|
||||
|
||||
// SkipDetection is the outcome for one media file (only files with ≥1 segment
|
||||
// are returned).
|
||||
type SkipDetection struct {
|
||||
Item LibraryItem
|
||||
DurationSec float64
|
||||
Segments []mediainfo.SkipSegmentRange
|
||||
}
|
||||
|
||||
// DetectSkipSegments analyzes the scanned library and returns every file with
|
||||
// detected skippable segments. Idempotent and best-effort: fresh sidecar
|
||||
// results are reused without re-analysis, errors skip the file, ctx cancels
|
||||
// cleanly.
|
||||
func DetectSkipSegments(ctx context.Context, cache *LibraryCache, opts SkipDetectOptions) []SkipDetection {
|
||||
if cache == nil || opts.FFmpegPath == "" {
|
||||
return nil
|
||||
}
|
||||
workers := opts.Workers
|
||||
if workers < 1 {
|
||||
workers = 2
|
||||
}
|
||||
|
||||
var out []SkipDetection
|
||||
var outMu sync.Mutex
|
||||
add := func(item LibraryItem, dur float64, segs []mediainfo.SkipSegmentRange) {
|
||||
if len(segs) == 0 {
|
||||
return
|
||||
}
|
||||
outMu.Lock()
|
||||
out = append(out, SkipDetection{Item: item, DurationSec: dur, Segments: segs})
|
||||
outMu.Unlock()
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
analyzed, cached := 0, 0
|
||||
|
||||
if opts.FpcalcPath != "" {
|
||||
a, c := detectEpisodeGroups(ctx, cache, opts, workers, add)
|
||||
analyzed += a
|
||||
cached += c
|
||||
}
|
||||
if opts.Movies {
|
||||
a, c := detectMovieCredits(ctx, cache, opts, workers, add)
|
||||
analyzed += a
|
||||
cached += c
|
||||
}
|
||||
|
||||
log.Printf("[skipdetect] %d file(s) analyzed (%d from cache), %d with segments, in %s",
|
||||
analyzed, cached, len(out), time.Since(start).Round(time.Second))
|
||||
return out
|
||||
}
|
||||
|
||||
// seasonEpisodeMarker locates the SxxEyy token in a parsed title so the group
|
||||
// key uses only the SHOW part. Parsed titles keep the episode name + release
|
||||
// tags ("Show S01E09 Embrace and Whisper BILI WEB DL…"), which differ per
|
||||
// file — grouping on the raw title would leave every episode alone.
|
||||
var seasonEpisodeMarker = regexp.MustCompile(`(?i)\bS\d{1,2}\s*E\d{1,4}\b`)
|
||||
|
||||
// seasonGroupKey groups episodes that can share intro/credits audio: same
|
||||
// directory + same show-title prefix + same season. The directory bound keeps
|
||||
// flat mixed folders from exploding into one giant group; cross-show pairs
|
||||
// inside a dir fail closed anyway (unrelated audio never matches).
|
||||
func seasonGroupKey(item LibraryItem) string {
|
||||
title := strings.ToLower(strings.TrimSpace(item.Title))
|
||||
if loc := seasonEpisodeMarker.FindStringIndex(title); loc != nil {
|
||||
title = strings.TrimSpace(title[:loc[0]])
|
||||
}
|
||||
return filepath.Dir(item.FilePath) + "|" + title + "|s" + strconv.Itoa(item.Season)
|
||||
}
|
||||
|
||||
func itemDuration(item LibraryItem) float64 {
|
||||
if item.MediaInfo != nil && item.MediaInfo.Video != nil {
|
||||
return item.MediaInfo.Video.Duration
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// detectEpisodeGroups runs chromaprint comparison inside (title, season)
|
||||
// groups. Returns (analyzed, fromCache) counters.
|
||||
func detectEpisodeGroups(ctx context.Context, cache *LibraryCache, opts SkipDetectOptions, workers int, add func(LibraryItem, float64, []mediainfo.SkipSegmentRange)) (int, int) {
|
||||
groups := make(map[string][]LibraryItem)
|
||||
for _, item := range cache.Items {
|
||||
if item.Season <= 0 || item.Episode <= 0 || item.FilePath == "" {
|
||||
continue
|
||||
}
|
||||
if itemDuration(item) < skipMinRuntimeSec {
|
||||
continue
|
||||
}
|
||||
groups[seasonGroupKey(item)] = append(groups[seasonGroupKey(item)], item)
|
||||
}
|
||||
|
||||
analyzed, fromCache := 0, 0
|
||||
for _, items := range groups {
|
||||
if ctx.Err() != nil {
|
||||
break
|
||||
}
|
||||
// Distinct episode numbers — two releases of the same episode carry
|
||||
// identical full audio (a comparison would match the whole window).
|
||||
eps := make(map[int]struct{})
|
||||
for _, it := range items {
|
||||
eps[it.Episode] = struct{}{}
|
||||
}
|
||||
if len(eps) < 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
// Cached results short-circuit the whole group when complete.
|
||||
needCompute := false
|
||||
cachedSegs := make(map[string]*mediainfo.SkipSegmentsSidecar, len(items))
|
||||
for _, it := range items {
|
||||
if sc, ok := mediainfo.ReadCachedSkipSegments(it.FilePath); ok {
|
||||
cachedSegs[it.FilePath] = sc
|
||||
} else {
|
||||
needCompute = true
|
||||
}
|
||||
}
|
||||
if !needCompute {
|
||||
for _, it := range items {
|
||||
sc := cachedSegs[it.FilePath]
|
||||
analyzed++
|
||||
fromCache++
|
||||
add(it, sc.DurationSec, sc.Segments)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Fingerprint every episode in the group (intro + credits windows).
|
||||
fps := fingerprintGroup(ctx, items, opts, workers)
|
||||
|
||||
sort.Slice(items, func(i, j int) bool { return items[i].Episode < items[j].Episode })
|
||||
for _, it := range items {
|
||||
if ctx.Err() != nil {
|
||||
break
|
||||
}
|
||||
analyzed++
|
||||
if sc, ok := cachedSegs[it.FilePath]; ok {
|
||||
fromCache++
|
||||
add(it, sc.DurationSec, sc.Segments)
|
||||
continue
|
||||
}
|
||||
fp := fps[it.FilePath]
|
||||
if fp == nil {
|
||||
continue
|
||||
}
|
||||
segs := detectForEpisode(it, fp, items, fps)
|
||||
if err := mediainfo.WriteCachedSkipSegments(it.FilePath, fp.duration, segs); err != nil {
|
||||
log.Printf("[skipdetect] sidecar write skipped (%q): %v", it.FilePath, err)
|
||||
}
|
||||
add(it, fp.duration, segs)
|
||||
}
|
||||
}
|
||||
return analyzed, fromCache
|
||||
}
|
||||
|
||||
// episodeFingerprints holds the two fingerprinted windows of one file.
|
||||
type episodeFingerprints struct {
|
||||
duration float64
|
||||
intro []uint32
|
||||
credits []uint32
|
||||
creditsStart float64 // absolute offset of the credits window
|
||||
}
|
||||
|
||||
func fingerprintGroup(ctx context.Context, items []LibraryItem, opts SkipDetectOptions, workers int) map[string]*episodeFingerprints {
|
||||
fps := make(map[string]*episodeFingerprints, len(items))
|
||||
var mu sync.Mutex
|
||||
jobs := make(chan LibraryItem)
|
||||
var wg sync.WaitGroup
|
||||
for i := 0; i < workers; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for it := range jobs {
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
dur := itemDuration(it)
|
||||
introWin := math.Min(0.25*dur, skipIntroWindowCap)
|
||||
credStart := math.Max(0, dur-skipCreditsWindow)
|
||||
jctx, cancel := context.WithTimeout(ctx, 5*time.Minute)
|
||||
intro, err1 := mediainfo.FingerprintAudioWindow(jctx, opts.FFmpegPath, opts.FpcalcPath, it.FilePath, 0, introWin)
|
||||
credits, err2 := mediainfo.FingerprintAudioWindow(jctx, opts.FFmpegPath, opts.FpcalcPath, it.FilePath, credStart, skipCreditsWindow)
|
||||
cancel()
|
||||
if err1 != nil || err2 != nil {
|
||||
if err1 != nil {
|
||||
log.Printf("[skipdetect] fingerprint failed (%q): %v", it.FilePath, err1)
|
||||
} else {
|
||||
log.Printf("[skipdetect] fingerprint failed (%q): %v", it.FilePath, err2)
|
||||
}
|
||||
continue
|
||||
}
|
||||
mu.Lock()
|
||||
fps[it.FilePath] = &episodeFingerprints{duration: dur, intro: intro, credits: credits, creditsStart: credStart}
|
||||
mu.Unlock()
|
||||
}
|
||||
}()
|
||||
}
|
||||
for _, it := range items {
|
||||
// Skip already-cached files only if every OTHER episode can still find
|
||||
// partners — fingerprinting cached files too keeps them available as
|
||||
// comparison partners for the new ones, so always fingerprint.
|
||||
select {
|
||||
case jobs <- it:
|
||||
case <-ctx.Done():
|
||||
}
|
||||
if ctx.Err() != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
close(jobs)
|
||||
wg.Wait()
|
||||
return fps
|
||||
}
|
||||
|
||||
// detectForEpisode compares one episode against partners (nearest different
|
||||
// episode numbers first, up to 3) and returns its detected segments.
|
||||
func detectForEpisode(it LibraryItem, fp *episodeFingerprints, items []LibraryItem, fps map[string]*episodeFingerprints) []mediainfo.SkipSegmentRange {
|
||||
type partner struct {
|
||||
fp *episodeFingerprints
|
||||
dist int
|
||||
}
|
||||
var partners []partner
|
||||
for _, other := range items {
|
||||
if other.FilePath == it.FilePath || other.Episode == it.Episode {
|
||||
continue
|
||||
}
|
||||
ofp := fps[other.FilePath]
|
||||
if ofp == nil {
|
||||
continue
|
||||
}
|
||||
d := other.Episode - it.Episode
|
||||
if d < 0 {
|
||||
d = -d
|
||||
}
|
||||
partners = append(partners, partner{fp: ofp, dist: d})
|
||||
}
|
||||
sort.Slice(partners, func(i, j int) bool { return partners[i].dist < partners[j].dist })
|
||||
if len(partners) > 3 {
|
||||
partners = partners[:3]
|
||||
}
|
||||
|
||||
segs := make([]mediainfo.SkipSegmentRange, 0, 2)
|
||||
|
||||
for _, p := range partners {
|
||||
r := mediainfo.FindSharedRegion(fp.intro, p.fp.intro, skipMinIntroSec, skipMaxIntroSec)
|
||||
if r == nil {
|
||||
continue
|
||||
}
|
||||
start, end := r.AStart, r.AEnd
|
||||
if start <= 5 { // OP at the head — snap to the very start
|
||||
start = 0
|
||||
}
|
||||
segs = append(segs, mediainfo.SkipSegmentRange{Category: "intro", StartSec: round1(start), EndSec: round1(end)})
|
||||
break
|
||||
}
|
||||
|
||||
for _, p := range partners {
|
||||
// A near-full-window match means the two files share ALL audio (same
|
||||
// episode content) — not a credits segment.
|
||||
r := mediainfo.FindSharedRegion(fp.credits, p.fp.credits, skipMinCreditsSec, skipMaxCreditsSec)
|
||||
if r == nil || r.Duration >= 0.97*skipCreditsWindow {
|
||||
continue
|
||||
}
|
||||
segs = append(segs, mediainfo.SkipSegmentRange{
|
||||
Category: "credits",
|
||||
StartSec: round1(fp.creditsStart + r.AStart),
|
||||
EndSec: round1(fp.creditsStart + r.AEnd),
|
||||
})
|
||||
break
|
||||
}
|
||||
return segs
|
||||
}
|
||||
|
||||
// detectMovieCredits finds end-credits in movies via sustained black-frame
|
||||
// runs (classic credits roll on black). Single-file, no fingerprinting.
|
||||
func detectMovieCredits(ctx context.Context, cache *LibraryCache, opts SkipDetectOptions, workers int, add func(LibraryItem, float64, []mediainfo.SkipSegmentRange)) (int, int) {
|
||||
var movies []LibraryItem
|
||||
for _, item := range cache.Items {
|
||||
if item.Season > 0 || item.Episode > 0 || item.FilePath == "" {
|
||||
continue
|
||||
}
|
||||
if itemDuration(item) < movieMinRuntimeSec {
|
||||
continue
|
||||
}
|
||||
movies = append(movies, item)
|
||||
}
|
||||
|
||||
analyzed, fromCache := 0, 0
|
||||
var mu sync.Mutex
|
||||
jobs := make(chan LibraryItem)
|
||||
var wg sync.WaitGroup
|
||||
for i := 0; i < workers; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
for it := range jobs {
|
||||
if ctx.Err() != nil {
|
||||
return
|
||||
}
|
||||
dur := itemDuration(it)
|
||||
if sc, ok := mediainfo.ReadCachedSkipSegments(it.FilePath); ok {
|
||||
mu.Lock()
|
||||
analyzed++
|
||||
fromCache++
|
||||
mu.Unlock()
|
||||
add(it, sc.DurationSec, sc.Segments)
|
||||
continue
|
||||
}
|
||||
winStart := math.Max(0, dur-movieCreditsWindow)
|
||||
jctx, cancel := context.WithTimeout(ctx, 10*time.Minute)
|
||||
times, err := mediainfo.DetectBlackFrameRuns(jctx, opts.FFmpegPath, it.FilePath, winStart, movieCreditsWindow, 85)
|
||||
cancel()
|
||||
if err != nil {
|
||||
log.Printf("[skipdetect] blackframe failed (%q): %v", it.FilePath, err)
|
||||
continue
|
||||
}
|
||||
segs := creditsFromBlackRuns(times, dur)
|
||||
if werr := mediainfo.WriteCachedSkipSegments(it.FilePath, dur, segs); werr != nil {
|
||||
log.Printf("[skipdetect] sidecar write skipped (%q): %v", it.FilePath, werr)
|
||||
}
|
||||
mu.Lock()
|
||||
analyzed++
|
||||
mu.Unlock()
|
||||
add(it, dur, segs)
|
||||
}
|
||||
}()
|
||||
}
|
||||
for _, it := range movies {
|
||||
select {
|
||||
case jobs <- it:
|
||||
case <-ctx.Done():
|
||||
}
|
||||
if ctx.Err() != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
close(jobs)
|
||||
wg.Wait()
|
||||
return analyzed, fromCache
|
||||
}
|
||||
|
||||
// creditsFromBlackRuns picks the credits start from black-frame timestamps:
|
||||
// the longest run of black frames (gaps ≤30s between hits) that reaches the
|
||||
// end of the file (within 90s — post-credits scenes break the run and are
|
||||
// kept watchable). Requires ≥60s of credits to avoid fade-to-black scenes.
|
||||
func creditsFromBlackRuns(times []float64, durationSec float64) []mediainfo.SkipSegmentRange {
|
||||
if len(times) == 0 {
|
||||
return nil
|
||||
}
|
||||
const maxGap = 30.0
|
||||
bestStart, bestEnd := -1.0, -1.0
|
||||
runStart := times[0]
|
||||
prev := times[0]
|
||||
flush := func(end float64) {
|
||||
if end-runStart > bestEnd-bestStart {
|
||||
bestStart, bestEnd = runStart, end
|
||||
}
|
||||
}
|
||||
for _, t := range times[1:] {
|
||||
if t-prev > maxGap {
|
||||
flush(prev)
|
||||
runStart = t
|
||||
}
|
||||
prev = t
|
||||
}
|
||||
flush(prev)
|
||||
|
||||
if bestStart < 0 || bestEnd-bestStart < movieMinCreditsSec {
|
||||
return nil
|
||||
}
|
||||
if durationSec-bestEnd > 90 { // run doesn't reach the end → mid-film scene
|
||||
return nil
|
||||
}
|
||||
return []mediainfo.SkipSegmentRange{{Category: "credits", StartSec: round1(bestStart), EndSec: round1(durationSec)}}
|
||||
}
|
||||
|
||||
func round1(v float64) float64 { return math.Round(v*10) / 10 }
|
||||
Loading…
Add table
Add a link
Reference in a new issue