diff --git a/internal/engine/hls.go b/internal/engine/hls.go index 640123d..d9648c6 100644 --- a/internal/engine/hls.go +++ b/internal/engine/hls.go @@ -250,14 +250,10 @@ type HLSSession struct { // fromCache=true means the session is replaying a completed encode and no // ffmpeg subprocess was spawned. writerLockHeld=true means this session // owns the per-key TryAcquireWriter claim — Close must ReleaseWriter. - // subsDone closes when the subtitle extractor goroutine returns (or is - // nil when the source had no subtitle tracks); MarkComplete waits on it - // so a HIT replay never serves partial .vtt files. cache *HLSCache cacheKey string fromCache bool writerLockHeld bool - subsDone chan struct{} } // hlsSeekAhead is how many segments past the writer's current position the @@ -454,10 +450,6 @@ func StartHLSSession(ctx context.Context, cfg HLSSessionConfig) (*HLSSession, er cleanupOnError() return nil, fmt.Errorf("hls: mkdir video: %w", err) } - if err := os.MkdirAll(filepath.Join(tmpDir, "subs"), 0o755); err != nil { - cleanupOnError() - return nil, fmt.Errorf("hls: mkdir subs: %w", err) - } segCount := segmentCountForDuration(probe.DurationSec) @@ -512,18 +504,12 @@ func StartHLSSession(ctx context.Context, cfg HLSSessionConfig) (*HLSSession, er go s.waitFFmpeg() go s.pollSegments(ffCtx) - if len(probe.SubtitleTracks) > 0 { - s.subsDone = make(chan struct{}) - // Capture the source ref now (by value): subs are extracted once at - // startup, and a later URL refresh (2c) mutates s.cfg.SourceURL from the - // waitFFmpeg goroutine — passing the URL in keeps extractSubtitles from - // racing that write. - subSrc := cfg.sourceRef() - go func() { - defer close(s.subsDone) - s.extractSubtitles(ffCtx, subSrc) - }() - } + // Subtitles are no longer extracted per-session: the web player fetches each + // text track on demand as WebVTT from the /sub endpoint (subtitleHandler). 
+ // The old per-session extraction wrote subs/sub-N.vtt that nothing requests + // anymore (the master playlist no longer advertises a SUBTITLES group), so + // it was pure wasted ffmpeg work — and its Close() wait could block HLS cache + // persistence on a slow extract. Removed. cachedNote := "" if cfg.Cache != nil { @@ -677,20 +663,7 @@ func (s *HLSSession) Close() error { log.Printf("[hls %s] closed (cache reuse)", shortHLSID(s.cfg.SessionID)) return nil } - // Wait briefly for the subtitle extractor to finish so a cached - // replay never serves half-written .vtt files. Bounded so a stuck - // extractor can't block Close indefinitely; on timeout we treat - // the cache as incomplete and drop it. - subsOK := true - if s.subsDone != nil { - select { - case <-s.subsDone: - case <-time.After(15 * time.Second): - log.Printf("[hls %s] subtitle extractor timeout — not caching", shortHLSID(s.cfg.SessionID)) - subsOK = false - } - } - if subsOK && exitErr == nil && s.allSegmentsPresent() { + if exitErr == nil && s.allSegmentsPresent() { if err := s.cache.MarkComplete(s.cacheKey); err == nil { log.Printf("[hls %s] cache persisted %s", shortHLSID(s.cfg.SessionID), s.cacheKey) return nil @@ -1101,31 +1074,6 @@ func (s *HLSSession) restartFromSegment(targetIdx int) error { return nil } -// ServeSubtitle writes the WebVTT subtitle for the requested track index, if -// extraction has finished. 
-func (s *HLSSession) ServeSubtitle(w http.ResponseWriter, r *http.Request, idx int) { - s.Touch() - if idx < 0 || idx >= len(s.probe.SubtitleTracks) { - http.Error(w, "subtitle track not found", http.StatusNotFound) - return - } - path := filepath.Join(s.tmpDir, "subs", fmt.Sprintf("sub-%d.vtt", idx)) - deadline := time.Now().Add(15 * time.Second) - for { - if fi, err := os.Stat(path); err == nil && fi.Size() > 0 { - break - } - if s.isClosed() || time.Now().After(deadline) { - http.Error(w, "subtitle not yet extracted", http.StatusServiceUnavailable) - return - } - time.Sleep(200 * time.Millisecond) - } - w.Header().Set("Content-Type", "text/vtt; charset=utf-8") - w.Header().Set("Cache-Control", "max-age=3600") - http.ServeFile(w, r, path) -} - // ---- ffmpeg argument builders ---- // buildHLSFFmpegArgs returns the argv for the initial HLS encode (start at 0). @@ -1467,37 +1415,6 @@ func buildHLSFFmpegArgsAt(cfg HLSSessionConfig, probe *StreamProbe, tmpDir strin return args } -// extractSubtitles spawns short-lived ffmpeg jobs to convert each text-based -// subtitle track to WebVTT in parallel. Bitmap subs (PGS, DVB) are skipped — -// they would require burn-in into the video encode, which is out of scope. -func (s *HLSSession) extractSubtitles(ctx context.Context, src string) { - subsDir := filepath.Join(s.tmpDir, "subs") - for i, sub := range s.probe.SubtitleTracks { - if !sub.IsTextSubtitle() { - continue - } - out := filepath.Join(subsDir, fmt.Sprintf("sub-%d.vtt", i)) - args := []string{ - "-y", "-hide_banner", "-loglevel", "warning", - "-i", src, - "-map", fmt.Sprintf("0:s:%d?", i), - "-c:s", "webvtt", - out, - } - // Run sequentially to avoid hammering the disk; subtitle extraction - // is fast enough that parallelism isn't worth the complexity. - cmd := exec.CommandContext(ctx, s.cfg.Transcode.FFmpegPath, args...) 
- if err := cmd.Run(); err != nil { - if ctx.Err() != nil { - return - } - log.Printf("[hls %s] subtitle %d (%s) extract failed: %v", - shortHLSID(s.cfg.SessionID), i, sub.Lang, err) - continue - } - } -} - // ---- Manifest rendering ---- // renderVideoPlaylist builds the VOD media playlist for the video stream. @@ -1538,63 +1455,24 @@ func renderMasterPlaylist(probe *StreamProbe, qualityLabel string) string { b.WriteString("#EXTM3U\n") b.WriteString("#EXT-X-VERSION:7\n") - // Subtitle renditions. We never set DEFAULT=YES or AUTOSELECT=YES on any - // rendition: anime files routinely ship a forced "signs only" English - // track with cues only every few minutes, and stacking that track plus - // the user's locale auto-select produced the "subs broken" report. The - // HLS spec also caps DEFAULT to one per GROUP-ID — "none" trivially - // satisfies it. Names disambiguate when several tracks share the same - // language ("ES", "ES 2", forced suffix). - hasSubs := false - langCounts := make(map[string]int) - for i, s := range probe.SubtitleTracks { - if !s.IsTextSubtitle() { - continue - } - hasSubs = true - lang := s.Lang - if lang == "" { - lang = "und" - } - base := s.Title - if base == "" { - base = strings.ToUpper(lang) - } - key := strings.ToLower(base) - langCounts[key]++ - name := base - if langCounts[key] > 1 { - name = fmt.Sprintf("%s %d", base, langCounts[key]) - } - if s.Forced { - name = name + " (forced)" - } - b.WriteString(fmt.Sprintf( - `#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME=%q,LANGUAGE=%q,DEFAULT=NO,AUTOSELECT=NO,FORCED=%s,URI="subs/sub-%d.m3u8"`+"\n", - name, lang, ynBool(s.Forced), i, - )) - } + // Subtitles are no longer embedded as HLS renditions. The web player attaches + // every TEXT subtitle as an external served on demand by the /sub + // endpoint (subtitleHandler) — ONE source for direct-play AND HLS that works + // under native playback and hls.js alike. 
Embedding them here too would + // double the captions menu under hls.js, and the native-HLS path (Chrome's + // "maybe" canPlayType) never surfaced in-manifest SUBTITLES renditions + // anyway, which is what made subtitles inconsistent. Bitmap subs (PGS/DVB) + // remain burn-in (no WebVTT form). // Video variant. Bandwidth + resolution are best-effort estimates from probe. bw := bitrateForQuality(qualityLabel) w, h := scaledDimensions(probe.Width, probe.Height, qualityHeight(qualityLabel)) codecs := `avc1.4D4028,mp4a.40.2` - streamInf := fmt.Sprintf("#EXT-X-STREAM-INF:BANDWIDTH=%d,RESOLUTION=%dx%d,CODECS=%q", bw, w, h, codecs) - if hasSubs { - streamInf += `,SUBTITLES="subs"` - } - b.WriteString(streamInf + "\n") + b.WriteString(fmt.Sprintf("#EXT-X-STREAM-INF:BANDWIDTH=%d,RESOLUTION=%dx%d,CODECS=%q\n", bw, w, h, codecs)) b.WriteString("video/index.m3u8\n") return b.String() } -func ynBool(b bool) string { - if b { - return "YES" - } - return "NO" -} - // bitrateForQuality returns a synthetic bandwidth attribute for the master // playlist's STREAM-INF — only used by ABR logic, which we don't run yet. 
func bitrateForQuality(q string) int { diff --git a/internal/engine/hls_test.go b/internal/engine/hls_test.go index 7d4cde3..5b19374 100644 --- a/internal/engine/hls_test.go +++ b/internal/engine/hls_test.go @@ -7,15 +7,6 @@ import ( "time" ) -func TestYnBool(t *testing.T) { - if got := ynBool(true); got != "YES" { - t.Errorf("ynBool(true) = %q, want YES", got) - } - if got := ynBool(false); got != "NO" { - t.Errorf("ynBool(false) = %q, want NO", got) - } -} - func TestBitrateForQuality(t *testing.T) { cases := map[string]int{ "2160p": 25_000_000, @@ -144,17 +135,15 @@ func TestRenderMasterPlaylist(t *testing.T) { if !strings.Contains(out, "RESOLUTION=1920x1080") { t.Errorf("expected 1920x1080 resolution, got:\n%s", out) } - if !strings.Contains(out, `SUBTITLES="subs"`) { - t.Errorf("expected subtitles group attached, got:\n%s", out) + // Subtitles are NO LONGER embedded as HLS renditions — the web player + // attaches them as external s (served by /sub). The master playlist + // must therefore carry no SUBTITLES group, no EXT-X-MEDIA, and no SUBTITLES + // attribute on the video variant, even when the source has text subs. 
+ if strings.Contains(out, "SUBTITLES") { + t.Errorf("subtitles must NOT be embedded in the manifest (served as external ), got:\n%s", out) } - if !strings.Contains(out, `LANGUAGE="es"`) || !strings.Contains(out, `LANGUAGE="en"`) { - t.Errorf("expected text subs included, got:\n%s", out) - } - if strings.Contains(out, "hdmv_pgs") || strings.Contains(out, `LANGUAGE="ja"`) { - t.Errorf("bitmap subs should be excluded, got:\n%s", out) - } - if !strings.Contains(out, "(forced)") { - t.Errorf("expected forced suffix on English track, got:\n%s", out) + if strings.Contains(out, "EXT-X-MEDIA") { + t.Errorf("no EXT-X-MEDIA rendition expected, got:\n%s", out) } } diff --git a/internal/engine/stream_server.go b/internal/engine/stream_server.go index 3271ec0..f785d38 100644 --- a/internal/engine/stream_server.go +++ b/internal/engine/stream_server.go @@ -261,6 +261,7 @@ func (ss *StreamServer) Listen(ctx context.Context) error { mux.HandleFunc("/playlist.m3u", ss.playlistHandler) mux.HandleFunc("/hls/", ss.hlsHandler) mux.HandleFunc("/thumbnail", ss.thumbnailHandler) + mux.HandleFunc("/sub", ss.subtitleHandler) // SO_REUSEADDR allows immediate rebind if the port is in TIME_WAIT (e.g. 
after agent restart) lc := net.ListenConfig{ @@ -607,8 +608,6 @@ func (ss *StreamServer) HLSURLsJSON(sessionID string) string { // video/index.m3u8 — video media playlist // video/init.mp4 — fMP4 init segment // video/seg-.m4s — video segment -// subs/sub-.m3u8 — per-subtitle media playlist (synthesised) -// subs/sub-.vtt — WebVTT subtitle (extracted by ffmpeg) func (ss *StreamServer) hlsHandler(w http.ResponseWriter, r *http.Request) { ss.lastActivity.Store(time.Now().UnixNano()) @@ -679,54 +678,14 @@ func (ss *StreamServer) hlsHandler(w http.ResponseWriter, r *http.Request) { return } session.ServeSegment(w, r, idx) - case strings.HasPrefix(resource, "subs/sub-") && strings.HasSuffix(resource, ".m3u8"): - idxStr := strings.TrimSuffix(strings.TrimPrefix(resource, "subs/sub-"), ".m3u8") - idx, err := strconv.Atoi(idxStr) - if err != nil { - http.Error(w, "bad subtitle index", http.StatusBadRequest) - return - } - ss.serveSubtitlePlaylist(w, r, session, idx) - case strings.HasPrefix(resource, "subs/sub-") && strings.HasSuffix(resource, ".vtt"): - idxStr := strings.TrimSuffix(strings.TrimPrefix(resource, "subs/sub-"), ".vtt") - idx, err := strconv.Atoi(idxStr) - if err != nil { - http.Error(w, "bad subtitle index", http.StatusBadRequest) - return - } - session.ServeSubtitle(w, r, idx) default: + // Subtitles are no longer served here — the web player fetches each text + // track on demand from /sub (subtitleHandler). The master playlist no + // longer advertises a SUBTITLES group, so no player requests subs/sub-*. http.Error(w, "unknown hls resource", http.StatusNotFound) } } -// serveSubtitlePlaylist generates a single-VTT-segment HLS playlist on the -// fly so hls.js can consume it as a regular subtitle rendition. The VTT file -// itself is extracted asynchronously by HLSSession.extractSubtitles. 
-func (ss *StreamServer) serveSubtitlePlaylist(w http.ResponseWriter, r *http.Request, session *HLSSession, idx int) { - if idx < 0 || idx >= len(session.probe.SubtitleTracks) { - http.Error(w, "subtitle out of range", http.StatusNotFound) - return - } - dur := session.durationSec - if dur < 1 { - dur = 1 - } - body := strings.Builder{} - body.WriteString("#EXTM3U\n") - body.WriteString("#EXT-X-VERSION:3\n") - body.WriteString("#EXT-X-PLAYLIST-TYPE:VOD\n") - body.WriteString(fmt.Sprintf("#EXT-X-TARGETDURATION:%d\n", int(dur)+1)) - body.WriteString("#EXT-X-MEDIA-SEQUENCE:0\n") - body.WriteString(fmt.Sprintf("#EXTINF:%.3f,\n", dur)) - body.WriteString(fmt.Sprintf("sub-%d.vtt\n", idx)) - body.WriteString("#EXT-X-ENDLIST\n") - - w.Header().Set("Content-Type", "application/vnd.apple.mpegurl") - w.Header().Set("Cache-Control", "no-cache") - _, _ = io.WriteString(w, body.String()) -} - // healthHandler responde con el estado del servidor en JSON. // Útil para diagnosticar conectividad desde redes remotas o Tailscale: // @@ -1000,6 +959,93 @@ func (ss *StreamServer) thumbnailHandler(w http.ResponseWriter, r *http.Request) } } +// subtitleHandler extracts ONE embedded TEXT subtitle stream from a file and +// serves it as WebVTT, on demand. It's the single subtitle source the web +// player uses for BOTH direct-play and HLS (attached as an external ), +// so subtitles are identical regardless of play method or whether playback runs +// natively or via hls.js — no longer dependent on the browser's HLS engine +// surfacing in-manifest renditions. +// +// Mirrors thumbnailHandler: path in ?p= (client-visible), index in ?i=, and the +// token scope binds path+index so a tampered p/i fails verification. 404 on a +// bad token (no oracle). The path is clamped to a regular file as defense in +// depth. Bitmap subs (PGS/DVB) have no text form — those are burned in via the +// HLS path and are not served here; the web only requests text tracks. 
+func (ss *StreamServer) subtitleHandler(w http.ResponseWriter, r *http.Request) { + ss.lastActivity.Store(time.Now().UnixNano()) + if ss.writeCORSHeaders(w, r, "") { + return + } + + q := r.URL.Query() + rawPath := q.Get("p") + if rawPath == "" { + http.Error(w, "missing path", http.StatusBadRequest) + return + } + index, err := strconv.Atoi(q.Get("i")) + if err != nil || index < 0 { + http.Error(w, "bad index", http.StatusBadRequest) + return + } + if !ss.checkStreamToken(streamScopeSub(rawPath, index), q.Get("t")) { + clientIP, _, _ := net.SplitHostPort(r.RemoteAddr) + log.Printf("[sub] rejected from %s — bad/absent token", clientIP) + http.Error(w, "not found", http.StatusNotFound) + return + } + if fi, statErr := os.Stat(rawPath); statErr != nil || !fi.Mode().IsRegular() { + http.Error(w, "not found", http.StatusNotFound) + return + } + if ss.ffmpegPath == "" { + http.Error(w, "subtitles unavailable", http.StatusServiceUnavailable) + return + } + + // A full subtitle track is small (KBs–low MBs); 60s is ample even for a + // long movie's text track and bounds a hung/corrupt ffmpeg. + ctx, cancel := context.WithTimeout(r.Context(), 60*time.Second) + defer cancel() + + // -map 0:s: selects the Nth subtitle stream (same ordering as the + // library scan / probe.json / burn-in si=N). `-c:s webvtt -f webvtt` converts + // srt/ass/mov_text/etc. to WebVTT on stdout. `?` makes the map non-fatal if + // the stream is absent (yields empty output rather than a hard error). + args := []string{ + "-nostdin", + "-loglevel", "error", + "-i", rawPath, + "-map", fmt.Sprintf("0:s:%d?", index), + "-c:s", "webvtt", + "-f", "webvtt", + "-", + } + cmd := exec.CommandContext(ctx, ss.ffmpegPath, args...) 
+ var stderr strings.Builder + cmd.Stderr = &stderr + out, err := cmd.Output() + if err != nil || len(out) == 0 { + log.Printf("[sub] extract failed (i=%d path=%q): err=%v %s", + index, rawPath, err, strings.TrimSpace(stderr.String())) + http.Error(w, "subtitle extract failed", http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "text/vtt; charset=utf-8") + // path+index is stable content for the daemon's lifetime; let the browser + // cache so re-selecting a track doesn't re-run ffmpeg. private — the user's + // own file. + w.Header().Set("Cache-Control", "private, max-age=3600") + w.Header().Set("Content-Length", strconv.Itoa(len(out))) + //nolint:gosec // G705: WebVTT served as text/vtt to a element — not + // HTML, so cue text can't execute; the path is token-scoped + stat'd as a + // regular file, and ffmpeg only emits well-formed WebVTT. + if _, err := w.Write(out); err != nil { + log.Printf("[sub] write failed: %v", err) + } +} + // buildThumbnailArgs builds the ffmpeg argv that decodes ONE frame at posSec and // writes a scaled JPEG to stdout. `-ss` BEFORE `-i` does an input (keyframe) // seek — near-constant time regardless of position — instead of decoding from diff --git a/internal/engine/stream_token.go b/internal/engine/stream_token.go index 78c2883..7f7caf0 100644 --- a/internal/engine/stream_token.go +++ b/internal/engine/stream_token.go @@ -60,6 +60,17 @@ func streamScopeThumb(filePath string) string { return "thumb:" + hex.EncodeToString(sum[:]) } +// streamScopeSub is the token scope for on-demand WebVTT extraction of one text +// subtitle stream from a specific file (the /sub endpoint, used identically by +// direct-play and HLS so subtitles are consistent across both). Binds the file +// path's SHA-256 + the subtitle stream index, so a leaked URL exposes only that +// one track of that one file. The web mints the matching scope in +// src/lib/stream-token.ts (streamScopeSub), byte-for-byte. 
// streamScopeSub builds the token scope for the /sub endpoint: the literal
// "sub:", the lowercase hex SHA-256 of filePath, a ':' separator, and index in
// decimal. Because both the path digest and the subtitle stream index are part
// of the scope, a token minted for one track of one file does not verify for
// any other track or file.
func streamScopeSub(filePath string, index int) string {
	const prefix = "sub:"
	digest := sha256.Sum256([]byte(filePath))

	// Sized for prefix + hex digest + ':' + the longest decimal int64 (20 bytes
	// including the sign), so the appends below never reallocate.
	scope := make([]byte, 0, len(prefix)+hex.EncodedLen(len(digest))+1+20)
	scope = append(scope, prefix...)
	scope = append(scope, hex.EncodeToString(digest[:])...)
	scope = append(scope, ':')
	scope = strconv.AppendInt(scope, int64(index), 10)
	return string(scope)
}

// newStreamSecret returns 32 cryptographically-random bytes used to sign stream
// tokens for the lifetime of the daemon. Regenerated each start, so tokens from
// a previous run stop validating (the web re-resolves the URL on demand).