feat(usenet): implement full NNTP download pipeline

Complete usenet download support for unarr CLI: - NZB XML parser with password extraction from <head> meta - yEnc decoder with CRC32 verification - NNTP client with TLS, auth, and connection pool (up to 10 conns) - Segment downloader with parallel workers and progress reporting - Post-processing: par2 verify/repair, unrar/7z extraction with password support - Agent client methods: SearchNzbs, DownloadNzb, GetUsenetCredentials - UsenetDownloader implementing full Downloader interface - Daemon wiring: UsenetDownloader passed to Manager E2E tested: Oppenheimer 1080p (2.94 GB) downloaded via NNTP in 77.6s.
2026-03-28 21:12:12 +01:00 · 2026-03-28 21:12:12 +01:00 · e332c0a6e4
commit e332c0a6e4
parent 5f337eebd7
15 changed files with 3016 additions and 23 deletions
--- a/internal/usenet/nzb/parser.go
+++ b/internal/usenet/nzb/parser.go
@ -0,0 +1,365 @@
+package nzb
+
+import (
+	"encoding/xml"
+	"fmt"
+	"io"
+	"path/filepath"
+	"regexp"
+	"strconv"
+	"strings"
+)
+
+// NZB represents a parsed NZB file containing one or more files to download.
+// Parse only returns an NZB that holds at least one file with a usable segment.
+type NZB struct {
+	Files    []File
+	Password string            // from <meta type="password"> in <head>; empty when absent
+	Meta     map[string]string // all <meta> entries from <head>, keyed by type attribute, values whitespace-trimmed
+}
+
+// File represents a single file within an NZB, composed of multiple segments.
+// Poster, Date and Subject come from the attributes of the <file> element;
+// Date is the attribute parsed as a base-10 integer (0 when it does not parse),
+// presumably a Unix timestamp — confirm against producers.
+type File struct {
+	Poster   string
+	Date     int64
+	Subject  string
+	Groups   []string
+	Segments []Segment
+}
+
+// Segment represents a single NNTP article segment of a file.
+// Bytes and Number are taken from the bytes/number attributes of <segment>
+// and are 0 when the attribute is missing or not a valid integer.
+type Segment struct {
+	Bytes     int64
+	Number    int
+	MessageID string // message-id without angle brackets
+}
+
+// xmlNZB is the raw XML structure for parsing: the <nzb> root with its
+// optional <head> and its <file> children.
+type xmlNZB struct {
+	XMLName xml.Name  `xml:"nzb"`
+	Head    xmlHead   `xml:"head"`
+	Files   []xmlFile `xml:"file"`
+}
+
+// xmlHead mirrors <head>, which carries the <meta> entries.
+type xmlHead struct {
+	Meta []xmlMeta `xml:"meta"`
+}
+
+// xmlMeta mirrors one <meta type="...">value</meta> entry.
+type xmlMeta struct {
+	Type  string `xml:"type,attr"`
+	Value string `xml:",chardata"`
+}
+
+// xmlFile mirrors one <file> element. Date is kept as a string here and
+// converted to an integer by Parse.
+type xmlFile struct {
+	Poster   string      `xml:"poster,attr"`
+	Date     string      `xml:"date,attr"`
+	Subject  string      `xml:"subject,attr"`
+	Groups   xmlGroups   `xml:"groups"`
+	Segments xmlSegments `xml:"segments"`
+}
+
+// xmlGroups mirrors <groups>, the list of <group> names of a file.
+type xmlGroups struct {
+	Groups []string `xml:"group"`
+}
+
+// xmlSegments mirrors <segments>, the list of <segment> entries of a file.
+type xmlSegments struct {
+	Segments []xmlSegment `xml:"segment"`
+}
+
+// xmlSegment mirrors one <segment>. The numeric attributes stay strings so
+// that a malformed value does not abort decoding; the element text is the
+// message-id.
+type xmlSegment struct {
+	Bytes     string `xml:"bytes,attr"`
+	Number    string `xml:"number,attr"`
+	MessageID string `xml:",chardata"`
+}
+
+// Parse reads and parses an NZB XML document from the given reader.
+//
+// Every <meta> entry in <head> with a non-empty type attribute is stored in
+// Meta with its value whitespace-trimmed; Password is the "password" entry.
+// Group names and message-ids are whitespace-trimmed, and message-ids also
+// lose one pair of surrounding angle brackets. Segments with an empty
+// message-id, and files left without any segment, are dropped.
+//
+// The numeric attributes (date, bytes, number) are parsed best-effort: a
+// missing or malformed value becomes 0 instead of failing the whole parse.
+//
+// Parse returns an error when the XML cannot be decoded, when the document
+// contains no <file> element, or when no file keeps at least one segment.
+func Parse(r io.Reader) (*NZB, error) {
+	var raw xmlNZB
+	if err := xml.NewDecoder(r).Decode(&raw); err != nil {
+		return nil, fmt.Errorf("nzb: xml decode: %w", err)
+	}
+
+	if len(raw.Files) == 0 {
+		return nil, fmt.Errorf("nzb: no files found")
+	}
+
+	nzb := &NZB{
+		Files: make([]File, 0, len(raw.Files)),
+		Meta:  make(map[string]string, len(raw.Head.Meta)),
+	}
+
+	// Parse <head> meta entries
+	for _, m := range raw.Head.Meta {
+		if m.Type != "" {
+			nzb.Meta[m.Type] = strings.TrimSpace(m.Value)
+		}
+	}
+	nzb.Password = nzb.Meta["password"]
+
+	for _, rf := range raw.Files {
+		// Best-effort: an unparsable date is recorded as 0.
+		date, _ := strconv.ParseInt(rf.Date, 10, 64)
+
+		segs := make([]Segment, 0, len(rf.Segments.Segments))
+		for _, rs := range rf.Segments.Segments {
+			msgID := strings.TrimSpace(rs.MessageID)
+			// Strip angle brackets if present
+			msgID = strings.TrimPrefix(msgID, "<")
+			msgID = strings.TrimSuffix(msgID, ">")
+			if msgID == "" {
+				continue
+			}
+
+			// Best-effort: unparsable size/number attributes are recorded as 0.
+			size, _ := strconv.ParseInt(rs.Bytes, 10, 64)
+			num, _ := strconv.Atoi(rs.Number)
+
+			segs = append(segs, Segment{
+				Bytes:     size,
+				Number:    num,
+				MessageID: msgID,
+			})
+		}
+
+		if len(segs) == 0 {
+			continue
+		}
+
+		// Character data is delivered verbatim by encoding/xml, so a
+		// pretty-printed <group> carries surrounding whitespace. Trim it the
+		// same way message-ids are trimmed and drop names that end up empty.
+		var groups []string
+		for _, g := range rf.Groups.Groups {
+			if g = strings.TrimSpace(g); g != "" {
+				groups = append(groups, g)
+			}
+		}
+
+		nzb.Files = append(nzb.Files, File{
+			Poster:   rf.Poster,
+			Date:     date,
+			Subject:  rf.Subject,
+			Groups:   groups,
+			Segments: segs,
+		})
+	}
+
+	if len(nzb.Files) == 0 {
+		return nil, fmt.Errorf("nzb: no valid files with segments found")
+	}
+
+	return nzb, nil
+}
+
+// ParseBytes is a convenience wrapper around Parse for an NZB document that
+// is already held in memory.
+func ParseBytes(data []byte) (*NZB, error) {
+	src := strings.NewReader(string(data))
+	return Parse(src)
+}
+
+// TotalBytes returns the total size of all segments across all files.
+func (n *NZB) TotalBytes() int64 {
+	var total int64
+	for _, f := range n.Files {
+		total += f.TotalBytes()
+	}
+	return total
+}
+
+// TotalSegments returns the total number of segments across all files.
+func (n *NZB) TotalSegments() int {
+	var total int
+	for _, f := range n.Files {
+		total += len(f.Segments)
+	}
+	return total
+}
+
+// ContentFiles returns the files that are neither metadata (as decided by
+// isMetadataFile: par2, nfo, sfv, nzb, txt, jpg, png, url) nor samples (as
+// decided by isSampleFile). The result is nil when nothing qualifies.
+func (n *NZB) ContentFiles() []File {
+	var content []File
+	for i := range n.Files {
+		name := n.Files[i].Filename()
+		if !isMetadataFile(name) && !isSampleFile(name) {
+			content = append(content, n.Files[i])
+		}
+	}
+	return content
+}
+
+// Par2Files returns the files whose extension is ".par2" (case-insensitive).
+// The result is nil when there are none.
+func (n *NZB) Par2Files() []File {
+	var parity []File
+	for i := range n.Files {
+		if n.Files[i].Extension() != ".par2" {
+			continue
+		}
+		parity = append(parity, n.Files[i])
+	}
+	return parity
+}
+
+// RarFiles returns the files that isRarFile recognises as rar volumes
+// (.rar, .rNN, .NNN). The result is nil when there are none.
+func (n *NZB) RarFiles() []File {
+	var volumes []File
+	for i := range n.Files {
+		if !isRarFile(n.Files[i].Filename()) {
+			continue
+		}
+		volumes = append(volumes, n.Files[i])
+	}
+	return volumes
+}
+
+// LargestFile returns the file with the most total bytes.
+// Returns nil if NZB has no files. When several files share the largest
+// size the first of them is returned. The pointer refers to the element
+// inside n.Files, not to a copy.
+func (n *NZB) LargestFile() *File {
+	if len(n.Files) == 0 {
+		return nil
+	}
+	// Track the running maximum so each file's segments are summed only once.
+	best, bestSize := 0, n.Files[0].TotalBytes()
+	for i := 1; i < len(n.Files); i++ {
+		if size := n.Files[i].TotalBytes(); size > bestSize {
+			best, bestSize = i, size
+		}
+	}
+	return &n.Files[best]
+}
+
+// IsObfuscated reports whether at least one file name looks obfuscated:
+// its base name (extension removed) is longer than 10 bytes and isHexLike
+// judges it to be mostly hexadecimal characters.
+func (n *NZB) IsObfuscated() bool {
+	for i := range n.Files {
+		name := n.Files[i].Filename()
+		if name == "" {
+			continue
+		}
+		// filepath.Ext always returns a suffix of name, so slicing it off
+		// yields the base name.
+		stem := name[:len(name)-len(filepath.Ext(name))]
+		if len(stem) > 10 && isHexLike(stem) {
+			return true
+		}
+	}
+	return false
+}
+
+// HasRars reports whether any file in the NZB is recognised by isRarFile.
+func (n *NZB) HasRars() bool {
+	for i := range n.Files {
+		if name := n.Files[i].Filename(); isRarFile(name) {
+			return true
+		}
+	}
+	return false
+}
+
+// HasPar2 reports whether any file in the NZB has a ".par2" extension
+// (case-insensitive).
+func (n *NZB) HasPar2() bool {
+	for i := range n.Files {
+		if n.Files[i].Extension() == ".par2" {
+			return true
+		}
+	}
+	return false
+}
+
+// TotalBytes returns the sum of all segment sizes in this file.
+func (f *File) TotalBytes() int64 {
+	var total int64
+	for _, s := range f.Segments {
+		total += s.Bytes
+	}
+	return total
+}
+
+// subjectFilenameRe captures the first double-quoted, non-empty run of
+// characters in a subject line, which is where posters usually put the
+// file name:
+//
+//	"Movie.2024.1080p.mkv" yEnc (1/50)
+//	[PRiVATE]-[#a]- "file.rar" yEnc (01/99)
+var subjectFilenameRe = regexp.MustCompile(`"([^"]+)"`)
+
+// Filename derives a file name from the subject line. The first quoted
+// string wins; without one, the whole subject is passed through
+// sanitizeFilename and returned.
+//
+// NOTE(review): the quoted name is returned verbatim, so it may still contain
+// path separators — confirm that callers sanitize it before joining it onto
+// a directory.
+func (f *File) Filename() string {
+	if match := subjectFilenameRe.FindStringSubmatch(f.Subject); len(match) > 1 {
+		return match[1]
+	}
+	return sanitizeFilename(f.Subject)
+}
+
+// Extension returns the extension of Filename, dot included and lowercased
+// (for example ".mkv" or ".rar"); it is empty when the name has none.
+func (f *File) Extension() string {
+	ext := filepath.Ext(f.Filename())
+	return strings.ToLower(ext)
+}
+
+// isMetadataFile reports whether name carries one of the extensions treated
+// as non-content (parity, info, checksum, index, text, image and link files).
+// The comparison is case-insensitive.
+func isMetadataFile(name string) bool {
+	switch strings.ToLower(filepath.Ext(name)) {
+	case ".par2", ".nfo", ".sfv", ".nzb", ".txt", ".jpg", ".png", ".url":
+		return true
+	default:
+		return false
+	}
+}
+
+// isSampleFile returns true for sample/preview files.
+// Matches filenames containing "sample" as a word boundary (e.g., "movie.sample.mkv", "Sample/video.mkv").
+func isSampleFile(name string) bool {
+	lower := strings.ToLower(name)
+	// Match "sample" preceded and followed by non-alphanumeric (word boundary)
+	idx := strings.Index(lower, "sample")
+	if idx < 0 {
+		return false
+	}
+	// Check it's not part of a larger word (e.g., "resampled")
+	if idx > 0 && isAlphaNum(lower[idx-1]) {
+		return false
+	}
+	end := idx + 6
+	if end < len(lower) && isAlphaNum(lower[end]) {
+		return false
+	}
+	return true
+}
+
+func isAlphaNum(b byte) bool {
+	return (b >= 'a' && b <= 'z') || (b >= '0' && b <= '9')
+}
+
+// isRarFile returns true for rar archive files, judged by the
+// case-insensitive extension:
+//   - ".rar"
+//   - ".rNN" / ".sNN" with two decimal digits (.r00 … .r99, .s00 …)
+//   - ".NNN" with three decimal digits (.001, .002, … split volumes)
+//
+// Only the digits 0-9 count as digits; signed forms such as ".r-1" or ".+12"
+// are not volume extensions.
+func isRarFile(name string) bool {
+	lower := strings.ToLower(name)
+	ext := filepath.Ext(lower)
+	if ext == ".rar" {
+		return true
+	}
+	if len(ext) != 4 {
+		return false
+	}
+	// ".rNN"/".sNN" carry two digits after the letter, ".NNN" carries three.
+	digits := ext[1:]
+	if ext[1] == 'r' || ext[1] == 's' {
+		digits = ext[2:]
+	}
+	for i := 0; i < len(digits); i++ {
+		if digits[i] < '0' || digits[i] > '9' {
+			return false
+		}
+	}
+	return true
+}
+
+// isHexLike returns true if the string looks like random hex/obfuscated:
+// more than 80% of its characters are hexadecimal digits (0-9, a-f, A-F).
+// The ratio is taken over characters (runes), not bytes, so multi-byte
+// characters do not skew it. The empty string is never hex-like.
+func isHexLike(s string) bool {
+	total, hexChars := 0, 0
+	for _, c := range s {
+		total++
+		if (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') {
+			hexChars++
+		}
+	}
+	if total == 0 {
+		return false
+	}
+	return float64(hexChars)/float64(total) > 0.8
+}
+
+// yencPartRe matches a "(n/m)" part counter together with the whitespace
+// around it.
+var yencPartRe = regexp.MustCompile(`\s*\(\d+/\d+\)\s*`)
+
+// sanitizeFilename turns a raw subject line into something usable as a file
+// name: part counters such as (01/50) and the literal "yEnc" are removed,
+// surrounding whitespace is trimmed, and each of / \ : * ? " < > | is
+// replaced by an underscore.
+func sanitizeFilename(s string) string {
+	cleaned := yencPartRe.ReplaceAllString(s, "")
+	cleaned = strings.TrimSpace(strings.ReplaceAll(cleaned, "yEnc", ""))
+
+	// All characters to replace are single ASCII bytes, so a byte-wise pass
+	// leaves every other byte untouched.
+	buf := []byte(cleaned)
+	for i, b := range buf {
+		switch b {
+		case '/', '\\', ':', '*', '?', '"', '<', '>', '|':
+			buf[i] = '_'
+		}
+	}
+	return string(buf)
+}
--- a/internal/usenet/nzb/parser_test.go
+++ b/internal/usenet/nzb/parser_test.go
@ -0,0 +1,269 @@
+package nzb
+
+import (
+	"strings"
+	"testing"
+)
+
+// testNZB is a three-file fixture: an .mkv with three segments, plus an .nfo
+// and a .par2 with one segment each. File names are quoted inside the subject.
+const testNZB = `<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE nzb PUBLIC "-//newzBin//DTD NZB 1.1//EN" "http://www.newzbin.com/DTD/nzb/nzb-1.1.dtd">
+<nzb xmlns="http://www.newzbin.com/DTD/2003/nzb">
+  <file poster="user@example.com" date="1700000000" subject="Movie.2024.1080p.BluRay.x264-GROUP [01/50] - &quot;Movie.2024.1080p.BluRay.x264-GROUP.mkv&quot; yEnc (1/3200)">
+    <groups>
+      <group>alt.binaries.movies</group>
+      <group>alt.binaries.multimedia</group>
+    </groups>
+    <segments>
+      <segment bytes="768000" number="1">abc123@news.example.com</segment>
+      <segment bytes="768000" number="2">def456@news.example.com</segment>
+      <segment bytes="512000" number="3">ghi789@news.example.com</segment>
+    </segments>
+  </file>
+  <file poster="user@example.com" date="1700000000" subject="Movie.2024.1080p.BluRay.x264-GROUP [02/50] - &quot;Movie.2024.1080p.BluRay.x264-GROUP.nfo&quot; yEnc (1/1)">
+    <groups>
+      <group>alt.binaries.movies</group>
+    </groups>
+    <segments>
+      <segment bytes="4096" number="1">nfo001@news.example.com</segment>
+    </segments>
+  </file>
+  <file poster="user@example.com" date="1700000000" subject="Movie.2024.1080p.BluRay.x264-GROUP [03/50] - &quot;Movie.2024.1080p.BluRay.x264-GROUP.par2&quot; yEnc (1/1)">
+    <groups>
+      <group>alt.binaries.movies</group>
+    </groups>
+    <segments>
+      <segment bytes="32768" number="1">par001@news.example.com</segment>
+    </segments>
+  </file>
+</nzb>`
+
+// testNZBWithRars is a four-file fixture: three rar volumes (.rar, .r00,
+// .r01) and one .par2 file.
+const testNZBWithRars = `<?xml version="1.0" encoding="UTF-8"?>
+<nzb xmlns="http://www.newzbin.com/DTD/2003/nzb">
+  <file poster="bot@example.com" date="1700000000" subject="[PRiVATE]-[#a]- &quot;Movie.2024.rar&quot; yEnc (01/99)">
+    <groups><group>alt.binaries.movies</group></groups>
+    <segments>
+      <segment bytes="768000" number="1">rar001@example</segment>
+      <segment bytes="768000" number="2">rar002@example</segment>
+    </segments>
+  </file>
+  <file poster="bot@example.com" date="1700000000" subject="[PRiVATE]-[#a]- &quot;Movie.2024.r00&quot; yEnc (01/99)">
+    <groups><group>alt.binaries.movies</group></groups>
+    <segments>
+      <segment bytes="768000" number="1">r00001@example</segment>
+    </segments>
+  </file>
+  <file poster="bot@example.com" date="1700000000" subject="[PRiVATE]-[#a]- &quot;Movie.2024.r01&quot; yEnc (01/99)">
+    <groups><group>alt.binaries.movies</group></groups>
+    <segments>
+      <segment bytes="768000" number="1">r01001@example</segment>
+    </segments>
+  </file>
+  <file poster="bot@example.com" date="1700000000" subject="[PRiVATE]-[#a]- &quot;Movie.2024.par2&quot; yEnc (1/1)">
+    <groups><group>alt.binaries.movies</group></groups>
+    <segments>
+      <segment bytes="32768" number="1">par001@example</segment>
+    </segments>
+  </file>
+</nzb>`
+
+// TestParse decodes the testNZB fixture and checks the first file (the MKV)
+// field by field, including its first segment.
+func TestParse(t *testing.T) {
+	nzb, err := Parse(strings.NewReader(testNZB))
+	if err != nil {
+		t.Fatalf("Parse failed: %v", err)
+	}
+
+	if len(nzb.Files) != 3 {
+		t.Fatalf("expected 3 files, got %d", len(nzb.Files))
+	}
+
+	// First file — the MKV
+	f := nzb.Files[0]
+	if f.Poster != "user@example.com" {
+		t.Errorf("poster: got %q", f.Poster)
+	}
+	if f.Date != 1700000000 {
+		t.Errorf("date: got %d", f.Date)
+	}
+	// Fatal, not Error: the element checks below index into these slices
+	// and would panic (or be meaningless) if the counts are off.
+	if len(f.Groups) != 2 {
+		t.Fatalf("groups: got %d", len(f.Groups))
+	}
+	if f.Groups[0] != "alt.binaries.movies" {
+		t.Errorf("group[0]: got %q", f.Groups[0])
+	}
+	if len(f.Segments) != 3 {
+		t.Fatalf("segments: got %d", len(f.Segments))
+	}
+
+	seg := f.Segments[0]
+	if seg.Bytes != 768000 {
+		t.Errorf("seg bytes: got %d", seg.Bytes)
+	}
+	if seg.Number != 1 {
+		t.Errorf("seg number: got %d", seg.Number)
+	}
+	if seg.MessageID != "abc123@news.example.com" {
+		t.Errorf("seg msgid: got %q", seg.MessageID)
+	}
+}
+
+// TestParseBytes checks that the byte-slice entry point yields the same
+// three files as the fixture describes.
+func TestParseBytes(t *testing.T) {
+	parsed, err := ParseBytes([]byte(testNZB))
+	if err != nil {
+		t.Fatalf("ParseBytes failed: %v", err)
+	}
+	if count := len(parsed.Files); count != 3 {
+		t.Fatalf("expected 3 files, got %d", count)
+	}
+}
+
+// TestTotalBytes checks that NZB.TotalBytes sums every segment of every
+// file in the testNZB fixture.
+func TestTotalBytes(t *testing.T) {
+	nzb, err := ParseBytes([]byte(testNZB))
+	if err != nil {
+		// Without this guard a parse failure would surface as a nil dereference.
+		t.Fatalf("ParseBytes failed: %v", err)
+	}
+	// 768000 + 768000 + 512000 + 4096 + 32768
+	expected := int64(768000 + 768000 + 512000 + 4096 + 32768)
+	if got := nzb.TotalBytes(); got != expected {
+		t.Errorf("TotalBytes: got %d, want %d", got, expected)
+	}
+}
+
+// TestTotalSegments checks the segment count across all files of testNZB
+// (3 + 1 + 1).
+func TestTotalSegments(t *testing.T) {
+	nzb, err := ParseBytes([]byte(testNZB))
+	if err != nil {
+		// Without this guard a parse failure would surface as a nil dereference.
+		t.Fatalf("ParseBytes failed: %v", err)
+	}
+	if got := nzb.TotalSegments(); got != 5 {
+		t.Errorf("TotalSegments: got %d, want 5", got)
+	}
+}
+
+// TestContentFiles checks that only the MKV of testNZB is reported as
+// content; the .nfo and .par2 files must be filtered out.
+func TestContentFiles(t *testing.T) {
+	nzb, err := ParseBytes([]byte(testNZB))
+	if err != nil {
+		// Without this guard a parse failure would surface as a nil dereference.
+		t.Fatalf("ParseBytes failed: %v", err)
+	}
+	content := nzb.ContentFiles()
+	if len(content) != 1 {
+		t.Fatalf("ContentFiles: got %d, want 1", len(content))
+	}
+	if content[0].Filename() != "Movie.2024.1080p.BluRay.x264-GROUP.mkv" {
+		t.Errorf("content filename: got %q", content[0].Filename())
+	}
+}
+
+// TestPar2Files checks that exactly one parity file is found in testNZB.
+func TestPar2Files(t *testing.T) {
+	nzb, err := ParseBytes([]byte(testNZB))
+	if err != nil {
+		// Without this guard a parse failure would surface as a nil dereference.
+		t.Fatalf("ParseBytes failed: %v", err)
+	}
+	par2 := nzb.Par2Files()
+	if len(par2) != 1 {
+		t.Fatalf("Par2Files: got %d, want 1", len(par2))
+	}
+}
+
+// TestLargestFile checks that the MKV, the file with the most bytes in
+// testNZB, is picked as the largest.
+func TestLargestFile(t *testing.T) {
+	nzb, err := ParseBytes([]byte(testNZB))
+	if err != nil {
+		// Without this guard a parse failure would surface as a nil dereference.
+		t.Fatalf("ParseBytes failed: %v", err)
+	}
+	largest := nzb.LargestFile()
+	if largest == nil {
+		t.Fatal("LargestFile returned nil")
+	}
+	if largest.Filename() != "Movie.2024.1080p.BluRay.x264-GROUP.mkv" {
+		t.Errorf("largest file: got %q", largest.Filename())
+	}
+}
+
+func TestFilename(t *testing.T) {
+	tests := []struct {
+		subject  string
+		expected string
+	}{
+		{
+			`Movie.2024.1080p [01/50] - "Movie.2024.1080p.mkv" yEnc (1/3200)`,
+			"Movie.2024.1080p.mkv",
+		},
+		{
+			`[PRiVATE]-[#a]- "file.rar" yEnc (01/99)`,
+			"file.rar",
+		},
+		{
+			`Some subject without quotes (1/1)`,
+			"Some subject without quotes",
+		},
+	}
+
+	for _, tt := range tests {
+		f := File{Subject: tt.subject}
+		if got := f.Filename(); got != tt.expected {
+			t.Errorf("Filename(%q) = %q, want %q", tt.subject, got, tt.expected)
+		}
+	}
+}
+
+// TestExtension checks that the extension is taken from the quoted file
+// name in the subject.
+func TestExtension(t *testing.T) {
+	file := File{Subject: `"Movie.2024.1080p.BluRay.x264-GROUP.mkv" yEnc (1/3200)`}
+	got := file.Extension()
+	if got != ".mkv" {
+		t.Errorf("Extension: got %q, want .mkv", got)
+	}
+}
+
+// TestHasRars checks that the rar fixture is reported as containing both
+// rar volumes and par2 files.
+func TestHasRars(t *testing.T) {
+	nzb, err := ParseBytes([]byte(testNZBWithRars))
+	if err != nil {
+		// Without this guard a parse failure would surface as a nil dereference.
+		t.Fatalf("ParseBytes failed: %v", err)
+	}
+	if !nzb.HasRars() {
+		t.Error("HasRars: expected true")
+	}
+	if !nzb.HasPar2() {
+		t.Error("HasPar2: expected true")
+	}
+}
+
+// TestRarFiles checks that the three rar volumes (.rar, .r00, .r01) of the
+// rar fixture are returned and the .par2 file is not.
+func TestRarFiles(t *testing.T) {
+	nzb, err := ParseBytes([]byte(testNZBWithRars))
+	if err != nil {
+		// Without this guard a parse failure would surface as a nil dereference.
+		t.Fatalf("ParseBytes failed: %v", err)
+	}
+	rars := nzb.RarFiles()
+	if len(rars) != 3 {
+		t.Fatalf("RarFiles: got %d, want 3", len(rars))
+	}
+}
+
+func TestIsRarFile(t *testing.T) {
+	tests := []struct {
+		name string
+		want bool
+	}{
+		{"file.rar", true},
+		{"file.r00", true},
+		{"file.r99", true},
+		{"file.s00", true},
+		{"file.001", true},
+		{"file.mkv", false},
+		{"file.par2", false},
+		{"file.nfo", false},
+	}
+	for _, tt := range tests {
+		if got := isRarFile(tt.name); got != tt.want {
+			t.Errorf("isRarFile(%q) = %v, want %v", tt.name, got, tt.want)
+		}
+	}
+}
+
+// TestParseEmpty checks that a well-formed document without any <file>
+// element is rejected.
+func TestParseEmpty(t *testing.T) {
+	src := strings.NewReader(`<?xml version="1.0"?><nzb xmlns="http://www.newzbin.com/DTD/2003/nzb"></nzb>`)
+	if _, err := Parse(src); err == nil {
+		t.Error("expected error for empty NZB")
+	}
+}
+
+// TestParseInvalidXML checks that input which is not XML is rejected.
+func TestParseInvalidXML(t *testing.T) {
+	src := strings.NewReader("not xml")
+	if _, err := Parse(src); err == nil {
+		t.Error("expected error for invalid XML")
+	}
+}
+
+// TestStripAngleBrackets checks that a message-id written with surrounding
+// angle brackets (escaped as &lt; and &gt; in the XML) is stored without them.
+func TestStripAngleBrackets(t *testing.T) {
+	doc := `<?xml version="1.0"?>
+<nzb xmlns="http://www.newzbin.com/DTD/2003/nzb">
+  <file poster="test" date="0" subject="&quot;test.bin&quot; (1/1)">
+    <groups><group>alt.test</group></groups>
+    <segments>
+      <segment bytes="100" number="1">&lt;angle@brackets.com&gt;</segment>
+    </segments>
+  </file>
+</nzb>`
+	parsed, err := ParseBytes([]byte(doc))
+	if err != nil {
+		t.Fatalf("Parse failed: %v", err)
+	}
+	msgID := parsed.Files[0].Segments[0].MessageID
+	if msgID != "angle@brackets.com" {
+		t.Errorf("MessageID not stripped: got %q", msgID)
+	}
+}