feat(usenet): implement full NNTP download pipeline

Complete usenet download support for unarr CLI:
- NZB XML parser with password extraction from <head> meta
- yEnc decoder with CRC32 verification
- NNTP client with TLS, auth, and connection pool (up to 10 conns)
- Segment downloader with parallel workers and progress reporting
- Post-processing: par2 verify/repair, unrar/7z extraction with password support
- Agent client methods: SearchNzbs, DownloadNzb, GetUsenetCredentials
- UsenetDownloader implementing full Downloader interface
- Daemon wiring: UsenetDownloader passed to Manager

E2E tested: Oppenheimer 1080p (2.94 GB) downloaded via NNTP in 77.6s.
This commit is contained in:
Deivid Soto 2026-03-28 21:12:12 +01:00
parent 5f337eebd7
commit e332c0a6e4
15 changed files with 3016 additions and 23 deletions

View file

@ -0,0 +1,365 @@
package nzb
import (
"encoding/xml"
"fmt"
"io"
"path/filepath"
"regexp"
"strconv"
"strings"
)
// NZB represents a parsed NZB file containing one or more files to download.
// Values returned by Parse always hold at least one File.
type NZB struct {
// Files lists the <file> entries that kept at least one usable segment.
Files []File
Password string // value of <meta type="password"> in <head>; empty when absent
Meta map[string]string // all typed <meta> entries from <head>, keyed by type, values whitespace-trimmed
}
// File represents a single file within an NZB, composed of multiple segments.
type File struct {
Poster string // "poster" attribute, copied verbatim
Date int64 // "date" attribute parsed as a base-10 integer (presumably Unix seconds); 0 when it does not parse
Subject string // "subject" attribute; Filename derives the file name from it
Groups []string // text of every <group> element listed for the file
Segments []Segment // segments in document order; entries with an empty message-id are dropped by Parse
}
// Segment represents a single NNTP article segment of a file.
type Segment struct {
Bytes int64 // "bytes" attribute; 0 when it does not parse
Number int // "number" attribute; 0 when it does not parse
MessageID string // message-id without angle brackets
}
// xmlNZB is the raw XML structure for parsing.
// It mirrors the <nzb> root element; Parse converts it into the exported NZB.
type xmlNZB struct {
XMLName xml.Name `xml:"nzb"`
Head xmlHead `xml:"head"` // <head> block holding the <meta> entries; may be absent
Files []xmlFile `xml:"file"` // one entry per <file> child
}
// xmlHead mirrors the <head> element and collects its <meta> children.
type xmlHead struct {
Meta []xmlMeta `xml:"meta"`
}
// xmlMeta mirrors a single <meta type="...">value</meta> entry.
type xmlMeta struct {
Type string `xml:"type,attr"` // key of the entry; Parse skips entries with an empty type
Value string `xml:",chardata"` // raw element text; Parse trims surrounding whitespace
}
// xmlFile mirrors a <file> element. Attributes are kept as strings so that
// Parse decides how to treat values that are not valid numbers.
type xmlFile struct {
Poster string `xml:"poster,attr"`
Date string `xml:"date,attr"` // converted to int64 by Parse
Subject string `xml:"subject,attr"`
Groups xmlGroups `xml:"groups"`
Segments xmlSegments `xml:"segments"`
}
// xmlGroups mirrors the <groups> wrapper and collects the text of its <group> children.
type xmlGroups struct {
Groups []string `xml:"group"`
}
// xmlSegments mirrors the <segments> wrapper and collects its <segment> children.
type xmlSegments struct {
Segments []xmlSegment `xml:"segment"`
}
// xmlSegment mirrors a single <segment> element; numeric attributes stay
// strings here and are converted by Parse.
type xmlSegment struct {
Bytes string `xml:"bytes,attr"`
Number string `xml:"number,attr"`
MessageID string `xml:",chardata"` // element text; may still carry whitespace and angle brackets
}
// Parse reads and parses an NZB XML document from the given reader.
//
// Besides UTF-8, documents that declare an ISO-8859-1 (Latin-1) or US-ASCII
// encoding are accepted; any other declared encoding is a decode error.
//
// Every typed <meta> entry in <head> is copied into Meta with its value
// trimmed, and Password is taken from the "password" entry. Segments whose
// message-id is empty after trimming whitespace and one pair of angle
// brackets are dropped, as are files left without any segment. Numeric
// attributes (date, bytes, number) are parsed best-effort: a value that does
// not parse is stored as 0 instead of failing the whole document.
//
// An error is returned when the XML cannot be decoded, when it contains no
// <file> element, or when no file has a usable segment.
func Parse(r io.Reader) (*NZB, error) {
	dec := xml.NewDecoder(r)
	// encoding/xml only decodes UTF-8 by itself and rejects every other
	// declared encoding unless a CharsetReader is installed.
	dec.CharsetReader = nzbCharsetReader

	var raw xmlNZB
	if err := dec.Decode(&raw); err != nil {
		return nil, fmt.Errorf("nzb: xml decode: %w", err)
	}
	if len(raw.Files) == 0 {
		return nil, fmt.Errorf("nzb: no files found")
	}

	nzb := &NZB{
		Files: make([]File, 0, len(raw.Files)),
		Meta:  make(map[string]string),
	}

	// Parse <head> meta entries
	for _, m := range raw.Head.Meta {
		if m.Type != "" {
			nzb.Meta[m.Type] = strings.TrimSpace(m.Value)
		}
	}
	nzb.Password = nzb.Meta["password"]

	for _, rf := range raw.Files {
		// Conversion errors are ignored on purpose: these attributes are
		// informational and a malformed one must not reject the document.
		date, _ := strconv.ParseInt(rf.Date, 10, 64)

		segs := make([]Segment, 0, len(rf.Segments.Segments))
		for _, rs := range rf.Segments.Segments {
			size, _ := strconv.ParseInt(rs.Bytes, 10, 64)
			num, _ := strconv.Atoi(rs.Number)

			// Strip angle brackets if present
			msgID := strings.TrimSpace(rs.MessageID)
			msgID = strings.TrimPrefix(msgID, "<")
			msgID = strings.TrimSuffix(msgID, ">")
			if msgID == "" {
				continue
			}
			segs = append(segs, Segment{
				Bytes:     size,
				Number:    num,
				MessageID: msgID,
			})
		}
		if len(segs) == 0 {
			continue
		}
		nzb.Files = append(nzb.Files, File{
			Poster:   rf.Poster,
			Date:     date,
			Subject:  rf.Subject,
			Groups:   rf.Groups.Groups,
			Segments: segs,
		})
	}
	if len(nzb.Files) == 0 {
		return nil, fmt.Errorf("nzb: no valid files with segments found")
	}
	return nzb, nil
}

// nzbCharsetReader converts the non-UTF-8 encodings accepted by Parse into
// UTF-8. Latin-1 maps every byte to the code point of the same value, so the
// conversion needs no lookup table; US-ASCII is a subset of it. The remaining
// input is read into memory before conversion.
func nzbCharsetReader(label string, input io.Reader) (io.Reader, error) {
	switch strings.ToLower(strings.TrimSpace(label)) {
	case "utf-8", "utf8":
		return input, nil
	case "iso-8859-1", "iso8859-1", "iso_8859-1", "latin1", "latin-1", "us-ascii", "ascii":
		raw, err := io.ReadAll(input)
		if err != nil {
			return nil, fmt.Errorf("reading %s input: %w", label, err)
		}
		var sb strings.Builder
		sb.Grow(len(raw))
		for _, b := range raw {
			sb.WriteRune(rune(b))
		}
		return strings.NewReader(sb.String()), nil
	}
	return nil, fmt.Errorf("unsupported charset %q", label)
}
// ParseBytes parses an NZB document that is already held in memory.
// It wraps data in a reader and returns exactly what Parse returns for it.
func ParseBytes(data []byte) (*NZB, error) {
	src := strings.NewReader(string(data))
	return Parse(src)
}
// TotalBytes returns the combined size of every segment of every file,
// i.e. the sum of File.TotalBytes over n.Files.
func (n *NZB) TotalBytes() int64 {
	sum := int64(0)
	for i := range n.Files {
		sum += n.Files[i].TotalBytes()
	}
	return sum
}
// TotalSegments returns how many segments the NZB holds across all files.
func (n *NZB) TotalSegments() int {
	count := 0
	for i := range n.Files {
		count += len(n.Files[i].Segments)
	}
	return count
}
// ContentFiles returns the files that are likely payload rather than
// accompanying material. A file is left out when its name has one of the
// extensions rejected by isMetadataFile (par2, nfo, sfv, nzb, txt, jpg, png,
// url) or when isSampleFile recognises it as a sample. Archives such as rar
// volumes are kept. The result is nil when nothing qualifies.
func (n *NZB) ContentFiles() []File {
	var kept []File
	for i := range n.Files {
		name := n.Files[i].Filename()
		if !isMetadataFile(name) && !isSampleFile(name) {
			kept = append(kept, n.Files[i])
		}
	}
	return kept
}
// Par2Files returns the files whose name ends in ".par2" (case-insensitive).
// The result is nil when the NZB has no parity files.
func (n *NZB) Par2Files() []File {
	var parity []File
	for i := range n.Files {
		name := n.Files[i].Filename()
		if strings.ToLower(filepath.Ext(name)) != ".par2" {
			continue
		}
		parity = append(parity, n.Files[i])
	}
	return parity
}
// RarFiles returns every file that isRarFile classifies as a rar volume
// (.rar, .rNN, .NNN). The result is nil when there is none.
func (n *NZB) RarFiles() []File {
	var volumes []File
	for i := range n.Files {
		if !isRarFile(n.Files[i].Filename()) {
			continue
		}
		volumes = append(volumes, n.Files[i])
	}
	return volumes
}
// LargestFile returns the file with the most total bytes, or nil if the NZB
// has no files. When several files share the largest size the first one wins.
// The returned pointer refers to the element inside n.Files, not to a copy.
func (n *NZB) LargestFile() *File {
	if len(n.Files) == 0 {
		return nil
	}
	// Remember the best size seen so far so that each file's segments are
	// summed exactly once.
	best, bestSize := 0, n.Files[0].TotalBytes()
	for i := 1; i < len(n.Files); i++ {
		if size := n.Files[i].TotalBytes(); size > bestSize {
			best, bestSize = i, size
		}
	}
	return &n.Files[best]
}
// IsObfuscated reports whether at least one file name looks machine-generated:
// its base name (extension removed) is longer than 10 bytes and isHexLike
// considers it mostly hexadecimal characters.
func (n *NZB) IsObfuscated() bool {
	for i := range n.Files {
		name := n.Files[i].Filename()
		stem := strings.TrimSuffix(name, filepath.Ext(name))
		// Short (and therefore also empty) stems are never treated as obfuscated.
		if len(stem) <= 10 {
			continue
		}
		if isHexLike(stem) {
			return true
		}
	}
	return false
}
// HasRars reports whether any file in the NZB is a rar volume according to
// isRarFile.
func (n *NZB) HasRars() bool {
	for i := range n.Files {
		if name := n.Files[i].Filename(); isRarFile(name) {
			return true
		}
	}
	return false
}
// HasPar2 reports whether any file name in the NZB ends in ".par2"
// (case-insensitive).
func (n *NZB) HasPar2() bool {
	for i := range n.Files {
		name := n.Files[i].Filename()
		if strings.ToLower(filepath.Ext(name)) == ".par2" {
			return true
		}
	}
	return false
}
// TotalBytes returns the size of the file as declared by the NZB: the sum of
// the Bytes field of all its segments.
func (f *File) TotalBytes() int64 {
	sum := int64(0)
	for i := range f.Segments {
		sum += f.Segments[i].Bytes
	}
	return sum
}
// subjectFilenameRe matches the filename in a typical Usenet subject line.
// It captures the text of the leftmost non-empty double-quoted run.
// Examples:
// "Movie.2024.1080p.mkv" yEnc (1/50)
// [PRiVATE]-[#a]- "file.rar" yEnc (01/99)
var subjectFilenameRe = regexp.MustCompile(`"([^"]+)"`)
// Filename derives the file name from the subject line. When the subject
// contains a double-quoted name, that name is returned as written; otherwise
// the whole subject is cleaned up by sanitizeFilename and returned.
//
// NOTE(review): the quoted name is not sanitized, so it can contain path
// separators - confirm that callers clean it before using it as a path.
func (f *File) Filename() string {
	if quoted := subjectFilenameRe.FindStringSubmatch(f.Subject); len(quoted) > 1 {
		return quoted[1]
	}
	return sanitizeFilename(f.Subject)
}
// Extension returns the extension of Filename in lower case, including the
// leading dot (e.g., ".mkv", ".rar"), or "" when the name has none.
func (f *File) Extension() string {
	ext := filepath.Ext(f.Filename())
	return strings.ToLower(ext)
}
// isMetadataFile reports whether name carries one of the extensions treated
// as non-content (parity data, release notes, checksums, artwork, links).
// The comparison ignores the case of the extension.
func isMetadataFile(name string) bool {
	ext := strings.ToLower(filepath.Ext(name))
	for _, known := range [...]string{".par2", ".nfo", ".sfv", ".nzb", ".txt", ".jpg", ".png", ".url"} {
		if ext == known {
			return true
		}
	}
	return false
}
// isSampleFile returns true for sample/preview files.
// It matches names that contain "sample" (in any letter case) as a whole
// word, i.e. not directly preceded or followed by an ASCII letter or digit
// (e.g., "movie.sample.mkv", "Sample/video.mkv"). Every occurrence is
// examined, so a name such as "resampled.sample.mkv" is still recognised even
// though its first "sample" is part of a larger word.
func isSampleFile(name string) bool {
	const word = "sample"
	lower := strings.ToLower(name)
	for from := 0; ; {
		rel := strings.Index(lower[from:], word)
		if rel < 0 {
			return false
		}
		start := from + rel
		end := start + len(word)
		// A boundary is either the edge of the name or a non-alphanumeric byte.
		leftOK := start == 0 || !isAlphaNum(lower[start-1])
		rightOK := end == len(lower) || !isAlphaNum(lower[end])
		if leftOK && rightOK {
			return true
		}
		// This occurrence is embedded in a larger word; keep searching after it.
		from = start + 1
	}
}

// isAlphaNum reports whether b is an ASCII digit or a lower-case ASCII
// letter. Upper-case letters are not recognised; callers lower-case first.
func isAlphaNum(b byte) bool {
	return (b >= 'a' && b <= 'z') || (b >= '0' && b <= '9')
}
// isRarFile returns true for rar archive files. The extension is compared
// case-insensitively and must be one of:
//   - ".rar"
//   - ".rNN" or ".sNN" (old-style volume names such as .r00 ... .r99, .s00)
//   - ".NNN" (split archive parts such as .001, .002)
//
// N must be a decimal digit; signed forms such as ".r-1" or ".+12" are not
// archive extensions and are rejected.
func isRarFile(name string) bool {
	lower := strings.ToLower(name)
	ext := filepath.Ext(lower)
	if ext == ".rar" {
		return true
	}
	// Every remaining form is a dot followed by exactly three characters.
	if len(ext) != 4 {
		return false
	}
	isDigit := func(b byte) bool { return b >= '0' && b <= '9' }
	if !isDigit(ext[2]) || !isDigit(ext[3]) {
		return false
	}
	return ext[1] == 'r' || ext[1] == 's' || isDigit(ext[1])
}
// isHexLike reports whether more than 80% of s consists of hexadecimal
// characters (0-9, a-f, A-F). The ratio is taken against the byte length of
// s. An empty string yields false.
func isHexLike(s string) bool {
	var hits int
	for _, r := range s {
		switch {
		case '0' <= r && r <= '9', 'a' <= r && r <= 'f', 'A' <= r && r <= 'F':
			hits++
		}
	}
	ratio := float64(hits) / float64(len(s))
	return ratio > 0.8
}
// yencPartRe matches a part counter such as "(01/50)" together with the
// whitespace around it.
var yencPartRe = regexp.MustCompile(`\s*\(\d+/\d+\)\s*`)

// sanitizeFilename turns a raw subject line into something usable as a file
// name: part counters and the "yEnc" keyword are removed, surrounding
// whitespace is trimmed, and every character that is invalid in file names
// (/ \ : * ? " < > |) is replaced by an underscore.
func sanitizeFilename(s string) string {
	withoutParts := yencPartRe.ReplaceAllString(s, "")
	cleaned := strings.TrimSpace(strings.ReplaceAll(withoutParts, "yEnc", ""))

	// All forbidden characters are single ASCII bytes, which never occur
	// inside a multi-byte UTF-8 sequence, so a byte-wise pass is sufficient.
	out := []byte(cleaned)
	for i, b := range out {
		switch b {
		case '/', '\\', ':', '*', '?', '"', '<', '>', '|':
			out[i] = '_'
		}
	}
	return string(out)
}

View file

@ -0,0 +1,269 @@
package nzb
import (
"strings"
"testing"
)
// testNZB is a three-file fixture (an .mkv with three segments, an .nfo and a
// .par2 with one segment each) whose subjects carry the file name in quotes.
const testNZB = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE nzb PUBLIC "-//newzBin//DTD NZB 1.1//EN" "http://www.newzbin.com/DTD/nzb/nzb-1.1.dtd">
<nzb xmlns="http://www.newzbin.com/DTD/2003/nzb">
<file poster="user@example.com" date="1700000000" subject="Movie.2024.1080p.BluRay.x264-GROUP [01/50] - &quot;Movie.2024.1080p.BluRay.x264-GROUP.mkv&quot; yEnc (1/3200)">
<groups>
<group>alt.binaries.movies</group>
<group>alt.binaries.multimedia</group>
</groups>
<segments>
<segment bytes="768000" number="1">abc123@news.example.com</segment>
<segment bytes="768000" number="2">def456@news.example.com</segment>
<segment bytes="512000" number="3">ghi789@news.example.com</segment>
</segments>
</file>
<file poster="user@example.com" date="1700000000" subject="Movie.2024.1080p.BluRay.x264-GROUP [02/50] - &quot;Movie.2024.1080p.BluRay.x264-GROUP.nfo&quot; yEnc (1/1)">
<groups>
<group>alt.binaries.movies</group>
</groups>
<segments>
<segment bytes="4096" number="1">nfo001@news.example.com</segment>
</segments>
</file>
<file poster="user@example.com" date="1700000000" subject="Movie.2024.1080p.BluRay.x264-GROUP [03/50] - &quot;Movie.2024.1080p.BluRay.x264-GROUP.par2&quot; yEnc (1/1)">
<groups>
<group>alt.binaries.movies</group>
</groups>
<segments>
<segment bytes="32768" number="1">par001@news.example.com</segment>
</segments>
</file>
</nzb>`
// testNZBWithRars is a fixture with three rar volumes (.rar, .r00, .r01) and
// one .par2 file, used by the rar/par2 detection tests.
const testNZBWithRars = `<?xml version="1.0" encoding="UTF-8"?>
<nzb xmlns="http://www.newzbin.com/DTD/2003/nzb">
<file poster="bot@example.com" date="1700000000" subject="[PRiVATE]-[#a]- &quot;Movie.2024.rar&quot; yEnc (01/99)">
<groups><group>alt.binaries.movies</group></groups>
<segments>
<segment bytes="768000" number="1">rar001@example</segment>
<segment bytes="768000" number="2">rar002@example</segment>
</segments>
</file>
<file poster="bot@example.com" date="1700000000" subject="[PRiVATE]-[#a]- &quot;Movie.2024.r00&quot; yEnc (01/99)">
<groups><group>alt.binaries.movies</group></groups>
<segments>
<segment bytes="768000" number="1">r00001@example</segment>
</segments>
</file>
<file poster="bot@example.com" date="1700000000" subject="[PRiVATE]-[#a]- &quot;Movie.2024.r01&quot; yEnc (01/99)">
<groups><group>alt.binaries.movies</group></groups>
<segments>
<segment bytes="768000" number="1">r01001@example</segment>
</segments>
</file>
<file poster="bot@example.com" date="1700000000" subject="[PRiVATE]-[#a]- &quot;Movie.2024.par2&quot; yEnc (1/1)">
<groups><group>alt.binaries.movies</group></groups>
<segments>
<segment bytes="32768" number="1">par001@example</segment>
</segments>
</file>
</nzb>`
// TestParse checks that Parse maps every attribute and element of the first
// fixture file onto the exported structures.
func TestParse(t *testing.T) {
	nzb, err := Parse(strings.NewReader(testNZB))
	if err != nil {
		t.Fatalf("Parse failed: %v", err)
	}
	if len(nzb.Files) != 3 {
		t.Fatalf("expected 3 files, got %d", len(nzb.Files))
	}

	// First file — the MKV
	f := nzb.Files[0]
	if f.Poster != "user@example.com" {
		t.Errorf("poster: got %q", f.Poster)
	}
	if f.Date != 1700000000 {
		t.Errorf("date: got %d", f.Date)
	}
	// The length checks are fatal because the slices are indexed right after;
	// continuing would turn a wrong count into an index-out-of-range panic.
	if len(f.Groups) != 2 {
		t.Fatalf("groups: got %d", len(f.Groups))
	}
	if f.Groups[0] != "alt.binaries.movies" {
		t.Errorf("group[0]: got %q", f.Groups[0])
	}
	if len(f.Segments) != 3 {
		t.Fatalf("segments: got %d", len(f.Segments))
	}

	seg := f.Segments[0]
	if seg.Bytes != 768000 {
		t.Errorf("seg bytes: got %d", seg.Bytes)
	}
	if seg.Number != 1 {
		t.Errorf("seg number: got %d", seg.Number)
	}
	if seg.MessageID != "abc123@news.example.com" {
		t.Errorf("seg msgid: got %q", seg.MessageID)
	}
}
// TestParseBytes checks that the byte-slice entry point parses the fixture
// and finds all three files.
func TestParseBytes(t *testing.T) {
	parsed, err := ParseBytes([]byte(testNZB))
	if err != nil {
		t.Fatalf("ParseBytes failed: %v", err)
	}
	if count := len(parsed.Files); count != 3 {
		t.Fatalf("expected 3 files, got %d", count)
	}
}
// TestTotalBytes checks that NZB.TotalBytes adds up every segment of every
// file in the fixture.
func TestTotalBytes(t *testing.T) {
	nzb, err := ParseBytes([]byte(testNZB))
	if err != nil {
		t.Fatalf("ParseBytes failed: %v", err)
	}
	// 768000 + 768000 + 512000 + 4096 + 32768
	expected := int64(768000 + 768000 + 512000 + 4096 + 32768)
	if got := nzb.TotalBytes(); got != expected {
		t.Errorf("TotalBytes: got %d, want %d", got, expected)
	}
}
// TestTotalSegments checks that the fixture's 3 + 1 + 1 segments are counted.
func TestTotalSegments(t *testing.T) {
	nzb, err := ParseBytes([]byte(testNZB))
	if err != nil {
		t.Fatalf("ParseBytes failed: %v", err)
	}
	if got := nzb.TotalSegments(); got != 5 {
		t.Errorf("TotalSegments: got %d, want 5", got)
	}
}
// TestContentFiles checks that only the .mkv survives the metadata/sample
// filter; the .nfo and .par2 files of the fixture must be dropped.
func TestContentFiles(t *testing.T) {
	nzb, err := ParseBytes([]byte(testNZB))
	if err != nil {
		t.Fatalf("ParseBytes failed: %v", err)
	}
	content := nzb.ContentFiles()
	if len(content) != 1 {
		t.Fatalf("ContentFiles: got %d, want 1", len(content))
	}
	if content[0].Filename() != "Movie.2024.1080p.BluRay.x264-GROUP.mkv" {
		t.Errorf("content filename: got %q", content[0].Filename())
	}
}
// TestPar2Files checks that exactly the one .par2 file of the fixture is
// returned.
func TestPar2Files(t *testing.T) {
	nzb, err := ParseBytes([]byte(testNZB))
	if err != nil {
		t.Fatalf("ParseBytes failed: %v", err)
	}
	par2 := nzb.Par2Files()
	if len(par2) != 1 {
		t.Fatalf("Par2Files: got %d, want 1", len(par2))
	}
}
// TestLargestFile checks that the .mkv, which has the most bytes in the
// fixture, is reported as the largest file.
func TestLargestFile(t *testing.T) {
	nzb, err := ParseBytes([]byte(testNZB))
	if err != nil {
		t.Fatalf("ParseBytes failed: %v", err)
	}
	largest := nzb.LargestFile()
	if largest == nil {
		t.Fatal("LargestFile returned nil")
	}
	if largest.Filename() != "Movie.2024.1080p.BluRay.x264-GROUP.mkv" {
		t.Errorf("largest file: got %q", largest.Filename())
	}
}
// TestFilename covers both paths of File.Filename: subjects with a quoted
// file name and the sanitizing fallback for subjects without quotes.
func TestFilename(t *testing.T) {
	cases := []struct {
		subject  string
		expected string
	}{
		{
			subject:  `Movie.2024.1080p [01/50] - "Movie.2024.1080p.mkv" yEnc (1/3200)`,
			expected: "Movie.2024.1080p.mkv",
		},
		{
			subject:  `[PRiVATE]-[#a]- "file.rar" yEnc (01/99)`,
			expected: "file.rar",
		},
		{
			subject:  `Some subject without quotes (1/1)`,
			expected: "Some subject without quotes",
		},
	}
	for i := range cases {
		tc := cases[i]
		file := File{Subject: tc.subject}
		got := file.Filename()
		if got == tc.expected {
			continue
		}
		t.Errorf("Filename(%q) = %q, want %q", tc.subject, got, tc.expected)
	}
}
// TestExtension checks that the extension is taken from the quoted file name
// in the subject.
func TestExtension(t *testing.T) {
	file := File{Subject: `"Movie.2024.1080p.BluRay.x264-GROUP.mkv" yEnc (1/3200)`}
	got := file.Extension()
	if got != ".mkv" {
		t.Errorf("Extension: got %q, want .mkv", got)
	}
}
// TestHasRars checks that the rar fixture is detected as containing both rar
// volumes and par2 files.
func TestHasRars(t *testing.T) {
	nzb, err := ParseBytes([]byte(testNZBWithRars))
	if err != nil {
		t.Fatalf("ParseBytes failed: %v", err)
	}
	if !nzb.HasRars() {
		t.Error("HasRars: expected true")
	}
	if !nzb.HasPar2() {
		t.Error("HasPar2: expected true")
	}
}
// TestRarFiles checks that the .rar, .r00 and .r01 volumes of the fixture are
// returned and the .par2 file is not.
func TestRarFiles(t *testing.T) {
	nzb, err := ParseBytes([]byte(testNZBWithRars))
	if err != nil {
		t.Fatalf("ParseBytes failed: %v", err)
	}
	rars := nzb.RarFiles()
	if len(rars) != 3 {
		t.Fatalf("RarFiles: got %d, want 3", len(rars))
	}
}
// TestIsRarFile exercises isRarFile with each accepted extension form and a
// few extensions that must be rejected.
func TestIsRarFile(t *testing.T) {
	cases := []struct {
		name string
		want bool
	}{
		{name: "file.rar", want: true},
		{name: "file.r00", want: true},
		{name: "file.r99", want: true},
		{name: "file.s00", want: true},
		{name: "file.001", want: true},
		{name: "file.mkv", want: false},
		{name: "file.par2", want: false},
		{name: "file.nfo", want: false},
	}
	for i := range cases {
		tc := cases[i]
		got := isRarFile(tc.name)
		if got == tc.want {
			continue
		}
		t.Errorf("isRarFile(%q) = %v, want %v", tc.name, got, tc.want)
	}
}
// TestParseEmpty checks that a well-formed document without any <file>
// element is rejected.
func TestParseEmpty(t *testing.T) {
	src := strings.NewReader(`<?xml version="1.0"?><nzb xmlns="http://www.newzbin.com/DTD/2003/nzb"></nzb>`)
	if _, err := Parse(src); err == nil {
		t.Error("expected error for empty NZB")
	}
}
// TestParseInvalidXML checks that input which is not XML at all produces an
// error.
func TestParseInvalidXML(t *testing.T) {
	src := strings.NewReader("not xml")
	if _, err := Parse(src); err == nil {
		t.Error("expected error for invalid XML")
	}
}
// TestStripAngleBrackets checks that a message-id written as <id> in the
// document is stored without the surrounding angle brackets.
func TestStripAngleBrackets(t *testing.T) {
	const doc = `<?xml version="1.0"?>
<nzb xmlns="http://www.newzbin.com/DTD/2003/nzb">
<file poster="test" date="0" subject="&quot;test.bin&quot; (1/1)">
<groups><group>alt.test</group></groups>
<segments>
<segment bytes="100" number="1">&lt;angle@brackets.com&gt;</segment>
</segments>
</file>
</nzb>`
	parsed, err := ParseBytes([]byte(doc))
	if err != nil {
		t.Fatalf("Parse failed: %v", err)
	}
	got := parsed.Files[0].Segments[0].MessageID
	if got != "angle@brackets.com" {
		t.Errorf("MessageID not stripped: got %q", got)
	}
}