Complete Usenet download support for the unarr CLI:
- NZB XML parser with password extraction from <head> meta
- yEnc decoder with CRC32 verification
- NNTP client with TLS, auth, and connection pool (up to 10 conns)
- Segment downloader with parallel workers and progress reporting
- Post-processing: par2 verify/repair, unrar/7z extraction with password support
- Agent client methods: SearchNzbs, DownloadNzb, GetUsenetCredentials
- UsenetDownloader implementing the full Downloader interface
- Daemon wiring: UsenetDownloader passed to Manager
E2E tested: Oppenheimer 1080p (2.94 GB) downloaded via NNTP in 77.6s.
365 lines
8.5 KiB
Go
365 lines
8.5 KiB
Go
package nzb
|
|
|
|
import (
|
|
"encoding/xml"
|
|
"fmt"
|
|
"io"
|
|
"path/filepath"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// NZB represents a parsed NZB file containing one or more files to download.
|
|
type NZB struct {
|
|
Files []File
|
|
Password string // from <meta type="password"> in <head>
|
|
Meta map[string]string // all <meta> entries from <head>
|
|
}
|
|
|
|
// File represents a single file within an NZB, composed of multiple segments.
|
|
type File struct {
|
|
Poster string
|
|
Date int64
|
|
Subject string
|
|
Groups []string
|
|
Segments []Segment
|
|
}
|
|
|
|
// Segment identifies one NNTP article that carries a piece of a file.
type Segment struct {
	// Bytes is the segment's bytes attribute.
	Bytes int64
	// Number is the segment's number attribute.
	Number int
	// MessageID is the article's message-id without the surrounding angle brackets.
	MessageID string
}
|
|
|
|
// The xml* types below mirror the NZB document layout one-to-one so that
// encoding/xml can decode it directly. Numeric attributes are kept as strings
// here; converting them is left to the code that consumes these types.

// xmlNZB is the <nzb> root element.
type xmlNZB struct {
	XMLName xml.Name  `xml:"nzb"`
	Head    xmlHead   `xml:"head"`
	Files   []xmlFile `xml:"file"`
}

// xmlHead is the <head> element, a container for <meta> entries.
type xmlHead struct {
	Meta []xmlMeta `xml:"meta"`
}

// xmlMeta is a single <meta type="...">value</meta> entry.
type xmlMeta struct {
	Type  string `xml:"type,attr"`
	Value string `xml:",chardata"`
}

// xmlFile is a <file> element with its attributes and nested lists.
type xmlFile struct {
	Poster   string      `xml:"poster,attr"`
	Date     string      `xml:"date,attr"`
	Subject  string      `xml:"subject,attr"`
	Groups   xmlGroups   `xml:"groups"`
	Segments xmlSegments `xml:"segments"`
}

// xmlGroups is the <groups> wrapper around <group> elements.
type xmlGroups struct {
	Groups []string `xml:"group"`
}

// xmlSegments is the <segments> wrapper around <segment> elements.
type xmlSegments struct {
	Segments []xmlSegment `xml:"segment"`
}

// xmlSegment is a <segment> element; its character data is the message-id.
type xmlSegment struct {
	Bytes     string `xml:"bytes,attr"`
	Number    string `xml:"number,attr"`
	MessageID string `xml:",chardata"`
}
|
|
|
|
// Parse reads and parses an NZB XML document from the given reader.
|
|
func Parse(r io.Reader) (*NZB, error) {
|
|
var raw xmlNZB
|
|
dec := xml.NewDecoder(r)
|
|
if err := dec.Decode(&raw); err != nil {
|
|
return nil, fmt.Errorf("nzb: xml decode: %w", err)
|
|
}
|
|
|
|
if len(raw.Files) == 0 {
|
|
return nil, fmt.Errorf("nzb: no files found")
|
|
}
|
|
|
|
nzb := &NZB{
|
|
Files: make([]File, 0, len(raw.Files)),
|
|
Meta: make(map[string]string),
|
|
}
|
|
|
|
// Parse <head> meta entries
|
|
for _, m := range raw.Head.Meta {
|
|
if m.Type != "" {
|
|
nzb.Meta[m.Type] = strings.TrimSpace(m.Value)
|
|
}
|
|
}
|
|
nzb.Password = nzb.Meta["password"]
|
|
|
|
for _, rf := range raw.Files {
|
|
date, _ := strconv.ParseInt(rf.Date, 10, 64)
|
|
|
|
segs := make([]Segment, 0, len(rf.Segments.Segments))
|
|
for _, rs := range rf.Segments.Segments {
|
|
bytes, _ := strconv.ParseInt(rs.Bytes, 10, 64)
|
|
num, _ := strconv.Atoi(rs.Number)
|
|
msgID := strings.TrimSpace(rs.MessageID)
|
|
// Strip angle brackets if present
|
|
msgID = strings.TrimPrefix(msgID, "<")
|
|
msgID = strings.TrimSuffix(msgID, ">")
|
|
|
|
if msgID == "" {
|
|
continue
|
|
}
|
|
|
|
segs = append(segs, Segment{
|
|
Bytes: bytes,
|
|
Number: num,
|
|
MessageID: msgID,
|
|
})
|
|
}
|
|
|
|
if len(segs) == 0 {
|
|
continue
|
|
}
|
|
|
|
nzb.Files = append(nzb.Files, File{
|
|
Poster: rf.Poster,
|
|
Date: date,
|
|
Subject: rf.Subject,
|
|
Groups: rf.Groups.Groups,
|
|
Segments: segs,
|
|
})
|
|
}
|
|
|
|
if len(nzb.Files) == 0 {
|
|
return nil, fmt.Errorf("nzb: no valid files with segments found")
|
|
}
|
|
|
|
return nzb, nil
|
|
}
|
|
|
|
// ParseBytes parses an NZB from a byte slice.
|
|
func ParseBytes(data []byte) (*NZB, error) {
|
|
return Parse(strings.NewReader(string(data)))
|
|
}
|
|
|
|
// TotalBytes returns the total size of all segments across all files.
|
|
func (n *NZB) TotalBytes() int64 {
|
|
var total int64
|
|
for _, f := range n.Files {
|
|
total += f.TotalBytes()
|
|
}
|
|
return total
|
|
}
|
|
|
|
// TotalSegments returns the total number of segments across all files.
|
|
func (n *NZB) TotalSegments() int {
|
|
var total int
|
|
for _, f := range n.Files {
|
|
total += len(f.Segments)
|
|
}
|
|
return total
|
|
}
|
|
|
|
// ContentFiles returns files that are likely content (video, audio, images),
|
|
// excluding par2, nfo, sfv, nzb, and sample files.
|
|
func (n *NZB) ContentFiles() []File {
|
|
var result []File
|
|
for _, f := range n.Files {
|
|
name := f.Filename()
|
|
if isMetadataFile(name) || isSampleFile(name) {
|
|
continue
|
|
}
|
|
result = append(result, f)
|
|
}
|
|
return result
|
|
}
|
|
|
|
// Par2Files returns only par2 parity files.
|
|
func (n *NZB) Par2Files() []File {
|
|
var result []File
|
|
for _, f := range n.Files {
|
|
ext := strings.ToLower(filepath.Ext(f.Filename()))
|
|
if ext == ".par2" {
|
|
result = append(result, f)
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
// RarFiles returns rar archive files (.rar, .rNN, .NNN).
|
|
func (n *NZB) RarFiles() []File {
|
|
var result []File
|
|
for _, f := range n.Files {
|
|
if isRarFile(f.Filename()) {
|
|
result = append(result, f)
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
// LargestFile returns the file with the most total bytes.
|
|
// Returns nil if NZB has no files.
|
|
func (n *NZB) LargestFile() *File {
|
|
if len(n.Files) == 0 {
|
|
return nil
|
|
}
|
|
largest := &n.Files[0]
|
|
for i := 1; i < len(n.Files); i++ {
|
|
if n.Files[i].TotalBytes() > largest.TotalBytes() {
|
|
largest = &n.Files[i]
|
|
}
|
|
}
|
|
return largest
|
|
}
|
|
|
|
// IsObfuscated returns true if the NZB filenames appear to be obfuscated
|
|
// (random strings instead of meaningful names).
|
|
func (n *NZB) IsObfuscated() bool {
|
|
for _, f := range n.Files {
|
|
name := f.Filename()
|
|
if name == "" {
|
|
continue
|
|
}
|
|
base := strings.TrimSuffix(name, filepath.Ext(name))
|
|
// Check if base name is mostly hex/random chars (obfuscated)
|
|
if len(base) > 10 && isHexLike(base) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// HasRars returns true if the NZB contains rar archive files.
|
|
func (n *NZB) HasRars() bool {
|
|
for _, f := range n.Files {
|
|
if isRarFile(f.Filename()) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// HasPar2 returns true if the NZB contains par2 parity files.
|
|
func (n *NZB) HasPar2() bool {
|
|
for _, f := range n.Files {
|
|
ext := strings.ToLower(filepath.Ext(f.Filename()))
|
|
if ext == ".par2" {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// TotalBytes returns the sum of all segment sizes in this file.
|
|
func (f *File) TotalBytes() int64 {
|
|
var total int64
|
|
for _, s := range f.Segments {
|
|
total += s.Bytes
|
|
}
|
|
return total
|
|
}
|
|
|
|
// subjectFilenameRe matches the filename in a typical Usenet subject line.
|
|
// Examples:
|
|
// "Movie.2024.1080p.mkv" yEnc (1/50)
|
|
// [PRiVATE]-[#a]- "file.rar" yEnc (01/99)
|
|
var subjectFilenameRe = regexp.MustCompile(`"([^"]+)"`)
|
|
|
|
// Filename extracts the filename from the subject line.
|
|
// Falls back to the raw subject if no quoted filename is found.
|
|
func (f *File) Filename() string {
|
|
m := subjectFilenameRe.FindStringSubmatch(f.Subject)
|
|
if len(m) >= 2 {
|
|
return m[1]
|
|
}
|
|
// Fallback: try to extract something useful
|
|
return sanitizeFilename(f.Subject)
|
|
}
|
|
|
|
// Extension returns the lowercase file extension (e.g., ".mkv", ".rar").
|
|
func (f *File) Extension() string {
|
|
return strings.ToLower(filepath.Ext(f.Filename()))
|
|
}
|
|
|
|
// isMetadataFile reports whether name carries one of the extensions treated
// as non-content (parity, info, checksum, text, image and link files). The
// comparison ignores case.
func isMetadataFile(name string) bool {
	ext := strings.ToLower(filepath.Ext(name))
	for _, skipped := range [...]string{".par2", ".nfo", ".sfv", ".nzb", ".txt", ".jpg", ".png", ".url"} {
		if ext == skipped {
			return true
		}
	}
	return false
}
|
|
|
|
// isSampleFile reports whether name denotes a sample/preview file, i.e. it
// contains "sample" (case-insensitively) as a standalone word: not directly
// preceded or followed by an ASCII letter or digit.
//
// Examples that match: "movie.sample.mkv", "Sample/video.mkv".
// Examples that do not: "resampled.mkv", "samples.mkv".
//
// Every occurrence is examined, so "resampled.sample.mkv" matches because of
// its second occurrence even though the first is part of a larger word.
func isSampleFile(name string) bool {
	const word = "sample"
	lower := strings.ToLower(name)
	for from := 0; ; {
		rel := strings.Index(lower[from:], word)
		if rel < 0 {
			return false
		}
		start := from + rel
		end := start + len(word)
		joinedBefore := start > 0 && isAlphaNum(lower[start-1])
		joinedAfter := end < len(lower) && isAlphaNum(lower[end])
		if !joinedBefore && !joinedAfter {
			return true
		}
		// This occurrence is embedded in a larger word; keep looking.
		// "sample" cannot overlap itself, so resuming at end skips nothing.
		from = end
	}
}

// isAlphaNum reports whether b is an ASCII digit or a lower-case ASCII
// letter. Upper-case letters are not matched, so callers are expected to
// pass bytes of already lower-cased text.
func isAlphaNum(b byte) bool {
	return (b >= 'a' && b <= 'z') || (b >= '0' && b <= '9')
}
|
|
|
|
// isRarFile reports whether name has an extension used by rar archives,
// ignoring case:
//
//   - ".rar"
//   - ".rNN" / ".sNN" (old-style volume numbering, e.g. ".r00", ".s01")
//   - ".NNN" (numbered split volumes, e.g. ".001")
//
// N must be an ASCII digit; signed forms such as ".r-1" or ".+12" are not
// accepted.
func isRarFile(name string) bool {
	lower := strings.ToLower(name)
	ext := filepath.Ext(lower)
	if ext == ".rar" {
		return true
	}
	// All remaining forms are a dot followed by exactly three characters.
	if len(ext) != 4 {
		return false
	}

	isDigit := func(b byte) bool { return b >= '0' && b <= '9' }
	if !isDigit(ext[2]) || !isDigit(ext[3]) {
		return false
	}
	// ext[0] is the dot; ext[1] selects between .rNN/.sNN and .NNN.
	return ext[1] == 'r' || ext[1] == 's' || isDigit(ext[1])
}
|
|
|
|
// isHexLike reports whether s looks like a random hex/obfuscated string:
// the number of hexadecimal digits (0-9, a-f, A-F) in s divided by len(s)
// must be strictly greater than 0.8. An empty string yields false.
func isHexLike(s string) bool {
	const hexDigits = "0123456789abcdefABCDEF"
	matches := 0
	for _, r := range s {
		if strings.ContainsRune(hexDigits, r) {
			matches++
		}
	}
	ratio := float64(matches) / float64(len(s))
	return ratio > 0.8
}
|
|
|
|
// yencPartRe matches a part indicator such as "(01/50)" together with any
// whitespace around it.
var yencPartRe = regexp.MustCompile(`\s*\(\d+/\d+\)\s*`)

// sanitizeFilename turns a raw subject line into something usable as a file
// name: part indicators like "(01/50)" and the literal "yEnc" are removed,
// surrounding whitespace is trimmed, and each of the characters
// / \ : * ? " < > | is replaced by an underscore.
func sanitizeFilename(s string) string {
	cleaned := yencPartRe.ReplaceAllString(s, "")
	cleaned = strings.TrimSpace(strings.ReplaceAll(cleaned, "yEnc", ""))

	// The replacement "_" is not itself a replaced character, so a single
	// pass gives the same result as replacing one character after another.
	replacer := strings.NewReplacer(
		"/", "_",
		"\\", "_",
		":", "_",
		"*", "_",
		"?", "_",
		"\"", "_",
		"<", "_",
		">", "_",
		"|", "_",
	)
	return replacer.Replace(cleaned)
}
|