feat(usenet): implement full NNTP download pipeline
Complete usenet download support for the unarr CLI:
- NZB XML parser with password extraction from <head> meta
- yEnc decoder with CRC32 verification
- NNTP client with TLS, auth, and connection pool (up to 10 conns)
- Segment downloader with parallel workers and progress reporting
- Post-processing: par2 verify/repair, unrar/7z extraction with password support
- Agent client methods: SearchNzbs, DownloadNzb, GetUsenetCredentials
- UsenetDownloader implementing full Downloader interface
- Daemon wiring: UsenetDownloader passed to Manager

E2E tested: Oppenheimer 1080p (2.94 GB) downloaded via NNTP in 77.6s.
This commit is contained in:
parent
5f337eebd7
commit
e332c0a6e4
15 changed files with 3016 additions and 23 deletions
365
internal/usenet/nzb/parser.go
Normal file
365
internal/usenet/nzb/parser.go
Normal file
|
|
@ -0,0 +1,365 @@
|
|||
package nzb
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Data model returned by Parse.
type (
	// NZB is the parsed form of an NZB document: the files it lists plus the
	// metadata found in its <head> element.
	NZB struct {
		Files    []File            // files that have at least one usable segment
		Password string            // value of <meta type="password">; empty if absent
		Meta     map[string]string // every <head> <meta> value, keyed by its type attribute
	}

	// File is one <file> entry of an NZB, made up of one or more segments.
	File struct {
		Poster   string    // poster attribute, verbatim
		Date     int64     // date attribute parsed as a base-10 integer (presumably Unix seconds — confirm); 0 if unparsable
		Subject  string    // subject attribute, verbatim
		Groups   []string  // contents of the <group> elements
		Segments []Segment // segments in document order
	}

	// Segment is a single NNTP article carrying one piece of a File.
	Segment struct {
		Bytes     int64  // bytes attribute; 0 if unparsable
		Number    int    // number attribute; 0 if unparsable
		MessageID string // message-id with the surrounding angle brackets removed
	}
)
|
||||
|
||||
// Wire-format types: these mirror the NZB XML layout one-to-one and exist
// only as decode targets for encoding/xml. Numeric attributes are kept as
// strings here and converted by Parse.
type (
	// xmlNZB is the <nzb> root element.
	xmlNZB struct {
		XMLName xml.Name  `xml:"nzb"`
		Head    xmlHead   `xml:"head"`
		Files   []xmlFile `xml:"file"`
	}

	// xmlHead is the <head> element holding the <meta> entries.
	xmlHead struct {
		Meta []xmlMeta `xml:"meta"`
	}

	// xmlMeta is one <meta type="...">value</meta> entry.
	xmlMeta struct {
		Type  string `xml:"type,attr"`
		Value string `xml:",chardata"`
	}

	// xmlFile is one <file> element with its attributes and children.
	xmlFile struct {
		Poster   string      `xml:"poster,attr"`
		Date     string      `xml:"date,attr"`
		Subject  string      `xml:"subject,attr"`
		Groups   xmlGroups   `xml:"groups"`
		Segments xmlSegments `xml:"segments"`
	}

	// xmlGroups is the <groups> wrapper around the <group> elements.
	xmlGroups struct {
		Groups []string `xml:"group"`
	}

	// xmlSegments is the <segments> wrapper around the <segment> elements.
	xmlSegments struct {
		Segments []xmlSegment `xml:"segment"`
	}

	// xmlSegment is one <segment> element; its character data is the
	// article's message-id.
	xmlSegment struct {
		Bytes     string `xml:"bytes,attr"`
		Number    string `xml:"number,attr"`
		MessageID string `xml:",chardata"`
	}
)
|
||||
|
||||
// Parse reads and parses an NZB XML document from the given reader.
|
||||
func Parse(r io.Reader) (*NZB, error) {
|
||||
var raw xmlNZB
|
||||
dec := xml.NewDecoder(r)
|
||||
if err := dec.Decode(&raw); err != nil {
|
||||
return nil, fmt.Errorf("nzb: xml decode: %w", err)
|
||||
}
|
||||
|
||||
if len(raw.Files) == 0 {
|
||||
return nil, fmt.Errorf("nzb: no files found")
|
||||
}
|
||||
|
||||
nzb := &NZB{
|
||||
Files: make([]File, 0, len(raw.Files)),
|
||||
Meta: make(map[string]string),
|
||||
}
|
||||
|
||||
// Parse <head> meta entries
|
||||
for _, m := range raw.Head.Meta {
|
||||
if m.Type != "" {
|
||||
nzb.Meta[m.Type] = strings.TrimSpace(m.Value)
|
||||
}
|
||||
}
|
||||
nzb.Password = nzb.Meta["password"]
|
||||
|
||||
for _, rf := range raw.Files {
|
||||
date, _ := strconv.ParseInt(rf.Date, 10, 64)
|
||||
|
||||
segs := make([]Segment, 0, len(rf.Segments.Segments))
|
||||
for _, rs := range rf.Segments.Segments {
|
||||
bytes, _ := strconv.ParseInt(rs.Bytes, 10, 64)
|
||||
num, _ := strconv.Atoi(rs.Number)
|
||||
msgID := strings.TrimSpace(rs.MessageID)
|
||||
// Strip angle brackets if present
|
||||
msgID = strings.TrimPrefix(msgID, "<")
|
||||
msgID = strings.TrimSuffix(msgID, ">")
|
||||
|
||||
if msgID == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
segs = append(segs, Segment{
|
||||
Bytes: bytes,
|
||||
Number: num,
|
||||
MessageID: msgID,
|
||||
})
|
||||
}
|
||||
|
||||
if len(segs) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
nzb.Files = append(nzb.Files, File{
|
||||
Poster: rf.Poster,
|
||||
Date: date,
|
||||
Subject: rf.Subject,
|
||||
Groups: rf.Groups.Groups,
|
||||
Segments: segs,
|
||||
})
|
||||
}
|
||||
|
||||
if len(nzb.Files) == 0 {
|
||||
return nil, fmt.Errorf("nzb: no valid files with segments found")
|
||||
}
|
||||
|
||||
return nzb, nil
|
||||
}
|
||||
|
||||
// ParseBytes parses an NZB from a byte slice.
|
||||
func ParseBytes(data []byte) (*NZB, error) {
|
||||
return Parse(strings.NewReader(string(data)))
|
||||
}
|
||||
|
||||
// TotalBytes returns the total size of all segments across all files.
|
||||
func (n *NZB) TotalBytes() int64 {
|
||||
var total int64
|
||||
for _, f := range n.Files {
|
||||
total += f.TotalBytes()
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
// TotalSegments returns the total number of segments across all files.
|
||||
func (n *NZB) TotalSegments() int {
|
||||
var total int
|
||||
for _, f := range n.Files {
|
||||
total += len(f.Segments)
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
// ContentFiles returns files that are likely content (video, audio, images),
|
||||
// excluding par2, nfo, sfv, nzb, and sample files.
|
||||
func (n *NZB) ContentFiles() []File {
|
||||
var result []File
|
||||
for _, f := range n.Files {
|
||||
name := f.Filename()
|
||||
if isMetadataFile(name) || isSampleFile(name) {
|
||||
continue
|
||||
}
|
||||
result = append(result, f)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// Par2Files returns only par2 parity files.
|
||||
func (n *NZB) Par2Files() []File {
|
||||
var result []File
|
||||
for _, f := range n.Files {
|
||||
ext := strings.ToLower(filepath.Ext(f.Filename()))
|
||||
if ext == ".par2" {
|
||||
result = append(result, f)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// RarFiles returns rar archive files (.rar, .rNN, .NNN).
|
||||
func (n *NZB) RarFiles() []File {
|
||||
var result []File
|
||||
for _, f := range n.Files {
|
||||
if isRarFile(f.Filename()) {
|
||||
result = append(result, f)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// LargestFile returns the file with the most total bytes.
|
||||
// Returns nil if NZB has no files.
|
||||
func (n *NZB) LargestFile() *File {
|
||||
if len(n.Files) == 0 {
|
||||
return nil
|
||||
}
|
||||
largest := &n.Files[0]
|
||||
for i := 1; i < len(n.Files); i++ {
|
||||
if n.Files[i].TotalBytes() > largest.TotalBytes() {
|
||||
largest = &n.Files[i]
|
||||
}
|
||||
}
|
||||
return largest
|
||||
}
|
||||
|
||||
// IsObfuscated returns true if the NZB filenames appear to be obfuscated
|
||||
// (random strings instead of meaningful names).
|
||||
func (n *NZB) IsObfuscated() bool {
|
||||
for _, f := range n.Files {
|
||||
name := f.Filename()
|
||||
if name == "" {
|
||||
continue
|
||||
}
|
||||
base := strings.TrimSuffix(name, filepath.Ext(name))
|
||||
// Check if base name is mostly hex/random chars (obfuscated)
|
||||
if len(base) > 10 && isHexLike(base) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// HasRars returns true if the NZB contains rar archive files.
|
||||
func (n *NZB) HasRars() bool {
|
||||
for _, f := range n.Files {
|
||||
if isRarFile(f.Filename()) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// HasPar2 returns true if the NZB contains par2 parity files.
|
||||
func (n *NZB) HasPar2() bool {
|
||||
for _, f := range n.Files {
|
||||
ext := strings.ToLower(filepath.Ext(f.Filename()))
|
||||
if ext == ".par2" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// TotalBytes returns the sum of all segment sizes in this file.
|
||||
func (f *File) TotalBytes() int64 {
|
||||
var total int64
|
||||
for _, s := range f.Segments {
|
||||
total += s.Bytes
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
// subjectFilenameRe matches the filename in a typical Usenet subject line.
|
||||
// Examples:
|
||||
// "Movie.2024.1080p.mkv" yEnc (1/50)
|
||||
// [PRiVATE]-[#a]- "file.rar" yEnc (01/99)
|
||||
var subjectFilenameRe = regexp.MustCompile(`"([^"]+)"`)
|
||||
|
||||
// Filename extracts the filename from the subject line.
|
||||
// Falls back to the raw subject if no quoted filename is found.
|
||||
func (f *File) Filename() string {
|
||||
m := subjectFilenameRe.FindStringSubmatch(f.Subject)
|
||||
if len(m) >= 2 {
|
||||
return m[1]
|
||||
}
|
||||
// Fallback: try to extract something useful
|
||||
return sanitizeFilename(f.Subject)
|
||||
}
|
||||
|
||||
// Extension returns the lowercase file extension (e.g., ".mkv", ".rar").
|
||||
func (f *File) Extension() string {
|
||||
return strings.ToLower(filepath.Ext(f.Filename()))
|
||||
}
|
||||
|
||||
// isMetadataFile reports whether name carries one of the extensions treated
// as non-content: .par2, .sfv, .nfo, .txt, .url, .nzb, .jpg or .png. The
// comparison ignores case.
func isMetadataFile(name string) bool {
	switch strings.ToLower(filepath.Ext(name)) {
	case ".par2", ".sfv":
		return true
	case ".nfo", ".txt", ".url", ".nzb":
		return true
	case ".jpg", ".png":
		return true
	default:
		return false
	}
}
|
||||
|
||||
// isSampleFile returns true for sample/preview files.
|
||||
// Matches filenames containing "sample" as a word boundary (e.g., "movie.sample.mkv", "Sample/video.mkv").
|
||||
func isSampleFile(name string) bool {
|
||||
lower := strings.ToLower(name)
|
||||
// Match "sample" preceded and followed by non-alphanumeric (word boundary)
|
||||
idx := strings.Index(lower, "sample")
|
||||
if idx < 0 {
|
||||
return false
|
||||
}
|
||||
// Check it's not part of a larger word (e.g., "resampled")
|
||||
if idx > 0 && isAlphaNum(lower[idx-1]) {
|
||||
return false
|
||||
}
|
||||
end := idx + 6
|
||||
if end < len(lower) && isAlphaNum(lower[end]) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// isAlphaNum reports whether b is an ASCII digit or a lower-case ASCII
// letter. Upper-case letters are not matched.
func isAlphaNum(b byte) bool {
	switch {
	case 'a' <= b && b <= 'z':
		return true
	case '0' <= b && b <= '9':
		return true
	default:
		return false
	}
}
|
||||
|
||||
// isRarFile reports whether name has the extension of a RAR archive or one
// of its volumes (case-insensitive):
//
//	.rar             main archive / new-style volume
//	.r00 … .r99      old-style volumes
//	.s00 … .s99      continuation of the old-style series
//	.000 … .999      numbered split parts
//
// The numeric parts must consist of ASCII digits only; signed forms such as
// ".r-1" or ".+12" are not archive extensions and are rejected.
func isRarFile(name string) bool {
	lower := strings.ToLower(name)
	ext := filepath.Ext(lower)
	if ext == ".rar" {
		return true
	}
	// Every remaining form is a dot followed by exactly three bytes.
	if len(ext) != 4 {
		return false
	}
	if ext[1] == 'r' || ext[1] == 's' {
		return isASCIIDigits(ext[2:])
	}
	return isASCIIDigits(ext[1:])
}

// isASCIIDigits reports whether s is non-empty and made up solely of the
// characters '0' through '9'.
func isASCIIDigits(s string) bool {
	if s == "" {
		return false
	}
	for i := 0; i < len(s); i++ {
		if s[i] < '0' || s[i] > '9' {
			return false
		}
	}
	return true
}
|
||||
|
||||
// isHexLike reports whether more than 80% of the bytes of s are hexadecimal
// digits (0-9, a-f, A-F). An empty string yields false.
func isHexLike(s string) bool {
	matched := 0
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case '0' <= c && c <= '9',
			'a' <= c && c <= 'f',
			'A' <= c && c <= 'F':
			matched++
		}
	}
	// For s == "" this is 0/0 = NaN, and NaN > 0.8 is false.
	return float64(matched)/float64(len(s)) > 0.8
}
|
||||
|
||||
// yencPartRe matches a yEnc part indicator such as "(01/50)" together with
// any whitespace around it.
var yencPartRe = regexp.MustCompile(`\s*\(\d+/\d+\)\s*`)

// sanitizeFilename turns a raw subject line into something usable as a file
// name: part indicators like "(01/50)" and the literal keyword "yEnc" are
// removed, surrounding whitespace is trimmed, and each of the characters
// / \ : * ? " < > | is replaced by an underscore.
func sanitizeFilename(s string) string {
	const invalid = `/\:*?"<>|`

	s = yencPartRe.ReplaceAllString(s, "")
	s = strings.ReplaceAll(s, "yEnc", "")

	// All invalid characters are single ASCII bytes, so a byte-wise pass
	// cannot split a multi-byte UTF-8 sequence.
	out := []byte(strings.TrimSpace(s))
	for i, c := range out {
		if strings.IndexByte(invalid, c) >= 0 {
			out[i] = '_'
		}
	}
	return string(out)
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue