// Package nzb parses NZB XML documents and provides helpers for inspecting
// the files and segments they describe.
package nzb

import (
	"encoding/xml"
	"fmt"
	"io"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
)

// NZB represents a parsed NZB file containing one or more files to download.
type NZB struct {
	Files    []File
	Password string            // value of the head meta entry whose type is "password"
	Meta     map[string]string // all head meta entries, keyed by their type attribute
}

// File represents a single file within an NZB, composed of multiple segments.
type File struct {
	Poster   string
	Date     int64 // the file's date attribute parsed as a base-10 integer
	Subject  string
	Groups   []string
	Segments []Segment
}

// Segment represents a single NNTP article segment of a file.
type Segment struct {
	Bytes     int64
	Number    int
	MessageID string // message-id without angle brackets
}

// xmlNZB is the raw XML structure for parsing.
type xmlNZB struct {
	XMLName xml.Name  `xml:"nzb"`
	Head    xmlHead   `xml:"head"`
	Files   []xmlFile `xml:"file"`
}

// xmlHead mirrors the head element, which holds the meta entries.
type xmlHead struct {
	Meta []xmlMeta `xml:"meta"`
}

// xmlMeta mirrors a single meta element: its type attribute and text content.
type xmlMeta struct {
	Type  string `xml:"type,attr"`
	Value string `xml:",chardata"`
}

// xmlFile mirrors a file element. Attributes are kept as strings here and
// converted to their final types by Parse.
type xmlFile struct {
	Poster   string      `xml:"poster,attr"`
	Date     string      `xml:"date,attr"`
	Subject  string      `xml:"subject,attr"`
	Groups   xmlGroups   `xml:"groups"`
	Segments xmlSegments `xml:"segments"`
}

// xmlGroups mirrors the groups element wrapping the individual group names.
type xmlGroups struct {
	Groups []string `xml:"group"`
}

// xmlSegments mirrors the segments element wrapping the individual segments.
type xmlSegments struct {
	Segments []xmlSegment `xml:"segment"`
}

// xmlSegment mirrors a single segment element; its text content is the
// message ID, and the numeric attributes are kept as strings until parsed.
type xmlSegment struct {
	Bytes     string `xml:"bytes,attr"`
	Number    string `xml:"number,attr"`
	MessageID string `xml:",chardata"`
}

// Parse reads and parses an NZB XML document from the given reader.
func Parse(r io.Reader) (*NZB, error) { var raw xmlNZB dec := xml.NewDecoder(r) if err := dec.Decode(&raw); err != nil { return nil, fmt.Errorf("nzb: xml decode: %w", err) } if len(raw.Files) == 0 { return nil, fmt.Errorf("nzb: no files found") } nzb := &NZB{ Files: make([]File, 0, len(raw.Files)), Meta: make(map[string]string), } // Parse meta entries for _, m := range raw.Head.Meta { if m.Type != "" { nzb.Meta[m.Type] = strings.TrimSpace(m.Value) } } nzb.Password = nzb.Meta["password"] for _, rf := range raw.Files { date, _ := strconv.ParseInt(rf.Date, 10, 64) segs := make([]Segment, 0, len(rf.Segments.Segments)) for _, rs := range rf.Segments.Segments { bytes, _ := strconv.ParseInt(rs.Bytes, 10, 64) num, _ := strconv.Atoi(rs.Number) msgID := strings.TrimSpace(rs.MessageID) // Strip angle brackets if present msgID = strings.TrimPrefix(msgID, "<") msgID = strings.TrimSuffix(msgID, ">") if msgID == "" { continue } segs = append(segs, Segment{ Bytes: bytes, Number: num, MessageID: msgID, }) } if len(segs) == 0 { continue } nzb.Files = append(nzb.Files, File{ Poster: rf.Poster, Date: date, Subject: rf.Subject, Groups: rf.Groups.Groups, Segments: segs, }) } if len(nzb.Files) == 0 { return nil, fmt.Errorf("nzb: no valid files with segments found") } return nzb, nil } // ParseBytes parses an NZB from a byte slice. func ParseBytes(data []byte) (*NZB, error) { return Parse(strings.NewReader(string(data))) } // TotalBytes returns the total size of all segments across all files. func (n *NZB) TotalBytes() int64 { var total int64 for _, f := range n.Files { total += f.TotalBytes() } return total } // TotalSegments returns the total number of segments across all files. func (n *NZB) TotalSegments() int { var total int for _, f := range n.Files { total += len(f.Segments) } return total } // ContentFiles returns files that are likely content (video, audio, images), // excluding par2, nfo, sfv, nzb, and sample files. 
func (n *NZB) ContentFiles() []File { var result []File for _, f := range n.Files { name := f.Filename() if isMetadataFile(name) || isSampleFile(name) { continue } result = append(result, f) } return result } // Par2Files returns only par2 parity files. func (n *NZB) Par2Files() []File { var result []File for _, f := range n.Files { ext := strings.ToLower(filepath.Ext(f.Filename())) if ext == ".par2" { result = append(result, f) } } return result } // RarFiles returns rar archive files (.rar, .rNN, .NNN). func (n *NZB) RarFiles() []File { var result []File for _, f := range n.Files { if isRarFile(f.Filename()) { result = append(result, f) } } return result } // LargestFile returns the file with the most total bytes. // Returns nil if NZB has no files. func (n *NZB) LargestFile() *File { if len(n.Files) == 0 { return nil } largest := &n.Files[0] for i := 1; i < len(n.Files); i++ { if n.Files[i].TotalBytes() > largest.TotalBytes() { largest = &n.Files[i] } } return largest } // IsObfuscated returns true if the NZB filenames appear to be obfuscated // (random strings instead of meaningful names). func (n *NZB) IsObfuscated() bool { for _, f := range n.Files { name := f.Filename() if name == "" { continue } base := strings.TrimSuffix(name, filepath.Ext(name)) // Check if base name is mostly hex/random chars (obfuscated) if len(base) > 10 && isHexLike(base) { return true } } return false } // HasRars returns true if the NZB contains rar archive files. func (n *NZB) HasRars() bool { for _, f := range n.Files { if isRarFile(f.Filename()) { return true } } return false } // HasPar2 returns true if the NZB contains par2 parity files. func (n *NZB) HasPar2() bool { for _, f := range n.Files { ext := strings.ToLower(filepath.Ext(f.Filename())) if ext == ".par2" { return true } } return false } // TotalBytes returns the sum of all segment sizes in this file. 
func (f *File) TotalBytes() int64 { var total int64 for _, s := range f.Segments { total += s.Bytes } return total } // subjectFilenameRe matches the filename in a typical Usenet subject line. // Examples: // // "Movie.2024.1080p.mkv" yEnc (1/50) // [PRiVATE]-[#a]- "file.rar" yEnc (01/99) var subjectFilenameRe = regexp.MustCompile(`"([^"]+)"`) // Filename extracts the filename from the subject line. // Falls back to the raw subject if no quoted filename is found. func (f *File) Filename() string { m := subjectFilenameRe.FindStringSubmatch(f.Subject) if len(m) >= 2 { return m[1] } // Fallback: try to extract something useful return sanitizeFilename(f.Subject) } // Extension returns the lowercase file extension (e.g., ".mkv", ".rar"). func (f *File) Extension() string { return strings.ToLower(filepath.Ext(f.Filename())) } // isMetadataFile returns true for non-content files. func isMetadataFile(name string) bool { ext := strings.ToLower(filepath.Ext(name)) switch ext { case ".par2", ".nfo", ".sfv", ".nzb", ".txt", ".jpg", ".png", ".url": return true } return false } // isSampleFile returns true for sample/preview files. // Matches filenames containing "sample" as a word boundary (e.g., "movie.sample.mkv", "Sample/video.mkv"). func isSampleFile(name string) bool { lower := strings.ToLower(name) // Match "sample" preceded and followed by non-alphanumeric (word boundary) idx := strings.Index(lower, "sample") if idx < 0 { return false } // Check it's not part of a larger word (e.g., "resampled") if idx > 0 && isAlphaNum(lower[idx-1]) { return false } end := idx + 6 if end < len(lower) && isAlphaNum(lower[end]) { return false } return true } func isAlphaNum(b byte) bool { return (b >= 'a' && b <= 'z') || (b >= '0' && b <= '9') } // isRarFile returns true for rar archive files. 
//
// Recognized extensions, matched case-insensitively, are .rar, the volume
// suffixes .rNN and .sNN, and purely numeric split suffixes such as .001.
// Only ASCII digits count as numbers, so signed forms like ".r-1" or ".+12"
// are rejected.
func isRarFile(name string) bool {
	lower := strings.ToLower(name)
	ext := filepath.Ext(lower)
	if ext == ".rar" {
		return true
	}
	if len(ext) != 4 {
		return false
	}
	switch ext[1] {
	case 'r', 's':
		// .r00, .r01, ..., .r99 and .s00, .s01, ...
		return isASCIIDigits(ext[2:])
	}
	// .001, .002, etc (split rar)
	return isASCIIDigits(ext[1:])
}

// isASCIIDigits reports whether s is non-empty and consists only of the
// characters '0' through '9'.
func isASCIIDigits(s string) bool {
	if s == "" {
		return false
	}
	for i := 0; i < len(s); i++ {
		if s[i] < '0' || s[i] > '9' {
			return false
		}
	}
	return true
}

// isHexLike returns true if the string looks like random hex/obfuscated:
// more than 80% of its characters are hexadecimal digits (0-9, a-f, A-F).
// The ratio is computed over characters (runes), not bytes, and the empty
// string is never hex-like.
func isHexLike(s string) bool {
	total, hexChars := 0, 0
	for _, c := range s {
		total++
		if (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F') {
			hexChars++
		}
	}
	if total == 0 {
		return false
	}
	return float64(hexChars)/float64(total) > 0.8
}

// yencPartRe matches a yEnc part indicator such as "(01/50)" together with
// the whitespace around it.
var yencPartRe = regexp.MustCompile(`\s*\(\d+/\d+\)\s*`)

// invalidPathChars replaces each character that is invalid in filenames with
// an underscore in a single pass.
var invalidPathChars = strings.NewReplacer(
	"/", "_",
	"\\", "_",
	":", "_",
	"*", "_",
	"?", "_",
	"\"", "_",
	"<", "_",
	">", "_",
	"|", "_",
)

// sanitizeFilename removes characters that are invalid in filenames.
//
// It first drops yEnc part indicators like "(01/50)" and the "yEnc" keyword,
// trims surrounding whitespace, and then replaces each of / \ : * ? " < > |
// with an underscore.
func sanitizeFilename(s string) string {
	s = yencPartRe.ReplaceAllString(s, "")
	s = strings.ReplaceAll(s, "yEnc", "")
	s = strings.TrimSpace(s)
	return invalidPathChars.Replace(s)
}