feat(usenet): implement full NNTP download pipeline
Complete usenet download support for the unarr CLI:
- NZB XML parser with password extraction from <head> meta
- yEnc decoder with CRC32 verification
- NNTP client with TLS, auth, and connection pool (up to 10 conns)
- Segment downloader with parallel workers and progress reporting
- Post-processing: par2 verify/repair, unrar/7z extraction with password support
- Agent client methods: SearchNzbs, DownloadNzb, GetUsenetCredentials
- UsenetDownloader implementing the full Downloader interface
- Daemon wiring: UsenetDownloader passed to Manager

E2E tested: Oppenheimer 1080p (2.94 GB) downloaded via NNTP in 77.6s.
This commit is contained in:
parent
5f337eebd7
commit
e332c0a6e4
15 changed files with 3016 additions and 23 deletions
224
internal/usenet/postprocess/extract.go
Normal file
224
internal/usenet/postprocess/extract.go
Normal file
|
|
@ -0,0 +1,224 @@
|
|||
package postprocess
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// ExtractorType names the external archive tool used for extraction.
type ExtractorType string

// Known extractor identifiers. ExtractorNone is the zero value and signals
// that no supported tool is available.
const (
	ExtractorNone  = ExtractorType("")
	ExtractorUnrar = ExtractorType("unrar")
	Extractor7z    = ExtractorType("7z")
)
|
||||
|
||||
// FindExtractor checks which archive extractor is available in PATH.
|
||||
func FindExtractor() (ExtractorType, string) {
|
||||
if path, err := exec.LookPath("unrar"); err == nil {
|
||||
return ExtractorUnrar, path
|
||||
}
|
||||
if path, err := exec.LookPath("7z"); err == nil {
|
||||
return Extractor7z, path
|
||||
}
|
||||
return ExtractorNone, ""
|
||||
}
|
||||
|
||||
// Extract extracts an archive using the best available tool.
|
||||
// password is optional — pass "" if not needed.
|
||||
// Returns the list of extracted file paths.
|
||||
func Extract(archivePath string, outputDir string, password string) ([]string, error) {
|
||||
extType, extPath := FindExtractor()
|
||||
if extType == ExtractorNone {
|
||||
return nil, fmt.Errorf("no archive extractor found (install unrar or 7z)")
|
||||
}
|
||||
|
||||
switch extType {
|
||||
case ExtractorUnrar:
|
||||
return extractUnrar(extPath, archivePath, outputDir, password)
|
||||
case Extractor7z:
|
||||
return extract7z(extPath, archivePath, outputDir, password)
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown extractor: %s", extType)
|
||||
}
|
||||
}
|
||||
|
||||
// extractUnrar extracts using unrar.
|
||||
func extractUnrar(unrarPath, archivePath, outputDir, password string) ([]string, error) {
|
||||
args := []string{"x", "-o+", "-y"}
|
||||
if password != "" {
|
||||
args = append(args, "-p"+password)
|
||||
} else {
|
||||
args = append(args, "-p-") // no password, skip asking
|
||||
}
|
||||
args = append(args, archivePath, outputDir+"/")
|
||||
|
||||
cmd := exec.Command(unrarPath, args...)
|
||||
cmd.Dir = outputDir
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
// Check for password error
|
||||
outStr := string(output)
|
||||
if strings.Contains(outStr, "wrong password") || strings.Contains(outStr, "Incorrect password") {
|
||||
return nil, &PasswordError{Archive: archivePath}
|
||||
}
|
||||
return nil, fmt.Errorf("unrar: %w\n%s", err, output)
|
||||
}
|
||||
|
||||
return listExtractedFiles(outputDir, archivePath)
|
||||
}
|
||||
|
||||
// extract7z extracts using 7z.
|
||||
func extract7z(szPath, archivePath, outputDir, password string) ([]string, error) {
|
||||
args := []string{"x", "-y", "-o" + outputDir}
|
||||
if password != "" {
|
||||
args = append(args, "-p"+password)
|
||||
} else {
|
||||
args = append(args, "-p") // empty password
|
||||
}
|
||||
args = append(args, archivePath)
|
||||
|
||||
cmd := exec.Command(szPath, args...)
|
||||
cmd.Dir = outputDir
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
outStr := string(output)
|
||||
if strings.Contains(outStr, "Wrong password") || strings.Contains(outStr, "incorrect password") {
|
||||
return nil, &PasswordError{Archive: archivePath}
|
||||
}
|
||||
return nil, fmt.Errorf("7z: %w\n%s", err, output)
|
||||
}
|
||||
|
||||
return listExtractedFiles(outputDir, archivePath)
|
||||
}
|
||||
|
||||
// IsPasswordProtected checks if a rar archive requires a password.
|
||||
func IsPasswordProtected(archivePath string) bool {
|
||||
extType, extPath := FindExtractor()
|
||||
if extType == ExtractorNone {
|
||||
return false
|
||||
}
|
||||
|
||||
switch extType {
|
||||
case ExtractorUnrar:
|
||||
cmd := exec.Command(extPath, "t", "-p-", archivePath)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
outStr := string(output)
|
||||
return strings.Contains(outStr, "password") || strings.Contains(outStr, "encrypted")
|
||||
}
|
||||
case Extractor7z:
|
||||
cmd := exec.Command(extPath, "t", "-p", archivePath)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
outStr := string(output)
|
||||
return strings.Contains(outStr, "Wrong password") || strings.Contains(outStr, "encrypted")
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// listExtractedFiles returns new files in outputDir that aren't the archive itself.
|
||||
func listExtractedFiles(dir, archivePath string) ([]string, error) {
|
||||
archiveBase := filepath.Base(archivePath)
|
||||
archiveDir := filepath.Dir(archivePath)
|
||||
var files []string
|
||||
|
||||
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return nil // skip errors
|
||||
}
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
base := filepath.Base(path)
|
||||
// Skip archive files themselves
|
||||
if isArchiveFile(base) && filepath.Dir(path) == archiveDir {
|
||||
return nil
|
||||
}
|
||||
if base == archiveBase {
|
||||
return nil
|
||||
}
|
||||
files = append(files, path)
|
||||
return nil
|
||||
})
|
||||
return files, err
|
||||
}
|
||||
|
||||
// Cleanup removes archive and parity files from a directory.
|
||||
func Cleanup(dir string) error {
|
||||
entries, err := os.ReadDir(dir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, entry := range entries {
|
||||
if entry.IsDir() {
|
||||
continue
|
||||
}
|
||||
name := entry.Name()
|
||||
if isCleanupTarget(name) {
|
||||
path := filepath.Join(dir, name)
|
||||
log.Printf("[usenet] cleanup: removing %s", name)
|
||||
os.Remove(path)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// isArchiveFile returns true for rar/split archive files.
|
||||
func isArchiveFile(name string) bool {
|
||||
lower := strings.ToLower(name)
|
||||
ext := filepath.Ext(lower)
|
||||
|
||||
if ext == ".rar" {
|
||||
return true
|
||||
}
|
||||
// .r00, .r01, ... .r99, .s00, etc.
|
||||
if len(ext) == 4 && (ext[1] == 'r' || ext[1] == 's') {
|
||||
return isNumeric(ext[2:])
|
||||
}
|
||||
// .001, .002, etc.
|
||||
if len(ext) == 4 && isNumeric(ext[1:]) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Patterns used by isCleanupTarget. Both are case-insensitive and anchored
// at the end of the file name.
var (
	// cleanupExts matches parity, metadata and other auxiliary extensions.
	cleanupExts = regexp.MustCompile(`(?i)\.(par2|nfo|sfv|nzb|srr|srs|jpg|png|txt|url)$`)
	// cleanupRarParts matches rar volumes and numbered split parts.
	cleanupRarParts = regexp.MustCompile(`(?i)\.(rar|r\d{2}|s\d{2}|\d{3})$`)
)

// isCleanupTarget reports whether a file with the given name should be
// deleted once extraction is done, i.e. whether its extension matches
// either cleanupExts or cleanupRarParts.
func isCleanupTarget(name string) bool {
	return cleanupExts.MatchString(name) || cleanupRarParts.MatchString(name)
}
|
||||
|
||||
// isNumeric reports whether s is non-empty and consists solely of the ASCII
// digits '0' through '9'.
func isNumeric(s string) bool {
	if s == "" {
		return false
	}
	for i := 0; i < len(s); i++ {
		if b := s[i]; b < '0' || b > '9' {
			return false
		}
	}
	return true
}
|
||||
|
||||
// PasswordError is returned when an archive cannot be processed because it
// needs a password that was missing or rejected.
type PasswordError struct {
	// Archive is the path of the affected archive.
	Archive string
}

// Error implements the error interface.
func (e *PasswordError) Error() string {
	return "archive is password protected: " + e.Archive
}
|
||||
65
internal/usenet/postprocess/par2.go
Normal file
65
internal/usenet/postprocess/par2.go
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
package postprocess
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os/exec"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Par2Available reports whether an executable named "par2" can be found in
// PATH.
func Par2Available() bool {
	if _, err := exec.LookPath("par2"); err != nil {
		return false
	}
	return true
}
|
||||
|
||||
// Par2Verify verifies files using a par2 file.
|
||||
// Returns nil if verification passes, error otherwise.
|
||||
func Par2Verify(par2File string) error {
|
||||
if !Par2Available() {
|
||||
log.Printf("[usenet] par2 not installed, skipping verification")
|
||||
return nil
|
||||
}
|
||||
|
||||
cmd := exec.Command("par2", "verify", par2File)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
outStr := string(output)
|
||||
// Check if repair is possible
|
||||
if strings.Contains(outStr, "Repair is possible") {
|
||||
return &Par2RepairableError{Par2File: par2File}
|
||||
}
|
||||
if strings.Contains(outStr, "Repair is not possible") {
|
||||
return fmt.Errorf("par2: verification failed and repair not possible:\n%s", outStr)
|
||||
}
|
||||
return fmt.Errorf("par2 verify: %w\n%s", err, outStr)
|
||||
}
|
||||
|
||||
log.Printf("[usenet] par2: verification OK")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Par2Repair attempts to repair files using par2 parity data.
|
||||
func Par2Repair(par2File string) error {
|
||||
if !Par2Available() {
|
||||
return fmt.Errorf("par2 not installed")
|
||||
}
|
||||
|
||||
cmd := exec.Command("par2", "repair", par2File)
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return fmt.Errorf("par2 repair: %w\n%s", err, output)
|
||||
}
|
||||
|
||||
log.Printf("[usenet] par2: repair successful")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Par2RepairableError reports that par2 verification failed but the tool
// indicated that the damage can be repaired.
type Par2RepairableError struct {
	// Par2File is the par2 file that was verified.
	Par2File string
}

// Error implements the error interface.
func (e *Par2RepairableError) Error() string {
	return "par2: verification failed, repair possible: " + e.Par2File
}
|
||||
229
internal/usenet/postprocess/pipeline.go
Normal file
229
internal/usenet/postprocess/pipeline.go
Normal file
|
|
@ -0,0 +1,229 @@
|
|||
package postprocess
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Result describes what post-processing produced.
type Result struct {
	// FinalPath is the path to the main content file (e.g., the video).
	FinalPath string
	// Files lists all final files.
	Files []string
	// Repaired is true when a par2 repair was performed successfully.
	Repaired bool
	// Extracted is true when archive extraction was performed.
	Extracted bool
}
|
||||
|
||||
// Options controls how post-processing behaves.
type Options struct {
	// Password is used for encrypted archives; leave empty for none.
	Password string
	// Cleanup requests removal of intermediate files after extraction.
	Cleanup bool
}
|
||||
|
||||
// Process runs the full post-processing pipeline on downloaded usenet files.
|
||||
// Steps: par2 verify → par2 repair → extract archives → cleanup → find main file.
|
||||
func Process(dir string, downloadedFiles map[string]string, opts Options) (*Result, error) {
|
||||
result := &Result{}
|
||||
|
||||
// Step 1: Par2 verification and repair
|
||||
par2File := findPar2File(downloadedFiles)
|
||||
if par2File != "" {
|
||||
err := Par2Verify(par2File)
|
||||
if err != nil {
|
||||
if _, ok := err.(*Par2RepairableError); ok {
|
||||
log.Printf("[usenet] attempting par2 repair...")
|
||||
if repairErr := Par2Repair(par2File); repairErr != nil {
|
||||
log.Printf("[usenet] par2 repair failed: %v", repairErr)
|
||||
} else {
|
||||
result.Repaired = true
|
||||
}
|
||||
} else {
|
||||
log.Printf("[usenet] par2 verification error: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 2: Find and extract archives
|
||||
rarFile := findFirstRar(downloadedFiles)
|
||||
if rarFile != "" {
|
||||
log.Printf("[usenet] extracting archive: %s", filepath.Base(rarFile))
|
||||
|
||||
// Check if password-protected
|
||||
if opts.Password == "" && IsPasswordProtected(rarFile) {
|
||||
return nil, &PasswordError{Archive: rarFile}
|
||||
}
|
||||
|
||||
extracted, err := Extract(rarFile, dir, opts.Password)
|
||||
if err != nil {
|
||||
if _, ok := err.(*PasswordError); ok {
|
||||
return nil, err
|
||||
}
|
||||
return nil, fmt.Errorf("extraction failed: %w", err)
|
||||
}
|
||||
|
||||
result.Extracted = true
|
||||
result.Files = extracted
|
||||
|
||||
// Step 3: Cleanup archive + par2 files
|
||||
if opts.Cleanup {
|
||||
Cleanup(dir)
|
||||
}
|
||||
} else {
|
||||
// No archives — content files are the final files
|
||||
for _, path := range downloadedFiles {
|
||||
if !isCleanupTarget(filepath.Base(path)) {
|
||||
result.Files = append(result.Files, path)
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup metadata files
|
||||
if opts.Cleanup {
|
||||
for name, path := range downloadedFiles {
|
||||
lower := strings.ToLower(name)
|
||||
ext := filepath.Ext(lower)
|
||||
if ext == ".par2" || ext == ".nfo" || ext == ".sfv" || ext == ".nzb" {
|
||||
log.Printf("[usenet] cleanup: removing %s", name)
|
||||
os.Remove(path)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 4: Find main content file (largest video file)
|
||||
result.FinalPath = findMainFile(dir, result.Files)
|
||||
|
||||
if result.FinalPath == "" && len(result.Files) > 0 {
|
||||
result.FinalPath = result.Files[0]
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// findPar2File picks the par2 file to hand to par2. Among all entries whose
// name ends in ".par2" (case-insensitive) and whose path can be stat'ed, it
// returns the path of the smallest file, on the assumption that the index
// file is smaller than the recovery volumes. It returns "" when there is no
// usable par2 file.
func findPar2File(files map[string]string) string {
	var (
		chosen   string
		minBytes int64
		have     bool
	)

	for name, path := range files {
		if strings.ToLower(filepath.Ext(name)) != ".par2" {
			continue
		}
		info, err := os.Stat(path)
		if err != nil {
			continue // unreadable or missing: not a candidate
		}
		if size := info.Size(); !have || size < minBytes {
			chosen, minBytes, have = path, size, true
		}
	}
	return chosen
}
|
||||
|
||||
// firstRarRe matches the explicitly numbered first volume of a multi-part
// rar set, e.g. ".part1.rar", ".part01.rar" or ".part001.rar"
// (case-insensitive). A plain ".rar" name does not match; that case is
// handled separately in findFirstRar.
var firstRarRe = regexp.MustCompile(`(?i)\.part0*1\.rar$`)

// findFirstRar returns the path of the archive volume extraction should
// start from, or "" when files contains no archive.
//
// Candidates are considered in this order:
//  1. a path matching firstRarRe (".partN.rar" with N == 1);
//  2. the ".rar" file with the shortest name;
//  3. a file whose name ends in ".001".
//
// Names are visited in sorted order so that the result does not depend on
// Go's randomized map iteration: ties are resolved in favour of the
// lexicographically smallest name.
func findFirstRar(files map[string]string) string {
	names := make([]string, 0, len(files))
	for name := range files {
		names = append(names, name)
	}
	sort.Strings(names)

	// Priority 1: explicitly named first part (part01.rar, part1.rar).
	for _, name := range names {
		if path := files[name]; firstRarRe.MatchString(path) {
			return path
		}
	}

	// Priority 2: the shortest-named .rar file (usually the first volume).
	// The strict comparison keeps the earliest name among equal lengths.
	shortest := ""
	found := false
	for _, name := range names {
		if !strings.HasSuffix(strings.ToLower(name), ".rar") {
			continue
		}
		if !found || len(name) < len(shortest) {
			shortest, found = name, true
		}
	}
	if found {
		return files[shortest]
	}

	// Priority 3: .001 split format.
	for _, name := range names {
		if strings.HasSuffix(strings.ToLower(name), ".001") {
			return files[name]
		}
	}
	return ""
}
|
||||
|
||||
// findMainFile returns the path of the largest video file. It first looks
// at the explicit files list; only if that yields nothing does it scan the
// top level of dir (subdirectories are not descended into). Files are
// recognised as video by extension (case-insensitive), entries that cannot
// be stat'ed are skipped, and "" is returned when no candidate is found.
func findMainFile(dir string, files []string) string {
	isVideo := func(p string) bool {
		switch strings.ToLower(filepath.Ext(p)) {
		case ".mkv", ".mp4", ".avi", ".mov", ".wmv", ".flv", ".m4v", ".ts", ".webm":
			return true
		}
		return false
	}

	var (
		winner     string
		winnerSize int64
	)
	// consider keeps p if it is strictly larger than the current winner.
	consider := func(p string, size int64) {
		if size > winnerSize {
			winnerSize, winner = size, p
		}
	}

	// Pass 1: the explicit file list.
	for _, candidate := range files {
		if !isVideo(candidate) {
			continue
		}
		if info, err := os.Stat(candidate); err == nil {
			consider(candidate, info.Size())
		}
	}
	if winner != "" {
		return winner
	}

	// Pass 2: fall back to scanning the directory itself.
	entries, err := os.ReadDir(dir)
	if err != nil {
		return ""
	}
	for _, entry := range entries {
		if entry.IsDir() || !isVideo(entry.Name()) {
			continue
		}
		info, err := entry.Info()
		if err != nil {
			continue
		}
		consider(filepath.Join(dir, entry.Name()), info.Size())
	}
	return winner
}
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue