fix(upgrade): break auto-apply restart loop (0.9.8)
Some checks failed
Release / release (push) Failing after 0s
Release / docker (push) Has been skipped
Release / virustotal (push) Failing after 0s

Two bugs in 0.9.6/0.9.7 caused an infinite restart loop after a Force update
signal: the CLI never reported the upgrade outcome, so `upgrade_requested`
stayed `true`; AND `applyAutoUpgrade` called `os.Exit(0)` even when the
target version equalled the current one, so systemd respawned and saw the
flag again.

  - new Client.ReportUpgradeResult → POST /api/internal/agent/upgrade-result
  - applyAutoUpgrade calls it on success / failure / no-op
  - no-op case detected up front (same version) — skips Execute + Exit,
    clears server flag instead
This commit is contained in:
Deivid Soto 2026-05-27 08:18:33 +02:00
parent 7e96976257
commit 2e7cd7e8ed
4 changed files with 77 additions and 2 deletions

View file

@ -5,6 +5,27 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [0.9.8] - 2026-05-27
### Fixed
- **auto-upgrade restart loop**: when the server signal arrived for a version
the daemon was already running (e.g. flag still set after a previous
upgrade), `applyAutoUpgrade` would call `upgrade.Upgrader.Execute` (which
no-ops), then `os.Exit(0)` anyway — systemd respawned, the flag was still set,
and the cycle repeated. Now the no-op case is detected up front: the daemon
clears the server flag via `/api/internal/agent/upgrade-result` and stays alive.
- **upgrade flag stuck after success**: the CLI never reported the upgrade
outcome, so `upgrade_requested` stayed `true` in the DB forever. The
daemon now calls `/api/internal/agent/upgrade-result` on every
`applyAutoUpgrade` branch (success, failure, no-op), so the server clears the
flag and the restart loops end.
### Added
- New `Client.ReportUpgradeResult(ctx, agentID, success, version, errMsg)` HTTP
method wrapping `POST /api/internal/agent/upgrade-result`.
## [0.9.7] - 2026-05-26 ## [0.9.7] - 2026-05-26
### Added ### Added

View file

@ -91,6 +91,24 @@ func (c *Client) Deregister(ctx context.Context, agentID string) error {
return nil return nil
} }
// ReportUpgradeResult posts the outcome of a previously requested upgrade to
// /api/internal/agent/upgrade-result so the server can clear
// `upgrade_requested`. If this is never sent the flag remains set and the
// daemon re-enters applyAutoUpgrade on every sync after an upgrade, including
// the "already on target version" no-op case.
//
// version and errMsg are left out of the JSON payload when empty. The decoded
// response body is discarded; any doPost failure is returned wrapped.
func (c *Client) ReportUpgradeResult(ctx context.Context, agentID string, success bool, version, errMsg string) error {
	// Wire format of the request body; field tags define the JSON keys.
	type upgradeResult struct {
		AgentID string `json:"agentId"`
		Success bool   `json:"success"`
		Version string `json:"version,omitempty"`
		Error   string `json:"error,omitempty"`
	}

	payload := upgradeResult{
		AgentID: agentID,
		Success: success,
		Version: version,
		Error:   errMsg,
	}

	// The response is decoded only to satisfy doPost; callers do not need it.
	var discard StatusResponse
	err := c.doPost(ctx, "/api/internal/agent/upgrade-result", payload, &discard)
	if err != nil {
		return fmt.Errorf("report upgrade result: %w", err)
	}
	return nil
}
// ReportStatus reports download progress. Returns server-side flags the CLI must act on. // ReportStatus reports download progress. Returns server-side flags the CLI must act on.
func (c *Client) ReportStatus(ctx context.Context, update StatusUpdate) (*StatusResponse, error) { func (c *Client) ReportStatus(ctx context.Context, update StatusUpdate) (*StatusResponse, error) {
var resp StatusResponse var resp StatusResponse

View file

@ -294,8 +294,30 @@ func (d *Daemon) Deregister() {
// supervisor (systemd Restart=always on Linux) respawns on the new binary. // supervisor (systemd Restart=always on Linux) respawns on the new binary.
// Triggered by the server's upgrade signal — opt-in flag set by the user from // Triggered by the server's upgrade signal — opt-in flag set by the user from
// the web UI; the daemon never auto-upgrades on a passive version bump. // the web UI; the daemon never auto-upgrades on a passive version bump.
//
// Reports the outcome to /api/internal/agent/upgrade-result so the server
// clears `upgrade_requested`. Without this report the flag stays sticky and
// the daemon would loop on every sync — including the no-op case where it's
// already on the target version.
func (d *Daemon) applyAutoUpgrade(targetVersion string) { func (d *Daemon) applyAutoUpgrade(targetVersion string) {
currentClean := strings.TrimPrefix(d.cfg.Version, "v") currentClean := strings.TrimPrefix(d.cfg.Version, "v")
targetClean := strings.TrimPrefix(targetVersion, "v")
// No-op: server signal arrived but we're already running the target. This
// happens when the daemon restarts after a previous auto-upgrade before
// ReportUpgradeResult cleared the flag, or when the operator manually
// installed the same version out-of-band. Skip Execute (which would also
// no-op) AND skip os.Exit, but DO clear the flag — otherwise we loop.
if currentClean == targetClean {
log.Printf("[upgrade] already on v%s — clearing server flag", currentClean)
ctxR, cancelR := context.WithTimeout(context.Background(), 10*time.Second)
defer cancelR()
if err := d.client.ReportUpgradeResult(ctxR, d.cfg.AgentID, true, currentClean, ""); err != nil {
log.Printf("[upgrade] report-result failed (will retry on next signal): %v", err)
}
return
}
upgrader := &upgrade.Upgrader{ upgrader := &upgrade.Upgrader{
CurrentVersion: currentClean, CurrentVersion: currentClean,
OnProgress: func(msg string) { OnProgress: func(msg string) {
@ -307,10 +329,24 @@ func (d *Daemon) applyAutoUpgrade(targetVersion string) {
result := upgrader.Execute(ctx, targetVersion) result := upgrader.Execute(ctx, targetVersion)
if !result.Success { if !result.Success {
log.Printf("[upgrade] auto-upgrade failed: %v", result.Error) log.Printf("[upgrade] auto-upgrade failed: %v", result.Error)
errMsg := ""
if result.Error != nil {
errMsg = result.Error.Error()
}
ctxR, cancelR := context.WithTimeout(context.Background(), 10*time.Second)
defer cancelR()
if err := d.client.ReportUpgradeResult(ctxR, d.cfg.AgentID, false, targetClean, errMsg); err != nil {
log.Printf("[upgrade] report-result failed: %v", err)
}
return return
} }
log.Printf("[upgrade] upgraded v%s → v%s; exiting so service supervisor restarts on new binary", log.Printf("[upgrade] upgraded v%s → v%s; reporting result + exiting so service supervisor restarts on new binary",
result.OldVersion, result.NewVersion) result.OldVersion, result.NewVersion)
ctxR, cancelR := context.WithTimeout(context.Background(), 10*time.Second)
if err := d.client.ReportUpgradeResult(ctxR, d.cfg.AgentID, true, result.NewVersion, ""); err != nil {
log.Printf("[upgrade] report-result failed: %v", err)
}
cancelR()
time.Sleep(500 * time.Millisecond) time.Sleep(500 * time.Millisecond)
os.Exit(0) os.Exit(0)
} }

View file

@ -1,4 +1,4 @@
package cmd package cmd
// Version is the CLI version. Overridden by goreleaser ldflags at release time. // Version is the CLI version. Overridden by goreleaser ldflags at release time.
var Version = "0.9.7" var Version = "0.9.8"