fix(upgrade): break auto-apply restart loop (0.9.8)
Two bugs in 0.9.6/0.9.7 caused an infinite restart loop after a Force update
signal: the CLI never reported the upgrade outcome, so `upgrade_requested`
stayed `true`; AND `applyAutoUpgrade` called `os.Exit(0)` even when the
target version equalled the current one, so systemd respawned and saw the
flag again.
- new Client.ReportUpgradeResult → POST /api/internal/agent/upgrade-result
- applyAutoUpgrade calls it on success / failure / no-op
- no-op case detected up front (same version) — skips Execute + Exit,
clears server flag instead
This commit is contained in:
parent
7e96976257
commit
2e7cd7e8ed
4 changed files with 77 additions and 2 deletions
|
|
@ -91,6 +91,24 @@ func (c *Client) Deregister(ctx context.Context, agentID string) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
// ReportUpgradeResult tells the server the outcome of a previously requested
|
||||
// upgrade so the server can clear `upgrade_requested`. Without this call the
|
||||
// flag stays sticky and the daemon would re-trigger applyAutoUpgrade on every
|
||||
// sync after upgrade — even for "already on target version" no-ops.
|
||||
func (c *Client) ReportUpgradeResult(ctx context.Context, agentID string, success bool, version, errMsg string) error {
|
||||
req := struct {
|
||||
AgentID string `json:"agentId"`
|
||||
Success bool `json:"success"`
|
||||
Version string `json:"version,omitempty"`
|
||||
Error string `json:"error,omitempty"`
|
||||
}{AgentID: agentID, Success: success, Version: version, Error: errMsg}
|
||||
var resp StatusResponse
|
||||
if err := c.doPost(ctx, "/api/internal/agent/upgrade-result", req, &resp); err != nil {
|
||||
return fmt.Errorf("report upgrade result: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ReportStatus reports download progress. Returns server-side flags the CLI must act on.
|
||||
func (c *Client) ReportStatus(ctx context.Context, update StatusUpdate) (*StatusResponse, error) {
|
||||
var resp StatusResponse
|
||||
|
|
|
|||
|
|
@ -294,8 +294,30 @@ func (d *Daemon) Deregister() {
|
|||
// supervisor (systemd Restart=always on Linux) respawns on the new binary.
|
||||
// Triggered by the server's upgrade signal — opt-in flag set by the user from
|
||||
// the web UI; the daemon never auto-upgrades on a passive version bump.
|
||||
//
|
||||
// Reports the outcome to /api/internal/agent/upgrade-result so the server
|
||||
// clears `upgrade_requested`. Without this report the flag stays sticky and
|
||||
// the daemon would loop on every sync — including the no-op case where it's
|
||||
// already on the target version.
|
||||
func (d *Daemon) applyAutoUpgrade(targetVersion string) {
|
||||
currentClean := strings.TrimPrefix(d.cfg.Version, "v")
|
||||
targetClean := strings.TrimPrefix(targetVersion, "v")
|
||||
|
||||
// No-op: server signal arrived but we're already running the target. This
|
||||
// happens when the daemon restarts after a previous auto-upgrade before
|
||||
// reportUpgradeResult cleared the flag, or when the operator manually
|
||||
// installed the same version off-band. Skip Execute (which would also
|
||||
// no-op) AND skip os.Exit, but DO clear the flag — otherwise we loop.
|
||||
if currentClean == targetClean {
|
||||
log.Printf("[upgrade] already on v%s — clearing server flag", currentClean)
|
||||
ctxR, cancelR := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancelR()
|
||||
if err := d.client.ReportUpgradeResult(ctxR, d.cfg.AgentID, true, currentClean, ""); err != nil {
|
||||
log.Printf("[upgrade] report-result failed (will retry on next signal): %v", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
upgrader := &upgrade.Upgrader{
|
||||
CurrentVersion: currentClean,
|
||||
OnProgress: func(msg string) {
|
||||
|
|
@ -307,10 +329,24 @@ func (d *Daemon) applyAutoUpgrade(targetVersion string) {
|
|||
result := upgrader.Execute(ctx, targetVersion)
|
||||
if !result.Success {
|
||||
log.Printf("[upgrade] auto-upgrade failed: %v", result.Error)
|
||||
errMsg := ""
|
||||
if result.Error != nil {
|
||||
errMsg = result.Error.Error()
|
||||
}
|
||||
ctxR, cancelR := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancelR()
|
||||
if err := d.client.ReportUpgradeResult(ctxR, d.cfg.AgentID, false, targetClean, errMsg); err != nil {
|
||||
log.Printf("[upgrade] report-result failed: %v", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
log.Printf("[upgrade] upgraded v%s → v%s; exiting so service supervisor restarts on new binary",
|
||||
log.Printf("[upgrade] upgraded v%s → v%s; reporting result + exiting so service supervisor restarts on new binary",
|
||||
result.OldVersion, result.NewVersion)
|
||||
ctxR, cancelR := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
if err := d.client.ReportUpgradeResult(ctxR, d.cfg.AgentID, true, result.NewVersion, ""); err != nil {
|
||||
log.Printf("[upgrade] report-result failed: %v", err)
|
||||
}
|
||||
cancelR()
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
os.Exit(0)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue