fix(ws): add ping/pong keepalive and read deadline to detect zombie connections

Without a SetReadDeadline, a silently dead WebSocket (e.g. Cloudflare
dropping the connection without a close frame) would block readLoop
forever. The daemon would appear connected but never receive tasks,
and never fall back to HTTP polling.

- Send RFC 6455 pings every 30s (resets Cloudflare's idle timer)
- SetReadDeadline of 45s, refreshed on every pong and text message
- SetWriteDeadline of 10s on all writes to prevent blocked sends
- On timeout, readLoop emits "disconnected" → HybridTransport falls
  back to HTTP polling and starts the WS reconnection loop
This commit is contained in:
Deivid Soto 2026-04-08 00:06:19 +02:00
parent 56a386f4e2
commit 2398707cc1

View file

@ -226,10 +226,51 @@ func (t *WSTransport) send(msg any) error {
if err != nil {
return err
}
_ = t.conn.SetWriteDeadline(time.Now().Add(10 * time.Second))
return t.conn.WriteMessage(websocket.TextMessage, data)
}
func (t *WSTransport) readLoop(conn *websocket.Conn) {
// Cloudflare idle timeout is 100s. We send pings every 30s and expect
// either a pong or a server message within 45s. If neither arrives,
// the read deadline fires and we detect the zombie connection.
const (
pongWait = 45 * time.Second
pingPeriod = 30 * time.Second
)
_ = conn.SetReadDeadline(time.Now().Add(pongWait))
conn.SetPongHandler(func(string) error {
_ = conn.SetReadDeadline(time.Now().Add(pongWait))
return nil
})
// Ping ticker goroutine — stops when readLoop returns.
pingDone := make(chan struct{})
go func() {
ticker := time.NewTicker(pingPeriod)
defer ticker.Stop()
for {
select {
case <-ticker.C:
t.mu.Lock()
if t.conn != nil {
_ = t.conn.SetWriteDeadline(time.Now().Add(10 * time.Second))
err := t.conn.WriteMessage(websocket.PingMessage, nil)
_ = t.conn.SetWriteDeadline(time.Time{})
if err != nil {
t.mu.Unlock()
return
}
}
t.mu.Unlock()
case <-pingDone:
return
}
}
}()
defer close(pingDone)
for {
_, msg, err := conn.ReadMessage()
if err != nil {
@ -244,6 +285,9 @@ func (t *WSTransport) readLoop(conn *websocket.Conn) {
return
}
// Any data message proves the connection is alive. Pongs never reach this
// point; they refresh the deadline in the pong handler set above.
_ = conn.SetReadDeadline(time.Now().Add(pongWait))
var envelope struct {
Type string `json:"type"`
}