fix(ws): add ping/pong keepalive and read deadline to detect zombie connections
Without a SetReadDeadline, a silently dead WebSocket (e.g. Cloudflare dropping the connection without a close frame) would block readLoop forever. The daemon would appear connected but never receive tasks, and never fall back to HTTP polling. - Send RFC 6455 pings every 30s (resets Cloudflare's idle timer) - SetReadDeadline of 45s, refreshed on every pong and text message - SetWriteDeadline of 10s on all writes to prevent blocked sends - On timeout, readLoop emits "disconnected" → HybridTransport falls back to HTTP and starts WS reconnection loop
This commit is contained in:
parent
56a386f4e2
commit
2398707cc1
1 changed files with 44 additions and 0 deletions
|
|
@ -226,10 +226,51 @@ func (t *WSTransport) send(msg any) error {
|
|||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_ = t.conn.SetWriteDeadline(time.Now().Add(10 * time.Second))
|
||||
return t.conn.WriteMessage(websocket.TextMessage, data)
|
||||
}
|
||||
|
||||
func (t *WSTransport) readLoop(conn *websocket.Conn) {
|
||||
// Cloudflare idle timeout is 100s. We send pings every 30s and expect
|
||||
// either a pong or a server message within 45s. If neither arrives,
|
||||
// the read deadline fires and we detect the zombie connection.
|
||||
const (
|
||||
pongWait = 45 * time.Second
|
||||
pingPeriod = 30 * time.Second
|
||||
)
|
||||
|
||||
_ = conn.SetReadDeadline(time.Now().Add(pongWait))
|
||||
conn.SetPongHandler(func(string) error {
|
||||
_ = conn.SetReadDeadline(time.Now().Add(pongWait))
|
||||
return nil
|
||||
})
|
||||
|
||||
// Ping ticker goroutine — stops when readLoop returns.
|
||||
pingDone := make(chan struct{})
|
||||
go func() {
|
||||
ticker := time.NewTicker(pingPeriod)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
t.mu.Lock()
|
||||
if t.conn != nil {
|
||||
_ = t.conn.SetWriteDeadline(time.Now().Add(10 * time.Second))
|
||||
err := t.conn.WriteMessage(websocket.PingMessage, nil)
|
||||
_ = t.conn.SetWriteDeadline(time.Time{})
|
||||
if err != nil {
|
||||
t.mu.Unlock()
|
||||
return
|
||||
}
|
||||
}
|
||||
t.mu.Unlock()
|
||||
case <-pingDone:
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
defer close(pingDone)
|
||||
|
||||
for {
|
||||
_, msg, err := conn.ReadMessage()
|
||||
if err != nil {
|
||||
|
|
@ -244,6 +285,9 @@ func (t *WSTransport) readLoop(conn *websocket.Conn) {
|
|||
return
|
||||
}
|
||||
|
||||
// Any message (text or pong) proves the connection is alive.
|
||||
_ = conn.SetReadDeadline(time.Now().Add(pongWait))
|
||||
|
||||
var envelope struct {
|
||||
Type string `json:"type"`
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue