fix(ws): add ping/pong keepalive and read deadline to detect zombie connections
Without a SetReadDeadline, a silently dead WebSocket (e.g. Cloudflare dropping the connection without a close frame) would block readLoop forever. The daemon would appear connected but never receive tasks, and never fall back to HTTP polling.

- Send RFC 6455 pings every 30s (resets Cloudflare's idle timer)
- SetReadDeadline of 45s, refreshed on every pong and text message
- SetWriteDeadline of 10s on all writes to prevent blocked sends
- On timeout, readLoop emits "disconnected" → HybridTransport falls back to HTTP and starts the WS reconnection loop
This commit is contained in:
parent
56a386f4e2
commit
2398707cc1
1 changed files with 44 additions and 0 deletions
|
|
@ -226,10 +226,51 @@ func (t *WSTransport) send(msg any) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
_ = t.conn.SetWriteDeadline(time.Now().Add(10 * time.Second))
|
||||||
return t.conn.WriteMessage(websocket.TextMessage, data)
|
return t.conn.WriteMessage(websocket.TextMessage, data)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *WSTransport) readLoop(conn *websocket.Conn) {
|
func (t *WSTransport) readLoop(conn *websocket.Conn) {
|
||||||
|
// Cloudflare idle timeout is 100s. We send pings every 30s and expect
|
||||||
|
// either a pong or a server message within 45s. If neither arrives,
|
||||||
|
// the read deadline fires and we detect the zombie connection.
|
||||||
|
const (
|
||||||
|
pongWait = 45 * time.Second
|
||||||
|
pingPeriod = 30 * time.Second
|
||||||
|
)
|
||||||
|
|
||||||
|
_ = conn.SetReadDeadline(time.Now().Add(pongWait))
|
||||||
|
conn.SetPongHandler(func(string) error {
|
||||||
|
_ = conn.SetReadDeadline(time.Now().Add(pongWait))
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
|
||||||
|
// Ping ticker goroutine — stops when readLoop returns.
|
||||||
|
pingDone := make(chan struct{})
|
||||||
|
go func() {
|
||||||
|
ticker := time.NewTicker(pingPeriod)
|
||||||
|
defer ticker.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ticker.C:
|
||||||
|
t.mu.Lock()
|
||||||
|
if t.conn != nil {
|
||||||
|
_ = t.conn.SetWriteDeadline(time.Now().Add(10 * time.Second))
|
||||||
|
err := t.conn.WriteMessage(websocket.PingMessage, nil)
|
||||||
|
_ = t.conn.SetWriteDeadline(time.Time{})
|
||||||
|
if err != nil {
|
||||||
|
t.mu.Unlock()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
t.mu.Unlock()
|
||||||
|
case <-pingDone:
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
defer close(pingDone)
|
||||||
|
|
||||||
for {
|
for {
|
||||||
_, msg, err := conn.ReadMessage()
|
_, msg, err := conn.ReadMessage()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
@ -244,6 +285,9 @@ func (t *WSTransport) readLoop(conn *websocket.Conn) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Any data message proves the connection is alive (pongs refresh the deadline in the pong handler).
|
||||||
|
_ = conn.SetReadDeadline(time.Now().Add(pongWait))
|
||||||
|
|
||||||
var envelope struct {
|
var envelope struct {
|
||||||
Type string `json:"type"`
|
Type string `json:"type"`
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue