Provide backpressure to clients when etcd goes down
When etcd is down today we don't specifically handle the error involved, which means clients get a generic 500 error. This commit adds a formal error type internally for both WatchExpired and EtcdUnreachable, and then converts them to api/errors before returning to the client. It also upgrades the client to retry on any 429 or 5xx error that has a Retry-After header, instead of just 429. In combination, this allows the apiserver to exert backpressure on controllers that are hotlooping. Picked 2 seconds by default, but we could potentially ramp that up even further in a future iteration.
This commit is contained in:
@@ -25,6 +25,8 @@ const (
|
||||
EtcdErrorCodeTestFailed = 101
|
||||
EtcdErrorCodeNodeExist = 105
|
||||
EtcdErrorCodeValueRequired = 200
|
||||
EtcdErrorCodeWatchExpired = 401
|
||||
EtcdErrorCodeUnreachable = 501
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -32,6 +34,8 @@ var (
|
||||
EtcdErrorTestFailed = &etcd.EtcdError{ErrorCode: EtcdErrorCodeTestFailed}
|
||||
EtcdErrorNodeExist = &etcd.EtcdError{ErrorCode: EtcdErrorCodeNodeExist}
|
||||
EtcdErrorValueRequired = &etcd.EtcdError{ErrorCode: EtcdErrorCodeValueRequired}
|
||||
EtcdErrorWatchExpired = &etcd.EtcdError{ErrorCode: EtcdErrorCodeWatchExpired}
|
||||
EtcdErrorUnreachable = &etcd.EtcdError{ErrorCode: EtcdErrorCodeUnreachable}
|
||||
)
|
||||
|
||||
// EtcdClient is an injectable interface for testing.
|
||||
|
Reference in New Issue
Block a user