Provide backpressure to clients when etcd goes down
When etcd is down today we don't specifically handle the error involved, which means clients get a generic 500 error. This commit adds a formal error type internally for both WatchExpired and EtcdUnreachable, and then converts them to api/errors before returning to the client. It also upgrades the client to retry on any 429 or 5xx error that has a Retry-After header, instead of just 429. In combination, this allows the apiserver to exert backpressure on controllers that are hotlooping. Picked 2 seconds by default, but we could potentially ramp that up even further in a future iteration.
This commit is contained in:
@@ -43,6 +43,16 @@ func IsEtcdTestFailed(err error) bool {
|
||||
return isEtcdErrorNum(err, tools.EtcdErrorCodeTestFailed)
|
||||
}
|
||||
|
||||
// IsEtcdWatchExpired returns true if and only if err indicates the watch has expired.
|
||||
func IsEtcdWatchExpired(err error) bool {
|
||||
return isEtcdErrorNum(err, tools.EtcdErrorCodeWatchExpired)
|
||||
}
|
||||
|
||||
// IsEtcdUnreachable returns true if and only if err indicates the server could not be reached.
|
||||
func IsEtcdUnreachable(err error) bool {
|
||||
return isEtcdErrorNum(err, tools.EtcdErrorCodeUnreachable)
|
||||
}
|
||||
|
||||
// IsEtcdWatchStoppedByUser returns true if and only if err is a client triggered stop.
|
||||
func IsEtcdWatchStoppedByUser(err error) bool {
|
||||
return goetcd.ErrWatchStoppedByUser == err
|
||||
|
@@ -17,6 +17,7 @@ limitations under the License.
|
||||
package etcd
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -181,12 +182,30 @@ func (w *etcdWatcher) translate() {
|
||||
select {
|
||||
case err := <-w.etcdError:
|
||||
if err != nil {
|
||||
w.emit(watch.Event{
|
||||
Type: watch.Error,
|
||||
Object: &unversioned.Status{
|
||||
var status *unversioned.Status
|
||||
switch {
|
||||
case IsEtcdWatchExpired(err):
|
||||
status = &unversioned.Status{
|
||||
Status: unversioned.StatusFailure,
|
||||
Message: err.Error(),
|
||||
},
|
||||
Code: http.StatusGone, // Gone
|
||||
Reason: unversioned.StatusReasonExpired,
|
||||
}
|
||||
// TODO: need to generate errors using api/errors which has a circular dependency on this package
|
||||
// no other way to inject errors
|
||||
// case IsEtcdUnreachable(err):
|
||||
// status = errors.NewServerTimeout(...)
|
||||
default:
|
||||
status = &unversioned.Status{
|
||||
Status: unversioned.StatusFailure,
|
||||
Message: err.Error(),
|
||||
Code: http.StatusInternalServerError,
|
||||
Reason: unversioned.StatusReasonInternalError,
|
||||
}
|
||||
}
|
||||
w.emit(watch.Event{
|
||||
Type: watch.Error,
|
||||
Object: status,
|
||||
})
|
||||
}
|
||||
return
|
||||
|
Reference in New Issue
Block a user