Provide backpressure to clients when etcd goes down

When etcd is down today we don't specifically handle the error involved,
which means clients get a generic 500 error. This commit adds a formal
error type internally for both WatchExpired and EtcdUnreachable, and
then converts them to api/errors before returning to the client. It also
upgrades the client to retry on any 429 or 5xx error that has a
Retry-After header, instead of just 429.

In combination, this allows the apiserver to exert backpressure on
controllers that are hotlooping.  Picked 2 seconds by default, but we
could potentially ramp that up even further in a future iteration.
This commit is contained in:
Clayton Coleman
2015-11-04 15:15:01 -05:00
parent ca6fe97275
commit 3da15535b6
8 changed files with 103 additions and 13 deletions

View File

@@ -43,6 +43,16 @@ func IsEtcdTestFailed(err error) bool {
return isEtcdErrorNum(err, tools.EtcdErrorCodeTestFailed)
}
// IsEtcdWatchExpired returns true if and only if err indicates the watch has expired.
func IsEtcdWatchExpired(err error) bool {
return isEtcdErrorNum(err, tools.EtcdErrorCodeWatchExpired)
}
// IsEtcdUnreachable returns true if and only if err indicates the server could not be reached.
func IsEtcdUnreachable(err error) bool {
return isEtcdErrorNum(err, tools.EtcdErrorCodeUnreachable)
}
// IsEtcdWatchStoppedByUser returns true if and only if err is a client triggered stop.
func IsEtcdWatchStoppedByUser(err error) bool {
return goetcd.ErrWatchStoppedByUser == err

View File

@@ -17,6 +17,7 @@ limitations under the License.
package etcd
import (
"net/http"
"sync"
"time"
@@ -181,12 +182,30 @@ func (w *etcdWatcher) translate() {
select {
case err := <-w.etcdError:
if err != nil {
w.emit(watch.Event{
Type: watch.Error,
Object: &unversioned.Status{
var status *unversioned.Status
switch {
case IsEtcdWatchExpired(err):
status = &unversioned.Status{
Status: unversioned.StatusFailure,
Message: err.Error(),
},
Code: http.StatusGone, // Gone
Reason: unversioned.StatusReasonExpired,
}
// TODO: need to generate errors using api/errors which has a circular dependency on this package
// no other way to inject errors
// case IsEtcdUnreachable(err):
// status = errors.NewServerTimeout(...)
default:
status = &unversioned.Status{
Status: unversioned.StatusFailure,
Message: err.Error(),
Code: http.StatusInternalServerError,
Reason: unversioned.StatusReasonInternalError,
}
}
w.emit(watch.Event{
Type: watch.Error,
Object: status,
})
}
return