Support recovery from the middle of a rename.

Brendan Burns
2015-05-01 00:31:01 -07:00
parent 32b4b1cdd7
commit 63cbc715dd
5 changed files with 684 additions and 384 deletions
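The recovery works because the rename step is now idempotent: Rename tolerates a NotFound error when deleting the old controller, and LoadExistingNextReplicationController returns nil rather than an error when the "next" controller has not been created yet. Below is a minimal sketch of how a caller could resume an interrupted update on top of these helpers; resumeOrCreateNext and the surrounding wiring are illustrative assumptions, not code from this commit.

import (
	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/kubectl"
)

// resumeOrCreateNext is illustrative only: it reuses the "next" controller if a
// previous, interrupted run already created it, and otherwise builds a fresh one
// from the current controller.
func resumeOrCreateNext(c *client.Client, namespace, oldName, newName, image, deploymentKey string) (*api.ReplicationController, error) {
	newRc, err := kubectl.LoadExistingNextReplicationController(c, namespace, newName)
	if err != nil {
		return nil, err
	}
	if newRc != nil {
		// A partially completed run left the next controller behind; pick it up.
		return newRc, nil
	}
	return kubectl.CreateNewControllerFromCurrentController(c, namespace, oldName, newName, image, deploymentKey)
}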


@@ -17,13 +17,18 @@ limitations under the License.
package kubectl

import (
	goerrors "errors"
	"fmt"
	"io"
	"strconv"
	"strings"
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/pkg/api"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/api/errors"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/client"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/fields"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/labels"
	"github.com/GoogleCloudPlatform/kubernetes/pkg/util/wait"
)
@@ -71,6 +76,50 @@ const (
	RenameRollingUpdateCleanupPolicy RollingUpdaterCleanupPolicy = "Rename"
)

func LoadExistingNextReplicationController(c *client.Client, namespace, newName string) (*api.ReplicationController, error) {
	if len(newName) == 0 {
		return nil, nil
	}
	newRc, err := c.ReplicationControllers(namespace).Get(newName)
	if err != nil && errors.IsNotFound(err) {
		return nil, nil
	}
	return newRc, err
}

func CreateNewControllerFromCurrentController(c *client.Client, namespace, oldName, newName, image, deploymentKey string) (*api.ReplicationController, error) {
	// load the old RC into the "new" RC
	newRc, err := c.ReplicationControllers(namespace).Get(oldName)
	if err != nil {
		return nil, err
	}
	if len(newRc.Spec.Template.Spec.Containers) > 1 {
		// TODO: support multi-container image update.
		return nil, goerrors.New("Image update is not supported for multi-container pods")
	}
	if len(newRc.Spec.Template.Spec.Containers) == 0 {
		return nil, goerrors.New(fmt.Sprintf("Pod has no containers! (%v)", newRc))
	}
	newRc.Spec.Template.Spec.Containers[0].Image = image
	newHash, err := api.HashObject(newRc, c.Codec)
	if err != nil {
		return nil, err
	}
	if len(newName) == 0 {
		newName = fmt.Sprintf("%s-%s", newRc.Name, newHash)
	}
	newRc.Name = newName
	newRc.Spec.Selector[deploymentKey] = newHash
	newRc.Spec.Template.Labels[deploymentKey] = newHash
	// Clear resource version after hashing so that identical updates get different hashes.
	newRc.ResourceVersion = ""
	return newRc, nil
}

// NewRollingUpdater creates a RollingUpdater from a client
func NewRollingUpdater(namespace string, c RollingUpdaterClient) *RollingUpdater {
	return &RollingUpdater{
@@ -115,6 +164,138 @@ func SetNextControllerAnnotation(rc *api.ReplicationController, name string) {
	rc.Annotations[nextControllerAnnotation] = name
}

func UpdateExistingReplicationController(c client.Interface, oldRc *api.ReplicationController, namespace, newName, deploymentKey, deploymentValue string, out io.Writer) (*api.ReplicationController, error) {
	SetNextControllerAnnotation(oldRc, newName)
	if _, found := oldRc.Spec.Selector[deploymentKey]; !found {
		return AddDeploymentKeyToReplicationController(oldRc, c, deploymentKey, deploymentValue, namespace, out)
	} else {
		// If we didn't need to update the controller for the deployment key, we still need to write
		// the "next" controller.
		return c.ReplicationControllers(namespace).Update(oldRc)
	}
}
const MaxRetries = 3

func AddDeploymentKeyToReplicationController(oldRc *api.ReplicationController, client client.Interface, deploymentKey, deploymentValue, namespace string, out io.Writer) (*api.ReplicationController, error) {
	var err error
	// First, update the template label. This ensures that any newly created pods will have the new label
	if oldRc, err = updateWithRetries(client.ReplicationControllers(namespace), oldRc, func(rc *api.ReplicationController) {
		if rc.Spec.Template.Labels == nil {
			rc.Spec.Template.Labels = map[string]string{}
		}
		rc.Spec.Template.Labels[deploymentKey] = deploymentValue
	}); err != nil {
		return nil, err
	}
	// Update all pods managed by the rc to have the new hash label, so they are correctly adopted
	// TODO: extract the code from the label command and re-use it here.
	podList, err := client.Pods(namespace).List(labels.SelectorFromSet(oldRc.Spec.Selector), fields.Everything())
	if err != nil {
		return nil, err
	}
	for ix := range podList.Items {
		pod := &podList.Items[ix]
		if pod.Labels == nil {
			pod.Labels = map[string]string{
				deploymentKey: deploymentValue,
			}
		} else {
			pod.Labels[deploymentKey] = deploymentValue
		}
		err = nil
		delay := 3
		for i := 0; i < MaxRetries; i++ {
			_, err = client.Pods(namespace).Update(pod)
			if err != nil {
				fmt.Fprintf(out, "Error updating pod (%v), retrying after %d seconds\n", err, delay)
				time.Sleep(time.Second * time.Duration(delay))
				delay *= delay
			} else {
				break
			}
		}
		if err != nil {
			return nil, err
		}
	}
	if oldRc.Spec.Selector == nil {
		oldRc.Spec.Selector = map[string]string{}
	}
	// Copy the old selector, so that we can scrub out any orphaned pods
	selectorCopy := map[string]string{}
	for k, v := range oldRc.Spec.Selector {
		selectorCopy[k] = v
	}
	oldRc.Spec.Selector[deploymentKey] = deploymentValue
	// Update the selector of the rc so it manages all the pods we updated above
	if oldRc, err = updateWithRetries(client.ReplicationControllers(namespace), oldRc, func(rc *api.ReplicationController) {
		rc.Spec.Selector[deploymentKey] = deploymentValue
	}); err != nil {
		return nil, err
	}
	// Clean up any orphaned pods that don't have the new label, this can happen if the rc manager
	// doesn't see the update to its pod template and creates a new pod with the old labels after
	// we've finished re-adopting existing pods to the rc.
	podList, err = client.Pods(namespace).List(labels.SelectorFromSet(selectorCopy), fields.Everything())
	if err != nil {
		return nil, err
	}
	for ix := range podList.Items {
		pod := &podList.Items[ix]
		if value, found := pod.Labels[deploymentKey]; !found || value != deploymentValue {
			if err := client.Pods(namespace).Delete(pod.Name, nil); err != nil {
				return nil, err
			}
		}
	}
	return oldRc, nil
}
type updateFunc func(controller *api.ReplicationController)

// updateWithRetries applies the given update function to the rc, retrying against the
// latest version of the controller until the update succeeds or the poll times out.
func updateWithRetries(rcClient client.ReplicationControllerInterface, rc *api.ReplicationController, applyUpdate updateFunc) (*api.ReplicationController, error) {
	// Each update could take ~100ms, so give it 0.5 second
	var err error
	oldRc := rc
	err = wait.Poll(10*time.Millisecond, 500*time.Millisecond, func() (bool, error) {
		// Apply the update, then attempt to push it to the apiserver.
		applyUpdate(rc)
		if rc, err = rcClient.Update(rc); err == nil {
			// rc contains the latest controller post update
			return true, nil
		}
		// Update the controller with the latest resource version, if the update failed we
		// can't trust rc so use oldRc.Name.
		if rc, err = rcClient.Get(oldRc.Name); err != nil {
			// The Get failed: Value in rc cannot be trusted.
			rc = oldRc
		}
		// The Get passed: rc contains the latest controller, expect a poll for the update.
		return false, nil
	})
	// If the error is non-nil the returned controller cannot be trusted, if it is nil, the returned
	// controller contains the applied update.
	return rc, err
}
func FindSourceController(r RollingUpdaterClient, namespace, name string) (*api.ReplicationController, error) {
	list, err := r.ListReplicationControllers(namespace, labels.Everything())
	if err != nil {
		return nil, err
	}
	for ix := range list.Items {
		rc := &list.Items[ix]
		if rc.Annotations != nil && strings.HasPrefix(rc.Annotations[sourceIdAnnotation], name) {
			return rc, nil
		}
	}
	return nil, fmt.Errorf("couldn't find a replication controller with source id == %s/%s", namespace, name)
}
// Update all pods for a ReplicationController (oldRc) by creating a new
// controller (newRc) with 0 replicas, and synchronously resizing oldRc,newRc
// by 1 until oldRc has 0 replicas and newRc has the original # of desired
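The loop described above lives in RollingUpdater.Update, whose body falls outside this hunk. As a rough, hedged sketch of the handoff it describes, using only the RollingUpdaterClient interface shown further down; the stepReplicasAcross name is made up here, and the readiness polling and cleanup policies of the real updater are omitted:

// Illustrative sketch only, not code from this commit: shift one replica at a
// time from oldRc to newRc until oldRc is empty and newRc holds the original
// desired count.
func stepReplicasAcross(c RollingUpdaterClient, oldRc, newRc *api.ReplicationController, desired int) error {
	for newRc.Spec.Replicas < desired || oldRc.Spec.Replicas > 0 {
		if newRc.Spec.Replicas < desired {
			newRc.Spec.Replicas++
			updated, err := c.UpdateReplicationController(newRc.Namespace, newRc)
			if err != nil {
				return err
			}
			newRc = updated
		}
		if oldRc.Spec.Replicas > 0 {
			oldRc.Spec.Replicas--
			updated, err := c.UpdateReplicationController(oldRc.Namespace, oldRc)
			if err != nil {
				return err
			}
			oldRc = updated
		}
	}
	return nil
}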
@@ -286,19 +467,28 @@ func (r *RollingUpdater) updateAndWait(rc *api.ReplicationController, interval,
}
func (r *RollingUpdater) rename(rc *api.ReplicationController, newName string) error {
+	return Rename(r.c, rc, newName)
+}
+
+func Rename(c RollingUpdaterClient, rc *api.ReplicationController, newName string) error {
	oldName := rc.Name
	rc.Name = newName
	rc.ResourceVersion = ""
-	_, err := r.c.CreateReplicationController(rc.Namespace, rc)
+	_, err := c.CreateReplicationController(rc.Namespace, rc)
	if err != nil {
		return err
	}
-	return r.c.DeleteReplicationController(rc.Namespace, oldName)
+	err = c.DeleteReplicationController(rc.Namespace, oldName)
+	if err != nil && !errors.IsNotFound(err) {
+		return err
+	}
+	return nil
}
// RollingUpdaterClient abstracts access to ReplicationControllers.
type RollingUpdaterClient interface {
	ListReplicationControllers(namespace string, selector labels.Selector) (*api.ReplicationControllerList, error)
	GetReplicationController(namespace, name string) (*api.ReplicationController, error)
	UpdateReplicationController(namespace string, rc *api.ReplicationController) (*api.ReplicationController, error)
	CreateReplicationController(namespace string, rc *api.ReplicationController) (*api.ReplicationController, error)
@@ -315,6 +505,10 @@ type realRollingUpdaterClient struct {
	client client.Interface
}

func (c *realRollingUpdaterClient) ListReplicationControllers(namespace string, selector labels.Selector) (*api.ReplicationControllerList, error) {
	return c.client.ReplicationControllers(namespace).List(selector)
}

func (c *realRollingUpdaterClient) GetReplicationController(namespace, name string) (*api.ReplicationController, error) {
	return c.client.ReplicationControllers(namespace).Get(name)
}