diff --git a/pkg/apis/apps/v1beta1/types.go b/pkg/apis/apps/v1beta1/types.go index 9c53adcefd2..32833e4a041 100644 --- a/pkg/apis/apps/v1beta1/types.go +++ b/pkg/apis/apps/v1beta1/types.go @@ -21,6 +21,11 @@ import ( "k8s.io/kubernetes/pkg/api/v1" ) +const ( + // StatefulSetInitAnnotation if present, and set to false, indicates that a Pod's readiness should be ignored. + StatefulSetInitAnnotation = "pod.alpha.kubernetes.io/initialized" +) + // +genclient=true // StatefulSet represents a set of pods with consistent identities. diff --git a/pkg/controller/statefulset/BUILD b/pkg/controller/statefulset/BUILD index 0fb0c0746f9..dc07b2dd6f6 100644 --- a/pkg/controller/statefulset/BUILD +++ b/pkg/controller/statefulset/BUILD @@ -11,12 +11,10 @@ load( go_library( name = "go_default_library", srcs = [ - "fakes.go", - "identity_mappers.go", - "iterator.go", + "stateful_pod_control.go", "stateful_set.go", + "stateful_set_control.go", "stateful_set_utils.go", - "statefulpod.go", ], tags = ["automanaged"], deps = [ @@ -25,19 +23,14 @@ go_library( "//pkg/api/v1/pod:go_default_library", "//pkg/apis/apps/v1beta1:go_default_library", "//pkg/client/clientset_generated/clientset:go_default_library", - "//pkg/client/clientset_generated/clientset/typed/apps/v1beta1:go_default_library", "//pkg/client/legacylisters:go_default_library", "//pkg/controller:go_default_library", "//vendor:github.com/golang/glog", - "//vendor:gopkg.in/inf.v0", "//vendor:k8s.io/apimachinery/pkg/api/errors", - "//vendor:k8s.io/apimachinery/pkg/api/resource", "//vendor:k8s.io/apimachinery/pkg/apis/meta/v1", "//vendor:k8s.io/apimachinery/pkg/runtime", - "//vendor:k8s.io/apimachinery/pkg/types", "//vendor:k8s.io/apimachinery/pkg/util/errors", "//vendor:k8s.io/apimachinery/pkg/util/runtime", - "//vendor:k8s.io/apimachinery/pkg/util/sets", "//vendor:k8s.io/apimachinery/pkg/util/wait", "//vendor:k8s.io/apimachinery/pkg/watch", "//vendor:k8s.io/client-go/kubernetes/typed/core/v1", @@ -51,33 +44,29 @@ go_library( go_test( name = "go_default_test", srcs = [ - "identity_mappers_test.go", - "iterator_test.go", + "stateful_pod_control_test.go", + "stateful_set_control_test.go", "stateful_set_test.go", - "statefulpod_test.go", + "stateful_set_utils_test.go", ], library = ":go_default_library", tags = ["automanaged"], deps = [ - "//pkg/api:go_default_library", - "//pkg/api/testapi:go_default_library", "//pkg/api/v1:go_default_library", "//pkg/api/v1/pod:go_default_library", "//pkg/apis/apps/v1beta1:go_default_library", - "//pkg/client/clientset_generated/clientset:go_default_library", "//pkg/client/clientset_generated/clientset/fake:go_default_library", - "//pkg/client/clientset_generated/clientset/typed/apps/v1beta1:go_default_library", - "//pkg/client/clientset_generated/clientset/typed/apps/v1beta1/fake:go_default_library", "//pkg/client/legacylisters:go_default_library", "//pkg/controller:go_default_library", + "//vendor:k8s.io/apimachinery/pkg/api/errors", + "//vendor:k8s.io/apimachinery/pkg/api/resource", "//vendor:k8s.io/apimachinery/pkg/apis/meta/v1", "//vendor:k8s.io/apimachinery/pkg/runtime", - "//vendor:k8s.io/apimachinery/pkg/util/errors", - "//vendor:k8s.io/apimachinery/pkg/util/sets", - "//vendor:k8s.io/client-go/rest", + "//vendor:k8s.io/apimachinery/pkg/types", "//vendor:k8s.io/client-go/testing", "//vendor:k8s.io/client-go/tools/cache", - "//vendor:k8s.io/client-go/util/testing", + "//vendor:k8s.io/client-go/tools/record", + "//vendor:k8s.io/client-go/util/workqueue", ], ) diff --git a/pkg/controller/statefulset/fakes.go 
b/pkg/controller/statefulset/fakes.go deleted file mode 100644 index fee99a450e2..00000000000 --- a/pkg/controller/statefulset/fakes.go +++ /dev/null @@ -1,327 +0,0 @@ -/* -Copyright 2016 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package statefulset - -import ( - "fmt" - "time" - - inf "gopkg.in/inf.v0" - - "k8s.io/apimachinery/pkg/api/resource" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/client-go/tools/record" - "k8s.io/kubernetes/pkg/api/v1" - apipod "k8s.io/kubernetes/pkg/api/v1/pod" - apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1" -) - -func dec(i int64, exponent int) *inf.Dec { - return inf.NewDec(i, inf.Scale(-exponent)) -} - -func newPVC(name string) v1.PersistentVolumeClaim { - return v1.PersistentVolumeClaim{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - }, - Spec: v1.PersistentVolumeClaimSpec{ - Resources: v1.ResourceRequirements{ - Requests: v1.ResourceList{ - v1.ResourceStorage: *resource.NewQuantity(1, resource.BinarySI), - }, - }, - }, - } -} - -func newStatefulSetWithVolumes(replicas int, name string, petMounts []v1.VolumeMount, podMounts []v1.VolumeMount) *apps.StatefulSet { - mounts := append(petMounts, podMounts...) - claims := []v1.PersistentVolumeClaim{} - for _, m := range petMounts { - claims = append(claims, newPVC(m.Name)) - } - - vols := []v1.Volume{} - for _, m := range podMounts { - vols = append(vols, v1.Volume{ - Name: m.Name, - VolumeSource: v1.VolumeSource{ - HostPath: &v1.HostPathVolumeSource{ - Path: fmt.Sprintf("/tmp/%v", m.Name), - }, - }, - }) - } - - return &apps.StatefulSet{ - TypeMeta: metav1.TypeMeta{ - Kind: "StatefulSet", - APIVersion: "apps/v1beta1", - }, - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: metav1.NamespaceDefault, - UID: types.UID("test"), - }, - Spec: apps.StatefulSetSpec{ - Selector: &metav1.LabelSelector{ - MatchLabels: map[string]string{"foo": "bar"}, - }, - Replicas: func() *int32 { i := int32(replicas); return &i }(), - Template: v1.PodTemplateSpec{ - Spec: v1.PodSpec{ - Containers: []v1.Container{ - { - Name: "nginx", - Image: "nginx", - VolumeMounts: mounts, - }, - }, - Volumes: vols, - }, - }, - VolumeClaimTemplates: claims, - ServiceName: "governingsvc", - }, - } -} - -func runningPod(ns, name string) *v1.Pod { - p := &v1.Pod{Status: v1.PodStatus{Phase: v1.PodRunning}} - p.Namespace = ns - p.Name = name - return p -} - -func newPodList(ps *apps.StatefulSet, num int) []*v1.Pod { - // knownPods are pods in the system - knownPods := []*v1.Pod{} - for i := 0; i < num; i++ { - k, _ := newPCB(fmt.Sprintf("%v", i), ps) - knownPods = append(knownPods, k.pod) - } - return knownPods -} - -func newStatefulSet(replicas int) *apps.StatefulSet { - petMounts := []v1.VolumeMount{ - {Name: "datadir", MountPath: "/tmp/zookeeper"}, - } - podMounts := []v1.VolumeMount{ - {Name: "home", MountPath: "/home"}, - } - return newStatefulSetWithVolumes(replicas, "foo", petMounts, podMounts) -} - -func checkPodForMount(pod *v1.Pod, 
mountName string) error { - for _, c := range pod.Spec.Containers { - for _, v := range c.VolumeMounts { - if v.Name == mountName { - return nil - } - } - } - return fmt.Errorf("Found volume but no associated mount %v in pod %v", mountName, pod.Name) -} - -func newFakePetClient() *fakePetClient { - return &fakePetClient{ - pets: []*pcb{}, - claims: []v1.PersistentVolumeClaim{}, - recorder: &record.FakeRecorder{}, - petHealthChecker: &defaultPetHealthChecker{}, - } -} - -type fakePetClient struct { - pets []*pcb - claims []v1.PersistentVolumeClaim - petsCreated int - petsDeleted int - claimsCreated int - claimsDeleted int - recorder record.EventRecorder - petHealthChecker -} - -// Delete fakes pet client deletion. -func (f *fakePetClient) Delete(p *pcb) error { - pets := []*pcb{} - found := false - for i, pet := range f.pets { - if p.pod.Name == pet.pod.Name { - found = true - f.recorder.Eventf(pet.parent, v1.EventTypeNormal, "SuccessfulDelete", "pod: %v", pet.pod.Name) - continue - } - pets = append(pets, f.pets[i]) - } - if !found { - // TODO: Return proper not found error - return fmt.Errorf("Delete failed: pod %v doesn't exist", p.pod.Name) - } - f.pets = pets - f.petsDeleted++ - return nil -} - -// Get fakes getting pets. -func (f *fakePetClient) Get(p *pcb) (*pcb, bool, error) { - for i, pet := range f.pets { - if p.pod.Name == pet.pod.Name { - return f.pets[i], true, nil - } - } - return nil, false, nil -} - -// Create fakes pet creation. -func (f *fakePetClient) Create(p *pcb) error { - for _, pet := range f.pets { - if p.pod.Name == pet.pod.Name { - return fmt.Errorf("Create failed: pod %v already exists", p.pod.Name) - } - } - f.recorder.Eventf(p.parent, v1.EventTypeNormal, "SuccessfulCreate", "pod: %v", p.pod.Name) - f.pets = append(f.pets, p) - f.petsCreated++ - return nil -} - -// Update fakes pet updates. -func (f *fakePetClient) Update(expected, wanted *pcb) error { - found := false - pets := []*pcb{} - for i, pet := range f.pets { - if wanted.pod.Name == pet.pod.Name { - f.pets[i].pod.Annotations[apipod.PodHostnameAnnotation] = wanted.pod.Annotations[apipod.PodHostnameAnnotation] - f.pets[i].pod.Annotations[apipod.PodSubdomainAnnotation] = wanted.pod.Annotations[apipod.PodSubdomainAnnotation] - f.pets[i].pod.Spec = wanted.pod.Spec - found = true - } - pets = append(pets, f.pets[i]) - } - f.pets = pets - if !found { - return fmt.Errorf("Cannot update pod %v not found", wanted.pod.Name) - } - // TODO: Delete pvcs/volumes that are in wanted but not in expected. - return nil -} - -func (f *fakePetClient) getPodList() []*v1.Pod { - p := []*v1.Pod{} - for i, pet := range f.pets { - if pet.pod == nil { - continue - } - p = append(p, f.pets[i].pod) - } - return p -} - -func (f *fakePetClient) deletePetAtIndex(index int) { - p := []*pcb{} - for i := range f.pets { - if i != index { - p = append(p, f.pets[i]) - } - } - f.pets = p -} - -func (f *fakePetClient) setHealthy(index int) error { - if len(f.pets) <= index { - return fmt.Errorf("Index out of range, len %v index %v", len(f.pets), index) - } - f.pets[index].pod.Status.Phase = v1.PodRunning - f.pets[index].pod.Annotations[StatefulSetInitAnnotation] = "true" - f.pets[index].pod.Status.Conditions = []v1.PodCondition{ - {Type: v1.PodReady, Status: v1.ConditionTrue}, - } - return nil -} - -// isHealthy is a convenience wrapper around the default health checker. -// The first invocation returns not-healthy, but marks the pet healthy so -// subsequent invocations see it as healthy. 
-func (f *fakePetClient) isHealthy(pod *v1.Pod) bool { - if f.petHealthChecker.isHealthy(pod) { - return true - } - return false -} - -func (f *fakePetClient) setDeletionTimestamp(index int) error { - if len(f.pets) <= index { - return fmt.Errorf("Index out of range, len %v index %v", len(f.pets), index) - } - f.pets[index].pod.DeletionTimestamp = &metav1.Time{Time: time.Now()} - return nil -} - -// SyncPVCs fakes pvc syncing. -func (f *fakePetClient) SyncPVCs(pet *pcb) error { - v := pet.pvcs - updateClaims := map[string]v1.PersistentVolumeClaim{} - for i, update := range v { - updateClaims[update.Name] = v[i] - } - claimList := []v1.PersistentVolumeClaim{} - for i, existing := range f.claims { - if update, ok := updateClaims[existing.Name]; ok { - claimList = append(claimList, update) - delete(updateClaims, existing.Name) - } else { - claimList = append(claimList, f.claims[i]) - } - } - for _, remaining := range updateClaims { - claimList = append(claimList, remaining) - f.claimsCreated++ - f.recorder.Eventf(pet.parent, v1.EventTypeNormal, "SuccessfulCreate", "pvc: %v", remaining.Name) - } - f.claims = claimList - return nil -} - -// DeletePVCs fakes pvc deletion. -func (f *fakePetClient) DeletePVCs(pet *pcb) error { - claimsToDelete := pet.pvcs - deleteClaimNames := sets.NewString() - for _, c := range claimsToDelete { - deleteClaimNames.Insert(c.Name) - } - pvcs := []v1.PersistentVolumeClaim{} - for i, existing := range f.claims { - if deleteClaimNames.Has(existing.Name) { - deleteClaimNames.Delete(existing.Name) - f.claimsDeleted++ - f.recorder.Eventf(pet.parent, v1.EventTypeNormal, "SuccessfulDelete", "pvc: %v", existing.Name) - continue - } - pvcs = append(pvcs, f.claims[i]) - } - f.claims = pvcs - if deleteClaimNames.Len() != 0 { - return fmt.Errorf("Claims %+v don't exist. Failed deletion.", deleteClaimNames) - } - return nil -} diff --git a/pkg/controller/statefulset/identity_mappers.go b/pkg/controller/statefulset/identity_mappers.go deleted file mode 100644 index 0c8decb15b1..00000000000 --- a/pkg/controller/statefulset/identity_mappers.go +++ /dev/null @@ -1,247 +0,0 @@ -/* -Copyright 2016 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package statefulset - -import ( - "crypto/md5" - "fmt" - "sort" - "strings" - - "github.com/golang/glog" - "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/kubernetes/pkg/api/v1" - podapi "k8s.io/kubernetes/pkg/api/v1/pod" - apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1" -) - -// identityMapper is an interface for assigning identities to a pet. -// All existing identity mappers just append "-(index)" to the statefulset name to -// generate a unique identity. This is used in claims/DNS/hostname/petname -// etc. There's a more elegant way to achieve this mapping, but we're -// taking the simplest route till we have data on whether users will need -// more customization. -// Note that running a single identity mapper is not guaranteed to give -// your pet a unique identity. You must run them all. Order doesn't matter. 
-type identityMapper interface { - // SetIdentity takes an id and assigns the given pet an identity based - // on the stateful set spec. The is must be unique amongst members of the - // stateful set. - SetIdentity(id string, pet *v1.Pod) - - // Identity returns the identity of the pet. - Identity(pod *v1.Pod) string -} - -func newIdentityMappers(ps *apps.StatefulSet) []identityMapper { - return []identityMapper{ - &NameIdentityMapper{ps}, - &NetworkIdentityMapper{ps}, - &VolumeIdentityMapper{ps}, - } -} - -// NetworkIdentityMapper assigns network identity to pets. -type NetworkIdentityMapper struct { - ps *apps.StatefulSet -} - -// SetIdentity sets network identity on the pet. -func (n *NetworkIdentityMapper) SetIdentity(id string, pet *v1.Pod) { - pet.Annotations[podapi.PodHostnameAnnotation] = fmt.Sprintf("%v-%v", n.ps.Name, id) - pet.Annotations[podapi.PodSubdomainAnnotation] = n.ps.Spec.ServiceName - return -} - -// Identity returns the network identity of the pet. -func (n *NetworkIdentityMapper) Identity(pet *v1.Pod) string { - return n.String(pet) -} - -// String is a string function for the network identity of the pet. -func (n *NetworkIdentityMapper) String(pet *v1.Pod) string { - hostname := pet.Annotations[podapi.PodHostnameAnnotation] - subdomain := pet.Annotations[podapi.PodSubdomainAnnotation] - return strings.Join([]string{hostname, subdomain, n.ps.Namespace}, ".") -} - -// VolumeIdentityMapper assigns storage identity to pets. -type VolumeIdentityMapper struct { - ps *apps.StatefulSet -} - -// SetIdentity sets storage identity on the pet. -func (v *VolumeIdentityMapper) SetIdentity(id string, pet *v1.Pod) { - petVolumes := []v1.Volume{} - petClaims := v.GetClaims(id) - - // These volumes will all go down with the pod. If a name matches one of - // the claims in the stateful set, it gets clobbered. - podVolumes := map[string]v1.Volume{} - for _, podVol := range pet.Spec.Volumes { - podVolumes[podVol.Name] = podVol - } - - // Insert claims for the idempotent statefulset volumes - for name, claim := range petClaims { - // Volumes on a pet for which there are no associated claims on the - // statefulset are pod local, and die with the pod. - podVol, ok := podVolumes[name] - if ok { - // TODO: Validate and reject this. - glog.V(4).Infof("Overwriting existing volume source %v", podVol.Name) - } - newVol := v1.Volume{ - Name: name, - VolumeSource: v1.VolumeSource{ - PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ - ClaimName: claim.Name, - // TODO: Use source definition to set this value when we have one. - ReadOnly: false, - }, - }, - } - petVolumes = append(petVolumes, newVol) - } - - // Transfer any ephemeral pod volumes - for name, vol := range podVolumes { - if _, ok := petClaims[name]; !ok { - petVolumes = append(petVolumes, vol) - } - } - pet.Spec.Volumes = petVolumes - return -} - -// Identity returns the storage identity of the pet. -func (v *VolumeIdentityMapper) Identity(pet *v1.Pod) string { - // TODO: Make this a hash? - return v.String(pet) -} - -// String is a string function for the network identity of the pet. -func (v *VolumeIdentityMapper) String(pet *v1.Pod) string { - ids := []string{} - petVols := sets.NewString() - for _, petVol := range v.ps.Spec.VolumeClaimTemplates { - petVols.Insert(petVol.Name) - } - for _, podVol := range pet.Spec.Volumes { - // Volumes on a pet for which there are no associated claims on the - // statefulset are pod local, and die with the pod. 
- if !petVols.Has(podVol.Name) { - continue - } - if podVol.VolumeSource.PersistentVolumeClaim == nil { - // TODO: Is this a part of the identity? - ids = append(ids, fmt.Sprintf("%v:None", podVol.Name)) - continue - } - ids = append(ids, fmt.Sprintf("%v:%v", podVol.Name, podVol.VolumeSource.PersistentVolumeClaim.ClaimName)) - } - sort.Strings(ids) - return strings.Join(ids, "") -} - -// GetClaims returns the volume claims associated with the given id. -// The claims belong to the statefulset. The id should be unique within a statefulset. -func (v *VolumeIdentityMapper) GetClaims(id string) map[string]v1.PersistentVolumeClaim { - petClaims := map[string]v1.PersistentVolumeClaim{} - for _, pvc := range v.ps.Spec.VolumeClaimTemplates { - claim := pvc - // TODO: Name length checking in validation. - claim.Name = fmt.Sprintf("%v-%v-%v", claim.Name, v.ps.Name, id) - claim.Namespace = v.ps.Namespace - claim.Labels = v.ps.Spec.Selector.MatchLabels - - // TODO: We're assuming that the claim template has a volume QoS key, eg: - // volume.alpha.kubernetes.io/storage-class: anything - petClaims[pvc.Name] = claim - } - return petClaims -} - -// GetClaimsForPet returns the pvcs for the given pet. -func (v *VolumeIdentityMapper) GetClaimsForPet(pet *v1.Pod) []v1.PersistentVolumeClaim { - // Strip out the "-(index)" from the pet name and use it to generate - // claim names. - id := strings.Split(pet.Name, "-") - petID := id[len(id)-1] - pvcs := []v1.PersistentVolumeClaim{} - for _, pvc := range v.GetClaims(petID) { - pvcs = append(pvcs, pvc) - } - return pvcs -} - -// NameIdentityMapper assigns names to pets. -// It also puts the pet in the same namespace as the parent. -type NameIdentityMapper struct { - ps *apps.StatefulSet -} - -// SetIdentity sets the pet namespace and name. -func (n *NameIdentityMapper) SetIdentity(id string, pet *v1.Pod) { - pet.Name = fmt.Sprintf("%v-%v", n.ps.Name, id) - pet.Namespace = n.ps.Namespace - return -} - -// Identity returns the name identity of the pet. -func (n *NameIdentityMapper) Identity(pet *v1.Pod) string { - return n.String(pet) -} - -// String is a string function for the name identity of the pet. -func (n *NameIdentityMapper) String(pet *v1.Pod) string { - return fmt.Sprintf("%v/%v", pet.Namespace, pet.Name) -} - -// identityHash computes a hash of the pet by running all the above identity -// mappers. -func identityHash(ps *apps.StatefulSet, pet *v1.Pod) string { - id := "" - for _, idMapper := range newIdentityMappers(ps) { - id += idMapper.Identity(pet) - } - return fmt.Sprintf("%x", md5.Sum([]byte(id))) -} - -// copyPetID gives the realPet the same identity as the expectedPet. -// Note that this is *not* a literal copy, but a copy of the fields that -// contribute to the pet's identity. The returned boolean 'needsUpdate' will -// be false if the realPet already has the same identity as the expectedPet. -func copyPetID(realPet, expectedPet *pcb) (pod v1.Pod, needsUpdate bool, err error) { - if realPet.pod == nil || expectedPet.pod == nil { - return pod, false, fmt.Errorf("Need a valid to and from pet for copy") - } - if realPet.parent.UID != expectedPet.parent.UID { - return pod, false, fmt.Errorf("Cannot copy pets with different parents") - } - ps := realPet.parent - if identityHash(ps, realPet.pod) == identityHash(ps, expectedPet.pod) { - return *realPet.pod, false, nil - } - copyPod := *realPet.pod - // This is the easiest way to give an identity to a pod. It won't work - // when we stop using names for id. 
- for _, idMapper := range newIdentityMappers(ps) { - idMapper.SetIdentity(expectedPet.id, ©Pod) - } - return copyPod, true, nil -} diff --git a/pkg/controller/statefulset/identity_mappers_test.go b/pkg/controller/statefulset/identity_mappers_test.go deleted file mode 100644 index 16e87c61c67..00000000000 --- a/pkg/controller/statefulset/identity_mappers_test.go +++ /dev/null @@ -1,179 +0,0 @@ -/* -Copyright 2016 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package statefulset - -import ( - "fmt" - "reflect" - "strings" - - "testing" - - "k8s.io/kubernetes/pkg/api/v1" - apipod "k8s.io/kubernetes/pkg/api/v1/pod" -) - -func TestPetIDName(t *testing.T) { - replicas := 3 - ps := newStatefulSet(replicas) - for i := 0; i < replicas; i++ { - petName := fmt.Sprintf("%v-%d", ps.Name, i) - pcb, err := newPCB(fmt.Sprintf("%d", i), ps) - if err != nil { - t.Fatalf("Failed to generate pet %v", err) - } - pod := pcb.pod - if pod.Name != petName || pod.Namespace != ps.Namespace { - t.Errorf("Wrong name identity, expected %v", pcb.pod.Name) - } - } -} - -func TestPetIDDNS(t *testing.T) { - replicas := 3 - ps := newStatefulSet(replicas) - for i := 0; i < replicas; i++ { - petName := fmt.Sprintf("%v-%d", ps.Name, i) - petSubdomain := ps.Spec.ServiceName - pcb, err := newPCB(fmt.Sprintf("%d", i), ps) - pod := pcb.pod - if err != nil { - t.Fatalf("Failed to generate pet %v", err) - } - if hostname, ok := pod.Annotations[apipod.PodHostnameAnnotation]; !ok || hostname != petName { - t.Errorf("Wrong hostname: %v", hostname) - } - if subdomain, ok := pod.Annotations[apipod.PodSubdomainAnnotation]; !ok || subdomain != petSubdomain { - t.Errorf("Wrong subdomain: %v", subdomain) - } - } -} -func TestPetIDVolume(t *testing.T) { - replicas := 3 - ps := newStatefulSet(replicas) - for i := 0; i < replicas; i++ { - pcb, err := newPCB(fmt.Sprintf("%d", i), ps) - if err != nil { - t.Fatalf("Failed to generate pet %v", err) - } - pod := pcb.pod - petName := fmt.Sprintf("%v-%d", ps.Name, i) - claimName := fmt.Sprintf("datadir-%v", petName) - for _, v := range pod.Spec.Volumes { - switch v.Name { - case "datadir": - c := v.VolumeSource.PersistentVolumeClaim - if c == nil || c.ClaimName != claimName { - t.Fatalf("Unexpected claim %v", c) - } - if err := checkPodForMount(pod, "datadir"); err != nil { - t.Errorf("Expected pod mount: %v", err) - } - case "home": - h := v.VolumeSource.HostPath - if h == nil || h.Path != "/tmp/home" { - t.Errorf("Unexpected modification to hostpath, expected /tmp/home got %+v", h) - } - default: - t.Errorf("Unexpected volume %v", v.Name) - } - } - } - // TODO: Check volume mounts. 
-} - -func TestPetIDVolumeClaims(t *testing.T) { - replicas := 3 - ps := newStatefulSet(replicas) - for i := 0; i < replicas; i++ { - pcb, err := newPCB(fmt.Sprintf("%v", i), ps) - if err != nil { - t.Fatalf("Failed to generate pet %v", err) - } - pvcs := pcb.pvcs - petName := fmt.Sprintf("%v-%d", ps.Name, i) - claimName := fmt.Sprintf("datadir-%v", petName) - if len(pvcs) != 1 || pvcs[0].Name != claimName { - t.Errorf("Wrong pvc expected %v got %v", claimName, pvcs[0].Name) - } - } -} - -func TestPetIDCrossAssignment(t *testing.T) { - replicas := 3 - ps := newStatefulSet(replicas) - - nameMapper := &NameIdentityMapper{ps} - volumeMapper := &VolumeIdentityMapper{ps} - networkMapper := &NetworkIdentityMapper{ps} - - // Check that the name is consistent across identity. - for i := 0; i < replicas; i++ { - pet, _ := newPCB(fmt.Sprintf("%v", i), ps) - p := pet.pod - name := strings.Split(nameMapper.Identity(p), "/")[1] - network := networkMapper.Identity(p) - volume := volumeMapper.Identity(p) - - petVolume := strings.Split(volume, ":")[1] - - if petVolume != fmt.Sprintf("datadir-%v", name) { - t.Errorf("Unexpected pet volume name %v, expected %v", petVolume, name) - } - if network != fmt.Sprintf("%v.%v.%v", name, ps.Spec.ServiceName, ps.Namespace) { - t.Errorf("Unexpected pet network ID %v, expected %v", network, name) - } - t.Logf("[%v] volume: %+v, network: %+v, name: %+v", i, volume, network, name) - } -} - -func TestPetIDReset(t *testing.T) { - replicas := 2 - ps := newStatefulSet(replicas) - firstPCB, err := newPCB("1", ps) - secondPCB, err := newPCB("2", ps) - if identityHash(ps, firstPCB.pod) == identityHash(ps, secondPCB.pod) { - t.Fatalf("Failed to generate uniquey identities:\n%+v\n%+v", firstPCB.pod.Spec, secondPCB.pod.Spec) - } - userAdded := v1.Volume{ - Name: "test", - VolumeSource: v1.VolumeSource{ - EmptyDir: &v1.EmptyDirVolumeSource{Medium: v1.StorageMediumMemory}, - }, - } - firstPCB.pod.Spec.Volumes = append(firstPCB.pod.Spec.Volumes, userAdded) - pod, needsUpdate, err := copyPetID(firstPCB, secondPCB) - if err != nil { - t.Errorf("%v", err) - } - if !needsUpdate { - t.Errorf("expected update since identity of %v was reset", secondPCB.pod.Name) - } - if identityHash(ps, &pod) != identityHash(ps, secondPCB.pod) { - t.Errorf("Failed to copy identity for pod %v -> %v", firstPCB.pod.Name, secondPCB.pod.Name) - } - foundVol := false - for _, v := range pod.Spec.Volumes { - if reflect.DeepEqual(v, userAdded) { - foundVol = true - break - } - } - if !foundVol { - t.Errorf("User added volume was corrupted by reset action.") - } -} diff --git a/pkg/controller/statefulset/iterator.go b/pkg/controller/statefulset/iterator.go deleted file mode 100644 index fc29e920602..00000000000 --- a/pkg/controller/statefulset/iterator.go +++ /dev/null @@ -1,163 +0,0 @@ -/* -Copyright 2016 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package statefulset - -import ( - "fmt" - "sort" - - "github.com/golang/glog" - "k8s.io/kubernetes/pkg/api/v1" - apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1" - "k8s.io/kubernetes/pkg/controller" -) - -// newPCB generates a new PCB using the id string as a unique qualifier -func newPCB(id string, ps *apps.StatefulSet) (*pcb, error) { - petPod, err := controller.GetPodFromTemplate(&ps.Spec.Template, ps, nil) - if err != nil { - return nil, err - } - for _, im := range newIdentityMappers(ps) { - im.SetIdentity(id, petPod) - } - petPVCs := []v1.PersistentVolumeClaim{} - vMapper := &VolumeIdentityMapper{ps} - for _, c := range vMapper.GetClaims(id) { - petPVCs = append(petPVCs, c) - } - // TODO: Replace id field with IdentityHash, since id is more than just an index. - return &pcb{pod: petPod, pvcs: petPVCs, id: id, parent: ps}, nil -} - -// petQueue is a custom datastructure that's resembles a queue of pets. -type petQueue struct { - pets []*pcb - idMapper identityMapper -} - -// enqueue enqueues the given pet, evicting any pets with the same id -func (pt *petQueue) enqueue(p *pcb) { - if p == nil { - pt.pets = append(pt.pets, nil) - return - } - // Pop an existing pet from the know list, append the new pet to the end. - petList := []*pcb{} - petID := pt.idMapper.Identity(p.pod) - for i := range pt.pets { - if petID != pt.idMapper.Identity(pt.pets[i].pod) { - petList = append(petList, pt.pets[i]) - } - } - pt.pets = petList - p.event = syncPet - pt.pets = append(pt.pets, p) -} - -// dequeue returns the last element of the queue -func (pt *petQueue) dequeue() *pcb { - if pt.empty() { - glog.Warningf("Dequeue invoked on an empty queue") - return nil - } - l := len(pt.pets) - 1 - pet := pt.pets[l] - pt.pets = pt.pets[:l] - return pet -} - -// empty returns true if the pet queue is empty. -func (pt *petQueue) empty() bool { - return len(pt.pets) == 0 -} - -// NewPetQueue returns a queue for tracking pets -func NewPetQueue(ps *apps.StatefulSet, podList []*v1.Pod) *petQueue { - pt := petQueue{pets: []*pcb{}, idMapper: &NameIdentityMapper{ps}} - // Seed the queue with existing pets. Assume all pets are scheduled for - // deletion, enqueuing a pet will "undelete" it. We always want to delete - // from the higher ids, so sort by creation timestamp. - - sort.Sort(PodsByCreationTimestamp(podList)) - vMapper := VolumeIdentityMapper{ps} - for i := range podList { - pod := podList[i] - pt.pets = append(pt.pets, &pcb{pod: pod, pvcs: vMapper.GetClaimsForPet(pod), parent: ps, event: deletePet, id: fmt.Sprintf("%v", i)}) - } - return &pt -} - -// statefulsetIterator implements a simple iterator over pets in the given statefulset. -type statefulSetIterator struct { - // ps is the statefulset for this iterator. - ps *apps.StatefulSet - // queue contains the elements to iterate over. - queue *petQueue - // errs is a list because we always want the iterator to drain. - errs []error - // petCount is the number of pets iterated over. - petCount int32 -} - -// Next returns true for as long as there are elements in the underlying queue. -func (pi *statefulSetIterator) Next() bool { - var pet *pcb - var err error - if pi.petCount < *(pi.ps.Spec.Replicas) { - pet, err = newPCB(fmt.Sprintf("%d", pi.petCount), pi.ps) - if err != nil { - pi.errs = append(pi.errs, err) - // Don't stop iterating over the set on errors. Caller handles nil. - pet = nil - } - pi.queue.enqueue(pet) - pi.petCount++ - } - // Keep the iterator running till we've deleted pets in the queue. 
- return !pi.queue.empty() -} - -// Value dequeues an element from the queue. -func (pi *statefulSetIterator) Value() *pcb { - return pi.queue.dequeue() -} - -// NewStatefulSetIterator returns a new iterator. All pods in the given podList -// are used to seed the queue of the iterator. -func NewStatefulSetIterator(ps *apps.StatefulSet, podList []*v1.Pod) *statefulSetIterator { - pi := &statefulSetIterator{ - ps: ps, - queue: NewPetQueue(ps, podList), - errs: []error{}, - petCount: 0, - } - return pi -} - -// PodsByCreationTimestamp sorts a list of Pods by creation timestamp, using their names as a tie breaker. -type PodsByCreationTimestamp []*v1.Pod - -func (o PodsByCreationTimestamp) Len() int { return len(o) } -func (o PodsByCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] } - -func (o PodsByCreationTimestamp) Less(i, j int) bool { - if o[i].CreationTimestamp.Equal(o[j].CreationTimestamp) { - return o[i].Name < o[j].Name - } - return o[i].CreationTimestamp.Before(o[j].CreationTimestamp) -} diff --git a/pkg/controller/statefulset/iterator_test.go b/pkg/controller/statefulset/iterator_test.go deleted file mode 100644 index 92e44cb53be..00000000000 --- a/pkg/controller/statefulset/iterator_test.go +++ /dev/null @@ -1,150 +0,0 @@ -/* -Copyright 2016 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package statefulset - -import ( - "fmt" - - "testing" - - "k8s.io/apimachinery/pkg/util/sets" - "k8s.io/kubernetes/pkg/api/v1" -) - -func TestPetQueueCreates(t *testing.T) { - replicas := 3 - ps := newStatefulSet(replicas) - q := NewPetQueue(ps, []*v1.Pod{}) - for i := 0; i < replicas; i++ { - pet, _ := newPCB(fmt.Sprintf("%v", i), ps) - q.enqueue(pet) - p := q.dequeue() - if p.event != syncPet { - t.Errorf("Failed to retrieve sync event from queue") - } - } - if q.dequeue() != nil { - t.Errorf("Expected no pods") - } -} - -func TestPetQueueScaleDown(t *testing.T) { - replicas := 1 - ps := newStatefulSet(replicas) - - // knownPods are the pods in the system - knownPods := newPodList(ps, 3) - - q := NewPetQueue(ps, knownPods) - - // The iterator will insert a single replica, the enqueue - // mimics that behavior. 
- pet, _ := newPCB(fmt.Sprintf("%v", 0), ps) - q.enqueue(pet) - - deletes := sets.NewString(fmt.Sprintf("%v-1", ps.Name), fmt.Sprintf("%v-2", ps.Name)) - syncs := sets.NewString(fmt.Sprintf("%v-0", ps.Name)) - - // Confirm that 2 known pods are deleted - for i := 0; i < 3; i++ { - p := q.dequeue() - switch p.event { - case syncPet: - if !syncs.Has(p.pod.Name) { - t.Errorf("Unexpected sync %v expecting %+v", p.pod.Name, syncs) - } - case deletePet: - if !deletes.Has(p.pod.Name) { - t.Errorf("Unexpected deletes %v expecting %+v", p.pod.Name, deletes) - } - } - } - if q.dequeue() != nil { - t.Errorf("Expected no pods") - } -} - -func TestPetQueueScaleUp(t *testing.T) { - replicas := 5 - ps := newStatefulSet(replicas) - - // knownPods are pods in the system - knownPods := newPodList(ps, 2) - - q := NewPetQueue(ps, knownPods) - for i := 0; i < 5; i++ { - pet, _ := newPCB(fmt.Sprintf("%v", i), ps) - q.enqueue(pet) - } - for i := 4; i >= 0; i-- { - pet := q.dequeue() - expectedName := fmt.Sprintf("%v-%d", ps.Name, i) - if pet.event != syncPet || pet.pod.Name != expectedName { - t.Errorf("Unexpected pod %+v, expected %v", pet.pod.Name, expectedName) - } - } -} - -func TestStatefulSetIteratorRelist(t *testing.T) { - replicas := 5 - ps := newStatefulSet(replicas) - - // knownPods are pods in the system - knownPods := newPodList(ps, 5) - for i := range knownPods { - knownPods[i].Spec.NodeName = fmt.Sprintf("foo-node-%v", i) - knownPods[i].Status.Phase = v1.PodRunning - } - pi := NewStatefulSetIterator(ps, knownPods) - - // A simple resync should not change identity of pods in the system - i := 0 - for pi.Next() { - p := pi.Value() - if identityHash(ps, p.pod) != identityHash(ps, knownPods[i]) { - t.Errorf("Got unexpected identity hash from iterator.") - } - if p.event != syncPet { - t.Errorf("Got unexpected sync event for %v: %v", p.pod.Name, p.event) - } - i++ - } - if i != 5 { - t.Errorf("Unexpected iterations %v, this probably means too many/few pods", i) - } - - // Scale to 0 should delete all pods in system - *(ps.Spec.Replicas) = 0 - pi = NewStatefulSetIterator(ps, knownPods) - i = 0 - for pi.Next() { - p := pi.Value() - if p.event != deletePet { - t.Errorf("Got unexpected sync event for %v: %v", p.pod.Name, p.event) - } - i++ - } - if i != 5 { - t.Errorf("Unexpected iterations %v, this probably means too many/few pods", i) - } - - // Relist with 0 replicas should no-op - pi = NewStatefulSetIterator(ps, []*v1.Pod{}) - if pi.Next() != false { - t.Errorf("Unexpected iteration without any replicas or pods in system") - } -} diff --git a/pkg/controller/statefulset/stateful_pod_control.go b/pkg/controller/statefulset/stateful_pod_control.go new file mode 100644 index 00000000000..00b66c2ecdb --- /dev/null +++ b/pkg/controller/statefulset/stateful_pod_control.go @@ -0,0 +1,225 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package statefulset + +import ( + "fmt" + "strings" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + errorutils "k8s.io/apimachinery/pkg/util/errors" + "k8s.io/client-go/tools/record" + "k8s.io/kubernetes/pkg/api" + "k8s.io/kubernetes/pkg/api/v1" + apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1" + "k8s.io/kubernetes/pkg/client/clientset_generated/clientset" +) + +// StatefulPodControlInterface defines the interface that StatefulSetController uses to create, update, and delete Pods, +// and to update the Status of a StatefulSet. It follows the design paradigms used for PodControl, but its +// implementation provides for PVC creation, ordered Pod creation, ordered Pod termination, and Pod identity enforcement. +// Like controller.PodControlInterface, it is implemented as an interface to provide for testing fakes. +type StatefulPodControlInterface interface { + // CreateStatefulPod creates a Pod in a StatefulSet. Any PVCs necessary for the Pod are created prior to creating + // the Pod. If the returned error is nil the Pod and its PVCs have been created. + CreateStatefulPod(set *apps.StatefulSet, pod *v1.Pod) error + // UpdateStatefulPod updates a Pod in a StatefulSet. If the Pod already has the correct identity and stable + // storage this method is a no-op. If the Pod must be mutated to conform to the Set, it is mutated and updated. + // pod is an in-out parameter, and any updates made to the pod are reflected as mutations to this parameter. If + // the update is successful, the returned error is nil. + UpdateStatefulPod(set *apps.StatefulSet, pod *v1.Pod) error + // DeleteStatefulPod deletes a Pod in a StatefulSet. The Pod's PVCs are not deleted. If the delete is successful, + // the returned error is nil. + DeleteStatefulPod(set *apps.StatefulSet, pod *v1.Pod) error + // UpdateStatefulSetReplicas updates the Status.Replicas of a StatefulSet. set is an in-out parameter, and any + // updates made to the set are made visible as mutations to the parameter. If the method is successful, the + // returned error is nil, and set has its Status.Replicas field set to replicas. + UpdateStatefulSetReplicas(set *apps.StatefulSet, replicas int32) error +} + +// NewRealStatefulPodControl returns a StatefulPodControlInterface that uses client to communicate with the API +// server and records Pod and PVC lifecycle events against the owning StatefulSet via recorder. +func NewRealStatefulPodControl(client clientset.Interface, recorder record.EventRecorder) StatefulPodControlInterface { + return &realStatefulPodControl{client, recorder} +}
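Usage sketch, not part of this diff: a caller constructs the control once and drives Pod lifecycle through it. Here client stands for any clientset.Interface, and the fake recorder mirrors what the tests in this change use.

    recorder := record.NewFakeRecorder(10)
    var control StatefulPodControlInterface = NewRealStatefulPodControl(client, recorder)
    // Creating pod 0 of the set also creates any PVCs named by its volume claim templates.
    if err := control.CreateStatefulPod(set, pod); err != nil {
        return err // an event has already been recorded against set
    }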
+ +// realStatefulPodControl implements StatefulPodControlInterface using a clientset.Interface to communicate with the +// API server. The struct is package private as the internal details are irrelevant to importing packages. +type realStatefulPodControl struct { + client clientset.Interface + recorder record.EventRecorder +} + +func (spc *realStatefulPodControl) CreateStatefulPod(set *apps.StatefulSet, pod *v1.Pod) error { + // Create the Pod's PVCs prior to creating the Pod + if err := spc.createPersistentVolumeClaims(set, pod); err != nil { + spc.recordPodEvent("create", set, pod, err) + return err + } + // If we created the PVCs attempt to create the Pod + _, err := spc.client.Core().Pods(set.Namespace).Create(pod) + // an already-exists error is returned to the caller without recording an event + if apierrors.IsAlreadyExists(err) { + return err + } + spc.recordPodEvent("create", set, pod, err) + return err +} + +func (spc *realStatefulPodControl) UpdateStatefulPod(set *apps.StatefulSet, pod *v1.Pod) error { + // we make a copy of the Pod on the stack and mutate the copy + // we copy back to pod to notify the caller of successful mutation + obj, err := api.Scheme.Copy(pod) + if err != nil { + return fmt.Errorf("unable to copy pod: %v", err) + } + podCopy := obj.(*v1.Pod) + for attempt := 0; attempt < maxUpdateRetries; attempt++ { + // assume the Pod is consistent + consistent := true + // if the Pod does not conform to its identity, update the identity and dirty the Pod + if !identityMatches(set, podCopy) { + updateIdentity(set, podCopy) + consistent = false + } + // if the Pod does not conform to the StatefulSet's storage requirements, update the Pod's PVCs, + // dirty the Pod, and create any missing PVCs + if !storageMatches(set, podCopy) { + updateStorage(set, podCopy) + consistent = false + if err := spc.createPersistentVolumeClaims(set, podCopy); err != nil { + spc.recordPodEvent("update", set, pod, err) + return err + } + } + // if the Pod is not dirty do nothing + if consistent { + *pod = *podCopy + return nil + } + // commit the update, retrying on conflicts + _, err = spc.client.Core().Pods(set.Namespace).Update(podCopy) + if !apierrors.IsConflict(err) { + if err == nil { + *pod = *podCopy + } + spc.recordPodEvent("update", set, pod, err) + return err + } + conflicting, err := spc.client.Core().Pods(set.Namespace).Get(podCopy.Name, metav1.GetOptions{}) + if err != nil { + spc.recordPodEvent("update", set, podCopy, err) + return err + } + *podCopy = *conflicting + } + spc.recordPodEvent("update", set, pod, updateConflictError) + return updateConflictError +} + +func (spc *realStatefulPodControl) DeleteStatefulPod(set *apps.StatefulSet, pod *v1.Pod) error { + err := spc.client.Core().Pods(set.Namespace).Delete(pod.Name, nil) + spc.recordPodEvent("delete", set, pod, err) + return err +} + +func (spc *realStatefulPodControl) UpdateStatefulSetReplicas(set *apps.StatefulSet, replicas int32) error { + if set.Status.Replicas == replicas { + return nil + } + obj, err := api.Scheme.Copy(set) + if err != nil { + return fmt.Errorf("unable to copy set: %v", err) + } + setCopy := obj.(*apps.StatefulSet) + setCopy.Status.Replicas = replicas + for attempt := 0; attempt < maxUpdateRetries; attempt++ { + _, err := spc.client.Apps().StatefulSets(setCopy.Namespace).UpdateStatus(setCopy) + if !apierrors.IsConflict(err) { + if err == nil { + *set = *setCopy + } + return err + } + conflicting, err := spc.client.Apps().StatefulSets(setCopy.Namespace).Get(setCopy.Name, metav1.GetOptions{}) + if err != nil { + return err + } + conflicting.Status.Replicas = setCopy.Status.Replicas + *setCopy = *conflicting + } + return updateConflictError +}
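Note: maxUpdateRetries and updateConflictError are defined in stateful_set_utils.go, which this excerpt does not show; plausibly something along these lines (values and wording assumed):

    // maximum number of conflict retries before an update is abandoned (assumed value)
    const maxUpdateRetries = 10

    // returned when every retry attempt ended in a conflict (assumed wording)
    var updateConflictError = fmt.Errorf("aborting update after %d attempts", maxUpdateRetries)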
+ +// recordPodEvent records an event for verb applied to a Pod in a StatefulSet. If err is nil the generated event will +// have a type of v1.EventTypeNormal. If err is not nil the generated event will have a type of v1.EventTypeWarning. +func (spc *realStatefulPodControl) recordPodEvent(verb string, set *apps.StatefulSet, pod *v1.Pod, err error) { + if err == nil { + reason := fmt.Sprintf("Successful%s", strings.Title(verb)) + message := fmt.Sprintf("%s Pod %s in StatefulSet %s successful", + strings.ToLower(verb), pod.Name, set.Name) + spc.recorder.Event(set, v1.EventTypeNormal, reason, message) + } else { + reason := fmt.Sprintf("Failed%s", strings.Title(verb)) + message := fmt.Sprintf("%s Pod %s in StatefulSet %s failed error: %s", + strings.ToLower(verb), pod.Name, set.Name, err) + spc.recorder.Event(set, v1.EventTypeWarning, reason, message) + } +} + +// recordClaimEvent records an event for verb applied to the PersistentVolumeClaim of a Pod in a StatefulSet. If err is +// nil the generated event will have a type of v1.EventTypeNormal. If err is not nil the generated event will have a +// type of v1.EventTypeWarning. +func (spc *realStatefulPodControl) recordClaimEvent(verb string, set *apps.StatefulSet, pod *v1.Pod, claim *v1.PersistentVolumeClaim, err error) { + if err == nil { + reason := fmt.Sprintf("Successful%s", strings.Title(verb)) + message := fmt.Sprintf("%s Claim %s Pod %s in StatefulSet %s success", + strings.ToLower(verb), claim.Name, pod.Name, set.Name) + spc.recorder.Event(set, v1.EventTypeNormal, reason, message) + } else { + reason := fmt.Sprintf("Failed%s", strings.Title(verb)) + message := fmt.Sprintf("%s Claim %s for Pod %s in StatefulSet %s failed error: %s", + strings.ToLower(verb), claim.Name, pod.Name, set.Name, err) + spc.recorder.Event(set, v1.EventTypeWarning, reason, message) + } +} + +// createPersistentVolumeClaims creates all of the required PersistentVolumeClaims for pod, which must be a member of +// set. If all of the claims for Pod are successfully created, the returned error is nil. If creation fails, this method +// may be called again until no error is returned, indicating the PersistentVolumeClaims for pod are consistent with +// set's Spec. +func (spc *realStatefulPodControl) createPersistentVolumeClaims(set *apps.StatefulSet, pod *v1.Pod) error { + var errs []error + for _, claim := range getPersistentVolumeClaims(set, pod) { + _, err := spc.client.Core().PersistentVolumeClaims(claim.Namespace).Get(claim.Name, metav1.GetOptions{}) + if err != nil { + if apierrors.IsNotFound(err) { + _, err := spc.client.Core().PersistentVolumeClaims(claim.Namespace).Create(&claim) + if err != nil { + errs = append(errs, fmt.Errorf("Failed to create PVC %s: %s", claim.Name, err)) + } + spc.recordClaimEvent("create", set, pod, &claim, err) + } else { + errs = append(errs, fmt.Errorf("Failed to retrieve PVC %s: %s", claim.Name, err)) + spc.recordClaimEvent("create", set, pod, &claim, err) + } + } + // TODO: Check resource requirements and access modes, update if necessary + } + return errorutils.NewAggregate(errs) +} + +var _ StatefulPodControlInterface = &realStatefulPodControl{}
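Context for the file that follows: the tests drive realStatefulPodControl against a fake clientset, stubbing API responses with reactors. The pattern, distilled from the test bodies below:

    fakeClient := &fake.Clientset{}
    // Pretend no PVC exists yet, so the control path must create one.
    fakeClient.AddReactor("get", "persistentvolumeclaims", func(action core.Action) (bool, runtime.Object, error) {
        return true, nil, apierrors.NewNotFound(action.GetResource().GroupResource(), action.GetResource().Resource)
    })
    // Echo created objects back, as the real API server would.
    fakeClient.AddReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
        return true, action.(core.CreateAction).GetObject(), nil
    })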
diff --git a/pkg/controller/statefulset/stateful_pod_control_test.go b/pkg/controller/statefulset/stateful_pod_control_test.go new file mode 100644 index 00000000000..a1f9206c16a --- /dev/null +++ b/pkg/controller/statefulset/stateful_pod_control_test.go @@ -0,0 +1,619 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package statefulset + +import ( + "errors" + "strings" + "testing" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/runtime" + + core "k8s.io/client-go/testing" + "k8s.io/client-go/tools/record" + + "k8s.io/kubernetes/pkg/api/v1" + "k8s.io/kubernetes/pkg/client/clientset_generated/clientset/fake" +) + +func TestStatefulPodControlCreatesPods(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + fakeClient.AddReactor("get", "persistentvolumeclaims", func(action core.Action) (bool, runtime.Object, error) { + return true, nil, apierrors.NewNotFound(action.GetResource().GroupResource(), action.GetResource().Resource) + }) + fakeClient.AddReactor("create", "persistentvolumeclaims", func(action core.Action) (bool, runtime.Object, error) { + create := action.(core.CreateAction) + return true, create.GetObject(), nil + }) + fakeClient.AddReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) { + create := action.(core.CreateAction) + return true, create.GetObject(), nil + }) + if err := control.CreateStatefulPod(set, pod); err != nil { + t.Errorf("StatefulPodControl failed to create Pod error: %s", err) + } + events := collectEvents(recorder.Events) + if eventCount := len(events); eventCount != 2 { + t.Errorf("Expected 2 events for successful create found %d", eventCount) + } + for i := range events { + if !strings.Contains(events[i], v1.EventTypeNormal) { + t.Errorf("Expected normal events found %s", events[i]) + } + } +} + +func TestStatefulPodControlCreatePodExists(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + pvcs := getPersistentVolumeClaims(set, pod) + fakeClient.AddReactor("get", "persistentvolumeclaims", func(action core.Action) (bool, runtime.Object, error) { + claim := pvcs[action.GetResource().GroupResource().Resource] + return true, &claim, nil + }) + fakeClient.AddReactor("create", "persistentvolumeclaims", func(action core.Action) (bool, runtime.Object, error) { + create := action.(core.CreateAction) + return true, create.GetObject(), nil + }) + fakeClient.AddReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) { + return true, pod, apierrors.NewAlreadyExists(action.GetResource().GroupResource(), pod.Name) + }) + control = NewRealStatefulPodControl(fakeClient, recorder) + if err := control.CreateStatefulPod(set, pod); !apierrors.IsAlreadyExists(err) { + t.Errorf("Failed to create Pod error: %s", err) + } + events := collectEvents(recorder.Events) + if eventCount := len(events); eventCount != 0 { + t.Errorf("Expected 0 events when Pod and PVC exist found %d", eventCount) + for i := range events { + t.Log(events[i]) + } + } +}
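collectEvents, used throughout these tests, is defined later in this file, past the end of this excerpt; a plausible implementation that drains the FakeRecorder's buffered channel without blocking:

    func collectEvents(source <-chan string) []string {
        done := false
        events := make([]string, 0)
        for !done {
            select {
            case event := <-source:
                events = append(events, event)
            default:
                done = true
            }
        }
        return events
    }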
+ +func TestStatefulPodControlCreatePodPvcCreateFailure(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + fakeClient.AddReactor("get", "persistentvolumeclaims", func(action core.Action) (bool, runtime.Object, error) { + return true, nil, apierrors.NewNotFound(action.GetResource().GroupResource(), action.GetResource().Resource) + }) + fakeClient.AddReactor("create", "persistentvolumeclaims", func(action core.Action) (bool, runtime.Object, error) { + return true, nil, apierrors.NewInternalError(errors.New("API server down")) + }) + fakeClient.AddReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) { + create := action.(core.CreateAction) + return true, create.GetObject(), nil + }) + control = NewRealStatefulPodControl(fakeClient, recorder) + if err := control.CreateStatefulPod(set, pod); err == nil { + t.Error("Failed to produce error on PVC creation failure") + } + events := collectEvents(recorder.Events) + if eventCount := len(events); eventCount != 2 { + t.Errorf("Expected 2 events for PVC create failure found %d", eventCount) + } + for i := range events { + if !strings.Contains(events[i], v1.EventTypeWarning) { + t.Errorf("Expected warning events found %s", events[i]) + } + } +} + +func TestStatefulPodControlCreatePodPvcGetFailure(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + fakeClient.AddReactor("get", "persistentvolumeclaims", func(action core.Action) (bool, runtime.Object, error) { + return true, nil, apierrors.NewInternalError(errors.New("API server down")) + }) + fakeClient.AddReactor("create", "persistentvolumeclaims", func(action core.Action) (bool, runtime.Object, error) { + return true, nil, apierrors.NewInternalError(errors.New("API server down")) + }) + fakeClient.AddReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) { + create := action.(core.CreateAction) + return true, create.GetObject(), nil + }) + control = NewRealStatefulPodControl(fakeClient, recorder) + if err := control.CreateStatefulPod(set, pod); err == nil { + t.Error("Failed to produce error on PVC get failure") + } + events := collectEvents(recorder.Events) + if eventCount := len(events); eventCount != 2 { + t.Errorf("Expected 2 events for PVC get failure found %d", eventCount) + } + for i := range events { + if !strings.Contains(events[i], v1.EventTypeWarning) { + t.Errorf("Expected warning events found %s", events[i]) + } + } +} + +func TestStatefulPodControlCreatePodFailed(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + fakeClient.AddReactor("get", "persistentvolumeclaims", func(action core.Action) (bool, runtime.Object, error) { + return true, nil, apierrors.NewNotFound(action.GetResource().GroupResource(), action.GetResource().Resource) + }) + fakeClient.AddReactor("create", "persistentvolumeclaims", func(action core.Action) (bool, runtime.Object, error) { + create := action.(core.CreateAction) + return true, create.GetObject(), nil + }) + fakeClient.AddReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) { + return true, nil, apierrors.NewInternalError(errors.New("API server down")) + }) + control = NewRealStatefulPodControl(fakeClient, recorder) + if err :=
control.CreateStatefulPod(set, pod); err == nil { + t.Error("Failed to produce error on Pod creation failure") + } + events := collectEvents(recorder.Events) + if eventCount := len(events); eventCount != 2 { + t.Errorf("Expected 2 events for failed Pod create found %d", eventCount) + } else if !strings.Contains(events[0], v1.EventTypeNormal) { + t.Errorf("Expected normal event found %s", events[0]) + } else if !strings.Contains(events[1], v1.EventTypeWarning) { + t.Errorf("Expected warning event found %s", events[1]) + } +} + +func TestStatefulPodControlNoOpUpdate(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + fakeClient.AddReactor("*", "*", func(action core.Action) (bool, runtime.Object, error) { + t.Error("no-op update should not make any client invocation") + return true, nil, apierrors.NewInternalError(errors.New("If we are here we have a problem")) + }) + if err := control.UpdateStatefulPod(set, pod); err != nil { + t.Errorf("Error returned on no-op update error: %s", err) + } + events := collectEvents(recorder.Events) + if eventCount := len(events); eventCount != 0 { + t.Errorf("Expected 0 events for no-op update found %d", eventCount) + } +} + +func TestStatefulPodControlUpdatesIdentity(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + fakeClient.AddReactor("update", "pods", func(action core.Action) (bool, runtime.Object, error) { + update := action.(core.UpdateAction) + return true, update.GetObject(), nil + }) + pod.Name = "goo-0" + control = NewRealStatefulPodControl(fakeClient, recorder) + if err := control.UpdateStatefulPod(set, pod); err != nil { + t.Errorf("Successful update returned an error: %s", err) + } + events := collectEvents(recorder.Events) + if eventCount := len(events); eventCount != 1 { + t.Errorf("Expected 1 event for successful Pod update found %d", eventCount) + } else if !strings.Contains(events[0], v1.EventTypeNormal) { + t.Errorf("Expected normal event found %s", events[0]) + } + if !identityMatches(set, pod) { + t.Error("Name update failed identity does not match") + } +} + +func TestStatefulPodControlUpdateIdentityFailure(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + fakeClient.AddReactor("update", "pods", func(action core.Action) (bool, runtime.Object, error) { + return true, nil, apierrors.NewInternalError(errors.New("API server down")) + }) + pod.Name = "goo-0" + control = NewRealStatefulPodControl(fakeClient, recorder) + if err := control.UpdateStatefulPod(set, pod); err == nil { + t.Error("Failed update did not generate an error") + } + events := collectEvents(recorder.Events) + if eventCount := len(events); eventCount != 1 { + t.Errorf("Expected 1 event for failed Pod update found %d", eventCount) + } else if !strings.Contains(events[0], v1.EventTypeWarning) { + t.Errorf("Expected warning event found %s", events[0]) + } + if identityMatches(set, pod) { + t.Error("Failed update mutated Pod identity") + } +}
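identityMatches and updateIdentity live in stateful_set_utils.go, which is not shown here. Judging from the deleted identity mappers above, identity covers the Pod's name, namespace, and the hostname/subdomain annotations; a sketch of the check, with helper names assumed:

    // identityMatches returns true if pod has the name, namespace, and network
    // identity that set requires of the member with pod's ordinal (sketch only).
    func identityMatches(set *apps.StatefulSet, pod *v1.Pod) bool {
        parent, ordinal := getParentNameAndOrdinal(pod) // assumed helper
        return ordinal >= 0 &&
            parent == set.Name &&
            pod.Namespace == set.Namespace &&
            pod.Annotations[podapi.PodHostnameAnnotation] == pod.Name &&
            pod.Annotations[podapi.PodSubdomainAnnotation] == set.Spec.ServiceName
    }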
newStatefulSetPod(set, 0) + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + pvcs := getPersistentVolumeClaims(set, pod) + volumes := make([]v1.Volume, 0, len(pod.Spec.Volumes)) + for i := range pod.Spec.Volumes { + if _, contains := pvcs[pod.Spec.Volumes[i].Name]; !contains { + volumes = append(volumes, pod.Spec.Volumes[i]) + } + } + pod.Spec.Volumes = volumes + fakeClient.AddReactor("update", "pods", func(action core.Action) (bool, runtime.Object, error) { + update := action.(core.UpdateAction) + return true, update.GetObject(), nil + }) + fakeClient.AddReactor("get", "persistentvolumeclaims", func(action core.Action) (bool, runtime.Object, error) { + return true, nil, apierrors.NewNotFound(action.GetResource().GroupResource(), action.GetResource().Resource) + }) + fakeClient.AddReactor("create", "persistentvolumeclaims", func(action core.Action) (bool, runtime.Object, error) { + update := action.(core.UpdateAction) + return true, update.GetObject(), nil + }) + control = NewRealStatefulPodControl(fakeClient, recorder) + if err := control.UpdateStatefulPod(set, pod); err != nil { + t.Errorf("Successful update returned an error: %s", err) + } + events := collectEvents(recorder.Events) + if eventCount := len(events); eventCount != 2 { + t.Errorf("Expected 2 events for successful Pod storage update found %d", eventCount) + } + for i := range events { + if !strings.Contains(events[i], v1.EventTypeNormal) { + t.Errorf("Expected normal event found %s", events[i]) + } + } + if !storageMatches(set, pod) { + t.Error("Storage update failed: storage does not match") + } +} + +func TestStatefulPodControlUpdatePodStorageFailure(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + pvcs := getPersistentVolumeClaims(set, pod) + volumes := make([]v1.Volume, 0, len(pod.Spec.Volumes)) + for i := range pod.Spec.Volumes { + if _, contains := pvcs[pod.Spec.Volumes[i].Name]; !contains { + volumes = append(volumes, pod.Spec.Volumes[i]) + } + } + pod.Spec.Volumes = volumes + fakeClient.AddReactor("update", "pods", func(action core.Action) (bool, runtime.Object, error) { + update := action.(core.UpdateAction) + return true, update.GetObject(), nil + }) + fakeClient.AddReactor("get", "persistentvolumeclaims", func(action core.Action) (bool, runtime.Object, error) { + return true, nil, apierrors.NewNotFound(action.GetResource().GroupResource(), action.GetResource().Resource) + }) + fakeClient.AddReactor("create", "persistentvolumeclaims", func(action core.Action) (bool, runtime.Object, error) { + return true, nil, apierrors.NewInternalError(errors.New("API server down")) + }) + control = NewRealStatefulPodControl(fakeClient, recorder) + if err := control.UpdateStatefulPod(set, pod); err == nil { + t.Error("Failed Pod storage update did not return an error") + } + events := collectEvents(recorder.Events) + if eventCount := len(events); eventCount != 2 { + t.Errorf("Expected 2 events for failed Pod storage update found %d", eventCount) + } + for i := range events { + if !strings.Contains(events[i], v1.EventTypeWarning) { + t.Errorf("Expected warning event found %s", events[i]) + } + } + if storageMatches(set, pod) { + t.Error("Storage matches on failed update") + } +} + +func TestStatefulPodControlUpdatePodConflictSuccess(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + pod :=
newStatefulSetPod(set, 0) + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + attempts := 0 + fakeClient.AddReactor("update", "pods", func(action core.Action) (bool, runtime.Object, error) { + update := action.(core.UpdateAction) + if attempts < maxUpdateRetries/2 { + attempts++ + return true, update.GetObject(), apierrors.NewConflict(action.GetResource().GroupResource(), pod.Name, errors.New("conflict")) + } else { + return true, update.GetObject(), nil + } + }) + fakeClient.AddReactor("get", "pods", func(action core.Action) (bool, runtime.Object, error) { + pod.Name = "goo-0" + return true, pod, nil + + }) + pod.Name = "goo-0" + control = NewRealStatefulPodControl(fakeClient, recorder) + if err := control.UpdateStatefulPod(set, pod); err != nil { + t.Errorf("Successful update returned an error: %s", err) + } + events := collectEvents(recorder.Events) + if eventCount := len(events); eventCount != 1 { + t.Errorf("Expected 1 event for successful Pod update found %d", eventCount) + } else if !strings.Contains(events[0], v1.EventTypeNormal) { + t.Errorf("Expected normal event found %s", events[0]) + } + if !identityMatches(set, pod) { + t.Error("Name update failed: identity does not match") + } +} + +func TestStatefulPodControlUpdatePodConflictFailure(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + fakeClient.AddReactor("update", "pods", func(action core.Action) (bool, runtime.Object, error) { + update := action.(core.UpdateAction) + return true, update.GetObject(), apierrors.NewConflict(action.GetResource().GroupResource(), pod.Name, errors.New("conflict")) + + }) + fakeClient.AddReactor("get", "pods", func(action core.Action) (bool, runtime.Object, error) { + return true, nil, apierrors.NewInternalError(errors.New("API server down")) + + }) + pod.Name = "goo-0" + control = NewRealStatefulPodControl(fakeClient, recorder) + if err := control.UpdateStatefulPod(set, pod); err == nil { + t.Error("Failed update did not return an error") + } + events := collectEvents(recorder.Events) + if eventCount := len(events); eventCount != 1 { + t.Errorf("Expected 1 event for failed Pod update found %d", eventCount) + } else if !strings.Contains(events[0], v1.EventTypeWarning) { + t.Errorf("Expected warning event found %s", events[0]) + } + if identityMatches(set, pod) { + t.Error("Identity matches on failed update") + } +} + +func TestStatefulPodControlUpdatePodConflictMaxRetries(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + fakeClient.AddReactor("update", "pods", func(action core.Action) (bool, runtime.Object, error) { + update := action.(core.UpdateAction) + return true, update.GetObject(), apierrors.NewConflict(action.GetResource().GroupResource(), pod.Name, errors.New("conflict")) + + }) + fakeClient.AddReactor("get", "pods", func(action core.Action) (bool, runtime.Object, error) { + pod.Name = "goo-0" + return true, pod, nil + + }) + pod.Name = "goo-0" + control = NewRealStatefulPodControl(fakeClient, recorder) + if err := control.UpdateStatefulPod(set, pod); err == nil { + t.Error("Failed update did not return an error") + } + events := collectEvents(recorder.Events) + if eventCount := len(events); eventCount != 1 +
t.Errorf("Expected 1 event for failed Pod update found %d", eventCount) + } else if !strings.Contains(events[0], v1.EventTypeWarning) { + t.Errorf("Expected normal event found %s", events[0]) + } + if identityMatches(set, pod) { + t.Error("Identity matches on failed update") + } +} + +func TestStatefulPodControlDeletesStatefulPod(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + fakeClient.AddReactor("delete", "pods", func(action core.Action) (bool, runtime.Object, error) { + return true, nil, nil + }) + if err := control.DeleteStatefulPod(set, pod); err != nil { + t.Errorf("Error returned on successful delete: %s", err) + } + events := collectEvents(recorder.Events) + if eventCount := len(events); eventCount != 1 { + t.Errorf("Expected 1 events for successful delete found %d", eventCount) + } else if !strings.Contains(events[0], v1.EventTypeNormal) { + t.Errorf("Expected normal event found %s", events[0]) + } +} + +func TestStatefulPodControlDeleteFailure(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + fakeClient.AddReactor("delete", "pods", func(action core.Action) (bool, runtime.Object, error) { + return true, nil, apierrors.NewInternalError(errors.New("API server down")) + }) + if err := control.DeleteStatefulPod(set, pod); err == nil { + t.Error("Fialed to return error on failed delete") + } + events := collectEvents(recorder.Events) + if eventCount := len(events); eventCount != 1 { + t.Errorf("Expected 1 events for failed delete found %d", eventCount) + } else if !strings.Contains(events[0], v1.EventTypeWarning) { + t.Errorf("Expected warning event found %s", events[0]) + } +} + +func TestStatefulPodControlUpdatesSetStatus(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + fakeClient.AddReactor("update", "statefulsets", func(action core.Action) (bool, runtime.Object, error) { + update := action.(core.UpdateAction) + return true, update.GetObject(), nil + }) + if err := control.UpdateStatefulSetReplicas(set, 2); err != nil { + t.Errorf("Error returned on successful status update: %s", err) + } + if set.Status.Replicas != 2 { + t.Errorf("UpdateStatefulSetStatus mutated the sets replicas %d", set.Status.Replicas) + } + events := collectEvents(recorder.Events) + if eventCount := len(events); eventCount != 0 { + t.Errorf("Expected 0 events for successful status update %d", eventCount) + } +} + +func TestStatefulPodControlUpdateReplicasFailure(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + replicas := set.Status.Replicas + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + fakeClient.AddReactor("update", "statefulsets", func(action core.Action) (bool, runtime.Object, error) { + return true, nil, apierrors.NewInternalError(errors.New("API server down")) + }) + if err := control.UpdateStatefulSetReplicas(set, 2); err == nil { + t.Error("Failed update did not return error") + } + if set.Status.Replicas != replicas { + t.Errorf("UpdateStatefulSetStatus mutated the sets replicas %d", replicas) + } + events := collectEvents(recorder.Events) + if eventCount := 
len(events); eventCount != 0 { + t.Errorf("Expected 0 events for failed status update found %d", eventCount) + } +} + +func TestStatefulPodControlUpdateReplicasConflict(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + attempts := 0 + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + fakeClient.AddReactor("update", "statefulsets", func(action core.Action) (bool, runtime.Object, error) { + + update := action.(core.UpdateAction) + if attempts < maxUpdateRetries/2 { + attempts++ + return true, update.GetObject(), apierrors.NewConflict(action.GetResource().GroupResource(), set.Name, errors.New("Object already exists")) + } else { + return true, update.GetObject(), nil + } + }) + fakeClient.AddReactor("get", "statefulsets", func(action core.Action) (bool, runtime.Object, error) { + return true, set, nil + }) + if err := control.UpdateStatefulSetReplicas(set, 2); err != nil { + t.Errorf("UpdateStatefulSetReplicas returned an error: %s", err) + } + if set.Status.Replicas != 2 { + t.Errorf("UpdateStatefulSetReplicas failed to update the set's replicas: %d", set.Status.Replicas) + } + events := collectEvents(recorder.Events) + if eventCount := len(events); eventCount != 0 { + t.Errorf("Expected 0 events for successful status update found %d", eventCount) + } +} + +func TestStatefulPodControlUpdateReplicasConflictFailure(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + replicas := set.Status.Replicas + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + fakeClient.AddReactor("update", "statefulsets", func(action core.Action) (bool, runtime.Object, error) { + update := action.(core.UpdateAction) + return true, update.GetObject(), apierrors.NewConflict(action.GetResource().GroupResource(), set.Name, errors.New("Object already exists")) + }) + fakeClient.AddReactor("get", "statefulsets", func(action core.Action) (bool, runtime.Object, error) { + return true, nil, apierrors.NewInternalError(errors.New("API server down")) + }) + if err := control.UpdateStatefulSetReplicas(set, 2); err == nil { + t.Error("UpdateStatefulSetReplicas failed to return an error on get failure") + } + if set.Status.Replicas != replicas { + t.Errorf("UpdateStatefulSetReplicas mutated the set's replicas %d", set.Status.Replicas) + } + events := collectEvents(recorder.Events) + if eventCount := len(events); eventCount != 0 { + t.Errorf("Expected 0 events for failed status update found %d", eventCount) + } +} + +func TestStatefulPodControlUpdateReplicasConflictMaxRetries(t *testing.T) { + recorder := record.NewFakeRecorder(10) + set := newStatefulSet(3) + replicas := set.Status.Replicas + fakeClient := &fake.Clientset{} + control := NewRealStatefulPodControl(fakeClient, recorder) + fakeClient.AddReactor("update", "statefulsets", func(action core.Action) (bool, runtime.Object, error) { + return true, newStatefulSet(3), apierrors.NewConflict(action.GetResource().GroupResource(), set.Name, errors.New("Object already exists")) + }) + fakeClient.AddReactor("get", "statefulsets", func(action core.Action) (bool, runtime.Object, error) { + return true, newStatefulSet(3), nil + }) + if err := control.UpdateStatefulSetReplicas(set, 2); err == nil { + t.Error("UpdateStatefulSetReplicas failure did not return an error") + } + if set.Status.Replicas != replicas { + t.Errorf("UpdateStatefulSetReplicas mutated the set's replicas %d", set.Status.Replicas) + } + events := collectEvents(recorder.Events) + if eventCount :=
len(events); eventCount != 0 { + t.Errorf("Expected 0 events for successful status update %d", eventCount) + } +} + +func collectEvents(source <-chan string) []string { + done := false + events := make([]string, 0) + for !done { + select { + case event := <-source: + events = append(events, event) + default: + done = true + } + } + return events +} diff --git a/pkg/controller/statefulset/stateful_set.go b/pkg/controller/statefulset/stateful_set.go index 860062bc710..4726aa7cc19 100644 --- a/pkg/controller/statefulset/stateful_set.go +++ b/pkg/controller/statefulset/stateful_set.go @@ -27,6 +27,7 @@ import ( clientv1 "k8s.io/client-go/pkg/api/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" + "k8s.io/kubernetes/pkg/api" "k8s.io/kubernetes/pkg/api/v1" apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1" @@ -34,7 +35,6 @@ import ( "k8s.io/kubernetes/pkg/client/legacylisters" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/util/errors" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/watch" @@ -47,42 +47,27 @@ import ( const ( // Time to sleep before polling to see if the pod cache has synced. PodStoreSyncedPollPeriod = 100 * time.Millisecond - // number of retries for a status update. - statusUpdateRetries = 2 // period to relist statefulsets and verify pets statefulSetResyncPeriod = 30 * time.Second ) // StatefulSetController controls statefulsets. type StatefulSetController struct { + // client interface kubeClient clientset.Interface - // newSyncer returns an interface capable of syncing a single pet. // Abstracted out for testing. - newSyncer func(*pcb) *petSyncer - + control StatefulSetControlInterface // podStore is a cache of watched pods. podStore listers.StoreToPodLister - // podStoreSynced returns true if the pod store has synced at least once. - podStoreSynced func() bool - // Watches changes to all pods. - podController cache.Controller - + podStoreSynced cache.InformerSynced // A store of StatefulSets, populated by the psController. - psStore listers.StoreToStatefulSetLister + setStore listers.StoreToStatefulSetLister // Watches changes to all StatefulSets. - psController cache.Controller - - // A store of the 1 unhealthy pet blocking progress for a given ps - blockingPetStore *unhealthyPetTracker - + setController cache.Controller // Controllers that need to be synced. queue workqueue.RateLimitingInterface - - // syncHandler handles sync events for statefulsets. - // Abstracted as a func to allow injection for testing. - syncHandler func(psKey string) error } // NewStatefulSetController creates a new statefulset controller. 
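A note on the test pattern above, before moving into the controller changes: every one of these tests drives NewRealStatefulPodControl through a fake.Clientset, whose reactors are matched by (verb, resource) in registration order. The contract, sketched below for orientation (illustrative only, not part of this change), is that a reactor returning handled == true short-circuits the chain, while handled == false falls through to the next registered reactor:

	fakeClient := &fake.Clientset{}
	fakeClient.AddReactor("*", "*", func(action core.Action) (bool, runtime.Object, error) {
		if action.GetVerb() == "get" {
			return false, nil, nil // not handled; fall through to the next reactor
		}
		return true, nil, apierrors.NewInternalError(errors.New("injected failure"))
	})

Because record.NewFakeRecorder(10) buffers events on a channel, collectEvents can drain it without blocking, which is what lets the tests assert exact event counts immediately after a synchronous call.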
@@ -91,86 +76,83 @@ func NewStatefulSetController(podInformer cache.SharedIndexInformer, kubeClient eventBroadcaster.StartLogging(glog.Infof) eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: v1core.New(kubeClient.Core().RESTClient()).Events("")}) recorder := eventBroadcaster.NewRecorder(api.Scheme, clientv1.EventSource{Component: "statefulset"}) - pc := &apiServerPetClient{kubeClient, recorder, &defaultPetHealthChecker{}} - psc := &StatefulSetController{ - kubeClient: kubeClient, - blockingPetStore: newUnHealthyPetTracker(pc), - newSyncer: func(blockingPet *pcb) *petSyncer { - return &petSyncer{pc, blockingPet} - }, - queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "statefulset"), + ssc := &StatefulSetController{ + kubeClient: kubeClient, + control: NewDefaultStatefulSetControl(NewRealStatefulPodControl(kubeClient, recorder)), + queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "statefulset"), } podInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ // lookup the statefulset and enqueue - AddFunc: psc.addPod, + AddFunc: ssc.addPod, // lookup current and old statefulset if labels changed - UpdateFunc: psc.updatePod, + UpdateFunc: ssc.updatePod, // lookup statefulset accounting for deletion tombstones - DeleteFunc: psc.deletePod, + DeleteFunc: ssc.deletePod, }) - psc.podStore.Indexer = podInformer.GetIndexer() - psc.podController = podInformer.GetController() + ssc.podStore.Indexer = podInformer.GetIndexer() - psc.psStore.Store, psc.psController = cache.NewInformer( + ssc.setStore.Store, ssc.setController = cache.NewInformer( &cache.ListWatch{ ListFunc: func(options metav1.ListOptions) (runtime.Object, error) { - return psc.kubeClient.Apps().StatefulSets(metav1.NamespaceAll).List(options) + return ssc.kubeClient.Apps().StatefulSets(v1.NamespaceAll).List(options) }, WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) { - return psc.kubeClient.Apps().StatefulSets(metav1.NamespaceAll).Watch(options) + return ssc.kubeClient.Apps().StatefulSets(v1.NamespaceAll).Watch(options) }, }, &apps.StatefulSet{}, statefulSetResyncPeriod, cache.ResourceEventHandlerFuncs{ - AddFunc: psc.enqueueStatefulSet, + AddFunc: ssc.enqueueStatefulSet, UpdateFunc: func(old, cur interface{}) { oldPS := old.(*apps.StatefulSet) curPS := cur.(*apps.StatefulSet) if oldPS.Status.Replicas != curPS.Status.Replicas { glog.V(4).Infof("Observed updated replica count for StatefulSet: %v, %d->%d", curPS.Name, oldPS.Status.Replicas, curPS.Status.Replicas) } - psc.enqueueStatefulSet(cur) + ssc.enqueueStatefulSet(cur) }, - DeleteFunc: psc.enqueueStatefulSet, + DeleteFunc: ssc.enqueueStatefulSet, }, ) // TODO: Watch volumes - psc.podStoreSynced = psc.podController.HasSynced - psc.syncHandler = psc.Sync - return psc + ssc.podStoreSynced = podInformer.GetController().HasSynced + return ssc } // Run runs the statefulset controller. 
-func (psc *StatefulSetController) Run(workers int, stopCh <-chan struct{}) { +func (ssc *StatefulSetController) Run(workers int, stopCh <-chan struct{}) { defer utilruntime.HandleCrash() + defer ssc.queue.ShutDown() glog.Infof("Starting statefulset controller") - go psc.podController.Run(stopCh) - go psc.psController.Run(stopCh) + if !cache.WaitForCacheSync(stopCh, ssc.podStoreSynced) { + return + } + go ssc.setController.Run(stopCh) for i := 0; i < workers; i++ { - go wait.Until(psc.worker, time.Second, stopCh) + go wait.Until(ssc.worker, time.Second, stopCh) } <-stopCh glog.Infof("Shutting down statefulset controller") - psc.queue.ShutDown() + } // addPod adds the statefulset for the pod to the sync queue -func (psc *StatefulSetController) addPod(obj interface{}) { +func (ssc *StatefulSetController) addPod(obj interface{}) { pod := obj.(*v1.Pod) glog.V(4).Infof("Pod %s created, labels: %+v", pod.Name, pod.Labels) - ps := psc.getStatefulSetForPod(pod) - if ps == nil { + set := ssc.getStatefulSetForPod(pod) + if set == nil { return } - psc.enqueueStatefulSet(ps) + ssc.enqueueStatefulSet(set) } // updatePod adds the statefulset for the current and old pods to the sync queue. // If the labels of the pod didn't change, this method enqueues a single statefulset. -func (psc *StatefulSetController) updatePod(old, cur interface{}) { +func (ssc *StatefulSetController) updatePod(old, cur interface{}) { curPod := cur.(*v1.Pod) oldPod := old.(*v1.Pod) if curPod.ResourceVersion == oldPod.ResourceVersion { @@ -178,20 +160,21 @@ func (psc *StatefulSetController) updatePod(old, cur interface{}) { // Two different versions of the same pod will always have different RVs. return } - ps := psc.getStatefulSetForPod(curPod) - if ps == nil { + set := ssc.getStatefulSetForPod(curPod) + if set == nil { return } - psc.enqueueStatefulSet(ps) + ssc.enqueueStatefulSet(set) + // TODO will we need this going forward with controller ref impl? if !reflect.DeepEqual(curPod.Labels, oldPod.Labels) { - if oldPS := psc.getStatefulSetForPod(oldPod); oldPS != nil { - psc.enqueueStatefulSet(oldPS) + if oldSet := ssc.getStatefulSetForPod(oldPod); oldSet != nil { + ssc.enqueueStatefulSet(oldSet) } } } // deletePod enqueues the statefulset for the pod accounting for deletion tombstones. -func (psc *StatefulSetController) deletePod(obj interface{}) { +func (ssc *StatefulSetController) deletePod(obj interface{}) { pod, ok := obj.(*v1.Pod) // When a delete is dropped, the relist will notice a pod in the store not @@ -201,173 +184,126 @@ func (psc *StatefulSetController) deletePod(obj interface{}) { if !ok { tombstone, ok := obj.(cache.DeletedFinalStateUnknown) if !ok { - glog.Errorf("couldn't get object from tombstone %+v", obj) + utilruntime.HandleError(fmt.Errorf("couldn't get object from tombstone %+v", obj)) return } pod, ok = tombstone.Obj.(*v1.Pod) if !ok { - glog.Errorf("tombstone contained object that is not a pod %+v", obj) + utilruntime.HandleError(fmt.Errorf("tombstone contained object that is not a pod %+v", obj)) return } } glog.V(4).Infof("Pod %s/%s deleted through %v.", pod.Namespace, pod.Name, utilruntime.GetCaller()) - if ps := psc.getStatefulSetForPod(pod); ps != nil { - psc.enqueueStatefulSet(ps) + if set := ssc.getStatefulSetForPod(pod); set != nil { + ssc.enqueueStatefulSet(set) } } // getPodsForStatefulSets returns the pods that match the selectors of the given statefulset. 
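// [Editorial aside, illustrative only; not part of this change.] Putting the Run
// method above in context, a caller is expected to wire the controller roughly as
// follows (the constructor signature is abbreviated in this diff, so the trailing
// parameters here are assumptions):
//
//	ssc := NewStatefulSetController(podInformer, kubeClient, resyncPeriod)
//	stopCh := make(chan struct{})
//	defer close(stopCh)
//	go podInformer.Run(stopCh)
//	ssc.Run(workers, stopCh) // blocks until stopCh is closed
//
// Note that Run now refuses to start workers until WaitForCacheSync succeeds, so a
// sync can never observe a pod store that has not completed its initial list.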
-func (psc *StatefulSetController) getPodsForStatefulSet(ps *apps.StatefulSet) ([]*v1.Pod, error) { - // TODO: Do we want the statefulset to fight with RCs? check parent statefulset annotation, or name prefix? - sel, err := metav1.LabelSelectorAsSelector(ps.Spec.Selector) +func (ssc *StatefulSetController) getPodsForStatefulSet(set *apps.StatefulSet) ([]*v1.Pod, error) { + sel, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) if err != nil { return []*v1.Pod{}, err } - pods, err := psc.podStore.Pods(ps.Namespace).List(sel) - if err != nil { - return []*v1.Pod{}, err - } - // TODO: Do we need to copy? - result := make([]*v1.Pod, 0, len(pods)) - for i := range pods { - result = append(result, &(*pods[i])) - } - return result, nil + return ssc.podStore.Pods(set.Namespace).List(sel) } -// getStatefulSetForPod returns the pet set managing the given pod. -func (psc *StatefulSetController) getStatefulSetForPod(pod *v1.Pod) *apps.StatefulSet { - ps, err := psc.psStore.GetPodStatefulSets(pod) +// getStatefulSetForPod returns the StatefulSet managing the given pod. +func (ssc *StatefulSetController) getStatefulSetForPod(pod *v1.Pod) *apps.StatefulSet { + sets, err := ssc.setStore.GetPodStatefulSets(pod) if err != nil { glog.V(4).Infof("No StatefulSets found for pod %v, StatefulSet controller will avoid syncing", pod.Name) return nil } - // Resolve a overlapping statefulset tie by creation timestamp. - // Let's hope users don't create overlapping statefulsets. - if len(ps) > 1 { - glog.Errorf("user error! more than one StatefulSet is selecting pods with labels: %+v", pod.Labels) - sort.Sort(overlappingStatefulSets(ps)) + // More than one set is selecting the same Pod + if len(sets) > 1 { + utilruntime.HandleError( + fmt.Errorf( + "user error: more than one StatefulSet is selecting pods with labels: %+v", + pod.Labels)) + // The timestamp sort should not be necessary once we enforce the CreatedBy requirement by name + sort.Sort(overlappingStatefulSets(sets)) + // return the first created set for which pod is a member + for i := range sets { + if isMemberOf(&sets[i], pod) { + return &sets[i] + } + } + glog.V(4).Infof("No StatefulSets found for pod %v, StatefulSet controller will avoid syncing", pod.Name) + return nil } - return &ps[0] + return &sets[0] + } // enqueueStatefulSet enqueues the given statefulset in the work queue. -func (psc *StatefulSetController) enqueueStatefulSet(obj interface{}) { +func (ssc *StatefulSetController) enqueueStatefulSet(obj interface{}) { key, err := controller.KeyFunc(obj) if err != nil { - glog.Errorf("Cound't get key for object %+v: %v", obj, err) + utilruntime.HandleError(fmt.Errorf("Couldn't get key for object %+v: %v", obj, err)) return } - psc.queue.Add(key) + ssc.queue.Add(key) } -// worker runs a worker thread that just dequeues items, processes them, and marks them done. -// It enforces that the syncHandler is never invoked concurrently with the same key. -func (psc *StatefulSetController) worker() { - for { - func() { - key, quit := psc.queue.Get() - if quit { - return - } - defer psc.queue.Done(key) - if err := psc.syncHandler(key.(string)); err != nil { - glog.Errorf("Error syncing StatefulSet %v, requeuing: %v", key.(string), err) - psc.queue.AddRateLimited(key) - } else { - psc.queue.Forget(key) - } - }() +// processNextWorkItem dequeues items, processes them, and marks them done. It enforces that the syncHandler is never +// invoked concurrently with the same key.
+func (ssc *StatefulSetController) processNextWorkItem() bool { + key, quit := ssc.queue.Get() + if quit { + return false + } + defer ssc.queue.Done(key) + if err := ssc.sync(key.(string)); err != nil { + utilruntime.HandleError(fmt.Errorf("Error syncing StatefulSet %v, requeuing: %v", key.(string), err)) + ssc.queue.AddRateLimited(key) + } else { + ssc.queue.Forget(key) + } + return true +} + +// worker runs a worker goroutine that invokes processNextWorkItem until the controller's queue is closed +func (ssc *StatefulSetController) worker() { + for ssc.processNextWorkItem() { + } } -// Sync syncs the given statefulset. -func (psc *StatefulSetController) Sync(key string) error { +// sync syncs the given statefulset. +func (ssc *StatefulSetController) sync(key string) error { startTime := time.Now() defer func() { glog.V(4).Infof("Finished syncing statefulset %q (%v)", key, time.Now().Sub(startTime)) }() - if !psc.podStoreSynced() { - // Sleep so we give the pod reflector goroutine a chance to run. - time.Sleep(PodStoreSyncedPollPeriod) - return fmt.Errorf("waiting for pods controller to sync") - } - - obj, exists, err := psc.psStore.Store.GetByKey(key) + obj, exists, err := ssc.setStore.Store.GetByKey(key) if !exists { - if err = psc.blockingPetStore.store.Delete(key); err != nil { - return err - } glog.Infof("StatefulSet has been deleted %v", key) return nil } if err != nil { - glog.Errorf("Unable to retrieve StatefulSet %v from store: %v", key, err) + utilruntime.HandleError(fmt.Errorf("Unable to retrieve StatefulSet %v from store: %v", key, err)) return err } - ps := *obj.(*apps.StatefulSet) - petList, err := psc.getPodsForStatefulSet(&ps) + set := *obj.(*apps.StatefulSet) + pods, err := ssc.getPodsForStatefulSet(&set) if err != nil { return err } - numPets, syncErr := psc.syncStatefulSet(&ps, petList) - if updateErr := updatePetCount(psc.kubeClient.Apps(), ps, numPets); updateErr != nil { - glog.Infof("Failed to update replica count for statefulset %v/%v; requeuing; error: %v", ps.Namespace, ps.Name, updateErr) - return errors.NewAggregate([]error{syncErr, updateErr}) - } - - return syncErr + return ssc.syncStatefulSet(&set, pods) } -// syncStatefulSet syncs a tuple of (statefulset, pets). -func (psc *StatefulSetController) syncStatefulSet(ps *apps.StatefulSet, pets []*v1.Pod) (int, error) { - glog.V(2).Infof("Syncing StatefulSet %v/%v with %d pods", ps.Namespace, ps.Name, len(pets)) - - it := NewStatefulSetIterator(ps, pets) - blockingPet, err := psc.blockingPetStore.Get(ps, pets) - if err != nil { - return 0, err +// syncStatefulSet syncs a tuple of (statefulset, []*v1.Pod).
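// [Editorial aside, illustrative only; not part of this change.] The retry
// semantics in processNextWorkItem above come from the rate-limiting workqueue:
// AddRateLimited re-enqueues the key with a per-key, growing backoff, and Forget
// resets that key's failure history, so a StatefulSet that eventually syncs
// cleanly stops being throttled:
//
//	if err := ssc.sync(key.(string)); err != nil {
//		ssc.queue.AddRateLimited(key) // retry later, backing off per key
//	} else {
//		ssc.queue.Forget(key) // clear accumulated backoff for the key
//	}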
+func (ssc *StatefulSetController) syncStatefulSet(set *apps.StatefulSet, pods []*v1.Pod) error { + glog.V(2).Infof("Syncing StatefulSet %v/%v with %d pods", set.Namespace, set.Name, len(pods)) + if err := ssc.control.UpdateStatefulSet(set, pods); err != nil { + glog.V(2).Infof("Error syncing StatefulSet %s/%s with %d pods: %s", set.Namespace, set.Name, len(pods), err) + return err } - if blockingPet != nil { - glog.Infof("StatefulSet %v blocked from scaling on pod %v", ps.Name, blockingPet.pod.Name) - } - petManager := psc.newSyncer(blockingPet) - numPets := 0 - - for it.Next() { - pet := it.Value() - if pet == nil { - continue - } - switch pet.event { - case syncPet: - err = petManager.Sync(pet) - if err == nil { - numPets++ - } - case deletePet: - err = petManager.Delete(pet) - } - switch err.(type) { - case errUnhealthyPet: - // We are not passing this error up, but we don't increment numPets if we encounter it, - // since numPets directly translates to statefulset.status.replicas - continue - case nil: - continue - default: - it.errs = append(it.errs, err) - } - } - - if err := psc.blockingPetStore.Add(petManager.blockingPet); err != nil { - it.errs = append(it.errs, err) - } - // TODO: GC pvcs. We can't delete them per pet because of grace period, and - // in fact we *don't want to* till statefulset is stable to guarantee that bugs - // in the controller don't corrupt user data. - return numPets, errors.NewAggregate(it.errs) + glog.V(2).Infof("Successfully synced StatefulSet %s/%s", set.Namespace, set.Name) + return nil } diff --git a/pkg/controller/statefulset/stateful_set_control.go b/pkg/controller/statefulset/stateful_set_control.go new file mode 100644 index 00000000000..0bf74291d1f --- /dev/null +++ b/pkg/controller/statefulset/stateful_set_control.go @@ -0,0 +1,147 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package statefulset + +import ( + "sort" + + "k8s.io/kubernetes/pkg/api/v1" + apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1" + + "github.com/golang/glog" +) + +// StatefulSetControlInterface implements the control logic for updating StatefulSets and their child Pods. It is +// implemented as an interface to allow for extensions that provide different semantics. Currently, there is only one +// implementation. +type StatefulSetControlInterface interface { + // UpdateStatefulSet implements the control logic for Pod creation, update, and deletion, and + // persistent volume creation, update, and deletion. + // If an implementation returns a non-nil error, the invocation will be retried using a rate-limited strategy. + // Implementors should sink any errors that they do not wish to trigger a retry, and they may exit early at any + // point provided they want the update to be re-run at a later point in time.
+ UpdateStatefulSet(set *apps.StatefulSet, pods []*v1.Pod) error +} + +// NewDefaultStatefulSetControl returns a new instance of the default implementation of StatefulSetControlInterface +// that implements the documented semantics for StatefulSets. podControl is the StatefulPodControlInterface used to +// create, update, and delete Pods and to create PersistentVolumeClaims. You should use an instance returned from +// NewRealStatefulPodControl() for any scenario other than testing. +func NewDefaultStatefulSetControl(podControl StatefulPodControlInterface) StatefulSetControlInterface { + return &defaultStatefulSetControl{podControl} +} + +type defaultStatefulSetControl struct { + podControl StatefulPodControlInterface +} + +func (ssc *defaultStatefulSetControl) UpdateStatefulSet(set *apps.StatefulSet, pods []*v1.Pod) error { + replicaCount := int(*set.Spec.Replicas) + // slice that will contain all Pods such that 0 <= getOrdinal(pod) < set.Spec.Replicas + replicas := make([]*v1.Pod, replicaCount) + // slice that will contain all Pods such that set.Spec.Replicas <= getOrdinal(pod) + condemned := make([]*v1.Pod, 0, len(pods)) + ready := 0 + unhealthy := 0 + + // First we partition pods into two lists: valid replicas and condemned Pods + for i := range pods { + // count the number of running and ready replicas + if isRunningAndReady(pods[i]) { + ready++ + } + if ord := getOrdinal(pods[i]); 0 <= ord && ord < replicaCount { + // if the ordinal of the pod is within the range of the current number of replicas, + // insert it at the index of its ordinal + replicas[ord] = pods[i] + + } else if ord >= replicaCount { + // if the ordinal is greater than or equal to the number of replicas add it to the condemned list + condemned = append(condemned, pods[i]) + } + } + + // for any empty indices in the sequence [0,set.Spec.Replicas) create a new Pod + for ord := 0; ord < replicaCount; ord++ { + if replicas[ord] == nil { + replicas[ord] = newStatefulSetPod(set, ord) + } + } + + // count the number of unhealthy pods + for i := range replicas { + if !isHealthy(replicas[i]) { + unhealthy++ + } + } + for i := range condemned { + if !isHealthy(condemned[i]) { + unhealthy++ + } + } + + // sort the condemned Pods by their ordinals + sort.Sort(ascendingOrdinal(condemned)) + + // update the set's status to reflect the current number of running and ready replicas + if err := ssc.podControl.UpdateStatefulSetReplicas(set, int32(ready)); err != nil { + return err + } + + // Examine each replica with respect to its ordinal + for i := range replicas { + // delete and recreate failed pods + if isFailed(replicas[i]) { + glog.V(2).Infof("StatefulSet %s is recreating failed Pod %s", set.Name, replicas[i].Name) + if err := ssc.podControl.DeleteStatefulPod(set, replicas[i]); err != nil { + return err + } + replicas[i] = newStatefulSetPod(set, i) + } + // If we find a Pod that has not been created we create the Pod immediately and return + if !isCreated(replicas[i]) { + return ssc.podControl.CreateStatefulPod(set, replicas[i]) + } + // If we have a Pod that has been created but is not Running and Ready we cannot make progress. + // We must ensure that, for each Pod, all of its predecessors, with respect to its ordinal, are + // Running and Ready before we create it.
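// [Editorial aside, illustrative only; not part of this change.] A worked example
// of the partition above: with *set.Spec.Replicas == 3 and existing Pods with
// ordinals {0, 2, 5}, the first loop produces
//
//	replicas  = [pod-0, nil, pod-2]  // indexed by ordinal
//	condemned = [pod-5]              // ordinal >= replicaCount
//
// and the nil slot is then filled with newStatefulSetPod(set, 1), so ordinal 1 is
// created before deletion of pod-5 is even considered.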
+ if !isRunningAndReady(replicas[i]) { + glog.V(2).Infof("StatefulSet %s is waiting for Pod %s to be Running and Ready", + set.Name, replicas[i].Name) + return nil + } + // Enforce the StatefulSet invariants. + if err := ssc.podControl.UpdateStatefulPod(set, replicas[i]); err != nil { + return err + } + } + + // At this point, all of the current Replicas are Running and Ready, so we can consider termination. + // We will wait for all predecessors to be Running and Ready prior to attempting a deletion. + // We will terminate Pods in monotonically decreasing ordinal order over [set.Spec.Replicas,len(pods)). + // Note that we do not resurrect Pods in this interval. + if unhealthy > 0 { + glog.V(2).Infof("StatefulSet %s is waiting on %d Pods", set.Name, unhealthy) + return nil + } + if target := len(condemned) - 1; target >= 0 { + glog.V(2).Infof("StatefulSet %s terminating Pod %s", set.Name, condemned[target].Name) + return ssc.podControl.DeleteStatefulPod(set, condemned[target]) + } + return nil +} + +var _ StatefulSetControlInterface = &defaultStatefulSetControl{} diff --git a/pkg/controller/statefulset/stateful_set_control_test.go b/pkg/controller/statefulset/stateful_set_control_test.go new file mode 100644 index 00000000000..447f9777c10 --- /dev/null +++ b/pkg/controller/statefulset/stateful_set_control_test.go @@ -0,0 +1,712 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+*/ + +package statefulset + +import ( + "errors" + "fmt" + "sort" + "strconv" + "testing" + "time" + + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "k8s.io/client-go/tools/cache" + + "k8s.io/kubernetes/pkg/api/v1" + apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1" + listers "k8s.io/kubernetes/pkg/client/legacylisters" + "k8s.io/kubernetes/pkg/controller" +) + +func TestDefaultStatefulSetControlCreatesPods(t *testing.T) { + spc := newFakeStatefulPodControl() + ssc := NewDefaultStatefulSetControl(spc) + set := newStatefulSet(3) + if err := scaleUpStatefulSetControl(set, ssc, spc); err != nil { + t.Errorf("Failed to turn up StatefulSet : %s", err) + } + if set.Status.Replicas != 3 { + t.Error("Failed to scale statefulset to 3 replicas") + } +} + +func TestStatefulSetControlScaleUp(t *testing.T) { + spc := newFakeStatefulPodControl() + ssc := NewDefaultStatefulSetControl(spc) + set := newStatefulSet(3) + if err := scaleUpStatefulSetControl(set, ssc, spc); err != nil { + t.Errorf("Failed to turn up StatefulSet : %s", err) + } + *set.Spec.Replicas = 4 + if err := scaleUpStatefulSetControl(set, ssc, spc); err != nil { + t.Errorf("Failed to scale StatefulSet : %s", err) + } + if set.Status.Replicas != 4 { + t.Error("Failed to scale statefulset to 4 replicas") + } +} + +func TestStatefulSetControlScaleDown(t *testing.T) { + spc := newFakeStatefulPodControl() + ssc := NewDefaultStatefulSetControl(spc) + set := newStatefulSet(3) + if err := scaleUpStatefulSetControl(set, ssc, spc); err != nil { + t.Errorf("Failed to turn up StatefulSet : %s", err) + } + *set.Spec.Replicas = 0 + if err := scaleDownStatefulSetControl(set, ssc, spc); err != nil { + t.Errorf("Failed to scale StatefulSet : %s", err) + } + if set.Status.Replicas != 0 { + t.Error("Failed to scale statefulset to 0 replicas") + } +} + +func TestStatefulSetControlReplacesPods(t *testing.T) { + spc := newFakeStatefulPodControl() + ssc := NewDefaultStatefulSetControl(spc) + set := newStatefulSet(5) + if err := scaleUpStatefulSetControl(set, ssc, spc); err != nil { + t.Errorf("Failed to turn up StatefulSet : %s", err) + } + if set.Status.Replicas != 5 { + t.Error("Failed to scale statefulset to 5 replicas") + } + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + t.Error(err) + } + pods, err := spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + t.Error(err) + } + sort.Sort(ascendingOrdinal(pods)) + spc.podsIndexer.Delete(pods[0]) + spc.podsIndexer.Delete(pods[2]) + spc.podsIndexer.Delete(pods[4]) + for i := 0; i < 5; i += 2 { + pods, err := spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + t.Error(err) + } + if err = ssc.UpdateStatefulSet(set, pods); err != nil { + t.Errorf("Failed to update StatefulSet : %s", err) + } + if pods, err = spc.setPodRunning(set, i); err != nil { + t.Error(err) + } + if err = ssc.UpdateStatefulSet(set, pods); err != nil { + t.Errorf("Failed to update StatefulSet : %s", err) + } + if pods, err = spc.setPodReady(set, i); err != nil { + t.Error(err) + } + } + pods, err = spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + t.Error(err) + } + if err := ssc.UpdateStatefulSet(set, pods); err != nil { + t.Errorf("Failed to update StatefulSet : %s", err) + } + if set.Status.Replicas != 5 { + t.Error("Failed to scale StatefulSet to 5 replicas") + } +} + +func TestDefaultStatefulSetControlRecreatesFailedPod(t *testing.T) { + spc := newFakeStatefulPodControl() + ssc :=
NewDefaultStatefulSetControl(spc) + set := newStatefulSet(3) + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + t.Error(err) + } + pods, err := spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + t.Error(err) + } + if err := ssc.UpdateStatefulSet(set, pods); err != nil { + t.Errorf("Error updating StatefulSet %s", err) + } + if err := assertInvariants(set, spc); err != nil { + t.Error(err) + } + pods, err = spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + t.Error(err) + } + pods[0].Status.Phase = v1.PodFailed + spc.podsIndexer.Update(pods[0]) + if err := ssc.UpdateStatefulSet(set, pods); err != nil { + t.Errorf("Error updating StatefulSet %s", err) + } + if err := assertInvariants(set, spc); err != nil { + t.Error(err) + } + pods, err = spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + t.Error(err) + } + if isCreated(pods[0]) { + t.Error("StatefulSet did not recreate failed Pod") + } +} + +func TestDefaultStatefulSetControlInitAnnotation(t *testing.T) { + spc := newFakeStatefulPodControl() + ssc := NewDefaultStatefulSetControl(spc) + set := newStatefulSet(3) + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + t.Error(err) + } + pods, err := spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + t.Error(err) + } + if err = ssc.UpdateStatefulSet(set, pods); err != nil { + t.Errorf("Error updating StatefulSet %s", err) + } + if err = assertInvariants(set, spc); err != nil { + t.Error(err) + } + if pods, err = spc.setPodRunning(set, 0); err != nil { + t.Error(err) + } + if pods, err = spc.setPodReady(set, 0); err != nil { + t.Error(err) + } + if pods, err = spc.setPodInitStatus(set, 0, false); err != nil { + t.Error(err) + } + replicas := int(set.Status.Replicas) + if err := ssc.UpdateStatefulSet(set, pods); err != nil { + t.Errorf("Error updating StatefulSet %s", err) + } + if err := assertInvariants(set, spc); err != nil { + t.Error(err) + } + if replicas != int(set.Status.Replicas) { + t.Errorf("StatefulSetControl does not block on %s=false", apps.StatefulSetInitAnnotation) + } + if pods, err = spc.setPodInitStatus(set, 0, true); err != nil { + t.Error(err) + } + if err := scaleUpStatefulSetControl(set, ssc, spc); err != nil { + t.Errorf("Failed to turn up StatefulSet : %s", err) + } + if int(set.Status.Replicas) != 3 { + t.Errorf("StatefulSetControl does not unblock on %s=true", apps.StatefulSetInitAnnotation) + } +} + +func TestDefaultStatefulSetControlCreatePodFailure(t *testing.T) { + spc := newFakeStatefulPodControl() + ssc := NewDefaultStatefulSetControl(spc) + set := newStatefulSet(3) + spc.SetCreateStatefulPodError(apierrors.NewInternalError(errors.New("API server failed")), 2) + if err := scaleUpStatefulSetControl(set, ssc, spc); !apierrors.IsInternalError(err) { + t.Errorf("StatefulSetControl did not return InternalError found %s", err) + } + if err := scaleUpStatefulSetControl(set, ssc, spc); err != nil { + t.Errorf("Failed to turn up StatefulSet : %s", err) + } + if set.Status.Replicas != 3 { + t.Error("Failed to scale StatefulSet to 3 replicas") + } +} + +func TestDefaultStatefulSetControlUpdatePodFailure(t *testing.T) { + spc := newFakeStatefulPodControl() + ssc := NewDefaultStatefulSetControl(spc) + set := newStatefulSet(3) + spc.SetUpdateStatefulPodError(apierrors.NewInternalError(errors.New("API server failed")), 2) + if err := scaleUpStatefulSetControl(set, ssc, spc); !apierrors.IsInternalError(err) { +
t.Errorf("StatefulSetControl did not return InternalError foudn %s", err) + } + if err := scaleUpStatefulSetControl(set, ssc, spc); err != nil { + t.Errorf("Failed to turn up StatefulSet : %s", err) + } + if set.Status.Replicas != 3 { + t.Error("Falied to scale StatefulSet to 3 replicas") + } +} + +func TestDefaultStatefulSetControlUpdateSetStatusFailure(t *testing.T) { + spc := newFakeStatefulPodControl() + ssc := NewDefaultStatefulSetControl(spc) + set := newStatefulSet(3) + spc.SetUpdateStatefulSetStatusError(apierrors.NewInternalError(errors.New("API server failed")), 2) + if err := scaleUpStatefulSetControl(set, ssc, spc); !apierrors.IsInternalError(err) { + t.Errorf("StatefulSetControl did not return InternalError foudn %s", err) + } + if err := scaleUpStatefulSetControl(set, ssc, spc); err != nil { + t.Errorf("Failed to turn up StatefulSet : %s", err) + } + if set.Status.Replicas != 3 { + t.Error("Falied to scale StatefulSet to 3 replicas") + } +} + +func TestDefaultStatefulSetControlPodRecreateDeleteError(t *testing.T) { + spc := newFakeStatefulPodControl() + ssc := NewDefaultStatefulSetControl(spc) + set := newStatefulSet(3) + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + t.Error(err) + } + pods, err := spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + t.Error(err) + } + if err := ssc.UpdateStatefulSet(set, pods); err != nil { + t.Errorf("Error updating StatefulSet %s", err) + } + if err := assertInvariants(set, spc); err != nil { + t.Error(err) + } + pods, err = spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + t.Error(err) + } + pods[0].Status.Phase = v1.PodFailed + spc.podsIndexer.Update(pods[0]) + spc.SetDeleteStatefulPodError(apierrors.NewInternalError(errors.New("API server failed")), 0) + if err := ssc.UpdateStatefulSet(set, pods); !apierrors.IsInternalError(err) { + t.Errorf("StatefulSet failed to %s", err) + } + if err := assertInvariants(set, spc); err != nil { + t.Error(err) + } + if err := ssc.UpdateStatefulSet(set, pods); err != nil { + t.Errorf("Error updating StatefulSet %s", err) + } + if err := assertInvariants(set, spc); err != nil { + t.Error(err) + } + pods, err = spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + t.Error(err) + } + if isCreated(pods[0]) { + t.Error("StatefulSet did not recreate failed Pod") + } +} + +func TestStatefulSetControlScaleDownDeleteError(t *testing.T) { + spc := newFakeStatefulPodControl() + ssc := NewDefaultStatefulSetControl(spc) + set := newStatefulSet(3) + if err := scaleUpStatefulSetControl(set, ssc, spc); err != nil { + t.Errorf("Failed to turn up StatefulSet : %s", err) + } + *set.Spec.Replicas = 0 + spc.SetDeleteStatefulPodError(apierrors.NewInternalError(errors.New("API server failed")), 2) + if err := scaleDownStatefulSetControl(set, ssc, spc); !apierrors.IsInternalError(err) { + t.Errorf("StatefulSetControl failed to throw error on delte %s", err) + } + if err := scaleDownStatefulSetControl(set, ssc, spc); err != nil { + t.Errorf("Failed to turn down StatefulSet %s", err) + } + if set.Status.Replicas != 0 { + t.Error("Falied to scale statefulset to 4 replicas") + } +} + +type requestTracker struct { + requests int + err error + after int +} + +func (rt *requestTracker) errorReady() bool { + return rt.err != nil && rt.requests >= rt.after +} + +func (rt *requestTracker) inc() { + rt.requests++ +} + +func (rt *requestTracker) reset() { + rt.err = nil + rt.after = 0 +} + +type fakeStatefulPodControl struct { + podsLister 
listers.StoreToPodLister + claimsLister listers.StoreToPersistentVolumeClaimLister + setsLister listers.StoreToStatefulSetLister + podsIndexer cache.Indexer + claimsIndexer cache.Indexer + setsIndexer cache.Indexer + createPodTracker requestTracker + updatePodTracker requestTracker + deletePodTracker requestTracker + updateStatusTracker requestTracker +} + +func newFakeStatefulPodControl() *fakeStatefulPodControl { + podsIndexer := cache.NewIndexer(controller.KeyFunc, + cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) + claimsIndexer := cache.NewIndexer(controller.KeyFunc, + cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) + setsIndexer := cache.NewIndexer(controller.KeyFunc, + cache.Indexers{cache.NamespaceIndex: cache.MetaNamespaceIndexFunc}) + return &fakeStatefulPodControl{ + listers.StoreToPodLister{Indexer: podsIndexer}, + listers.StoreToPersistentVolumeClaimLister{Indexer: claimsIndexer}, + listers.StoreToStatefulSetLister{Store: setsIndexer}, + podsIndexer, + claimsIndexer, + setsIndexer, + requestTracker{0, nil, 0}, + requestTracker{0, nil, 0}, + requestTracker{0, nil, 0}, + requestTracker{0, nil, 0}} +} + +func (spc *fakeStatefulPodControl) SetCreateStatefulPodError(err error, after int) { + spc.createPodTracker.err = err + spc.createPodTracker.after = after +} + +func (spc *fakeStatefulPodControl) SetUpdateStatefulPodError(err error, after int) { + spc.updatePodTracker.err = err + spc.updatePodTracker.after = after +} + +func (spc *fakeStatefulPodControl) SetDeleteStatefulPodError(err error, after int) { + spc.deletePodTracker.err = err + spc.deletePodTracker.after = after +} + +func (spc *fakeStatefulPodControl) SetUpdateStatefulSetStatusError(err error, after int) { + spc.updateStatusTracker.err = err + spc.updateStatusTracker.after = after +} + +func (spc *fakeStatefulPodControl) setPodPending(set *apps.StatefulSet, ordinal int) ([]*v1.Pod, error) { + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + return nil, err + } + pods, err := spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + return nil, err + } + if 0 > ordinal || ordinal >= len(pods) { + return nil, fmt.Errorf("ordinal %d out of range [0,%d)", ordinal, len(pods)) + } + sort.Sort(ascendingOrdinal(pods)) + pod := pods[ordinal] + pod.Status.Phase = v1.PodPending + fakeResourceVersion(pod) + spc.podsIndexer.Update(pod) + return spc.podsLister.Pods(set.Namespace).List(selector) +} + +func (spc *fakeStatefulPodControl) setPodRunning(set *apps.StatefulSet, ordinal int) ([]*v1.Pod, error) { + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + return nil, err + } + pods, err := spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + return nil, err + } + if 0 > ordinal || ordinal >= len(pods) { + return nil, fmt.Errorf("ordinal %d out of range [0,%d)", ordinal, len(pods)) + } + sort.Sort(ascendingOrdinal(pods)) + pod := pods[ordinal] + pod.Status.Phase = v1.PodRunning + fakeResourceVersion(pod) + spc.podsIndexer.Update(pod) + return spc.podsLister.Pods(set.Namespace).List(selector) +} + +func (spc *fakeStatefulPodControl) setPodReady(set *apps.StatefulSet, ordinal int) ([]*v1.Pod, error) { + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + return nil, err + } + pods, err := spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + return nil, err + } + if 0 > ordinal || ordinal >= len(pods) { + return nil, fmt.Errorf("ordinal %d out 
of range [0,%d)", ordinal, len(pods)) + } + sort.Sort(ascendingOrdinal(pods)) + pod := pods[ordinal] + condition := v1.PodCondition{Type: v1.PodReady, Status: v1.ConditionTrue} + v1.UpdatePodCondition(&pod.Status, &condition) + fakeResourceVersion(pod) + spc.podsIndexer.Update(pod) + return spc.podsLister.Pods(set.Namespace).List(selector) +} + +func (spc *fakeStatefulPodControl) setPodInitStatus(set *apps.StatefulSet, ordinal int, init bool) ([]*v1.Pod, error) { + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + return nil, err + } + pods, err := spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + return nil, err + } + if 0 > ordinal || ordinal >= len(pods) { + return nil, fmt.Errorf("ordinal %d out of range [0,%d)", ordinal, len(pods)) + } + sort.Sort(ascendingOrdinal(pods)) + pod := pods[ordinal] + if init { + pod.Annotations[apps.StatefulSetInitAnnotation] = "true" + } else { + pod.Annotations[apps.StatefulSetInitAnnotation] = "false" + } + fakeResourceVersion(pod) + spc.podsIndexer.Update(pod) + return spc.podsLister.Pods(set.Namespace).List(selector) +} + +func (spc *fakeStatefulPodControl) addTerminatedPod(set *apps.StatefulSet, ordinal int) ([]*v1.Pod, error) { + pod := newStatefulSetPod(set, ordinal) + pod.Status.Phase = v1.PodRunning + deleted := metav1.NewTime(time.Now()) + pod.DeletionTimestamp = &deleted + condition := v1.PodCondition{Type: v1.PodReady, Status: v1.ConditionTrue} + fakeResourceVersion(pod) + v1.UpdatePodCondition(&pod.Status, &condition) + spc.podsIndexer.Update(pod) + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + return nil, err + } + return spc.podsLister.Pods(set.Namespace).List(selector) +} + +func (spc *fakeStatefulPodControl) setPodTerminated(set *apps.StatefulSet, ordinal int) ([]*v1.Pod, error) { + pod := newStatefulSetPod(set, ordinal) + deleted := metav1.NewTime(time.Now()) + pod.DeletionTimestamp = &deleted + fakeResourceVersion(pod) + spc.podsIndexer.Update(pod) + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + return nil, err + } + return spc.podsLister.Pods(set.Namespace).List(selector) +} + +func (spc *fakeStatefulPodControl) CreateStatefulPod(set *apps.StatefulSet, pod *v1.Pod) error { + defer spc.createPodTracker.inc() + if spc.createPodTracker.errorReady() { + defer spc.createPodTracker.reset() + return spc.createPodTracker.err + } + + for _, claim := range getPersistentVolumeClaims(set, pod) { + spc.claimsIndexer.Update(&claim) + } + spc.podsIndexer.Update(pod) + return nil +} + +func (spc *fakeStatefulPodControl) UpdateStatefulPod(set *apps.StatefulSet, pod *v1.Pod) error { + defer spc.updatePodTracker.inc() + if spc.updatePodTracker.errorReady() { + defer spc.updatePodTracker.reset() + return spc.updatePodTracker.err + } + if !identityMatches(set, pod) { + updateIdentity(set, pod) + } + if !storageMatches(set, pod) { + updateStorage(set, pod) + for _, claim := range getPersistentVolumeClaims(set, pod) { + spc.claimsIndexer.Update(&claim) + } + } + spc.podsIndexer.Update(pod) + return nil +} + +func (spc *fakeStatefulPodControl) DeleteStatefulPod(set *apps.StatefulSet, pod *v1.Pod) error { + defer spc.deletePodTracker.inc() + if spc.deletePodTracker.errorReady() { + defer spc.deletePodTracker.reset() + return spc.deletePodTracker.err + } + if key, err := controller.KeyFunc(pod); err != nil { + return err + } else if obj, found, err := spc.podsIndexer.GetByKey(key); err != nil { + return err + } else if found 
{ + spc.podsIndexer.Delete(obj) + } + + return nil +} + +func (spc *fakeStatefulPodControl) UpdateStatefulSetReplicas(set *apps.StatefulSet, replicas int32) error { + defer spc.updateStatusTracker.inc() + if spc.updateStatusTracker.errorReady() { + defer spc.updateStatusTracker.reset() + return spc.updateStatusTracker.err + } + set.Status.Replicas = replicas + spc.setsIndexer.Update(set) + return nil +} + +var _ StatefulPodControlInterface = &fakeStatefulPodControl{} + +func assertInvariants(set *apps.StatefulSet, spc *fakeStatefulPodControl) error { + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + return err + } + pods, err := spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + return err + } + sort.Sort(ascendingOrdinal(pods)) + for ord := 0; ord < len(pods); ord++ { + if ord > 0 && isRunningAndReady(pods[ord]) && !isRunningAndReady(pods[ord-1]) { + return fmt.Errorf("Pod %s is Running and Ready while its predecessor %s is not", + pods[ord].Name, + pods[ord-1].Name) + } + if getOrdinal(pods[ord]) != ord { + return fmt.Errorf("Pod %s deployed in the wrong order", + pods[ord].Name) + } + if !storageMatches(set, pods[ord]) { + return fmt.Errorf("Pod %s does not match the storage specification of StatefulSet %s", + pods[ord].Name, set.Name) + } else { + for _, claim := range getPersistentVolumeClaims(set, pods[ord]) { + if pvc, err := spc.claimsLister.PersistentVolumeClaims(set.Namespace).Get(claim.Name); err != nil { + return err + } else if pvc == nil { + return fmt.Errorf("claim %s for Pod %s was not created", + claim.Name, + pods[ord].Name) + } + } + } + if !identityMatches(set, pods[ord]) { + return fmt.Errorf("Pod %s does not match the identity specification of StatefulSet %s", + pods[ord].Name, + set.Name) + } + } + return nil +} + +func fakeResourceVersion(object interface{}) { + obj, isObj := object.(metav1.Object) + if !isObj { + return + } else if version := obj.GetResourceVersion(); version == "" { + obj.SetResourceVersion("1") + } else if intValue, err := strconv.ParseInt(version, 10, 32); err == nil { + obj.SetResourceVersion(strconv.FormatInt(intValue+1, 10)) + } +} + +func scaleUpStatefulSetControl(set *apps.StatefulSet, ssc StatefulSetControlInterface, spc *fakeStatefulPodControl) error { + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + return err + } + for set.Status.Replicas < *set.Spec.Replicas { + pods, err := spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + return err + } + if ord := len(pods) - 1; ord >= 0 { + if pods, err = spc.setPodPending(set, ord); err != nil { + return err + } + if err = ssc.UpdateStatefulSet(set, pods); err != nil { + return err + } + if pods, err = spc.setPodRunning(set, ord); err != nil { + return err + } + if err = ssc.UpdateStatefulSet(set, pods); err != nil { + return err + } + if pods, err = spc.setPodReady(set, ord); err != nil { + return err + } + } + if err := ssc.UpdateStatefulSet(set, pods); err != nil { + return err + } + if err := assertInvariants(set, spc); err != nil { + return err + } + } + return assertInvariants(set, spc) +} + +func scaleDownStatefulSetControl(set *apps.StatefulSet, ssc StatefulSetControlInterface, spc *fakeStatefulPodControl) error { + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + return err + } + for set.Status.Replicas > *set.Spec.Replicas { + pods, err := spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { +
return err + } + if ordinal := len(pods) - 1; ordinal >= 0 { + if err := ssc.UpdateStatefulSet(set, pods); err != nil { + return err + } + if pods, err = spc.addTerminatedPod(set, ordinal); err != nil { + return err + } + if err = ssc.UpdateStatefulSet(set, pods); err != nil { + return err + } + pods, err = spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + return err + } + sort.Sort(ascendingOrdinal(pods)) + spc.podsIndexer.Delete(pods[ordinal]) + } + if err := ssc.UpdateStatefulSet(set, pods); err != nil { + return err + } + if err := assertInvariants(set, spc); err != nil { + return err + } + } + return assertInvariants(set, spc) +} diff --git a/pkg/controller/statefulset/stateful_set_test.go b/pkg/controller/statefulset/stateful_set_test.go index f12ffc1bf6a..1d126f67eee 100644 --- a/pkg/controller/statefulset/stateful_set_test.go +++ b/pkg/controller/statefulset/stateful_set_test.go @@ -17,316 +17,444 @@ limitations under the License. package statefulset import ( - "fmt" - "math/rand" - "reflect" + "sort" "testing" + "time" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/errors" "k8s.io/client-go/tools/cache" + "k8s.io/client-go/util/workqueue" + "k8s.io/kubernetes/pkg/api/v1" apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1" - fakeinternal "k8s.io/kubernetes/pkg/client/clientset_generated/clientset/fake" - "k8s.io/kubernetes/pkg/client/clientset_generated/clientset/typed/apps/v1beta1" - "k8s.io/kubernetes/pkg/client/clientset_generated/clientset/typed/apps/v1beta1/fake" - "k8s.io/kubernetes/pkg/client/legacylisters" "k8s.io/kubernetes/pkg/controller" ) -func newFakeStatefulSetController() (*StatefulSetController, *fakePetClient) { - fpc := newFakePetClient() - return &StatefulSetController{ - kubeClient: nil, - blockingPetStore: newUnHealthyPetTracker(fpc), - podStoreSynced: func() bool { return true }, - psStore: listers.StoreToStatefulSetLister{Store: cache.NewStore(controller.KeyFunc)}, - podStore: listers.StoreToPodLister{Indexer: cache.NewIndexer(controller.KeyFunc, cache.Indexers{})}, - newSyncer: func(blockingPet *pcb) *petSyncer { - return &petSyncer{fpc, blockingPet} - }, - }, fpc -} - -func checkPets(ps *apps.StatefulSet, creates, deletes int, fc *fakePetClient, t *testing.T) { - if fc.petsCreated != creates || fc.petsDeleted != deletes { - t.Errorf("Found (creates: %d, deletes: %d), expected (creates: %d, deletes: %d)", fc.petsCreated, fc.petsDeleted, creates, deletes) - } - gotClaims := map[string]v1.PersistentVolumeClaim{} - for _, pvc := range fc.claims { - gotClaims[pvc.Name] = pvc - } - for i := range fc.pets { - expectedPet, _ := newPCB(fmt.Sprintf("%v", i), ps) - if identityHash(ps, fc.pets[i].pod) != identityHash(ps, expectedPet.pod) { - t.Errorf("Unexpected pod at index %d", i) - } - for _, pvc := range expectedPet.pvcs { - gotPVC, ok := gotClaims[pvc.Name] - if !ok { - t.Errorf("PVC %v not created for pod %v", pvc.Name, expectedPet.pod.Name) - } - if !reflect.DeepEqual(gotPVC.Spec, pvc.Spec) { - t.Errorf("got PVC %v differs from created pvc", pvc.Name) - } - } - } -} - -func scaleStatefulSet(t *testing.T, ps *apps.StatefulSet, psc *StatefulSetController, fc *fakePetClient, scale int) error { - errs := []error{} - for i := 0; i < scale; i++ { - pl := fc.getPodList() - if len(pl) != i { - t.Errorf("Unexpected number of pods, expected %d found %d", i, len(pl)) - } - if _, syncErr := psc.syncStatefulSet(ps, pl); syncErr != nil { - errs = append(errs, syncErr) - } - fc.setHealthy(i) - checkPets(ps, i+1, 0, fc, t) - } - 
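The two scale drivers above are intended to be used back to back in tests; a minimal round-trip sketch, assuming the fake constructors used elsewhere in this package (newFakeStatefulPodControl, NewDefaultStatefulSetControl):

// Hypothetical test sketch: drive a set up to its spec'd replica count,
// then back down to zero; both drivers re-check the ordering, identity,
// and storage invariants via assertInvariants on every iteration.
func TestScaleRoundTrip(t *testing.T) {
	spc := newFakeStatefulPodControl()
	ssc := NewDefaultStatefulSetControl(spc)
	set := newStatefulSet(3)
	if err := scaleUpStatefulSetControl(set, ssc, spc); err != nil {
		t.Fatalf("scale up: %s", err)
	}
	*set.Spec.Replicas = 0
	if err := scaleDownStatefulSetControl(set, ssc, spc); err != nil {
		t.Fatalf("scale down: %s", err)
	}
}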
return errors.NewAggregate(errs) -} - -func saturateStatefulSet(t *testing.T, ps *apps.StatefulSet, psc *StatefulSetController, fc *fakePetClient) { - err := scaleStatefulSet(t, ps, psc, fc, int(*(ps.Spec.Replicas))) - if err != nil { - t.Errorf("Error scaleStatefulSet: %v", err) - } -} - func TestStatefulSetControllerCreates(t *testing.T) { - psc, fc := newFakeStatefulSetController() - replicas := 3 - ps := newStatefulSet(replicas) - - saturateStatefulSet(t, ps, psc, fc) - - podList := fc.getPodList() - // Deleted pet gets recreated - fc.pets = fc.pets[:replicas-1] - if _, err := psc.syncStatefulSet(ps, podList); err != nil { - t.Errorf("Error syncing StatefulSet: %v", err) + ssc, spc := newFakeStatefulSetController() + set := newStatefulSet(3) + if err := scaleUpStatefulSetController(set, ssc, spc); err != nil { + t.Errorf("Failed to turn up StatefulSet: %s", err) + } + if obj, _, err := spc.setsIndexer.Get(set); err != nil { + t.Error(err) + } else { + set = obj.(*apps.StatefulSet) + } + if set.Status.Replicas != 3 { + t.Error("Failed to scale statefulset to 3 replicas") } - checkPets(ps, replicas+1, 0, fc, t) } func TestStatefulSetControllerDeletes(t *testing.T) { - psc, fc := newFakeStatefulSetController() - replicas := 4 - ps := newStatefulSet(replicas) - - saturateStatefulSet(t, ps, psc, fc) - - // Drain - errs := []error{} - *(ps.Spec.Replicas) = 0 - knownPods := fc.getPodList() - for i := replicas - 1; i >= 0; i-- { - if len(fc.pets) != i+1 { - t.Errorf("Unexpected number of pods, expected %d found %d", i+1, len(fc.pets)) - } - if _, syncErr := psc.syncStatefulSet(ps, knownPods); syncErr != nil { - errs = append(errs, syncErr) - } + ssc, spc := newFakeStatefulSetController() + set := newStatefulSet(3) + if err := scaleUpStatefulSetController(set, ssc, spc); err != nil { + t.Errorf("Failed to turn up StatefulSet: %s", err) } - if len(errs) != 0 { - t.Errorf("Error syncing StatefulSet: %v", errors.NewAggregate(errs)) + if obj, _, err := spc.setsIndexer.Get(set); err != nil { + t.Error(err) + } else { + set = obj.(*apps.StatefulSet) + } + if set.Status.Replicas != 3 { + t.Error("Failed to scale statefulset to 3 replicas") + } + *set.Spec.Replicas = 0 + if err := scaleDownStatefulSetController(set, ssc, spc); err != nil { + t.Errorf("Failed to turn down StatefulSet: %s", err) + } + if obj, _, err := spc.setsIndexer.Get(set); err != nil { + t.Error(err) + } else { + set = obj.(*apps.StatefulSet) + } + if set.Status.Replicas != 0 { + t.Error("Failed to scale statefulset to 0 replicas") + } - checkPets(ps, replicas, replicas, fc, t) } func TestStatefulSetControllerRespectsTermination(t *testing.T) { - psc, fc := newFakeStatefulSetController() - replicas := 4 - ps := newStatefulSet(replicas) - - saturateStatefulSet(t, ps, psc, fc) - - fc.setDeletionTimestamp(replicas - 1) - *(ps.Spec.Replicas) = 2 - _, err := psc.syncStatefulSet(ps, fc.getPodList()) + ssc, spc := newFakeStatefulSetController() + set := newStatefulSet(3) + if err := scaleUpStatefulSetController(set, ssc, spc); err != nil { + t.Errorf("Failed to turn up StatefulSet: %s", err) + } + if obj, _, err := spc.setsIndexer.Get(set); err != nil { + t.Error(err) + } else { + set = obj.(*apps.StatefulSet) + } + if set.Status.Replicas != 3 { + t.Error("Failed to scale statefulset to 3 replicas") + } + pods, err := spc.addTerminatedPod(set, 3) if err != nil { - t.Errorf("Error syncing StatefulSet: %v", err) + t.Error(err) } - // Finding a pod with the deletion timestamp will pause all deletions. - knownPods := fc.getPodList() - if len(knownPods) != 4 { - t.Errorf("Pods deleted prematurely before deletion timestamp expired, len %d", len(knownPods)) - } - fc.pets = fc.pets[:replicas-1] - _, err = psc.syncStatefulSet(ps, fc.getPodList()) + pods, err = spc.addTerminatedPod(set, 4) if err != nil { - t.Errorf("Error syncing StatefulSet: %v", err) + t.Error(err) } - checkPets(ps, replicas, 1, fc, t) -} - -func TestStatefulSetControllerRespectsOrder(t *testing.T) { - psc, fc := newFakeStatefulSetController() - replicas := 4 - ps := newStatefulSet(replicas) - - saturateStatefulSet(t, ps, psc, fc) - - errs := []error{} - *(ps.Spec.Replicas) = 0 - // Shuffle known list and check that pets are deleted in reverse - knownPods := fc.getPodList() - for i := range knownPods { - j := rand.Intn(i + 1) - knownPods[i], knownPods[j] = knownPods[j], knownPods[i] + ssc.syncStatefulSet(set, pods) + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + t.Error(err) } - - for i := 0; i < replicas; i++ { - if len(fc.pets) != replicas-i { - t.Errorf("Unexpected number of pods, expected %d found %d", i, len(fc.pets)) - } - if _, syncErr := psc.syncStatefulSet(ps, knownPods); syncErr != nil { - errs = append(errs, syncErr) - } - checkPets(ps, replicas, i+1, fc, t) + pods, err = spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + t.Error(err) } - if len(errs) != 0 { - t.Errorf("Error syncing StatefulSet: %v", errors.NewAggregate(errs)) + if len(pods) != 5 { + t.Error("StatefulSet does not respect termination") + } + sort.Sort(ascendingOrdinal(pods)) + spc.DeleteStatefulPod(set, pods[3]) + spc.DeleteStatefulPod(set, pods[4]) + *set.Spec.Replicas = 0 + if err := scaleDownStatefulSetController(set, ssc, spc); err != nil { + t.Errorf("Failed to turn down StatefulSet: %s", err) + } + if obj, _, err := spc.setsIndexer.Get(set); err != nil { + t.Error(err) + } else { + set = obj.(*apps.StatefulSet) + } + if set.Status.Replicas != 0 { + t.Error("Failed to scale statefulset to 0 replicas") } } func TestStatefulSetControllerBlocksScaling(t *testing.T) { - psc, fc := newFakeStatefulSetController() - replicas := 5 - ps := newStatefulSet(replicas) - scaleStatefulSet(t, ps, psc, fc, 3) - - // Create 4th pet, then before flipping it to healthy, kill the first pet. - // There should only be 1 not-healty pet at a time. - pl := fc.getPodList() - if _, err := psc.syncStatefulSet(ps, pl); err != nil { - t.Errorf("Error syncing StatefulSet: %v", err) + ssc, spc := newFakeStatefulSetController() + set := newStatefulSet(3) + if err := scaleUpStatefulSetController(set, ssc, spc); err != nil { + t.Errorf("Failed to turn up StatefulSet: %s", err) } - - deletedPod := pl[0] - fc.deletePetAtIndex(0) - pl = fc.getPodList() - if _, err := psc.syncStatefulSet(ps, pl); err != nil { - t.Errorf("Error syncing StatefulSet: %v", err) + if obj, _, err := spc.setsIndexer.Get(set); err != nil { + t.Error(err) + } else { + set = obj.(*apps.StatefulSet) } - newPodList := fc.getPodList() - for _, p := range newPodList { - if p.Name == deletedPod.Name { - t.Errorf("Deleted pod was created while existing pod was unhealthy") + if set.Status.Replicas != 3 { + t.Error("Failed to scale statefulset to 3 replicas") + } + *set.Spec.Replicas = 5 + fakeResourceVersion(set) + spc.setsIndexer.Update(set) + pods, err := spc.setPodTerminated(set, 0) + if err != nil { + t.Error("Failed to set pod terminated at ordinal 0") + } + ssc.enqueueStatefulSet(set) + fakeWorker(ssc) + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + t.Error(err) + } + pods, err = spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + t.Error(err) + } + if len(pods) != 3 { + t.Error("StatefulSet does not block scaling") + } + sort.Sort(ascendingOrdinal(pods)) + spc.DeleteStatefulPod(set, pods[0]) + ssc.enqueueStatefulSet(set) + fakeWorker(ssc) + pods, err = spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + t.Error(err) + } + if len(pods) != 3 { + t.Error("StatefulSet does not resume when terminated Pod is removed") + } +} + +func TestStatefulSetControllerAddPod(t *testing.T) { + ssc, spc := newFakeStatefulSetController() + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + spc.setsIndexer.Add(set) + ssc.addPod(pod) + key, done := ssc.queue.Get() + if key == nil || done { + t.Error("Failed to enqueue StatefulSet") + } else if key, ok := key.(string); !ok { + t.Error("Key is not a string") + } else if expectedKey, _ := controller.KeyFunc(set); expectedKey != key { + t.Errorf("Expected StatefulSet key %s found %s", expectedKey, key) + } +} + +func TestStatefulSetControllerAddPodNoSet(t *testing.T) { + ssc, _ := newFakeStatefulSetController() + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + ssc.addPod(pod) + ssc.queue.ShutDown() + key, _ := ssc.queue.Get() + if key != nil { + t.Errorf("StatefulSet enqueued key for Pod with no Set %s", key) + } +} + +func TestStatefulSetControllerUpdatePod(t *testing.T) { + ssc, spc := newFakeStatefulSetController() + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + spc.setsIndexer.Add(set) + prev := *pod + fakeResourceVersion(pod) + ssc.updatePod(&prev, pod) + key, done := ssc.queue.Get() + if key == nil || done { + t.Error("Failed to enqueue StatefulSet") + } else if key, ok := key.(string); !ok { + t.Error("Key is not a string") + } else if expectedKey, _ := controller.KeyFunc(set); expectedKey != key { + t.Errorf("Expected StatefulSet key %s found %s", expectedKey, key) + } +} + +func TestStatefulSetControllerUpdatePodWithNoSet(t *testing.T) { + ssc, _ := newFakeStatefulSetController() + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + prev := *pod + fakeResourceVersion(pod) + ssc.updatePod(&prev, pod) + ssc.queue.ShutDown() + key, _ := ssc.queue.Get() + if key != nil { + t.Errorf("StatefulSet enqueued key for Pod with no 
Set %s", key) + } +} + +func TestStatefulSetControllerUpdatePodWithSameVersion(t *testing.T) { + ssc, spc := newFakeStatefulSetController() + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + spc.setsIndexer.Add(set) + ssc.updatePod(pod, pod) + ssc.queue.ShutDown() + key, _ := ssc.queue.Get() + if key != nil { + t.Errorf("StatefulSet enqueued key for Pod with no Set %s", key) + } +} + +func TestStatefulSetControllerUpdatePodWithNewLabels(t *testing.T) { + ssc, spc := newFakeStatefulSetController() + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + set2 := newStatefulSet(3) + set2.Name = "foo2" + set2.Spec.Selector.MatchLabels = map[string]string{"foo2": "bar2"} + set2.Spec.Template.Labels = map[string]string{"foo2": "bar2"} + spc.setsIndexer.Add(set) + spc.setsIndexer.Add(set2) + clone := *pod + clone.Labels = map[string]string{"foo2": "bar2"} + fakeResourceVersion(&clone) + ssc.updatePod(pod, &clone) + key, done := ssc.queue.Get() + if key == nil || done { + t.Error("Failed to enqueue StatefulSet") + } else if key, ok := key.(string); !ok { + t.Error("Key is not a string") + } else if expectedKey, _ := controller.KeyFunc(set2); expectedKey != key { + t.Errorf("Expected StatefulSet key %s found %s", expectedKey, key) + } + key, done = ssc.queue.Get() + if key == nil || done { + t.Error("Failed to enqueue StatefulSet") + } else if key, ok := key.(string); !ok { + t.Error("Key is not a string") + } else if expectedKey, _ := controller.KeyFunc(set); expectedKey != key { + t.Errorf("Expected StatefulSet key %s found %s", expectedKey, key) + } +} + +func TestStatefulSetControllerDeletePod(t *testing.T) { + ssc, spc := newFakeStatefulSetController() + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + spc.setsIndexer.Add(set) + ssc.deletePod(pod) + key, done := ssc.queue.Get() + if key == nil || done { + t.Error("Failed to enqueue StatefulSet") + } else if key, ok := key.(string); !ok { + t.Error("Key is not a string") + } else if expectedKey, _ := controller.KeyFunc(set); expectedKey != key { + t.Errorf("Expected StatefulSet key %s found %s", expectedKey, key) + } +} +func TestStatefulSetControllerDeletePodTombstone(t *testing.T) { + ssc, spc := newFakeStatefulSetController() + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + spc.setsIndexer.Add(set) + tombstoneKey, _ := controller.KeyFunc(pod) + tombstone := cache.DeletedFinalStateUnknown{Key: tombstoneKey, Obj: pod} + ssc.deletePod(tombstone) + key, done := ssc.queue.Get() + if key == nil || done { + t.Error("Failed to enqueue StatefulSet") + } else if key, ok := key.(string); !ok { + t.Error("Key is not a string") + } else if expectedKey, _ := controller.KeyFunc(set); expectedKey != key { + t.Errorf("Expected StatefulSet key %s found %s", expectedKey, key) + } +} + +func TestStatefulSetControllerGetStatefulSetForPod(t *testing.T) { + ssc, spc := newFakeStatefulSetController() + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + spc.setsIndexer.Add(set) + spc.podsIndexer.Add(pod) + if set := ssc.getStatefulSetForPod(pod); set == nil { + t.Error("Failed to get StatefulSet for Pod ") + } +} + +func TestStatefulSetControllerGetStatefulSetForPodOverlapping(t *testing.T) { + ssc, spc := newFakeStatefulSetController() + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 0) + set2 := newStatefulSet(3) + set2.Name = "foo2" + set3 := newStatefulSet(3) + set3.Name = "foo3" + set3.CreationTimestamp.Add(1 * time.Second) + spc.setsIndexer.Add(set3) + spc.setsIndexer.Add(set2) + 
spc.setsIndexer.Add(set) + spc.podsIndexer.Add(pod) + if found := ssc.getStatefulSetForPod(pod); found == nil { + t.Error("Failed to get StatefulSet for Pod") + } else if found.Name != set.Name { + t.Errorf("Returned wrong StatefulSet %s for Pod", set.Name) + } +} + +func newFakeStatefulSetController() (*StatefulSetController, *fakeStatefulPodControl) { + fpc := newFakeStatefulPodControl() + ssc := &StatefulSetController{ + kubeClient: nil, + podStoreSynced: func() bool { return true }, + setStore: fpc.setsLister, + podStore: fpc.podsLister, + control: NewDefaultStatefulSetControl(fpc), + queue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "statefulset"), + } + return ssc, fpc +} + +func fakeWorker(ssc *StatefulSetController) { + if obj, done := ssc.queue.Get(); !done { + ssc.sync(obj.(string)) + ssc.queue.Done(obj) + } +} + +func getPodAtOrdinal(pods []*v1.Pod, ordinal int) *v1.Pod { + if 0 > ordinal || ordinal >= len(pods) { + return nil + } + sort.Sort(ascendingOrdinal(pods)) + return pods[ordinal] +} + +func scaleUpStatefulSetController(set *apps.StatefulSet, ssc *StatefulSetController, spc *fakeStatefulPodControl) error { + spc.setsIndexer.Add(set) + ssc.enqueueStatefulSet(set) + fakeWorker(ssc) + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + return err + } + for set.Status.Replicas < *set.Spec.Replicas { + pods, err := spc.podsLister.Pods(set.Namespace).List(selector) + ord := len(pods) - 1 + pod := getPodAtOrdinal(pods, ord) + if pods, err = spc.setPodPending(set, ord); err != nil { + return err + } + pod = getPodAtOrdinal(pods, ord) + ssc.addPod(pod) + fakeWorker(ssc) + pod = getPodAtOrdinal(pods, ord) + prev := *pod + if pods, err = spc.setPodRunning(set, ord); err != nil { + return err + } + pod = getPodAtOrdinal(pods, ord) + ssc.updatePod(&prev, pod) + fakeWorker(ssc) + pod = getPodAtOrdinal(pods, ord) + prev = *pod + if pods, err = spc.setPodReady(set, ord); err != nil { + return err + } + pod = getPodAtOrdinal(pods, ord) + ssc.updatePod(&prev, pod) + fakeWorker(ssc) + if err := assertInvariants(set, spc); err != nil { + return err + } + if obj, _, err := spc.setsIndexer.Get(set); err != nil { + return err + } else { + set = obj.(*apps.StatefulSet) + } + + } + return assertInvariants(set, spc) +} + +func scaleDownStatefulSetController(set *apps.StatefulSet, ssc *StatefulSetController, spc *fakeStatefulPodControl) error { + selector, err := metav1.LabelSelectorAsSelector(set.Spec.Selector) + if err != nil { + return err + } + pods, err := spc.podsLister.Pods(set.Namespace).List(selector) + if err != nil { + return err + } + ord := len(pods) - 1 + pod := getPodAtOrdinal(pods, ord) + prev := *pod + fakeResourceVersion(set) + spc.setsIndexer.Add(set) + ssc.enqueueStatefulSet(set) + fakeWorker(ssc) + pods, err = spc.addTerminatedPod(set, ord) + pod = getPodAtOrdinal(pods, ord) + ssc.updatePod(&prev, pod) + fakeWorker(ssc) + spc.DeleteStatefulPod(set, pod) + ssc.deletePod(pod) + fakeWorker(ssc) + for set.Status.Replicas > *set.Spec.Replicas { + pods, err = spc.podsLister.Pods(set.Namespace).List(selector) + ord := len(pods) + pods, err = spc.addTerminatedPod(set, ord) + pod = getPodAtOrdinal(pods, ord) + ssc.updatePod(&prev, pod) + fakeWorker(ssc) + spc.DeleteStatefulPod(set, pod) + ssc.deletePod(pod) + fakeWorker(ssc) + if obj, _, err := spc.setsIndexer.Get(set); err != nil { + return err + } else { + set = obj.(*apps.StatefulSet) } } - - fc.setHealthy(len(newPodList) - 1) - if _, err := 
psc.syncStatefulSet(ps, pl); err != nil { - t.Errorf("Error syncing StatefulSet: %v", err) - } - - found := false - for _, p := range fc.getPodList() { - if p.Name == deletedPod.Name { - found = true - break - } - } - if !found { - t.Errorf("Deleted pod was not created after existing pods became healthy") - } -} - -func TestStatefulSetBlockingPetIsCleared(t *testing.T) { - psc, fc := newFakeStatefulSetController() - ps := newStatefulSet(3) - scaleStatefulSet(t, ps, psc, fc, 1) - - if blocking, err := psc.blockingPetStore.Get(ps, fc.getPodList()); err != nil || blocking != nil { - t.Errorf("Unexpected blocking pod %v, err %v", blocking, err) - } - - // 1 not yet healthy pet - psc.syncStatefulSet(ps, fc.getPodList()) - - if blocking, err := psc.blockingPetStore.Get(ps, fc.getPodList()); err != nil || blocking == nil { - t.Errorf("Expected blocking pod %v, err %v", blocking, err) - } - - // Deleting the statefulset should clear the blocking pet - if err := psc.psStore.Store.Delete(ps); err != nil { - t.Fatalf("Unable to delete pod %v from statefulset controller store.", ps.Name) - } - if err := psc.Sync(fmt.Sprintf("%v/%v", ps.Namespace, ps.Name)); err != nil { - t.Errorf("Error during sync of deleted statefulset %v", err) - } - fc.pets = []*pcb{} - fc.petsCreated = 0 - if blocking, err := psc.blockingPetStore.Get(ps, fc.getPodList()); err != nil || blocking != nil { - t.Errorf("Unexpected blocking pod %v, err %v", blocking, err) - } - saturateStatefulSet(t, ps, psc, fc) - - // Make sure we don't leak the final blockin pet in the store - psc.syncStatefulSet(ps, fc.getPodList()) - if p, exists, err := psc.blockingPetStore.store.GetByKey(fmt.Sprintf("%v/%v", ps.Namespace, ps.Name)); err != nil || exists { - t.Errorf("Unexpected blocking pod, err %v: %+v", err, p) - } -} - -func TestSyncStatefulSetBlockedPet(t *testing.T) { - psc, fc := newFakeStatefulSetController() - ps := newStatefulSet(3) - i, _ := psc.syncStatefulSet(ps, fc.getPodList()) - if i != len(fc.getPodList()) { - t.Errorf("syncStatefulSet should return actual amount of pods") - } -} - -type fakeClient struct { - fakeinternal.Clientset - statefulsetClient *fakeStatefulSetClient -} - -func (c *fakeClient) Apps() v1beta1.AppsV1beta1Interface { - return &fakeApps{c, &fake.FakeAppsV1beta1{}} -} - -type fakeApps struct { - *fakeClient - *fake.FakeAppsV1beta1 -} - -func (c *fakeApps) StatefulSets(namespace string) v1beta1.StatefulSetInterface { - c.statefulsetClient.Namespace = namespace - return c.statefulsetClient -} - -type fakeStatefulSetClient struct { - *fake.FakeStatefulSets - Namespace string - replicas int32 -} - -func (f *fakeStatefulSetClient) UpdateStatus(statefulset *apps.StatefulSet) (*apps.StatefulSet, error) { - f.replicas = statefulset.Status.Replicas - return statefulset, nil -} - -func TestStatefulSetReplicaCount(t *testing.T) { - fpsc := &fakeStatefulSetClient{} - psc, _ := newFakeStatefulSetController() - psc.kubeClient = &fakeClient{ - statefulsetClient: fpsc, - } - - ps := newStatefulSet(3) - psKey := fmt.Sprintf("%v/%v", ps.Namespace, ps.Name) - psc.psStore.Store.Add(ps) - - if err := psc.Sync(psKey); err != nil { - t.Errorf("Error during sync of deleted statefulset %v", err) - } - - if fpsc.replicas != 1 { - t.Errorf("Replicas count sent as status update for StatefulSet should be 1, is %d instead", fpsc.replicas) - } + return assertInvariants(set, spc) } diff --git a/pkg/controller/statefulset/stateful_set_utils.go b/pkg/controller/statefulset/stateful_set_utils.go index 64c7587b388..c8c3ec1ed90 100644 --- 
a/pkg/controller/statefulset/stateful_set_utils.go +++ b/pkg/controller/statefulset/stateful_set_utils.go @@ -18,23 +18,30 @@ package statefulset import ( "fmt" - "sync" + "regexp" + "strconv" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/tools/cache" "k8s.io/kubernetes/pkg/api/v1" + podapi "k8s.io/kubernetes/pkg/api/v1/pod" apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1" - appsclientset "k8s.io/kubernetes/pkg/client/clientset_generated/clientset/typed/apps/v1beta1" "k8s.io/kubernetes/pkg/controller" "github.com/golang/glog" ) +// maxUpdateRetries is the maximum number of retries used for update conflict resolution prior to failure +const maxUpdateRetries = 10 + +// updateConflictError is the error used to indicate that the maximum number of retries against the API server have +// been attempted and we need to back off +var updateConflictError = fmt.Errorf("aborting update after %d attempts", maxUpdateRetries) + // overlappingStatefulSets sorts a list of StatefulSets by creation timestamp, using their names as a tie breaker. // Generally used to tie break between StatefulSets that have overlapping selectors. type overlappingStatefulSets []apps.StatefulSet -func (o overlappingStatefulSets) Len() int { return len(o) } +func (o overlappingStatefulSets) Len() int { return len(o) } + func (o overlappingStatefulSets) Swap(i, j int) { o[i], o[j] = o[j], o[i] } func (o overlappingStatefulSets) Less(i, j int) bool { @@ -44,115 +51,206 @@ func (o overlappingStatefulSets) Less(i, j int) bool { return o[i].CreationTimestamp.Before(o[j].CreationTimestamp) } -// updatePetCount attempts to update the Status.Replicas of the given StatefulSet, with a single GET/PUT retry. -func updatePetCount(psClient appsclientset.StatefulSetsGetter, ps apps.StatefulSet, numPets int) (updateErr error) { - if ps.Status.Replicas == int32(numPets) || psClient == nil { - return nil - } - var getErr error - for i, ps := 0, &ps; ; i++ { - glog.V(4).Infof(fmt.Sprintf("Updating replica count for StatefulSet: %s/%s, ", ps.Namespace, ps.Name) + - fmt.Sprintf("replicas %d->%d (need %d), ", ps.Status.Replicas, numPets, *(ps.Spec.Replicas))) +// statefulPodRegex is a regular expression that extracts the parent StatefulSet and ordinal from the Name of a Pod +var statefulPodRegex = regexp.MustCompile("(.*)-([0-9]+)$") - ps.Status = apps.StatefulSetStatus{Replicas: int32(numPets)} - _, updateErr = psClient.StatefulSets(ps.Namespace).UpdateStatus(ps) - if updateErr == nil || i >= statusUpdateRetries { - return updateErr - } - if ps, getErr = psClient.StatefulSets(ps.Namespace).Get(ps.Name, metav1.GetOptions{}); getErr != nil { - return getErr +// getParentNameAndOrdinal gets the name of pod's parent StatefulSet and pod's ordinal as extracted from its Name. If +// the Pod was not created by a StatefulSet, its parent is considered to be nil, and its ordinal is considered to be +// -1. +func getParentNameAndOrdinal(pod *v1.Pod) (string, int) { + parent := "" + ordinal := -1 + subMatches := statefulPodRegex.FindStringSubmatch(pod.Name) + if len(subMatches) < 3 { + return parent, ordinal + } + parent = subMatches[1] + if i, err := strconv.ParseInt(subMatches[2], 10, 32); err == nil { + ordinal = int(i) + } + return parent, ordinal +} + +// getParentName gets the name of pod's parent StatefulSet. If pod has not parent, the empty string is returned. +func getParentName(pod *v1.Pod) string { + parent, _ := getParentNameAndOrdinal(pod) + return parent +} + +// getOrdinal gets pod's ordinal. 
If pod has no ordinal, -1 is returned. +func getOrdinal(pod *v1.Pod) int { + _, ordinal := getParentNameAndOrdinal(pod) + return ordinal +} + +// getPodName gets the name of set's child Pod with an ordinal index of ordinal +func getPodName(set *apps.StatefulSet, ordinal int) string { + return fmt.Sprintf("%s-%d", set.Name, ordinal) +} + +// getPersistentVolumeClaimName getsthe name of PersistentVolumeClaim for a Pod with an ordinal index of ordinal. claim +// must be a PersistentVolumeClaim from set's VolumeClaims template. +func getPersistentVolumeClaimName(set *apps.StatefulSet, claim *v1.PersistentVolumeClaim, ordinal int) string { + return fmt.Sprintf("%s-%s-%d", claim.Name, set.Name, ordinal) +} + +// isMemberOf tests if pod is a member of set. +func isMemberOf(set *apps.StatefulSet, pod *v1.Pod) bool { + return getParentName(pod) == set.Name +} + +// identityMatches returns true if pod has a valid identity and network identity for a member of set. +func identityMatches(set *apps.StatefulSet, pod *v1.Pod) bool { + parent, ordinal := getParentNameAndOrdinal(pod) + return ordinal >= 0 && + set.Name == parent && + pod.Name == getPodName(set, ordinal) && + pod.Namespace == set.Namespace && + pod.Annotations != nil && + pod.Annotations[podapi.PodHostnameAnnotation] == pod.Name && + pod.Annotations[podapi.PodSubdomainAnnotation] == set.Spec.ServiceName +} + +// storageMatches returns true if pod's Volumes cover the set of PersistentVolumeClaims +func storageMatches(set *apps.StatefulSet, pod *v1.Pod) bool { + ordinal := getOrdinal(pod) + if ordinal < 0 { + return false + } + volumes := make(map[string]v1.Volume, len(pod.Spec.Volumes)) + for _, volume := range pod.Spec.Volumes { + volumes[volume.Name] = volume + } + for _, claim := range set.Spec.VolumeClaimTemplates { + volume, found := volumes[claim.Name] + if !found || + volume.VolumeSource.PersistentVolumeClaim == nil || + volume.VolumeSource.PersistentVolumeClaim.ClaimName != + getPersistentVolumeClaimName(set, &claim, ordinal) { + return false } } + return true } -// unhealthyPetTracker tracks unhealthy pets for statefulsets. -type unhealthyPetTracker struct { - pc petClient - store cache.Store - storeLock sync.Mutex +// getPersistentVolumeClaims gets a map of PersistentVolumeClaims to their template names, as defined in set. The +// returned PersistentVolumeClaims are each constructed with a the name specific to the Pod. This name is determined +// by getPersistentVolumeClaimName. +func getPersistentVolumeClaims(set *apps.StatefulSet, pod *v1.Pod) map[string]v1.PersistentVolumeClaim { + ordinal := getOrdinal(pod) + templates := set.Spec.VolumeClaimTemplates + claims := make(map[string]v1.PersistentVolumeClaim, len(templates)) + for i := range templates { + claim := templates[i] + claim.Name = getPersistentVolumeClaimName(set, &claim, ordinal) + claim.Namespace = set.Namespace + claim.Labels = set.Spec.Selector.MatchLabels + claims[templates[i].Name] = claim + } + return claims } -// Get returns a previously recorded blocking pet for the given statefulset. -func (u *unhealthyPetTracker) Get(ps *apps.StatefulSet, knownPets []*v1.Pod) (*pcb, error) { - u.storeLock.Lock() - defer u.storeLock.Unlock() - - // We "Get" by key but "Add" by object because the store interface doesn't - // allow us to Get/Add a related obj (eg statefulset: blocking pet). - key, err := controller.KeyFunc(ps) - if err != nil { - return nil, err +// updateStorage updates pod's Volumes to conform with the PersistentVolumeClaim of set's templates. 
If pod has +// conflicting local Volumes these are replaced with Volumes that conform to the set's templates. +func updateStorage(set *apps.StatefulSet, pod *v1.Pod) { + currentVolumes := pod.Spec.Volumes + claims := getPersistentVolumeClaims(set, pod) + newVolumes := make([]v1.Volume, 0, len(claims)) + for name, claim := range claims { + newVolumes = append(newVolumes, v1.Volume{ + Name: name, + VolumeSource: v1.VolumeSource{ + PersistentVolumeClaim: &v1.PersistentVolumeClaimVolumeSource{ + ClaimName: claim.Name, + // TODO: Use source definition to set this value when we have one. + ReadOnly: false, + }, + }, + }) } - obj, exists, err := u.store.GetByKey(key) - if err != nil { - return nil, err - } - - hc := defaultPetHealthChecker{} - // There's no unhealthy pet blocking a scale event, but this might be - // a controller manager restart. If it is, knownPets can be trusted. - if !exists { - for _, p := range knownPets { - if hc.isHealthy(p) && !hc.isDying(p) { - glog.V(4).Infof("Ignoring healthy pod %v for StatefulSet %v", p.Name, ps.Name) - continue - } - glog.V(4).Infof("No recorded blocking pod, but found unhealthy pod %v for StatefulSet %v", p.Name, ps.Name) - return &pcb{pod: p, parent: ps}, nil + for i := range currentVolumes { + if _, ok := claims[currentVolumes[i].Name]; !ok { + newVolumes = append(newVolumes, currentVolumes[i]) } - return nil, nil } - - // This is a pet that's blocking further creates/deletes of a statefulset. If it - // disappears, it's no longer blocking. If it exists, it continues to block - // till it turns healthy or disappears. - bp := obj.(*pcb) - blockingPet, exists, err := u.pc.Get(bp) - if err != nil { - return nil, err - } - if !exists { - glog.V(4).Infof("Clearing blocking pod %v for StatefulSet %v because it's been deleted", bp.pod.Name, ps.Name) - return nil, nil - } - blockingPetPod := blockingPet.pod - if hc.isHealthy(blockingPetPod) && !hc.isDying(blockingPetPod) { - glog.V(4).Infof("Clearing blocking pod %v for StatefulSet %v because it's healthy", bp.pod.Name, ps.Name) - u.store.Delete(blockingPet) - blockingPet = nil - } - return blockingPet, nil + pod.Spec.Volumes = newVolumes } -// Add records the given pet as a blocking pet. -func (u *unhealthyPetTracker) Add(blockingPet *pcb) error { - u.storeLock.Lock() - defer u.storeLock.Unlock() - - if blockingPet == nil { - return nil +// updateIdentity updates pod's name, hostname, and subdomain to conform to set's name and headless service. +func updateIdentity(set *apps.StatefulSet, pod *v1.Pod) { + pod.Name = getPodName(set, getOrdinal(pod)) + pod.Namespace = set.Namespace + if pod.Annotations == nil { + pod.Annotations = make(map[string]string) } - glog.V(4).Infof("Adding blocking pod %v for StatefulSet %v", blockingPet.pod.Name, blockingPet.parent.Name) - return u.store.Add(blockingPet) + pod.Annotations[podapi.PodHostnameAnnotation] = pod.Name + pod.Annotations[podapi.PodSubdomainAnnotation] = set.Spec.ServiceName } -// newUnHealthyPetTracker tracks unhealthy pets that block progress of statefulsets. -func newUnHealthyPetTracker(pc petClient) *unhealthyPetTracker { - return &unhealthyPetTracker{pc: pc, store: cache.NewStore(pcbKeyFunc)} +// isRunningAndReady returns true if pod is in the PodRunning Phase, if it has a condition of PodReady, and if the init +// annotation has not explicitly disabled the Pod from being ready. 
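The naming helpers above form a round trip; concrete values, using the fixtures from the tests in this package (a set named "foo" with a "datadir" claim template):

set := newStatefulSet(3)         // fixture named "foo"
pod := newStatefulSetPod(set, 2) // pod.Name == "foo-2"
parent, ordinal := getParentNameAndOrdinal(pod) // "foo", 2
claims := getPersistentVolumeClaims(set, pod)
// claims["datadir"].Name == "datadir-foo-2"
_, _, _ = parent, ordinal, claims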
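isRunningAndReady, defined next, is the only consumer of StatefulSetInitAnnotation; the three cases it distinguishes, sketched for a Pod that is already Running with a True Ready condition:

pod := newStatefulSetPod(newStatefulSet(3), 0)
pod.Status.Phase = v1.PodRunning
v1.UpdatePodCondition(&pod.Status, &v1.PodCondition{Type: v1.PodReady, Status: v1.ConditionTrue})
_ = isRunningAndReady(pod) // true: no annotation, readiness stands
pod.Annotations[apps.StatefulSetInitAnnotation] = "false"
_ = isRunningAndReady(pod) // false: the explicit opt-out wins
pod.Annotations[apps.StatefulSetInitAnnotation] = "junk"
_ = isRunningAndReady(pod) // true: unparseable values are logged and ignored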
+func isRunningAndReady(pod *v1.Pod) bool { + if pod.Status.Phase != v1.PodRunning { + return false + } + podReady := v1.IsPodReady(pod) + // User may have specified a pod readiness override through a debug annotation. + initialized, ok := pod.Annotations[apps.StatefulSetInitAnnotation] + if ok { + if initAnnotation, err := strconv.ParseBool(initialized); err != nil { + glog.V(4).Infof("Failed to parse %v annotation on pod %v: %v", + apps.StatefulSetInitAnnotation, pod.Name, err) + } else if !initAnnotation { + glog.V(4).Infof("StatefulSet pod %v waiting on annotation %v", pod.Name, + apps.StatefulSetInitAnnotation) + podReady = initAnnotation + } + } + return podReady } -// pcbKeyFunc computes the key for a given pcb. -// If it's given a key, it simply returns it. -func pcbKeyFunc(obj interface{}) (string, error) { - if key, ok := obj.(string); ok { - return key, nil - } - p, ok := obj.(*pcb) - if !ok { - return "", fmt.Errorf("not a valid pod control block %#v", p) - } - if p.parent == nil { - return "", fmt.Errorf("cannot compute pod control block key without parent pointer %#v", p) - } - return controller.KeyFunc(p.parent) +// isCreated returns true if pod has been created and is maintained by the API server +func isCreated(pod *v1.Pod) bool { + return pod.Status.Phase != "" +} + +// isFailed returns true if pod has a Phase of PodFailed +func isFailed(pod *v1.Pod) bool { + return pod.Status.Phase == v1.PodFailed +} + +// isTerminated returns true if pod's deletion Timestamp has been set +func isTerminated(pod *v1.Pod) bool { + return pod.DeletionTimestamp != nil +} + +// isHealthy returns true if pod is running and ready and has not been terminated +func isHealthy(pod *v1.Pod) bool { + return isRunningAndReady(pod) && !isTerminated(pod) +} + +// newStatefulSetPod returns a new Pod conforming to the set's Spec with an identity generated from ordinal. +func newStatefulSetPod(set *apps.StatefulSet, ordinal int) *v1.Pod { + pod, _ := controller.GetPodFromTemplate(&set.Spec.Template, set, nil) + pod.Name = getPodName(set, ordinal) + updateIdentity(set, pod) + updateStorage(set, pod) + return pod +} + +// ascendingOrdinal is a sort.Interface that Sorts a list of Pods based on the ordinals extracted +// from the Pod. Pod's that have not been constructed by StatefulSet's have an ordinal of -1, and are therefore pushed +// to the front of the list. +type ascendingOrdinal []*v1.Pod + +func (ao ascendingOrdinal) Len() int { + return len(ao) +} + +func (ao ascendingOrdinal) Swap(i, j int) { + ao[i], ao[j] = ao[j], ao[i] +} + +func (ao ascendingOrdinal) Less(i, j int) bool { + return getOrdinal(ao[i]) < getOrdinal(ao[j]) } diff --git a/pkg/controller/statefulset/stateful_set_utils_test.go b/pkg/controller/statefulset/stateful_set_utils_test.go new file mode 100644 index 00000000000..2b340752484 --- /dev/null +++ b/pkg/controller/statefulset/stateful_set_utils_test.go @@ -0,0 +1,353 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package statefulset + +import ( + "fmt" + "math/rand" + "sort" + "strconv" + "testing" + "time" + + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + + "k8s.io/kubernetes/pkg/api/v1" + podapi "k8s.io/kubernetes/pkg/api/v1/pod" + apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1" +) + +func TestGetParentNameAndOrdinal(t *testing.T) { + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 1) + if parent, ordinal := getParentNameAndOrdinal(pod); parent != set.Name { + t.Errorf("Extracted the wrong parent name: expected %s, found %s", set.Name, parent) + } else if ordinal != 1 { + t.Errorf("Extracted the wrong ordinal: expected %d, found %d", 1, ordinal) + } + pod.Name = "1-bar" + if parent, ordinal := getParentNameAndOrdinal(pod); parent != "" { + t.Error("Expected empty string for non-member Pod parent") + } else if ordinal != -1 { + t.Error("Expected -1 for non-member Pod ordinal") + } +} + +func TestIsMemberOf(t *testing.T) { + set := newStatefulSet(3) + set2 := newStatefulSet(3) + set2.Name = "foo2" + pod := newStatefulSetPod(set, 1) + if !isMemberOf(set, pod) { + t.Error("isMemberOf returned false negative") + } + if isMemberOf(set2, pod) { + t.Error("isMemberOf returned false positive") + } +} + +func TestIdentityMatches(t *testing.T) { + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 1) + if !identityMatches(set, pod) { + t.Error("Newly created Pod has a bad identity") + } + pod.Name = "foo" + if identityMatches(set, pod) { + t.Error("identity matches for a Pod with the wrong name") + } + pod = newStatefulSetPod(set, 1) + pod.Namespace = "" + if identityMatches(set, pod) { + t.Error("identity matches for a Pod with the wrong namespace") + } + pod = newStatefulSetPod(set, 1) + delete(pod.Annotations, podapi.PodHostnameAnnotation) + if identityMatches(set, pod) { + t.Error("identity matches for a Pod with no hostname") + } + pod = newStatefulSetPod(set, 1) + delete(pod.Annotations, podapi.PodSubdomainAnnotation) + if identityMatches(set, pod) { + t.Error("identity matches for a Pod with no subdomain") + } +} + +func TestStorageMatches(t *testing.T) { + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 1) + if !storageMatches(set, pod) { + t.Error("Newly created Pod has invalid storage") + } + pod.Spec.Volumes = nil + if storageMatches(set, pod) { + t.Error("Pod with invalid Volumes has valid storage") + } + pod = newStatefulSetPod(set, 1) + for i := range pod.Spec.Volumes { + pod.Spec.Volumes[i].PersistentVolumeClaim = nil + } + if storageMatches(set, pod) { + t.Error("Pod with invalid Volumes claims valid storage") + } + pod = newStatefulSetPod(set, 1) + for i := range pod.Spec.Volumes { + if pod.Spec.Volumes[i].PersistentVolumeClaim != nil { + pod.Spec.Volumes[i].PersistentVolumeClaim.ClaimName = "foo" + } + } + if storageMatches(set, pod) { + t.Error("Pod with invalid Volumes claims valid storage") + } + pod = newStatefulSetPod(set, 1) + pod.Name = "bar" + if storageMatches(set, pod) { + t.Error("Pod with invalid ordinal has valid storage") + } +} + +func TestUpdateIdentity(t *testing.T) { + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 1) + if !identityMatches(set, pod) { + t.Error("Newly created Pod has a bad identity") + } + pod.Namespace = "" + if identityMatches(set, pod) { + t.Error("identity matches for a Pod with the wrong namespace") + } + updateIdentity(set, pod) + if !identityMatches(set, pod) { + t.Error("updateIdentity failed to update the Pod's namespace") + } + pod = newStatefulSetPod(set, 1) + delete(pod.Annotations, podapi.PodHostnameAnnotation) + if identityMatches(set, pod) { + t.Error("identity matches for a Pod with no hostname") + } + updateIdentity(set, pod) + if !identityMatches(set, pod) { + t.Error("updateIdentity failed to update the Pod's hostname") + } + pod = newStatefulSetPod(set, 1) + delete(pod.Annotations, podapi.PodSubdomainAnnotation) + if identityMatches(set, pod) { + t.Error("identity matches for a Pod with no subdomain") + } + updateIdentity(set, pod) + if !identityMatches(set, pod) { + t.Error("updateIdentity failed to update the Pod's subdomain") + } + pod = newStatefulSetPod(set, 1) + pod.Annotations = nil + if identityMatches(set, pod) { + t.Error("identity matches for a Pod with no annotations") + } + updateIdentity(set, pod) + if !identityMatches(set, pod) { + t.Error("updateIdentity failed to update the Pod's annotations") + } +} + +func TestUpdateStorage(t *testing.T) { + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 1) + if !storageMatches(set, pod) { + t.Error("Newly created Pod has invalid storage") + } + pod.Spec.Volumes = nil + if storageMatches(set, pod) { + t.Error("Pod with invalid Volumes has valid storage") + } + updateStorage(set, pod) + if !storageMatches(set, pod) { + t.Error("updateStorage failed to recreate volumes") + } + pod = newStatefulSetPod(set, 1) + for i := range pod.Spec.Volumes { + pod.Spec.Volumes[i].PersistentVolumeClaim = nil + } + if storageMatches(set, pod) { + t.Error("Pod with invalid Volumes claims valid storage") + } + updateStorage(set, pod) + if !storageMatches(set, pod) { + t.Error("updateStorage failed to recreate volume claims") + } + pod = newStatefulSetPod(set, 1) + for i := range pod.Spec.Volumes { + if pod.Spec.Volumes[i].PersistentVolumeClaim != nil { + pod.Spec.Volumes[i].PersistentVolumeClaim.ClaimName = "foo" + } + } + if storageMatches(set, pod) { + t.Error("Pod with invalid Volumes claims valid storage") + } + updateStorage(set, pod) + if !storageMatches(set, pod) { + t.Error("updateStorage failed to recreate volume claim names") + } +} + +func TestIsRunningAndReady(t *testing.T) { + set := newStatefulSet(3) + pod := newStatefulSetPod(set, 1) + if isRunningAndReady(pod) { + t.Error("isRunningAndReady does not respect Pod phase") + } + pod.Status.Phase = v1.PodRunning + if isRunningAndReady(pod) { + t.Error("isRunningAndReady does not respect Pod condition") + } + condition := v1.PodCondition{Type: v1.PodReady, Status: v1.ConditionTrue} + v1.UpdatePodCondition(&pod.Status, &condition) + if !isRunningAndReady(pod) { + t.Error("Pod should be running and ready") + } + pod.Annotations[apps.StatefulSetInitAnnotation] = "true" + if !isRunningAndReady(pod) { + t.Error("isRunningAndReady does not respect init annotation set to true") + } + pod.Annotations[apps.StatefulSetInitAnnotation] = "false" + if isRunningAndReady(pod) { + t.Error("isRunningAndReady does not respect init annotation set to false") + } + pod.Annotations[apps.StatefulSetInitAnnotation] = "blah" + if !isRunningAndReady(pod) { + t.Error("isRunningAndReady does not ignore an erroneous init annotation") + } +} + +func TestAscendingOrdinal(t *testing.T) { + set := newStatefulSet(10) + pods := make([]*v1.Pod, 10) + perm := rand.Perm(10) + for i, v := range perm { + pods[i] = newStatefulSetPod(set, v) + } + sort.Sort(ascendingOrdinal(pods)) + if !sort.IsSorted(ascendingOrdinal(pods)) { + t.Error("ascendingOrdinal fails to sort Pods") + } +} + +func TestOverlappingStatefulSets(t 
*testing.T) { + sets := make([]apps.StatefulSet, 10) + perm := rand.Perm(10) + for i, v := range perm { + sets[i] = *newStatefulSet(10) + sets[i].CreationTimestamp = metav1.NewTime(sets[i].CreationTimestamp.Add(time.Duration(v) * time.Second)) + } + sort.Sort(overlappingStatefulSets(sets)) + if !sort.IsSorted(overlappingStatefulSets(sets)) { + t.Error("ascendingOrdinal fails to sort Pods") + } + for i, v := range perm { + sets[i] = *newStatefulSet(10) + sets[i].Name = strconv.FormatInt(int64(v), 10) + } + sort.Sort(overlappingStatefulSets(sets)) + if !sort.IsSorted(overlappingStatefulSets(sets)) { + t.Error("ascendingOrdinal fails to sort Pods") + } +} + +func newPVC(name string) v1.PersistentVolumeClaim { + return v1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + }, + Spec: v1.PersistentVolumeClaimSpec{ + Resources: v1.ResourceRequirements{ + Requests: v1.ResourceList{ + v1.ResourceStorage: *resource.NewQuantity(1, resource.BinarySI), + }, + }, + }, + } +} + +func newStatefulSetWithVolumes(replicas int, name string, petMounts []v1.VolumeMount, podMounts []v1.VolumeMount) *apps.StatefulSet { + mounts := append(petMounts, podMounts...) + claims := []v1.PersistentVolumeClaim{} + for _, m := range petMounts { + claims = append(claims, newPVC(m.Name)) + } + + vols := []v1.Volume{} + for _, m := range podMounts { + vols = append(vols, v1.Volume{ + Name: m.Name, + VolumeSource: v1.VolumeSource{ + HostPath: &v1.HostPathVolumeSource{ + Path: fmt.Sprintf("/tmp/%v", m.Name), + }, + }, + }) + } + + template := v1.PodTemplateSpec{ + Spec: v1.PodSpec{ + Containers: []v1.Container{ + { + Name: "nginx", + Image: "nginx", + VolumeMounts: mounts, + }, + }, + Volumes: vols, + }, + } + + template.Labels = map[string]string{"foo": "bar"} + + return &apps.StatefulSet{ + TypeMeta: metav1.TypeMeta{ + Kind: "StatefulSet", + APIVersion: "apps/v1beta1", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: v1.NamespaceDefault, + UID: types.UID("test"), + }, + Spec: apps.StatefulSetSpec{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{"foo": "bar"}, + }, + Replicas: func() *int32 { i := int32(replicas); return &i }(), + Template: template, + VolumeClaimTemplates: claims, + ServiceName: "governingsvc", + }, + } +} + +func newStatefulSet(replicas int) *apps.StatefulSet { + petMounts := []v1.VolumeMount{ + {Name: "datadir", MountPath: "/tmp/zookeeper"}, + } + podMounts := []v1.VolumeMount{ + {Name: "home", MountPath: "/home"}, + } + return newStatefulSetWithVolumes(replicas, "foo", petMounts, podMounts) +} diff --git a/pkg/controller/statefulset/statefulpod.go b/pkg/controller/statefulset/statefulpod.go deleted file mode 100644 index 7479474ad39..00000000000 --- a/pkg/controller/statefulset/statefulpod.go +++ /dev/null @@ -1,327 +0,0 @@ -/* -Copyright 2016 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package statefulset - -import ( - "fmt" - "strconv" - - "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/client-go/tools/record" - "k8s.io/kubernetes/pkg/api/v1" - apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1" - "k8s.io/kubernetes/pkg/client/clientset_generated/clientset" - - "github.com/golang/glog" -) - -// petLifeCycleEvent is used to communicate high level actions the controller -// needs to take on a given pet. It's recorded in the pcb. The recognized values -// are listed below. -type petLifeCycleEvent string - -const ( - syncPet petLifeCycleEvent = "sync" - deletePet petLifeCycleEvent = "delete" - // updateRetries is the number of Get/Update cycles we perform when an - // update fails. - updateRetries = 3 - // StatefulSetInitAnnotation is an annotation which when set, indicates that the - // pet has finished initializing itself. - // TODO: Replace this with init container status. - StatefulSetInitAnnotation = "pod.alpha.kubernetes.io/initialized" -) - -// pcb is the control block used to transmit all updates about a single pet. -// It serves as the manifest for a single pet. Users must populate the pod -// and parent fields to pass it around safely. -type pcb struct { - // pod is the desired pet pod. - pod *v1.Pod - // pvcs is a list of desired persistent volume claims for the pet pod. - pvcs []v1.PersistentVolumeClaim - // event is the lifecycle event associated with this update. - event petLifeCycleEvent - // id is the identity index of this pet. - id string - // parent is a pointer to the parent statefulset. - parent *apps.StatefulSet -} - -// pvcClient is a client for managing persistent volume claims. -type pvcClient interface { - // DeletePVCs deletes the pvcs in the given pcb. - DeletePVCs(*pcb) error - // SyncPVCs creates/updates pvcs in the given pcb. - SyncPVCs(*pcb) error -} - -// petSyncer syncs a single pet. -type petSyncer struct { - petClient - - // blockingPet is an unhealthy pet either from this iteration or a previous - // iteration, either because it is not yet Running, or being Deleted, that - // prevents other creates/deletions. - blockingPet *pcb -} - -// errUnhealthyPet is returned when a we either know for sure a pet is unhealthy, -// or don't know its state but assume it is unhealthy. It's used as a signal to the caller for further operations like updating status.replicas. -// This is not a fatal error. -type errUnhealthyPet string - -func (e errUnhealthyPet) Error() string { - return string(e) -} - -// Sync syncs the given pet. -func (p *petSyncer) Sync(pet *pcb) error { - if pet == nil { - return nil - } - realPet, exists, err := p.Get(pet) - if err != nil { - return err - } - // There is not constraint except quota on the number of pvcs created. - // This is done per pet so we get a working cluster ASAP, even if user - // runs out of quota. 
- if err := p.SyncPVCs(pet); err != nil { - return err - } - // if pet failed - we need to remove old one because of consistent naming - if exists && realPet.pod.Status.Phase == v1.PodFailed { - glog.V(2).Infof("Deleting evicted pod %v/%v", realPet.pod.Namespace, realPet.pod.Name) - if err := p.petClient.Delete(realPet); err != nil { - return err - } - } else if exists { - if !p.isHealthy(realPet.pod) { - glog.V(4).Infof("StatefulSet %v waiting on unhealthy pod %v", pet.parent.Name, realPet.pod.Name) - } - return p.Update(realPet, pet) - } - if p.blockingPet != nil { - message := errUnhealthyPet(fmt.Sprintf("Create of %v in StatefulSet %v blocked by unhealthy pod %v", pet.pod.Name, pet.parent.Name, p.blockingPet.pod.Name)) - glog.V(4).Infof(message.Error()) - return message - } - // This is counted as a create, even if it fails. We can't skip indices - // because some pets might allocate a special role to earlier indices. - // The returned error will force a requeue. - // TODO: What's the desired behavior if pet-0 is deleted while pet-1 is - // not yet healthy? currently pet-0 will wait till pet-1 is healthy, - // this feels safer, but might lead to deadlock. - p.blockingPet = pet - if err := p.Create(pet); err != nil { - return err - } - return nil -} - -// Delete deletes the given pet, if no other pet in the statefulset is blocking a -// scale event. -func (p *petSyncer) Delete(pet *pcb) error { - if pet == nil { - return nil - } - realPet, exists, err := p.Get(pet) - if err != nil { - return err - } - if !exists { - return nil - } - if p.blockingPet != nil { - glog.V(4).Infof("Delete of %v in StatefulSet %v blocked by unhealthy pod %v", realPet.pod.Name, pet.parent.Name, p.blockingPet.pod.Name) - return nil - } - // This is counted as a delete, even if it fails. - // The returned error will force a requeue. - p.blockingPet = realPet - if !p.isDying(realPet.pod) { - glog.V(2).Infof("StatefulSet %v deleting pod %v/%v", pet.parent.Name, pet.pod.Namespace, pet.pod.Name) - return p.petClient.Delete(pet) - } - glog.V(4).Infof("StatefulSet %v waiting on pod %v to die in %v", pet.parent.Name, realPet.pod.Name, realPet.pod.DeletionTimestamp) - return nil -} - -// petClient is a client for managing pets. -type petClient interface { - pvcClient - petHealthChecker - Delete(*pcb) error - Get(*pcb) (*pcb, bool, error) - Create(*pcb) error - Update(*pcb, *pcb) error -} - -// apiServerPetClient is a statefulset aware Kubernetes client. -type apiServerPetClient struct { - c clientset.Interface - recorder record.EventRecorder - petHealthChecker -} - -// Get gets the pet in the pcb from the apiserver. -func (p *apiServerPetClient) Get(pet *pcb) (*pcb, bool, error) { - ns := pet.parent.Namespace - pod, err := p.c.Core().Pods(ns).Get(pet.pod.Name, metav1.GetOptions{}) - if errors.IsNotFound(err) { - return nil, false, nil - } - if err != nil { - return nil, false, err - } - realPet := *pet - realPet.pod = pod - return &realPet, true, nil -} - -// Delete deletes the pet in the pcb from the apiserver. -func (p *apiServerPetClient) Delete(pet *pcb) error { - err := p.c.Core().Pods(pet.parent.Namespace).Delete(pet.pod.Name, nil) - if errors.IsNotFound(err) { - err = nil - } - p.event(pet.parent, "Delete", fmt.Sprintf("pod: %v", pet.pod.Name), err) - return err -} - -// Create creates the pet in the pcb. 
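The deleted client below caps its get-then-update loop at updateRetries (3); its replacement raises the bound to maxUpdateRetries (10) and gives exhaustion a named error, updateConflictError. A sketch of the bounded pattern, assuming the real loop now lives in the new stateful_pod_control.go (not shown in this hunk):

// Hypothetical shape of the bounded conflict-retry loop; attempt stands
// in for one Update call plus a fresh Get on conflict.
func retryUpdate(attempt func() error) error {
	for i := 0; i < maxUpdateRetries; i++ {
		if err := attempt(); err == nil {
			return nil
		}
	}
	return updateConflictError
}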
-func (p *apiServerPetClient) Create(pet *pcb) error { - _, err := p.c.Core().Pods(pet.parent.Namespace).Create(pet.pod) - p.event(pet.parent, "Create", fmt.Sprintf("pod: %v", pet.pod.Name), err) - return err -} - -// Update updates the pet in the 'pet' pcb to match the pet in the 'expectedPet' pcb. -// If the pod object of a pet which to be updated has been changed in server side, we -// will get the actual value and set pet identity before retries. -func (p *apiServerPetClient) Update(pet *pcb, expectedPet *pcb) (updateErr error) { - pc := p.c.Core().Pods(pet.parent.Namespace) - - for i := 0; ; i++ { - updatePod, needsUpdate, err := copyPetID(pet, expectedPet) - if err != nil || !needsUpdate { - return err - } - glog.V(4).Infof("Resetting pod %v/%v to match StatefulSet %v spec", pet.pod.Namespace, pet.pod.Name, pet.parent.Name) - _, updateErr = pc.Update(&updatePod) - if updateErr == nil || i >= updateRetries { - return updateErr - } - getPod, getErr := pc.Get(updatePod.Name, metav1.GetOptions{}) - if getErr != nil { - return getErr - } - pet.pod = getPod - } -} - -// DeletePVCs should delete PVCs, when implemented. -func (p *apiServerPetClient) DeletePVCs(pet *pcb) error { - // TODO: Implement this when we delete pvcs. - return nil -} - -func (p *apiServerPetClient) getPVC(pvcName, pvcNamespace string) (*v1.PersistentVolumeClaim, error) { - pvc, err := p.c.Core().PersistentVolumeClaims(pvcNamespace).Get(pvcName, metav1.GetOptions{}) - return pvc, err -} - -func (p *apiServerPetClient) createPVC(pvc *v1.PersistentVolumeClaim) error { - _, err := p.c.Core().PersistentVolumeClaims(pvc.Namespace).Create(pvc) - return err -} - -// SyncPVCs syncs pvcs in the given pcb. -func (p *apiServerPetClient) SyncPVCs(pet *pcb) error { - errmsg := "" - // Create new claims. - for i, pvc := range pet.pvcs { - _, err := p.getPVC(pvc.Name, pet.parent.Namespace) - if err != nil { - if errors.IsNotFound(err) { - var err error - if err = p.createPVC(&pet.pvcs[i]); err != nil { - errmsg += fmt.Sprintf("Failed to create %v: %v", pvc.Name, err) - } - p.event(pet.parent, "Create", fmt.Sprintf("pvc: %v", pvc.Name), err) - } else { - errmsg += fmt.Sprintf("Error trying to get pvc %v, %v.", pvc.Name, err) - } - } - // TODO: Check resource requirements and accessmodes, update if necessary - } - if len(errmsg) != 0 { - return fmt.Errorf("%v", errmsg) - } - return nil -} - -// event formats an event for the given runtime object. -func (p *apiServerPetClient) event(obj runtime.Object, reason, msg string, err error) { - if err != nil { - p.recorder.Eventf(obj, v1.EventTypeWarning, fmt.Sprintf("Failed%v", reason), fmt.Sprintf("%v, error: %v", msg, err)) - } else { - p.recorder.Eventf(obj, v1.EventTypeNormal, fmt.Sprintf("Successful%v", reason), msg) - } -} - -// petHealthChecker is an interface to check pet health. It makes a boolean -// decision based on the given pod. -type petHealthChecker interface { - isHealthy(*v1.Pod) bool - isDying(*v1.Pod) bool -} - -// defaultPetHealthChecks does basic health checking. -// It doesn't update, probe or get the pod. -type defaultPetHealthChecker struct{} - -// isHealthy returns true if the pod is ready & running. If the pod has the -// "pod.alpha.kubernetes.io/initialized" annotation set to "false", pod state is ignored. -func (d *defaultPetHealthChecker) isHealthy(pod *v1.Pod) bool { - if pod == nil || pod.Status.Phase != v1.PodRunning { - return false - } - podReady := v1.IsPodReady(pod) - - // User may have specified a pod readiness override through a debug annotation. 
-// petHealthChecker is an interface to check pet health. It makes a boolean
-// decision based on the given pod.
-type petHealthChecker interface {
-	isHealthy(*v1.Pod) bool
-	isDying(*v1.Pod) bool
-}
-
-// defaultPetHealthChecker does basic health checking.
-// It doesn't update, probe or get the pod.
-type defaultPetHealthChecker struct{}
-
-// isHealthy returns true if the pod is ready & running. If the pod has the
-// "pod.alpha.kubernetes.io/initialized" annotation set to "false", pod state is ignored.
-func (d *defaultPetHealthChecker) isHealthy(pod *v1.Pod) bool {
-	if pod == nil || pod.Status.Phase != v1.PodRunning {
-		return false
-	}
-	podReady := v1.IsPodReady(pod)
-
-	// User may have specified a pod readiness override through a debug annotation.
-	initialized, ok := pod.Annotations[StatefulSetInitAnnotation]
-	if ok {
-		if initAnnotation, err := strconv.ParseBool(initialized); err != nil {
-			glog.V(4).Infof("Failed to parse %v annotation on pod %v: %v", StatefulSetInitAnnotation, pod.Name, err)
-		} else if !initAnnotation {
-			glog.V(4).Infof("StatefulSet pod %v waiting on annotation %v", pod.Name, StatefulSetInitAnnotation)
-			podReady = initAnnotation
-		}
-	}
-	return podReady
-}
-
-// isDying returns true if the pod has a non-nil deletion timestamp. Since the
-// timestamp can only decrease, once this method returns true for a given pet, it
-// will never return false.
-func (d *defaultPetHealthChecker) isDying(pod *v1.Pod) bool {
-	return pod != nil && pod.DeletionTimestamp != nil
-}
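Note the defaulting rule isHealthy applies to the annotation: the override only bites when the value is present and parses to false, while absence or an unparseable value leaves the pod's real readiness in force. The same rule extracted into a standalone predicate, meant to be AND-ed with actual readiness (the helper name is hypothetical; the constant is referenced from the apps package as in the e2e changes later in this patch):

    package example

    import (
        "strconv"

        "github.com/golang/glog"
        "k8s.io/kubernetes/pkg/api/v1"
        apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1"
    )

    // initAnnotationAllows returns false only for an explicit, parseable "false";
    // a missing or garbled annotation defers to the pod's real readiness.
    func initAnnotationAllows(pod *v1.Pod) bool {
        value, ok := pod.Annotations[apps.StatefulSetInitAnnotation]
        if !ok {
            return true
        }
        initialized, err := strconv.ParseBool(value)
        if err != nil {
            glog.V(4).Infof("Failed to parse %v annotation on pod %v: %v", apps.StatefulSetInitAnnotation, pod.Name, err)
            return true
        }
        return initialized
    }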
diff --git a/pkg/controller/statefulset/statefulpod_test.go b/pkg/controller/statefulset/statefulpod_test.go
deleted file mode 100644
index 6ab5f898d85..00000000000
--- a/pkg/controller/statefulset/statefulpod_test.go
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
-Copyright 2016 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package statefulset
-
-import (
-	"fmt"
-	"net/http/httptest"
-	"testing"
-
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/runtime"
-	restclient "k8s.io/client-go/rest"
-	core "k8s.io/client-go/testing"
-	utiltesting "k8s.io/client-go/util/testing"
-	"k8s.io/kubernetes/pkg/api"
-	"k8s.io/kubernetes/pkg/api/testapi"
-	"k8s.io/kubernetes/pkg/api/v1"
-	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
-	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset/fake"
-)
-
-func newPetClient(client *clientset.Clientset) *apiServerPetClient {
-	return &apiServerPetClient{
-		c: client,
-	}
-}
-
-func makeTwoDifferntPCB() (pcb1, pcb2 *pcb) {
-	userAdded := v1.Volume{
-		Name: "test",
-		VolumeSource: v1.VolumeSource{
-			EmptyDir: &v1.EmptyDirVolumeSource{Medium: v1.StorageMediumMemory},
-		},
-	}
-	ps := newStatefulSet(2)
-	pcb1, _ = newPCB("1", ps)
-	pcb2, _ = newPCB("2", ps)
-	pcb2.pod.Spec.Volumes = append(pcb2.pod.Spec.Volumes, userAdded)
-	return pcb1, pcb2
-}
-
-func TestUpdatePetWithoutRetry(t *testing.T) {
-	pcb1, pcb2 := makeTwoDifferntPCB()
-	// invalid pet with empty pod
-	invalidPcb := *pcb1
-	invalidPcb.pod = nil
-
-	testCases := []struct {
-		realPet     *pcb
-		expectedPet *pcb
-		expectErr   bool
-		requests    int
-	}{
-		// case 0: error occurs, no need to update
-		{
-			realPet:     pcb1,
-			expectedPet: &invalidPcb,
-			expectErr:   true,
-			requests:    0,
-		},
-		// case 1: identical pet, no need to update
-		{
-			realPet:     pcb1,
-			expectedPet: pcb1,
-			expectErr:   false,
-			requests:    0,
-		},
-		// case 2: need to call update once
-		{
-			realPet:     pcb1,
-			expectedPet: pcb2,
-			expectErr:   false,
-			requests:    1,
-		},
-	}
-
-	for k, tc := range testCases {
-		body := runtime.EncodeOrDie(testapi.Default.Codec(), &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "empty_pod"}})
-		fakeHandler := utiltesting.FakeHandler{
-			StatusCode:   200,
-			ResponseBody: string(body),
-		}
-		testServer := httptest.NewServer(&fakeHandler)
-
-		client := clientset.NewForConfigOrDie(&restclient.Config{Host: testServer.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &api.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
-		petClient := newPetClient(client)
-		err := petClient.Update(tc.realPet, tc.expectedPet)
-
-		if tc.expectErr != (err != nil) {
-			t.Errorf("case %d: expect error(%v), got err: %v", k, tc.expectErr, err)
-		}
-		fakeHandler.ValidateRequestCount(t, tc.requests)
-		testServer.Close()
-	}
-}
-
-func TestUpdatePetWithFailure(t *testing.T) {
-	fakeHandler := utiltesting.FakeHandler{
-		StatusCode:   500,
-		ResponseBody: "{}",
-	}
-	testServer := httptest.NewServer(&fakeHandler)
-	defer testServer.Close()
-
-	client := clientset.NewForConfigOrDie(&restclient.Config{Host: testServer.URL, ContentConfig: restclient.ContentConfig{GroupVersion: &api.Registry.GroupOrDie(v1.GroupName).GroupVersion}})
-	petClient := newPetClient(client)
-
-	pcb1, pcb2 := makeTwoDifferntPCB()
-
-	if err := petClient.Update(pcb1, pcb2); err == nil {
-		t.Errorf("expect error, got nil")
-	}
-	// 1 Update and 1 GET, both of which fail
-	fakeHandler.ValidateRequestCount(t, 2)
-}
-
-func TestUpdatePetRetrySucceed(t *testing.T) {
-	pcb1, pcb2 := makeTwoDifferntPCB()
-
-	fakeClient := &fake.Clientset{}
-	fakeClient.AddReactor("get", "pods", func(action core.Action) (bool, runtime.Object, error) {
-		return true, pcb2.pod, nil
-	})
-	fakeClient.AddReactor("*", "*", func(action core.Action) (bool, runtime.Object, error) {
-		return true, nil, fmt.Errorf("Fake error")
-	})
-	petClient := apiServerPetClient{
-		c: fakeClient,
-	}
-
-	if err := petClient.Update(pcb1, pcb2); err != nil {
-		t.Errorf("unexpected error: %v", err)
-	}
-
-	actions := fakeClient.Actions()
-	if len(actions) != 2 {
-		t.Errorf("Expect 2 actions, got %d actions", len(actions))
-	}
-	for i := 0; i < len(actions); i++ {
-		a := actions[i]
-		if a.GetResource().Resource != "pods" {
-			t.Errorf("Unexpected action %+v", a)
-			continue
-		}
-
-		switch action := a.(type) {
-		case core.GetAction:
-			if i%2 == 0 {
-				t.Errorf("Unexpected Get action")
-			}
-			// Make sure the get is for the right pod
-			if action.GetName() != pcb2.pod.Name {
-				t.Errorf("Expected get pod %v, got %q instead", pcb2.pod.Name, action.GetName())
-			}
-		case core.UpdateAction:
-			if i%2 == 1 {
-				t.Errorf("Unexpected Update action")
-			}
-		default:
-			t.Errorf("Unexpected action %+v", a)
-			break
-		}
-	}
-}
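TestUpdatePetRetrySucceed above leans on reactor ordering in the fake clientset: reactors are consulted in the order they were added, so the specific get-pods reactor must be registered before the catch-all that fails everything else. A stripped-down sketch of that arrangement (the test name and canned pod are hypothetical):

    package example

    import (
        "fmt"
        "testing"

        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
        "k8s.io/apimachinery/pkg/runtime"
        core "k8s.io/client-go/testing"
        "k8s.io/kubernetes/pkg/api/v1"
        "k8s.io/kubernetes/pkg/client/clientset_generated/clientset/fake"
    )

    func TestReactorOrdering(t *testing.T) {
        fakeClient := &fake.Clientset{}
        // Specific reactor first: GETs on pods succeed with a canned object.
        fakeClient.AddReactor("get", "pods", func(action core.Action) (bool, runtime.Object, error) {
            return true, &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "pod-0"}}, nil
        })
        // Catch-all second: every other verb/resource fails, simulating a flaky server.
        fakeClient.AddReactor("*", "*", func(action core.Action) (bool, runtime.Object, error) {
            return true, nil, fmt.Errorf("fake error")
        })

        if _, err := fakeClient.Core().Pods("default").Get("pod-0", metav1.GetOptions{}); err != nil {
            t.Errorf("expected the canned pod, got error: %v", err)
        }
        if _, err := fakeClient.Core().Pods("default").Create(&v1.Pod{}); err == nil {
            t.Errorf("expected the catch-all reactor to fail the create")
        }
    }

Registering the catch-all first would shadow the specific reactor entirely, which is why the order in the deleted test is load-bearing.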
"github.com/onsi/gomega" apierrs "k8s.io/apimachinery/pkg/api/errors" @@ -44,7 +42,6 @@ import ( "k8s.io/kubernetes/pkg/api/v1" apps "k8s.io/kubernetes/pkg/apis/apps/v1beta1" "k8s.io/kubernetes/pkg/client/clientset_generated/clientset" - "k8s.io/kubernetes/pkg/controller/statefulset" "k8s.io/kubernetes/test/e2e/framework" ) @@ -77,7 +74,7 @@ var _ = framework.KubeDescribe("StatefulSet", func() { ns = f.Namespace.Name }) - framework.KubeDescribe("Basic StatefulSet functionality", func() { + framework.KubeDescribe("Basic StatefulSet functionality [StatefulSetBasic]", func() { ssName := "ss" labels := map[string]string{ "foo": "bar", @@ -144,7 +141,7 @@ var _ = framework.KubeDescribe("StatefulSet", func() { framework.ExpectNoError(sst.execInStatefulPods(ss, cmd)) }) - It("should handle healthy stateful pod restarts during scale", func() { + It("should not deadlock when a pod's predecessor fails", func() { By("Creating statefulset " + ssName + " in namespace " + ns) *(ss.Spec.Replicas) = 2 setInitializedAnnotation(ss, "false") @@ -169,8 +166,8 @@ var _ = framework.KubeDescribe("StatefulSet", func() { By("Deleting healthy stateful pod at index 0.") sst.deleteStatefulPodAtIndex(0, ss) - By("Confirming stateful pod at index 0 is not recreated.") - sst.confirmStatefulPodCount(1, ss, 10*time.Second) + By("Confirming stateful pod at index 0 is recreated.") + sst.waitForRunningAndReady(2, ss) By("Deleting unhealthy stateful pod at index 1.") sst.deleteStatefulPodAtIndex(1, ss) @@ -937,10 +934,10 @@ func (s *statefulSetTester) setHealthy(ss *apps.StatefulSet) { framework.Failf("Found multiple non-healthy stateful pods: %v and %v", pod.Name, markedHealthyPod) } p, err := framework.UpdatePodWithRetries(s.c, pod.Namespace, pod.Name, func(update *v1.Pod) { - update.Annotations[statefulset.StatefulSetInitAnnotation] = "true" + update.Annotations[apps.StatefulSetInitAnnotation] = "true" }) framework.ExpectNoError(err) - framework.Logf("Set annotation %v to %v on pod %v", statefulset.StatefulSetInitAnnotation, p.Annotations[statefulset.StatefulSetInitAnnotation], pod.Name) + framework.Logf("Set annotation %v to %v on pod %v", apps.StatefulSetInitAnnotation, p.Annotations[apps.StatefulSetInitAnnotation], pod.Name) markedHealthyPod = pod.Name } } @@ -1016,7 +1013,7 @@ func deleteAllStatefulSets(c clientset.Interface, ns string) { return true, nil }) if pvcPollErr != nil { - errList = append(errList, fmt.Sprintf("Timeout waiting for pvc deletion.")) + errList = append(errList, "Timeout waiting for pvc deletion.") } pollErr := wait.PollImmediate(statefulsetPoll, statefulsetTimeout, func() (bool, error) { @@ -1038,7 +1035,7 @@ func deleteAllStatefulSets(c clientset.Interface, ns string) { return false, nil }) if pollErr != nil { - errList = append(errList, fmt.Sprintf("Timeout waiting for pv provisioner to delete pvs, this might mean the test leaked pvs.")) + errList = append(errList, "Timeout waiting for pv provisioner to delete pvs, this might mean the test leaked pvs.") } if len(errList) != 0 { framework.ExpectNoError(fmt.Errorf("%v", strings.Join(errList, "\n"))) @@ -1063,7 +1060,7 @@ func pollReadWithTimeout(statefulPod statefulPodTester, statefulPodNumber int, k } func isInitialized(pod v1.Pod) bool { - initialized, ok := pod.Annotations[statefulset.StatefulSetInitAnnotation] + initialized, ok := pod.Annotations[apps.StatefulSetInitAnnotation] if !ok { return false } @@ -1074,10 +1071,6 @@ func isInitialized(pod v1.Pod) bool { return inited } -func dec(i int64, exponent int) *inf.Dec { - return 
@@ -1063,7 +1060,7 @@ func pollReadWithTimeout(statefulPod statefulPodTester, statefulPodNumber int, k
 }
 
 func isInitialized(pod v1.Pod) bool {
-	initialized, ok := pod.Annotations[statefulset.StatefulSetInitAnnotation]
+	initialized, ok := pod.Annotations[apps.StatefulSetInitAnnotation]
 	if !ok {
 		return false
 	}
@@ -1074,10 +1071,6 @@ func isInitialized(pod v1.Pod) bool {
 	return inited
 }
 
-func dec(i int64, exponent int) *inf.Dec {
-	return inf.NewDec(i, inf.Scale(-exponent))
-}
-
 func newPVC(name string) v1.PersistentVolumeClaim {
 	return v1.PersistentVolumeClaim{
 		ObjectMeta: metav1.ObjectMeta{