[kubeadm] fix mirror-pod hash race condition

- Update kubeadm static pod upgrades to use the
  kubetypes.ConfigHashAnnotationKey annotation on the mirror pod rather
  than generating a hash from the full object info. Previously, a status
  update for the pod would allow the upgrade to proceed before the
  new static pod manifest was actually deployed.

Signed-off-by: Jason DeTiberus <detiber@gmail.com>
This commit is contained in:
Jason DeTiberus
2018-03-30 10:19:26 -04:00
committed by leigh schrandt
parent 2bdca2b75f
commit d55d1b6fbe
5 changed files with 18 additions and 22 deletions

View File

@@ -186,7 +186,7 @@ func upgradeComponent(component string, waiter apiclient.Waiter, pathMgr StaticP
// notice the removal of the Static Pod, leading to a false positive below where we check that the API endpoint is healthy // notice the removal of the Static Pod, leading to a false positive below where we check that the API endpoint is healthy
// If we don't do this, there is a case where we remove the Static Pod manifest, kubelet is slow to react, kubeadm checks the // If we don't do this, there is a case where we remove the Static Pod manifest, kubelet is slow to react, kubeadm checks the
// API endpoint below of the OLD Static Pod component and proceeds quickly enough, which might lead to unexpected results. // API endpoint below of the OLD Static Pod component and proceeds quickly enough, which might lead to unexpected results.
if err := waiter.WaitForStaticPodControlPlaneHashChange(cfg.NodeName, component, beforePodHash); err != nil { if err := waiter.WaitForStaticPodHashChange(cfg.NodeName, component, beforePodHash); err != nil {
return rollbackOldManifests(recoverManifests, err, pathMgr, recoverEtcd) return rollbackOldManifests(recoverManifests, err, pathMgr, recoverEtcd)
} }

View File

@@ -117,8 +117,8 @@ func (w *fakeWaiter) WaitForStaticPodSingleHash(_ string, _ string) (string, err
return "", w.errsToReturn[waitForHashes] return "", w.errsToReturn[waitForHashes]
} }
// WaitForStaticPodControlPlaneHashChange returns an error if set from errsToReturn // WaitForStaticPodHashChange returns an error if set from errsToReturn
func (w *fakeWaiter) WaitForStaticPodControlPlaneHashChange(_, _, _ string) error { func (w *fakeWaiter) WaitForStaticPodHashChange(_, _, _ string) error {
return w.errsToReturn[waitForHashChange] return w.errsToReturn[waitForHashChange]
} }

View File

@@ -19,6 +19,7 @@ go_library(
deps = [ deps = [
"//cmd/kubeadm/app/constants:go_default_library", "//cmd/kubeadm/app/constants:go_default_library",
"//cmd/kubeadm/app/util:go_default_library", "//cmd/kubeadm/app/util:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/registry/core/service/ipallocator:go_default_library", "//pkg/registry/core/service/ipallocator:go_default_library",
"//vendor/k8s.io/api/apps/v1:go_default_library", "//vendor/k8s.io/api/apps/v1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library", "//vendor/k8s.io/api/core/v1:go_default_library",

View File

@@ -17,8 +17,6 @@ limitations under the License.
package apiclient package apiclient
import ( import (
"crypto/sha256"
"encoding/json"
"fmt" "fmt"
"io" "io"
"net/http" "net/http"
@@ -31,6 +29,7 @@ import (
"k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes" clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/cmd/kubeadm/app/constants" "k8s.io/kubernetes/cmd/kubeadm/app/constants"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
) )
// Waiter is an interface for waiting for criteria in Kubernetes to happen // Waiter is an interface for waiting for criteria in Kubernetes to happen
@@ -43,11 +42,11 @@ type Waiter interface {
WaitForPodToDisappear(staticPodName string) error WaitForPodToDisappear(staticPodName string) error
// WaitForStaticPodSingleHash fetches sha256 hash for the control plane static pod // WaitForStaticPodSingleHash fetches sha256 hash for the control plane static pod
WaitForStaticPodSingleHash(nodeName string, component string) (string, error) WaitForStaticPodSingleHash(nodeName string, component string) (string, error)
// WaitForStaticPodHashChange waits for the given static pod component's static pod hash to get updated.
// By doing that we can be sure that the kubelet has restarted the given Static Pod
WaitForStaticPodHashChange(nodeName, component, previousHash string) error
// WaitForStaticPodControlPlaneHashes fetches sha256 hashes for the control plane static pods // WaitForStaticPodControlPlaneHashes fetches sha256 hashes for the control plane static pods
WaitForStaticPodControlPlaneHashes(nodeName string) (map[string]string, error) WaitForStaticPodControlPlaneHashes(nodeName string) (map[string]string, error)
// WaitForStaticPodControlPlaneHashChange waits for the given static pod component's static pod hash to get updated.
// By doing that we can be sure that the kubelet has restarted the given Static Pod
WaitForStaticPodControlPlaneHashChange(nodeName, component, previousHash string) error
// WaitForHealthyKubelet blocks until the kubelet /healthz endpoint returns 'ok' // WaitForHealthyKubelet blocks until the kubelet /healthz endpoint returns 'ok'
WaitForHealthyKubelet(initalTimeout time.Duration, healthzEndpoint string) error WaitForHealthyKubelet(initalTimeout time.Duration, healthzEndpoint string) error
// SetTimeout adjusts the timeout to the specified duration // SetTimeout adjusts the timeout to the specified duration
@@ -194,17 +193,17 @@ func (w *KubeWaiter) WaitForStaticPodSingleHash(nodeName string, component strin
return componentPodHash, err return componentPodHash, err
} }
// WaitForStaticPodControlPlaneHashChange blocks until it timeouts or notices that the Mirror Pod (for the Static Pod, respectively) has changed // WaitForStaticPodHashChange blocks until it timeouts or notices that the Mirror Pod (for the Static Pod, respectively) has changed
// This implicitly means this function blocks until the kubelet has restarted the Static Pod in question // This implicitly means this function blocks until the kubelet has restarted the Static Pod in question
func (w *KubeWaiter) WaitForStaticPodControlPlaneHashChange(nodeName, component, previousHash string) error { func (w *KubeWaiter) WaitForStaticPodHashChange(nodeName, component, previousHash string) error {
return wait.PollImmediate(constants.APICallRetryInterval, w.timeout, func() (bool, error) { return wait.PollImmediate(constants.APICallRetryInterval, w.timeout, func() (bool, error) {
hashes, err := getStaticPodControlPlaneHashes(w.client, nodeName) hash, err := getStaticPodSingleHash(w.client, nodeName, component)
if err != nil { if err != nil {
return false, nil return false, nil
} }
// We should continue polling until the UID changes // We should continue polling until the UID changes
if hashes[component] == previousHash { if hash == previousHash {
return false, nil return false, nil
} }
@@ -235,12 +234,9 @@ func getStaticPodSingleHash(client clientset.Interface, nodeName string, compone
return "", err return "", err
} }
podBytes, err := json.Marshal(staticPod) staticPodHash := staticPod.Annotations[kubetypes.ConfigHashAnnotationKey]
if err != nil { fmt.Printf("Static pod: %s hash: %s\n", staticPodName, staticPodHash)
return "", err return staticPodHash, nil
}
return fmt.Sprintf("%x", sha256.Sum256(podBytes)), nil
} }
// TryRunCommand runs a function a maximum of failureThreshold times, and retries on error. If failureThreshold is hit; the last error is returned // TryRunCommand runs a function a maximum of failureThreshold times, and retries on error. If failureThreshold is hit; the last error is returned

View File

@@ -106,8 +106,7 @@ func (w *Waiter) WaitForHealthyKubelet(_ time.Duration, healthzEndpoint string)
// SetTimeout is a no-op; we don't wait in this implementation // SetTimeout is a no-op; we don't wait in this implementation
func (w *Waiter) SetTimeout(_ time.Duration) {} func (w *Waiter) SetTimeout(_ time.Duration) {}
// WaitForStaticPodControlPlaneHashes returns an empty hash for all control plane images; WaitForStaticPodControlPlaneHashChange won't block in any case // WaitForStaticPodControlPlaneHashes returns an empty hash for all control plane images;
// but the empty strings there are needed
func (w *Waiter) WaitForStaticPodControlPlaneHashes(_ string) (map[string]string, error) { func (w *Waiter) WaitForStaticPodControlPlaneHashes(_ string) (map[string]string, error) {
return map[string]string{ return map[string]string{
constants.KubeAPIServer: "", constants.KubeAPIServer: "",
@@ -122,7 +121,7 @@ func (w *Waiter) WaitForStaticPodSingleHash(_ string, _ string) (string, error)
return "", nil return "", nil
} }
// WaitForStaticPodControlPlaneHashChange returns a dummy nil error in order for the flow to just continue as we're dryrunning // WaitForStaticPodHashChange returns a dummy nil error in order for the flow to just continue as we're dryrunning
func (w *Waiter) WaitForStaticPodControlPlaneHashChange(_, _, _ string) error { func (w *Waiter) WaitForStaticPodHashChange(_, _, _ string) error {
return nil return nil
} }