Files
kubernetes/cmd/kubeadm/app/util/dryrun/dryrun.go
Kubernetes Submit Queue 67870dac16 Merge pull request #62655 from stealthybox/TLSUpgrade_+_detiber-kubeadm_hash
Automatic merge from submit-queue (batch tested with PRs 62655, 61711, 59122, 62853, 62390). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

Modify the kubeadm upgrade DAG for the TLS Upgrade

**What this PR does / why we need it**:
This adds the necessary utilities to detect Etcd TLS on static pods from the file system and query Etcd.
It modifies the upgrade logic to make it support the APIServer downtime.
Tests are included and should be passing.

```bash 
bazel test //cmd/kubeadm/... \
  && bazel build //cmd/kubeadm --platforms=@io_bazel_rules_go//go/toolchain:linux_amd64 \
  && issue=TLSUpgrade ~/Repos/vagrant-kubeadm-testing/copy_kubeadm_bin.sh
```
These cases are working consistently for me
```bash
kubeadm-1.9.6 reset \
  && kubeadm-1.9.6 init --kubernetes-version 1.9.1 \
  && kubectl apply -f https://git.io/weave-kube-1.6
/vagrant/bin/TLSUpgrade_kubeadm upgrade apply 1.9.6  # non-TLS to TLS
/vagrant/bin/TLSUpgrade_kubeadm upgrade apply 1.10.0 # TLS to TLS
/vagrant/bin/TLSUpgrade_kubeadm upgrade apply 1.10.1 # TLS to TLS
/vagrant/bin/TLSUpgrade_kubeadm upgrade apply 1.9.1  # TLS to TLS w/ major version downgrade
```

This branch is based on top of #61942, as resolving the hash race condition is necessary for consistent behavior.
It looks to fit in pretty well with @craigtracey's PR: #62141
The interfaces are pretty similar

/assign @detiber @timothysc

**Which issue(s) this PR fixes**
Helps with https://github.com/kubernetes/kubeadm/issues/740

**Special notes for your reviewer**:

278b322a1c
   [kubeadm] Implement ReadStaticPodFromDisk

c74b56372d
   Implement etcdutils with Cluster.HasTLS()

   - Test HasTLS()
   - Instrument throughout upgrade plan and apply
   - Update plan_test and apply_test to use new fake Cluster interfaces
   - Add descriptions to upgrade range test
   - Support KubernetesDir and EtcdDataDir in upgrade tests
   - Cover etcdUpgrade in upgrade tests
   - Cover upcoming TLSUpgrade in upgrade tests

8d8e5fe33b
   Update test-case, fix nil-pointer bug, and improve error message

97117fa873
   Modify the kubeadm upgrade DAG for the TLS Upgrade

   - Calculate `beforePodHashMap` before the etcd upgrade in anticipation of
   KubeAPIServer downtime
   - Detect if pre-upgrade etcd static pod cluster `HasTLS()==false` to switch
   on the Etcd TLS Upgrade. If this is a TLS Upgrade:
      - Skip L7 Etcd check (could implement a waiter for this)
      - Skip data rollback on etcd upgrade failure due to lack of L7 check
    (APIServer is already down and unable to serve new requests)
      - On APIServer upgrade failure, also roll back the etcd manifest to
    maintain protocol compatibility

   - Add logging

**Release note**:
```release-note
kubeadm upgrade no longer races on pod restarts, which could lead to unexpected upgrade behavior
kubeadm upgrade now successfully upgrades etcd and the controlplane to use TLS
kubeadm upgrade now supports external etcd setups
kubeadm upgrade can now roll back and restore etcd after an upgrade failure
```
2018-04-24 13:28:13 -07:00

128 lines
4.4 KiB
Go
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package dryrun
import (
"fmt"
"io"
"io/ioutil"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/errors"
"k8s.io/kubernetes/cmd/kubeadm/app/constants"
"k8s.io/kubernetes/cmd/kubeadm/app/util/apiclient"
)
// FileToPrint represents a temporary file on disk that might want to be aliased when printing.
// Useful for things like loading a file from /tmp/ but saying to the user "Would write file foo to /etc/kubernetes/...".
type FileToPrint struct {
// RealPath is the path the file contents are read from when printing.
// PrintDryRunFiles skips entries whose RealPath is empty.
RealPath string
// PrintPath is the path shown to the user in place of RealPath.
// When it is empty, PrintDryRunFiles shows RealPath instead.
PrintPath string
}
// NewFileToPrint builds a FileToPrint value.
// realPath is stored as RealPath and printPath is stored as PrintPath; neither
// argument is validated or modified.
func NewFileToPrint(realPath, printPath string) FileToPrint {
	var f FileToPrint
	f.RealPath = realPath
	f.PrintPath = printPath
	return f
}
// PrintDryRunFiles writes a "[dryrun] Would write file ..." header followed by
// the tab-prefixed contents of each given file to w.
//
// Entries with an empty RealPath are skipped. A file that cannot be read does
// not stop the loop: its error is collected and the remaining files are still
// printed. All collected read errors are returned as a single aggregate.
func PrintDryRunFiles(files []FileToPrint, w io.Writer) error {
	readErrs := make([]error, 0)

	for i := range files {
		entry := files[i]
		if entry.RealPath == "" {
			continue
		}

		content, readErr := ioutil.ReadFile(entry.RealPath)
		if readErr != nil {
			readErrs = append(readErrs, readErr)
			continue
		}

		// The file may live in a scratch location (e.g. /tmp/kubeadm-dryrun/admin.conf)
		// while the user should be told about its final destination
		// (e.g. /etc/kubernetes/admin.conf). Show PrintPath when it is set and
		// the real location otherwise.
		shownPath := entry.RealPath
		if entry.PrintPath != "" {
			shownPath = entry.PrintPath
		}

		fmt.Fprintf(w, "[dryrun] Would write file %q with content:\n", shownPath)
		apiclient.PrintBytesWithLinePrefix(w, content, "\t")
	}

	return errors.NewAggregate(readErrs)
}
// Waiter is an implementation of apiclient.Waiter that should be used for dry-running.
// It carries no state; its methods in this file return immediately, most of them
// after printing what would have been waited for.
type Waiter struct{}
// NewWaiter returns a dry-run Waiter exposed through the apiclient.Waiter
// interface. It takes no arguments.
func NewWaiter() apiclient.Waiter {
	return new(Waiter)
}
// WaitForAPI prints the API Server health check a real run would wait for and
// returns nil immediately so the program just proceeds.
func (w *Waiter) WaitForAPI() error {
	const notice = "[dryrun] Would wait for the API Server's /healthz endpoint to return 'ok'"
	fmt.Println(notice)
	return nil
}
// WaitForPodsWithLabel prints which label selector a real run would wait on,
// within the metav1.NamespaceSystem namespace, and returns nil immediately so
// the program just proceeds.
func (w *Waiter) WaitForPodsWithLabel(kvLabel string) error {
	namespace := metav1.NamespaceSystem
	fmt.Printf("[dryrun] Would wait for the Pods with the label %q in the %s namespace to become Running\n", kvLabel, namespace)
	return nil
}
// WaitForPodToDisappear prints which Pod, in the metav1.NamespaceSystem
// namespace, a real run would wait to see deleted, and returns nil immediately
// so the program just proceeds.
func (w *Waiter) WaitForPodToDisappear(podName string) error {
	const format = "[dryrun] Would wait for the %q Pod in the %s namespace to be deleted\n"
	fmt.Printf(format, podName, metav1.NamespaceSystem)
	return nil
}
// WaitForHealthyKubelet does not block: it prints the kubelet health endpoint a
// real run would check and returns nil. The duration argument is ignored.
func (w *Waiter) WaitForHealthyKubelet(_ time.Duration, healthzEndpoint string) error {
	const format = "[dryrun] Would make sure the kubelet %q endpoint is healthy\n"
	fmt.Printf(format, healthzEndpoint)
	return nil
}
// SetTimeout ignores the given duration and does nothing.
func (w *Waiter) SetTimeout(_ time.Duration) {
	// Intentionally empty: this implementation doesn't wait, so there is no
	// timeout to store.
}
// WaitForStaticPodControlPlaneHashes returns a map with an empty hash for each
// control plane component (API server, controller manager, scheduler) and a
// nil error. The string argument is ignored.
//
// The keys are kept in the map with empty values rather than being left out;
// the original note says the empty strings are needed (presumably by callers
// that look the components up by name — confirm against the callers).
func (w *Waiter) WaitForStaticPodControlPlaneHashes(_ string) (map[string]string, error) {
	components := []string{
		constants.KubeAPIServer,
		constants.KubeControllerManager,
		constants.KubeScheduler,
	}

	hashes := make(map[string]string, len(components))
	for _, component := range components {
		hashes[component] = ""
	}
	return hashes, nil
}
// WaitForStaticPodSingleHash returns an empty hash and a nil error. Both
// string arguments are ignored.
func (w *Waiter) WaitForStaticPodSingleHash(_ string, _ string) (string, error) {
	var emptyHash string
	return emptyHash, nil
}
// WaitForStaticPodHashChange always returns a nil error so that the calling
// flow just continues while dry-running. All three string arguments are
// ignored.
func (w *Waiter) WaitForStaticPodHashChange(_, _, _ string) error {
	// Dry run: report success right away instead of waiting.
	var err error
	return err
}