test: Add e2e to verify static pod termination
Add a node e2e test to verify that if a static pod is terminated while the
container runtime or CRI returns an error, the pod is eventually terminated
successfully. This test serves as a regression test for k8s.io/issue/113145,
which fixes an issue where force-deleted pods may not be terminated if the
container runtime fails during `syncTerminatingPod`.

To test this behavior, start a static pod, stop the container runtime, and
then restart the container runtime. The static pod is expected to eventually
terminate successfully.

To start and stop the container runtime, we need to find the container
runtime systemd unit name. Introduce a util function
`findContainerRuntimeServiceName` which finds the unit name by getting the
pid of the container runtime from the existing `ContainerRuntimeProcessName`
flag passed into node e2e and using the systemd dbus `GetUnitNameByPID`
function to convert the pid of the container runtime to a unit name. Using
the unit name, introduce helper functions to start and stop the container
runtime.

Signed-off-by: David Porter <david@porter.me>
committed by Clayton Coleman
parent d446bebca8
commit 1c75c2cda8
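The test itself lives in a separate e2e_node spec and is not part of the
util.go diff below; the following is a minimal sketch of how the new helpers
are meant to be wired together (createStaticPod, deleteStaticPod, and
waitForPodTermination are hypothetical stand-ins, not names from this
commit):

// Sketch only: illustrates the intended use of the helpers added below.
// createStaticPod, deleteStaticPod, and waitForPodTermination are
// hypothetical stand-ins for the real test plumbing in this commit.
ginkgo.It("should terminate a static pod even when the container runtime is down", func(ctx context.Context) {
	const podName, ns = "static-pod-termination", "test-ns"

	createStaticPod(ctx, podName, ns)

	// Stop the runtime so the kubelet's syncTerminatingPod will fail.
	framework.ExpectNoError(stopContainerRuntime(), "expected container runtime to stop")

	// Begin termination while the runtime is down.
	deleteStaticPod(ctx, podName, ns)

	// Bring the runtime back; the kubelet should retry and eventually
	// terminate the pod successfully.
	framework.ExpectNoError(startContainerRuntime(), "expected container runtime to start")

	waitForPodTermination(ctx, podName, ns)
})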
@@ -25,17 +25,21 @@ import (
+	"io"
 	"net"
 	"net/http"
 	"os"
 	"os/exec"
 	"regexp"
 	"strconv"
 	"strings"
 	"time"
 
+	"k8s.io/kubernetes/pkg/util/procfs"
+
 	oteltrace "go.opentelemetry.io/otel/trace"
 
 	v1 "k8s.io/api/core/v1"
 	apiequality "k8s.io/apimachinery/pkg/api/equality"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/runtime"
 	"k8s.io/apimachinery/pkg/util/sets"
 	utilfeature "k8s.io/apiserver/pkg/util/feature"
 	clientset "k8s.io/client-go/kubernetes"
@@ -55,6 +59,7 @@ import (
 	"k8s.io/kubernetes/pkg/kubelet/types"
 	"k8s.io/kubernetes/pkg/kubelet/util"
 
+	"github.com/coreos/go-systemd/v22/dbus"
 	"k8s.io/kubernetes/test/e2e/framework"
 	e2ekubelet "k8s.io/kubernetes/test/e2e/framework/kubelet"
 	e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
@@ -84,12 +89,14 @@ const (
 
 var kubeletHealthCheckURL = fmt.Sprintf("http://127.0.0.1:%d/healthz", ports.KubeletHealthzPort)
 
+var containerRuntimeUnitName = ""
+
 func getNodeSummary(ctx context.Context) (*stats.Summary, error) {
 	kubeletConfig, err := getCurrentKubeletConfig(ctx)
 	if err != nil {
 		return nil, fmt.Errorf("failed to get current kubelet config")
 	}
-	req, err := http.NewRequest("GET", fmt.Sprintf("http://%s/stats/summary", net.JoinHostPort(kubeletConfig.Address, strconv.Itoa(int(kubeletConfig.ReadOnlyPort)))), nil)
+	req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("http://%s/stats/summary", net.JoinHostPort(kubeletConfig.Address, strconv.Itoa(int(kubeletConfig.ReadOnlyPort)))), nil)
 	if err != nil {
 		return nil, fmt.Errorf("failed to build http request: %w", err)
 	}
@@ -340,6 +347,71 @@ func findKubeletServiceName(running bool) string {
 	return kubeletServiceName
 }
 
+func findContainerRuntimeServiceName() (string, error) {
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	conn, err := dbus.NewWithContext(ctx)
+	framework.ExpectNoError(err, "Failed to setup dbus connection")
+	defer conn.Close()
+
+	runtimePids, err := getPidsForProcess(framework.TestContext.ContainerRuntimeProcessName, framework.TestContext.ContainerRuntimePidFile)
+	framework.ExpectNoError(err, "failed to get list of container runtime pids")
+	framework.ExpectEqual(len(runtimePids), 1, "Unexpected number of container runtime pids. Expected 1 but got %v", len(runtimePids))
+
+	containerRuntimePid := runtimePids[0]
+
+	unitName, err := conn.GetUnitNameByPID(ctx, uint32(containerRuntimePid))
+	framework.ExpectNoError(err, "Failed to get container runtime unit name")
+
+	return unitName, nil
+}
+
+type containerRuntimeUnitOp int
+
+const (
+	startContainerRuntimeUnitOp containerRuntimeUnitOp = iota
+	stopContainerRuntimeUnitOp
+)
+
+func performContainerRuntimeUnitOp(op containerRuntimeUnitOp) error {
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	conn, err := dbus.NewWithContext(ctx)
+	framework.ExpectNoError(err, "Failed to setup dbus connection")
+	defer conn.Close()
+
+	if containerRuntimeUnitName == "" {
+		containerRuntimeUnitName, err = findContainerRuntimeServiceName()
+		framework.ExpectNoError(err, "Failed to find container runtime name")
+	}
+
+	reschan := make(chan string)
+
+	switch op {
+	case startContainerRuntimeUnitOp:
+		conn.StartUnitContext(ctx, containerRuntimeUnitName, "replace", reschan)
+	case stopContainerRuntimeUnitOp:
+		conn.StopUnitContext(ctx, containerRuntimeUnitName, "replace", reschan)
+	default:
+		framework.Failf("Unexpected container runtime op: %v", op)
+	}
+
+	job := <-reschan
+	framework.ExpectEqual(job, "done", "Expected job to complete with done")
+
+	return nil
+}
+
+func stopContainerRuntime() error {
+	return performContainerRuntimeUnitOp(stopContainerRuntimeUnitOp)
+}
+
+func startContainerRuntime() error {
+	return performContainerRuntimeUnitOp(startContainerRuntimeUnitOp)
+}
+
 // restartKubelet restarts the current kubelet service.
 // the "current" kubelet service is the instance managed by the current e2e_node test run.
 // If `running` is true, restarts only if the current kubelet is actually running. In some cases,
@@ -465,3 +537,35 @@ func waitForAllContainerRemoval(ctx context.Context, podName, podNS string) {
 		return nil
 	}, 2*time.Minute, 1*time.Second).Should(gomega.Succeed())
 }
+
+func getPidsForProcess(name, pidFile string) ([]int, error) {
+	if len(pidFile) > 0 {
+		pid, err := getPidFromPidFile(pidFile)
+		if err == nil {
+			return []int{pid}, nil
+		}
+		// log the error and fall back to pidof
+		runtime.HandleError(err)
+	}
+	return procfs.PidOf(name)
+}
+
+func getPidFromPidFile(pidFile string) (int, error) {
+	file, err := os.Open(pidFile)
+	if err != nil {
+		return 0, fmt.Errorf("error opening pid file %s: %v", pidFile, err)
+	}
+	defer file.Close()
+
+	data, err := io.ReadAll(file)
+	if err != nil {
+		return 0, fmt.Errorf("error reading pid file %s: %v", pidFile, err)
+	}
+
+	pid, err := strconv.Atoi(string(data))
+	if err != nil {
+		return 0, fmt.Errorf("error parsing %s as a number: %v", string(data), err)
+	}
+
+	return pid, nil
+}
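As a follow-up illustration of the systemd dbus lookup described in the
commit message, here is a small standalone sketch (assuming go-systemd v22;
the command-line handling is illustrative) that resolves a pid to its owning
unit the same way findContainerRuntimeServiceName does:

package main

import (
	"context"
	"fmt"
	"log"
	"os"
	"strconv"
	"time"

	"github.com/coreos/go-systemd/v22/dbus"
)

// Resolves a PID, passed as the only argument, to the systemd unit that
// owns it — the same lookup findContainerRuntimeServiceName performs on
// the container runtime's pid.
func main() {
	if len(os.Args) != 2 {
		log.Fatalf("usage: %s <pid>", os.Args[0])
	}
	pid, err := strconv.Atoi(os.Args[1])
	if err != nil {
		log.Fatalf("invalid pid %q: %v", os.Args[1], err)
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	conn, err := dbus.NewWithContext(ctx)
	if err != nil {
		log.Fatalf("failed to set up dbus connection: %v", err)
	}
	defer conn.Close()

	unitName, err := conn.GetUnitNameByPID(ctx, uint32(pid))
	if err != nil {
		log.Fatalf("failed to resolve unit name for pid %d: %v", pid, err)
	}
	fmt.Println(unitName) // e.g. "containerd.service"
}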