Add initial wait for health-monitor and use pkill -x.

Signed-off-by: Lantao Liu <lantaol@google.com>
This commit is contained in:
Lantao Liu 2018-02-14 08:56:07 +00:00
parent df461c0d48
commit 9b2d2a203c
3 changed files with 21 additions and 6 deletions

View File

@ -19,6 +19,11 @@ set -o pipefail
# CRICTL is the path of crictl # CRICTL is the path of crictl
CRICTL=${CRICTL:-"crictl"} CRICTL=${CRICTL:-"crictl"}
# INITIAL_WAIT_ATTEMPTS is the maximum number of initial probe attempts
# made before the regular health check starts. cri-containerd and
# containerd are started at around the same time as the health monitor,
# so they may not be ready yet when the health monitor starts.
INITIAL_WAIT_ATTEMPTS=${INITIAL_WAIT_ATTEMPTS:-5}
# COMMAND_TIMEOUT is the timeout for the health check command. # COMMAND_TIMEOUT is the timeout for the health check command.
COMMAND_TIMEOUT=${COMMAND_TIMEOUT:-60} COMMAND_TIMEOUT=${COMMAND_TIMEOUT:-60}
# CHECK_PERIOD is the health check period. # CHECK_PERIOD is the health check period.
@ -27,13 +32,21 @@ CHECK_PERIOD=${CHECK_PERIOD:-10}
# and containerd. # and containerd.
SLEEP_SECONDS=${SLEEP_SECONDS:-120} SLEEP_SECONDS=${SLEEP_SECONDS:-120}
# Give containerd and cri-containerd a chance to come up before the
# regular health check starts. Probe with "crictl pods", sleeping
# 1, 2, 3, ... seconds between failed probes, and stop waiting once
# INITIAL_WAIT_ATTEMPTS probes have been made, even if the probe is
# still failing. ">=" (rather than "==") keeps the wait bounded when
# INITIAL_WAIT_ATTEMPTS is set to a value below 1.
attempt=1
until timeout ${COMMAND_TIMEOUT} ${CRICTL} pods > /dev/null || (( attempt >= INITIAL_WAIT_ATTEMPTS ))
do
  echo "$attempt initial attempt \"$CRICTL pods\"! Trying again in $attempt seconds..."
  # Post-increment: sleep for the current attempt number, then advance it.
  sleep $(( attempt++ ))
done
echo "Start performing health check."
while true; do while true; do
# Use crictl pods because it requires both containerd and # Use crictl pods because it requires both containerd and
# cri-containerd to be working. # cri-containerd to be working.
if ! timeout ${COMMAND_TIMEOUT} ${CRICTL} pods > /dev/null; then if ! timeout ${COMMAND_TIMEOUT} ${CRICTL} pods > /dev/null; then
echo "crictl pods timeout!" echo "\"$CRICTL pods\" failed!"
pkill containerd pkill -x cri-containerd
pkill cri-containerd pkill -x containerd
# Wait for a while, as we don't want to kill it again before it is really up. # Wait for a while, as we don't want to kill it again before it is really up.
sleep ${SLEEP_SECONDS} sleep ${SLEEP_SECONDS}
else else

View File

@ -45,7 +45,8 @@ test_setup() {
echo "containerd is not installed, please run hack/install-deps.sh" echo "containerd is not installed, please run hack/install-deps.sh"
exit 1 exit 1
fi fi
sudo pkill containerd sudo pkill -x cri-containerd
sudo pkill -x containerd
keepalive "sudo containerd" ${RESTART_WAIT_PERIOD} &> ${report_dir}/containerd.log & keepalive "sudo containerd" ${RESTART_WAIT_PERIOD} &> ${report_dir}/containerd.log &
containerd_pid=$! containerd_pid=$!
# Wait for containerd to be running by using the containerd client ctr to check the version # Wait for containerd to be running by using the containerd client ctr to check the version
@ -69,7 +70,8 @@ test_teardown() {
if [ -n "${cri_containerd_pid}" ]; then if [ -n "${cri_containerd_pid}" ]; then
kill ${cri_containerd_pid} kill ${cri_containerd_pid}
fi fi
sudo pkill containerd sudo pkill -x cri-containerd
sudo pkill -x containerd
} }
# keepalive runs a command and keeps it alive. # keepalive runs a command and keeps it alive.

View File

@ -206,7 +206,7 @@ func Randomize(str string) string {
// KillProcess kills the process by name. pkill is used. // KillProcess kills the process by name. pkill is used.
func KillProcess(name string) error { func KillProcess(name string) error {
output, err := exec.Command("pkill", fmt.Sprintf("^%s$", name)).CombinedOutput() output, err := exec.Command("pkill", "-x", fmt.Sprintf("^%s$", name)).CombinedOutput()
if err != nil { if err != nil {
return fmt.Errorf("failed to kill %q - error: %v, output: %q", name, err, output) return fmt.Errorf("failed to kill %q - error: %v, output: %q", name, err, output)
} }