Merge pull request #612 from Random-Liu/health-monitor-initial-wait

Add initial wait for health-monitor and use pkill -x.
This commit is contained in:
Lantao Liu 2018-02-14 11:36:45 -08:00 committed by GitHub
commit 561d045d71
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 21 additions and 6 deletions

View File

@ -19,6 +19,11 @@ set -o pipefail
# CRICTL is the path of crictl
CRICTL=${CRICTL:-"crictl"}
# INITIAL_WAIT_ATTEMPTS is the number of initial attempts made before
# starting the health check. cri-containerd and containerd are started
# around the same time as the health monitor, so they may not be ready
# yet when health-monitor starts.
INITIAL_WAIT_ATTEMPTS=${INITIAL_WAIT_ATTEMPTS:-5}
# COMMAND_TIMEOUT is the timeout for the health check command.
COMMAND_TIMEOUT=${COMMAND_TIMEOUT:-60}
# CHECK_PERIOD is the health check period.
@ -27,13 +32,21 @@ CHECK_PERIOD=${CHECK_PERIOD:-10}
# and containerd.
SLEEP_SECONDS=${SLEEP_SECONDS:-120}
# Initial wait: probe "crictl pods" until it succeeds or the attempt
# counter reaches INITIAL_WAIT_ATTEMPTS, sleeping between probes.
attempt=1
# The probe runs first on every pass; the attempt limit is only
# consulted when the probe fails (short-circuit ||).
until timeout ${COMMAND_TIMEOUT} ${CRICTL} pods > /dev/null || (( attempt == INITIAL_WAIT_ATTEMPTS ))
do
echo "$attempt initial attempt \"$CRICTL pods\"! Trying again in $attempt seconds..."
# Sleep for the current attempt number of seconds, then increment it,
# so the delay grows by one second after each failed probe.
sleep $(( attempt++ ))
done
echo "Start performing health check."
while true; do
# Use crictl pods because it requires both containerd and
# cri-containerd to be working.
if ! timeout ${COMMAND_TIMEOUT} ${CRICTL} pods > /dev/null; then
echo "crictl pods timeout!"
pkill containerd
pkill cri-containerd
echo "\"$CRICTL pods\" failed!"
pkill -x cri-containerd
pkill -x containerd
# Wait for a while, as we don't want to kill it again before it is really up.
sleep ${SLEEP_SECONDS}
else

View File

@ -45,7 +45,8 @@ test_setup() {
echo "containerd is not installed, please run hack/install-deps.sh"
exit 1
fi
sudo pkill containerd
sudo pkill -x cri-containerd
sudo pkill -x containerd
keepalive "sudo containerd" ${RESTART_WAIT_PERIOD} &> ${report_dir}/containerd.log &
containerd_pid=$!
# Wait for containerd to be running by using the containerd client ctr to check the version
@ -69,7 +70,8 @@ test_teardown() {
if [ -n "${cri_containerd_pid}" ]; then
kill ${cri_containerd_pid}
fi
sudo pkill containerd
sudo pkill -x cri-containerd
sudo pkill -x containerd
}
# keepalive runs a command and keeps it alive.

View File

@ -206,7 +206,7 @@ func Randomize(str string) string {
// KillProcess kills the process by name. pkill is used.
func KillProcess(name string) error {
output, err := exec.Command("pkill", fmt.Sprintf("^%s$", name)).CombinedOutput()
output, err := exec.Command("pkill", "-x", fmt.Sprintf("^%s$", name)).CombinedOutput()
if err != nil {
return fmt.Errorf("failed to kill %q - error: %v, output: %q", name, err, output)
}