Merge pull request #612 from Random-Liu/health-monitor-initial-wait
Add initial wait for health-monitor and use pkill -x.
This commit is contained in:
commit
561d045d71
@ -19,6 +19,11 @@ set -o pipefail
|
|||||||
|
|
||||||
# CRICTL is the path of crictl
|
# CRICTL is the path of crictl
|
||||||
CRICTL=${CRICTL:-"crictl"}
|
CRICTL=${CRICTL:-"crictl"}
|
||||||
|
# INITIAL_WAIT_ATTEMPTS is the number of attempts to make before
|
||||||
|
# performing health check. The problem is that cri-containerd
|
||||||
|
# and containerd are started around the same time with health
|
||||||
|
# monitor, so they may not be ready yet when health-monitor is started.
|
||||||
|
INITIAL_WAIT_ATTEMPTS=${INITIAL_WAIT_ATTEMPTS:-5}
|
||||||
# COMMAND_TIMEOUT is the timeout for the health check command.
|
# COMMAND_TIMEOUT is the timeout for the health check command.
|
||||||
COMMAND_TIMEOUT=${COMMAND_TIMEOUT:-60}
|
COMMAND_TIMEOUT=${COMMAND_TIMEOUT:-60}
|
||||||
# CHECK_PERIOD is the health check period.
|
# CHECK_PERIOD is the health check period.
|
||||||
@ -27,13 +32,21 @@ CHECK_PERIOD=${CHECK_PERIOD:-10}
|
|||||||
# and containerd.
|
# and containerd.
|
||||||
SLEEP_SECONDS=${SLEEP_SECONDS:-120}
|
SLEEP_SECONDS=${SLEEP_SECONDS:-120}
|
||||||
|
|
||||||
|
attempt=1
|
||||||
|
until timeout ${COMMAND_TIMEOUT} ${CRICTL} pods > /dev/null || (( attempt == INITIAL_WAIT_ATTEMPTS ))
|
||||||
|
do
|
||||||
|
echo "$attempt initial attempt \"$CRICTL pods\"! Trying again in $attempt seconds..."
|
||||||
|
sleep $(( attempt++ ))
|
||||||
|
done
|
||||||
|
|
||||||
|
echo "Start performing health check."
|
||||||
while true; do
|
while true; do
|
||||||
# Use crictl pods because it requires both containerd and
|
# Use crictl pods because it requires both containerd and
|
||||||
# cri-containerd to be working.
|
# cri-containerd to be working.
|
||||||
if ! timeout ${COMMAND_TIMEOUT} ${CRICTL} pods > /dev/null; then
|
if ! timeout ${COMMAND_TIMEOUT} ${CRICTL} pods > /dev/null; then
|
||||||
echo "crictl pods timeout!"
|
echo "\"$CRICTL pods\" failed!"
|
||||||
pkill containerd
|
pkill -x cri-containerd
|
||||||
pkill cri-containerd
|
pkill -x containerd
|
||||||
# Wait for a while, as we don't want to kill it again before it is really up.
|
# Wait for a while, as we don't want to kill it again before it is really up.
|
||||||
sleep ${SLEEP_SECONDS}
|
sleep ${SLEEP_SECONDS}
|
||||||
else
|
else
|
||||||
|
@ -45,7 +45,8 @@ test_setup() {
|
|||||||
echo "containerd is not installed, please run hack/install-deps.sh"
|
echo "containerd is not installed, please run hack/install-deps.sh"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
sudo pkill containerd
|
sudo pkill -x cri-containerd
|
||||||
|
sudo pkill -x containerd
|
||||||
keepalive "sudo containerd" ${RESTART_WAIT_PERIOD} &> ${report_dir}/containerd.log &
|
keepalive "sudo containerd" ${RESTART_WAIT_PERIOD} &> ${report_dir}/containerd.log &
|
||||||
containerd_pid=$!
|
containerd_pid=$!
|
||||||
# Wait for containerd to be running by using the containerd client ctr to check the version
|
# Wait for containerd to be running by using the containerd client ctr to check the version
|
||||||
@ -69,7 +70,8 @@ test_teardown() {
|
|||||||
if [ -n "${cri_containerd_pid}" ]; then
|
if [ -n "${cri_containerd_pid}" ]; then
|
||||||
kill ${cri_containerd_pid}
|
kill ${cri_containerd_pid}
|
||||||
fi
|
fi
|
||||||
sudo pkill containerd
|
sudo pkill -x cri-containerd
|
||||||
|
sudo pkill -x containerd
|
||||||
}
|
}
|
||||||
|
|
||||||
# keepalive runs a command and keeps it alive.
|
# keepalive runs a command and keeps it alive.
|
||||||
|
@ -206,7 +206,7 @@ func Randomize(str string) string {
|
|||||||
|
|
||||||
// KillProcess kills the process by name. pkill is used.
|
// KillProcess kills the process by name. pkill is used.
|
||||||
func KillProcess(name string) error {
|
func KillProcess(name string) error {
|
||||||
output, err := exec.Command("pkill", fmt.Sprintf("^%s$", name)).CombinedOutput()
|
output, err := exec.Command("pkill", "-x", fmt.Sprintf("^%s$", name)).CombinedOutput()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to kill %q - error: %v, output: %q", name, err, output)
|
return fmt.Errorf("failed to kill %q - error: %v, output: %q", name, err, output)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user