Systemd/non-Redhat: Add docker healthcheck script
We do the equivalent of #21727 for systemd systems. Issue #21731
This commit is contained in:
44
cluster/saltbase/salt/docker/docker-healthcheck
Executable file
44
cluster/saltbase/salt/docker/docker-healthcheck
Executable file
@@ -0,0 +1,44 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Copyright 2015 The Kubernetes Authors All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
# Docker health probe, meant to be invoked periodically.
#
# Flow:
#   1. Probe docker; leave quietly if it answers.
#   2. Otherwise wait 30 seconds and probe again.
#   3. If it is still unresponsive, restart it with systemctl, wait another
#      60 seconds, and probe one last time.
# Progress is reported on stdout. Note that the script's exit status is that
# of its last command, so it is 0 even when docker is still failing.

# Succeeds when `docker version` completes within 10 seconds.
docker_responds() {
  timeout 10 docker version > /dev/null
}

# Ends the script with status 0, announcing the recovery, if docker answers.
# Returns normally (doing nothing) when docker is still unresponsive.
exit_if_recovered() {
  if docker_responds; then
    echo "docker recovered"
    exit 0
  fi
}

# Healthy on the first probe: nothing to report.
if docker_responds; then
  exit 0
fi

echo "docker failed"
echo "Giving docker 30 seconds grace before restarting"
sleep 30
exit_if_recovered

echo "docker still down; triggering docker restart"
systemctl restart docker

echo "Waiting 60 seconds to give docker time to start"
sleep 60
exit_if_recovered

echo "docker still failing"
|
||||||
9
cluster/saltbase/salt/docker/docker-healthcheck.service
Normal file
9
cluster/saltbase/salt/docker/docker-healthcheck.service
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
# Runs the docker-healthcheck helper script a single time per activation.
[Unit]
Description=Run docker-healthcheck once

[Service]
# oneshot: the script is run to completion on each start rather than being
# kept around as a long-running daemon.
Type=oneshot
ExecStart=/opt/kubernetes/helpers/docker-healthcheck

[Install]
WantedBy=multi-user.target
|
||||||
9
cluster/saltbase/salt/docker/docker-healthcheck.timer
Normal file
9
cluster/saltbase/salt/docker/docker-healthcheck.timer
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
# Re-triggers docker-healthcheck.service on a recurring basis.
[Unit]
Description=Trigger docker-healthcheck periodically

[Timer]
# Fire 10 seconds after the triggered unit last became inactive, i.e. after
# the previous health check finished. Because the interval is relative to a
# previous run, the service has to run once before this timer can elapse.
OnUnitInactiveSec=10s
Unit=docker-healthcheck.service

[Install]
WantedBy=multi-user.target
|
||||||
@@ -338,6 +338,45 @@ fix-service-docker:
|
|||||||
- cmd: docker-upgrade
|
- cmd: docker-upgrade
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
# Install the health-check script as a root-owned executable.
/opt/kubernetes/helpers/docker-healthcheck:
  file.managed:
    - source: salt://docker/docker-healthcheck
    - user: root
    - group: root
    - mode: 755

# Install the systemd service unit that runs the health-check script once.
# The destination directory comes from the systemd_system_path pillar value.
{{ pillar.get('systemd_system_path') }}/docker-healthcheck.service:
  file.managed:
    - source: salt://docker/docker-healthcheck.service
    - template: jinja
    - user: root
    - group: root
    - mode: 644

# Install the systemd timer unit that re-triggers the service periodically.
{{ pillar.get('systemd_system_path') }}/docker-healthcheck.timer:
  file.managed:
    - source: salt://docker/docker-healthcheck.timer
    - template: jinja
    - user: root
    - group: root
    - mode: 644

# Tell systemd to load the timer
# Runs only when the watched timer unit file changes.
# NOTE(review): the "services bounce" helper is defined elsewhere; presumably
# it reloads and (re)starts the named unit -- confirm against that script.
fix-systemd-docker-healthcheck-timer:
  cmd.wait:
    - name: /opt/kubernetes/helpers/services bounce docker-healthcheck.timer
    - watch:
      - file: {{ pillar.get('systemd_system_path') }}/docker-healthcheck.timer

# Trigger a first run of docker-healthcheck; needed because the timer fires 10s after the previous run.
# Runs only when the watched service unit file changes, and only after the
# fix-service-docker command state.
fix-systemd-docker-healthcheck-service:
  cmd.wait:
    - name: /opt/kubernetes/helpers/services bounce docker-healthcheck.service
    - watch:
      - file: {{ pillar.get('systemd_system_path') }}/docker-healthcheck.service
    - require:
      - cmd: fix-service-docker
|
||||||
|
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
docker:
|
docker:
|
||||||
|
|||||||
Reference in New Issue
Block a user