Dockerfile.test: add "cri-in-userns" (aka rootless) test stage

The `cri-in-userns` stage is for testing "CRI-in-UserNS", which should be used in conjunction with "Kubelet-in-UserNS":
https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2033-kubelet-in-userns-aka-rootless

This feature is mostly expected to be used for `kind` and `minikube`.

Requires Rootless Docker/Podman/nerdctl with cgroup v2 delegation: https://rootlesscontaine.rs/getting-started/common/cgroup2/
(Rootless Docker/Podman/nerdctl prepares the UserNS, so we do not need to create UserNS by ourselves)

Usage:
```
podman build --target cri-in-userns -t cri-in-userns -f contrib/Dockerfile.test .
podman run -it --rm --privileged cri-in-userns
```

The stage is tested on CI with Rootless Podman on Fedora 34 on Vagrant.

Signed-off-by: Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
This commit is contained in:
Akihiro Suda 2021-07-08 18:24:14 +09:00
parent 0573e22e37
commit aefabe5462
No known key found for this signature in database
GPG Key ID: 49524C6F9F638F1A
5 changed files with 139 additions and 1 deletions

View File

@ -526,7 +526,7 @@ jobs:
*-junit.xml
cgroup2:
name: CGroupsV2 and SELinux Integration
name: CGroupsV2 - SELinux enforced
# nested virtualization is only available on macOS hosts
runs-on: macos-10.15
timeout-minutes: 45
@ -580,3 +580,31 @@ jobs:
path: |
${{github.workspace}}/*-junit.xml
${{github.workspace}}/critestreport/*
cgroup2-misc:
name: CGroupsV2 - rootless CRI test
# nested virtualization is only available on macOS hosts
runs-on: macos-10.15
timeout-minutes: 45
needs: [project, linters, protos, man]
steps:
- uses: actions/checkout@v2
- name: "Cache ~/.vagrant.d/boxes"
uses: actions/cache@v2
with:
path: ~/.vagrant.d/boxes
key: vagrant-${{ hashFiles('Vagrantfile*') }}
- name: Vagrant start
run: |
# Retry if it fails (download.fedoraproject.org returns 404 sometimes)
vagrant up || vagrant up
# slow, so separated from the regular cgroup2 task
- name: CRI-in-UserNS test with Rootless Podman
run: |
vagrant up --provision-with=install-rootless-podman
# Execute rootless podman to create the UserNS env
vagrant ssh -- podman build --target cri-in-userns -t cri-in-userns -f /vagrant/contrib/Dockerfile.test /vagrant
vagrant ssh -- podman run --rm --privileged cri-in-userns

27
Vagrantfile vendored
View File

@ -257,4 +257,31 @@ EOF
SHELL
end
# Rootless Podman is used for testing CRI-in-UserNS
# (We could use rootless nerdctl, but we are using Podman here because it is available in dnf)
config.vm.provision "install-rootless-podman", type: "shell", run: "never" do |sh|
sh.upload_path = "/tmp/vagrant-install-rootless-podman"
sh.inline = <<~SHELL
#!/usr/bin/env bash
set -eux -o pipefail
# Delegate cgroup v2 controllers to rootless
mkdir -p /etc/systemd/system/user@.service.d
cat > /etc/systemd/system/user@.service.d/delegate.conf << EOF
[Service]
Delegate=yes
EOF
systemctl daemon-reload
# Install Podman
dnf install -y podman
# Configure Podman to resolve `golang` to `docker.io/library/golang`
mkdir -p /etc/containers
cat > /etc/containers/registries.conf <<EOF
[registries.search]
registries = ['docker.io']
EOF
# Disable SELinux to allow overlayfs
setenforce 0
SHELL
end
end

View File

@ -58,3 +58,25 @@ COPY --from=proto3 /usr/local/include/google /usr/local/include/google
COPY --from=runc /usr/local/sbin/runc /usr/local/go/bin/runc
COPY . .
# cri-in-userns stage is for testing "CRI-in-UserNS", which should be used in conjunction with
# "Kubelet-in-UserNS": https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2033-kubelet-in-userns-aka-rootless
# This feature is mostly expected to be used for `kind` and `minikube`.
#
# Requires Rootless Docker/Podman/nerdctl with cgroup v2 delegation: https://rootlesscontaine.rs/getting-started/common/cgroup2/
# (Rootless Docker/Podman/nerdctl prepares the UserNS, so we do not need to create UserNS by ourselves)
FROM dev AS cri-in-userns
RUN make BUILDTAGS="no_btrfs no_devmapper" binaries install
RUN apt-get update && apt-get install -y iptables
RUN ./script/setup/install-cni
RUN ./script/setup/install-critools
COPY contrib/Dockerfile.test.d/cri-in-userns/etc_containerd_config.toml /etc/containerd/config.toml
COPY contrib/Dockerfile.test.d/cri-in-userns/docker-entrypoint.sh /docker-entrypoint.sh
VOLUME /var/lib/containerd
ENTRYPOINT ["/docker-entrypoint.sh"]
# Skip "runtime should support unsafe sysctls": `container init caused: write sysctl key fs.mqueue.msg_max: open /proc/sys/fs/mqueue/msg_max: permission denied`
# Skip "runtime should support safe sysctls": `container init caused: write sysctl key kernel.shm_rmid_forced: open /proc/sys/kernel/shm_rmid_forced: permission denied`
# Skip "should allow privilege escalation when (NoNewPrivis is) false": expected log "Effective uid: 0\n" (stream="stdout") not found in logs [{timestamp:{wall:974487519 ext:63761339984 loc:<nil>} stream:stdout log:Effective uid: 1000) }]
CMD ["critest", "--ginkgo.skip=should support unsafe sysctls|should support safe sysctls|should allow privilege escalation when false"]
FROM dev AS default

View File

@ -0,0 +1,51 @@
#!/bin/bash
# Copyright The containerd Authors.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -eu -o pipefail
# Check 4294967295 to detect UserNS (https://github.com/opencontainers/runc/blob/v1.0.0/libcontainer/userns/userns_linux.go#L29-L32)
if grep -Eq "0[[:space:]]+0[[:space:]]+4294967295" /proc/self/uid_map; then
echo >&2 "ERROR: Needs to be executed in UserNS (i.e., rootless Docker/Podman/nerdctl)"
exit 1
fi
if [ ! -f "/sys/fs/cgroup/cgroup.controllers" ]; then
echo >&2 "ERROR: Needs cgroup v2"
exit 1
fi
for f in cpu memory pids; do
if ! grep -qw "$f" "/sys/fs/cgroup/cgroup.controllers"; then
echo >&2 "ERROR: Needs cgroup v2 controller ${f} to be delegated"
exit 1
fi
done
echo >&2 "Enabling cgroup v2 nesting"
# https://github.com/moby/moby/blob/v20.10.7/hack/dind#L28-L38
mkdir -p /sys/fs/cgroup/init
xargs -rn1 < /sys/fs/cgroup/cgroup.procs > /sys/fs/cgroup/init/cgroup.procs || :
sed -e 's/ / +/g' -e 's/^/+/' < /sys/fs/cgroup/cgroup.controllers \
> /sys/fs/cgroup/cgroup.subtree_control
set -x
echo >&2 "Running containerd in background"
containerd &
echo >&2 "Waiting for containerd"
until ctr plugins list; do sleep 3; done
exec "$@"

View File

@ -0,0 +1,10 @@
version = 2
[plugins]
[plugins."io.containerd.grpc.v1.cri"]
disable_apparmor = true
restrict_oom_score_adj = true
disable_hugetlb_controller = true
[plugins."io.containerd.grpc.v1.cri".containerd]
# Rootless overlayfs requires kernel >= 5.11 && !selinux
snapshotter = "overlayfs"