diff --git a/contrib/ansible/README.md b/contrib/ansible/README.md
new file mode 100644
index 000000000..bbe05d3ff
--- /dev/null
+++ b/contrib/ansible/README.md
@@ -0,0 +1,122 @@
+# Kubernetes Cluster with Containerd
+
+
+
+
+
+
+This document provides the steps to bring up a Kubernetes cluster using ansible and kubeadm tools.
+
+### Prerequisites:
+- **OS**: Ubuntu 16.04 (will be updated with additional distros after testing)
+- **Python**: 2.7+
+- **Ansible**: 2.4+
+
+## Step 0:
+- Install Ansible on the host where you will provision the cluster. This host may be one of the nodes you plan to include in your cluster. Installation instructions for Ansible are found [here](http://docs.ansible.com/ansible/latest/intro_installation.html).
+- Create a hosts file and include the IP addresses of the hosts that need to be provisioned by Ansible.
+```console
+$ cat hosts
+172.31.7.230
+172.31.13.159
+172.31.1.227
+```
+- Set up passwordless SSH access from the host where you are running Ansible to all the hosts in the hosts file. The instructions can be found [here](http://www.linuxproblem.org/art_9.html)
+
+## Step 1:
+At this point, the ansible playbook should be able to ssh into the machines in the hosts file.
+```console
+git clone https://github.com/containerd/cri
+cd ./cri/contrib/ansible
+ansible-playbook -i hosts cri-containerd.yaml
+```
+A typical cloud login might have a username and private key file, in which case the following can be used:
+```console
+ansible-playbook -i hosts -u <username> --private-key <private-key-file> cri-containerd.yaml
+```
+For more options ansible config file (/etc/ansible/ansible.cfg) can be used to set defaults. Please refer to [Ansible options](http://docs.ansible.com/ansible/latest/intro_configuration.html) for advanced ansible configurations.
+
+At the end of this step, you will have the required software installed on the hosts to bring up a Kubernetes cluster.
+```console
+PLAY RECAP ***************************************************************************************************************************************************************
+172.31.1.227 : ok=21 changed=7 unreachable=0 failed=0
+172.31.13.159 : ok=21 changed=7 unreachable=0 failed=0
+172.31.7.230 : ok=21 changed=7 unreachable=0 failed=0
+```
+
+## Step 2:
+Use [kubeadm](https://kubernetes.io/docs/setup/independent/install-kubeadm/) to bring up a Kubernetes Cluster. Depending on what third-party provider you choose, you might have to set the ```--pod-network-cidr``` to something provider-specific.
+Initialize the cluster from one of the nodes (Note: This node will be the master node):
+```console
+$ sudo kubeadm init --skip-preflight-checks
+[kubeadm] WARNING: kubeadm is in beta, please do not use it for production clusters.
+[init] Using Kubernetes version: v1.7.6
+[init] Using Authorization modes: [Node RBAC]
+[preflight] Skipping pre-flight checks
+[kubeadm] WARNING: starting in 1.8, tokens expire after 24 hours by default (if you require a non-expiring token use --token-ttl 0)
+[certificates] Generated CA certificate and key.
+[certificates] Generated API server certificate and key.
+[certificates] API Server serving cert is signed for DNS names [abhi-k8-ubuntu-1 kubernetes kubernetes.default kubernetes.default.svc kubernetes.default.svc.cluster.local] and IPs [10.96.0.1 172.31.7.230]
+[certificates] Generated API server kubelet client certificate and key.
+[certificates] Generated service account token signing key and public key.
+[certificates] Generated front-proxy CA certificate and key.
+[certificates] Generated front-proxy client certificate and key.
+[certificates] Valid certificates and keys now exist in "/etc/kubernetes/pki"
+[kubeconfig] Wrote KubeConfig file to disk: "/etc/kubernetes/admin.conf"
+[kubeconfig] Wrote KubeConfig file to disk: "/etc/kubernetes/kubelet.conf"
+[kubeconfig] Wrote KubeConfig file to disk: "/etc/kubernetes/controller-manager.conf"
+[kubeconfig] Wrote KubeConfig file to disk: "/etc/kubernetes/scheduler.conf"
+[apiclient] Created API client, waiting for the control plane to become ready
+[apiclient] All control plane components are healthy after 42.002391 seconds
+[token] Using token: 43a25d.420ff2e06336e4c1
+[apiconfig] Created RBAC rules
+[addons] Applied essential addon: kube-proxy
+[addons] Applied essential addon: kube-dns
+
+Your Kubernetes master has initialized successfully!
+
+To start using your cluster, you need to run (as a regular user):
+
+ mkdir -p $HOME/.kube
+ sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
+ sudo chown $(id -u):$(id -g) $HOME/.kube/config
+
+You should now deploy a pod network to the cluster.
+Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
+ http://kubernetes.io/docs/admin/addons/
+
+You can now join any number of machines by running the following on each node
+as root:
+
+ kubeadm join --token 43a25d.420ff2e06336e4c1 172.31.7.230:6443
+
+```
+## Step 3:
+Use kubeadm join to add each of the remaining nodes to your cluster. (Note: Uses token that was generated during cluster init.)
+```console
+$ sudo kubeadm join --token 43a25d.420ff2e06336e4c1 172.31.7.230:6443 --skip-preflight-checks
+[kubeadm] WARNING: kubeadm is in beta, please do not use it for production clusters.
+[preflight] Skipping pre-flight checks
+[discovery] Trying to connect to API Server "172.31.7.230:6443"
+[discovery] Created cluster-info discovery client, requesting info from "https://172.31.7.230:6443"
+[discovery] Cluster info signature and contents are valid, will use API Server "https://172.31.7.230:6443"
+[discovery] Successfully established connection with API Server "172.31.7.230:6443"
+[bootstrap] Detected server version: v1.7.6
+[bootstrap] The server supports the Certificates API (certificates.k8s.io/v1beta1)
+[csr] Created API client to obtain unique certificate for this node, generating keys and certificate signing request
+[csr] Received signed certificate from the API server, generating KubeConfig...
+[kubeconfig] Wrote KubeConfig file to disk: "/etc/kubernetes/kubelet.conf"
+
+Node join complete:
+* Certificate signing request sent to master and response
+ received.
+* Kubelet informed of new secure connection details.
+
+Run 'kubectl get nodes' on the master to see this machine join.
+```
+At the end of Step 3 you should have a kubernetes cluster up and running and ready for deployment.
+
+## Step 4:
+Please follow the instructions [here](https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/#pod-network) to deploy CNI network plugins and start a demo app.
+
+We are constantly striving to improve the installer. Please feel free to open issues and provide suggestions to make the installer fast and easy to use. We are open to receiving help in validating and improving the installer on different distros.
diff --git a/contrib/ansible/cri-containerd.yaml b/contrib/ansible/cri-containerd.yaml
new file mode 100644
index 000000000..f7949601b
--- /dev/null
+++ b/contrib/ansible/cri-containerd.yaml
@@ -0,0 +1,66 @@
+---
+- hosts: all
+ become: true
+ tasks:
+ - include_vars: vars/vars.yaml # Contains tasks variables for installer
+ - include_tasks: tasks/bootstrap_ubuntu.yaml # Contains tasks bootstrap components for ubuntu systems
+ when: ansible_distribution == "Ubuntu"
+ - include_tasks: tasks/bootstrap_centos.yaml # Contains tasks bootstrap components for centos systems
+ when: ansible_distribution == "CentOS"
+ - include_tasks: tasks/k8s.yaml # Contains tasks kubernetes component installation
+ - include_tasks: tasks/binaries.yaml # Contains tasks for pulling containerd components
+
+ - name: "Create a directory for containerd config"
+ file: path=/etc/containerd state=directory
+
+ - name: "Start Containerd"
+ systemd: name=containerd daemon_reload=yes state=started enabled=yes
+
+ - name: "Load br_netfilter kernel module"
+ modprobe:
+ name: br_netfilter
+ state: present
+
+ - name: "Set bridge-nf-call-iptables"
+ sysctl:
+ name: net.bridge.bridge-nf-call-iptables
+ value: 1
+
+ - name: "Set ip_forward"
+ sysctl:
+ name: net.ipv4.ip_forward
+ value: 1
+
+ - name: "Check kubelet args in kubelet config (Ubuntu)"
+ shell: grep "^Environment=\"KUBELET_EXTRA_ARGS=" /etc/systemd/system/kubelet.service.d/10-kubeadm.conf || true
+ register: check_args
+ when: ansible_distribution == "Ubuntu"
+
+ - name: "Add runtime args in kubelet conf (Ubuntu)"
+ lineinfile:
+ dest: "/etc/systemd/system/kubelet.service.d/10-kubeadm.conf"
+ line: "Environment=\"KUBELET_EXTRA_ARGS= --runtime-cgroups=/system.slice/containerd.service --container-runtime=remote --runtime-request-timeout=15m --container-runtime-endpoint=unix:///run/containerd/containerd.sock\""
+ insertafter: '\[Service\]'
+ when: ansible_distribution == "Ubuntu" and check_args.stdout == ""
+
+ - name: "Check kubelet args in kubelet config (CentOS)"
+ shell: grep "^Environment=\"KUBELET_EXTRA_ARGS=" /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf || true
+ register: check_args
+ when: ansible_distribution == "CentOS"
+
+ - name: "Add runtime args in kubelet conf (CentOS)"
+ lineinfile:
+ dest: "/usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf"
+ line: "Environment=\"KUBELET_EXTRA_ARGS= --runtime-cgroups=/system.slice/containerd.service --container-runtime=remote --runtime-request-timeout=15m --container-runtime-endpoint=unix:///run/containerd/containerd.sock\""
+ insertafter: '\[Service\]'
+ when: ansible_distribution == "CentOS" and check_args.stdout == ""
+
+ - name: "Start Kubelet"
+ systemd: name=kubelet daemon_reload=yes state=started enabled=yes
+
+ # TODO This needs to be removed once we have consistent concurrent pull results
+ - name: "Pre-pull pause container image"
+ shell: |
+ /usr/local/bin/ctr pull k8s.gcr.io/pause:3.2
+ /usr/local/bin/crictl --runtime-endpoint unix:///run/containerd/containerd.sock \
+ pull k8s.gcr.io/pause:3.2
diff --git a/contrib/ansible/tasks/binaries.yaml b/contrib/ansible/tasks/binaries.yaml
new file mode 100644
index 000000000..b34144f75
--- /dev/null
+++ b/contrib/ansible/tasks/binaries.yaml
@@ -0,0 +1,12 @@
+---
+- name: "Get Containerd"
+ unarchive:
+ src: "https://storage.googleapis.com/cri-containerd-release/cri-containerd-{{ containerd_release_version }}.linux-amd64.tar.gz"
+ dest: "/"
+ remote_src: yes
+
+- name: "Create a directory for cni binary"
+ file: path={{ cni_bin_dir }} state=directory
+
+- name: "Create a directory for cni config files"
+ file: path={{ cni_conf_dir }} state=directory
diff --git a/contrib/ansible/tasks/bootstrap_centos.yaml b/contrib/ansible/tasks/bootstrap_centos.yaml
new file mode 100644
index 000000000..5d9e66a62
--- /dev/null
+++ b/contrib/ansible/tasks/bootstrap_centos.yaml
@@ -0,0 +1,12 @@
+---
+- name: "Install required packages on CentOS "
+ yum:
+ name: "{{ item }}"
+ state: latest
+ with_items:
+ - unzip
+ - tar
+ - btrfs-progs
+ - libseccomp
+ - util-linux
+ - libselinux-python
diff --git a/contrib/ansible/tasks/bootstrap_ubuntu.yaml b/contrib/ansible/tasks/bootstrap_ubuntu.yaml
new file mode 100644
index 000000000..3bb9b2134
--- /dev/null
+++ b/contrib/ansible/tasks/bootstrap_ubuntu.yaml
@@ -0,0 +1,12 @@
+---
+- name: "Install required packages on Ubuntu"
+ package:
+ name: "{{ item }}"
+ state: latest
+ with_items:
+ - unzip
+ - tar
+ - apt-transport-https
+ - btrfs-tools
+ - libseccomp2
+ - util-linux
diff --git a/contrib/ansible/tasks/k8s.yaml b/contrib/ansible/tasks/k8s.yaml
new file mode 100644
index 000000000..e2e017c20
--- /dev/null
+++ b/contrib/ansible/tasks/k8s.yaml
@@ -0,0 +1,52 @@
+---
+- name: "Add gpg key (Ubuntu)"
+ apt_key:
+ url: https://packages.cloud.google.com/apt/doc/apt-key.gpg
+ state: present
+ when: ansible_distribution == "Ubuntu"
+
+- name: "Add kubernetes source list (Ubuntu)"
+ apt_repository:
+ repo: "deb http://apt.kubernetes.io/ kubernetes-{{ ansible_distribution_release }} main"
+ state: present
+ filename: "kubernetes"
+ when: ansible_distribution == "Ubuntu"
+
+- name: "Update the repository cache (Ubuntu)"
+ apt:
+ update_cache: yes
+ when: ansible_distribution == "Ubuntu"
+
+- name: "Add Kubernetes repository and install gpg key (CentOS)"
+ yum_repository:
+ name: kubernetes
+ description: Kubernetes repository
+ baseurl: https://packages.cloud.google.com/yum/repos/kubernetes-el7-x86_64
+ gpgcheck: yes
+ enabled: yes
+ repo_gpgcheck: yes
+ gpgkey:
+ - https://packages.cloud.google.com/yum/doc/yum-key.gpg
+ - https://packages.cloud.google.com/yum/doc/rpm-package-key.gpg
+ when: ansible_distribution == "CentOS"
+
+- name: "Disable SELinux (CentOS)"
+ selinux:
+ state: disabled
+ when: ansible_distribution == "CentOS"
+
+- name: "Install kubelet,kubeadm,kubectl (CentOS)"
+ yum: state=present name={{ item }}
+ with_items:
+ - kubelet
+ - kubeadm
+ - kubectl
+ when: ansible_distribution == "CentOS"
+
+- name: "Install kubelet, kubeadm, kubectl (Ubuntu)"
+ apt: name={{item}} state=installed
+ with_items:
+ - kubelet
+ - kubeadm
+ - kubectl
+ when: ansible_distribution == "Ubuntu"
diff --git a/contrib/ansible/vars/vars.yaml b/contrib/ansible/vars/vars.yaml
new file mode 100644
index 000000000..9ae0e0680
--- /dev/null
+++ b/contrib/ansible/vars/vars.yaml
@@ -0,0 +1,4 @@
+---
+containerd_release_version: 1.3.0
+cni_bin_dir: /opt/cni/bin/
+cni_conf_dir: /etc/cni/net.d/
diff --git a/contrib/linuxkit/README.md b/contrib/linuxkit/README.md
new file mode 100644
index 000000000..18dada82f
--- /dev/null
+++ b/contrib/linuxkit/README.md
@@ -0,0 +1,5 @@
+# LinuxKit Kubernetes project
+
+The LinuxKit [`projects/kubernetes`](https://github.com/linuxkit/linuxkit/tree/master/projects/kubernetes) subdirectory contains a project to build master and worker node virtual machines. When built with `KUBE_RUNTIME=cri-containerd` then these images will use `cri-containerd` as their execution backend.
+
+See the [project README](https://github.com/linuxkit/linuxkit/blob/master/projects/kubernetes/README.md).
diff --git a/contrib/systemd-units/containerd.service b/contrib/systemd-units/containerd.service
new file mode 100644
index 000000000..c059e97ae
--- /dev/null
+++ b/contrib/systemd-units/containerd.service
@@ -0,0 +1,22 @@
+[Unit]
+Description=containerd container runtime
+Documentation=https://containerd.io
+After=network.target
+
+[Service]
+ExecStartPre=/sbin/modprobe overlay
+ExecStart=/usr/local/bin/containerd
+Restart=always
+RestartSec=5
+Delegate=yes
+KillMode=process
+OOMScoreAdjust=-999
+LimitNOFILE=1048576
+# Having non-zero Limit*s causes performance problems due to accounting overhead
+# in the kernel. We recommend using cgroups to do container-local accounting.
+LimitNPROC=infinity
+LimitCORE=infinity
+TasksMax=infinity
+
+[Install]
+WantedBy=multi-user.target
diff --git a/docs/architecture.md b/docs/architecture.md
new file mode 100644
index 000000000..823e72853
--- /dev/null
+++ b/docs/architecture.md
@@ -0,0 +1,18 @@
+# Architecture of The CRI Plugin
+This document describes the architecture of the `cri` plugin for `containerd`.
+
+This plugin is an implementation of Kubernetes [container runtime interface (CRI)](https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/apis/cri/runtime/v1alpha2/api.proto). Containerd operates on the same node as the [Kubelet](https://kubernetes.io/docs/reference/generated/kubelet/). The `cri` plugin inside containerd handles all CRI service requests from the Kubelet and uses containerd internals to manage containers and container images.
+
+The `cri` plugin uses containerd to manage the full container lifecycle and all container images. As also shown below, `cri` manages pod networking via [CNI](https://github.com/containernetworking/cni) (another CNCF project).
+
+
+
+Let's use an example to demonstrate how the `cri` plugin works for the case when Kubelet creates a single-container pod:
+* Kubelet calls the `cri` plugin, via the CRI runtime service API, to create a pod;
+* `cri` creates and configures the pod’s network namespace using CNI;
+* `cri` uses containerd internal to create and start a special [pause container](https://www.ianlewis.org/en/almighty-pause-container) (the sandbox container) and put that container inside the pod’s cgroups and namespace (steps omitted for brevity);
+* Kubelet subsequently calls the `cri` plugin, via the CRI image service API, to pull the application container image;
+* `cri` further uses containerd to pull the image if the image is not present on the node;
+* Kubelet then calls `cri`, via the CRI runtime service API, to create and start the application container inside the pod using the pulled container image;
+* `cri` finally uses containerd internal to create the application container, put it inside the pod’s cgroups and namespace, then to start the pod’s new application container.
+After these steps, a pod and its corresponding application container is created and running.
diff --git a/docs/architecture.png b/docs/architecture.png
new file mode 100644
index 000000000..c65bd8e87
Binary files /dev/null and b/docs/architecture.png differ
diff --git a/docs/config.md b/docs/config.md
new file mode 100644
index 000000000..1203c24c7
--- /dev/null
+++ b/docs/config.md
@@ -0,0 +1,316 @@
+# CRI Plugin Config Guide
+This document provides the description of the CRI plugin configuration.
+The CRI plugin config is part of the containerd config (default
+path: `/etc/containerd/config.toml`).
+
+See [here](https://github.com/containerd/containerd/blob/master/docs/ops.md)
+for more information about containerd config.
+
+The explanation and default value of each configuration item are as follows:
+```toml
+# Use config version 2 to enable new configuration fields.
+# Config file is parsed as version 1 by default.
+# Version 2 uses long plugin names, i.e. "io.containerd.grpc.v1.cri" vs "cri".
+version = 2
+
+# The 'plugins."io.containerd.grpc.v1.cri"' table contains all of the server options.
+[plugins."io.containerd.grpc.v1.cri"]
+
+ # disable_tcp_service disables serving CRI on the TCP server.
+ # Note that a TCP server is enabled for containerd if TCPAddress is set in section [grpc].
+ disable_tcp_service = true
+
+ # stream_server_address is the ip address streaming server is listening on.
+ stream_server_address = "127.0.0.1"
+
+ # stream_server_port is the port streaming server is listening on.
+ stream_server_port = "0"
+
+ # stream_idle_timeout is the maximum time a streaming connection can be
+ # idle before the connection is automatically closed.
+ # The string is in the golang duration format, see:
+ # https://golang.org/pkg/time/#ParseDuration
+ stream_idle_timeout = "4h"
+
+ # enable_selinux indicates to enable the selinux support.
+ enable_selinux = false
+
+ # selinux_category_range allows the upper bound on the category range to be set.
+ # if not specified or set to 0, defaults to 1024 from the selinux package.
+ selinux_category_range = 1024
+
+ # sandbox_image is the image used by sandbox container.
+ sandbox_image = "k8s.gcr.io/pause:3.2"
+
+ # stats_collect_period is the period (in seconds) of snapshots stats collection.
+ stats_collect_period = 10
+
+ # enable_tls_streaming enables the TLS streaming support.
+  # It generates a self-signed certificate unless the following x509_key_pair_streaming are both set.
+ enable_tls_streaming = false
+
+ # tolerate_missing_hugetlb_controller if set to false will error out on create/update
+ # container requests with huge page limits if the cgroup controller for hugepages is not present.
+ # This helps with supporting Kubernetes <=1.18 out of the box. (default is `true`)
+ tolerate_missing_hugetlb_controller = true
+
+ # ignore_image_defined_volumes ignores volumes defined by the image. Useful for better resource
+ # isolation, security and early detection of issues in the mount configuration when using
+ # ReadOnlyRootFilesystem since containers won't silently mount a temporary volume.
+ ignore_image_defined_volumes = false
+
+ # 'plugins."io.containerd.grpc.v1.cri".x509_key_pair_streaming' contains a x509 valid key pair to stream with tls.
+ [plugins."io.containerd.grpc.v1.cri".x509_key_pair_streaming]
+ # tls_cert_file is the filepath to the certificate paired with the "tls_key_file"
+ tls_cert_file = ""
+
+ # tls_key_file is the filepath to the private key paired with the "tls_cert_file"
+ tls_key_file = ""
+
+ # max_container_log_line_size is the maximum log line size in bytes for a container.
+ # Log line longer than the limit will be split into multiple lines. -1 means no
+ # limit.
+ max_container_log_line_size = 16384
+
+ # disable_cgroup indicates to disable the cgroup support.
+ # This is useful when the daemon does not have permission to access cgroup.
+ disable_cgroup = false
+
+ # disable_apparmor indicates to disable the apparmor support.
+ # This is useful when the daemon does not have permission to access apparmor.
+ disable_apparmor = false
+
+ # restrict_oom_score_adj indicates to limit the lower bound of OOMScoreAdj to
+ # the containerd's current OOMScoreAdj.
+ # This is useful when the containerd does not have permission to decrease OOMScoreAdj.
+ restrict_oom_score_adj = false
+
+ # max_concurrent_downloads restricts the number of concurrent downloads for each image.
+ max_concurrent_downloads = 3
+
+ # disable_proc_mount disables Kubernetes ProcMount support. This MUST be set to `true`
+ # when using containerd with Kubernetes <=1.11.
+ disable_proc_mount = false
+
+  # unset_seccomp_profile is the profile containerd/cri will use if the provided seccomp profile is
+ # unset (`""`) for a container (default is `unconfined`)
+ unset_seccomp_profile = ""
+
+ # 'plugins."io.containerd.grpc.v1.cri".containerd' contains config related to containerd
+ [plugins."io.containerd.grpc.v1.cri".containerd]
+
+ # snapshotter is the snapshotter used by containerd.
+ snapshotter = "overlayfs"
+
+ # no_pivot disables pivot-root (linux only), required when running a container in a RamDisk with runc.
+ # This only works for runtime type "io.containerd.runtime.v1.linux".
+ no_pivot = false
+
+ # disable_snapshot_annotations disables to pass additional annotations (image
+ # related information) to snapshotters. These annotations are required by
+ # stargz snapshotter (https://github.com/containerd/stargz-snapshotter)
+ disable_snapshot_annotations = false
+
+ # discard_unpacked_layers allows GC to remove layers from the content store after
+ # successfully unpacking these layers to the snapshotter.
+ discard_unpacked_layers = false
+
+ # default_runtime_name is the default runtime name to use.
+ default_runtime_name = "runc"
+
+ # 'plugins."io.containerd.grpc.v1.cri".containerd.default_runtime' is the runtime to use in containerd.
+ # DEPRECATED: use `default_runtime_name` and `plugins."io.containerd.grpc.v1.cri".runtimes` instead.
+ # Remove in containerd 1.4.
+ [plugins."io.containerd.grpc.v1.cri".containerd.default_runtime]
+
+ # 'plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime' is a runtime to run untrusted workloads on it.
+ # DEPRECATED: use `untrusted` runtime in `plugins."io.containerd.grpc.v1.cri".runtimes` instead.
+ # Remove in containerd 1.4.
+ [plugins."io.containerd.grpc.v1.cri".containerd.untrusted_workload_runtime]
+
+ # 'plugins."io.containerd.grpc.v1.cri".containerd.runtimes' is a map from CRI RuntimeHandler strings, which specify types
+ # of runtime configurations, to the matching configurations.
+ # In this example, 'runc' is the RuntimeHandler string to match.
+ [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
+ # runtime_type is the runtime type to use in containerd.
+ # The default value is "io.containerd.runc.v2" since containerd 1.4.
+ # The default value was "io.containerd.runc.v1" in containerd 1.3, "io.containerd.runtime.v1.linux" in prior releases.
+ runtime_type = "io.containerd.runc.v2"
+
+ # pod_annotations is a list of pod annotations passed to both pod
+ # sandbox as well as container OCI annotations. Pod_annotations also
+ # supports golang path match pattern - https://golang.org/pkg/path/#Match.
+ # e.g. ["runc.com.*"], ["*.runc.com"], ["runc.com/*"].
+ #
+ # For the naming convention of annotation keys, please reference:
+ # * Kubernetes: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/#syntax-and-character-set
+ # * OCI: https://github.com/opencontainers/image-spec/blob/master/annotations.md
+ pod_annotations = []
+
+ # container_annotations is a list of container annotations passed through to the OCI config of the containers.
+ # Container annotations in CRI are usually generated by other Kubernetes node components (i.e., not users).
+ # Currently, only device plugins populate the annotations.
+ container_annotations = []
+
+ # privileged_without_host_devices allows overloading the default behaviour of passing host
+ # devices through to privileged containers. This is useful when using a runtime where it does
+ # not make sense to pass host devices to the container when privileged. Defaults to false -
+ # i.e pass host devices through to privileged containers.
+ privileged_without_host_devices = false
+
+ # base_runtime_spec is a file path to a JSON file with the OCI spec that will be used as the base spec that all
+ # container's are created from.
+ # Use containerd's `ctr oci spec > /etc/containerd/cri-base.json` to output initial spec file.
+  # Spec files are loaded at launch, so containerd daemon must be restarted on any changes to refresh default specs.
+ # Still running containers and restarted containers will still be using the original spec from which that container was created.
+ base_runtime_spec = ""
+
+ # 'plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options' is options specific to
+ # "io.containerd.runc.v1" and "io.containerd.runc.v2". Its corresponding options type is:
+ # https://github.com/containerd/containerd/blob/v1.3.2/runtime/v2/runc/options/oci.pb.go#L26 .
+ [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
+ # NoPivotRoot disables pivot root when creating a container.
+ NoPivotRoot = false
+
+ # NoNewKeyring disables new keyring for the container.
+ NoNewKeyring = false
+
+ # ShimCgroup places the shim in a cgroup.
+ ShimCgroup = ""
+
+ # IoUid sets the I/O's pipes uid.
+ IoUid = 0
+
+ # IoGid sets the I/O's pipes gid.
+ IoGid = 0
+
+ # BinaryName is the binary name of the runc binary.
+ BinaryName = ""
+
+ # Root is the runc root directory.
+ Root = ""
+
+ # CriuPath is the criu binary path.
+ CriuPath = ""
+
+ # SystemdCgroup enables systemd cgroups.
+ SystemdCgroup = false
+
+ # CriuImagePath is the criu image path
+ CriuImagePath = ""
+
+ # CriuWorkPath is the criu work path.
+ CriuWorkPath = ""
+
+ # 'plugins."io.containerd.grpc.v1.cri".cni' contains config related to cni
+ [plugins."io.containerd.grpc.v1.cri".cni]
+ # bin_dir is the directory in which the binaries for the plugin is kept.
+ bin_dir = "/opt/cni/bin"
+
+ # conf_dir is the directory in which the admin places a CNI conf.
+ conf_dir = "/etc/cni/net.d"
+
+ # max_conf_num specifies the maximum number of CNI plugin config files to
+ # load from the CNI config directory. By default, only 1 CNI plugin config
+ # file will be loaded. If you want to load multiple CNI plugin config files
+ # set max_conf_num to the number desired. Setting max_config_num to 0 is
+ # interpreted as no limit is desired and will result in all CNI plugin
+ # config files being loaded from the CNI config directory.
+ max_conf_num = 1
+
+ # conf_template is the file path of golang template used to generate
+ # cni config.
+ # If this is set, containerd will generate a cni config file from the
+ # template. Otherwise, containerd will wait for the system admin or cni
+ # daemon to drop the config file into the conf_dir.
+ # This is a temporary backward-compatible solution for kubenet users
+ # who don't have a cni daemonset in production yet.
+ # This will be deprecated when kubenet is deprecated.
+ # See the "CNI Config Template" section for more details.
+ conf_template = ""
+
+ # 'plugins."io.containerd.grpc.v1.cri".registry' contains config related to the registry
+ [plugins."io.containerd.grpc.v1.cri".registry]
+
+  # 'plugins."io.containerd.grpc.v1.cri".registry.headers' sets the http request headers to send for all registry requests
+ [plugins."io.containerd.grpc.v1.cri".registry.headers]
+ Foo = ["bar"]
+
+ # 'plugins."io.containerd.grpc.v1.cri".registry.mirrors' are namespace to mirror mapping for all namespaces.
+ [plugins."io.containerd.grpc.v1.cri".registry.mirrors]
+ [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
+ endpoint = ["https://registry-1.docker.io", ]
+
+ # 'plugins."io.containerd.grpc.v1.cri".image_decryption' contains config related
+ # to handling decryption of encrypted container images.
+ [plugins."io.containerd.grpc.v1.cri".image_decryption]
+ # key_model defines the name of the key model used for how the cri obtains
+ # keys used for decryption of encrypted container images.
+ # The [decryption document](https://github.com/containerd/cri/blob/master/docs/decryption.md)
+ # contains additional information about the key models available.
+ #
+ # Set of available string options: {"", "node"}
+ # Omission of this field defaults to the empty string "", which indicates no key model,
+ # disabling image decryption.
+ #
+ # In order to use the decryption feature, additional configurations must be made.
+ # The [decryption document](https://github.com/containerd/cri/blob/master/docs/decryption.md)
+ # provides information of how to set up stream processors and the containerd imgcrypt decoder
+ # with the appropriate key models.
+ #
+ # Additional information:
+ # * Stream processors: https://github.com/containerd/containerd/blob/master/docs/stream_processors.md
+ # * Containerd imgcrypt: https://github.com/containerd/imgcrypt
+ key_model = "node"
+```
+
+## Untrusted Workload
+
+The recommended way to run untrusted workload is to use
+[`RuntimeClass`](https://kubernetes.io/docs/concepts/containers/runtime-class/) api
+introduced in Kubernetes 1.12 to select RuntimeHandlers configured to run
+untrusted workload in `plugins."io.containerd.grpc.v1.cri".containerd.runtimes`.
+
+However, if you are using the legacy `io.kubernetes.cri.untrusted-workload` pod annotation
+to request a pod be run using a runtime for untrusted workloads, the RuntimeHandler
+`plugins."io.containerd.grpc.v1.cri".containerd.runtimes.untrusted` must be defined first.
+When the annotation `io.kubernetes.cri.untrusted-workload` is set to `true` the `untrusted`
+runtime will be used. For example, see
+[Create an untrusted pod using Kata Containers](https://github.com/kata-containers/documentation/blob/master/how-to/how-to-use-k8s-with-cri-containerd-and-kata.md#create-an-untrusted-pod-using-kata-containers).
+
+## CNI Config Template
+
+Ideally the cni config should be placed by system admin or cni daemon like calico,
+weaveworks etc. However, there are still users using [kubenet](https://kubernetes.io/docs/concepts/cluster-administration/network-plugins/#kubenet)
+today, who don't have a cni daemonset in production. The cni config template is
+a temporary backward-compatible solution for them. This is expected to be
+deprecated when kubenet is deprecated.
+
+The cni config template uses the [golang
+template](https://golang.org/pkg/text/template/) format. Currently supported
+values are:
+* `.PodCIDR` is a string of the first CIDR assigned to the node.
+* `.PodCIDRRanges` is a string array of all CIDRs assigned to the node. It is
+ usually used for
+ [dualstack](https://github.com/kubernetes/enhancements/blob/master/keps/sig-network/20180612-ipv4-ipv6-dual-stack.md) support.
+* `.Routes` is a string array of all routes needed. It is usually used for
+ dualstack support or single stack but IPv4 or IPv6 is decided at runtime.
+
+The [golang template actions](https://golang.org/pkg/text/template/#hdr-Actions)
+can be used to render the cni config. For example, you can use the following
+template to add CIDRs and routes for dualstack in the CNI config:
+```
+"ipam": {
+ "type": "host-local",
+ "ranges": [{{range $i, $range := .PodCIDRRanges}}{{if $i}}, {{end}}[{"subnet": "{{$range}}"}]{{end}}],
+ "routes": [{{range $i, $route := .Routes}}{{if $i}}, {{end}}{"dst": "{{$route}}"}{{end}}]
+}
+```
+
+## Deprecation
+The config options of the CRI plugin follow the [Kubernetes deprecation
+policy of "admin-facing CLI components"](https://kubernetes.io/docs/reference/using-api/deprecation-policy/#deprecating-a-flag-or-cli).
+
+In summary, when a config option is announced to be deprecated:
+* It is kept functional for 6 months or 1 release (whichever is longer);
+* A warning is emitted when it is used.
diff --git a/docs/containerd.png b/docs/containerd.png
new file mode 100644
index 000000000..9eb1802b8
Binary files /dev/null and b/docs/containerd.png differ
diff --git a/docs/cri.png b/docs/cri.png
new file mode 100644
index 000000000..0373b3bc0
Binary files /dev/null and b/docs/cri.png differ
diff --git a/docs/crictl.md b/docs/crictl.md
new file mode 100644
index 000000000..3a71575b5
--- /dev/null
+++ b/docs/crictl.md
@@ -0,0 +1,216 @@
+CRICTL User Guide
+=================
+This document presumes you already have `containerd` with the `cri` plugin installed and running.
+
+This document is for developers who wish to debug, inspect, and manage their pods,
+containers, and container images.
+
+Before generating issues against this document, `containerd`, `containerd/cri`,
+or `crictl` please make sure the issue has not already been submitted.
+
+## Install crictl
+If you have not already installed crictl please install the version compatible
+with the `cri` plugin you are using. If you are a user, your deployment
+should have installed crictl for you. If not, get it from your release tarball.
+If you are a developer the current version of crictl is specified [here](../hack/utils.sh).
+A helper command has been included to install the dependencies at the right version:
+```console
+$ make install.deps
+```
+* Note: The file named `/etc/crictl.yaml` is used to configure crictl
+so you don't have to repeatedly specify the runtime sock used to connect crictl
+to the container runtime:
+```console
+$ cat /etc/crictl.yaml
+runtime-endpoint: unix:///run/containerd/containerd.sock
+image-endpoint: unix:///run/containerd/containerd.sock
+timeout: 10
+debug: true
+```
+
+## Download and Inspect a Container Image
+The pull command tells the container runtime to download a container image from
+a container registry.
+```console
+$ crictl pull busybox
+ ...
+$ crictl inspecti busybox
+ ... displays information about the image.
+```
+
+***Note:*** If you get an error similar to the following when running a `crictl`
+command (and your containerd instance is already running):
+```console
+crictl info
+FATA[0000] getting status of runtime failed: rpc error: code = Unimplemented desc = unknown service runtime.v1alpha2.RuntimeService
+```
+This could mean that you are using an incorrect containerd configuration (maybe
+from a Docker install). You will need to update your containerd configuration
+to the containerd instance that you are running. One way of doing this is as
+follows:
+```console
+$ mv /etc/containerd/config.toml /etc/containerd/config.bak
+$ containerd config default > /etc/containerd/config.toml
+```
+
+## Directly Load a Container Image
+Another way to load an image into the container runtime is with the load
+command. With the load command you inject a container image into the container
+runtime from a file. First you need to create a container image tarball. For
+example to create an image tarball for a pause container using Docker:
+```console
+$ docker pull k8s.gcr.io/pause-amd64:3.2
+ 3.2: Pulling from pause-amd64
+ 67ddbfb20a22: Pull complete
+ Digest: sha256:59eec8837a4d942cc19a52b8c09ea75121acc38114a2c68b98983ce9356b8610
+ Status: Downloaded newer image for k8s.gcr.io/pause-amd64:3.2
+$ docker save k8s.gcr.io/pause-amd64:3.2 -o pause.tar
+```
+Then use [`ctr`](https://github.com/containerd/containerd/blob/master/docs/man/ctr.1.md)
+to load the container image into the container runtime:
+```console
+# The cri plugin uses the "k8s.io" containerd namespace.
+$ sudo ctr -n=k8s.io images import pause.tar
+ Loaded image: k8s.gcr.io/pause-amd64:3.2
+```
+List images and inspect the pause image:
+```console
+$ sudo crictl images
+IMAGE TAG IMAGE ID SIZE
+docker.io/library/busybox latest f6e427c148a76 728kB
+k8s.gcr.io/pause-amd64 3.2 da86e6ba6ca19 746kB
+$ sudo crictl inspecti da86e6ba6ca19
+ ... displays information about the pause image.
+$ sudo crictl inspecti k8s.gcr.io/pause-amd64:3.2
+ ... displays information about the pause image.
+```
+
+## Run a pod sandbox (using a config file)
+```console
+$ cat sandbox-config.json
+{
+ "metadata": {
+ "name": "nginx-sandbox",
+ "namespace": "default",
+ "attempt": 1,
+ "uid": "hdishd83djaidwnduwk28bcsb"
+ },
+ "linux": {
+ }
+}
+
+$ crictl runp sandbox-config.json
+e1c83b0b8d481d4af8ba98d5f7812577fc175a37b10dc824335951f52addbb4e
+$ crictl pods
+PODSANDBOX ID CREATED STATE NAME NAMESPACE ATTEMPT
+e1c83b0b8d481 2 hours ago SANDBOX_READY nginx-sandbox default 1
+$ crictl inspectp e1c8
+ ... displays information about the pod and the pod sandbox pause container.
+```
+* Note: As shown above, you may use truncated IDs if they are unique.
+* Other commands to manage the pod include `stopp ID` to stop a running pod and
+`rmp ID` to remove a pod sandbox.
+
+## Create and Run a Container in the Pod Sandbox (using a config file)
+```console
+$ cat container-config.json
+{
+ "metadata": {
+ "name": "busybox"
+ },
+ "image":{
+ "image": "busybox"
+ },
+ "command": [
+ "top"
+ ],
+ "linux": {
+ }
+}
+
+$ crictl create e1c83 container-config.json sandbox-config.json
+0a2c761303163f2acaaeaee07d2ba143ee4cea7e3bde3d32190e2a36525c8a05
+$ crictl ps -a
+CONTAINER ID IMAGE CREATED STATE NAME ATTEMPT
+0a2c761303163 docker.io/busybox 2 hours ago CONTAINER_CREATED busybox 0
+$ crictl start 0a2c
+0a2c761303163f2acaaeaee07d2ba143ee4cea7e3bde3d32190e2a36525c8a05
+$ crictl ps
+CONTAINER ID IMAGE CREATED STATE NAME ATTEMPT
+0a2c761303163 docker.io/busybox 2 hours ago CONTAINER_RUNNING busybox 0
+$ crictl inspect 0a2c7
+ ... show detailed information about the container
+```
+## Exec a Command in the Container
+```console
+$ crictl exec -i -t 0a2c ls
+bin dev etc home proc root sys tmp usr var
+```
+## Display Stats for the Container
+```console
+$ crictl stats
+CONTAINER CPU % MEM DISK INODES
+0a2c761303163f 0.00 983kB 16.38kB 6
+```
+* Other commands to manage the container include `stop ID` to stop a running
+container and `rm ID` to remove a container.
+## Display Version Information
+```console
+$ crictl version
+Version: 0.1.0
+RuntimeName: containerd
+RuntimeVersion: 1.0.0-beta.1-186-gdd47a72-TEST
+RuntimeApiVersion: v1alpha2
+```
+## Display Status & Configuration Information about Containerd & The CRI Plugin
+```console
+$ crictl info
+{
+ "status": {
+ "conditions": [
+ {
+ "type": "RuntimeReady",
+ "status": true,
+ "reason": "",
+ "message": ""
+ },
+ {
+ "type": "NetworkReady",
+ "status": true,
+ "reason": "",
+ "message": ""
+ }
+ ]
+ },
+ "config": {
+ "containerd": {
+ "snapshotter": "overlayfs",
+ "runtime": "io.containerd.runtime.v1.linux"
+ },
+ "cni": {
+ "binDir": "/opt/cni/bin",
+ "confDir": "/etc/cni/net.d"
+ },
+ "registry": {
+ "mirrors": {
+ "docker.io": {
+ "endpoint": [
+ "https://registry-1.docker.io"
+ ]
+ }
+ }
+ },
+ "streamServerPort": "10010",
+ "sandboxImage": "k8s.gcr.io/pause:3.2",
+ "statsCollectPeriod": 10,
+ "containerdRootDir": "/var/lib/containerd",
+ "containerdEndpoint": "unix:///run/containerd/containerd.sock",
+ "rootDir": "/var/lib/containerd/io.containerd.grpc.v1.cri",
+ "stateDir": "/run/containerd/io.containerd.grpc.v1.cri",
+ },
+ "golang": "go1.10"
+}
+```
+## More Information
+See [here](https://github.com/kubernetes-sigs/cri-tools/blob/master/docs/crictl.md)
+for information about crictl.
diff --git a/docs/decryption.md b/docs/decryption.md
new file mode 100644
index 000000000..abde945b8
--- /dev/null
+++ b/docs/decryption.md
@@ -0,0 +1,46 @@
+# Configure Image Decryption
+This document describes the method to configure encrypted container image decryption for `containerd` for use with the `cri` plugin.
+
+## Encrypted Container Images
+
+Encrypted container images are OCI images which contain encrypted blobs. These encrypted images can be created through the use of [containerd/imgcrypt project](https://github.com/containerd/imgcrypt). To decrypt these images, the `containerd` runtime uses information passed from the `cri` such as keys, options and encryption metadata.
+
+## The "node" Key Model
+
+Encryption ties trust to an entity based on the model in which a key is associated with it. We call this the key model. One such use case is when we want to tie the trust of a key to the node in a cluster. In this case, we call it the "node" or "host" Key Model. Future work will include more key models to facilitate other trust associations (i.e. for multi-tenancy).
+
+### "node" Key Model Use Case
+
+In this model encryption is tied to worker nodes. The use case here revolves around the idea that an image should be decryptable only on a trusted host. Using this model, various node-based technologies can be used to help bootstrap trust in worker nodes and perform secure key distribution (e.g. TPM, host attestation, secure/measured boot). In this scenario, runtimes are capable of fetching the necessary decryption keys. An example of this is using the [`--decryption-keys-path` flag in imgcrypt](https://github.com/containerd/imgcrypt).
+
+### Configuring image decryption for "node" key model
+
+The default configuration does not handle decrypting encrypted container images.
+
+An example for configuring the "node" key model for container image decryption:
+
+Configure `cri` to enable decryption with "node" key model
+```toml
+[plugins."io.containerd.grpc.v1.cri".image_decryption]
+ key_model = "node"
+```
+
+Configure `containerd` daemon [`stream_processors`](https://github.com/containerd/containerd/blob/master/docs/stream_processors.md) to handle the
+encrypted mediatypes.
+```toml
+[stream_processors]
+ [stream_processors."io.containerd.ocicrypt.decoder.v1.tar.gzip"]
+ accepts = ["application/vnd.oci.image.layer.v1.tar+gzip+encrypted"]
+ returns = "application/vnd.oci.image.layer.v1.tar+gzip"
+ path = "/usr/local/bin/ctd-decoder"
+ args = ["--decryption-keys-path", "/keys"]
+ [stream_processors."io.containerd.ocicrypt.decoder.v1.tar"]
+ accepts = ["application/vnd.oci.image.layer.v1.tar+encrypted"]
+ returns = "application/vnd.oci.image.layer.v1.tar"
+ path = "/usr/local/bin/ctd-decoder"
+ args = ["--decryption-keys-path", "/keys"]
+```
+
+In this example, container image decryption is set to use the "node" key model. In addition, the decryption [`stream_processors`](https://github.com/containerd/containerd/blob/master/docs/stream_processors.md) are configured as specified in [containerd/imgcrypt project](https://github.com/containerd/imgcrypt), with the additional field `--decryption-keys-path` configured to specify where decryption keys are located locally in the node.
+
+After modifying this config, you need to restart the `containerd` service.
diff --git a/docs/installation.md b/docs/installation.md
new file mode 100644
index 000000000..2337ffc6a
--- /dev/null
+++ b/docs/installation.md
@@ -0,0 +1,108 @@
+# Install Containerd with Release Tarball
+This document provides the steps to install `containerd` and its dependencies with the release tarball, and bring up a Kubernetes cluster using kubeadm.
+
+These steps have been verified on Ubuntu 16.04. For other OS distributions, the steps may differ. Please feel free to file issues or PRs if you encounter any problems on other OS distributions.
+
+*Note: You need to run the following steps on each node you are planning to use in your Kubernetes cluster.*
+## Release Tarball
+For each `containerd` release, we'll publish a release tarball specifically for Kubernetes named `cri-containerd-${VERSION}.${OS}-${ARCH}.tar.gz`. This release tarball contains all required binaries and files for using `containerd` with Kubernetes. For example, the 1.2.4 version is available at https://storage.googleapis.com/cri-containerd-release/cri-containerd-1.2.4.linux-amd64.tar.gz.
+
+Note: The VERSION tag specified for the tarball corresponds to the `containerd` release tag, not a containerd/cri repository release tag. The `containerd` release includes the containerd/cri repository code through vendoring. The version of the containerd/cri code included in `containerd` is specified via a commit hash for containerd/cri in containerd/containerd/vendor.conf.
+### Content
+As shown below, the release tarball contains:
+1) `containerd`, `containerd-shim`, `containerd-stress`, `containerd-release`, `ctr`: binaries for containerd.
+2) `runc`: runc binary.
+3) `crictl`, `crictl.yaml`: command line tools for CRI container runtime and its config file.
+4) `critest`: binary to run [CRI validation test](https://github.com/kubernetes-sigs/cri-tools/blob/master/docs/validation.md).
+5) `containerd.service`: Systemd unit for containerd.
+6) `/opt/containerd/cluster/`: scripts for `kube-up.sh`.
+```console
+$ tar -tf cri-containerd-1.1.0-rc.0.linux-amd64.tar.gz
+./
+./opt
+./opt/containerd
+./opt/containerd/cluster
+./opt/containerd/cluster/gce
+./opt/containerd/cluster/gce/cloud-init
+./opt/containerd/cluster/gce/cloud-init/node.yaml
+./opt/containerd/cluster/gce/cloud-init/master.yaml
+./opt/containerd/cluster/gce/configure.sh
+./opt/containerd/cluster/gce/env
+./opt/containerd/cluster/version
+./opt/containerd/cluster/health-monitor.sh
+./usr
+./usr/local
+./usr/local/sbin
+./usr/local/sbin/runc
+./usr/local/bin
+./usr/local/bin/crictl
+./usr/local/bin/containerd
+./usr/local/bin/containerd-stress
+./usr/local/bin/critest
+./usr/local/bin/containerd-release
+./usr/local/bin/containerd-shim
+./usr/local/bin/ctr
+./etc
+./etc/systemd
+./etc/systemd/system
+./etc/systemd/system/containerd.service
+./etc/crictl.yaml
+```
+### Binary Information
+Information about the binaries in the release tarball:
+
+| Binary Name | Support | OS | Architecture |
+|:------------------------------:|:------------------:|:-----:|:------------:|
+| containerd | seccomp, apparmor, overlay, btrfs | linux | amd64 |
+| containerd-shim | overlay, btrfs | linux | amd64 |
+| runc | seccomp, apparmor | linux | amd64 |
+
+
+If you have other requirements for the binaries, e.g. selinux support, another architecture support etc., you need to build the binaries yourself following [the instructions](../README.md#getting-started-for-developers).
+
+### Download
+
+The release tarball could be downloaded from the release GCS bucket https://storage.googleapis.com/cri-containerd-release/.
+
+## Step 0: Install Dependent Libraries
+Install required library for seccomp.
+```bash
+sudo apt-get update
+sudo apt-get install libseccomp2
+```
+Note that:
+1) If you are using Ubuntu <=Trusty or Debian <=jessie, a backported version of `libseccomp2` is needed. (See the [trusty-backports](https://packages.ubuntu.com/trusty-backports/libseccomp2) and [jessie-backports](https://packages.debian.org/jessie-backports/libseccomp2)).
+## Step 1: Download Release Tarball
+Download release tarball for the `containerd` version you want to install from the GCS bucket.
+```bash
+wget https://storage.googleapis.com/cri-containerd-release/cri-containerd-${VERSION}.linux-amd64.tar.gz
+```
+Validate checksum of the release tarball:
+```bash
+sha256sum cri-containerd-${VERSION}.linux-amd64.tar.gz
+curl https://storage.googleapis.com/cri-containerd-release/cri-containerd-${VERSION}.linux-amd64.tar.gz.sha256
+# Compare to make sure the 2 checksums are the same.
+```
+## Step 2: Install Containerd
+If you are using systemd, just simply unpack the tarball to the root directory:
+```bash
+sudo tar --no-overwrite-dir -C / -xzf cri-containerd-${VERSION}.linux-amd64.tar.gz
+sudo systemctl start containerd
+```
+If you are not using systemd, please unpack all binaries into a directory in your `PATH`, and start `containerd` as a monitored long-running service with the service manager you are using, e.g. `supervisord`, `upstart` etc.
+## Step 3: Install Kubeadm, Kubelet and Kubectl
+Follow [the instructions](https://kubernetes.io/docs/setup/independent/install-kubeadm/) to install kubeadm, kubelet and kubectl.
+## Step 4: Create Systemd Drop-In for Containerd
+Create the systemd drop-in file `/etc/systemd/system/kubelet.service.d/0-containerd.conf`:
+```
+[Service]
+Environment="KUBELET_EXTRA_ARGS=--container-runtime=remote --runtime-request-timeout=15m --container-runtime-endpoint=unix:///run/containerd/containerd.sock"
+```
+And reload systemd configuration:
+```bash
+systemctl daemon-reload
+```
+## Bring Up the Cluster
+Now you should have properly installed all required binaries and dependencies on each of your nodes.
+
+The next step is to use kubeadm to bring up the Kubernetes cluster. It is the same with [the ansible installer](../contrib/ansible). Please follow the steps 2-4 [here](../contrib/ansible/README.md#step-2).
diff --git a/docs/kube-up.md b/docs/kube-up.md
new file mode 100644
index 000000000..31be73461
--- /dev/null
+++ b/docs/kube-up.md
@@ -0,0 +1,25 @@
+# Production Quality Cluster on GCE
+This document provides the steps to bring up a production quality cluster on GCE with [`kube-up.sh`](https://kubernetes.io/docs/setup/turnkey/gce/).
+
+**If your Kubernetes version is 1.15 or greater, you can simply run:**
+```
+export KUBE_CONTAINER_RUNTIME=containerd
+```
+Follow these instructions [here](https://kubernetes.io/docs/setup/turnkey/gce/) to create a production quality Kubernetes cluster on GCE.
+## Download CRI-Containerd Release Tarball
+To download release tarball, see [step 1](./installation.md#step-1-download-cri-containerd-release-tarball) in installation.md.
+
+Unpack release tarball to any directory, using `${CRI_CONTAINERD_PATH}` to indicate the directory in the doc:
+```bash
+tar -C ${CRI_CONTAINERD_PATH} -xzf cri-containerd-${VERSION}.linux-amd64.tar.gz
+```
+## Set Environment Variables for CRI-Containerd
+```bash
+. ${CRI_CONTAINERD_PATH}/opt/containerd/cluster/gce/env
+```
+## Create Kubernetes Cluster on GCE
+Follow these instructions [here](https://kubernetes.io/docs/setup/turnkey/gce/) to create a production quality Kubernetes cluster on GCE.
+
+**Make sure the Kubernetes version you are using is v1.11 or greater:**
+* When using `https://get.k8s.io`, use the environment variable `KUBERNETES_RELEASE` to set version.
+* When using a Kubernetes release tarball, make sure to select version 1.11 or greater.
diff --git a/docs/performance.png b/docs/performance.png
new file mode 100644
index 000000000..387fa9bb3
Binary files /dev/null and b/docs/performance.png differ
diff --git a/docs/proposal.md b/docs/proposal.md
new file mode 100644
index 000000000..64db560db
--- /dev/null
+++ b/docs/proposal.md
@@ -0,0 +1,111 @@
+Containerd CRI Integration
+=============
+Author: Lantao Liu (@random-liu)
+## Abstract
+This proposal aims to integrate [containerd](https://github.com/containerd/containerd) with Kubelet against the [container runtime interface (CRI)](https://github.com/kubernetes/kubernetes/blob/v1.6.0/pkg/kubelet/api/v1alpha1/runtime/api.proto).
+## Background
+Containerd is a core container runtime, which provides the minimum set of functionalities to manage the complete container lifecycle of its host system, including container execution and supervision, image distribution and storage, etc.
+
+Containerd was [introduced in Docker 1.11](https://blog.docker.com/2016/04/docker-engine-1-11-runc/), used to manage [runC](https://runc.io/) containers on the node. As shown below, it creates a containerd-shim for each container, and the shim manages the lifecycle of its corresponding container.
+
+
+In Dec. 2016, Docker Inc. spun it out into a standalone component, and donated it to [CNCF](https://www.cncf.io/) in Mar. 2017.
+
+## Motivation
+Containerd is one potential alternative to Docker as the runtime for Kubernetes clusters. *Compared with Docker*, containerd has pros and cons.
+### Pros
+* **Stability**: Containerd has limited scope and slower feature velocity, which is expected to be more stable.
+* **Compatibility**: The scope of containerd aligns with Kubernetes' requirements. It provides the required functionalities and the flexibility for areas like image pulling, networking, volume and logging etc.
+* **Performance**:
+ * Containerd consumes less resource than Docker at least because it's a subset of Docker;
+ * Containerd CRI integration eliminates an extra hop in the stack (as shown below). 
+* **Neutral Foundation**: Containerd is part of CNCF now.
+### Cons
+* **User Adoption**:
+ * Ideally, Kubernetes users don't interact with the underlying container runtime directly. However, for the lack of debug toolkits, sometimes users still need to login the node to debug with Docker CLI directly.
+ * Containerd provides barebone CLIs [ctr](https://github.com/containerd/containerd/tree/master/cmd/ctr) and [dist](https://github.com/containerd/containerd/tree/master/cmd/dist) for development and debugging purpose, but they may not be sufficient and necessary. Additionally, presuming these are sufficient and necessary tools, a plan and time would be needed to sufficiently document these CLIs and educate users in their use.
+* **Maturity**: The rescoped containerd is pretty new, and it's still under heavy development.
+## Goals
+* Make sure containerd meets the requirement of Kubernetes, now and into the foreseeable future.
+* Implement containerd CRI shim and make sure it provides equivalent functionality, usability and debuggability.
+* Improve Kubernetes by taking advantage of the flexibility provided by containerd.
+## Design
+The following sections discuss the design aspects of the containerd CRI integration. For the purposes of this doc, the containerd CRI integration will be referred to as `CRI-containerd`.
+### Container Lifecycle
+CRI-containerd relies on containerd to manage container lifecycle.
+
+Ideally, CRI-containerd only needs to do api translation and information reorganization. However, CRI-containerd needs to maintain some metadata because:
+* There is a mismatch between container lifecycle of CRI and containerd - containerd only tracks running processes, once the container and its corresponding containerd-shim exit, the container is no longer visible in the containerd API.
+* Some sandbox/container metadata is not provided by containerd, and we can not leverage OCI runtime annotation to store it because of the container lifecycle mismatch, e.g. labels/annotations, `PodSandboxID` of a container, `FinishedAt` timestamp, `ExitCode`, `Mounts` etc.
+
+CRI-containerd should checkpoint these metadata itself or use [containerd metadata service](https://github.com/containerd/containerd/blob/0a5544d8c4dab44dfc682f5ad07f1cd011c0a115/design/plugins.md#core) if available.
+### Container Logging
+Containerd doesn't provide persistent container log. It redirects container STDIO into different FIFOs.
+
+CRI-containerd should start a goroutine (process/container in the future) to:
+* Continuously drain the FIFO;
+* Decorate the log line into [CRI-defined format](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node/kubelet-cri-logging.md#proposed-solution);
+* Write the log into [CRI-defined log path](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/node/kubelet-cri-logging.md#proposed-solution).
+### Container Streaming
+Containerd supports creating a process in the container with `Exec`, and the STDIO is also exposed as FIFOs. Containerd also supports resizing console of a specific process with `Pty`.
+
+CRI-containerd could reuse the [streaming server](https://github.com/kubernetes/kubernetes/blob/release-1.6/pkg/kubelet/server/streaming/server.go), it should implement the [streaming runtime interface](https://github.com/kubernetes/kubernetes/blob/release-1.6/pkg/kubelet/server/streaming/server.go#L61-L65).
+
+For different CRI streaming functions:
+* `ExecSync`: CRI-containerd should use `Exec` to create the exec process, collect the stdout/stderr of the process, and wait for the process to terminate.
+* `Exec`: CRI-containerd should use `Exec` to create the exec process, create a goroutine (process/container) to redirect streams, and wait for the process to terminate.
+* `Attach`: CRI-containerd should create a goroutine (process/container) to read the existing container log to the output, redirect streams of the init process, and wait for any stream to be closed.
+* `PortForward`: CRI-containerd could implement this with `socat` and `nsenter`, similar with [current Docker portforward implementation](https://github.com/kubernetes/kubernetes/blob/release-1.6/pkg/kubelet/dockertools/docker_manager.go#L1373-L1428).
+### Container Networking
+Containerd doesn't provide container networking, but OCI runtime spec supports joining a linux container into an existing network namespace.
+
+CRI-containerd should:
+* Create a network namespace for a sandbox;
+* Call [network plugin](https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/network/plugins.go) to update the options of the network namespace;
+* Let the user containers in the same sandbox share the network namespace.
+### Container Metrics
+Containerd provides [container cgroup metrics](https://github.com/containerd/containerd/blob/master/reports/2017-03-17.md#metrics), and plans to provide [container writable layer disk usage](https://github.com/containerd/containerd/issues/678).
+
+CRI container metrics api needs to be defined ([#27097](https://github.com/kubernetes/kubernetes/issues/27097)). After that, CRI-containerd should translate containerd container metrics into CRI container metrics.
+### Image Management
+CRI-containerd relies on containerd to manage images. Containerd should provide all function and information required by CRI, and CRI-containerd only needs to do api translation and information reorganization.
+
+### ImageFS Metrics
+Containerd plans to provide [image filesystem metrics](https://github.com/containerd/containerd/issues/678).
+
+CRI image filesystem metrics needs to be defined ([#33048](https://github.com/kubernetes/kubernetes/issues/33048)). After that, we should make sure containerd provides the required metrics, and CRI-containerd should translate containerd image filesystem metrics into CRI image filesystem metrics.
+### Out of Scope
+Following items are out of the scope of this design, we may address them in future version as enhancement or optimization.
+* **Debuggability**: One of the biggest concern of CRI-containerd is debuggability. We should provide equivalent debuggability with Docker CLI through `kubectl`, [`cri-tools`](https://github.com/kubernetes-sigs/cri-tools) or containerd CLI.
+* **Built-in CRI support**: The [plugin model](https://github.com/containerd/containerd/blob/master/design/plugins.md) provided by containerd makes it possible to directly build CRI support into containerd as a plugin, which will eliminate one more hop from the stack. But because of the [limitation of golang plugin](https://github.com/containerd/containerd/issues/563), we have to either maintain our own branch or push CRI plugin upstream.
+* **Seccomp**: ([#36997](https://github.com/kubernetes/kubernetes/issues/36997)) Seccomp is supported in OCI runtime spec. However, current seccomp implementation in Kubernetes is experimental and docker specific, the api needs to be defined in CRI first before CRI-containerd implements it.
+* **Streaming server authentication**: ([#36666](https://github.com/kubernetes/kubernetes/issues/36666)) CRI-containerd will be out-of-process with Kubelet, so it could not reuse Kubelet authentication. Its streaming server should implement its own authentication mechanism.
+* **Move container facilities into pod cgroup**: Container facilities including container image puller, container streaming handler, log handler and containerd-shim serve a specific container. They should be moved to the corresponding pod cgroup, and the overhead introduced by them should be charged to the pod.
+* **Log rotation**: ([#42718](https://github.com/kubernetes/kubernetes/issues/42718)) Container log rotation is under design. A function may be added in CRI to signal the runtime to reopen log file. CRI-containerd should implement that function after it is defined.
+* **Exec container**: With the flexibility provided by containerd, it is possible to implement `Exec` with a separate container sharing the same rootfs and mount namespace with the original container. The advantage is that the `Exec` container could have its own sub-cgroup, so that it will not consume the resources of the application container and the user could specify dedicated resources for it.
+* **Advanced image management**: The image management interface in CRI is relatively simple because the requirement of Kubelet image management is not clearly scoped out. In the future, we may want to leverage the flexibility provided by containerd more, e.g. estimate image size before pulling etc.
+* ...
+## Roadmap and Milestones
+### Milestones
+#### Kubernetes 1.7 - Q2
+* [P0] Basic container lifecycle.
+* [P0] Basic image management.
+* [P0] Container networking.
+* [P1] Container streaming/logging.
+* [P2] Container/ImageFS Metrics.
+
+*Test Plan: Each feature added should have unit test and pass its corresponding cri validation test.*
+#### Kubernetes 1.8 - Q3
+* [P0] Feature complete, pass 100% cri validation test.
+* [P0] Integrate CRI-containerd with Kubernetes, and build the e2e/node e2e test framework.
+* [P1] Address the debuggability problem.
+### Q2 Roadmap
+| Item | 1/2 Mar. | 2/2 Mar. | 1/2 Apr. | 2/2 Apr. | 1/2 May. | 2/2 May. |
+|:--------------------------------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
+| Survey | ✓ | | | | | |
+| POC | | ✓ | | | | |
+| Proposal | | | ✓ | | | |
+| Containerd Feature Complete | ✓ | ✓ | ✓ | | | |
+| Runtime Management Integration | | | ✓ | ✓ | ✓ | ✓ |
+| Image Management Integration | | | | ✓ | ✓ | ✓ |
+| Container Networking Integration | | | | | ✓ | ✓ |
diff --git a/docs/registry.md b/docs/registry.md
new file mode 100644
index 000000000..2bf1a6633
--- /dev/null
+++ b/docs/registry.md
@@ -0,0 +1,187 @@
+# Configure Image Registry
+
+This document describes the method to configure the image registry for `containerd` for use with the `cri` plugin.
+
+NOTE: The configuration syntax used in this doc is version 2, which has been the
+recommended format since `containerd` 1.3. If your configuration is still in version 1,
+you can replace `"io.containerd.grpc.v1.cri"` with `cri`.
+
+## Configure Registry Endpoint
+
+With containerd, `docker.io` is the default image registry. You can also set up other image registries similar to docker.
+
+To configure image registries create/modify the `/etc/containerd/config.toml` as follows:
+
+```toml
+# The config file is parsed as version 1 by default.
+# To use the long form of plugin names shown below, explicitly
+# opt in to the v2 config format by setting "version = 2".
+version = 2
+
+[plugin."io.containerd.grpc.v1.cri".registry.mirrors]
+ [plugin."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
+ endpoint = ["https://registry-1.docker.io"]
+ [plugin."io.containerd.grpc.v1.cri".registry.mirrors."test.https-registry.io"]
+ endpoint = ["https://HostIP1:Port1"]
+ [plugin."io.containerd.grpc.v1.cri".registry.mirrors."test.http-registry.io"]
+ endpoint = ["http://HostIP2:Port2"]
+ # wildcard matching is supported but not required.
+ [plugin."io.containerd.grpc.v1.cri".registry.mirrors."*"]
+ endpoint = ["https://HostIP3:Port3"]
+```
+
+The default configuration can be generated by `containerd config default > /etc/containerd/config.toml`.
+
+The endpoint is a list that can contain multiple image registry URLs split by commas. When pulling an image
+from a registry, containerd will try these endpoint URLs one by one, and use the first working one. Please note
+that if the default registry endpoint is not already specified in the endpoint list, it will be automatically
+tried at the end with scheme `https` and path `v2`, e.g. `https://gcr.io/v2` for `gcr.io`.
+
+As an example, for the image `gcr.io/library/busybox:latest`, the endpoints are:
+
+* `gcr.io` is configured: endpoints for `gcr.io` + default endpoint `https://gcr.io/v2`.
+* `*` is configured, and `gcr.io` is not: endpoints for `*` + default
+ endpoint `https://gcr.io/v2`.
+* None of above is configured: default endpoint `https://gcr.io/v2`.
+
+After modifying this config, you need to restart the `containerd` service.
+
+## Configure Registry TLS Communication
+
+`cri` plugin also supports configuring TLS settings when communicating with a registry.
+
+To configure the TLS settings for a specific registry, create/modify the `/etc/containerd/config.toml` as follows:
+
+```toml
+# explicitly use v2 config format
+version = 2
+
+# The registry host has to be a domain name or IP. Port number is also
+# needed if the default HTTPS or HTTP port is not used.
+[plugin."io.containerd.grpc.v1.cri".registry.configs."my.custom.registry".tls]
+ ca_file = "ca.pem"
+ cert_file = "cert.pem"
+ key_file = "key.pem"
+```
+
+In the config example shown above, TLS mutual authentication will be used for communications with the registry endpoint located at `my.custom.registry`.
+`ca_file` is the file name of the certificate authority (CA) certificate used to authenticate the x509 certificate/key pair specified by the files respectively pointed to by `cert_file` and `key_file`.
+
+`cert_file` and `key_file` are not needed when TLS mutual authentication is unused.
+
+```toml
+# explicitly use v2 config format
+version = 2
+
+[plugin."io.containerd.grpc.v1.cri".registry.configs."my.custom.registry".tls]
+ ca_file = "ca.pem"
+```
+
+To skip the registry certificate verification:
+
+```toml
+# explicitly use v2 config format
+version = 2
+
+[plugin."io.containerd.grpc.v1.cri".registry.configs."my.custom.registry".tls]
+ insecure_skip_verify = true
+```
+
+## Configure Registry Credentials
+
+`cri` plugin also supports docker like registry credential config.
+
+To configure a credential for a specific registry, create/modify the
+`/etc/containerd/config.toml` as follows:
+
+```toml
+# explicitly use v2 config format
+version = 2
+
+# The registry host has to be a domain name or IP. Port number is also
+# needed if the default HTTPS or HTTP port is not used.
+[plugin."io.containerd.grpc.v1.cri".registry.configs."gcr.io".auth]
+ username = ""
+ password = ""
+ auth = ""
+ identitytoken = ""
+```
+
+The meaning of each field is the same with the corresponding field in `.docker/config.json`.
+
+Please note that auth config passed by CRI takes precedence over this config.
+The registry credential in this config will only be used when auth config is
+not specified by Kubernetes via CRI.
+
+After modifying this config, you need to restart the `containerd` service.
+
+### Configure Registry Credentials Example - GCR with Service Account Key Authentication
+
+If you don't already have Google Container Registry (GCR) set up then you need to do the following steps:
+
+* Create a Google Cloud Platform (GCP) account and project if not already created (see [GCP getting started](https://cloud.google.com/gcp/getting-started))
+* Enable GCR for your project (see [Quickstart for Container Registry](https://cloud.google.com/container-registry/docs/quickstart))
+* For authentication to GCR: Create [service account and JSON key](https://cloud.google.com/container-registry/docs/advanced-authentication#json-key)
+* The JSON key file needs to be downloaded to your system from the GCP console
+* For access to the GCR storage: Add service account to the GCR storage bucket with storage admin access rights (see [Granting permissions](https://cloud.google.com/container-registry/docs/access-control#grant-bucket))
+
+Refer to [Pushing and pulling images](https://cloud.google.com/container-registry/docs/pushing-and-pulling) for detailed information on the above steps.
+
+> Note: The JSON key file is a multi-line file and it can be cumbersome to use the contents as a key outside of the file. It is worthwhile generating a single line format output of the file. One way of doing this is using the `jq` tool as follows: `jq -c . key.json`
+
+It is beneficial to first confirm that from your terminal you can authenticate with your GCR and have access to the storage before hooking it into containerd. This can be verified by performing a login to your GCR and
+pushing an image to it as follows:
+
+```console
+docker login -u _json_key -p "$(cat key.json)" gcr.io
+
+docker pull busybox
+
+docker tag busybox gcr.io/your-gcp-project-id/busybox
+
+docker push gcr.io/your-gcp-project-id/busybox
+
+docker logout gcr.io
+```
+
+Now that you know you can access your GCR from your terminal, it is time to try out containerd.
+
+Edit the containerd config (default location is at `/etc/containerd/config.toml`)
+to add your JSON key for `gcr.io` domain image pull
+requests:
+
+```toml
+version = 2
+
+[plugins."io.containerd.grpc.v1.cri".registry]
+ [plugins."io.containerd.grpc.v1.cri".registry.mirrors]
+ [plugins."io.containerd.grpc.v1.cri".registry.mirrors."docker.io"]
+ endpoint = ["https://registry-1.docker.io"]
+ [plugins."io.containerd.grpc.v1.cri".registry.mirrors."gcr.io"]
+ endpoint = ["https://gcr.io"]
+ [plugins."io.containerd.grpc.v1.cri".registry.configs]
+ [plugins."io.containerd.grpc.v1.cri".registry.configs."gcr.io".auth]
+ username = "_json_key"
+ password = 'paste output from jq'
+```
+
+> Note: `username` of `_json_key` signifies that JSON key authentication will be used.
+
+Restart containerd:
+
+```console
+service containerd restart
+```
+
+Pull an image from your GCR with `crictl`:
+
+```console
+$ sudo crictl pull gcr.io/your-gcp-project-id/busybox
+
+DEBU[0000] get image connection
+DEBU[0000] connect using endpoint 'unix:///run/containerd/containerd.sock' with '3s' timeout
+DEBU[0000] connected successfully using endpoint: unix:///run/containerd/containerd.sock
+DEBU[0000] PullImageRequest: &PullImageRequest{Image:&ImageSpec{Image:gcr.io/your-gcr-instance-id/busybox,},Auth:nil,SandboxConfig:nil,}
+DEBU[0001] PullImageResponse: &PullImageResponse{ImageRef:sha256:78096d0a54788961ca68393e5f8038704b97d8af374249dc5c8faec1b8045e42,}
+Image is up to date for sha256:78096d0a54788961ca68393e5f8038704b97d8af374249dc5c8faec1b8045e42
+```
diff --git a/docs/release.md b/docs/release.md
new file mode 100644
index 000000000..1fc32264c
--- /dev/null
+++ b/docs/release.md
@@ -0,0 +1,27 @@
+# Release Process
+This document describes how to cut a `cri` plugin release.
+
+## Step 1: Update containerd vendor
+Update the version of containerd located in `containerd/cri/vendor.conf`
+to the latest version of containerd for the desired branch of containerd,
+and make sure all tests in CI (https://k8s-testgrid.appspot.com/sig-node-containerd) pass.
+## Step 2: Cut the release
+Draft and tag a new release in https://github.com/containerd/cri/releases.
+## Step 3: Update `cri` version in containerd
+Push a PR to `containerd/containerd` that updates the version of
+`containerd/cri` in `containerd/containerd/vendor.conf` to the newly
+tagged release created in Step 2.
+## Step 4: Iterate step 1 updating containerd vendor
+## Step 5: Publish release tarball for Kubernetes
+Publish the release tarball `cri-containerd-${CONTAINERD_VERSION}.${OS}-${ARCH}.tar.gz`
+```shell
+# Checkout `containerd/cri` to the newly released version.
+git checkout ${RELEASE_VERSION}
+
+# Publish the release tarball without cni.
+DEPLOY_BUCKET=cri-containerd-release make push TARBALL_PREFIX=cri-containerd OFFICIAL_RELEASE=true VERSION=${CONTAINERD_VERSION}
+
+# Publish the release tarball with cni.
+DEPLOY_BUCKET=cri-containerd-release make push TARBALL_PREFIX=cri-containerd-cni OFFICIAL_RELEASE=true INCLUDE_CNI=true VERSION=${CONTAINERD_VERSION}
+```
+## Step 6: Update release note with release tarball information
diff --git a/docs/testing.md b/docs/testing.md
new file mode 100644
index 000000000..ba74db5d2
--- /dev/null
+++ b/docs/testing.md
@@ -0,0 +1,58 @@
+CRI Plugin Testing Guide
+========================
+This document assumes you have already setup the development environment (go, git, `containerd/cri` repo etc.).
+
+Before sending pull requests you should at least make sure your changes have passed code verification, unit, integration and CRI validation tests.
+## Code Verification
+Code verification includes lint, and code formatting check etc.
+* Install tools used by code verification:
+```bash
+make install.tools
+```
+***Note:*** Some make actions (like `install.tools`) use the user's `GOPATH` and will otherwise not work when it is not set. Other make actions override it by setting it to a temporary directory for release build and testing purposes.
+* Run code verification:
+```bash
+make verify
+```
+## Unit Test
+Run all unit tests in `containerd/cri` repo.
+```bash
+make test
+```
+## Integration Test
+Run all integration tests in `containerd/cri` repo.
+* [Install dependencies](../README.md#install-dependencies).
+* Run integration test:
+```bash
+make test-integration
+```
+## CRI Validation Test
+[CRI validation test](https://github.com/kubernetes/community/blob/master/contributors/devel/cri-validation.md) is a test framework for validating that a Container Runtime Interface (CRI) implementation such as containerd with the `cri` plugin meets all the requirements necessary to manage pod sandboxes, containers, images etc.
+
+CRI validation test makes it possible to verify CRI conformance of `containerd/cri` without setting up Kubernetes components or running Kubernetes end-to-end tests.
+* [Install dependencies](../README.md#install-dependencies).
+* Build containerd with the `cri` plugin:
+```bash
+make
+```
+* Run CRI validation test:
+```bash
+make test-cri
+```
+* Focus or skip specific CRI validation test:
+```bash
+make test-cri FOCUS=REGEXP_TO_FOCUS SKIP=REGEXP_TO_SKIP
+```
+[More information](https://github.com/kubernetes-sigs/cri-tools) about CRI validation test.
+## Node E2E Test
+[Node e2e test](https://github.com/kubernetes/community/blob/master/contributors/devel/e2e-node-tests.md) is a test framework testing Kubernetes node level functionalities such as managing pods, mounting volumes etc. It starts a local cluster with Kubelet and a few other minimum dependencies, and runs node functionality tests against the local cluster.
+* [Install dependencies](../README.md#install-dependencies).
+* Run node e2e test:
+```bash
+make test-e2e-node
+```
+* Focus or skip specific node e2e test:
+```bash
+make test-e2e-node FOCUS=REGEXP_TO_FOCUS SKIP=REGEXP_TO_SKIP
+```
+[More information](https://github.com/kubernetes/community/blob/master/contributors/devel/e2e-node-tests.md) about Kubernetes node e2e test.
diff --git a/hack/boilerplate/boilerplate b/hack/boilerplate/boilerplate
new file mode 100644
index 000000000..c073fa4ad
--- /dev/null
+++ b/hack/boilerplate/boilerplate
@@ -0,0 +1,15 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
diff --git a/hack/install/install-cni-config.sh b/hack/install/install-cni-config.sh
new file mode 100755
index 000000000..93f574408
--- /dev/null
+++ b/hack/install/install-cni-config.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+# Copyright The containerd Authors.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+source $(dirname "${BASH_SOURCE[0]}")/utils.sh
+CNI_CONFIG_DIR=${DESTDIR}/etc/cni/net.d
+${SUDO} mkdir -p ${CNI_CONFIG_DIR}
+${SUDO} bash -c 'cat >'${CNI_CONFIG_DIR}'/10-containerd-net.conflist <"'"${CRICTL_CONFIG_DIR}"'"/crictl.yaml <= 8 )); then
+ mask=255
+ elif (( len > 0 )); then
+ mask=$(( 256 - 2 ** ( 8 - len ) ))
+ else
+ mask=0
+ fi
+ (( len -= 8 ))
+ result_array[i]=$(( gateway_array[i] & mask ))
+ done
+ result="$(printf ".%s" "${result_array[@]}")"
+ result="${result:1}"
+ echo "$result/$((32 - prefix_len))"
+}
+
+# nat already exists on the Windows VM, the subnet and gateway
+# we specify should match that.
+gateway="$(powershell -c "(Get-NetIPAddress -InterfaceAlias 'vEthernet (nat)' -AddressFamily IPv4).IPAddress")"
+prefix_len="$(powershell -c "(Get-NetIPAddress -InterfaceAlias 'vEthernet (nat)' -AddressFamily IPv4).PrefixLength")"
+
+subnet="$(calculate_subnet "$gateway" "$prefix_len")"
+
+# The "name" field in the config is used as the underlying
+# network type right now (see
+# https://github.com/microsoft/windows-container-networking/pull/45),
+# so it must match a network type in:
+# https://docs.microsoft.com/en-us/windows-server/networking/technologies/hcn/hcn-json-document-schemas
+bash -c 'cat >"'"${CNI_CONFIG_DIR}"'"/0-containerd-nat.conf < /dev/null; then
+ create_ttl_bucket ${DEPLOY_BUCKET}
+fi
+
+if [ -z "${DEPLOY_DIR}" ]; then
+ DEPLOY_PATH="${DEPLOY_BUCKET}"
+else
+ DEPLOY_PATH="${DEPLOY_BUCKET}/${DEPLOY_DIR}"
+fi
+
+# TODO(random-liu): Add checksum for the tarball.
+gsutil cp ${release_tar} "gs://${DEPLOY_PATH}/"
+gsutil cp ${release_tar_checksum} "gs://${DEPLOY_PATH}/"
+echo "Release tarball is uploaded to:
+ https://storage.googleapis.com/${DEPLOY_PATH}/${TARBALL}"
+
+if ${PUSH_VERSION}; then
+ if [[ -z "${VERSION}" ]]; then
+ echo "VERSION is not set"
+ exit 1
+ fi
+ echo ${VERSION} | gsutil cp - "gs://${DEPLOY_PATH}/${LATEST}"
+ echo "Latest version is uploaded to:
+ https://storage.googleapis.com/${DEPLOY_PATH}/${LATEST}"
+fi
diff --git a/hack/release-windows.sh b/hack/release-windows.sh
new file mode 100755
index 000000000..da01f5384
--- /dev/null
+++ b/hack/release-windows.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+# Copyright The containerd Authors.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+source $(dirname "${BASH_SOURCE[0]}")/utils.sh
+cd ${ROOT}
+
+umask 0022
+
+# BUILD_DIR is the directory to generate release tar.
+# TARBALL is the name of the release tar.
+BUILD_DIR=${BUILD_DIR:-"_output"}
+# Convert to absolute path if it's relative.
+if [[ ${BUILD_DIR} != /* ]]; then
+ BUILD_DIR=${ROOT}/${BUILD_DIR}
+fi
+TARBALL=${TARBALL:-"cri-containerd.tar.gz"}
+# INCLUDE_CNI indicates whether to install CNI. By default don't
+# include CNI in release tarball.
+INCLUDE_CNI=${INCLUDE_CNI:-false}
+# CUSTOM_CONTAINERD indicates whether to install customized containerd
+# for CI test.
+CUSTOM_CONTAINERD=${CUSTOM_CONTAINERD:-false}
+
+destdir=${BUILD_DIR}/release-stage
+
+if [[ -z "${VERSION}" ]]; then
+ echo "VERSION is not set"
+ exit 1
+fi
+
+# Remove release-stage directory to avoid including old files.
+rm -rf ${destdir}
+
+# Install dependencies into release stage.
+# Install hcsshim
+HCSSHIM_DIR=${destdir} ./hack/install/windows/install-hcsshim.sh
+
+if ${INCLUDE_CNI}; then
+ # Install cni
+ NOSUDO=true WINCNI_BIN_DIR=${destdir}/cni ./hack/install/windows/install-cni.sh
+fi
+
+# Build containerd from source
+NOSUDO=true CONTAINERD_DIR=${destdir} ./hack/install/install-containerd.sh
+# Containerd makefile always installs into a "bin" directory.
+mv "${destdir}"/bin/* "${destdir}"
+rm -rf "${destdir}/bin"
+
+if ${CUSTOM_CONTAINERD}; then
+ make install -e BINDIR=${destdir}
+fi
+
+# Create release tar
+tarball=${BUILD_DIR}/${TARBALL}
+tar -zcvf ${tarball} -C ${destdir} . --owner=0 --group=0
+checksum=$(sha256 ${tarball})
+echo "sha256sum: ${checksum} ${tarball}"
+echo ${checksum} > ${tarball}.sha256
diff --git a/hack/release.sh b/hack/release.sh
new file mode 100755
index 000000000..a70c53268
--- /dev/null
+++ b/hack/release.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+
+# Copyright The containerd Authors.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+source $(dirname "${BASH_SOURCE[0]}")/utils.sh
+cd ${ROOT}
+
+umask 0022
+
+# BUILD_DIR is the directory to generate release tar.
+# TARBALL is the name of the release tar.
+BUILD_DIR=${BUILD_DIR:-"_output"}
+# Convert to absolute path if it's relative.
+if [[ ${BUILD_DIR} != /* ]]; then
+ BUILD_DIR=${ROOT}/${BUILD_DIR}
+fi
+TARBALL=${TARBALL:-"cri-containerd.tar.gz"}
+# INCLUDE_CNI indicates whether to install CNI. By default don't
+# include CNI in release tarball.
+INCLUDE_CNI=${INCLUDE_CNI:-false}
+# CUSTOM_CONTAINERD indicates whether to install customized containerd
+# for CI test.
+CUSTOM_CONTAINERD=${CUSTOM_CONTAINERD:-false}
+# OFFICIAL_RELEASE indicates whether to use official containerd release.
+OFFICIAL_RELEASE=${OFFICIAL_RELEASE:-false}
+# LOCAL_RELEASE indicates that containerd has been built and released
+# locally.
+LOCAL_RELEASE=${LOCAL_RELEASE:-false}
+if [ -z "${GOOS:-}" ]
+then
+ GOOS=$(go env GOOS)
+fi
+if [ -z "${GOARCH:-}" ]
+then
+ GOARCH=$(go env GOARCH)
+fi
+
+
+destdir=${BUILD_DIR}/release-stage
+
+if [[ -z "${VERSION}" ]]; then
+ echo "VERSION is not set"
+ exit 1
+fi
+
+# Remove release-stage directory to avoid including old files.
+rm -rf ${destdir}
+
+# download_containerd downloads containerd from official release.
+download_containerd() {
+ local -r tmppath="$(mktemp -d /tmp/download-containerd.XXXX)"
+ local -r tarball="${tmppath}/containerd.tar.gz"
+ local -r url="https://github.com/containerd/containerd/releases/download/v${VERSION}/containerd-${VERSION}.linux-amd64.tar.gz"
+ wget -O "${tarball}" "${url}"
+ tar -C "${destdir}/usr/local" -xzf "${tarball}"
+ rm -rf "${tmppath}"
+}
+
+# copy_local_containerd copies local containerd release.
+copy_local_containerd() {
+ local -r tarball="${GOPATH}/src/github.com/containerd/containerd/releases/containerd-${VERSION}.${GOOS}-${GOARCH}.tar.gz"
+ if [[ ! -e "${tarball}" ]]; then
+ echo "Containerd release is not built"
+ exit 1
+ fi
+ tar -C "${destdir}/usr/local" -xzf "${tarball}"
+}
+
+# Install dependencies into release stage.
+# Install runc
+NOSUDO=true DESTDIR=${destdir} ./hack/install/install-runc.sh
+
+if ${INCLUDE_CNI}; then
+ # Install cni
+ NOSUDO=true DESTDIR=${destdir} ./hack/install/install-cni.sh
+fi
+
+# Install critools
+NOSUDO=true DESTDIR=${destdir} ./hack/install/install-critools.sh
+
+# Install containerd
+if $OFFICIAL_RELEASE; then
+ download_containerd
+elif $LOCAL_RELEASE; then
+ copy_local_containerd
+else
+ # Build containerd from source
+ NOSUDO=true DESTDIR=${destdir} ./hack/install/install-containerd.sh
+fi
+
+if ${CUSTOM_CONTAINERD}; then
+ make install -e DESTDIR=${destdir}
+fi
+
+# Install systemd units into release stage.
+mkdir -p ${destdir}/etc/systemd/system
+cp ${ROOT}/contrib/systemd-units/* ${destdir}/etc/systemd/system/
+# Install cluster directory into release stage.
+mkdir -p ${destdir}/opt/containerd
+cp -r ${ROOT}/cluster ${destdir}/opt/containerd
+# Write a version file into the release tarball.
+cat > ${destdir}/opt/containerd/cluster/version < ${tarball}.sha256
diff --git a/hack/sort-vendor.sh b/hack/sort-vendor.sh
new file mode 100755
index 000000000..988d1560d
--- /dev/null
+++ b/hack/sort-vendor.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# Copyright The containerd Authors.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+source $(dirname "${BASH_SOURCE[0]}")/utils.sh
+cd ${ROOT}
+
+echo "Sort vendor.conf..."
+tmpdir="$(mktemp -d)"
+trap "rm -rf ${tmpdir}" EXIT
+
+awk -v RS= '{print > "'${tmpdir}/'TMP."NR}' vendor.conf
+for file in ${tmpdir}/*; do
+ if [[ -e "${tmpdir}/vendor.conf" ]]; then
+ echo >> "${tmpdir}/vendor.conf"
+ fi
+ sort -Vru "${file}" >> "${tmpdir}/vendor.conf"
+done
+
+mv "${tmpdir}/vendor.conf" vendor.conf
+
+echo "Please commit the change made by this file..."
diff --git a/hack/sync-vendor.sh b/hack/sync-vendor.sh
new file mode 100755
index 000000000..c5feaf6cd
--- /dev/null
+++ b/hack/sync-vendor.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+# Copyright The containerd Authors.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+source $(dirname "${BASH_SOURCE[0]}")/utils.sh
+cd ${ROOT}
+
+echo "Compare vendor with containerd vendors..."
+containerd_vendor=$(mktemp /tmp/containerd-vendor.conf.XXXX)
+from-vendor CONTAINERD github.com/containerd/containerd
+curl -s https://raw.githubusercontent.com/${CONTAINERD_REPO#*/}/${CONTAINERD_VERSION}/vendor.conf > ${containerd_vendor}
+# Create a temporary vendor file to update.
+tmp_vendor=$(mktemp /tmp/vendor.conf.XXXX)
+while read vendor; do
+ repo=$(echo ${vendor} | awk '{print $1}')
+ commit=$(echo ${vendor} | awk '{print $2}')
+ alias=$(echo ${vendor} | awk '{print $3}')
+ vendor_in_containerd=$(grep ${repo} ${containerd_vendor} || true)
+ if [ -z "${vendor_in_containerd}" ]; then
+ echo ${vendor} >> ${tmp_vendor}
+ continue
+ fi
+ commit_in_containerd=$(echo ${vendor_in_containerd} | awk '{print $2}')
+ alias_in_containerd=$(echo ${vendor_in_containerd} | awk '{print $3}')
+ if [[ "${commit}" != "${commit_in_containerd}" || "${alias}" != "${alias_in_containerd}" ]]; then
+ echo ${vendor_in_containerd} >> ${tmp_vendor}
+ else
+ echo ${vendor} >> ${tmp_vendor}
+ fi
+done < vendor.conf
+# Update vendors if temporary vendor.conf is different from the original one.
+if ! diff vendor.conf ${tmp_vendor} > /dev/null; then
+ if [ $# -gt 0 ] && [ ${1} = "-only-verify" ]; then
+ echo "Need to update vendor.conf."
+ diff vendor.conf ${tmp_vendor}
+ rm ${tmp_vendor}
+ exit 1
+ else
+ echo "Updating vendor.conf."
+ mv ${tmp_vendor} vendor.conf
+ fi
+fi
+rm ${containerd_vendor}
diff --git a/hack/test-cri.sh b/hack/test-cri.sh
new file mode 100755
index 000000000..3667bece8
--- /dev/null
+++ b/hack/test-cri.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+# Copyright The containerd Authors.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o nounset
+set -o pipefail
+
+source $(dirname "${BASH_SOURCE[0]}")/test-utils.sh
+
+# FOCUS focuses the test to run.
+FOCUS=${FOCUS:-}
+# SKIP skips the test to skip.
+SKIP=${SKIP:-""}
+# REPORT_DIR is the directory to store test logs.
+REPORT_DIR=${REPORT_DIR:-"/tmp/test-cri"}
+# RUNTIME is the runtime handler to use in the test.
+RUNTIME=${RUNTIME:-""}
+
+# Check GOPATH
+if [[ -z "${GOPATH}" ]]; then
+ echo "GOPATH is not set"
+ exit 1
+fi
+
+# For multiple GOPATHs, keep the first one only
+GOPATH=${GOPATH%%:*}
+
+CRITEST=${GOPATH}/bin/critest
+
+GINKGO_PKG=github.com/onsi/ginkgo/ginkgo
+
+# Install ginkgo
+if [ ! -x "$(command -v ginkgo)" ]; then
+ go get -u ${GINKGO_PKG}
+fi
+
+# Install critest
+if [ ! -x "$(command -v ${CRITEST})" ]; then
+ go get -d ${CRITOOL_PKG}/...
+ cd ${GOPATH}/src/${CRITOOL_PKG}
+ git fetch --all
+ git checkout ${CRITOOL_VERSION}
+ make critest
+ make install-critest -e BINDIR="${GOPATH}/bin"
+fi
+which ${CRITEST}
+
+mkdir -p ${REPORT_DIR}
+test_setup ${REPORT_DIR}
+
+# Run cri validation test
+sudo env PATH=${PATH} GOPATH=${GOPATH} ${CRITEST} --runtime-endpoint=${CONTAINERD_SOCK} --ginkgo.focus="${FOCUS}" --ginkgo.skip="${SKIP}" --parallel=8 --runtime-handler=${RUNTIME}
+test_exit_code=$?
+
+test_teardown
+
+exit ${test_exit_code}
diff --git a/hack/test-e2e-node.sh b/hack/test-e2e-node.sh
new file mode 100755
index 000000000..9038a7f22
--- /dev/null
+++ b/hack/test-e2e-node.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+# Copyright The containerd Authors.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o nounset
+set -o pipefail
+
+source $(dirname "${BASH_SOURCE[0]}")/test-utils.sh
+
+DEFAULT_SKIP="\[Flaky\]|\[Slow\]|\[Serial\]"
+DEFAULT_SKIP+="|querying\s\/stats\/summary"
+
+# FOCUS focuses the test to run.
+export FOCUS=${FOCUS:-""}
+# SKIP skips the test to skip.
+export SKIP=${SKIP:-${DEFAULT_SKIP}}
+# REPORT_DIR is the directory to store test logs.
+REPORT_DIR=${REPORT_DIR:-"/tmp/test-e2e-node"}
+# UPLOAD_LOG indicates whether to upload test log to gcs.
+UPLOAD_LOG=${UPLOAD_LOG:-false}
+# TIMEOUT is the timeout of the test.
+TIMEOUT=${TIMEOUT:-"40m"}
+# FAIL_SWAP_ON makes kubelet fail when swap is on.
+# Many dev environments run with swap on, so we don't fail by default.
+FAIL_SWAP_ON=${FAIL_SWAP_ON:-"false"}
+
+# Check GOPATH
+if [[ -z "${GOPATH}" ]]; then
+ echo "GOPATH is not set"
+ exit 1
+fi
+
+ORIGINAL_RULES=`mktemp`
+sudo iptables-save > ${ORIGINAL_RULES}
+
+# Update ip firewall
+# We need to add rules to accept all TCP/UDP/ICMP packets.
+if sudo iptables -L INPUT | grep "Chain INPUT (policy DROP)" > /dev/null; then
+ sudo iptables -A INPUT -w -p TCP -j ACCEPT
+ sudo iptables -A INPUT -w -p UDP -j ACCEPT
+ sudo iptables -A INPUT -w -p ICMP -j ACCEPT
+fi
+if sudo iptables -L FORWARD | grep "Chain FORWARD (policy DROP)" > /dev/null; then
+ sudo iptables -A FORWARD -w -p TCP -j ACCEPT
+ sudo iptables -A FORWARD -w -p UDP -j ACCEPT
+ sudo iptables -A FORWARD -w -p ICMP -j ACCEPT
+fi
+
+# For multiple GOPATHs, keep the first one only
+GOPATH=${GOPATH%%:*}
+
+# Get kubernetes
+KUBERNETES_REPO="https://github.com/kubernetes/kubernetes"
+KUBERNETES_PATH="${GOPATH}/src/k8s.io/kubernetes"
+if [ ! -d "${KUBERNETES_PATH}" ]; then
+ mkdir -p ${KUBERNETES_PATH}
+ cd ${KUBERNETES_PATH}
+ git clone https://${KUBERNETES_REPO} .
+fi
+cd ${KUBERNETES_PATH}
+git fetch --all
+git checkout ${KUBERNETES_VERSION}
+
+mkdir -p ${REPORT_DIR}
+test_setup ${REPORT_DIR}
+
+timeout "${TIMEOUT}" make test-e2e-node \
+ RUNTIME=remote \
+ CONTAINER_RUNTIME_ENDPOINT=unix://${CONTAINERD_SOCK} \
+ ARTIFACTS=${REPORT_DIR} \
+ TEST_ARGS='--kubelet-flags=--cgroups-per-qos=true \
+ --kubelet-flags=--cgroup-root=/ \
+ --kubelet-flags=--fail-swap-on='${FAIL_SWAP_ON}' \
+ --prepull-images=false'
+test_exit_code=$?
+
+test_teardown
+
+sudo iptables-restore < ${ORIGINAL_RULES}
+rm ${ORIGINAL_RULES}
+
+# UPLOAD_LOG_PATH is bucket to upload test logs.
+UPLOAD_LOG_PATH=cri-containerd_test-e2e-node
+if ${UPLOAD_LOG}; then
+ if [ -z "${VERSION}" ]; then
+ echo "VERSION is not set"
+ exit 1
+ fi
+ upload_logs_to_gcs "${UPLOAD_LOG_PATH}" "${VERSION}-$(date +%Y%m%d-%H%M%S)" "${REPORT_DIR}"
+fi
+
+exit ${test_exit_code}
diff --git a/hack/test-integration.sh b/hack/test-integration.sh
new file mode 100755
index 000000000..820d030ef
--- /dev/null
+++ b/hack/test-integration.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+# Copyright The containerd Authors.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o nounset
+set -o pipefail
+
+source $(dirname "${BASH_SOURCE[0]}")/test-utils.sh
+cd ${ROOT}
+
+# FOCUS focuses the test to run.
+FOCUS=${FOCUS:-""}
+# REPORT_DIR is the directory to store test logs.
+REPORT_DIR=${REPORT_DIR:-"/tmp/test-integration"}
+# RUNTIME is the runtime handler to use in the test.
+RUNTIME=${RUNTIME:-""}
+
+CRI_ROOT="${CONTAINERD_ROOT}/io.containerd.grpc.v1.cri"
+
+mkdir -p ${REPORT_DIR}
+test_setup ${REPORT_DIR}
+
+# Run integration test.
+sudo PATH=${PATH} ${ROOT}/_output/integration.test --test.run="${FOCUS}" --test.v \
+ --cri-endpoint=${CONTAINERD_SOCK} \
+ --cri-root=${CRI_ROOT} \
+ --runtime-handler=${RUNTIME} \
+ --containerd-bin=${CONTAINERD_BIN}
+
+test_exit_code=$?
+
+test_teardown
+
+exit ${test_exit_code}
diff --git a/hack/test-utils.sh b/hack/test-utils.sh
new file mode 100755
index 000000000..bde573b88
--- /dev/null
+++ b/hack/test-utils.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+
+# Copyright The containerd Authors.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+source $(dirname "${BASH_SOURCE[0]}")/utils.sh
+
+# RESTART_WAIT_PERIOD is the period to wait before restarting containerd.
+RESTART_WAIT_PERIOD=${RESTART_WAIT_PERIOD:-10}
+# CONTAINERD_FLAGS contains all containerd flags.
+CONTAINERD_FLAGS="--log-level=debug "
+
+# Use a configuration file for containerd.
+CONTAINERD_CONFIG_FILE=${CONTAINERD_CONFIG_FILE:-""}
+if [ -z "${CONTAINERD_CONFIG_FILE}" ] && command -v sestatus >/dev/null 2>&1; then
+ selinux_config="/tmp/containerd-config-selinux.toml"
+ cat >${selinux_config} <<<'
+[plugins.cri]
+ enable_selinux = true
+'
+ CONTAINERD_CONFIG_FILE=${CONTAINERD_CONFIG_FILE:-"${selinux_config}"}
+fi
+
+# CONTAINERD_TEST_SUFFIX is the suffix appended to the root/state directory used
+# by test containerd.
+CONTAINERD_TEST_SUFFIX=${CONTAINERD_TEST_SUFFIX:-"-test"}
+# The containerd root directory.
+CONTAINERD_ROOT=${CONTAINERD_ROOT:-"/var/lib/containerd${CONTAINERD_TEST_SUFFIX}"}
+# The containerd state directory.
+CONTAINERD_STATE=${CONTAINERD_STATE:-"/run/containerd${CONTAINERD_TEST_SUFFIX}"}
+# The containerd socket address.
+CONTAINERD_SOCK=${CONTAINERD_SOCK:-unix://${CONTAINERD_STATE}/containerd.sock}
+# The containerd binary name.
+CONTAINERD_BIN=${CONTAINERD_BIN:-"containerd${CONTAINERD_TEST_SUFFIX}"}
+if [ -f "${CONTAINERD_CONFIG_FILE}" ]; then
+ CONTAINERD_FLAGS+="--config ${CONTAINERD_CONFIG_FILE} "
+fi
+CONTAINERD_FLAGS+="--address ${CONTAINERD_SOCK#"unix://"} \
+ --state ${CONTAINERD_STATE} \
+ --root ${CONTAINERD_ROOT}"
+
+containerd_groupid=
+
+# test_setup starts containerd.
+test_setup() {
+ local report_dir=$1
+ # Start containerd
+ if [ ! -x "${ROOT}/_output/containerd" ]; then
+ echo "containerd is not built"
+ exit 1
+ fi
+ # rename the test containerd binary, so that we can easily
+ # distinguish it.
+ cp ${ROOT}/_output/containerd ${ROOT}/_output/${CONTAINERD_BIN}
+ set -m
+ # Create containerd in a different process group
+ # so that we can easily clean them up.
+ keepalive "sudo PATH=${PATH} ${ROOT}/_output/${CONTAINERD_BIN} ${CONTAINERD_FLAGS}" \
+ ${RESTART_WAIT_PERIOD} &> ${report_dir}/containerd.log &
+ pid=$!
+ set +m
+ containerd_groupid=$(ps -o pgid= -p ${pid})
+ # Wait for containerd to be running by using the containerd client ctr to check the version
+ # of the containerd server. Wait an increasing amount of time after each of five attempts
+ local -r ctr_path=$(which ctr)
+ if [ -z "${ctr_path}" ]; then
+ echo "ctr is not in PATH"
+ exit 1
+ fi
+ local -r crictl_path=$(which crictl)
+ if [ -z "${crictl_path}" ]; then
+ echo "crictl is not in PATH"
+ exit 1
+ fi
+ readiness_check "sudo ${ctr_path} --address ${CONTAINERD_SOCK#"unix://"} version"
+ readiness_check "sudo ${crictl_path} --runtime-endpoint=${CONTAINERD_SOCK} info"
+}
+
+# test_teardown kills containerd.
+test_teardown() {
+ if [ -n "${containerd_groupid}" ]; then
+ sudo pkill -g ${containerd_groupid}
+ fi
+}
+
+# keepalive runs a command and keeps it alive.
+# keepalive process is eventually killed in test_teardown.
+keepalive() {
+ local command=$1
+ echo ${command}
+ local wait_period=$2
+ while true; do
+ ${command}
+ sleep ${wait_period}
+ done
+}
+
+# readiness_check checks readiness of a daemon with specified command.
+readiness_check() {
+ local command=$1
+ local MAX_ATTEMPTS=5
+ local attempt_num=1
+ until ${command} &> /dev/null; do
+ if (( attempt_num == MAX_ATTEMPTS )); then echo "\"$command\" still failing after $MAX_ATTEMPTS attempts" >&2; return 1; fi
+ echo "$attempt_num attempt \"$command\"! Trying again in $attempt_num seconds..."
+ sleep $(( attempt_num++ ))
+ done
+}
diff --git a/hack/update-proto.sh b/hack/update-proto.sh
new file mode 100755
index 000000000..505bb8d6c
--- /dev/null
+++ b/hack/update-proto.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# Copyright The containerd Authors.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"/..
+API_ROOT="${ROOT}/${API_PATH-"pkg/api/v1"}"
+
+go get k8s.io/code-generator/cmd/go-to-protobuf/protoc-gen-gogo
+if ! which protoc-gen-gogo >/dev/null; then
+ echo "protoc-gen-gogo is not found in PATH; ensure \$GOPATH/bin is in PATH"
+ exit 1
+fi
+
+function cleanup {
+ rm -f ${API_ROOT}/api.pb.go.bak
+}
+
+trap cleanup EXIT
+
+protoc \
+ --proto_path="${API_ROOT}" \
+ --proto_path="${ROOT}/vendor" \
+ --gogo_out=plugins=grpc:${API_ROOT} ${API_ROOT}/api.proto
+
+# Update boilerplate for the generated file.
+echo "$(cat hack/boilerplate/boilerplate ${API_ROOT}/api.pb.go)" > ${API_ROOT}/api.pb.go
+
+gofmt -l -s -w ${API_ROOT}/api.pb.go
diff --git a/hack/utils.sh b/hack/utils.sh
new file mode 100755
index 000000000..13a6a3ff5
--- /dev/null
+++ b/hack/utils.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+
+# Copyright The containerd Authors.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"/..
+
+# Not from vendor.conf.
+KUBERNETES_VERSION="v1.19.0-beta.2"
+CRITOOL_VERSION=${CRITOOL_VERSION:-baca4a152dfe671fc17911a7af74bcb61680ee39}
+CRITOOL_PKG=github.com/kubernetes-sigs/cri-tools
+CRITOOL_REPO=github.com/kubernetes-sigs/cri-tools
+
+# VENDOR is the path to vendor.conf.
+VENDOR=${VENDOR:-"${ROOT}/vendor.conf"}
+
+# upload_logs_to_gcs uploads test logs to gcs.
+# Var set:
+# 1. Bucket: gcs bucket to upload logs.
+# 2. Dir: directory name to upload logs.
+# 3. Test Result: directory of the test result.
+upload_logs_to_gcs() {
+ local -r bucket=$1
+ local -r dir=$2
+ local -r result=$3
+ if ! gsutil ls "gs://${bucket}" > /dev/null; then
+ create_ttl_bucket ${bucket}
+ fi
+ local -r upload_log_path=${bucket}/${dir}
+ gsutil cp -r "${result}" "gs://${upload_log_path}"
+ echo "Test logs are uploaded to:
+ http://gcsweb.k8s.io/gcs/${upload_log_path}/"
+}
+
+# create_ttl_bucket create a public bucket in which all objects
+# have a default TTL (30 days).
+# Var set:
+# 1. Bucket: gcs bucket name.
+create_ttl_bucket() {
+ local -r bucket=$1
+ gsutil mb "gs://${bucket}"
+ local -r bucket_rule=$(mktemp)
+ # Set 30 day TTL for logs inside the bucket.
+ echo '{"rule": [{"action": {"type": "Delete"},"condition": {"age": 30}}]}' > ${bucket_rule}
+ gsutil lifecycle set "${bucket_rule}" "gs://${bucket}"
+ rm "${bucket_rule}"
+
+ gsutil -m acl ch -g all:R "gs://${bucket}"
+ gsutil defacl set public-read "gs://${bucket}"
+}
+
+# sha256 generates a sha256 checksum for a file.
+# Var set:
+# 1. Filename.
+sha256() {
+ if which sha256sum >/dev/null 2>&1; then
+ sha256sum "$1" | awk '{ print $1 }'
+ else
+ shasum -a256 "$1" | awk '{ print $1 }'
+ fi
+}
+
+# Takes a prefix ($what) and a $repo and sets `$what_VERSION` and
+# `$what_REPO` from vendor.conf, where `$what_REPO` defaults to $repo
+# but is overridden by the 3rd field of vendor.conf.
+from-vendor() {
+ local what=$1
+ local repo=$2
+ local vendor=$VENDOR
+ setvars=$(awk -v REPO=$repo -v WHAT=$what -- '
+ BEGIN { rc=1 } # Assume we did not find what we were looking for.
+ // {
+ if ($1 == REPO) {
+ if ($3 != "" && $3 !~ /#.*/ ) { gsub(/http.*\/\//, "", $3); REPO = $3 }; # Override repo.
+ printf("%s_VERSION=%s; %s_REPO=%s\n", WHAT, $2, WHAT, REPO);
+ rc=0; # Note success for use in END block.
+ exit # No point looking further.
+ }
+ }
+ END { exit rc } # Exit with the desired code.
+ ' $vendor)
+ if [ $? -ne 0 ] ; then
+ echo "failed to get version of $repo from $vendor" >&2
+ exit 1
+ fi
+ eval $setvars
+}
+
+# yaml-quote quotes something appropriate for a yaml string.
+# This is the same with:
+# https://github.com/kubernetes/kubernetes/blob/v1.10.1/cluster/gce/util.sh#L471.
+yaml-quote() {
+ echo "'$(echo "${@:-}" | sed -e "s/'/''/g")'"
+}
diff --git a/hack/verify-gofmt.sh b/hack/verify-gofmt.sh
new file mode 100755
index 000000000..60992016f
--- /dev/null
+++ b/hack/verify-gofmt.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Copyright The containerd Authors.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+find_files() {
+ find . -not \( \
+ \( \
+ -wholename '*/vendor/*' \
+ \) -prune \
+ \) -name '*.go'
+}
+
+GOFMT="gofmt -s"
+bad_files=$(find_files | xargs $GOFMT -l)
+if [[ -n "${bad_files}" ]]; then
+ echo "!!! '$GOFMT' needs to be run on the following files: "
+ echo "${bad_files}"
+ exit 1
+fi
diff --git a/hack/verify-vendor.sh b/hack/verify-vendor.sh
new file mode 100755
index 000000000..a095b80ae
--- /dev/null
+++ b/hack/verify-vendor.sh
@@ -0,0 +1,25 @@
+#!/usr/bin/env bash
+
+# Copyright The containerd Authors.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -o errexit
+set -o nounset
+set -o pipefail
+
+tmpdir="$(mktemp -d)"
+trap "rm -rf ${tmpdir}" EXIT
+
+git clone "https://github.com/containerd/project" "${tmpdir}"
+"${tmpdir}"/script/validate/vendor
diff --git a/integration/addition_gids_test.go b/integration/addition_gids_test.go
new file mode 100644
index 000000000..c984cf7ad
--- /dev/null
+++ b/integration/addition_gids_test.go
@@ -0,0 +1,89 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+func TestAdditionalGids(t *testing.T) {
+ testPodLogDir, err := ioutil.TempDir("/tmp", "additional-gids")
+ require.NoError(t, err)
+ defer os.RemoveAll(testPodLogDir)
+
+ t.Log("Create a sandbox with log directory")
+ sbConfig := PodSandboxConfig("sandbox", "additional-gids",
+ WithPodLogDirectory(testPodLogDir))
+ sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, runtimeService.StopPodSandbox(sb))
+ assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+ }()
+
+ const (
+ testImage = "busybox"
+ containerName = "test-container"
+ )
+ t.Logf("Pull test image %q", testImage)
+ img, err := imageService.PullImage(&runtime.ImageSpec{Image: testImage}, nil, sbConfig)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, imageService.RemoveImage(&runtime.ImageSpec{Image: img}))
+ }()
+
+ t.Log("Create a container to print id")
+ cnConfig := ContainerConfig(
+ containerName,
+ "busybox",
+ WithCommand("id"),
+ WithLogPath(containerName),
+ WithSupplementalGroups([]int64{1 /*daemon*/, 1234 /*new group*/}),
+ )
+ cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
+ require.NoError(t, err)
+
+ t.Log("Start the container")
+ require.NoError(t, runtimeService.StartContainer(cn))
+
+ t.Log("Wait for container to finish running")
+ require.NoError(t, Eventually(func() (bool, error) {
+ s, err := runtimeService.ContainerStatus(cn)
+ if err != nil {
+ return false, err
+ }
+ if s.GetState() == runtime.ContainerState_CONTAINER_EXITED {
+ return true, nil
+ }
+ return false, nil
+ }, time.Second, 30*time.Second))
+
+ t.Log("Search additional groups in container log")
+ content, err := ioutil.ReadFile(filepath.Join(testPodLogDir, containerName))
+ assert.NoError(t, err)
+ assert.Contains(t, string(content), "groups=1(daemon),10(wheel),1234")
+}
diff --git a/integration/container_log_test.go b/integration/container_log_test.go
new file mode 100644
index 000000000..c11c874bf
--- /dev/null
+++ b/integration/container_log_test.go
@@ -0,0 +1,175 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+func TestContainerLogWithoutTailingNewLine(t *testing.T) {
+ testPodLogDir, err := ioutil.TempDir("/tmp", "container-log-without-tailing-newline")
+ require.NoError(t, err)
+ defer os.RemoveAll(testPodLogDir)
+
+ t.Log("Create a sandbox with log directory")
+ sbConfig := PodSandboxConfig("sandbox", "container-log-without-tailing-newline",
+ WithPodLogDirectory(testPodLogDir),
+ )
+ sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, runtimeService.StopPodSandbox(sb))
+ assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+ }()
+
+ const (
+ testImage = "busybox"
+ containerName = "test-container"
+ )
+ t.Logf("Pull test image %q", testImage)
+ img, err := imageService.PullImage(&runtime.ImageSpec{Image: testImage}, nil, sbConfig)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, imageService.RemoveImage(&runtime.ImageSpec{Image: img}))
+ }()
+
+ t.Log("Create a container with log path")
+ cnConfig := ContainerConfig(
+ containerName,
+ testImage,
+ WithCommand("sh", "-c", "printf abcd"),
+ WithLogPath(containerName),
+ )
+ cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
+ require.NoError(t, err)
+
+ t.Log("Start the container")
+ require.NoError(t, runtimeService.StartContainer(cn))
+
+ t.Log("Wait for container to finish running")
+ require.NoError(t, Eventually(func() (bool, error) {
+ s, err := runtimeService.ContainerStatus(cn)
+ if err != nil {
+ return false, err
+ }
+ if s.GetState() == runtime.ContainerState_CONTAINER_EXITED {
+ return true, nil
+ }
+ return false, nil
+ }, time.Second, 30*time.Second))
+
+ t.Log("Check container log")
+ content, err := ioutil.ReadFile(filepath.Join(testPodLogDir, containerName))
+ assert.NoError(t, err)
+ checkContainerLog(t, string(content), []string{
+ fmt.Sprintf("%s %s %s", runtime.Stdout, runtime.LogTagPartial, "abcd"),
+ })
+}
+
+func TestLongContainerLog(t *testing.T) {
+ testPodLogDir, err := ioutil.TempDir("/tmp", "long-container-log")
+ require.NoError(t, err)
+ defer os.RemoveAll(testPodLogDir)
+
+ t.Log("Create a sandbox with log directory")
+ sbConfig := PodSandboxConfig("sandbox", "long-container-log",
+ WithPodLogDirectory(testPodLogDir),
+ )
+ sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, runtimeService.StopPodSandbox(sb))
+ assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+ }()
+
+ const (
+ testImage = "busybox"
+ containerName = "test-container"
+ )
+ t.Logf("Pull test image %q", testImage)
+ img, err := imageService.PullImage(&runtime.ImageSpec{Image: testImage}, nil, sbConfig)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, imageService.RemoveImage(&runtime.ImageSpec{Image: img}))
+ }()
+
+ t.Log("Create a container with log path")
+ config, err := CRIConfig()
+ require.NoError(t, err)
+ maxSize := config.MaxContainerLogLineSize
+ shortLineCmd := fmt.Sprintf("i=0; while [ $i -lt %d ]; do printf %s; i=$((i+1)); done", maxSize-1, "a")
+ maxLenLineCmd := fmt.Sprintf("i=0; while [ $i -lt %d ]; do printf %s; i=$((i+1)); done", maxSize, "b")
+ longLineCmd := fmt.Sprintf("i=0; while [ $i -lt %d ]; do printf %s; i=$((i+1)); done", maxSize+1, "c")
+ cnConfig := ContainerConfig(
+ containerName,
+ testImage,
+ WithCommand("sh", "-c",
+ fmt.Sprintf("%s; echo; %s; echo; %s; echo", shortLineCmd, maxLenLineCmd, longLineCmd)),
+ WithLogPath(containerName),
+ )
+ cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
+ require.NoError(t, err)
+
+ t.Log("Start the container")
+ require.NoError(t, runtimeService.StartContainer(cn))
+
+ t.Log("Wait for container to finish running")
+ require.NoError(t, Eventually(func() (bool, error) {
+ s, err := runtimeService.ContainerStatus(cn)
+ if err != nil {
+ return false, err
+ }
+ if s.GetState() == runtime.ContainerState_CONTAINER_EXITED {
+ return true, nil
+ }
+ return false, nil
+ }, time.Second, 30*time.Second))
+
+ t.Log("Check container log")
+ content, err := ioutil.ReadFile(filepath.Join(testPodLogDir, containerName))
+ assert.NoError(t, err)
+ checkContainerLog(t, string(content), []string{
+ fmt.Sprintf("%s %s %s", runtime.Stdout, runtime.LogTagFull, strings.Repeat("a", maxSize-1)),
+ fmt.Sprintf("%s %s %s", runtime.Stdout, runtime.LogTagFull, strings.Repeat("b", maxSize)),
+ fmt.Sprintf("%s %s %s", runtime.Stdout, runtime.LogTagPartial, strings.Repeat("c", maxSize)),
+ fmt.Sprintf("%s %s %s", runtime.Stdout, runtime.LogTagFull, "c"),
+ })
+}
+
+func checkContainerLog(t *testing.T, log string, messages []string) {
+ lines := strings.Split(strings.TrimSpace(log), "\n")
+ require.Len(t, lines, len(messages), "log line number should match")
+ for i, line := range lines {
+ parts := strings.SplitN(line, " ", 2)
+ require.Len(t, parts, 2)
+ _, err := time.Parse(time.RFC3339Nano, parts[0])
+ assert.NoError(t, err, "timestamp should be in RFC3339Nano format")
+ assert.Equal(t, messages[i], parts[1], "log content should match")
+ }
+}
diff --git a/integration/container_restart_test.go b/integration/container_restart_test.go
new file mode 100644
index 000000000..bce558e88
--- /dev/null
+++ b/integration/container_restart_test.go
@@ -0,0 +1,62 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// Test to verify container can be restarted
+func TestContainerRestart(t *testing.T) {
+ t.Logf("Create a pod config and run sandbox container")
+ sbConfig := PodSandboxConfig("sandbox1", "restart")
+ sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, runtimeService.StopPodSandbox(sb))
+ assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+ }()
+ t.Logf("Create a container config and run container in a pod")
+ containerConfig := ContainerConfig(
+ "container1",
+ pauseImage,
+ WithTestLabels(),
+ WithTestAnnotations(),
+ )
+ cn, err := runtimeService.CreateContainer(sb, containerConfig, sbConfig)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, runtimeService.RemoveContainer(cn))
+ }()
+ require.NoError(t, runtimeService.StartContainer(cn))
+ defer func() {
+ assert.NoError(t, runtimeService.StopContainer(cn, 10))
+ }()
+
+ t.Logf("Restart the container with same config")
+ require.NoError(t, runtimeService.StopContainer(cn, 10))
+ require.NoError(t, runtimeService.RemoveContainer(cn))
+
+ cn, err = runtimeService.CreateContainer(sb, containerConfig, sbConfig)
+ require.NoError(t, err)
+ require.NoError(t, runtimeService.StartContainer(cn))
+}
diff --git a/integration/container_stats_test.go b/integration/container_stats_test.go
new file mode 100644
index 000000000..6752b523f
--- /dev/null
+++ b/integration/container_stats_test.go
@@ -0,0 +1,347 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "fmt"
+ "testing"
+ "time"
+
+ "github.com/pkg/errors"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// Test to verify fetching stats for a single container by its ID
+func TestContainerStats(t *testing.T) {
+ t.Logf("Create a pod config and run sandbox container")
+ sbConfig := PodSandboxConfig("sandbox1", "stats")
+ sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, runtimeService.StopPodSandbox(sb))
+ assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+ }()
+ t.Logf("Create a container config and run container in a pod")
+ containerConfig := ContainerConfig(
+ "container1",
+ pauseImage,
+ WithTestLabels(),
+ WithTestAnnotations(),
+ )
+ cn, err := runtimeService.CreateContainer(sb, containerConfig, sbConfig)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, runtimeService.RemoveContainer(cn))
+ }()
+ require.NoError(t, runtimeService.StartContainer(cn))
+ defer func() {
+ assert.NoError(t, runtimeService.StopContainer(cn, 10))
+ }()
+
+ t.Logf("Fetch stats for container")
+ var s *runtime.ContainerStats
+ require.NoError(t, Eventually(func() (bool, error) {
+ s, err = runtimeService.ContainerStats(cn)
+ if err != nil {
+ return false, err
+ }
+ if s.GetWritableLayer().GetUsedBytes().GetValue() != 0 &&
+ s.GetWritableLayer().GetInodesUsed().GetValue() != 0 {
+ return true, nil
+ }
+ return false, nil
+ }, time.Second, 30*time.Second))
+
+ t.Logf("Verify stats received for container %q", cn)
+ testStats(t, s, containerConfig)
+}
+
+// Test to verify filtering without any filter
+func TestContainerListStats(t *testing.T) {
+ t.Logf("Create a pod config and run sandbox container")
+ sbConfig := PodSandboxConfig("running-pod", "statsls")
+ sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, runtimeService.StopPodSandbox(sb))
+ assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+ }()
+ t.Logf("Create a container config and run containers in a pod")
+ containerConfigMap := make(map[string]*runtime.ContainerConfig)
+ for i := 0; i < 3; i++ {
+ cName := fmt.Sprintf("container%d", i)
+ containerConfig := ContainerConfig(
+ cName,
+ pauseImage,
+ WithTestLabels(),
+ WithTestAnnotations(),
+ )
+ cn, err := runtimeService.CreateContainer(sb, containerConfig, sbConfig)
+ require.NoError(t, err)
+ containerConfigMap[cn] = containerConfig
+ defer func() {
+ assert.NoError(t, runtimeService.RemoveContainer(cn))
+ }()
+ require.NoError(t, runtimeService.StartContainer(cn))
+ defer func() {
+ assert.NoError(t, runtimeService.StopContainer(cn, 10))
+ }()
+ }
+
+ t.Logf("Fetch all container stats")
+ var stats []*runtime.ContainerStats
+ require.NoError(t, Eventually(func() (bool, error) {
+ stats, err = runtimeService.ListContainerStats(&runtime.ContainerStatsFilter{})
+ if err != nil {
+ return false, err
+ }
+ for _, s := range stats {
+ if s.GetWritableLayer().GetUsedBytes().GetValue() == 0 &&
+ s.GetWritableLayer().GetInodesUsed().GetValue() == 0 {
+ return false, nil
+ }
+ }
+ return true, nil
+ }, time.Second, 30*time.Second))
+
+ t.Logf("Verify all container stats")
+ for _, s := range stats {
+ testStats(t, s, containerConfigMap[s.GetAttributes().GetId()])
+ }
+}
+
+// Test to verify filtering given a specific container ID
+// TODO Convert the filter tests into table driven tests and unit tests
+func TestContainerListStatsWithIdFilter(t *testing.T) {
+ t.Logf("Create a pod config and run sandbox container")
+ sbConfig := PodSandboxConfig("running-pod", "statsls")
+ sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, runtimeService.StopPodSandbox(sb))
+ assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+ }()
+ t.Logf("Create a container config and run containers in a pod")
+ containerConfigMap := make(map[string]*runtime.ContainerConfig)
+ for i := 0; i < 3; i++ {
+ cName := fmt.Sprintf("container%d", i)
+ containerConfig := ContainerConfig(
+ cName,
+ pauseImage,
+ WithTestLabels(),
+ WithTestAnnotations(),
+ )
+ cn, err := runtimeService.CreateContainer(sb, containerConfig, sbConfig)
+ containerConfigMap[cn] = containerConfig
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, runtimeService.RemoveContainer(cn))
+ }()
+ require.NoError(t, runtimeService.StartContainer(cn))
+ defer func() {
+ assert.NoError(t, runtimeService.StopContainer(cn, 10))
+ }()
+ }
+
+ t.Logf("Fetch container stats for each container with Filter")
+ var stats []*runtime.ContainerStats
+ for id := range containerConfigMap {
+ require.NoError(t, Eventually(func() (bool, error) {
+ stats, err = runtimeService.ListContainerStats(
+ &runtime.ContainerStatsFilter{Id: id})
+ if err != nil {
+ return false, err
+ }
+ if len(stats) != 1 {
+ return false, errors.New("unexpected stats length")
+ }
+ if stats[0].GetWritableLayer().GetUsedBytes().GetValue() != 0 &&
+ stats[0].GetWritableLayer().GetInodesUsed().GetValue() != 0 {
+ return true, nil
+ }
+ return false, nil
+ }, time.Second, 30*time.Second))
+
+ t.Logf("Verify container stats for %s", id)
+ for _, s := range stats {
+ require.Equal(t, s.GetAttributes().GetId(), id)
+ testStats(t, s, containerConfigMap[id])
+ }
+ }
+}
+
+// Test to verify filtering given a specific Sandbox ID. Stats for
+// all the containers in a pod should be returned
+func TestContainerListStatsWithSandboxIdFilter(t *testing.T) {
+	t.Logf("Create a pod config and run sandbox container")
+	sbConfig := PodSandboxConfig("running-pod", "statsls")
+	sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+	require.NoError(t, err)
+	defer func() {
+		assert.NoError(t, runtimeService.StopPodSandbox(sb))
+		assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+	}()
+	t.Logf("Create a container config and run containers in a pod")
+	containerConfigMap := make(map[string]*runtime.ContainerConfig)
+	for i := 0; i < 3; i++ {
+		cName := fmt.Sprintf("container%d", i)
+		containerConfig := ContainerConfig(
+			cName,
+			pauseImage,
+			WithTestLabels(),
+			WithTestAnnotations(),
+		)
+		cn, err := runtimeService.CreateContainer(sb, containerConfig, sbConfig)
+		require.NoError(t, err) // check err before using cn as a map key
+		containerConfigMap[cn] = containerConfig
+		defer func() {
+			assert.NoError(t, runtimeService.RemoveContainer(cn))
+		}()
+		require.NoError(t, runtimeService.StartContainer(cn))
+		defer func() {
+			assert.NoError(t, runtimeService.StopContainer(cn, 10))
+		}()
+	}
+
+	t.Logf("Fetch container stats for each container with Filter")
+	var stats []*runtime.ContainerStats
+	require.NoError(t, Eventually(func() (bool, error) {
+		stats, err = runtimeService.ListContainerStats(
+			&runtime.ContainerStatsFilter{PodSandboxId: sb})
+		if err != nil {
+			return false, err
+		}
+		if len(stats) != 3 {
+			return false, errors.New("unexpected stats length")
+		}
+		if stats[0].GetWritableLayer().GetUsedBytes().GetValue() != 0 &&
+			stats[0].GetWritableLayer().GetInodesUsed().GetValue() != 0 {
+			return true, nil
+		}
+		return false, nil
+	}, time.Second, 30*time.Second))
+	t.Logf("Verify container stats for sandbox %q", sb)
+	for _, s := range stats {
+		testStats(t, s, containerConfigMap[s.GetAttributes().GetId()])
+	}
+}
+
+// Test to verify filtering given a specific container ID and
+// sandbox ID
+func TestContainerListStatsWithIdSandboxIdFilter(t *testing.T) {
+	t.Logf("Create a pod config and run sandbox container")
+	sbConfig := PodSandboxConfig("running-pod", "statsls")
+	sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+	require.NoError(t, err)
+	defer func() {
+		assert.NoError(t, runtimeService.StopPodSandbox(sb))
+		assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+	}()
+	t.Logf("Create container config and run containers in a pod")
+	containerConfigMap := make(map[string]*runtime.ContainerConfig)
+	for i := 0; i < 3; i++ {
+		cName := fmt.Sprintf("container%d", i)
+		containerConfig := ContainerConfig(
+			cName,
+			pauseImage,
+			WithTestLabels(),
+			WithTestAnnotations(),
+		)
+		cn, err := runtimeService.CreateContainer(sb, containerConfig, sbConfig)
+		require.NoError(t, err) // check err before using cn as a map key
+		containerConfigMap[cn] = containerConfig
+		defer func() {
+			assert.NoError(t, runtimeService.RemoveContainer(cn))
+		}()
+		require.NoError(t, runtimeService.StartContainer(cn))
+		defer func() {
+			assert.NoError(t, runtimeService.StopContainer(cn, 10))
+		}()
+	}
+	t.Logf("Fetch container stats for sandbox ID and container ID filter")
+	var stats []*runtime.ContainerStats
+	for id, config := range containerConfigMap {
+		require.NoError(t, Eventually(func() (bool, error) {
+			stats, err = runtimeService.ListContainerStats(
+				&runtime.ContainerStatsFilter{Id: id, PodSandboxId: sb})
+			if err != nil {
+				return false, err
+			}
+			if len(stats) != 1 {
+				return false, errors.New("unexpected stats length")
+			}
+			if stats[0].GetWritableLayer().GetUsedBytes().GetValue() != 0 &&
+				stats[0].GetWritableLayer().GetInodesUsed().GetValue() != 0 {
+				return true, nil
+			}
+			return false, nil
+		}, time.Second, 30*time.Second))
+		t.Logf("Verify container stats for sandbox %q and container %q filter", sb, id)
+		for _, s := range stats {
+			testStats(t, s, config)
+		}
+	}
+
+	t.Logf("Fetch container stats for sandbox truncID and container truncID filter ")
+	for id, config := range containerConfigMap {
+		require.NoError(t, Eventually(func() (bool, error) {
+			stats, err = runtimeService.ListContainerStats(
+				&runtime.ContainerStatsFilter{Id: id[:3], PodSandboxId: sb[:3]})
+			if err != nil {
+				return false, err
+			}
+			if len(stats) != 1 {
+				return false, errors.New("unexpected stats length")
+			}
+			if stats[0].GetWritableLayer().GetUsedBytes().GetValue() != 0 &&
+				stats[0].GetWritableLayer().GetInodesUsed().GetValue() != 0 {
+				return true, nil
+			}
+			return false, nil
+		}, time.Second, 30*time.Second))
+		t.Logf("Verify container stats for sandbox %q and container %q filter", sb, id)
+		for _, s := range stats {
+			testStats(t, s, config)
+		}
+	}
+}
+
+// TODO: turn these checks into options so they can be reused for dead-container tests.
+func testStats(t *testing.T,
+	s *runtime.ContainerStats,
+	config *runtime.ContainerConfig,
+) {
+	require.NotEmpty(t, s.GetAttributes().GetId())
+	require.NotEmpty(t, s.GetAttributes().GetMetadata())
+	require.NotEmpty(t, s.GetAttributes().GetAnnotations())
+	require.Equal(t, config.Labels, s.GetAttributes().GetLabels())
+	require.Equal(t, config.Annotations, s.GetAttributes().GetAnnotations())
+	require.Equal(t, config.Metadata.Name, s.GetAttributes().GetMetadata().Name)
+	require.NotEmpty(t, s.GetAttributes().GetLabels())
+	require.NotEmpty(t, s.GetCpu().GetTimestamp())
+	require.NotEmpty(t, s.GetCpu().GetUsageCoreNanoSeconds().GetValue())
+	require.NotEmpty(t, s.GetMemory().GetTimestamp())
+	require.NotEmpty(t, s.GetMemory().GetWorkingSetBytes().GetValue())
+	require.NotEmpty(t, s.GetWritableLayer().GetTimestamp())
+	require.NotEmpty(t, s.GetWritableLayer().GetFsId().GetMountpoint())
+	require.NotEmpty(t, s.GetWritableLayer().GetUsedBytes().GetValue())
+	require.NotEmpty(t, s.GetWritableLayer().GetInodesUsed().GetValue())
+}
diff --git a/integration/container_stop_test.go b/integration/container_stop_test.go
new file mode 100644
index 000000000..b270ed54b
--- /dev/null
+++ b/integration/container_stop_test.go
@@ -0,0 +1,141 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "context"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+func TestSharedPidMultiProcessContainerStop(t *testing.T) {
+	for name, sbConfig := range map[string]*runtime.PodSandboxConfig{
+		"hostpid": PodSandboxConfig("sandbox", "host-pid-container-stop", WithHostPid),
+		"podpid":  PodSandboxConfig("sandbox", "pod-pid-container-stop", WithPodPid),
+	} {
+		t.Run(name, func(t *testing.T) {
+			t.Log("Create a shared pid sandbox")
+			sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+			require.NoError(t, err)
+			defer func() {
+				assert.NoError(t, runtimeService.StopPodSandbox(sb))
+				assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+			}()
+
+			const (
+				testImage     = "busybox"
+				containerName = "test-container"
+			)
+			t.Logf("Pull test image %q", testImage)
+			img, err := imageService.PullImage(&runtime.ImageSpec{Image: testImage}, nil, sbConfig)
+			require.NoError(t, err)
+			defer func() {
+				assert.NoError(t, imageService.RemoveImage(&runtime.ImageSpec{Image: img}))
+			}()
+
+			t.Log("Create a multi-process container")
+			cnConfig := ContainerConfig(
+				containerName,
+				testImage,
+				WithCommand("sh", "-c", "sleep 10000 & sleep 10000"),
+			)
+			cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
+			require.NoError(t, err)
+
+			t.Log("Start the container")
+			require.NoError(t, runtimeService.StartContainer(cn))
+
+			t.Log("Stop the container")
+			require.NoError(t, runtimeService.StopContainer(cn, 0))
+
+			t.Log("The container state should be exited")
+			s, err := runtimeService.ContainerStatus(cn)
+			require.NoError(t, err)
+			assert.Equal(t, runtime.ContainerState_CONTAINER_EXITED, s.GetState())
+		})
+	}
+}
+
+func TestContainerStopCancellation(t *testing.T) {
+	t.Log("Create a pod sandbox")
+	sbConfig := PodSandboxConfig("sandbox", "cancel-container-stop")
+	sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+	require.NoError(t, err)
+	defer func() {
+		assert.NoError(t, runtimeService.StopPodSandbox(sb))
+		assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+	}()
+
+	const (
+		testImage     = "busybox"
+		containerName = "test-container"
+	)
+	t.Logf("Pull test image %q", testImage)
+	img, err := imageService.PullImage(&runtime.ImageSpec{Image: testImage}, nil, sbConfig)
+	require.NoError(t, err)
+	defer func() {
+		assert.NoError(t, imageService.RemoveImage(&runtime.ImageSpec{Image: img}))
+	}()
+
+	t.Log("Create a container which traps sigterm")
+	cnConfig := ContainerConfig(
+		containerName,
+		testImage,
+		WithCommand("sh", "-c", `trap "echo ignore sigterm" TERM; sleep 1000`),
+	)
+	cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
+	require.NoError(t, err)
+
+	t.Log("Start the container")
+	require.NoError(t, runtimeService.StartContainer(cn))
+
+	t.Log("Stop the container with 3s timeout, but 1s context timeout")
+	// Note that with container pid namespace, the sleep process
+	// is pid 1, and SIGTERM sent by `StopContainer` will be ignored.
+	rawClient, err := RawRuntimeClient()
+	require.NoError(t, err)
+	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second)
+	defer cancel()
+	_, err = rawClient.StopContainer(ctx, &runtime.StopContainerRequest{
+		ContainerId: cn,
+		Timeout:     3,
+	})
+	assert.Error(t, err)
+
+	t.Log("The container should still be running even after 5 seconds")
+	assert.NoError(t, Consistently(func() (bool, error) {
+		s, err := runtimeService.ContainerStatus(cn)
+		if err != nil {
+			return false, err
+		}
+		return s.GetState() == runtime.ContainerState_CONTAINER_RUNNING, nil
+	}, 100*time.Millisecond, 5*time.Second))
+
+	t.Log("Stop the container with 1s timeout, without shorter context timeout")
+	assert.NoError(t, runtimeService.StopContainer(cn, 1))
+
+	t.Log("The container state should be exited")
+	s, err := runtimeService.ContainerStatus(cn)
+	require.NoError(t, err)
+	assert.Equal(t, runtime.ContainerState_CONTAINER_EXITED, s.GetState())
+}
diff --git a/integration/container_update_resources_test.go b/integration/container_update_resources_test.go
new file mode 100644
index 000000000..83850e0b7
--- /dev/null
+++ b/integration/container_update_resources_test.go
@@ -0,0 +1,107 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "testing"
+
+ "github.com/containerd/cgroups"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+func checkMemoryLimit(t *testing.T, spec *runtimespec.Spec, memLimit int64) {
+ require.NotNil(t, spec)
+ require.NotNil(t, spec.Linux)
+ require.NotNil(t, spec.Linux.Resources)
+ require.NotNil(t, spec.Linux.Resources.Memory)
+ require.NotNil(t, spec.Linux.Resources.Memory.Limit)
+ assert.Equal(t, memLimit, *spec.Linux.Resources.Memory.Limit)
+}
+
+func TestUpdateContainerResources(t *testing.T) {
+ t.Log("Create a sandbox")
+ sbConfig := PodSandboxConfig("sandbox", "update-container-resources")
+ sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, runtimeService.StopPodSandbox(sb))
+ assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+ }()
+
+ t.Log("Create a container with memory limit")
+ cnConfig := ContainerConfig(
+ "container",
+ pauseImage,
+ WithResources(&runtime.LinuxContainerResources{
+ MemoryLimitInBytes: 200 * 1024 * 1024,
+ }),
+ )
+ cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
+ require.NoError(t, err)
+
+ t.Log("Check memory limit in container OCI spec")
+ container, err := containerdClient.LoadContainer(context.Background(), cn)
+ require.NoError(t, err)
+ spec, err := container.Spec(context.Background())
+ require.NoError(t, err)
+ checkMemoryLimit(t, spec, 200*1024*1024)
+
+ t.Log("Update container memory limit after created")
+ err = runtimeService.UpdateContainerResources(cn, &runtime.LinuxContainerResources{
+ MemoryLimitInBytes: 400 * 1024 * 1024,
+ })
+ require.NoError(t, err)
+
+ t.Log("Check memory limit in container OCI spec")
+ spec, err = container.Spec(context.Background())
+ require.NoError(t, err)
+ checkMemoryLimit(t, spec, 400*1024*1024)
+
+ t.Log("Start the container")
+ require.NoError(t, runtimeService.StartContainer(cn))
+ task, err := container.Task(context.Background(), nil)
+ require.NoError(t, err)
+
+ t.Log("Check memory limit in cgroup")
+ cgroup, err := cgroups.Load(cgroups.V1, cgroups.PidPath(int(task.Pid())))
+ require.NoError(t, err)
+ stat, err := cgroup.Stat(cgroups.IgnoreNotExist)
+ require.NoError(t, err)
+ assert.Equal(t, uint64(400*1024*1024), stat.Memory.Usage.Limit)
+
+ t.Log("Update container memory limit after started")
+ err = runtimeService.UpdateContainerResources(cn, &runtime.LinuxContainerResources{
+ MemoryLimitInBytes: 800 * 1024 * 1024,
+ })
+ require.NoError(t, err)
+
+ t.Log("Check memory limit in container OCI spec")
+ spec, err = container.Spec(context.Background())
+ require.NoError(t, err)
+ checkMemoryLimit(t, spec, 800*1024*1024)
+
+ t.Log("Check memory limit in cgroup")
+ stat, err = cgroup.Stat(cgroups.IgnoreNotExist)
+ require.NoError(t, err)
+ assert.Equal(t, uint64(800*1024*1024), stat.Memory.Usage.Limit)
+}
diff --git a/integration/container_without_image_ref_test.go b/integration/container_without_image_ref_test.go
new file mode 100644
index 000000000..9fc897789
--- /dev/null
+++ b/integration/container_without_image_ref_test.go
@@ -0,0 +1,77 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// Test container lifecycle can work without image references.
+func TestContainerLifecycleWithoutImageRef(t *testing.T) {
+	t.Log("Create a sandbox")
+	sbConfig := PodSandboxConfig("sandbox", "container-lifecycle-without-image-ref")
+	sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+	require.NoError(t, err)
+	defer func() {
+		assert.NoError(t, runtimeService.StopPodSandbox(sb))
+		assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+	}()
+
+	const (
+		testImage     = "busybox"
+		containerName = "test-container"
+	)
+	t.Log("Pull test image")
+	img, err := imageService.PullImage(&runtime.ImageSpec{Image: testImage}, nil, sbConfig)
+	require.NoError(t, err)
+	defer func() {
+		assert.NoError(t, imageService.RemoveImage(&runtime.ImageSpec{Image: img}))
+	}()
+
+	t.Log("Create test container")
+	cnConfig := ContainerConfig(
+		containerName,
+		testImage,
+		WithCommand("sleep", "1000"),
+	)
+	cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
+	require.NoError(t, err)
+	require.NoError(t, runtimeService.StartContainer(cn))
+
+	t.Log("Remove test image")
+	assert.NoError(t, imageService.RemoveImage(&runtime.ImageSpec{Image: img}))
+
+	t.Log("Container status should be running")
+	status, err := runtimeService.ContainerStatus(cn)
+	require.NoError(t, err)
+	assert.Equal(t, runtime.ContainerState_CONTAINER_RUNNING, status.GetState())
+
+	t.Log("Stop container")
+	err = runtimeService.StopContainer(cn, 1)
+	assert.NoError(t, err)
+
+	t.Log("Container status should be exited")
+	status, err = runtimeService.ContainerStatus(cn)
+	require.NoError(t, err)
+	assert.Equal(t, runtime.ContainerState_CONTAINER_EXITED, status.GetState())
+}
diff --git a/integration/containerd_image_test.go b/integration/containerd_image_test.go
new file mode 100644
index 000000000..1832b50ff
--- /dev/null
+++ b/integration/containerd_image_test.go
@@ -0,0 +1,213 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "testing"
+ "time"
+
+ "golang.org/x/net/context"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/errdefs"
+ "github.com/containerd/containerd/namespaces"
+ "github.com/pkg/errors"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// Test to verify that the CRI plugin sees an image pulled into containerd directly.
+func TestContainerdImage(t *testing.T) {
+ const testImage = "docker.io/library/busybox:latest"
+ ctx := context.Background()
+
+ t.Logf("make sure the test image doesn't exist in the cri plugin")
+ i, err := imageService.ImageStatus(&runtime.ImageSpec{Image: testImage})
+ require.NoError(t, err)
+ if i != nil {
+ require.NoError(t, imageService.RemoveImage(&runtime.ImageSpec{Image: testImage}))
+ }
+
+ t.Logf("pull the image into containerd")
+ _, err = containerdClient.Pull(ctx, testImage, containerd.WithPullUnpack)
+ assert.NoError(t, err)
+ defer func() {
+ // Make sure the image is cleaned up in any case.
+ if err := containerdClient.ImageService().Delete(ctx, testImage); err != nil {
+ assert.True(t, errdefs.IsNotFound(err), err)
+ }
+ assert.NoError(t, imageService.RemoveImage(&runtime.ImageSpec{Image: testImage}))
+ }()
+
+ t.Logf("the image should be seen by the cri plugin")
+ var id string
+ checkImage := func() (bool, error) {
+ img, err := imageService.ImageStatus(&runtime.ImageSpec{Image: testImage})
+ if err != nil {
+ return false, err
+ }
+ if img == nil {
+ t.Logf("Image %q not show up in the cri plugin yet", testImage)
+ return false, nil
+ }
+ id = img.Id
+ img, err = imageService.ImageStatus(&runtime.ImageSpec{Image: id})
+ if err != nil {
+ return false, err
+ }
+ if img == nil {
+ // We always generate image id as a reference first, it must
+ // be ready here.
+ return false, errors.New("can't reference image by id")
+ }
+ if len(img.RepoTags) != 1 {
+ // RepoTags must have been populated correctly.
+ return false, errors.Errorf("unexpected repotags: %+v", img.RepoTags)
+ }
+ if img.RepoTags[0] != testImage {
+ return false, errors.Errorf("unexpected repotag %q", img.RepoTags[0])
+ }
+ return true, nil
+ }
+ require.NoError(t, Eventually(checkImage, 100*time.Millisecond, 10*time.Second))
+ require.NoError(t, Consistently(checkImage, 100*time.Millisecond, time.Second))
+ defer func() {
+ t.Logf("image should still be seen by id if only tag get deleted")
+ if err := containerdClient.ImageService().Delete(ctx, testImage); err != nil {
+ assert.True(t, errdefs.IsNotFound(err), err)
+ }
+ assert.NoError(t, Consistently(func() (bool, error) {
+ img, err := imageService.ImageStatus(&runtime.ImageSpec{Image: id})
+ if err != nil {
+ return false, err
+ }
+ return img != nil, nil
+ }, 100*time.Millisecond, time.Second))
+ t.Logf("image should be removed from the cri plugin if all references get deleted")
+ if err := containerdClient.ImageService().Delete(ctx, id); err != nil {
+ assert.True(t, errdefs.IsNotFound(err), err)
+ }
+ assert.NoError(t, Eventually(func() (bool, error) {
+ img, err := imageService.ImageStatus(&runtime.ImageSpec{Image: id})
+ if err != nil {
+ return false, err
+ }
+ return img == nil, nil
+ }, 100*time.Millisecond, 10*time.Second))
+ }()
+
+ t.Logf("the image should be marked as managed")
+ imgByRef, err := containerdClient.GetImage(ctx, testImage)
+ assert.NoError(t, err)
+ assert.Equal(t, imgByRef.Labels()["io.cri-containerd.image"], "managed")
+
+ t.Logf("the image id should be created and managed")
+ imgByID, err := containerdClient.GetImage(ctx, id)
+ assert.NoError(t, err)
+ assert.Equal(t, imgByID.Labels()["io.cri-containerd.image"], "managed")
+
+ t.Logf("should be able to start container with the image")
+ sbConfig := PodSandboxConfig("sandbox", "containerd-image")
+ sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, runtimeService.StopPodSandbox(sb))
+ assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+ }()
+
+ cnConfig := ContainerConfig(
+ "test-container",
+ id,
+ WithCommand("top"),
+ )
+ cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
+ require.NoError(t, err)
+ require.NoError(t, runtimeService.StartContainer(cn))
+ checkContainer := func() (bool, error) {
+ s, err := runtimeService.ContainerStatus(cn)
+ if err != nil {
+ return false, err
+ }
+ return s.GetState() == runtime.ContainerState_CONTAINER_RUNNING, nil
+ }
+ require.NoError(t, Eventually(checkContainer, 100*time.Millisecond, 10*time.Second))
+ require.NoError(t, Consistently(checkContainer, 100*time.Millisecond, time.Second))
+}
+
+// Test image managed by CRI plugin shouldn't be affected by images in other namespaces.
+func TestContainerdImageInOtherNamespaces(t *testing.T) {
+ const testImage = "docker.io/library/busybox:latest"
+ ctx := context.Background()
+
+ t.Logf("make sure the test image doesn't exist in the cri plugin")
+ i, err := imageService.ImageStatus(&runtime.ImageSpec{Image: testImage})
+ require.NoError(t, err)
+ if i != nil {
+ require.NoError(t, imageService.RemoveImage(&runtime.ImageSpec{Image: testImage}))
+ }
+
+ t.Logf("pull the image into test namespace")
+ namespacedCtx := namespaces.WithNamespace(ctx, "test")
+ _, err = containerdClient.Pull(namespacedCtx, testImage, containerd.WithPullUnpack)
+ assert.NoError(t, err)
+ defer func() {
+ // Make sure the image is cleaned up in any case.
+ if err := containerdClient.ImageService().Delete(namespacedCtx, testImage); err != nil {
+ assert.True(t, errdefs.IsNotFound(err), err)
+ }
+ assert.NoError(t, imageService.RemoveImage(&runtime.ImageSpec{Image: testImage}))
+ }()
+
+ t.Logf("cri plugin should not see the image")
+ checkImage := func() (bool, error) {
+ img, err := imageService.ImageStatus(&runtime.ImageSpec{Image: testImage})
+ if err != nil {
+ return false, err
+ }
+ return img == nil, nil
+ }
+ require.NoError(t, Consistently(checkImage, 100*time.Millisecond, time.Second))
+
+ sbConfig := PodSandboxConfig("sandbox", "test")
+ t.Logf("pull the image into cri plugin")
+ id, err := imageService.PullImage(&runtime.ImageSpec{Image: testImage}, nil, sbConfig)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, imageService.RemoveImage(&runtime.ImageSpec{Image: id}))
+ }()
+
+ t.Logf("cri plugin should see the image now")
+ img, err := imageService.ImageStatus(&runtime.ImageSpec{Image: testImage})
+ require.NoError(t, err)
+ assert.NotNil(t, img)
+
+ t.Logf("remove the image from test namespace")
+ require.NoError(t, containerdClient.ImageService().Delete(namespacedCtx, testImage))
+
+ t.Logf("cri plugin should still see the image")
+ checkImage = func() (bool, error) {
+ img, err := imageService.ImageStatus(&runtime.ImageSpec{Image: testImage})
+ if err != nil {
+ return false, err
+ }
+ return img != nil, nil
+ }
+ assert.NoError(t, Consistently(checkImage, 100*time.Millisecond, time.Second))
+}
diff --git a/integration/duplicate_name_test.go b/integration/duplicate_name_test.go
new file mode 100644
index 000000000..6c8532063
--- /dev/null
+++ b/integration/duplicate_name_test.go
@@ -0,0 +1,53 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestDuplicateName(t *testing.T) {
+ t.Logf("Create a sandbox")
+ sbConfig := PodSandboxConfig("sandbox", "duplicate-name")
+ sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, runtimeService.StopPodSandbox(sb))
+ assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+ }()
+
+ t.Logf("Create the sandbox again should fail")
+ _, err = runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+ require.Error(t, err)
+
+ t.Logf("Create a container")
+ cnConfig := ContainerConfig(
+ "container",
+ pauseImage,
+ )
+ _, err = runtimeService.CreateContainer(sb, cnConfig, sbConfig)
+ require.NoError(t, err)
+
+ t.Logf("Create the container again should fail")
+ _, err = runtimeService.CreateContainer(sb, cnConfig, sbConfig)
+ require.Error(t, err)
+}
diff --git a/integration/image_load_test.go b/integration/image_load_test.go
new file mode 100644
index 000000000..ab7fed613
--- /dev/null
+++ b/integration/image_load_test.go
@@ -0,0 +1,101 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "io/ioutil"
+ "os"
+ "os/exec"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// Test to load an image from tarball.
+func TestImageLoad(t *testing.T) {
+	testImage := "busybox:latest"
+	loadedImage := "docker.io/library/" + testImage
+	_, err := exec.LookPath("docker")
+	if err != nil {
+		t.Skipf("Docker is not available: %v", err)
+	}
+	t.Logf("docker save image into tarball")
+	output, err := exec.Command("docker", "pull", testImage).CombinedOutput()
+	require.NoError(t, err, "output: %q", output)
+	tarF, err := ioutil.TempFile("", "image-load")
+	require.NoError(t, err) // check err before dereferencing tarF
+	tar := tarF.Name()
+	defer func() {
+		assert.NoError(t, os.RemoveAll(tar))
+	}()
+	output, err = exec.Command("docker", "save", testImage, "-o", tar).CombinedOutput()
+	require.NoError(t, err, "output: %q", output)
+
+	t.Logf("make sure no such image in cri")
+	img, err := imageService.ImageStatus(&runtime.ImageSpec{Image: testImage})
+	require.NoError(t, err)
+	if img != nil {
+		require.NoError(t, imageService.RemoveImage(&runtime.ImageSpec{Image: testImage}))
+	}
+
+	t.Logf("load image in cri")
+	ctr, err := exec.LookPath("ctr")
+	require.NoError(t, err, "ctr should be installed, make sure you've run `make install.deps`")
+	output, err = exec.Command(ctr, "-address="+containerdEndpoint,
+		"-n=k8s.io", "images", "import", tar).CombinedOutput()
+	require.NoError(t, err, "output: %q", output)
+
+	t.Logf("make sure image is loaded")
+	// Use Eventually because the cri plugin needs a short period of time
+	// to pick up images imported into containerd directly.
+	require.NoError(t, Eventually(func() (bool, error) {
+		img, err = imageService.ImageStatus(&runtime.ImageSpec{Image: testImage})
+		if err != nil {
+			return false, err
+		}
+		return img != nil, nil
+	}, 100*time.Millisecond, 10*time.Second))
+	require.Equal(t, []string{loadedImage}, img.RepoTags)
+
+	t.Logf("create a container with the loaded image")
+	sbConfig := PodSandboxConfig("sandbox", Randomize("image-load"))
+	sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+	require.NoError(t, err)
+	defer func() {
+		assert.NoError(t, runtimeService.StopPodSandbox(sb))
+		assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+	}()
+	containerConfig := ContainerConfig(
+		"container",
+		testImage,
+		WithCommand("tail", "-f", "/dev/null"),
+	)
+	// Rely on sandbox clean to do container cleanup.
+	cn, err := runtimeService.CreateContainer(sb, containerConfig, sbConfig)
+	require.NoError(t, err)
+	require.NoError(t, runtimeService.StartContainer(cn))
+
+	t.Logf("make sure container is running")
+	status, err := runtimeService.ContainerStatus(cn)
+	require.NoError(t, err)
+	require.Equal(t, runtime.ContainerState_CONTAINER_RUNNING, status.State)
+}
diff --git a/integration/imagefs_info_test.go b/integration/imagefs_info_test.go
new file mode 100644
index 000000000..1fc01e9ea
--- /dev/null
+++ b/integration/imagefs_info_test.go
@@ -0,0 +1,78 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "os"
+ "testing"
+ "time"
+
+ "github.com/pkg/errors"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+func TestImageFSInfo(t *testing.T) {
+ config := PodSandboxConfig("running-pod", "imagefs")
+
+ t.Logf("Pull an image to make sure image fs is not empty")
+ img, err := imageService.PullImage(&runtime.ImageSpec{Image: "busybox"}, nil, config)
+ require.NoError(t, err)
+ defer func() {
+ err := imageService.RemoveImage(&runtime.ImageSpec{Image: img})
+ assert.NoError(t, err)
+ }()
+ t.Logf("Create a sandbox to make sure there is an active snapshot")
+ sb, err := runtimeService.RunPodSandbox(config, *runtimeHandler)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, runtimeService.StopPodSandbox(sb))
+ assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+ }()
+
+ // It takes time to populate imagefs stats. Use eventually
+ // to check for a period of time.
+ t.Logf("Check imagefs info")
+ var info *runtime.FilesystemUsage
+ require.NoError(t, Eventually(func() (bool, error) {
+ stats, err := imageService.ImageFsInfo()
+ if err != nil {
+ return false, err
+ }
+ if len(stats) == 0 {
+ return false, nil
+ }
+ if len(stats) >= 2 {
+ return false, errors.Errorf("unexpected stats length: %d", len(stats))
+ }
+ info = stats[0]
+ if info.GetTimestamp() != 0 &&
+ info.GetUsedBytes().GetValue() != 0 &&
+ info.GetInodesUsed().GetValue() != 0 &&
+ info.GetFsId().GetMountpoint() != "" {
+ return true, nil
+ }
+ return false, nil
+ }, time.Second, 30*time.Second))
+
+ t.Logf("Image filesystem mountpath should exist")
+ _, err = os.Stat(info.GetFsId().GetMountpoint())
+ assert.NoError(t, err)
+}
diff --git a/integration/images/volume-copy-up/Dockerfile b/integration/images/volume-copy-up/Dockerfile
new file mode 100644
index 000000000..ed6bba63b
--- /dev/null
+++ b/integration/images/volume-copy-up/Dockerfile
@@ -0,0 +1,17 @@
+# Copyright The containerd Authors.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM busybox
+RUN sh -c "mkdir /test_dir; echo test_content > /test_dir/test_file"
+VOLUME "/test_dir"
diff --git a/integration/images/volume-copy-up/Makefile b/integration/images/volume-copy-up/Makefile
new file mode 100644
index 000000000..f5721c4a9
--- /dev/null
+++ b/integration/images/volume-copy-up/Makefile
@@ -0,0 +1,27 @@
+# Copyright The containerd Authors.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
all: build

# Registry project and tag for the test image.
PROJ=gcr.io/k8s-cri-containerd
VERSION=1.0
IMAGE=$(PROJ)/volume-copy-up:$(VERSION)

# Build the test image locally.
build:
	docker build -t $(IMAGE) .

# Push the image to the registry.
# NOTE(review): `gcloud docker -- push` is deprecated in current gcloud
# releases; consider `gcloud auth configure-docker` + `docker push` — confirm
# against the project's CI tooling before changing.
push:
	gcloud docker -- push $(IMAGE)

.PHONY: build push
diff --git a/integration/images/volume-ownership/Dockerfile b/integration/images/volume-ownership/Dockerfile
new file mode 100644
index 000000000..f7a9086b3
--- /dev/null
+++ b/integration/images/volume-ownership/Dockerfile
@@ -0,0 +1,18 @@
+# Copyright The containerd Authors.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
# Test image whose volume directory is owned by nobody:nogroup — presumably
# used to verify that ownership from the image is preserved on the volume;
# confirm against the corresponding integration test before changing.
FROM busybox
RUN mkdir -p /test_dir && \
    chown -R nobody:nogroup /test_dir
VOLUME /test_dir
diff --git a/integration/images/volume-ownership/Makefile b/integration/images/volume-ownership/Makefile
new file mode 100644
index 000000000..d4654d292
--- /dev/null
+++ b/integration/images/volume-ownership/Makefile
@@ -0,0 +1,27 @@
+# Copyright The containerd Authors.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
all: build

# Registry project and tag for the test image.
PROJ=gcr.io/k8s-cri-containerd
VERSION=1.0
IMAGE=$(PROJ)/volume-ownership:$(VERSION)

# Build the test image locally.
build:
	docker build -t $(IMAGE) .

# Push the image to the registry.
# NOTE(review): `gcloud docker -- push` is deprecated in current gcloud
# releases; consider `gcloud auth configure-docker` + `docker push` — confirm
# against the project's CI tooling before changing.
push:
	gcloud docker -- push $(IMAGE)

.PHONY: build push
diff --git a/integration/main_test.go b/integration/main_test.go
new file mode 100644
index 000000000..e8f4459b5
--- /dev/null
+++ b/integration/main_test.go
@@ -0,0 +1,418 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "context"
+ "encoding/json"
+ "flag"
+ "fmt"
+ "os"
+ "os/exec"
+ "strconv"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/containerd/containerd"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "google.golang.org/grpc"
+ cri "k8s.io/cri-api/pkg/apis"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/integration/remote"
+ dialer "github.com/containerd/cri/integration/util"
+ criconfig "github.com/containerd/cri/pkg/config"
+ "github.com/containerd/cri/pkg/constants"
+ "github.com/containerd/cri/pkg/server"
+ "github.com/containerd/cri/pkg/util"
+)
+
// Test-wide constants shared by every integration test in this package.
const (
	// timeout bounds each CRI remote client call.
	timeout = 1 * time.Minute
	// pauseImage must be kept in sync with the default sandbox image.
	pauseImage = "k8s.gcr.io/pause:3.2" // This is the same as the default sandbox image.
	// k8sNamespace is the containerd namespace used for Kubernetes resources.
	k8sNamespace = constants.K8sContainerdNamespace
)
+
// Package-level clients initialized by ConnectDaemons and shared by all
// tests; they are re-initialized after a containerd restart.
var (
	runtimeService     cri.RuntimeService
	imageService       cri.ImageManagerService
	containerdClient   *containerd.Client
	containerdEndpoint string
)
+
// Command-line flags configuring how the suite reaches the daemons under test.
var (
	criEndpoint    = flag.String("cri-endpoint", "unix:///run/containerd/containerd.sock", "The endpoint of cri plugin.")
	criRoot        = flag.String("cri-root", "/var/lib/containerd/io.containerd.grpc.v1.cri", "The root directory of cri plugin.")
	runtimeHandler = flag.String("runtime-handler", "", "The runtime handler to use in the test.")
	containerdBin  = flag.String("containerd-bin", "containerd", "The containerd binary name. The name is used to restart containerd during test.")
)
+
// TestMain connects to the already-running containerd/cri daemons before the
// suite executes, and aborts the entire run if they are unreachable.
func TestMain(m *testing.M) {
	flag.Parse()
	if err := ConnectDaemons(); err != nil {
		logrus.WithError(err).Fatalf("Failed to connect daemons")
	}
	os.Exit(m.Run())
}
+
// ConnectDaemons connects to the cri plugin and containerd, initializing the
// package-level service clients. It issues one List call per service to make
// sure the endpoints are actually serving, and returns an error otherwise.
func ConnectDaemons() error {
	var err error
	runtimeService, err = remote.NewRuntimeService(*criEndpoint, timeout)
	if err != nil {
		return errors.Wrap(err, "failed to create runtime service")
	}
	imageService, err = remote.NewImageService(*criEndpoint, timeout)
	if err != nil {
		return errors.Wrap(err, "failed to create image service")
	}
	// Since CRI grpc client doesn't have `WithBlock` specified, creating the
	// clients above succeeds even if nothing is listening; we need to make a
	// real call to check whether the connection works.
	// TODO(random-liu): Extend cri remote client to accept extra grpc options.
	_, err = runtimeService.ListContainers(&runtime.ContainerFilter{})
	if err != nil {
		return errors.Wrap(err, "failed to list containers")
	}
	_, err = imageService.ListImages(&runtime.ImageFilter{})
	if err != nil {
		return errors.Wrap(err, "failed to list images")
	}
	// containerdEndpoint is the same as criEndpoint now, minus the scheme.
	containerdEndpoint = strings.TrimPrefix(*criEndpoint, "unix://")
	containerdClient, err = containerd.New(containerdEndpoint, containerd.WithDefaultNamespace(k8sNamespace))
	if err != nil {
		return errors.Wrap(err, "failed to connect containerd")
	}
	return nil
}
+
// PodSandboxOpts is a functional option that mutates a pod sandbox config
// before it is used in a test.
type PodSandboxOpts func(*runtime.PodSandboxConfig)
+
+// Set host network.
+func WithHostNetwork(p *runtime.PodSandboxConfig) {
+ if p.Linux == nil {
+ p.Linux = &runtime.LinuxPodSandboxConfig{}
+ }
+ if p.Linux.SecurityContext == nil {
+ p.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{}
+ }
+ if p.Linux.SecurityContext.NamespaceOptions == nil {
+ p.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{}
+ }
+ p.Linux.SecurityContext.NamespaceOptions.Network = runtime.NamespaceMode_NODE
+}
+
+// Set host pid.
+func WithHostPid(p *runtime.PodSandboxConfig) {
+ if p.Linux == nil {
+ p.Linux = &runtime.LinuxPodSandboxConfig{}
+ }
+ if p.Linux.SecurityContext == nil {
+ p.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{}
+ }
+ if p.Linux.SecurityContext.NamespaceOptions == nil {
+ p.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{}
+ }
+ p.Linux.SecurityContext.NamespaceOptions.Pid = runtime.NamespaceMode_NODE
+}
+
+// Set pod pid.
+func WithPodPid(p *runtime.PodSandboxConfig) {
+ if p.Linux == nil {
+ p.Linux = &runtime.LinuxPodSandboxConfig{}
+ }
+ if p.Linux.SecurityContext == nil {
+ p.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{}
+ }
+ if p.Linux.SecurityContext.NamespaceOptions == nil {
+ p.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{}
+ }
+ p.Linux.SecurityContext.NamespaceOptions.Pid = runtime.NamespaceMode_POD
+}
+
+// Add pod log directory.
+func WithPodLogDirectory(dir string) PodSandboxOpts {
+ return func(p *runtime.PodSandboxConfig) {
+ p.LogDirectory = dir
+ }
+}
+
+// Add pod hostname.
+func WithPodHostname(hostname string) PodSandboxOpts {
+ return func(p *runtime.PodSandboxConfig) {
+ p.Hostname = hostname
+ }
+}
+
+// PodSandboxConfig generates a pod sandbox config for test.
+func PodSandboxConfig(name, ns string, opts ...PodSandboxOpts) *runtime.PodSandboxConfig {
+ config := &runtime.PodSandboxConfig{
+ Metadata: &runtime.PodSandboxMetadata{
+ Name: name,
+ // Using random id as uuid is good enough for local
+ // integration test.
+ Uid: util.GenerateID(),
+ Namespace: Randomize(ns),
+ },
+ Linux: &runtime.LinuxPodSandboxConfig{},
+ }
+ for _, opt := range opts {
+ opt(config)
+ }
+ return config
+}
+
// ContainerOpts is a functional option that mutates a container config
// (labels, annotations, resources, command, ...) before it is used in a test.
type ContainerOpts func(*runtime.ContainerConfig)
+
+func WithTestLabels() ContainerOpts {
+ return func(c *runtime.ContainerConfig) {
+ c.Labels = map[string]string{"key": "value"}
+ }
+}
+
+func WithTestAnnotations() ContainerOpts {
+ return func(c *runtime.ContainerConfig) {
+ c.Annotations = map[string]string{"a.b.c": "test"}
+ }
+}
+
+// Add container resource limits.
+func WithResources(r *runtime.LinuxContainerResources) ContainerOpts {
+ return func(c *runtime.ContainerConfig) {
+ if c.Linux == nil {
+ c.Linux = &runtime.LinuxContainerConfig{}
+ }
+ c.Linux.Resources = r
+ }
+}
+
+// Add container command.
+func WithCommand(cmd string, args ...string) ContainerOpts {
+ return func(c *runtime.ContainerConfig) {
+ c.Command = []string{cmd}
+ c.Args = args
+ }
+}
+
+// Add pid namespace mode.
+func WithPidNamespace(mode runtime.NamespaceMode) ContainerOpts {
+ return func(c *runtime.ContainerConfig) {
+ if c.Linux == nil {
+ c.Linux = &runtime.LinuxContainerConfig{}
+ }
+ if c.Linux.SecurityContext == nil {
+ c.Linux.SecurityContext = &runtime.LinuxContainerSecurityContext{}
+ }
+ if c.Linux.SecurityContext.NamespaceOptions == nil {
+ c.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{}
+ }
+ c.Linux.SecurityContext.NamespaceOptions.Pid = mode
+ }
+
+}
+
+// Add container log path.
+func WithLogPath(path string) ContainerOpts {
+ return func(c *runtime.ContainerConfig) {
+ c.LogPath = path
+ }
+}
+
+// WithSupplementalGroups adds supplemental groups.
+func WithSupplementalGroups(gids []int64) ContainerOpts {
+ return func(c *runtime.ContainerConfig) {
+ if c.Linux == nil {
+ c.Linux = &runtime.LinuxContainerConfig{}
+ }
+ if c.Linux.SecurityContext == nil {
+ c.Linux.SecurityContext = &runtime.LinuxContainerSecurityContext{}
+ }
+ c.Linux.SecurityContext.SupplementalGroups = gids
+ }
+}
+
+// ContainerConfig creates a container config given a name and image name
+// and additional container config options
+func ContainerConfig(name, image string, opts ...ContainerOpts) *runtime.ContainerConfig {
+ cConfig := &runtime.ContainerConfig{
+ Metadata: &runtime.ContainerMetadata{
+ Name: name,
+ },
+ Image: &runtime.ImageSpec{Image: image},
+ }
+ for _, opt := range opts {
+ opt(cConfig)
+ }
+ return cConfig
+}
+
// CheckFunc is the function used to check whether a condition has been met.
// It reports done plus an optional error; a non-nil error aborts polling.
type CheckFunc func() (bool, error)

// Eventually polls f every period until it reports done, returning an error
// if timeout is exceeded first. Any error returned by f is propagated
// immediately — including when f also reports done. (Previously `done` was
// checked before `err`, so an error paired with done==true was silently
// dropped, contradicting this contract.)
func Eventually(f CheckFunc, period, timeout time.Duration) error {
	start := time.Now()
	for {
		done, err := f()
		if err != nil {
			return err
		}
		if done {
			return nil
		}
		if time.Since(start) >= timeout {
			return errors.New("timeout exceeded")
		}
		time.Sleep(period)
	}
}
+
+// Consistently makes sure that f consistently returns true without
+// error before timeout exceeds. If f returns error, Consistently
+// will return the same error immediately.
+func Consistently(f CheckFunc, period, timeout time.Duration) error {
+ start := time.Now()
+ for {
+ ok, err := f()
+ if !ok {
+ return errors.New("get false")
+ }
+ if err != nil {
+ return err
+ }
+ if time.Since(start) >= timeout {
+ return nil
+ }
+ time.Sleep(period)
+ }
+}
+
// Randomize returns str suffixed with "-<generated id>" so that names stay
// unique across concurrent tests and repeated runs.
func Randomize(str string) string {
	return str + "-" + util.GenerateID()
}
+
+// KillProcess kills the process by name. pkill is used.
+func KillProcess(name string) error {
+ output, err := exec.Command("pkill", "-x", fmt.Sprintf("^%s$", name)).CombinedOutput()
+ if err != nil {
+ return errors.Errorf("failed to kill %q - error: %v, output: %q", name, err, output)
+ }
+ return nil
+}
+
+// KillPid kills the process by pid. kill is used.
+func KillPid(pid int) error {
+ output, err := exec.Command("kill", strconv.Itoa(pid)).CombinedOutput()
+ if err != nil {
+ return errors.Errorf("failed to kill %d - error: %v, output: %q", pid, err, output)
+ }
+ return nil
+}
+
+// PidOf returns pid of a process by name.
+func PidOf(name string) (int, error) {
+ b, err := exec.Command("pidof", name).CombinedOutput()
+ output := strings.TrimSpace(string(b))
+ if err != nil {
+ if len(output) != 0 {
+ return 0, errors.Errorf("failed to run pidof %q - error: %v, output: %q", name, err, output)
+ }
+ return 0, nil
+ }
+ return strconv.Atoi(output)
+}
+
// RawRuntimeClient returns a raw grpc runtime service client connected to the
// CRI endpoint, bypassing the typed remote.RuntimeService wrapper. Useful for
// verbose calls (Status/PodSandboxStatus with Verbose=true) whose extra info
// the wrapper does not expose.
func RawRuntimeClient() (runtime.RuntimeServiceClient, error) {
	addr, dialer, err := dialer.GetAddressAndDialer(*criEndpoint)
	if err != nil {
		return nil, errors.Wrap(err, "failed to get dialer")
	}
	ctx, cancel := context.WithTimeout(context.Background(), timeout)
	defer cancel()
	conn, err := grpc.DialContext(ctx, addr, grpc.WithInsecure(), grpc.WithContextDialer(dialer))
	if err != nil {
		return nil, errors.Wrap(err, "failed to connect cri endpoint")
	}
	return runtime.NewRuntimeServiceClient(conn), nil
}
+
// CRIConfig gets the current cri plugin config from containerd by decoding
// the "config" field of the verbose Status response.
func CRIConfig() (*criconfig.Config, error) {
	client, err := RawRuntimeClient()
	if err != nil {
		return nil, errors.Wrap(err, "failed to get raw runtime client")
	}
	// Verbose=true makes the plugin include its serialized config in Info.
	resp, err := client.Status(context.Background(), &runtime.StatusRequest{Verbose: true})
	if err != nil {
		return nil, errors.Wrap(err, "failed to get status")
	}
	config := &criconfig.Config{}
	if err := json.Unmarshal([]byte(resp.Info["config"]), config); err != nil {
		return nil, errors.Wrap(err, "failed to unmarshal config")
	}
	return config, nil
}
+
// SandboxInfo gets the sandbox status plus the extra verbose "info" payload
// (decoded into server.SandboxInfo) for the sandbox with the given id.
func SandboxInfo(id string) (*runtime.PodSandboxStatus, *server.SandboxInfo, error) {
	client, err := RawRuntimeClient()
	if err != nil {
		return nil, nil, errors.Wrap(err, "failed to get raw runtime client")
	}
	// Verbose=true makes the plugin attach the internal sandbox info blob.
	resp, err := client.PodSandboxStatus(context.Background(), &runtime.PodSandboxStatusRequest{
		PodSandboxId: id,
		Verbose:      true,
	})
	if err != nil {
		return nil, nil, errors.Wrap(err, "failed to get sandbox status")
	}
	status := resp.GetStatus()
	var info server.SandboxInfo
	if err := json.Unmarshal([]byte(resp.GetInfo()["info"]), &info); err != nil {
		return nil, nil, errors.Wrap(err, "failed to unmarshal sandbox info")
	}
	return status, &info, nil
}
+
// RestartContainerd kills the containerd process and waits until it is
// serving again. It assumes something external (presumably the init system,
// e.g. systemd — confirm in the test environment) restarts containerd after
// the kill.
func RestartContainerd(t *testing.T) {
	require.NoError(t, KillProcess(*containerdBin))

	// Use assert (not require) here so that the reconnect wait below always
	// runs even if this check fails; that final wait is what guarantees
	// containerd is running again before this function returns.
	assert.NoError(t, Eventually(func() (bool, error) {
		pid, err := PidOf(*containerdBin)
		if err != nil {
			return false, err
		}
		// pid == 0 means no process with that name is left.
		return pid == 0, nil
	}, time.Second, 30*time.Second), "wait for containerd to be killed")

	require.NoError(t, Eventually(func() (bool, error) {
		return ConnectDaemons() == nil, nil
	}, time.Second, 30*time.Second), "wait for containerd to be restarted")
}
diff --git a/integration/no_metadata_test.go b/integration/no_metadata_test.go
new file mode 100644
index 000000000..5766e19b4
--- /dev/null
+++ b/integration/no_metadata_test.go
@@ -0,0 +1,50 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
// TestRunPodSandboxWithoutMetadata verifies that running a sandbox from an
// empty config (no metadata) is rejected, and that the failure does not wedge
// the runtime: Status must still succeed afterwards.
func TestRunPodSandboxWithoutMetadata(t *testing.T) {
	sbConfig := &runtime.PodSandboxConfig{}
	_, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
	require.Error(t, err)
	_, err = runtimeService.Status()
	require.NoError(t, err)
}
+
// TestCreateContainerWithoutMetadata verifies that creating a container from
// an empty config (no metadata) is rejected, and that the runtime keeps
// serving afterwards.
func TestCreateContainerWithoutMetadata(t *testing.T) {
	sbConfig := PodSandboxConfig("sandbox", "container-create")
	sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
	require.NoError(t, err)
	defer func() {
		// Make sure the sandbox is cleaned up in any case; errors are
		// deliberately ignored — this is best-effort cleanup.
		runtimeService.StopPodSandbox(sb)
		runtimeService.RemovePodSandbox(sb)
	}()
	config := &runtime.ContainerConfig{}
	_, err = runtimeService.CreateContainer(sb, config, sbConfig)
	require.Error(t, err)
	_, err = runtimeService.Status()
	require.NoError(t, err)
}
diff --git a/integration/pod_dualstack_test.go b/integration/pod_dualstack_test.go
new file mode 100644
index 000000000..acaacdd56
--- /dev/null
+++ b/integration/pod_dualstack_test.go
@@ -0,0 +1,107 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "io/ioutil"
+ "net"
+ "os"
+ "path/filepath"
+ "regexp"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+func TestPodDualStack(t *testing.T) {
+ testPodLogDir, err := ioutil.TempDir("/tmp", "dualstack")
+ require.NoError(t, err)
+ defer os.RemoveAll(testPodLogDir)
+
+ t.Log("Create a sandbox")
+ sbConfig := PodSandboxConfig("sandbox", "dualstack", WithPodLogDirectory(testPodLogDir))
+ sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, runtimeService.StopPodSandbox(sb))
+ assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+ }()
+
+ const (
+ testImage = "busybox"
+ containerName = "test-container"
+ )
+ t.Logf("Pull test image %q", testImage)
+ img, err := imageService.PullImage(&runtime.ImageSpec{Image: testImage}, nil, sbConfig)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, imageService.RemoveImage(&runtime.ImageSpec{Image: img}))
+ }()
+
+ t.Log("Create a container to print env")
+ cnConfig := ContainerConfig(
+ containerName,
+ testImage,
+ WithCommand("ip", "address", "show", "dev", "eth0"),
+ WithLogPath(containerName),
+ )
+ cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
+ require.NoError(t, err)
+
+ t.Log("Start the container")
+ require.NoError(t, runtimeService.StartContainer(cn))
+
+ t.Log("Wait for container to finish running")
+ require.NoError(t, Eventually(func() (bool, error) {
+ s, err := runtimeService.ContainerStatus(cn)
+ if err != nil {
+ return false, err
+ }
+ if s.GetState() == runtime.ContainerState_CONTAINER_EXITED {
+ return true, nil
+ }
+ return false, nil
+ }, time.Second, 30*time.Second))
+
+ content, err := ioutil.ReadFile(filepath.Join(testPodLogDir, containerName))
+ assert.NoError(t, err)
+ status, err := runtimeService.PodSandboxStatus(sb)
+ require.NoError(t, err)
+ ip := status.GetNetwork().GetIp()
+ additionalIps := status.GetNetwork().GetAdditionalIps()
+
+ ipv4Enabled, err := regexp.MatchString("inet .* scope global", string(content))
+ assert.NoError(t, err)
+ ipv6Enabled, err := regexp.MatchString("inet6 .* scope global", string(content))
+ assert.NoError(t, err)
+
+ if ipv4Enabled && ipv6Enabled {
+ t.Log("Dualstack should be enabled")
+ require.Len(t, additionalIps, 1)
+ assert.NotNil(t, net.ParseIP(ip).To4())
+ assert.Nil(t, net.ParseIP(additionalIps[0].GetIp()).To4())
+ } else {
+ t.Log("Dualstack should not be enabled")
+ assert.Len(t, additionalIps, 0)
+ assert.NotEmpty(t, ip)
+ }
+}
diff --git a/integration/pod_hostname_test.go b/integration/pod_hostname_test.go
new file mode 100644
index 000000000..5b87f6dc8
--- /dev/null
+++ b/integration/pod_hostname_test.go
@@ -0,0 +1,132 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
// TestPodHostname verifies hostname handling: a regular pod honors a custom
// hostname, a host-network pod inherits the node hostname, and combining
// host network with a custom hostname is rejected at sandbox creation.
func TestPodHostname(t *testing.T) {
	hostname, err := os.Hostname()
	require.NoError(t, err)
	for name, test := range map[string]struct {
		opts             []PodSandboxOpts
		expectedHostname string
		expectErr        bool
	}{
		"regular pod with custom hostname": {
			opts: []PodSandboxOpts{
				WithPodHostname("test-hostname"),
			},
			expectedHostname: "test-hostname",
		},
		"host network pod without custom hostname": {
			opts: []PodSandboxOpts{
				WithHostNetwork,
			},
			expectedHostname: hostname,
		},
		"host network pod with custom hostname should fail": {
			opts: []PodSandboxOpts{
				WithHostNetwork,
				WithPodHostname("test-hostname"),
			},
			expectErr: true,
		},
	} {
		t.Run(name, func(t *testing.T) {
			testPodLogDir, err := ioutil.TempDir("/tmp", "hostname")
			require.NoError(t, err)
			defer os.RemoveAll(testPodLogDir)

			opts := append(test.opts, WithPodLogDirectory(testPodLogDir))
			t.Log("Create a sandbox with hostname")
			sbConfig := PodSandboxConfig("sandbox", "hostname", opts...)
			sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
			if err != nil {
				// Failure here is only acceptable for the expected-error case.
				if !test.expectErr {
					t.Fatalf("Unexpected RunPodSandbox error: %v", err)
				}
				return
			}
			// Make sure the sandbox is cleaned up.
			defer func() {
				assert.NoError(t, runtimeService.StopPodSandbox(sb))
				assert.NoError(t, runtimeService.RemovePodSandbox(sb))
			}()
			if test.expectErr {
				t.Fatalf("Expected RunPodSandbox to return error")
			}

			const (
				testImage     = "busybox"
				containerName = "test-container"
			)
			t.Logf("Pull test image %q", testImage)
			img, err := imageService.PullImage(&runtime.ImageSpec{Image: testImage}, nil, sbConfig)
			require.NoError(t, err)
			defer func() {
				assert.NoError(t, imageService.RemoveImage(&runtime.ImageSpec{Image: img}))
			}()

			// The container prints /etc/hostname and the environment so the
			// assertions below can check both hostname sources from its log.
			t.Log("Create a container to print env")
			cnConfig := ContainerConfig(
				containerName,
				testImage,
				WithCommand("sh", "-c",
					"echo -n /etc/hostname= && cat /etc/hostname && env"),
				WithLogPath(containerName),
			)
			cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
			require.NoError(t, err)

			t.Log("Start the container")
			require.NoError(t, runtimeService.StartContainer(cn))

			t.Log("Wait for container to finish running")
			require.NoError(t, Eventually(func() (bool, error) {
				s, err := runtimeService.ContainerStatus(cn)
				if err != nil {
					return false, err
				}
				if s.GetState() == runtime.ContainerState_CONTAINER_EXITED {
					return true, nil
				}
				return false, nil
			}, time.Second, 30*time.Second))

			content, err := ioutil.ReadFile(filepath.Join(testPodLogDir, containerName))
			assert.NoError(t, err)

			t.Log("Search hostname env in container log")
			assert.Contains(t, string(content), "HOSTNAME="+test.expectedHostname)

			t.Log("Search /etc/hostname content in container log")
			assert.Contains(t, string(content), "/etc/hostname="+test.expectedHostname)
		})
	}
}
diff --git a/integration/remote/doc.go b/integration/remote/doc.go
new file mode 100644
index 000000000..5b8260831
--- /dev/null
+++ b/integration/remote/doc.go
@@ -0,0 +1,35 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package remote contains gRPC implementation of internalapi.RuntimeService
+// and internalapi.ImageManagerService.
+package remote
diff --git a/integration/remote/remote_image.go b/integration/remote/remote_image.go
new file mode 100644
index 000000000..346c370b5
--- /dev/null
+++ b/integration/remote/remote_image.go
@@ -0,0 +1,172 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package remote
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "time"
+
+ "google.golang.org/grpc"
+ "k8s.io/klog/v2"
+
+ internalapi "k8s.io/cri-api/pkg/apis"
+ runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/integration/remote/util"
+)
+
// ImageService is a gRPC implementation of internalapi.ImageManagerService.
//
// NOTE(review): this file is adapted from kubelet's remote image service (see
// the Kubernetes copyright header above); keep changes minimal so it stays
// easy to diff against upstream.
type ImageService struct {
	// timeout bounds each RPC except PullImage and ImageFsInfo, which use a
	// cancellable context without a deadline.
	timeout     time.Duration
	imageClient runtimeapi.ImageServiceClient
}
+
// NewImageService creates a new internalapi.ImageManagerService talking to
// the given endpoint. Note: without grpc.WithBlock the dial returns before
// the connection is proven; callers are expected to verify connectivity with
// a real RPC.
func NewImageService(endpoint string, connectionTimeout time.Duration) (internalapi.ImageManagerService, error) {
	klog.V(3).Infof("Connecting to image service %s", endpoint)
	addr, dialer, err := util.GetAddressAndDialer(endpoint)
	if err != nil {
		return nil, err
	}

	ctx, cancel := context.WithTimeout(context.Background(), connectionTimeout)
	defer cancel()

	// maxMsgSize is declared elsewhere in this package.
	conn, err := grpc.DialContext(ctx, addr, grpc.WithInsecure(), grpc.WithContextDialer(dialer), grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(maxMsgSize)))
	if err != nil {
		klog.Errorf("Connect remote image service %s failed: %v", addr, err)
		return nil, err
	}

	return &ImageService{
		timeout:     connectionTimeout,
		imageClient: runtimeapi.NewImageServiceClient(conn),
	}, nil
}
+
// ListImages lists available images, optionally restricted by filter.
func (r *ImageService) ListImages(filter *runtimeapi.ImageFilter) ([]*runtimeapi.Image, error) {
	// getContextWithTimeout is a helper defined elsewhere in this package.
	ctx, cancel := getContextWithTimeout(r.timeout)
	defer cancel()

	resp, err := r.imageClient.ListImages(ctx, &runtimeapi.ListImagesRequest{
		Filter: filter,
	})
	if err != nil {
		klog.Errorf("ListImages with filter %+v from image service failed: %v", filter, err)
		return nil, err
	}

	return resp.Images, nil
}
+
// ImageStatus returns the status of the image, or nil if the runtime reports
// no such image. A present image with an empty Id or zero Size_ is treated
// as a runtime bug and surfaced as an error.
func (r *ImageService) ImageStatus(image *runtimeapi.ImageSpec) (*runtimeapi.Image, error) {
	ctx, cancel := getContextWithTimeout(r.timeout)
	defer cancel()

	resp, err := r.imageClient.ImageStatus(ctx, &runtimeapi.ImageStatusRequest{
		Image: image,
	})
	if err != nil {
		klog.Errorf("ImageStatus %q from image service failed: %v", image.Image, err)
		return nil, err
	}

	if resp.Image != nil {
		// Sanity check: a well-behaved runtime always sets both fields.
		if resp.Image.Id == "" || resp.Image.Size_ == 0 {
			errorMessage := fmt.Sprintf("Id or size of image %q is not set", image.Image)
			klog.Errorf("ImageStatus failed: %s", errorMessage)
			return nil, errors.New(errorMessage)
		}
	}

	return resp.Image, nil
}
+
// PullImage pulls an image with authentication config and returns the pulled
// image ref. The returned ref must be non-empty; an empty ref is reported as
// an error.
func (r *ImageService) PullImage(image *runtimeapi.ImageSpec, auth *runtimeapi.AuthConfig, podSandboxConfig *runtimeapi.PodSandboxConfig) (string, error) {
	// Use a cancellable context without a deadline: pulls can take longer
	// than r.timeout.
	ctx, cancel := getContextWithCancel()
	defer cancel()

	resp, err := r.imageClient.PullImage(ctx, &runtimeapi.PullImageRequest{
		Image:         image,
		Auth:          auth,
		SandboxConfig: podSandboxConfig,
	})
	if err != nil {
		klog.Errorf("PullImage %q from image service failed: %v", image.Image, err)
		return "", err
	}

	if resp.ImageRef == "" {
		errorMessage := fmt.Sprintf("imageRef of image %q is not set", image.Image)
		klog.Errorf("PullImage failed: %s", errorMessage)
		return "", errors.New(errorMessage)
	}

	return resp.ImageRef, nil
}
+
// RemoveImage removes the image from the runtime.
func (r *ImageService) RemoveImage(image *runtimeapi.ImageSpec) error {
	ctx, cancel := getContextWithTimeout(r.timeout)
	defer cancel()

	_, err := r.imageClient.RemoveImage(ctx, &runtimeapi.RemoveImageRequest{
		Image: image,
	})
	if err != nil {
		klog.Errorf("RemoveImage %q from image service failed: %v", image.Image, err)
		return err
	}

	return nil
}
+
// ImageFsInfo returns information of the filesystem that is used to store
// images, one FilesystemUsage entry per image filesystem.
func (r *ImageService) ImageFsInfo() ([]*runtimeapi.FilesystemUsage, error) {
	// Do not set timeout, because `ImageFsInfo` takes time.
	// TODO(random-liu): Should we assume runtime should cache the result, and set timeout here?
	ctx, cancel := getContextWithCancel()
	defer cancel()

	resp, err := r.imageClient.ImageFsInfo(ctx, &runtimeapi.ImageFsInfoRequest{})
	if err != nil {
		klog.Errorf("ImageFsInfo from image service failed: %v", err)
		return nil, err
	}
	return resp.GetImageFilesystems(), nil
}
diff --git a/integration/remote/remote_runtime.go b/integration/remote/remote_runtime.go
new file mode 100644
index 000000000..c15e7884a
--- /dev/null
+++ b/integration/remote/remote_runtime.go
@@ -0,0 +1,586 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package remote
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "strings"
+ "time"
+
+ "google.golang.org/grpc"
+ "k8s.io/klog/v2"
+
+ "k8s.io/component-base/logs/logreduction"
+ internalapi "k8s.io/cri-api/pkg/apis"
+ runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+ utilexec "k8s.io/utils/exec"
+
+ "github.com/containerd/cri/integration/remote/util"
+)
+
+// RuntimeService is a gRPC implementation of internalapi.RuntimeService.
+type RuntimeService struct {
+ // timeout is the default timeout applied to individual gRPC requests.
+ timeout time.Duration
+ // runtimeClient is the generated CRI runtime gRPC client stub.
+ runtimeClient runtimeapi.RuntimeServiceClient
+ // Cache last per-container error message to reduce log spam
+ logReduction *logreduction.LogReduction
+}
+
+const (
+ // How frequently to report identical errors
+ // (window used to configure logReduction in NewRuntimeService).
+ identicalErrorDelay = 1 * time.Minute
+)
+
+// NewRuntimeService creates a new internalapi.RuntimeService.
+// connectionTimeout bounds the initial dial and is also reused as the
+// default per-request timeout for subsequent calls.
+func NewRuntimeService(endpoint string, connectionTimeout time.Duration) (internalapi.RuntimeService, error) {
+ klog.V(3).Infof("Connecting to runtime service %s", endpoint)
+ addr, dialer, err := util.GetAddressAndDialer(endpoint)
+ if err != nil {
+ return nil, err
+ }
+ ctx, cancel := context.WithTimeout(context.Background(), connectionTimeout)
+ defer cancel()
+
+ // NOTE(review): dials without transport security (WithInsecure) —
+ // assumes a local/trusted endpoint such as a unix socket.
+ conn, err := grpc.DialContext(ctx, addr, grpc.WithInsecure(), grpc.WithContextDialer(dialer), grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(maxMsgSize)))
+ if err != nil {
+ klog.Errorf("Connect remote runtime %s failed: %v", addr, err)
+ return nil, err
+ }
+
+ return &RuntimeService{
+ timeout: connectionTimeout,
+ runtimeClient: runtimeapi.NewRuntimeServiceClient(conn),
+ logReduction: logreduction.NewLogReduction(identicalErrorDelay),
+ }, nil
+}
+
+// Version returns the runtime name, runtime version and runtime API version.
+func (r *RuntimeService) Version(apiVersion string) (*runtimeapi.VersionResponse, error) {
+ klog.V(10).Infof("[RuntimeService] Version (apiVersion=%v, timeout=%v)", apiVersion, r.timeout)
+
+ ctx, cancel := getContextWithTimeout(r.timeout)
+ defer cancel()
+
+ typedVersion, err := r.runtimeClient.Version(ctx, &runtimeapi.VersionRequest{
+ Version: apiVersion,
+ })
+ if err != nil {
+ klog.Errorf("Version from runtime service failed: %v", err)
+ return nil, err
+ }
+
+ klog.V(10).Infof("[RuntimeService] Version Response (typedVersion=%v)", typedVersion)
+
+ // All four fields are mandatory; reject a partially-populated response.
+ if typedVersion.Version == "" || typedVersion.RuntimeName == "" || typedVersion.RuntimeApiVersion == "" || typedVersion.RuntimeVersion == "" {
+ return nil, fmt.Errorf("not all fields are set in VersionResponse (%q)", *typedVersion)
+ }
+
+ return typedVersion, err
+}
+
+// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
+// the sandbox is in ready state.
+// It returns the runtime-assigned sandbox ID, which must be non-empty.
+func (r *RuntimeService) RunPodSandbox(config *runtimeapi.PodSandboxConfig, runtimeHandler string) (string, error) {
+ // Use 2 times longer timeout for sandbox operation (4 mins by default)
+ // TODO: Make the pod sandbox timeout configurable.
+ timeout := r.timeout * 2
+
+ klog.V(10).Infof("[RuntimeService] RunPodSandbox (config=%v, runtimeHandler=%v, timeout=%v)", config, runtimeHandler, timeout)
+
+ ctx, cancel := getContextWithTimeout(timeout)
+ defer cancel()
+
+ resp, err := r.runtimeClient.RunPodSandbox(ctx, &runtimeapi.RunPodSandboxRequest{
+ Config: config,
+ RuntimeHandler: runtimeHandler,
+ })
+ if err != nil {
+ klog.Errorf("RunPodSandbox from runtime service failed: %v", err)
+ return "", err
+ }
+
+ // An empty sandbox ID would make the sandbox unmanageable later.
+ if resp.PodSandboxId == "" {
+ errorMessage := fmt.Sprintf("PodSandboxId is not set for sandbox %q", config.GetMetadata())
+ klog.Errorf("RunPodSandbox failed: %s", errorMessage)
+ return "", errors.New(errorMessage)
+ }
+
+ klog.V(10).Infof("[RuntimeService] RunPodSandbox Response (PodSandboxId=%v)", resp.PodSandboxId)
+
+ return resp.PodSandboxId, nil
+}
+
+// StopPodSandbox stops the sandbox. If there are any running containers in the
+// sandbox, they should be forced to termination.
+// The call is bounded by the default per-request timeout.
+func (r *RuntimeService) StopPodSandbox(podSandBoxID string) error {
+ klog.V(10).Infof("[RuntimeService] StopPodSandbox (podSandboxID=%v, timeout=%v)", podSandBoxID, r.timeout)
+
+ ctx, cancel := getContextWithTimeout(r.timeout)
+ defer cancel()
+
+ _, err := r.runtimeClient.StopPodSandbox(ctx, &runtimeapi.StopPodSandboxRequest{
+ PodSandboxId: podSandBoxID,
+ })
+ if err != nil {
+ klog.Errorf("StopPodSandbox %q from runtime service failed: %v", podSandBoxID, err)
+ return err
+ }
+
+ klog.V(10).Infof("[RuntimeService] StopPodSandbox Response (podSandboxID=%v)", podSandBoxID)
+
+ return nil
+}
+
+// RemovePodSandbox removes the sandbox. If there are any containers in the
+// sandbox, they should be forcibly removed.
+// The call is bounded by the default per-request timeout.
+func (r *RuntimeService) RemovePodSandbox(podSandBoxID string) error {
+ klog.V(10).Infof("[RuntimeService] RemovePodSandbox (podSandboxID=%v, timeout=%v)", podSandBoxID, r.timeout)
+ ctx, cancel := getContextWithTimeout(r.timeout)
+ defer cancel()
+
+ _, err := r.runtimeClient.RemovePodSandbox(ctx, &runtimeapi.RemovePodSandboxRequest{
+ PodSandboxId: podSandBoxID,
+ })
+ if err != nil {
+ klog.Errorf("RemovePodSandbox %q from runtime service failed: %v", podSandBoxID, err)
+ return err
+ }
+
+ klog.V(10).Infof("[RuntimeService] RemovePodSandbox Response (podSandboxID=%v)", podSandBoxID)
+
+ return nil
+}
+
+// PodSandboxStatus returns the status of the PodSandbox.
+// A non-nil status is validated via verifySandboxStatus before being returned.
+func (r *RuntimeService) PodSandboxStatus(podSandBoxID string) (*runtimeapi.PodSandboxStatus, error) {
+ klog.V(10).Infof("[RuntimeService] PodSandboxStatus (podSandboxID=%v, timeout=%v)", podSandBoxID, r.timeout)
+ ctx, cancel := getContextWithTimeout(r.timeout)
+ defer cancel()
+
+ resp, err := r.runtimeClient.PodSandboxStatus(ctx, &runtimeapi.PodSandboxStatusRequest{
+ PodSandboxId: podSandBoxID,
+ })
+ if err != nil {
+ // Note: unlike the other methods, the error is not logged here.
+ return nil, err
+ }
+
+ klog.V(10).Infof("[RuntimeService] PodSandboxStatus Response (podSandboxID=%v, status=%v)", podSandBoxID, resp.Status)
+
+ if resp.Status != nil {
+ if err := verifySandboxStatus(resp.Status); err != nil {
+ return nil, err
+ }
+ }
+
+ return resp.Status, nil
+}
+
+// ListPodSandbox returns a list of PodSandboxes.
+// A nil filter lists all sandboxes known to the runtime.
+func (r *RuntimeService) ListPodSandbox(filter *runtimeapi.PodSandboxFilter) ([]*runtimeapi.PodSandbox, error) {
+ klog.V(10).Infof("[RuntimeService] ListPodSandbox (filter=%v, timeout=%v)", filter, r.timeout)
+ ctx, cancel := getContextWithTimeout(r.timeout)
+ defer cancel()
+
+ resp, err := r.runtimeClient.ListPodSandbox(ctx, &runtimeapi.ListPodSandboxRequest{
+ Filter: filter,
+ })
+ if err != nil {
+ klog.Errorf("ListPodSandbox with filter %+v from runtime service failed: %v", filter, err)
+ return nil, err
+ }
+
+ klog.V(10).Infof("[RuntimeService] ListPodSandbox Response (filter=%v, items=%v)", filter, resp.Items)
+
+ return resp.Items, nil
+}
+
+// CreateContainer creates a new container in the specified PodSandbox.
+// It returns the runtime-assigned container ID, which must be non-empty.
+func (r *RuntimeService) CreateContainer(podSandBoxID string, config *runtimeapi.ContainerConfig, sandboxConfig *runtimeapi.PodSandboxConfig) (string, error) {
+ klog.V(10).Infof("[RuntimeService] CreateContainer (podSandBoxID=%v, timeout=%v)", podSandBoxID, r.timeout)
+ ctx, cancel := getContextWithTimeout(r.timeout)
+ defer cancel()
+
+ resp, err := r.runtimeClient.CreateContainer(ctx, &runtimeapi.CreateContainerRequest{
+ PodSandboxId: podSandBoxID,
+ Config: config,
+ SandboxConfig: sandboxConfig,
+ })
+ if err != nil {
+ klog.Errorf("CreateContainer in sandbox %q from runtime service failed: %v", podSandBoxID, err)
+ return "", err
+ }
+
+ klog.V(10).Infof("[RuntimeService] CreateContainer (podSandBoxID=%v, ContainerId=%v)", podSandBoxID, resp.ContainerId)
+ // An empty container ID would make the container unmanageable later.
+ if resp.ContainerId == "" {
+ errorMessage := fmt.Sprintf("ContainerId is not set for container %q", config.GetMetadata())
+ klog.Errorf("CreateContainer failed: %s", errorMessage)
+ return "", errors.New(errorMessage)
+ }
+
+ return resp.ContainerId, nil
+}
+
+// StartContainer starts the container.
+// The call is bounded by the default per-request timeout.
+func (r *RuntimeService) StartContainer(containerID string) error {
+ klog.V(10).Infof("[RuntimeService] StartContainer (containerID=%v, timeout=%v)", containerID, r.timeout)
+ ctx, cancel := getContextWithTimeout(r.timeout)
+ defer cancel()
+
+ _, err := r.runtimeClient.StartContainer(ctx, &runtimeapi.StartContainerRequest{
+ ContainerId: containerID,
+ })
+ if err != nil {
+ klog.Errorf("StartContainer %q from runtime service failed: %v", containerID, err)
+ return err
+ }
+ klog.V(10).Infof("[RuntimeService] StartContainer Response (containerID=%v)", containerID)
+
+ return nil
+}
+
+// StopContainer stops a running container with a grace period (i.e., timeout).
+// timeout is the grace period in seconds granted to the container before a
+// forced kill; the gRPC deadline is extended beyond it to cover the kill and
+// request latency.
+func (r *RuntimeService) StopContainer(containerID string, timeout int64) error {
+ klog.V(10).Infof("[RuntimeService] StopContainer (containerID=%v, timeout=%v)", containerID, timeout)
+ // Use timeout + default timeout (2 minutes) as timeout to leave extra time
+ // for SIGKILL container and request latency.
+ t := r.timeout + time.Duration(timeout)*time.Second
+ ctx, cancel := getContextWithTimeout(t)
+ defer cancel()
+
+ // Forget any deduplicated error state recorded for this container.
+ r.logReduction.ClearID(containerID)
+ _, err := r.runtimeClient.StopContainer(ctx, &runtimeapi.StopContainerRequest{
+ ContainerId: containerID,
+ Timeout: timeout,
+ })
+ if err != nil {
+ klog.Errorf("StopContainer %q from runtime service failed: %v", containerID, err)
+ return err
+ }
+ klog.V(10).Infof("[RuntimeService] StopContainer Response (containerID=%v)", containerID)
+
+ return nil
+}
+
+// RemoveContainer removes the container. If the container is running, the container
+// should be forced to removal.
+func (r *RuntimeService) RemoveContainer(containerID string) error {
+ klog.V(10).Infof("[RuntimeService] RemoveContainer (containerID=%v, timeout=%v)", containerID, r.timeout)
+ ctx, cancel := getContextWithTimeout(r.timeout)
+ defer cancel()
+
+ // Forget any deduplicated error state recorded for this container.
+ r.logReduction.ClearID(containerID)
+ _, err := r.runtimeClient.RemoveContainer(ctx, &runtimeapi.RemoveContainerRequest{
+ ContainerId: containerID,
+ })
+ if err != nil {
+ klog.Errorf("RemoveContainer %q from runtime service failed: %v", containerID, err)
+ return err
+ }
+ klog.V(10).Infof("[RuntimeService] RemoveContainer Response (containerID=%v)", containerID)
+
+ return nil
+}
+
+// ListContainers lists containers by filters.
+// A nil filter lists all containers known to the runtime.
+func (r *RuntimeService) ListContainers(filter *runtimeapi.ContainerFilter) ([]*runtimeapi.Container, error) {
+ klog.V(10).Infof("[RuntimeService] ListContainers (filter=%v, timeout=%v)", filter, r.timeout)
+ ctx, cancel := getContextWithTimeout(r.timeout)
+ defer cancel()
+
+ resp, err := r.runtimeClient.ListContainers(ctx, &runtimeapi.ListContainersRequest{
+ Filter: filter,
+ })
+ if err != nil {
+ klog.Errorf("ListContainers with filter %+v from runtime service failed: %v", filter, err)
+ return nil, err
+ }
+ klog.V(10).Infof("[RuntimeService] ListContainers Response (filter=%v, containers=%v)", filter, resp.Containers)
+
+ return resp.Containers, nil
+}
+
+// ContainerStatus returns the container status.
+// Repeated identical failures are deduplicated via logReduction; the
+// deduplication state is cleared again on the first success.
+func (r *RuntimeService) ContainerStatus(containerID string) (*runtimeapi.ContainerStatus, error) {
+ klog.V(10).Infof("[RuntimeService] ContainerStatus (containerID=%v, timeout=%v)", containerID, r.timeout)
+ ctx, cancel := getContextWithTimeout(r.timeout)
+ defer cancel()
+
+ resp, err := r.runtimeClient.ContainerStatus(ctx, &runtimeapi.ContainerStatusRequest{
+ ContainerId: containerID,
+ })
+ if err != nil {
+ // Don't spam the log with endless messages about the same failure.
+ if r.logReduction.ShouldMessageBePrinted(err.Error(), containerID) {
+ klog.Errorf("ContainerStatus %q from runtime service failed: %v", containerID, err)
+ }
+ return nil, err
+ }
+ r.logReduction.ClearID(containerID)
+ klog.V(10).Infof("[RuntimeService] ContainerStatus Response (containerID=%v, status=%v)", containerID, resp.Status)
+
+ // Validate a non-nil status before handing it back to the caller.
+ if resp.Status != nil {
+ if err := verifyContainerStatus(resp.Status); err != nil {
+ klog.Errorf("ContainerStatus of %q failed: %v", containerID, err)
+ return nil, err
+ }
+ }
+
+ return resp.Status, nil
+}
+
+// UpdateContainerResources updates a containers resource config
+// (Linux resources only — the request carries a LinuxContainerResources).
+func (r *RuntimeService) UpdateContainerResources(containerID string, resources *runtimeapi.LinuxContainerResources) error {
+ klog.V(10).Infof("[RuntimeService] UpdateContainerResources (containerID=%v, timeout=%v)", containerID, r.timeout)
+ ctx, cancel := getContextWithTimeout(r.timeout)
+ defer cancel()
+
+ _, err := r.runtimeClient.UpdateContainerResources(ctx, &runtimeapi.UpdateContainerResourcesRequest{
+ ContainerId: containerID,
+ Linux: resources,
+ })
+ if err != nil {
+ klog.Errorf("UpdateContainerResources %q from runtime service failed: %v", containerID, err)
+ return err
+ }
+ klog.V(10).Infof("[RuntimeService] UpdateContainerResources Response (containerID=%v)", containerID)
+
+ return nil
+}
+
+// ExecSync executes a command in the container, and returns the stdout output.
+// If command exits with a non-zero exit code, an error is returned.
+// A non-zero exit is reported as utilexec.CodeExitError so callers can
+// distinguish command failure from transport failure.
+func (r *RuntimeService) ExecSync(containerID string, cmd []string, timeout time.Duration) (stdout []byte, stderr []byte, err error) {
+ klog.V(10).Infof("[RuntimeService] ExecSync (containerID=%v, timeout=%v)", containerID, timeout)
+ // Do not set timeout when timeout is 0.
+ var ctx context.Context
+ var cancel context.CancelFunc
+ if timeout != 0 {
+ // Use timeout + default timeout (2 minutes) as timeout to leave some time for
+ // the runtime to do cleanup.
+ ctx, cancel = getContextWithTimeout(r.timeout + timeout)
+ } else {
+ ctx, cancel = getContextWithCancel()
+ }
+ defer cancel()
+
+ // The CRI request expresses the grace period in whole seconds.
+ timeoutSeconds := int64(timeout.Seconds())
+ req := &runtimeapi.ExecSyncRequest{
+ ContainerId: containerID,
+ Cmd: cmd,
+ Timeout: timeoutSeconds,
+ }
+ resp, err := r.runtimeClient.ExecSync(ctx, req)
+ if err != nil {
+ klog.Errorf("ExecSync %s '%s' from runtime service failed: %v", containerID, strings.Join(cmd, " "), err)
+ return nil, nil, err
+ }
+
+ klog.V(10).Infof("[RuntimeService] ExecSync Response (containerID=%v, ExitCode=%v)", containerID, resp.ExitCode)
+ err = nil
+ if resp.ExitCode != 0 {
+ err = utilexec.CodeExitError{
+ Err: fmt.Errorf("command '%s' exited with %d: %s", strings.Join(cmd, " "), resp.ExitCode, resp.Stderr),
+ Code: int(resp.ExitCode),
+ }
+ }
+
+ return resp.Stdout, resp.Stderr, err
+}
+
+// Exec prepares a streaming endpoint to execute a command in the container, and returns the address.
+// The response must carry a non-empty streaming URL.
+func (r *RuntimeService) Exec(req *runtimeapi.ExecRequest) (*runtimeapi.ExecResponse, error) {
+ klog.V(10).Infof("[RuntimeService] Exec (timeout=%v)", r.timeout)
+ ctx, cancel := getContextWithTimeout(r.timeout)
+ defer cancel()
+
+ resp, err := r.runtimeClient.Exec(ctx, req)
+ if err != nil {
+ klog.Errorf("Exec %s '%s' from runtime service failed: %v", req.ContainerId, strings.Join(req.Cmd, " "), err)
+ return nil, err
+ }
+ klog.V(10).Info("[RuntimeService] Exec Response")
+
+ if resp.Url == "" {
+ errorMessage := "URL is not set"
+ klog.Errorf("Exec failed: %s", errorMessage)
+ return nil, errors.New(errorMessage)
+ }
+
+ return resp, nil
+}
+
+// Attach prepares a streaming endpoint to attach to a running container, and returns the address.
+// The response must carry a non-empty streaming URL.
+func (r *RuntimeService) Attach(req *runtimeapi.AttachRequest) (*runtimeapi.AttachResponse, error) {
+ klog.V(10).Infof("[RuntimeService] Attach (containerId=%v, timeout=%v)", req.ContainerId, r.timeout)
+ ctx, cancel := getContextWithTimeout(r.timeout)
+ defer cancel()
+
+ resp, err := r.runtimeClient.Attach(ctx, req)
+ if err != nil {
+ klog.Errorf("Attach %s from runtime service failed: %v", req.ContainerId, err)
+ return nil, err
+ }
+ klog.V(10).Infof("[RuntimeService] Attach Response (containerId=%v)", req.ContainerId)
+
+ if resp.Url == "" {
+ errorMessage := "URL is not set"
+ klog.Errorf("Attach failed: %s", errorMessage)
+ return nil, errors.New(errorMessage)
+ }
+ return resp, nil
+}
+
+// PortForward prepares a streaming endpoint to forward ports from a PodSandbox, and returns the address.
+// The response must carry a non-empty streaming URL.
+func (r *RuntimeService) PortForward(req *runtimeapi.PortForwardRequest) (*runtimeapi.PortForwardResponse, error) {
+ klog.V(10).Infof("[RuntimeService] PortForward (podSandboxID=%v, port=%v, timeout=%v)", req.PodSandboxId, req.Port, r.timeout)
+ ctx, cancel := getContextWithTimeout(r.timeout)
+ defer cancel()
+
+ resp, err := r.runtimeClient.PortForward(ctx, req)
+ if err != nil {
+ klog.Errorf("PortForward %s from runtime service failed: %v", req.PodSandboxId, err)
+ return nil, err
+ }
+ klog.V(10).Infof("[RuntimeService] PortForward Response (podSandboxID=%v)", req.PodSandboxId)
+
+ if resp.Url == "" {
+ errorMessage := "URL is not set"
+ klog.Errorf("PortForward failed: %s", errorMessage)
+ return nil, errors.New(errorMessage)
+ }
+
+ return resp, nil
+}
+
+// UpdateRuntimeConfig updates the config of a runtime service. The only
+// update payload currently supported is the pod CIDR assigned to a node,
+// and the runtime service just proxies it down to the network plugin.
+func (r *RuntimeService) UpdateRuntimeConfig(runtimeConfig *runtimeapi.RuntimeConfig) error {
+ klog.V(10).Infof("[RuntimeService] UpdateRuntimeConfig (runtimeConfig=%v, timeout=%v)", runtimeConfig, r.timeout)
+ ctx, cancel := getContextWithTimeout(r.timeout)
+ defer cancel()
+
+ // Response doesn't contain anything of interest. This translates to an
+ // Event notification to the network plugin, which can't fail, so we're
+ // really looking to surface destination unreachable.
+ _, err := r.runtimeClient.UpdateRuntimeConfig(ctx, &runtimeapi.UpdateRuntimeConfigRequest{
+ RuntimeConfig: runtimeConfig,
+ })
+
+ if err != nil {
+ // Note: intentionally not logged here; the caller decides how to report it.
+ return err
+ }
+ klog.V(10).Infof("[RuntimeService] UpdateRuntimeConfig Response (runtimeConfig=%v)", runtimeConfig)
+
+ return nil
+}
+
+// Status returns the status of the runtime.
+// The response is rejected unless it carries at least two conditions
+// (per the error message: RuntimeReady and NetworkReady).
+func (r *RuntimeService) Status() (*runtimeapi.RuntimeStatus, error) {
+ klog.V(10).Infof("[RuntimeService] Status (timeout=%v)", r.timeout)
+ ctx, cancel := getContextWithTimeout(r.timeout)
+ defer cancel()
+
+ resp, err := r.runtimeClient.Status(ctx, &runtimeapi.StatusRequest{})
+ if err != nil {
+ klog.Errorf("Status from runtime service failed: %v", err)
+ return nil, err
+ }
+
+ klog.V(10).Infof("[RuntimeService] Status Response (status=%v)", resp.Status)
+
+ if resp.Status == nil || len(resp.Status.Conditions) < 2 {
+ errorMessage := "RuntimeReady or NetworkReady condition are not set"
+ klog.Errorf("Status failed: %s", errorMessage)
+ return nil, errors.New(errorMessage)
+ }
+
+ return resp.Status, nil
+}
+
+// ContainerStats returns the stats of the container.
+// Repeated identical failures are deduplicated via logReduction; the
+// deduplication state is cleared again on the first success.
+func (r *RuntimeService) ContainerStats(containerID string) (*runtimeapi.ContainerStats, error) {
+ klog.V(10).Infof("[RuntimeService] ContainerStats (containerID=%v, timeout=%v)", containerID, r.timeout)
+ ctx, cancel := getContextWithTimeout(r.timeout)
+ defer cancel()
+
+ resp, err := r.runtimeClient.ContainerStats(ctx, &runtimeapi.ContainerStatsRequest{
+ ContainerId: containerID,
+ })
+ if err != nil {
+ if r.logReduction.ShouldMessageBePrinted(err.Error(), containerID) {
+ klog.Errorf("ContainerStats %q from runtime service failed: %v", containerID, err)
+ }
+ return nil, err
+ }
+ r.logReduction.ClearID(containerID)
+ klog.V(10).Infof("[RuntimeService] ContainerStats Response (containerID=%v, stats=%v)", containerID, resp.GetStats())
+
+ return resp.GetStats(), nil
+}
+
+// ListContainerStats returns the stats of all the containers matching
+// the given filter. A nil filter matches all containers.
+func (r *RuntimeService) ListContainerStats(filter *runtimeapi.ContainerStatsFilter) ([]*runtimeapi.ContainerStats, error) {
+ klog.V(10).Infof("[RuntimeService] ListContainerStats (filter=%v)", filter)
+ // Do not set timeout, because writable layer stats collection takes time.
+ // TODO(random-liu): Should we assume runtime should cache the result, and set timeout here?
+ ctx, cancel := getContextWithCancel()
+ defer cancel()
+
+ resp, err := r.runtimeClient.ListContainerStats(ctx, &runtimeapi.ListContainerStatsRequest{
+ Filter: filter,
+ })
+ if err != nil {
+ klog.Errorf("ListContainerStats with filter %+v from runtime service failed: %v", filter, err)
+ return nil, err
+ }
+ klog.V(10).Infof("[RuntimeService] ListContainerStats Response (filter=%v, stats=%v)", filter, resp.GetStats())
+
+ return resp.GetStats(), nil
+}
+
+// ReopenContainerLog asks the runtime to reopen the log file for the given
+// container (e.g. after the file has been rotated).
+func (r *RuntimeService) ReopenContainerLog(containerID string) error {
+ klog.V(10).Infof("[RuntimeService] ReopenContainerLog (containerID=%v, timeout=%v)", containerID, r.timeout)
+ ctx, cancel := getContextWithTimeout(r.timeout)
+ defer cancel()
+
+ _, err := r.runtimeClient.ReopenContainerLog(ctx, &runtimeapi.ReopenContainerLogRequest{ContainerId: containerID})
+ if err != nil {
+ klog.Errorf("ReopenContainerLog %q from runtime service failed: %v", containerID, err)
+ return err
+ }
+
+ klog.V(10).Infof("[RuntimeService] ReopenContainerLog Response (containerID=%v)", containerID)
+ return nil
+}
diff --git a/integration/remote/util/util_unix.go b/integration/remote/util/util_unix.go
new file mode 100644
index 000000000..cc3f85814
--- /dev/null
+++ b/integration/remote/util/util_unix.go
@@ -0,0 +1,161 @@
+// +build freebsd linux darwin
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2017 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package util
+
+import (
+ "context"
+ "fmt"
+ "io/ioutil"
+ "net"
+ "net/url"
+ "os"
+ "path/filepath"
+
+ "golang.org/x/sys/unix"
+ "k8s.io/klog/v2"
+)
+
+const (
+ // unixProtocol is the network protocol of unix socket.
+ // It serves both as the endpoint URL scheme and as the net.Listen/Dial network.
+ unixProtocol = "unix"
+)
+
+// CreateListener creates a listener on the specified endpoint.
+// Only unix socket endpoints are supported. The socket is first bound to a
+// temporary path and then renamed into place, so a stale socket file from a
+// previous run never races with the new listener.
+func CreateListener(endpoint string) (net.Listener, error) {
+ protocol, addr, err := parseEndpointWithFallbackProtocol(endpoint, unixProtocol)
+ if err != nil {
+ return nil, err
+ }
+ if protocol != unixProtocol {
+ return nil, fmt.Errorf("only support unix socket endpoint")
+ }
+
+ // Unlink to cleanup the previous socket file.
+ err = unix.Unlink(addr)
+ if err != nil && !os.IsNotExist(err) {
+ return nil, fmt.Errorf("failed to unlink socket file %q: %v", addr, err)
+ }
+
+ if err := os.MkdirAll(filepath.Dir(addr), 0750); err != nil {
+ return nil, fmt.Errorf("error creating socket directory %q: %v", filepath.Dir(addr), err)
+ }
+
+ // Create the socket on a tempfile and move it to the destination socket to handle improper cleanup
+ file, err := ioutil.TempFile(filepath.Dir(addr), "")
+ if err != nil {
+ return nil, fmt.Errorf("failed to create temporary file: %v", err)
+ }
+
+ // Only the unique name is needed; remove the file so net.Listen can bind it.
+ if err := os.Remove(file.Name()); err != nil {
+ return nil, fmt.Errorf("failed to remove temporary file: %v", err)
+ }
+
+ l, err := net.Listen(protocol, file.Name())
+ if err != nil {
+ return nil, err
+ }
+
+ // Atomically move the bound socket to its final address.
+ if err = os.Rename(file.Name(), addr); err != nil {
+ return nil, fmt.Errorf("failed to move temporary file to addr %q: %v", addr, err)
+ }
+
+ return l, nil
+}
+
+// GetAddressAndDialer returns the address parsed from the given endpoint and a context dialer.
+// Only unix socket endpoints are supported; a scheme-less endpoint falls back
+// to the unix protocol (with a deprecation warning).
+func GetAddressAndDialer(endpoint string) (string, func(ctx context.Context, addr string) (net.Conn, error), error) {
+ protocol, addr, err := parseEndpointWithFallbackProtocol(endpoint, unixProtocol)
+ if err != nil {
+ return "", nil, err
+ }
+ if protocol != unixProtocol {
+ return "", nil, fmt.Errorf("only support unix socket endpoint")
+ }
+
+ return addr, dial, nil
+}
+
+// dial connects to the unix socket at addr, honoring ctx for cancellation.
+func dial(ctx context.Context, addr string) (net.Conn, error) {
+ return (&net.Dialer{}).DialContext(ctx, unixProtocol, addr)
+}
+
+// parseEndpointWithFallbackProtocol parses endpoint into (protocol, addr).
+// If the endpoint has no scheme, it retries with "<fallbackProtocol>://"
+// prepended and logs a deprecation warning for the scheme-less form.
+func parseEndpointWithFallbackProtocol(endpoint string, fallbackProtocol string) (protocol string, addr string, err error) {
+ if protocol, addr, err = parseEndpoint(endpoint); err != nil && protocol == "" {
+ fallbackEndpoint := fallbackProtocol + "://" + endpoint
+ protocol, addr, err = parseEndpoint(fallbackEndpoint)
+ if err == nil {
+ klog.Warningf("Using %q as endpoint is deprecated, please consider using full url format %q.", endpoint, fallbackEndpoint)
+ }
+ }
+ return
+}
+
+// parseEndpoint splits a URL-style endpoint into (protocol, address).
+// Supported schemes are "tcp" (address is host:port) and "unix" (address is
+// the socket path); a missing or unknown scheme yields an error.
+func parseEndpoint(endpoint string) (string, string, error) {
+ u, err := url.Parse(endpoint)
+ if err != nil {
+ return "", "", err
+ }
+
+ switch u.Scheme {
+ case "tcp":
+ return "tcp", u.Host, nil
+
+ case "unix":
+ return "unix", u.Path, nil
+
+ case "":
+ return "", "", fmt.Errorf("using %q as endpoint is deprecated, please consider using full url format", endpoint)
+
+ default:
+ return u.Scheme, "", fmt.Errorf("protocol %q not supported", u.Scheme)
+ }
+}
+
+// IsUnixDomainSocket returns whether a given file is an AF_UNIX socket file.
+// It returns an error if the file cannot be stat'ed.
+func IsUnixDomainSocket(filePath string) (bool, error) {
+ fi, err := os.Stat(filePath)
+ if err != nil {
+ return false, fmt.Errorf("stat file %s failed: %v", filePath, err)
+ }
+ // Check the socket bit in the file mode.
+ if fi.Mode()&os.ModeSocket == 0 {
+ return false, nil
+ }
+ return true, nil
+}
+
+// NormalizePath is a no-op for Linux for now.
+// It exists so callers can share code with platforms (e.g. Windows) where
+// path normalization is required.
+func NormalizePath(path string) string {
+ return path
+}
diff --git a/integration/remote/util/util_unsupported.go b/integration/remote/util/util_unsupported.go
new file mode 100644
index 000000000..81f412172
--- /dev/null
+++ b/integration/remote/util/util_unsupported.go
@@ -0,0 +1,71 @@
+// +build !freebsd,!linux,!windows,!darwin
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2017 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package util
+
+import (
+ "context"
+ "fmt"
+ "net"
+ "time"
+)
+
+// CreateListener creates a listener on the specified endpoint.
+// Unsupported on this platform: always returns an error.
+func CreateListener(endpoint string) (net.Listener, error) {
+ return nil, fmt.Errorf("CreateListener is unsupported in this build")
+}
+
+// GetAddressAndDialer returns the address parsed from the given endpoint and a context dialer.
+// Unsupported on this platform: always returns an error.
+func GetAddressAndDialer(endpoint string) (string, func(ctx context.Context, addr string) (net.Conn, error), error) {
+ return "", nil, fmt.Errorf("GetAddressAndDialer is unsupported in this build")
+}
+
+// LockAndCheckSubPath empty implementation.
+// NOTE(review): unlike the other stubs in this file, it returns success
+// (an empty handle slice and nil error) rather than an "unsupported" error.
+func LockAndCheckSubPath(volumePath, subPath string) ([]uintptr, error) {
+ return []uintptr{}, nil
+}
+
+// UnlockPath empty implementation: does nothing on this platform.
+func UnlockPath(fileHandles []uintptr) {
+}
+
+// LocalEndpoint empty implementation: always returns an error on this platform.
+func LocalEndpoint(path, file string) (string, error) {
+ return "", fmt.Errorf("LocalEndpoints are unsupported in this build")
+}
+
+// GetBootTime empty implementation: always returns the zero time and an
+// error on this platform.
+func GetBootTime() (time.Time, error) {
+ return time.Time{}, fmt.Errorf("GetBootTime is unsupported in this build")
+}
diff --git a/integration/remote/util/util_windows.go b/integration/remote/util/util_windows.go
new file mode 100644
index 000000000..979ebf2fc
--- /dev/null
+++ b/integration/remote/util/util_windows.go
@@ -0,0 +1,165 @@
+// +build windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2017 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package util
+
+import (
+ "context"
+ "fmt"
+ "net"
+ "net/url"
+ "strings"
+ "syscall"
+ "time"
+
+ "github.com/Microsoft/go-winio"
+)
+
+const (
+ tcpProtocol = "tcp"
+ npipeProtocol = "npipe"
+)
+
+// CreateListener creates a listener on the specified endpoint.
+func CreateListener(endpoint string) (net.Listener, error) {
+ protocol, addr, err := parseEndpoint(endpoint)
+ if err != nil {
+ return nil, err
+ }
+
+ switch protocol {
+ case tcpProtocol:
+ return net.Listen(tcpProtocol, addr)
+
+ case npipeProtocol:
+ return winio.ListenPipe(addr, nil)
+
+ default:
+ return nil, fmt.Errorf("only support tcp and npipe endpoint")
+ }
+}
+
+// GetAddressAndDialer returns the address parsed from the given endpoint and a context dialer.
+func GetAddressAndDialer(endpoint string) (string, func(ctx context.Context, addr string) (net.Conn, error), error) {
+ protocol, addr, err := parseEndpoint(endpoint)
+ if err != nil {
+ return "", nil, err
+ }
+
+ if protocol == tcpProtocol {
+ return addr, tcpDial, nil
+ }
+
+ if protocol == npipeProtocol {
+ return addr, npipeDial, nil
+ }
+
+ return "", nil, fmt.Errorf("only support tcp and npipe endpoint")
+}
+
+func tcpDial(ctx context.Context, addr string) (net.Conn, error) {
+ return (&net.Dialer{}).DialContext(ctx, tcpProtocol, addr)
+}
+
+func npipeDial(ctx context.Context, addr string) (net.Conn, error) {
+ return winio.DialPipeContext(ctx, addr)
+}
+
+func parseEndpoint(endpoint string) (string, string, error) {
+ // url.Parse doesn't recognize \, so replace with / first.
+ endpoint = strings.Replace(endpoint, "\\", "/", -1)
+ u, err := url.Parse(endpoint)
+ if err != nil {
+ return "", "", err
+ }
+
+ if u.Scheme == "tcp" {
+ return "tcp", u.Host, nil
+ } else if u.Scheme == "npipe" {
+ if strings.HasPrefix(u.Path, "//./pipe") {
+ return "npipe", u.Path, nil
+ }
+
+ // fallback host if not provided.
+ host := u.Host
+ if host == "" {
+ host = "."
+ }
+ return "npipe", fmt.Sprintf("//%s%s", host, u.Path), nil
+ } else if u.Scheme == "" {
+ return "", "", fmt.Errorf("Using %q as endpoint is deprecated, please consider using full url format", endpoint)
+ } else {
+ return u.Scheme, "", fmt.Errorf("protocol %q not supported", u.Scheme)
+ }
+}
+
+var tickCount = syscall.NewLazyDLL("kernel32.dll").NewProc("GetTickCount64")
+
+// GetBootTime returns the time at which the machine was started, truncated to the nearest second
+func GetBootTime() (time.Time, error) {
+ currentTime := time.Now()
+ output, _, err := tickCount.Call()
+ if errno, ok := err.(syscall.Errno); !ok || errno != 0 {
+ return time.Time{}, err
+ }
+ return currentTime.Add(-time.Duration(output) * time.Millisecond).Truncate(time.Second), nil
+}
+
+// IsUnixDomainSocket returns whether a given file is a AF_UNIX socket file
+func IsUnixDomainSocket(filePath string) (bool, error) {
+ // Due to the absence of golang support for os.ModeSocket in Windows (https://github.com/golang/go/issues/33357)
+ // we need to dial the file and check if we receive an error to determine if a file is Unix Domain Socket file.
+
+ // Note that querying for the Reparse Points (https://docs.microsoft.com/en-us/windows/win32/fileio/reparse-points)
+ // for the file (using FSCTL_GET_REPARSE_POINT) and checking for reparse tag: reparseTagSocket
+ // does NOT work in 1809 if the socket file is created within a bind mounted directory by a container
+ // and the FSCTL is issued in the host by the kubelet.
+
+ c, err := net.Dial("unix", filePath)
+ if err == nil {
+ c.Close()
+ return true, nil
+ }
+ return false, nil
+}
+
+// NormalizePath converts FS paths returned by certain go frameworks (like fsnotify)
+// to native Windows paths that can be passed to Windows specific code
+func NormalizePath(path string) string {
+ path = strings.ReplaceAll(path, "/", "\\")
+ if strings.HasPrefix(path, "\\") {
+ path = "c:" + path
+ }
+ return path
+}
diff --git a/integration/remote/utils.go b/integration/remote/utils.go
new file mode 100644
index 000000000..a1390c57b
--- /dev/null
+++ b/integration/remote/utils.go
@@ -0,0 +1,107 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package remote
+
+import (
+ "context"
+ "fmt"
+ "time"
+
+ runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// maxMsgSize use 16MB as the default message size limit.
+// grpc library default is 4MB
+const maxMsgSize = 1024 * 1024 * 16
+
+// getContextWithTimeout returns a context with timeout.
+func getContextWithTimeout(timeout time.Duration) (context.Context, context.CancelFunc) {
+ return context.WithTimeout(context.Background(), timeout)
+}
+
+// getContextWithCancel returns a context with cancel.
+func getContextWithCancel() (context.Context, context.CancelFunc) {
+ return context.WithCancel(context.Background())
+}
+
+// verifySandboxStatus verified whether all required fields are set in PodSandboxStatus.
+func verifySandboxStatus(status *runtimeapi.PodSandboxStatus) error {
+ if status.Id == "" {
+ return fmt.Errorf("Id is not set")
+ }
+
+ if status.Metadata == nil {
+ return fmt.Errorf("Metadata is not set")
+ }
+
+ metadata := status.Metadata
+ if metadata.Name == "" || metadata.Namespace == "" || metadata.Uid == "" {
+ return fmt.Errorf("Name, Namespace or Uid is not in metadata %q", metadata)
+ }
+
+ if status.CreatedAt == 0 {
+ return fmt.Errorf("CreatedAt is not set")
+ }
+
+ return nil
+}
+
+// verifyContainerStatus verified whether all required fields are set in ContainerStatus.
+func verifyContainerStatus(status *runtimeapi.ContainerStatus) error {
+ if status.Id == "" {
+ return fmt.Errorf("Id is not set")
+ }
+
+ if status.Metadata == nil {
+ return fmt.Errorf("Metadata is not set")
+ }
+
+ metadata := status.Metadata
+ if metadata.Name == "" {
+ return fmt.Errorf("Name is not in metadata %q", metadata)
+ }
+
+ if status.CreatedAt == 0 {
+ return fmt.Errorf("CreatedAt is not set")
+ }
+
+ if status.Image == nil || status.Image.Image == "" {
+ return fmt.Errorf("Image is not set")
+ }
+
+ if status.ImageRef == "" {
+ return fmt.Errorf("ImageRef is not set")
+ }
+
+ return nil
+}
diff --git a/integration/restart_test.go b/integration/restart_test.go
new file mode 100644
index 000000000..9c9051193
--- /dev/null
+++ b/integration/restart_test.go
@@ -0,0 +1,201 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "sort"
+ "testing"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/errdefs"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// Restart test must run sequentially.
+
+func TestContainerdRestart(t *testing.T) {
+ type container struct {
+ name string
+ id string
+ state runtime.ContainerState
+ }
+ type sandbox struct {
+ name string
+ id string
+ state runtime.PodSandboxState
+ containers []container
+ }
+ ctx := context.Background()
+ sandboxNS := "restart-containerd"
+ sandboxes := []sandbox{
+ {
+ name: "ready-sandbox",
+ state: runtime.PodSandboxState_SANDBOX_READY,
+ containers: []container{
+ {
+ name: "created-container",
+ state: runtime.ContainerState_CONTAINER_CREATED,
+ },
+ {
+ name: "running-container",
+ state: runtime.ContainerState_CONTAINER_RUNNING,
+ },
+ {
+ name: "exited-container",
+ state: runtime.ContainerState_CONTAINER_EXITED,
+ },
+ },
+ },
+ {
+ name: "notready-sandbox",
+ state: runtime.PodSandboxState_SANDBOX_NOTREADY,
+ containers: []container{
+ {
+ name: "created-container",
+ state: runtime.ContainerState_CONTAINER_CREATED,
+ },
+ {
+ name: "running-container",
+ state: runtime.ContainerState_CONTAINER_RUNNING,
+ },
+ {
+ name: "exited-container",
+ state: runtime.ContainerState_CONTAINER_EXITED,
+ },
+ },
+ },
+ }
+ t.Logf("Make sure no sandbox is running before test")
+ existingSandboxes, err := runtimeService.ListPodSandbox(&runtime.PodSandboxFilter{})
+ require.NoError(t, err)
+ require.Empty(t, existingSandboxes)
+
+ t.Logf("Start test sandboxes and containers")
+ for i := range sandboxes {
+ s := &sandboxes[i]
+ sbCfg := PodSandboxConfig(s.name, sandboxNS)
+ sid, err := runtimeService.RunPodSandbox(sbCfg, *runtimeHandler)
+ require.NoError(t, err)
+ defer func() {
+ // Make sure the sandbox is cleaned up in any case.
+ runtimeService.StopPodSandbox(sid)
+ runtimeService.RemovePodSandbox(sid)
+ }()
+ s.id = sid
+ for j := range s.containers {
+ c := &s.containers[j]
+ cfg := ContainerConfig(c.name, pauseImage,
+ // Set pid namespace as per container, so that container won't die
+ // when sandbox container is killed.
+ WithPidNamespace(runtime.NamespaceMode_CONTAINER),
+ )
+ cid, err := runtimeService.CreateContainer(sid, cfg, sbCfg)
+ require.NoError(t, err)
+ // Rely on sandbox cleanup.
+ c.id = cid
+ switch c.state {
+ case runtime.ContainerState_CONTAINER_CREATED:
+ case runtime.ContainerState_CONTAINER_RUNNING:
+ require.NoError(t, runtimeService.StartContainer(cid))
+ case runtime.ContainerState_CONTAINER_EXITED:
+ require.NoError(t, runtimeService.StartContainer(cid))
+ require.NoError(t, runtimeService.StopContainer(cid, 10))
+ }
+ }
+ if s.state == runtime.PodSandboxState_SANDBOX_NOTREADY {
+ cntr, err := containerdClient.LoadContainer(ctx, sid)
+ require.NoError(t, err)
+ task, err := cntr.Task(ctx, nil)
+ require.NoError(t, err)
+ _, err = task.Delete(ctx, containerd.WithProcessKill)
+ if err != nil {
+ require.True(t, errdefs.IsNotFound(err))
+ }
+ }
+ }
+
+ t.Logf("Pull test images")
+ for _, image := range []string{"busybox", "alpine"} {
+ img, err := imageService.PullImage(&runtime.ImageSpec{Image: image}, nil, nil)
+ require.NoError(t, err)
+ defer func() {
+ assert.NoError(t, imageService.RemoveImage(&runtime.ImageSpec{Image: img}))
+ }()
+ }
+ imagesBeforeRestart, err := imageService.ListImages(nil)
+ assert.NoError(t, err)
+
+ t.Logf("Restart containerd")
+ RestartContainerd(t)
+
+ t.Logf("Check sandbox and container state after restart")
+ loadedSandboxes, err := runtimeService.ListPodSandbox(&runtime.PodSandboxFilter{})
+ require.NoError(t, err)
+ assert.Len(t, loadedSandboxes, len(sandboxes))
+ loadedContainers, err := runtimeService.ListContainers(&runtime.ContainerFilter{})
+ require.NoError(t, err)
+ assert.Len(t, loadedContainers, len(sandboxes)*3)
+ for _, s := range sandboxes {
+ for _, loaded := range loadedSandboxes {
+ if s.id == loaded.Id {
+ assert.Equal(t, s.state, loaded.State)
+ break
+ }
+ }
+ for _, c := range s.containers {
+ for _, loaded := range loadedContainers {
+ if c.id == loaded.Id {
+ assert.Equal(t, c.state, loaded.State)
+ break
+ }
+ }
+ }
+ }
+
+ t.Logf("Should be able to stop and remove sandbox after restart")
+ for _, s := range sandboxes {
+ assert.NoError(t, runtimeService.StopPodSandbox(s.id))
+ assert.NoError(t, runtimeService.RemovePodSandbox(s.id))
+ }
+
+ t.Logf("Should recover all images")
+ imagesAfterRestart, err := imageService.ListImages(nil)
+ assert.NoError(t, err)
+ assert.Equal(t, len(imagesBeforeRestart), len(imagesAfterRestart))
+ for _, i1 := range imagesBeforeRestart {
+ found := false
+ for _, i2 := range imagesAfterRestart {
+ if i1.Id == i2.Id {
+ sort.Strings(i1.RepoTags)
+ sort.Strings(i1.RepoDigests)
+ sort.Strings(i2.RepoTags)
+ sort.Strings(i2.RepoDigests)
+ assert.Equal(t, i1, i2)
+ found = true
+ break
+ }
+ }
+ assert.True(t, found, "should find image %+v", i1)
+ }
+}
+
+// TODO: Add back the unknown state test.
diff --git a/integration/runtime_handler_test.go b/integration/runtime_handler_test.go
new file mode 100644
index 000000000..f07bf7b90
--- /dev/null
+++ b/integration/runtime_handler_test.go
@@ -0,0 +1,52 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+func TestRuntimeHandler(t *testing.T) {
+ t.Logf("Create a sandbox")
+ sbConfig := PodSandboxConfig("sandbox", "test-runtime-handler")
+ t.Logf("the --runtime-handler flag value is: %s", *runtimeHandler)
+ sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+ require.NoError(t, err)
+ defer func() {
+ // Make sure the sandbox is cleaned up in any case.
+ runtimeService.StopPodSandbox(sb)
+ runtimeService.RemovePodSandbox(sb)
+ }()
+
+ t.Logf("Verify runtimeService.PodSandboxStatus sets RuntimeHandler")
+ sbStatus, err := runtimeService.PodSandboxStatus(sb)
+ require.NoError(t, err)
+ t.Logf("runtimeService.PodSandboxStatus sets RuntimeHandler to %s", sbStatus.RuntimeHandler)
+ assert.Equal(t, *runtimeHandler, sbStatus.RuntimeHandler)
+
+ t.Logf("Verify runtimeService.ListPodSandbox sets RuntimeHandler")
+ sandboxes, err := runtimeService.ListPodSandbox(&runtime.PodSandboxFilter{})
+ require.NoError(t, err)
+ t.Logf("runtimeService.ListPodSandbox sets RuntimeHandler to %s", sbStatus.RuntimeHandler)
+ assert.Equal(t, *runtimeHandler, sandboxes[0].RuntimeHandler)
+}
diff --git a/integration/sandbox_clean_remove_test.go b/integration/sandbox_clean_remove_test.go
new file mode 100644
index 000000000..f74c145cc
--- /dev/null
+++ b/integration/sandbox_clean_remove_test.go
@@ -0,0 +1,126 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+ "time"
+
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "golang.org/x/sys/unix"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+func TestSandboxRemoveWithoutIPLeakage(t *testing.T) {
+ const hostLocalCheckpointDir = "/var/lib/cni"
+
+ t.Logf("Make sure host-local ipam is in use")
+ config, err := CRIConfig()
+ require.NoError(t, err)
+ fs, err := ioutil.ReadDir(config.NetworkPluginConfDir)
+ require.NoError(t, err)
+ require.NotEmpty(t, fs)
+ f := filepath.Join(config.NetworkPluginConfDir, fs[0].Name())
+ cniConfig, err := ioutil.ReadFile(f)
+ require.NoError(t, err)
+ if !strings.Contains(string(cniConfig), "host-local") {
+ t.Skip("host-local ipam is not in use")
+ }
+
+ t.Logf("Create a sandbox")
+ sbConfig := PodSandboxConfig("sandbox", "remove-without-ip-leakage")
+ sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+ require.NoError(t, err)
+ defer func() {
+ // Make sure the sandbox is cleaned up in any case.
+ runtimeService.StopPodSandbox(sb)
+ runtimeService.RemovePodSandbox(sb)
+ }()
+
+ t.Logf("Get pod information")
+ status, info, err := SandboxInfo(sb)
+ require.NoError(t, err)
+ ip := status.GetNetwork().GetIp()
+ require.NotEmpty(t, ip)
+ require.NotNil(t, info.RuntimeSpec.Linux)
+ var netNS string
+ for _, n := range info.RuntimeSpec.Linux.Namespaces {
+ if n.Type == runtimespec.NetworkNamespace {
+ netNS = n.Path
+ }
+ }
+ require.NotEmpty(t, netNS, "network namespace should be set")
+
+ t.Logf("Should be able to find the pod ip in host-local checkpoint")
+ checkIP := func(ip string) bool {
+ found := false
+ filepath.Walk(hostLocalCheckpointDir, func(_ string, info os.FileInfo, _ error) error {
+ if info != nil && info.Name() == ip {
+ found = true
+ }
+ return nil
+ })
+ return found
+ }
+ require.True(t, checkIP(ip))
+
+ t.Logf("Kill sandbox container")
+ require.NoError(t, KillPid(int(info.Pid)))
+
+ t.Logf("Unmount network namespace")
+ require.NoError(t, unix.Unmount(netNS, unix.MNT_DETACH))
+
+ t.Logf("Network namespace should be closed")
+ _, info, err = SandboxInfo(sb)
+ require.NoError(t, err)
+ assert.True(t, info.NetNSClosed)
+
+ t.Logf("Remove network namespace")
+ require.NoError(t, os.RemoveAll(netNS))
+
+ t.Logf("Network namespace should still be closed")
+ _, info, err = SandboxInfo(sb)
+ require.NoError(t, err)
+ assert.True(t, info.NetNSClosed)
+
+ t.Logf("Sandbox state should be NOTREADY")
+ assert.NoError(t, Eventually(func() (bool, error) {
+ status, err := runtimeService.PodSandboxStatus(sb)
+ if err != nil {
+ return false, err
+ }
+ return status.GetState() == runtime.PodSandboxState_SANDBOX_NOTREADY, nil
+ }, time.Second, 30*time.Second), "sandbox state should become NOTREADY")
+
+ t.Logf("Should still be able to find the pod ip in host-local checkpoint")
+ assert.True(t, checkIP(ip))
+
+ t.Logf("Should be able to stop and remove the sandbox")
+ assert.NoError(t, runtimeService.StopPodSandbox(sb))
+ assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+
+ t.Logf("Should not be able to find the pod ip in host-local checkpoint")
+ assert.False(t, checkIP(ip))
+}
diff --git a/integration/truncindex_test.go b/integration/truncindex_test.go
new file mode 100644
index 000000000..f3e52300b
--- /dev/null
+++ b/integration/truncindex_test.go
@@ -0,0 +1,160 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+func genTruncIndex(normalName string) string {
+ return normalName[:(len(normalName)+1)/2]
+}
+
+func TestTruncIndex(t *testing.T) {
+ sbConfig := PodSandboxConfig("sandbox", "truncindex")
+
+ t.Logf("Pull an image")
+ const appImage = "busybox"
+ imgID, err := imageService.PullImage(&runtimeapi.ImageSpec{Image: appImage}, nil, sbConfig)
+ require.NoError(t, err)
+ imgTruncID := genTruncIndex(imgID)
+ defer func() {
+ assert.NoError(t, imageService.RemoveImage(&runtimeapi.ImageSpec{Image: imgTruncID}))
+ }()
+
+ t.Logf("Get image status by truncindex, truncID: %s", imgTruncID)
+ res, err := imageService.ImageStatus(&runtimeapi.ImageSpec{Image: imgTruncID})
+ require.NoError(t, err)
+ require.NotEqual(t, nil, res)
+ assert.Equal(t, imgID, res.Id)
+
+ // TODO(yanxuean): add a failure test case for when two images share the same truncindex.
+ // If you add n images, at least two will share the same leading digit:
+ // "sha256:n" where n is a number from 0-9, so two images end up with the same trunc,
+ // for example sha256:9.
+ // https://github.com/containerd/cri/pull/352
+ // It is not yet clear how to deterministically obtain two images with the same trunc.
+
+ // TODO(yanxuean): add test case for ListImages
+
+ t.Logf("Create a sandbox")
+ sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+ require.NoError(t, err)
+ sbTruncIndex := genTruncIndex(sb)
+ var hasStoppedSandbox bool
+ defer func() {
+ // The 2nd StopPodSandbox call will fail, but the 2nd RemovePodSandbox call will succeed.
+ if !hasStoppedSandbox {
+ assert.NoError(t, runtimeService.StopPodSandbox(sbTruncIndex))
+ }
+ assert.NoError(t, runtimeService.RemovePodSandbox(sbTruncIndex))
+ }()
+
+ t.Logf("Get sandbox status by truncindex")
+ sbStatus, err := runtimeService.PodSandboxStatus(sbTruncIndex)
+ require.NoError(t, err)
+ assert.Equal(t, sb, sbStatus.Id)
+
+ t.Logf("Forward port for sandbox by truncindex")
+ _, err = runtimeService.PortForward(&runtimeapi.PortForwardRequest{PodSandboxId: sbTruncIndex, Port: []int32{80}})
+ assert.NoError(t, err)
+
+ // TODO(yanxuean): add test case for ListPodSandbox
+
+ t.Logf("Create a container")
+ cnConfig := ContainerConfig(
+ "containerTruncIndex",
+ appImage,
+ WithCommand("top"),
+ )
+ cn, err := runtimeService.CreateContainer(sbTruncIndex, cnConfig, sbConfig)
+ require.NoError(t, err)
+ cnTruncIndex := genTruncIndex(cn)
+ defer func() {
+ // The 2nd RemoveContainer call will succeed.
+ assert.NoError(t, runtimeService.RemoveContainer(cnTruncIndex))
+ }()
+
+ t.Logf("Get container status by truncindex")
+ cStatus, err := runtimeService.ContainerStatus(cnTruncIndex)
+ require.NoError(t, err)
+ assert.Equal(t, cn, cStatus.Id)
+
+ t.Logf("Start the container")
+ require.NoError(t, runtimeService.StartContainer(cnTruncIndex))
+ var hasStoppedContainer bool
+ defer func() {
+ // The 2nd StopContainer call will fail.
+ if !hasStoppedContainer {
+ assert.NoError(t, runtimeService.StopContainer(cnTruncIndex, 10))
+ }
+ }()
+
+ t.Logf("Stats the container")
+ cStats, err := runtimeService.ContainerStats(cnTruncIndex)
+ require.NoError(t, err)
+ assert.Equal(t, cn, cStats.Attributes.Id)
+
+ t.Logf("Update container memory limit after started")
+ err = runtimeService.UpdateContainerResources(cnTruncIndex, &runtimeapi.LinuxContainerResources{
+ MemoryLimitInBytes: 50 * 1024 * 1024,
+ })
+ assert.NoError(t, err)
+
+ t.Logf("Execute cmd in container")
+ execReq := &runtimeapi.ExecRequest{
+ ContainerId: cnTruncIndex,
+ Cmd: []string{"pwd"},
+ Stdout: true,
+ }
+ _, err = runtimeService.Exec(execReq)
+ assert.NoError(t, err)
+
+ t.Logf("Execute cmd in container by sync")
+ _, _, err = runtimeService.ExecSync(cnTruncIndex, []string{"pwd"}, 10)
+ assert.NoError(t, err)
+
+ // TODO(yanxuean): add test case for ListContainers
+
+ t.Logf("Get a non exist container status by truncindex")
+ err = runtimeService.StopContainer(cnTruncIndex, 10)
+ assert.NoError(t, err)
+ if err == nil {
+ hasStoppedContainer = true
+ }
+ _, err = runtimeService.ContainerStats(cnTruncIndex)
+ assert.Error(t, err)
+ assert.NoError(t, runtimeService.RemoveContainer(cnTruncIndex))
+ _, err = runtimeService.ContainerStatus(cnTruncIndex)
+ assert.Error(t, err)
+
+ t.Logf("Get a non exist sandbox status by truncindex")
+ err = runtimeService.StopPodSandbox(sbTruncIndex)
+ assert.NoError(t, err)
+ if err == nil {
+ hasStoppedSandbox = true
+ }
+ assert.NoError(t, runtimeService.RemovePodSandbox(sbTruncIndex))
+ _, err = runtimeService.PodSandboxStatus(sbTruncIndex)
+ assert.Error(t, err)
+}
diff --git a/integration/util/boottime_util_darwin.go b/integration/util/boottime_util_darwin.go
new file mode 100644
index 000000000..74abe9fb9
--- /dev/null
+++ b/integration/util/boottime_util_darwin.go
@@ -0,0 +1,60 @@
+// +build darwin
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2018 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package util
+
+import (
+ "fmt"
+ "syscall"
+ "time"
+ "unsafe"
+
+ "golang.org/x/sys/unix"
+)
+
+// GetBootTime returns the time at which the machine was started, truncated to the nearest second
+func GetBootTime() (time.Time, error) {
+	output, err := unix.SysctlRaw("kern.boottime")
+	if err != nil {
+		return time.Time{}, err
+	}
+	var timeval syscall.Timeval
+	if len(output) != int(unsafe.Sizeof(timeval)) {
+		return time.Time{}, fmt.Errorf("unexpected output when calling syscall kern.boottime. Expected len(output) to be %v, but got %v",
+			int(unsafe.Sizeof(timeval)), len(output))
+	}
+	timeval = *(*syscall.Timeval)(unsafe.Pointer(&output[0]))
+	sec, nsec := timeval.Unix()
+	return time.Unix(sec, nsec).Truncate(time.Second), nil
+}
diff --git a/integration/util/boottime_util_linux.go b/integration/util/boottime_util_linux.go
new file mode 100644
index 000000000..2699ae5fe
--- /dev/null
+++ b/integration/util/boottime_util_linux.go
@@ -0,0 +1,52 @@
+// +build freebsd linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2018 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package util
+
+import (
+ "fmt"
+ "time"
+
+ "golang.org/x/sys/unix"
+)
+
+// GetBootTime returns the time at which the machine was started, truncated to the nearest second
+func GetBootTime() (time.Time, error) {
+ currentTime := time.Now()
+ var info unix.Sysinfo_t
+ if err := unix.Sysinfo(&info); err != nil {
+ return time.Time{}, fmt.Errorf("error getting system uptime: %s", err)
+ }
+ return currentTime.Add(-time.Duration(info.Uptime) * time.Second).Truncate(time.Second), nil
+}
diff --git a/integration/util/doc.go b/integration/util/doc.go
new file mode 100644
index 000000000..307fa03c2
--- /dev/null
+++ b/integration/util/doc.go
@@ -0,0 +1,34 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2015 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package util holds utility functions.
+package util
diff --git a/integration/util/util.go b/integration/util/util.go
new file mode 100644
index 000000000..334cb8b24
--- /dev/null
+++ b/integration/util/util.go
@@ -0,0 +1,43 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2017 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package util
+
+import (
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// FromApiserverCache modifies opts so that the GET request will
+// be served from apiserver cache instead of from etcd.
+// Setting ResourceVersion to "0" asks the apiserver for any (possibly stale) cached version.
+func FromApiserverCache(opts *metav1.GetOptions) {
+	opts.ResourceVersion = "0"
+}
diff --git a/integration/util/util_unix.go b/integration/util/util_unix.go
new file mode 100644
index 000000000..713df4b63
--- /dev/null
+++ b/integration/util/util_unix.go
@@ -0,0 +1,170 @@
+// +build freebsd linux darwin
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2017 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package util
+
+import (
+ "context"
+ "fmt"
+ "io/ioutil"
+ "net"
+ "net/url"
+ "os"
+ "path/filepath"
+
+ "golang.org/x/sys/unix"
+ "k8s.io/klog/v2"
+)
+
+const (
+ // unixProtocol is the network protocol of unix socket.
+ unixProtocol = "unix"
+)
+
+// CreateListener creates a listener on the specified endpoint.
+// Only unix socket endpoints are supported. The socket is first bound under a
+// temporary name in the target directory and then renamed into place, so a
+// half-created socket is never observed at the final address.
+func CreateListener(endpoint string) (net.Listener, error) {
+	protocol, addr, err := parseEndpointWithFallbackProtocol(endpoint, unixProtocol)
+	if err != nil {
+		return nil, err
+	}
+	if protocol != unixProtocol {
+		return nil, fmt.Errorf("only support unix socket endpoint")
+	}
+
+	// Unlink to cleanup the previous socket file.
+	err = unix.Unlink(addr)
+	if err != nil && !os.IsNotExist(err) {
+		return nil, fmt.Errorf("failed to unlink socket file %q: %v", addr, err)
+	}
+
+	if err := os.MkdirAll(filepath.Dir(addr), 0750); err != nil {
+		return nil, fmt.Errorf("error creating socket directory %q: %v", filepath.Dir(addr), err)
+	}
+
+	// Create the socket on a tempfile and move it to the destination socket to handle improper cleanup
+	file, err := ioutil.TempFile(filepath.Dir(addr), "")
+	if err != nil {
+		return nil, fmt.Errorf("failed to create temporary file: %v", err)
+	}
+
+	// The temp file only reserved a unique name; remove it so net.Listen
+	// can bind a unix socket at that path.
+	if err := os.Remove(file.Name()); err != nil {
+		return nil, fmt.Errorf("failed to remove temporary file: %v", err)
+	}
+
+	l, err := net.Listen(protocol, file.Name())
+	if err != nil {
+		return nil, err
+	}
+
+	// Atomically move the bound socket to the requested address.
+	if err = os.Rename(file.Name(), addr); err != nil {
+		return nil, fmt.Errorf("failed to move temporary file to addr %q: %v", addr, err)
+	}
+
+	return l, nil
+}
+
+// GetAddressAndDialer returns the address parsed from the given endpoint and a context dialer.
+// Only unix socket endpoints are supported; any other protocol is rejected.
+func GetAddressAndDialer(endpoint string) (string, func(ctx context.Context, addr string) (net.Conn, error), error) {
+	protocol, addr, err := parseEndpointWithFallbackProtocol(endpoint, unixProtocol)
+	if err != nil {
+		return "", nil, err
+	}
+	if protocol != unixProtocol {
+		return "", nil, fmt.Errorf("only support unix socket endpoint")
+	}
+
+	return addr, dial, nil
+}
+
+// dial connects to a unix socket at addr, honoring ctx cancellation/deadlines.
+func dial(ctx context.Context, addr string) (net.Conn, error) {
+	return (&net.Dialer{}).DialContext(ctx, unixProtocol, addr)
+}
+
+// parseEndpointWithFallbackProtocol parses endpoint into (protocol, addr).
+// If endpoint has no scheme (parseEndpoint fails with an empty protocol), it is
+// retried with fallbackProtocol prepended (e.g. "unix://"), and a deprecation
+// warning is logged on success.
+func parseEndpointWithFallbackProtocol(endpoint string, fallbackProtocol string) (protocol string, addr string, err error) {
+	if protocol, addr, err = parseEndpoint(endpoint); err != nil && protocol == "" {
+		fallbackEndpoint := fallbackProtocol + "://" + endpoint
+		protocol, addr, err = parseEndpoint(fallbackEndpoint)
+		if err == nil {
+			klog.Warningf("Using %q as endpoint is deprecated, please consider using full url format %q.", endpoint, fallbackEndpoint)
+		}
+	}
+	return
+}
+
+// parseEndpoint splits a URL-style endpoint into (protocol, address).
+// "tcp" yields the host, "unix" yields the path; a missing scheme is an error
+// (callers fall back via parseEndpointWithFallbackProtocol), and any other
+// scheme is returned alongside an "unsupported" error so the caller can report it.
+func parseEndpoint(endpoint string) (string, string, error) {
+	u, err := url.Parse(endpoint)
+	if err != nil {
+		return "", "", err
+	}
+
+	switch u.Scheme {
+	case "tcp":
+		return "tcp", u.Host, nil
+
+	case "unix":
+		return "unix", u.Path, nil
+
+	case "":
+		return "", "", fmt.Errorf("using %q as endpoint is deprecated, please consider using full url format", endpoint)
+
+	default:
+		return u.Scheme, "", fmt.Errorf("protocol %q not supported", u.Scheme)
+	}
+}
+
+// LocalEndpoint returns the full path to a unix socket at the given endpoint
+// NOTE(review): filepath.Join cleans the joined string, which collapses the
+// "//" after the scheme — the result looks like "unix:/path/file.sock" rather
+// than "unix://path/file.sock". Confirm callers expect this form before changing.
+func LocalEndpoint(path, file string) (string, error) {
+	u := url.URL{
+		Scheme: unixProtocol,
+		Path:   path,
+	}
+	return filepath.Join(u.String(), file+".sock"), nil
+}
+
+// IsUnixDomainSocket returns whether a given file is a AF_UNIX socket file.
+// It stats the path and checks the os.ModeSocket bit; a stat failure
+// (e.g. the file does not exist) is returned as an error.
+func IsUnixDomainSocket(filePath string) (bool, error) {
+	fi, err := os.Stat(filePath)
+	if err != nil {
+		return false, fmt.Errorf("stat file %s failed: %v", filePath, err)
+	}
+	if fi.Mode()&os.ModeSocket == 0 {
+		return false, nil
+	}
+	return true, nil
+}
+
+// NormalizePath is a no-op for Linux for now; it exists to mirror the
+// Windows implementation so callers can be platform-agnostic.
+func NormalizePath(path string) string {
+	return path
+}
diff --git a/integration/util/util_unsupported.go b/integration/util/util_unsupported.go
new file mode 100644
index 000000000..81f412172
--- /dev/null
+++ b/integration/util/util_unsupported.go
@@ -0,0 +1,71 @@
+// +build !freebsd,!linux,!windows,!darwin
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2017 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package util
+
+import (
+ "context"
+ "fmt"
+ "net"
+ "time"
+)
+
+// CreateListener creates a listener on the specified endpoint.
+// Stub for platforms without a supported socket implementation; always errors.
+func CreateListener(endpoint string) (net.Listener, error) {
+	return nil, fmt.Errorf("CreateListener is unsupported in this build")
+}
+
+// GetAddressAndDialer returns the address parsed from the given endpoint and a context dialer.
+// Stub for unsupported platforms; always errors.
+func GetAddressAndDialer(endpoint string) (string, func(ctx context.Context, addr string) (net.Conn, error), error) {
+	return "", nil, fmt.Errorf("GetAddressAndDialer is unsupported in this build")
+}
+
+// LockAndCheckSubPath empty implementation; returns no handles and no error
+// so callers proceed without path locking on unsupported platforms.
+func LockAndCheckSubPath(volumePath, subPath string) ([]uintptr, error) {
+	return []uintptr{}, nil
+}
+
+// UnlockPath empty implementation; nothing to release on unsupported platforms.
+func UnlockPath(fileHandles []uintptr) {
+}
+
+// LocalEndpoint empty implementation; always errors on unsupported platforms.
+func LocalEndpoint(path, file string) (string, error) {
+	return "", fmt.Errorf("LocalEndpoints are unsupported in this build")
+}
+
+// GetBootTime empty implementation; always errors on unsupported platforms.
+func GetBootTime() (time.Time, error) {
+	return time.Time{}, fmt.Errorf("GetBootTime is unsupported in this build")
+}
diff --git a/integration/util/util_windows.go b/integration/util/util_windows.go
new file mode 100644
index 000000000..850a50f81
--- /dev/null
+++ b/integration/util/util_windows.go
@@ -0,0 +1,170 @@
+// +build windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2017 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package util
+
+import (
+ "context"
+ "fmt"
+ "net"
+ "net/url"
+ "strings"
+ "syscall"
+ "time"
+
+ "github.com/Microsoft/go-winio"
+)
+
+const (
+ tcpProtocol = "tcp"
+ npipeProtocol = "npipe"
+)
+
+// CreateListener creates a listener on the specified endpoint.
+// On Windows only tcp and npipe (named pipe) endpoints are supported.
+func CreateListener(endpoint string) (net.Listener, error) {
+	protocol, addr, err := parseEndpoint(endpoint)
+	if err != nil {
+		return nil, err
+	}
+
+	switch protocol {
+	case tcpProtocol:
+		return net.Listen(tcpProtocol, addr)
+
+	case npipeProtocol:
+		// nil config means default pipe security / buffer settings.
+		return winio.ListenPipe(addr, nil)
+
+	default:
+		return nil, fmt.Errorf("only support tcp and npipe endpoint")
+	}
+}
+
+// GetAddressAndDialer returns the address parsed from the given endpoint and a context dialer.
+// The dialer matches the endpoint protocol: tcp or npipe; anything else errors.
+func GetAddressAndDialer(endpoint string) (string, func(ctx context.Context, addr string) (net.Conn, error), error) {
+	protocol, addr, err := parseEndpoint(endpoint)
+	if err != nil {
+		return "", nil, err
+	}
+
+	if protocol == tcpProtocol {
+		return addr, tcpDial, nil
+	}
+
+	if protocol == npipeProtocol {
+		return addr, npipeDial, nil
+	}
+
+	return "", nil, fmt.Errorf("only support tcp and npipe endpoint")
+}
+
+// tcpDial connects over TCP to addr, honoring ctx cancellation/deadlines.
+func tcpDial(ctx context.Context, addr string) (net.Conn, error) {
+	return (&net.Dialer{}).DialContext(ctx, tcpProtocol, addr)
+}
+
+// npipeDial connects to a Windows named pipe at addr via go-winio.
+func npipeDial(ctx context.Context, addr string) (net.Conn, error) {
+	return winio.DialPipeContext(ctx, addr)
+}
+
+// parseEndpoint splits a Windows endpoint into (protocol, address).
+// Backslashes are normalized to forward slashes first because url.Parse does
+// not understand "\". npipe paths already of the form "//./pipe/..." are kept
+// as-is; otherwise the host defaults to "." (the local machine).
+func parseEndpoint(endpoint string) (string, string, error) {
+	// url.Parse doesn't recognize \, so replace with / first.
+	endpoint = strings.Replace(endpoint, "\\", "/", -1)
+	u, err := url.Parse(endpoint)
+	if err != nil {
+		return "", "", err
+	}
+
+	if u.Scheme == "tcp" {
+		return "tcp", u.Host, nil
+	} else if u.Scheme == "npipe" {
+		if strings.HasPrefix(u.Path, "//./pipe") {
+			return "npipe", u.Path, nil
+		}
+
+		// fallback host if not provided.
+		host := u.Host
+		if host == "" {
+			host = "."
+		}
+		return "npipe", fmt.Sprintf("//%s%s", host, u.Path), nil
+	} else if u.Scheme == "" {
+		// NOTE(review): error string starts with a capital letter, which go vet's
+		// style checks flag; the unix variant of this function uses lowercase.
+		return "", "", fmt.Errorf("Using %q as endpoint is deprecated, please consider using full url format", endpoint)
+	} else {
+		return u.Scheme, "", fmt.Errorf("protocol %q not supported", u.Scheme)
+	}
+}
+
+// LocalEndpoint empty implementation; always errors on Windows.
+func LocalEndpoint(path, file string) (string, error) {
+	return "", fmt.Errorf("LocalEndpoints are unsupported in this build")
+}
+
+// tickCount lazily binds kernel32!GetTickCount64, which returns the number of
+// milliseconds elapsed since the system was started.
+var tickCount = syscall.NewLazyDLL("kernel32.dll").NewProc("GetTickCount64")
+
+// GetBootTime returns the time at which the machine was started, truncated to the nearest second.
+// Boot time is computed as (now - GetTickCount64 milliseconds).
+func GetBootTime() (time.Time, error) {
+	currentTime := time.Now()
+	output, _, err := tickCount.Call()
+	// Proc.Call always returns a non-nil error; it only signals failure when
+	// the wrapped errno is nonzero (or the error is not an Errno at all).
+	if errno, ok := err.(syscall.Errno); !ok || errno != 0 {
+		return time.Time{}, err
+	}
+	return currentTime.Add(-time.Duration(output) * time.Millisecond).Truncate(time.Second), nil
+}
+
+// IsUnixDomainSocket returns whether a given file is a AF_UNIX socket file
+func IsUnixDomainSocket(filePath string) (bool, error) {
+	// Due to the absence of golang support for os.ModeSocket in Windows (https://github.com/golang/go/issues/33357)
+	// we need to dial the file and check if we receive an error to determine if a file is Unix Domain Socket file.
+
+	// Note that querying for the Reparse Points (https://docs.microsoft.com/en-us/windows/win32/fileio/reparse-points)
+	// for the file (using FSCTL_GET_REPARSE_POINT) and checking for reparse tag: reparseTagSocket
+	// does NOT work in 1809 if the socket file is created within a bind mounted directory by a container
+	// and the FSCTL is issued in the host by the kubelet.
+
+	// NOTE(review): any dial failure (including transient ones such as a busy
+	// or permission-denied socket) is reported as (false, nil), not an error —
+	// confirm callers are happy treating those as "not a socket".
+	c, err := net.Dial("unix", filePath)
+	if err == nil {
+		c.Close()
+		return true, nil
+	}
+	return false, nil
+}
+
+// NormalizePath converts FS paths returned by certain go frameworks (like fsnotify)
+// to native Windows paths that can be passed to Windows specific code
+// NOTE(review): a path starting with "\" is assumed to live on the C: drive —
+// verify this assumption holds for all callers.
+func NormalizePath(path string) string {
+	path = strings.ReplaceAll(path, "/", "\\")
+	if strings.HasPrefix(path, "\\") {
+		path = "c:" + path
+	}
+	return path
+}
diff --git a/integration/volume_copy_up_test.go b/integration/volume_copy_up_test.go
new file mode 100644
index 000000000..d9b9f1533
--- /dev/null
+++ b/integration/volume_copy_up_test.go
@@ -0,0 +1,140 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package integration
+
+import (
+ "fmt"
+ "os/exec"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// TestVolumeCopyUp verifies that image volume contents are copied up into the
+// container's volume directory on the host, and that writes from inside the
+// container are reflected in the host-side copy.
+func TestVolumeCopyUp(t *testing.T) {
+	const (
+		testImage   = "gcr.io/k8s-cri-containerd/volume-copy-up:1.0"
+		execTimeout = time.Minute
+	)
+
+	t.Logf("Create a sandbox")
+	sbConfig := PodSandboxConfig("sandbox", "volume-copy-up")
+	sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+	require.NoError(t, err)
+	defer func() {
+		assert.NoError(t, runtimeService.StopPodSandbox(sb))
+		assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+	}()
+
+	t.Logf("Pull test image")
+	_, err = imageService.PullImage(&runtime.ImageSpec{Image: testImage}, nil, sbConfig)
+	require.NoError(t, err)
+
+	t.Logf("Create a container with volume-copy-up test image")
+	cnConfig := ContainerConfig(
+		"container",
+		testImage,
+		// Keep the container alive so we can exec into it.
+		WithCommand("tail", "-f", "/dev/null"),
+	)
+	cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
+	require.NoError(t, err)
+
+	t.Logf("Start the container")
+	require.NoError(t, runtimeService.StartContainer(cn))
+
+	// gcr.io/k8s-cri-containerd/volume-copy-up:1.0 contains a test_dir
+	// volume, which contains a test_file with content "test_content".
+	t.Logf("Check whether volume contains the test file")
+	stdout, stderr, err := runtimeService.ExecSync(cn, []string{
+		"cat",
+		"/test_dir/test_file",
+	}, execTimeout)
+	require.NoError(t, err)
+	assert.Empty(t, stderr)
+	assert.Equal(t, "test_content\n", string(stdout))
+
+	t.Logf("Check host path of the volume")
+	// The volume name is generated, so glob for it under the container's dir.
+	hostCmd := fmt.Sprintf("find %s/containers/%s/volumes/*/test_file | xargs cat", *criRoot, cn)
+	output, err := exec.Command("sh", "-c", hostCmd).CombinedOutput()
+	require.NoError(t, err)
+	assert.Equal(t, "test_content\n", string(output))
+
+	t.Logf("Update volume from inside the container")
+	_, _, err = runtimeService.ExecSync(cn, []string{
+		"sh",
+		"-c",
+		"echo new_content > /test_dir/test_file",
+	}, execTimeout)
+	require.NoError(t, err)
+
+	t.Logf("Check whether host path of the volume is updated")
+	output, err = exec.Command("sh", "-c", hostCmd).CombinedOutput()
+	require.NoError(t, err)
+	assert.Equal(t, "new_content\n", string(output))
+}
+
+// TestVolumeOwnership verifies that ownership (uid/gid) set on an image volume
+// is preserved both inside the container and in the host-side volume copy.
+func TestVolumeOwnership(t *testing.T) {
+	const (
+		testImage   = "gcr.io/k8s-cri-containerd/volume-ownership:1.0"
+		execTimeout = time.Minute
+	)
+
+	t.Logf("Create a sandbox")
+	sbConfig := PodSandboxConfig("sandbox", "volume-ownership")
+	sb, err := runtimeService.RunPodSandbox(sbConfig, *runtimeHandler)
+	require.NoError(t, err)
+	defer func() {
+		assert.NoError(t, runtimeService.StopPodSandbox(sb))
+		assert.NoError(t, runtimeService.RemovePodSandbox(sb))
+	}()
+
+	t.Logf("Pull test image")
+	_, err = imageService.PullImage(&runtime.ImageSpec{Image: testImage}, nil, sbConfig)
+	require.NoError(t, err)
+
+	t.Logf("Create a container with volume-ownership test image")
+	cnConfig := ContainerConfig(
+		"container",
+		testImage,
+		// Keep the container alive so we can exec into it.
+		WithCommand("tail", "-f", "/dev/null"),
+	)
+	cn, err := runtimeService.CreateContainer(sb, cnConfig, sbConfig)
+	require.NoError(t, err)
+
+	t.Logf("Start the container")
+	require.NoError(t, runtimeService.StartContainer(cn))
+
+	// gcr.io/k8s-cri-containerd/volume-ownership:1.0 contains a test_dir
+	// volume, which is owned by nobody:nogroup.
+	t.Logf("Check ownership of test directory inside container")
+	stdout, stderr, err := runtimeService.ExecSync(cn, []string{
+		"stat", "-c", "%U:%G", "/test_dir",
+	}, execTimeout)
+	require.NoError(t, err)
+	assert.Empty(t, stderr)
+	assert.Equal(t, "nobody:nogroup\n", string(stdout))
+
+	t.Logf("Check ownership of test directory on the host")
+	// The volume name is generated, so glob for it under the container's dir.
+	hostCmd := fmt.Sprintf("find %s/containers/%s/volumes/* | xargs stat -c %%U:%%G", *criRoot, cn)
+	output, err := exec.Command("sh", "-c", hostCmd).CombinedOutput()
+	require.NoError(t, err)
+	assert.Equal(t, "nobody:nogroup\n", string(output))
+}
diff --git a/pkg/annotations/annotations.go b/pkg/annotations/annotations.go
new file mode 100644
index 000000000..122c4c489
--- /dev/null
+++ b/pkg/annotations/annotations.go
@@ -0,0 +1,50 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package annotations
+
+// ContainerType values
+// Following OCI annotations are used by katacontainers now.
+// We'll switch to standard secure pod API after it is defined in CRI.
+const (
+	// ContainerTypeSandbox represents a pod sandbox container
+	ContainerTypeSandbox = "sandbox"
+
+	// ContainerTypeContainer represents a container running within a pod
+	ContainerTypeContainer = "container"
+
+	// ContainerType is the container type (sandbox or container) annotation
+	ContainerType = "io.kubernetes.cri.container-type"
+
+	// SandboxID is the sandbox ID annotation
+	SandboxID = "io.kubernetes.cri.sandbox-id"
+
+	// SandboxLogDir is the pod log directory annotation.
+	// If the sandbox needs to generate any log, it will put it into this directory.
+	// Kubelet will be responsible for:
+	// 1) Monitoring the disk usage of the log, and including it as part of the pod
+	// ephemeral storage usage.
+	// 2) Cleaning up the logs when the pod is deleted.
+	// NOTE: Kubelet is not responsible for rotating the logs.
+	SandboxLogDir = "io.kubernetes.cri.sandbox-log-directory"
+
+	// UntrustedWorkload is the sandbox annotation for untrusted workload. Untrusted
+	// workload can only run on dedicated runtime for untrusted workload.
+	UntrustedWorkload = "io.kubernetes.cri.untrusted-workload"
+
+	// ContainerName is the name of the container in the pod
+	ContainerName = "io.kubernetes.cri.container-name"
+)
diff --git a/pkg/api/runtimeoptions/v1/api.pb.go b/pkg/api/runtimeoptions/v1/api.pb.go
new file mode 100644
index 000000000..bf0cf3d41
--- /dev/null
+++ b/pkg/api/runtimeoptions/v1/api.pb.go
@@ -0,0 +1,394 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+// Code generated by protoc-gen-gogo. DO NOT EDIT.
+// source: api.proto
+
+/*
+ Package cri_runtimeoptions_v1 is a generated protocol buffer package.
+
+ It is generated from these files:
+ api.proto
+
+ It has these top-level messages:
+ Options
+*/
+package cri_runtimeoptions_v1
+
+import proto "github.com/gogo/protobuf/proto"
+import fmt "fmt"
+import math "math"
+import _ "github.com/gogo/protobuf/gogoproto"
+
+import strings "strings"
+import reflect "reflect"
+
+import io "io"
+
+// Reference imports to suppress errors if they are not otherwise used.
+var _ = proto.Marshal
+var _ = fmt.Errorf
+var _ = math.Inf
+
+// This is a compile-time assertion to ensure that this generated file
+// is compatible with the proto package it is being compiled against.
+// A compilation error at this line likely means your copy of the
+// proto package needs to be updated.
+const _ = proto.GoGoProtoPackageIsVersion2 // please upgrade the proto package
+
+type Options struct {
+ // TypeUrl specifies the type of the content inside the config file.
+ TypeUrl string `protobuf:"bytes,1,opt,name=type_url,json=typeUrl,proto3" json:"type_url,omitempty"`
+ // ConfigPath specifies the filesystem location of the config file
+ // used by the runtime.
+ ConfigPath string `protobuf:"bytes,2,opt,name=config_path,json=configPath,proto3" json:"config_path,omitempty"`
+}
+
+func (m *Options) Reset() { *m = Options{} }
+func (*Options) ProtoMessage() {}
+func (*Options) Descriptor() ([]byte, []int) { return fileDescriptorApi, []int{0} }
+
+func (m *Options) GetTypeUrl() string {
+ if m != nil {
+ return m.TypeUrl
+ }
+ return ""
+}
+
+func (m *Options) GetConfigPath() string {
+ if m != nil {
+ return m.ConfigPath
+ }
+ return ""
+}
+
+func init() {
+ proto.RegisterType((*Options)(nil), "cri.runtimeoptions.v1.Options")
+}
+func (m *Options) Marshal() (dAtA []byte, err error) {
+ size := m.Size()
+ dAtA = make([]byte, size)
+ n, err := m.MarshalTo(dAtA)
+ if err != nil {
+ return nil, err
+ }
+ return dAtA[:n], nil
+}
+
+func (m *Options) MarshalTo(dAtA []byte) (int, error) {
+ var i int
+ _ = i
+ var l int
+ _ = l
+ if len(m.TypeUrl) > 0 {
+ dAtA[i] = 0xa
+ i++
+ i = encodeVarintApi(dAtA, i, uint64(len(m.TypeUrl)))
+ i += copy(dAtA[i:], m.TypeUrl)
+ }
+ if len(m.ConfigPath) > 0 {
+ dAtA[i] = 0x12
+ i++
+ i = encodeVarintApi(dAtA, i, uint64(len(m.ConfigPath)))
+ i += copy(dAtA[i:], m.ConfigPath)
+ }
+ return i, nil
+}
+
+func encodeVarintApi(dAtA []byte, offset int, v uint64) int {
+ for v >= 1<<7 {
+ dAtA[offset] = uint8(v&0x7f | 0x80)
+ v >>= 7
+ offset++
+ }
+ dAtA[offset] = uint8(v)
+ return offset + 1
+}
+func (m *Options) Size() (n int) {
+ var l int
+ _ = l
+ l = len(m.TypeUrl)
+ if l > 0 {
+ n += 1 + l + sovApi(uint64(l))
+ }
+ l = len(m.ConfigPath)
+ if l > 0 {
+ n += 1 + l + sovApi(uint64(l))
+ }
+ return n
+}
+
+func sovApi(x uint64) (n int) {
+ for {
+ n++
+ x >>= 7
+ if x == 0 {
+ break
+ }
+ }
+ return n
+}
+func sozApi(x uint64) (n int) {
+ return sovApi(uint64((x << 1) ^ uint64((int64(x) >> 63))))
+}
+func (this *Options) String() string {
+ if this == nil {
+ return "nil"
+ }
+ s := strings.Join([]string{`&Options{`,
+ `TypeUrl:` + fmt.Sprintf("%v", this.TypeUrl) + `,`,
+ `ConfigPath:` + fmt.Sprintf("%v", this.ConfigPath) + `,`,
+ `}`,
+ }, "")
+ return s
+}
+func valueToStringApi(v interface{}) string {
+ rv := reflect.ValueOf(v)
+ if rv.IsNil() {
+ return "nil"
+ }
+ pv := reflect.Indirect(rv).Interface()
+ return fmt.Sprintf("*%v", pv)
+}
+func (m *Options) Unmarshal(dAtA []byte) error {
+ l := len(dAtA)
+ iNdEx := 0
+ for iNdEx < l {
+ preIndex := iNdEx
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowApi
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= (uint64(b) & 0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ fieldNum := int32(wire >> 3)
+ wireType := int(wire & 0x7)
+ if wireType == 4 {
+ return fmt.Errorf("proto: Options: wiretype end group for non-group")
+ }
+ if fieldNum <= 0 {
+ return fmt.Errorf("proto: Options: illegal tag %d (wire type %d)", fieldNum, wire)
+ }
+ switch fieldNum {
+ case 1:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field TypeUrl", wireType)
+ }
+ var stringLen uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowApi
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ stringLen |= (uint64(b) & 0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ intStringLen := int(stringLen)
+ if intStringLen < 0 {
+ return ErrInvalidLengthApi
+ }
+ postIndex := iNdEx + intStringLen
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.TypeUrl = string(dAtA[iNdEx:postIndex])
+ iNdEx = postIndex
+ case 2:
+ if wireType != 2 {
+ return fmt.Errorf("proto: wrong wireType = %d for field ConfigPath", wireType)
+ }
+ var stringLen uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return ErrIntOverflowApi
+ }
+ if iNdEx >= l {
+ return io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ stringLen |= (uint64(b) & 0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ intStringLen := int(stringLen)
+ if intStringLen < 0 {
+ return ErrInvalidLengthApi
+ }
+ postIndex := iNdEx + intStringLen
+ if postIndex > l {
+ return io.ErrUnexpectedEOF
+ }
+ m.ConfigPath = string(dAtA[iNdEx:postIndex])
+ iNdEx = postIndex
+ default:
+ iNdEx = preIndex
+ skippy, err := skipApi(dAtA[iNdEx:])
+ if err != nil {
+ return err
+ }
+ if skippy < 0 {
+ return ErrInvalidLengthApi
+ }
+ if (iNdEx + skippy) > l {
+ return io.ErrUnexpectedEOF
+ }
+ iNdEx += skippy
+ }
+ }
+
+ if iNdEx > l {
+ return io.ErrUnexpectedEOF
+ }
+ return nil
+}
+func skipApi(dAtA []byte) (n int, err error) {
+ l := len(dAtA)
+ iNdEx := 0
+ for iNdEx < l {
+ var wire uint64
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return 0, ErrIntOverflowApi
+ }
+ if iNdEx >= l {
+ return 0, io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ wire |= (uint64(b) & 0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ wireType := int(wire & 0x7)
+ switch wireType {
+ case 0:
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return 0, ErrIntOverflowApi
+ }
+ if iNdEx >= l {
+ return 0, io.ErrUnexpectedEOF
+ }
+ iNdEx++
+ if dAtA[iNdEx-1] < 0x80 {
+ break
+ }
+ }
+ return iNdEx, nil
+ case 1:
+ iNdEx += 8
+ return iNdEx, nil
+ case 2:
+ var length int
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return 0, ErrIntOverflowApi
+ }
+ if iNdEx >= l {
+ return 0, io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ length |= (int(b) & 0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ iNdEx += length
+ if length < 0 {
+ return 0, ErrInvalidLengthApi
+ }
+ return iNdEx, nil
+ case 3:
+ for {
+ var innerWire uint64
+ var start int = iNdEx
+ for shift := uint(0); ; shift += 7 {
+ if shift >= 64 {
+ return 0, ErrIntOverflowApi
+ }
+ if iNdEx >= l {
+ return 0, io.ErrUnexpectedEOF
+ }
+ b := dAtA[iNdEx]
+ iNdEx++
+ innerWire |= (uint64(b) & 0x7F) << shift
+ if b < 0x80 {
+ break
+ }
+ }
+ innerWireType := int(innerWire & 0x7)
+ if innerWireType == 4 {
+ break
+ }
+ next, err := skipApi(dAtA[start:])
+ if err != nil {
+ return 0, err
+ }
+ iNdEx = start + next
+ }
+ return iNdEx, nil
+ case 4:
+ return iNdEx, nil
+ case 5:
+ iNdEx += 4
+ return iNdEx, nil
+ default:
+ return 0, fmt.Errorf("proto: illegal wireType %d", wireType)
+ }
+ }
+ panic("unreachable")
+}
+
+var (
+ ErrInvalidLengthApi = fmt.Errorf("proto: negative length found during unmarshaling")
+ ErrIntOverflowApi = fmt.Errorf("proto: integer overflow")
+)
+
+func init() { proto.RegisterFile("api.proto", fileDescriptorApi) }
+
+var fileDescriptorApi = []byte{
+ // 183 bytes of a gzipped FileDescriptorProto
+ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0xe2, 0x4c, 0x2c, 0xc8, 0xd4,
+ 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, 0x12, 0x4d, 0x2e, 0xca, 0xd4, 0x2b, 0x2a, 0xcd, 0x2b, 0xc9,
+ 0xcc, 0x4d, 0xcd, 0x2f, 0x28, 0xc9, 0xcc, 0xcf, 0x2b, 0xd6, 0x2b, 0x33, 0x94, 0xd2, 0x4d, 0xcf,
+ 0x2c, 0xc9, 0x28, 0x4d, 0xd2, 0x4b, 0xce, 0xcf, 0xd5, 0x4f, 0xcf, 0x4f, 0xcf, 0xd7, 0x07, 0xab,
+ 0x4e, 0x2a, 0x4d, 0x03, 0xf3, 0xc0, 0x1c, 0x30, 0x0b, 0x62, 0x8a, 0x92, 0x2b, 0x17, 0xbb, 0x3f,
+ 0x44, 0xb3, 0x90, 0x24, 0x17, 0x47, 0x49, 0x65, 0x41, 0x6a, 0x7c, 0x69, 0x51, 0x8e, 0x04, 0xa3,
+ 0x02, 0xa3, 0x06, 0x67, 0x10, 0x3b, 0x88, 0x1f, 0x5a, 0x94, 0x23, 0x24, 0xcf, 0xc5, 0x9d, 0x9c,
+ 0x9f, 0x97, 0x96, 0x99, 0x1e, 0x5f, 0x90, 0x58, 0x92, 0x21, 0xc1, 0x04, 0x96, 0xe5, 0x82, 0x08,
+ 0x05, 0x24, 0x96, 0x64, 0x38, 0xc9, 0x9c, 0x78, 0x28, 0xc7, 0x78, 0xe3, 0xa1, 0x1c, 0x43, 0xc3,
+ 0x23, 0x39, 0xc6, 0x13, 0x8f, 0xe4, 0x18, 0x2f, 0x3c, 0x92, 0x63, 0x7c, 0xf0, 0x48, 0x8e, 0x71,
+ 0xc2, 0x63, 0x39, 0x86, 0x24, 0x36, 0xb0, 0x5d, 0xc6, 0x80, 0x00, 0x00, 0x00, 0xff, 0xff, 0x07,
+ 0x00, 0xf2, 0x18, 0xbe, 0x00, 0x00, 0x00,
+}
diff --git a/pkg/api/runtimeoptions/v1/api.proto b/pkg/api/runtimeoptions/v1/api.proto
new file mode 100644
index 000000000..4f5b68fe5
--- /dev/null
+++ b/pkg/api/runtimeoptions/v1/api.proto
@@ -0,0 +1,22 @@
+// To regenerate api.pb.go run `make proto`
+syntax = "proto3";
+
+package cri.runtimeoptions.v1;
+
+import "github.com/gogo/protobuf/gogoproto/gogo.proto";
+
+option (gogoproto.goproto_stringer_all) = false;
+option (gogoproto.stringer_all) = true;
+option (gogoproto.goproto_getters_all) = true;
+option (gogoproto.marshaler_all) = true;
+option (gogoproto.sizer_all) = true;
+option (gogoproto.unmarshaler_all) = true;
+option (gogoproto.goproto_unrecognized_all) = false;
+
// Options describes a runtime configuration: the type of the config
// content and the path of the config file used by the runtime.
message Options {
	// TypeUrl specifies the type of the content inside the config file.
	string type_url = 1;
	// ConfigPath specifies the filesystem location of the config file
	// used by the runtime.
	string config_path = 2;
}
diff --git a/pkg/atomic/atomic_boolean.go b/pkg/atomic/atomic_boolean.go
new file mode 100644
index 000000000..507e063dc
--- /dev/null
+++ b/pkg/atomic/atomic_boolean.go
@@ -0,0 +1,54 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package atomic
+
+import "sync/atomic"
+
// Bool is a Boolean value whose methods are all atomic, so it is safe
// to read and write from multiple goroutines simultaneously.
type Bool interface {
	// Set atomically stores true.
	Set()
	// Unset atomically stores false.
	Unset()
	// IsSet atomically reports whether the value is currently true.
	IsSet() bool
}

// NewBool returns a Bool initialized to the given value.
func NewBool(ok bool) Bool {
	b := new(atomicBool)
	if ok {
		b.Set()
	}
	return b
}

// atomicBool implements Bool on top of an int32 manipulated with
// sync/atomic: zero means false, one means true.
type atomicBool int32

// Set sets the Boolean to true.
func (b *atomicBool) Set() { atomic.StoreInt32((*int32)(b), 1) }

// Unset sets the Boolean to false.
func (b *atomicBool) Unset() { atomic.StoreInt32((*int32)(b), 0) }

// IsSet returns whether the Boolean is true.
func (b *atomicBool) IsSet() bool { return atomic.LoadInt32((*int32)(b)) == 1 }
diff --git a/pkg/atomic/atomic_boolean_test.go b/pkg/atomic/atomic_boolean_test.go
new file mode 100644
index 000000000..97e5a4b55
--- /dev/null
+++ b/pkg/atomic/atomic_boolean_test.go
@@ -0,0 +1,32 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package atomic
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestBoolean(t *testing.T) {
+ ab := NewBool(true)
+ assert.True(t, ab.IsSet())
+ ab.Unset()
+ assert.False(t, ab.IsSet())
+ ab.Set()
+ assert.True(t, ab.IsSet())
+}
diff --git a/pkg/config/config.go b/pkg/config/config.go
new file mode 100644
index 000000000..a0c86fa76
--- /dev/null
+++ b/pkg/config/config.go
@@ -0,0 +1,369 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package config
+
+import (
+ "context"
+ "time"
+
+ "github.com/BurntSushi/toml"
+ "github.com/containerd/containerd/log"
+ "github.com/containerd/containerd/plugin"
+ "github.com/pkg/errors"
+)
+
// Runtime contains the configuration for an individual containerd runtime,
// i.e. one entry of the `runtimes` table (the default runtime, the runtime
// for untrusted workloads, or any other named handler).
type Runtime struct {
	// Type is the runtime type to use in containerd e.g. io.containerd.runtime.v1.linux
	Type string `toml:"runtime_type" json:"runtimeType"`
	// Engine is the name of the runtime engine used by containerd.
	// This only works for runtime type "io.containerd.runtime.v1.linux".
	// DEPRECATED: use Options instead. Remove when shim v1 is deprecated.
	Engine string `toml:"runtime_engine" json:"runtimeEngine"`
	// PodAnnotations is a list of pod annotations passed to both pod sandbox as well as
	// container OCI annotations.
	// NOTE(review): the json tag "PodAnnotations" is capitalized, unlike the
	// camelCase tags of sibling fields; kept as-is for wire compatibility.
	PodAnnotations []string `toml:"pod_annotations" json:"PodAnnotations"`
	// ContainerAnnotations is a list of container annotations passed through to the OCI config of the containers.
	// Container annotations in CRI are usually generated by other Kubernetes node components (i.e., not users).
	// Currently, only device plugins populate the annotations.
	ContainerAnnotations []string `toml:"container_annotations" json:"ContainerAnnotations"`
	// Root is the directory used by containerd for runtime state.
	// DEPRECATED: use Options instead. Remove when shim v1 is deprecated.
	// This only works for runtime type "io.containerd.runtime.v1.linux".
	Root string `toml:"runtime_root" json:"runtimeRoot"`
	// Options are config options for the runtime. If options is loaded
	// from toml config, it will be toml.Primitive.
	Options *toml.Primitive `toml:"options" json:"options"`
	// PrivilegedWithoutHostDevices overloads the default behaviour for adding host devices to the
	// runtime spec when the container is privileged. Defaults to false.
	PrivilegedWithoutHostDevices bool `toml:"privileged_without_host_devices" json:"privileged_without_host_devices"`
	// BaseRuntimeSpec is a json file with OCI spec to use as base spec that all container's will be created from.
	BaseRuntimeSpec string `toml:"base_runtime_spec" json:"baseRuntimeSpec"`
}
+
// ContainerdConfig contains toml config related to containerd
type ContainerdConfig struct {
	// Snapshotter is the snapshotter used by containerd.
	Snapshotter string `toml:"snapshotter" json:"snapshotter"`
	// DefaultRuntimeName is the default runtime name to use from the runtimes table.
	DefaultRuntimeName string `toml:"default_runtime_name" json:"defaultRuntimeName"`
	// DefaultRuntime is the default runtime to use in containerd.
	// This runtime is used when no runtime handler (or the empty string) is provided.
	// DEPRECATED: use DefaultRuntimeName instead. Remove in containerd 1.4.
	DefaultRuntime Runtime `toml:"default_runtime" json:"defaultRuntime"`
	// UntrustedWorkloadRuntime is a runtime used to run untrusted workloads.
	// DEPRECATED: use `untrusted` runtime in Runtimes instead. Remove in containerd 1.4.
	UntrustedWorkloadRuntime Runtime `toml:"untrusted_workload_runtime" json:"untrustedWorkloadRuntime"`
	// Runtimes is a map from CRI RuntimeHandler strings, which specify types of runtime
	// configurations, to the matching configurations.
	Runtimes map[string]Runtime `toml:"runtimes" json:"runtimes"`
	// NoPivot disables pivot-root (linux only), required when running a container in a RamDisk with runc.
	// This only works for runtime type "io.containerd.runtime.v1.linux".
	NoPivot bool `toml:"no_pivot" json:"noPivot"`

	// DisableSnapshotAnnotations disables passing additional annotations (image
	// related information) to snapshotters. These annotations are required by
	// stargz snapshotter (https://github.com/containerd/stargz-snapshotter).
	DisableSnapshotAnnotations bool `toml:"disable_snapshot_annotations" json:"disableSnapshotAnnotations"`

	// DiscardUnpackedLayers is a boolean flag to specify whether to allow GC to
	// remove layers from the content store after successfully unpacking these
	// layers to the snapshotter.
	DiscardUnpackedLayers bool `toml:"discard_unpacked_layers" json:"discardUnpackedLayers"`
}

// CniConfig contains toml config related to cni
type CniConfig struct {
	// NetworkPluginBinDir is the directory in which the binaries for the plugin are kept.
	NetworkPluginBinDir string `toml:"bin_dir" json:"binDir"`
	// NetworkPluginConfDir is the directory in which the admin places a CNI conf.
	NetworkPluginConfDir string `toml:"conf_dir" json:"confDir"`
	// NetworkPluginMaxConfNum is the max number of plugin config files that will
	// be loaded from the cni config directory by go-cni. Set the value to 0 to
	// load all config files (no arbitrary limit). The legacy default value is 1.
	NetworkPluginMaxConfNum int `toml:"max_conf_num" json:"maxConfNum"`
	// NetworkPluginConfTemplate is the file path of golang template used to generate
	// cni config.
	// When it is set, containerd will get cidr(s) from kubelet to replace {{.PodCIDR}},
	// {{.PodCIDRRanges}} or {{.Routes}} in the template, and write the config into
	// NetworkPluginConfDir.
	// Ideally the cni config should be placed by system admin or cni daemon like calico,
	// weaveworks etc. However, there are still users using kubenet
	// (https://kubernetes.io/docs/concepts/cluster-administration/network-plugins/#kubenet)
	// today, who don't have a cni daemonset in production. NetworkPluginConfTemplate is
	// a temporary backward-compatible solution for them.
	// TODO(random-liu): Deprecate this option when kubenet is deprecated.
	NetworkPluginConfTemplate string `toml:"conf_template" json:"confTemplate"`
}

// Mirror contains the config related to the registry mirror
type Mirror struct {
	// Endpoints are endpoints for a namespace. CRI plugin will try the endpoints
	// one by one until a working one is found. The endpoint must be a valid url
	// with host specified.
	// The scheme, host and path from the endpoint URL will be used.
	Endpoints []string `toml:"endpoint" json:"endpoint"`
}
+
// AuthConfig contains the config related to authentication to a specific registry
type AuthConfig struct {
	// Username is the username to login the registry.
	Username string `toml:"username" json:"username"`
	// Password is the password to login the registry.
	Password string `toml:"password" json:"password"`
	// Auth is a base64 encoded string from the concatenation of the username,
	// a colon, and the password.
	Auth string `toml:"auth" json:"auth"`
	// IdentityToken is used to authenticate the user and get
	// an access token for the registry.
	IdentityToken string `toml:"identitytoken" json:"identitytoken"`
}

// TLSConfig contains the CA/Cert/Key used for a registry
type TLSConfig struct {
	// InsecureSkipVerify disables verification of the registry's certificate chain.
	InsecureSkipVerify bool `toml:"insecure_skip_verify" json:"insecure_skip_verify"`
	// CAFile is the path of the CA certificate file.
	CAFile string `toml:"ca_file" json:"caFile"`
	// CertFile is the path of the client certificate file.
	CertFile string `toml:"cert_file" json:"certFile"`
	// KeyFile is the path of the client private key file.
	KeyFile string `toml:"key_file" json:"keyFile"`
}

// Registry contains the mirror, credential and TLS settings used when
// communicating with image registries.
type Registry struct {
	// Mirrors are namespace to mirror mapping for all namespaces.
	Mirrors map[string]Mirror `toml:"mirrors" json:"mirrors"`
	// Configs are configs for each registry.
	// The key is the domain name or IP of the registry.
	Configs map[string]RegistryConfig `toml:"configs" json:"configs"`

	// Auths are registry endpoint to auth config mapping. The registry endpoint must
	// be a valid url with host specified.
	// DEPRECATED: Use Configs instead. Remove in containerd 1.4.
	Auths map[string]AuthConfig `toml:"auths" json:"auths"`
	// Headers adds additional HTTP headers that get sent to all registries
	Headers map[string][]string `toml:"headers" json:"headers"`
}

// RegistryConfig contains configuration used to communicate with the registry.
type RegistryConfig struct {
	// Auth contains information to authenticate to the registry.
	Auth *AuthConfig `toml:"auth" json:"auth"`
	// TLS is a pair of CA/Cert/Key which then are used when creating the transport
	// that communicates with the registry.
	TLS *TLSConfig `toml:"tls" json:"tls"`
}

// ImageDecryption contains configuration to handling decryption of encrypted container images.
type ImageDecryption struct {
	// KeyModel specifies the trust model of where keys should reside.
	//
	// Details of field usage can be found in:
	// https://github.com/containerd/cri/tree/master/docs/config.md
	//
	// Details of key models can be found in:
	// https://github.com/containerd/cri/tree/master/docs/decryption.md
	KeyModel string `toml:"key_model" json:"keyModel"`
}
+
// PluginConfig contains toml config related to CRI plugin,
// it is a subset of Config.
type PluginConfig struct {
	// ContainerdConfig contains config related to containerd
	ContainerdConfig `toml:"containerd" json:"containerd"`
	// CniConfig contains config related to cni
	CniConfig `toml:"cni" json:"cni"`
	// Registry contains config related to the registry
	Registry Registry `toml:"registry" json:"registry"`
	// ImageDecryption contains config related to handling decryption of encrypted container images
	ImageDecryption `toml:"image_decryption" json:"imageDecryption"`
	// DisableTCPService disables serving CRI on the TCP server.
	DisableTCPService bool `toml:"disable_tcp_service" json:"disableTCPService"`
	// StreamServerAddress is the ip address streaming server is listening on.
	StreamServerAddress string `toml:"stream_server_address" json:"streamServerAddress"`
	// StreamServerPort is the port streaming server is listening on.
	StreamServerPort string `toml:"stream_server_port" json:"streamServerPort"`
	// StreamIdleTimeout is the maximum time a streaming connection
	// can be idle before the connection is automatically closed.
	// The string is in the golang duration format, see:
	// https://golang.org/pkg/time/#ParseDuration
	StreamIdleTimeout string `toml:"stream_idle_timeout" json:"streamIdleTimeout"`
	// EnableSelinux indicates to enable the selinux support.
	EnableSelinux bool `toml:"enable_selinux" json:"enableSelinux"`
	// SelinuxCategoryRange allows the upper bound on the category range to be set.
	// If not specified or set to 0, defaults to 1024 from the selinux package.
	SelinuxCategoryRange int `toml:"selinux_category_range" json:"selinuxCategoryRange"`
	// SandboxImage is the image used by sandbox container.
	SandboxImage string `toml:"sandbox_image" json:"sandboxImage"`
	// StatsCollectPeriod is the period (in seconds) of snapshots stats collection.
	StatsCollectPeriod int `toml:"stats_collect_period" json:"statsCollectPeriod"`
	// SystemdCgroup enables systemd cgroup support.
	// This only works for runtime type "io.containerd.runtime.v1.linux".
	// DEPRECATED: config runc runtime handler instead. Remove when shim v1 is deprecated.
	SystemdCgroup bool `toml:"systemd_cgroup" json:"systemdCgroup"`
	// EnableTLSStreaming indicates to enable the TLS streaming support.
	EnableTLSStreaming bool `toml:"enable_tls_streaming" json:"enableTLSStreaming"`
	// X509KeyPairStreaming is a x509 key pair used for TLS streaming
	X509KeyPairStreaming `toml:"x509_key_pair_streaming" json:"x509KeyPairStreaming"`
	// MaxContainerLogLineSize is the maximum log line size in bytes for a container.
	// Log line longer than the limit will be split into multiple lines. Non-positive
	// value means no limit.
	// NOTE(review): the json tag is "maxContainerLogSize" (no "Line"), which does
	// not match the field name; kept as-is for wire compatibility.
	MaxContainerLogLineSize int `toml:"max_container_log_line_size" json:"maxContainerLogSize"`
	// DisableCgroup indicates to disable the cgroup support.
	// This is useful when the containerd does not have permission to access cgroup.
	DisableCgroup bool `toml:"disable_cgroup" json:"disableCgroup"`
	// DisableApparmor indicates to disable the apparmor support.
	// This is useful when the containerd does not have permission to access Apparmor.
	DisableApparmor bool `toml:"disable_apparmor" json:"disableApparmor"`
	// RestrictOOMScoreAdj indicates to limit the lower bound of OOMScoreAdj to the containerd's
	// current OOMScoreAdj.
	// This is useful when the containerd does not have permission to decrease OOMScoreAdj.
	RestrictOOMScoreAdj bool `toml:"restrict_oom_score_adj" json:"restrictOOMScoreAdj"`
	// MaxConcurrentDownloads restricts the number of concurrent downloads for each image.
	MaxConcurrentDownloads int `toml:"max_concurrent_downloads" json:"maxConcurrentDownloads"`
	// DisableProcMount disables Kubernetes ProcMount support. This MUST be set to `true`
	// when using containerd with Kubernetes <=1.11.
	DisableProcMount bool `toml:"disable_proc_mount" json:"disableProcMount"`
	// UnsetSeccompProfile is the profile containerd/cri will use if the provided seccomp profile is
	// unset (`""`) for a container (default is `unconfined`)
	UnsetSeccompProfile string `toml:"unset_seccomp_profile" json:"unsetSeccompProfile"`
	// TolerateMissingHugetlbController if set to false will error out on create/update
	// container requests with huge page limits if the cgroup controller for hugepages is not present.
	// This helps with supporting Kubernetes <=1.18 out of the box. (default is `true`)
	TolerateMissingHugetlbController bool `toml:"tolerate_missing_hugetlb_controller" json:"tolerateMissingHugetlbController"`
	// DisableHugetlbController indicates to silently disable the hugetlb controller, even when it is
	// present in /sys/fs/cgroup/cgroup.controllers.
	// This helps with running rootless mode + cgroup v2 + systemd but without hugetlb delegation.
	DisableHugetlbController bool `toml:"disable_hugetlb_controller" json:"disableHugetlbController"`
	// IgnoreImageDefinedVolumes ignores volumes defined by the image. Useful for better resource
	// isolation, security and early detection of issues in the mount configuration when using
	// ReadOnlyRootFilesystem since containers won't silently mount a temporary volume.
	IgnoreImageDefinedVolumes bool `toml:"ignore_image_defined_volumes" json:"ignoreImageDefinedVolumes"`
}

// X509KeyPairStreaming contains the x509 configuration for streaming
type X509KeyPairStreaming struct {
	// TLSCertFile is the path to a certificate file
	TLSCertFile string `toml:"tls_cert_file" json:"tlsCertFile"`
	// TLSKeyFile is the path to a private key file
	TLSKeyFile string `toml:"tls_key_file" json:"tlsKeyFile"`
}

// Config contains all configurations for cri server.
type Config struct {
	// PluginConfig is the config for CRI plugin.
	PluginConfig
	// ContainerdRootDir is the root directory path for containerd.
	ContainerdRootDir string `json:"containerdRootDir"`
	// ContainerdEndpoint is the containerd endpoint path.
	ContainerdEndpoint string `json:"containerdEndpoint"`
	// RootDir is the root directory path for managing cri plugin files
	// (metadata checkpoint etc.)
	RootDir string `json:"rootDir"`
	// StateDir is the root directory path for managing volatile pod/container data
	StateDir string `json:"stateDir"`
}

const (
	// RuntimeUntrusted is the implicit runtime defined for ContainerdConfig.UntrustedWorkloadRuntime
	RuntimeUntrusted = "untrusted"
	// RuntimeDefault is the implicit runtime defined for ContainerdConfig.DefaultRuntime
	RuntimeDefault = "default"
	// KeyModelNode is the key model where keys for encrypted images reside
	// on the worker nodes
	KeyModelNode = "node"
)
+
+// ValidatePluginConfig validates the given plugin configuration.
+func ValidatePluginConfig(ctx context.Context, c *PluginConfig) error {
+ if c.ContainerdConfig.Runtimes == nil {
+ c.ContainerdConfig.Runtimes = make(map[string]Runtime)
+ }
+
+ // Validation for deprecated untrusted_workload_runtime.
+ if c.ContainerdConfig.UntrustedWorkloadRuntime.Type != "" {
+ log.G(ctx).Warning("`untrusted_workload_runtime` is deprecated, please use `untrusted` runtime in `runtimes` instead")
+ if _, ok := c.ContainerdConfig.Runtimes[RuntimeUntrusted]; ok {
+ return errors.Errorf("conflicting definitions: configuration includes both `untrusted_workload_runtime` and `runtimes[%q]`", RuntimeUntrusted)
+ }
+ c.ContainerdConfig.Runtimes[RuntimeUntrusted] = c.ContainerdConfig.UntrustedWorkloadRuntime
+ }
+
+ // Validation for deprecated default_runtime field.
+ if c.ContainerdConfig.DefaultRuntime.Type != "" {
+ log.G(ctx).Warning("`default_runtime` is deprecated, please use `default_runtime_name` to reference the default configuration you have defined in `runtimes`")
+ c.ContainerdConfig.DefaultRuntimeName = RuntimeDefault
+ c.ContainerdConfig.Runtimes[RuntimeDefault] = c.ContainerdConfig.DefaultRuntime
+ }
+
+ // Validation for default_runtime_name
+ if c.ContainerdConfig.DefaultRuntimeName == "" {
+ return errors.New("`default_runtime_name` is empty")
+ }
+ if _, ok := c.ContainerdConfig.Runtimes[c.ContainerdConfig.DefaultRuntimeName]; !ok {
+ return errors.New("no corresponding runtime configured in `runtimes` for `default_runtime_name`")
+ }
+
+ // Validation for deprecated runtime options.
+ if c.SystemdCgroup {
+ if c.ContainerdConfig.Runtimes[c.ContainerdConfig.DefaultRuntimeName].Type != plugin.RuntimeLinuxV1 {
+ return errors.Errorf("`systemd_cgroup` only works for runtime %s", plugin.RuntimeLinuxV1)
+ }
+ log.G(ctx).Warning("`systemd_cgroup` is deprecated, please use runtime `options` instead")
+ }
+ if c.NoPivot {
+ if c.ContainerdConfig.Runtimes[c.ContainerdConfig.DefaultRuntimeName].Type != plugin.RuntimeLinuxV1 {
+ return errors.Errorf("`no_pivot` only works for runtime %s", plugin.RuntimeLinuxV1)
+ }
+ // NoPivot can't be deprecated yet, because there is no alternative config option
+ // for `io.containerd.runtime.v1.linux`.
+ }
+ for _, r := range c.ContainerdConfig.Runtimes {
+ if r.Engine != "" {
+ if r.Type != plugin.RuntimeLinuxV1 {
+ return errors.Errorf("`runtime_engine` only works for runtime %s", plugin.RuntimeLinuxV1)
+ }
+ log.G(ctx).Warning("`runtime_engine` is deprecated, please use runtime `options` instead")
+ }
+ if r.Root != "" {
+ if r.Type != plugin.RuntimeLinuxV1 {
+ return errors.Errorf("`runtime_root` only works for runtime %s", plugin.RuntimeLinuxV1)
+ }
+ log.G(ctx).Warning("`runtime_root` is deprecated, please use runtime `options` instead")
+ }
+ }
+
+ // Validation for deprecated auths options and mapping it to configs.
+ if len(c.Registry.Auths) != 0 {
+ if c.Registry.Configs == nil {
+ c.Registry.Configs = make(map[string]RegistryConfig)
+ }
+ for endpoint, auth := range c.Registry.Auths {
+ config := c.Registry.Configs[endpoint]
+ config.Auth = &auth
+ c.Registry.Configs[endpoint] = config
+ }
+ log.G(ctx).Warning("`auths` is deprecated, please use registry`configs` instead")
+ }
+
+ // Validation for stream_idle_timeout
+ if c.StreamIdleTimeout != "" {
+ if _, err := time.ParseDuration(c.StreamIdleTimeout); err != nil {
+ return errors.Wrap(err, "invalid stream idle timeout")
+ }
+ }
+ return nil
+}
diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go
new file mode 100644
index 000000000..3b544f7b8
--- /dev/null
+++ b/pkg/config/config_test.go
@@ -0,0 +1,334 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package config
+
+import (
+ "context"
+ "fmt"
+ "testing"
+
+ "github.com/containerd/containerd/plugin"
+ "github.com/stretchr/testify/assert"
+)
+
// TestValidateConfig table-tests ValidatePluginConfig. Each case supplies an
// input PluginConfig and expects either an error containing expectedErr, or
// (on success) that the config was migrated in place to equal expected —
// covering the deprecated-field migrations, the default_runtime_name checks,
// the v1-only runtime options, and stream_idle_timeout parsing.
func TestValidateConfig(t *testing.T) {
	for desc, test := range map[string]struct {
		config      *PluginConfig
		expectedErr string
		expected    *PluginConfig
	}{
		"deprecated untrusted_workload_runtime": {
			config: &PluginConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					UntrustedWorkloadRuntime: Runtime{
						Type: "untrusted",
					},
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Type: "default",
						},
					},
				},
			},
			expected: &PluginConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					UntrustedWorkloadRuntime: Runtime{
						Type: "untrusted",
					},
					Runtimes: map[string]Runtime{
						RuntimeUntrusted: {
							Type: "untrusted",
						},
						RuntimeDefault: {
							Type: "default",
						},
					},
				},
			},
		},
		"both untrusted_workload_runtime and runtime[untrusted]": {
			config: &PluginConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					UntrustedWorkloadRuntime: Runtime{
						Type: "untrusted-1",
					},
					Runtimes: map[string]Runtime{
						RuntimeUntrusted: {
							Type: "untrusted-2",
						},
						RuntimeDefault: {
							Type: "default",
						},
					},
				},
			},
			expectedErr: fmt.Sprintf("conflicting definitions: configuration includes both `untrusted_workload_runtime` and `runtimes[%q]`", RuntimeUntrusted),
		},
		"deprecated default_runtime": {
			config: &PluginConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntime: Runtime{
						Type: "default",
					},
				},
			},
			expected: &PluginConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntime: Runtime{
						Type: "default",
					},
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Type: "default",
						},
					},
				},
			},
		},
		"no default_runtime_name": {
			config:      &PluginConfig{},
			expectedErr: "`default_runtime_name` is empty",
		},
		"no runtime[default_runtime_name]": {
			config: &PluginConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
				},
			},
			expectedErr: "no corresponding runtime configured in `runtimes` for `default_runtime_name`",
		},
		"deprecated systemd_cgroup for v1 runtime": {
			config: &PluginConfig{
				SystemdCgroup: true,
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Type: plugin.RuntimeLinuxV1,
						},
					},
				},
			},
			expected: &PluginConfig{
				SystemdCgroup: true,
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Type: plugin.RuntimeLinuxV1,
						},
					},
				},
			},
		},
		"deprecated systemd_cgroup for v2 runtime": {
			config: &PluginConfig{
				SystemdCgroup: true,
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Type: plugin.RuntimeRuncV1,
						},
					},
				},
			},
			expectedErr: fmt.Sprintf("`systemd_cgroup` only works for runtime %s", plugin.RuntimeLinuxV1),
		},
		"no_pivot for v1 runtime": {
			config: &PluginConfig{
				ContainerdConfig: ContainerdConfig{
					NoPivot:            true,
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Type: plugin.RuntimeLinuxV1,
						},
					},
				},
			},
			expected: &PluginConfig{
				ContainerdConfig: ContainerdConfig{
					NoPivot:            true,
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Type: plugin.RuntimeLinuxV1,
						},
					},
				},
			},
		},
		"no_pivot for v2 runtime": {
			config: &PluginConfig{
				ContainerdConfig: ContainerdConfig{
					NoPivot:            true,
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Type: plugin.RuntimeRuncV1,
						},
					},
				},
			},
			expectedErr: fmt.Sprintf("`no_pivot` only works for runtime %s", plugin.RuntimeLinuxV1),
		},
		"deprecated runtime_engine for v1 runtime": {
			config: &PluginConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Engine: "runc",
							Type:   plugin.RuntimeLinuxV1,
						},
					},
				},
			},
			expected: &PluginConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Engine: "runc",
							Type:   plugin.RuntimeLinuxV1,
						},
					},
				},
			},
		},
		"deprecated runtime_engine for v2 runtime": {
			config: &PluginConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Engine: "runc",
							Type:   plugin.RuntimeRuncV1,
						},
					},
				},
			},
			expectedErr: fmt.Sprintf("`runtime_engine` only works for runtime %s", plugin.RuntimeLinuxV1),
		},
		"deprecated runtime_root for v1 runtime": {
			config: &PluginConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Root: "/run/containerd/runc",
							Type: plugin.RuntimeLinuxV1,
						},
					},
				},
			},
			expected: &PluginConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Root: "/run/containerd/runc",
							Type: plugin.RuntimeLinuxV1,
						},
					},
				},
			},
		},
		"deprecated runtime_root for v2 runtime": {
			config: &PluginConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Root: "/run/containerd/runc",
							Type: plugin.RuntimeRuncV1,
						},
					},
				},
			},
			expectedErr: fmt.Sprintf("`runtime_root` only works for runtime %s", plugin.RuntimeLinuxV1),
		},
		"deprecated auths": {
			config: &PluginConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Type: plugin.RuntimeRuncV1,
						},
					},
				},
				Registry: Registry{
					Auths: map[string]AuthConfig{
						"https://gcr.io": {Username: "test"},
					},
				},
			},
			expected: &PluginConfig{
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Type: plugin.RuntimeRuncV1,
						},
					},
				},
				Registry: Registry{
					Configs: map[string]RegistryConfig{
						"https://gcr.io": {
							Auth: &AuthConfig{
								Username: "test",
							},
						},
					},
					Auths: map[string]AuthConfig{
						"https://gcr.io": {Username: "test"},
					},
				},
			},
		},
		"invalid stream_idle_timeout": {
			config: &PluginConfig{
				StreamIdleTimeout: "invalid",
				ContainerdConfig: ContainerdConfig{
					DefaultRuntimeName: RuntimeDefault,
					Runtimes: map[string]Runtime{
						RuntimeDefault: {
							Type: "default",
						},
					},
				},
			},
			expectedErr: "invalid stream idle timeout",
		},
	} {
		t.Run(desc, func(t *testing.T) {
			// ValidatePluginConfig mutates test.config in place; on success the
			// mutated config must equal the expected migrated form.
			err := ValidatePluginConfig(context.Background(), test.config)
			if test.expectedErr != "" {
				assert.Contains(t, err.Error(), test.expectedErr)
			} else {
				assert.NoError(t, err)
				assert.Equal(t, test.expected, test.config)
			}
		})
	}
}
diff --git a/pkg/config/config_unix.go b/pkg/config/config_unix.go
new file mode 100644
index 000000000..9df456b53
--- /dev/null
+++ b/pkg/config/config_unix.go
@@ -0,0 +1,75 @@
+// +build !windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package config
+
+import (
+ "github.com/BurntSushi/toml"
+ "github.com/containerd/containerd"
+ "github.com/containerd/cri/pkg/streaming"
+)
+
+// DefaultConfig returns default configurations of cri plugin.
+func DefaultConfig() PluginConfig {
+ return PluginConfig{
+ CniConfig: CniConfig{
+ NetworkPluginBinDir: "/opt/cni/bin",
+ NetworkPluginConfDir: "/etc/cni/net.d",
+ NetworkPluginMaxConfNum: 1, // only one CNI plugin config file will be loaded
+ NetworkPluginConfTemplate: "",
+ },
+ ContainerdConfig: ContainerdConfig{
+ Snapshotter: containerd.DefaultSnapshotter,
+ DefaultRuntimeName: "runc",
+ NoPivot: false,
+ Runtimes: map[string]Runtime{
+ "runc": {
+ Type: "io.containerd.runc.v2",
+ Options: new(toml.Primitive),
+ },
+ },
+ },
+ DisableTCPService: true,
+ StreamServerAddress: "127.0.0.1",
+ StreamServerPort: "0",
+ StreamIdleTimeout: streaming.DefaultConfig.StreamIdleTimeout.String(), // 4 hour
+ EnableSelinux: false,
+ SelinuxCategoryRange: 1024,
+ EnableTLSStreaming: false,
+ X509KeyPairStreaming: X509KeyPairStreaming{
+ TLSKeyFile: "",
+ TLSCertFile: "",
+ },
+ SandboxImage: "k8s.gcr.io/pause:3.2",
+ StatsCollectPeriod: 10,
+ SystemdCgroup: false,
+ MaxContainerLogLineSize: 16 * 1024,
+ Registry: Registry{
+ Mirrors: map[string]Mirror{
+ "docker.io": {
+ Endpoints: []string{"https://registry-1.docker.io"},
+ },
+ },
+ },
+ MaxConcurrentDownloads: 3,
+ DisableProcMount: false,
+ TolerateMissingHugetlbController: true,
+ DisableHugetlbController: true,
+ IgnoreImageDefinedVolumes: false,
+ }
+}
diff --git a/pkg/config/config_windows.go b/pkg/config/config_windows.go
new file mode 100644
index 000000000..d559b4160
--- /dev/null
+++ b/pkg/config/config_windows.go
@@ -0,0 +1,71 @@
+// +build windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package config
+
+import (
+ "os"
+ "path/filepath"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/cri/pkg/streaming"
+)
+
+// DefaultConfig returns default configurations of cri plugin.
+func DefaultConfig() PluginConfig {
+ return PluginConfig{
+ CniConfig: CniConfig{
+ NetworkPluginBinDir: filepath.Join(os.Getenv("ProgramFiles"), "containerd", "cni", "bin"),
+ NetworkPluginConfDir: filepath.Join(os.Getenv("ProgramFiles"), "containerd", "cni", "conf"),
+ NetworkPluginMaxConfNum: 1,
+ NetworkPluginConfTemplate: "",
+ },
+ ContainerdConfig: ContainerdConfig{
+ Snapshotter: containerd.DefaultSnapshotter,
+ DefaultRuntimeName: "runhcs-wcow-process",
+ NoPivot: false,
+ Runtimes: map[string]Runtime{
+ "runhcs-wcow-process": {
+ Type: "io.containerd.runhcs.v1",
+ },
+ },
+ },
+ DisableTCPService: true,
+ StreamServerAddress: "127.0.0.1",
+ StreamServerPort: "0",
+ StreamIdleTimeout: streaming.DefaultConfig.StreamIdleTimeout.String(), // 4 hour
+ EnableTLSStreaming: false,
+ X509KeyPairStreaming: X509KeyPairStreaming{
+ TLSKeyFile: "",
+ TLSCertFile: "",
+ },
+ SandboxImage: "mcr.microsoft.com/oss/kubernetes/pause:1.4.0",
+ StatsCollectPeriod: 10,
+ MaxContainerLogLineSize: 16 * 1024,
+ Registry: Registry{
+ Mirrors: map[string]Mirror{
+ "docker.io": {
+ Endpoints: []string{"https://registry-1.docker.io"},
+ },
+ },
+ },
+ MaxConcurrentDownloads: 3,
+ IgnoreImageDefinedVolumes: false,
+ // TODO(windows): Add platform specific config, so that most common defaults can be shared.
+ }
+}
diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go
new file mode 100644
index 000000000..b38221502
--- /dev/null
+++ b/pkg/constants/constants.go
@@ -0,0 +1,26 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package constants
+
+// TODO(random-liu): Merge annotations package into this package.
+
+const (
+ // K8sContainerdNamespace is the namespace we use to connect containerd.
+ K8sContainerdNamespace = "k8s.io"
+ // CRIVersion is the CRI version supported by the CRI plugin.
+ CRIVersion = "v1alpha2"
+)
diff --git a/pkg/containerd/opts/container.go b/pkg/containerd/opts/container.go
new file mode 100644
index 000000000..fe199d5fb
--- /dev/null
+++ b/pkg/containerd/opts/container.go
@@ -0,0 +1,118 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package opts
+
+import (
+ "context"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/containers"
+ "github.com/containerd/containerd/errdefs"
+ "github.com/containerd/containerd/log"
+ "github.com/containerd/containerd/mount"
+ "github.com/containerd/continuity/fs"
+ "github.com/pkg/errors"
+)
+
+// WithNewSnapshot wraps `containerd.WithNewSnapshot` so that if creating the
+// snapshot fails we make sure the image is actually unpacked and retry.
+func WithNewSnapshot(id string, i containerd.Image) containerd.NewContainerOpts {
+ f := containerd.WithNewSnapshot(id, i)
+ return func(ctx context.Context, client *containerd.Client, c *containers.Container) error {
+ if err := f(ctx, client, c); err != nil {
+ if !errdefs.IsNotFound(err) {
+ return err
+ }
+
+ if err := i.Unpack(ctx, c.Snapshotter); err != nil {
+ return errors.Wrap(err, "error unpacking image")
+ }
+ return f(ctx, client, c)
+ }
+ return nil
+ }
+}
+
+// WithVolumes copies ownership of volume in rootfs to its corresponding host path.
+// It doesn't update runtime spec.
+// The passed in map is a host path to container path map for all volumes.
+func WithVolumes(volumeMounts map[string]string) containerd.NewContainerOpts {
+ return func(ctx context.Context, client *containerd.Client, c *containers.Container) (err error) {
+ if c.Snapshotter == "" {
+ return errors.New("no snapshotter set for container")
+ }
+ if c.SnapshotKey == "" {
+ return errors.New("rootfs not created for container")
+ }
+ snapshotter := client.SnapshotService(c.Snapshotter)
+ mounts, err := snapshotter.Mounts(ctx, c.SnapshotKey)
+ if err != nil {
+ return err
+ }
+ root, err := ioutil.TempDir("", "ctd-volume")
+ if err != nil {
+ return err
+ }
+ // We use Remove rather than RemoveAll so that a failure leaks only
+ // the temp dir and never removes the snapshot data underneath.
+ // refer to https://github.com/containerd/containerd/pull/1868
+ // https://github.com/containerd/containerd/pull/1785
+ defer os.Remove(root) // nolint: errcheck
+ if err := mount.All(mounts, root); err != nil {
+ return errors.Wrap(err, "failed to mount")
+ }
+ defer func() {
+ if uerr := mount.Unmount(root, 0); uerr != nil {
+ log.G(ctx).WithError(uerr).Errorf("Failed to unmount snapshot %q", c.SnapshotKey)
+ if err == nil {
+ err = uerr
+ }
+ }
+ }()
+
+ for host, volume := range volumeMounts {
+ src := filepath.Join(root, volume)
+ if _, err := os.Stat(src); err != nil {
+ if os.IsNotExist(err) {
+ // Skip copying directory if it does not exist.
+ continue
+ }
+ return errors.Wrap(err, "stat volume in rootfs")
+ }
+ if err := copyExistingContents(src, host); err != nil {
+ return errors.Wrap(err, "taking runtime copy of volume")
+ }
+ }
+ return nil
+ }
+}
+
+// copyExistingContents copies from the source to the destination and
+// ensures the ownership is appropriately set.
+func copyExistingContents(source, destination string) error {
+ dstList, err := ioutil.ReadDir(destination)
+ if err != nil {
+ return err
+ }
+ if len(dstList) != 0 {
+ return errors.Errorf("volume at %q is not initially empty", destination)
+ }
+ return fs.CopyDir(destination, source)
+}
diff --git a/pkg/containerd/opts/spec.go b/pkg/containerd/opts/spec.go
new file mode 100644
index 000000000..1afb4fc1a
--- /dev/null
+++ b/pkg/containerd/opts/spec.go
@@ -0,0 +1,113 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package opts
+
+import (
+ "context"
+ "os"
+ "path/filepath"
+ "strings"
+
+ "github.com/containerd/containerd/containers"
+ "github.com/containerd/containerd/oci"
+
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/pkg/errors"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// DefaultSandboxCPUshares is default cpu shares for sandbox container.
+// TODO(windows): Revisit cpu shares for windows (https://github.com/containerd/cri/issues/1297)
+const DefaultSandboxCPUshares = 2
+
+// WithRelativeRoot sets the root for the container
+func WithRelativeRoot(root string) oci.SpecOpts {
+ return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
+ if s.Root == nil {
+ s.Root = &runtimespec.Root{}
+ }
+ s.Root.Path = root
+ return nil
+ }
+}
+
+// WithoutRoot sets the root to nil for the container.
+func WithoutRoot(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
+ s.Root = nil
+ return nil
+}
+
+// WithProcessArgs sets the process args on the spec based on the image and runtime config
+func WithProcessArgs(config *runtime.ContainerConfig, image *imagespec.ImageConfig) oci.SpecOpts {
+ return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
+ command, args := config.GetCommand(), config.GetArgs()
+ // The following logic is migrated from https://github.com/moby/moby/blob/master/daemon/commit.go
+ // TODO(random-liu): Clearly define the commands overwrite behavior.
+ if len(command) == 0 {
+ // Copy array to avoid data race.
+ if len(args) == 0 {
+ args = append([]string{}, image.Cmd...)
+ }
+ if command == nil {
+ command = append([]string{}, image.Entrypoint...)
+ }
+ }
+ if len(command) == 0 && len(args) == 0 {
+ return errors.New("no command specified")
+ }
+ return oci.WithProcessArgs(append(command, args...)...)(ctx, client, c, s)
+ }
+}
+
+// orderedMounts defines how to sort runtime.Mount.
+// This is the same with the Docker implementation:
+// https://github.com/moby/moby/blob/17.05.x/daemon/volumes.go#L26
+type orderedMounts []*runtime.Mount
+
+// Len returns the number of mounts. Used in sorting.
+func (m orderedMounts) Len() int {
+ return len(m)
+}
+
+// Less returns true if the number of parts (a/b/c would be 3 parts) in the
+// mount indexed by parameter 1 is less than that of the mount indexed by
+// parameter 2. Used in sorting.
+func (m orderedMounts) Less(i, j int) bool {
+ return m.parts(i) < m.parts(j)
+}
+
+// Swap swaps two items in an array of mounts. Used in sorting
+func (m orderedMounts) Swap(i, j int) {
+ m[i], m[j] = m[j], m[i]
+}
+
+// parts returns the number of parts in the destination of a mount. Used in sorting.
+func (m orderedMounts) parts(i int) int {
+ return strings.Count(filepath.Clean(m[i].ContainerPath), string(os.PathSeparator))
+}
+
+// WithAnnotation sets the provided annotation
+func WithAnnotation(k, v string) oci.SpecOpts {
+ return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
+ if s.Annotations == nil {
+ s.Annotations = make(map[string]string)
+ }
+ s.Annotations[k] = v
+ return nil
+ }
+}
diff --git a/pkg/containerd/opts/spec_linux.go b/pkg/containerd/opts/spec_linux.go
new file mode 100644
index 000000000..e358d8d6e
--- /dev/null
+++ b/pkg/containerd/opts/spec_linux.go
@@ -0,0 +1,719 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package opts
+
+import (
+ "context"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "sort"
+ "strconv"
+ "strings"
+ "sync"
+ "syscall"
+
+ "github.com/containerd/containerd/containers"
+ "github.com/containerd/containerd/log"
+ "github.com/containerd/containerd/mount"
+ "github.com/containerd/containerd/oci"
+ "github.com/opencontainers/runc/libcontainer/devices"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/opencontainers/selinux/go-selinux/label"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ osinterface "github.com/containerd/cri/pkg/os"
+ "github.com/containerd/cri/pkg/util"
+)
+
+// WithAdditionalGIDs adds any additional groups listed for a particular user in the
+// /etc/group file of the image's root filesystem to the OCI spec's additionalGids array.
+func WithAdditionalGIDs(userstr string) oci.SpecOpts {
+ return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
+ if s.Process == nil {
+ s.Process = &runtimespec.Process{}
+ }
+ gids := s.Process.User.AdditionalGids
+ if err := oci.WithAdditionalGIDs(userstr)(ctx, client, c, s); err != nil {
+ return err
+ }
+ // Merge existing gids and new gids.
+ s.Process.User.AdditionalGids = mergeGids(s.Process.User.AdditionalGids, gids)
+ return nil
+ }
+}
+
+func mergeGids(gids1, gids2 []uint32) []uint32 {
+ gidsMap := make(map[uint32]struct{})
+ for _, gid1 := range gids1 {
+ gidsMap[gid1] = struct{}{}
+ }
+ for _, gid2 := range gids2 {
+ gidsMap[gid2] = struct{}{}
+ }
+ var gids []uint32
+ for gid := range gidsMap {
+ gids = append(gids, gid)
+ }
+ sort.Slice(gids, func(i, j int) bool { return gids[i] < gids[j] })
+ return gids
+}
+
+// WithoutRunMount removes the `/run` inside the spec
+func WithoutRunMount(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error {
+ var (
+ mounts []runtimespec.Mount
+ current = s.Mounts
+ )
+ for _, m := range current {
+ if filepath.Clean(m.Destination) == "/run" {
+ continue
+ }
+ mounts = append(mounts, m)
+ }
+ s.Mounts = mounts
+ return nil
+}
+
+// WithoutDefaultSecuritySettings removes the default security settings generated on a spec
+func WithoutDefaultSecuritySettings(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error {
+ if s.Process == nil {
+ s.Process = &runtimespec.Process{}
+ }
+ // Make sure no default seccomp/apparmor is specified
+ s.Process.ApparmorProfile = ""
+ if s.Linux != nil {
+ s.Linux.Seccomp = nil
+ }
+ // Remove default rlimits (See issue #515)
+ s.Process.Rlimits = nil
+ return nil
+}
+
+// WithMounts sorts and adds runtime and CRI mounts to the spec
+func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount, mountLabel string) oci.SpecOpts {
+ return func(ctx context.Context, client oci.Client, _ *containers.Container, s *runtimespec.Spec) (err error) {
+ // Merge CRI mounts with extra mounts. If a mount destination
+ // is mounted by both a CRI mount and an extra mount, the CRI mount will
+ // be kept.
+ var (
+ criMounts = config.GetMounts()
+ mounts = append([]*runtime.Mount{}, criMounts...)
+ )
+ // Copy all mounts from extra mounts, except for mounts overridden by CRI.
+ for _, e := range extra {
+ found := false
+ for _, c := range criMounts {
+ if filepath.Clean(e.ContainerPath) == filepath.Clean(c.ContainerPath) {
+ found = true
+ break
+ }
+ }
+ if !found {
+ mounts = append(mounts, e)
+ }
+ }
+
+ // Sort mounts in number of parts. This ensures that high level mounts don't
+ // shadow other mounts.
+ sort.Sort(orderedMounts(mounts))
+
+ // Mount cgroup into the container as readonly, which inherits docker's behavior.
+ s.Mounts = append(s.Mounts, runtimespec.Mount{
+ Source: "cgroup",
+ Destination: "/sys/fs/cgroup",
+ Type: "cgroup",
+ Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
+ })
+
+ // Copy all mounts from default mounts, except for
+ // - mounts overridden by supplied mount;
+ // - all mounts under /dev if a supplied /dev is present.
+ mountSet := make(map[string]struct{})
+ for _, m := range mounts {
+ mountSet[filepath.Clean(m.ContainerPath)] = struct{}{}
+ }
+
+ defaultMounts := s.Mounts
+ s.Mounts = nil
+
+ for _, m := range defaultMounts {
+ dst := filepath.Clean(m.Destination)
+ if _, ok := mountSet[dst]; ok {
+ // filter out mount overridden by a supplied mount
+ continue
+ }
+ if _, mountDev := mountSet["/dev"]; mountDev && strings.HasPrefix(dst, "/dev/") {
+ // filter out everything under /dev if /dev is a supplied mount
+ continue
+ }
+ s.Mounts = append(s.Mounts, m)
+ }
+
+ for _, mount := range mounts {
+ var (
+ dst = mount.GetContainerPath()
+ src = mount.GetHostPath()
+ )
+ // Create the host path if it doesn't exist.
+ // TODO(random-liu): Add CRI validation test for this case.
+ if _, err := osi.Stat(src); err != nil {
+ if !os.IsNotExist(err) {
+ return errors.Wrapf(err, "failed to stat %q", src)
+ }
+ if err := osi.MkdirAll(src, 0755); err != nil {
+ return errors.Wrapf(err, "failed to mkdir %q", src)
+ }
+ }
+ // TODO(random-liu): Add cri-containerd integration test or cri validation test
+ // for this.
+ src, err := osi.ResolveSymbolicLink(src)
+ if err != nil {
+ return errors.Wrapf(err, "failed to resolve symlink %q", src)
+ }
+ if s.Linux == nil {
+ s.Linux = &runtimespec.Linux{}
+ }
+ options := []string{"rbind"}
+ switch mount.GetPropagation() {
+ case runtime.MountPropagation_PROPAGATION_PRIVATE:
+ options = append(options, "rprivate")
+ // Since the default root propagation in runc is rprivate, ignore
+ // setting the root propagation.
+ case runtime.MountPropagation_PROPAGATION_BIDIRECTIONAL:
+ if err := ensureShared(src, osi.(osinterface.UNIX).LookupMount); err != nil {
+ return err
+ }
+ options = append(options, "rshared")
+ s.Linux.RootfsPropagation = "rshared"
+ case runtime.MountPropagation_PROPAGATION_HOST_TO_CONTAINER:
+ if err := ensureSharedOrSlave(src, osi.(osinterface.UNIX).LookupMount); err != nil {
+ return err
+ }
+ options = append(options, "rslave")
+ if s.Linux.RootfsPropagation != "rshared" &&
+ s.Linux.RootfsPropagation != "rslave" {
+ s.Linux.RootfsPropagation = "rslave"
+ }
+ default:
+ log.G(ctx).Warnf("Unknown propagation mode for hostPath %q", mount.HostPath)
+ options = append(options, "rprivate")
+ }
+
+ // NOTE(random-liu): we don't change all mounts to `ro` when root filesystem
+ // is readonly. This is different from docker's behavior, but makes more sense.
+ if mount.GetReadonly() {
+ options = append(options, "ro")
+ } else {
+ options = append(options, "rw")
+ }
+
+ if mount.GetSelinuxRelabel() {
+ if err := label.Relabel(src, mountLabel, false); err != nil && err != unix.ENOTSUP {
+ return errors.Wrapf(err, "relabel %q with %q failed", src, mountLabel)
+ }
+ }
+ s.Mounts = append(s.Mounts, runtimespec.Mount{
+ Source: src,
+ Destination: dst,
+ Type: "bind",
+ Options: options,
+ })
+ }
+ return nil
+ }
+}
+
+// ensureShared checks that the mount point on which path is mounted is shared.
+func ensureShared(path string, lookupMount func(string) (mount.Info, error)) error {
+ mountInfo, err := lookupMount(path)
+ if err != nil {
+ return err
+ }
+
+ // Make sure source mount point is shared.
+ optsSplit := strings.Split(mountInfo.Optional, " ")
+ for _, opt := range optsSplit {
+ if strings.HasPrefix(opt, "shared:") {
+ return nil
+ }
+ }
+
+ return errors.Errorf("path %q is mounted on %q but it is not a shared mount", path, mountInfo.Mountpoint)
+}
+
+// ensureSharedOrSlave checks that the mount point on which path is mounted is shared or slave.
+func ensureSharedOrSlave(path string, lookupMount func(string) (mount.Info, error)) error {
+ mountInfo, err := lookupMount(path)
+ if err != nil {
+ return err
+ }
+ // Make sure source mount point is shared.
+ optsSplit := strings.Split(mountInfo.Optional, " ")
+ for _, opt := range optsSplit {
+ if strings.HasPrefix(opt, "shared:") {
+ return nil
+ } else if strings.HasPrefix(opt, "master:") {
+ return nil
+ }
+ }
+ return errors.Errorf("path %q is mounted on %q but it is not a shared or slave mount", path, mountInfo.Mountpoint)
+}
+
+func addDevice(s *runtimespec.Spec, rd runtimespec.LinuxDevice) {
+ for i, dev := range s.Linux.Devices {
+ if dev.Path == rd.Path {
+ s.Linux.Devices[i] = rd
+ return
+ }
+ }
+ s.Linux.Devices = append(s.Linux.Devices, rd)
+}
+
+// WithDevices sets the provided devices onto the container spec
+func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOpts {
+ return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
+ if s.Linux == nil {
+ s.Linux = &runtimespec.Linux{}
+ }
+ if s.Linux.Resources == nil {
+ s.Linux.Resources = &runtimespec.LinuxResources{}
+ }
+ for _, device := range config.GetDevices() {
+ path, err := osi.ResolveSymbolicLink(device.HostPath)
+ if err != nil {
+ return err
+ }
+ dev, err := devices.DeviceFromPath(path, device.Permissions)
+ if err != nil {
+ return err
+ }
+ rd := runtimespec.LinuxDevice{
+ Path: device.ContainerPath,
+ Type: string(dev.Type),
+ Major: dev.Major,
+ Minor: dev.Minor,
+ UID: &dev.Uid,
+ GID: &dev.Gid,
+ }
+
+ addDevice(s, rd)
+
+ s.Linux.Resources.Devices = append(s.Linux.Resources.Devices, runtimespec.LinuxDeviceCgroup{
+ Allow: true,
+ Type: string(dev.Type),
+ Major: &dev.Major,
+ Minor: &dev.Minor,
+ Access: string(dev.Permissions),
+ })
+ }
+ return nil
+ }
+}
+
+// WithCapabilities sets the provided capabilities from the security context
+func WithCapabilities(sc *runtime.LinuxContainerSecurityContext) oci.SpecOpts {
+ capabilities := sc.GetCapabilities()
+ if capabilities == nil {
+ return nullOpt
+ }
+
+ var opts []oci.SpecOpts
+ // Add/drop all capabilities if "all" is specified, so that
+ // following individual add/drop could still work. E.g.
+ // AddCapabilities: []string{"ALL"}, DropCapabilities: []string{"CHOWN"}
+ // will be all capabilities without `CAP_CHOWN`.
+ if util.InStringSlice(capabilities.GetAddCapabilities(), "ALL") {
+ opts = append(opts, oci.WithAllCapabilities)
+ }
+ if util.InStringSlice(capabilities.GetDropCapabilities(), "ALL") {
+ opts = append(opts, oci.WithCapabilities(nil))
+ }
+
+ var caps []string
+ for _, c := range capabilities.GetAddCapabilities() {
+ if strings.ToUpper(c) == "ALL" {
+ continue
+ }
+ // Capabilities in CRI doesn't have `CAP_` prefix, so add it.
+ caps = append(caps, "CAP_"+strings.ToUpper(c))
+ }
+ opts = append(opts, oci.WithAddedCapabilities(caps))
+
+ caps = []string{}
+ for _, c := range capabilities.GetDropCapabilities() {
+ if strings.ToUpper(c) == "ALL" {
+ continue
+ }
+ caps = append(caps, "CAP_"+strings.ToUpper(c))
+ }
+ opts = append(opts, oci.WithDroppedCapabilities(caps))
+ return oci.Compose(opts...)
+}
+
+// WithoutAmbientCaps removes the ambient caps from the spec
+func WithoutAmbientCaps(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error {
+ if s.Process == nil {
+ s.Process = &runtimespec.Process{}
+ }
+ if s.Process.Capabilities == nil {
+ s.Process.Capabilities = &runtimespec.LinuxCapabilities{}
+ }
+ s.Process.Capabilities.Ambient = nil
+ return nil
+}
+
+// WithDisabledCgroups clears the Cgroups Path from the spec
+func WithDisabledCgroups(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error {
+ if s.Linux == nil {
+ s.Linux = &runtimespec.Linux{}
+ }
+ s.Linux.CgroupsPath = ""
+ return nil
+}
+
+// WithSelinuxLabels sets the mount and process labels
+func WithSelinuxLabels(process, mount string) oci.SpecOpts {
+ return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
+ if s.Linux == nil {
+ s.Linux = &runtimespec.Linux{}
+ }
+ if s.Process == nil {
+ s.Process = &runtimespec.Process{}
+ }
+ s.Linux.MountLabel = mount
+ s.Process.SelinuxLabel = process
+ return nil
+ }
+}
+
+// WithResources sets the provided resource restrictions
+func WithResources(resources *runtime.LinuxContainerResources, tolerateMissingHugetlbController, disableHugetlbController bool) oci.SpecOpts {
+ return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
+ if resources == nil {
+ return nil
+ }
+ if s.Linux == nil {
+ s.Linux = &runtimespec.Linux{}
+ }
+ if s.Linux.Resources == nil {
+ s.Linux.Resources = &runtimespec.LinuxResources{}
+ }
+ if s.Linux.Resources.CPU == nil {
+ s.Linux.Resources.CPU = &runtimespec.LinuxCPU{}
+ }
+ if s.Linux.Resources.Memory == nil {
+ s.Linux.Resources.Memory = &runtimespec.LinuxMemory{}
+ }
+ var (
+ p = uint64(resources.GetCpuPeriod())
+ q = resources.GetCpuQuota()
+ shares = uint64(resources.GetCpuShares())
+ limit = resources.GetMemoryLimitInBytes()
+ hugepages = resources.GetHugepageLimits()
+ )
+
+ if p != 0 {
+ s.Linux.Resources.CPU.Period = &p
+ }
+ if q != 0 {
+ s.Linux.Resources.CPU.Quota = &q
+ }
+ if shares != 0 {
+ s.Linux.Resources.CPU.Shares = &shares
+ }
+ if cpus := resources.GetCpusetCpus(); cpus != "" {
+ s.Linux.Resources.CPU.Cpus = cpus
+ }
+ if mems := resources.GetCpusetMems(); mems != "" {
+ s.Linux.Resources.CPU.Mems = resources.GetCpusetMems()
+ }
+ if limit != 0 {
+ s.Linux.Resources.Memory.Limit = &limit
+ }
+ if !disableHugetlbController {
+ if isHugetlbControllerPresent() {
+ for _, limit := range hugepages {
+ s.Linux.Resources.HugepageLimits = append(s.Linux.Resources.HugepageLimits, runtimespec.LinuxHugepageLimit{
+ Pagesize: limit.PageSize,
+ Limit: limit.Limit,
+ })
+ }
+ } else {
+ if !tolerateMissingHugetlbController {
+ return errors.Errorf("huge pages limits are specified but hugetlb cgroup controller is missing. " +
+ "Please set tolerate_missing_hugetlb_controller to `true` to ignore this error")
+ }
+ logrus.Warn("hugetlb cgroup controller is absent. skipping huge pages limits")
+ }
+ }
+ return nil
+ }
+}
+
+var (
+ supportsHugetlbOnce sync.Once
+ supportsHugetlb bool
+)
+
+func isHugetlbControllerPresent() bool {
+ supportsHugetlbOnce.Do(func() {
+ supportsHugetlb = false
+ if IsCgroup2UnifiedMode() {
+ supportsHugetlb, _ = cgroupv2HasHugetlb()
+ } else {
+ supportsHugetlb, _ = cgroupv1HasHugetlb()
+ }
+ })
+ return supportsHugetlb
+}
+
+var (
+ _cgroupv1HasHugetlbOnce sync.Once
+ _cgroupv1HasHugetlb bool
+ _cgroupv1HasHugetlbErr error
+ _cgroupv2HasHugetlbOnce sync.Once
+ _cgroupv2HasHugetlb bool
+ _cgroupv2HasHugetlbErr error
+ isUnifiedOnce sync.Once
+ isUnified bool
+)
+
+// cgroupv1HasHugetlb returns whether the hugetlb controller is present on
+// cgroup v1.
+func cgroupv1HasHugetlb() (bool, error) {
+ _cgroupv1HasHugetlbOnce.Do(func() {
+ if _, err := ioutil.ReadDir("/sys/fs/cgroup/hugetlb"); err != nil {
+ _cgroupv1HasHugetlbErr = errors.Wrap(err, "readdir /sys/fs/cgroup/hugetlb")
+ _cgroupv1HasHugetlb = false
+ } else {
+ _cgroupv1HasHugetlbErr = nil
+ _cgroupv1HasHugetlb = true
+ }
+ })
+ return _cgroupv1HasHugetlb, _cgroupv1HasHugetlbErr
+}
+
+// cgroupv2HasHugetlb returns whether the hugetlb controller is present on
+// cgroup v2.
+func cgroupv2HasHugetlb() (bool, error) {
+ _cgroupv2HasHugetlbOnce.Do(func() {
+ controllers, err := ioutil.ReadFile("/sys/fs/cgroup/cgroup.controllers")
+ if err != nil {
+ _cgroupv2HasHugetlbErr = errors.Wrap(err, "read /sys/fs/cgroup/cgroup.controllers")
+ return
+ }
+ _cgroupv2HasHugetlb = strings.Contains(string(controllers), "hugetlb")
+ })
+ return _cgroupv2HasHugetlb, _cgroupv2HasHugetlbErr
+}
+
+// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode.
+func IsCgroup2UnifiedMode() bool {
+ isUnifiedOnce.Do(func() {
+ var st syscall.Statfs_t
+ if err := syscall.Statfs("/sys/fs/cgroup", &st); err != nil {
+ panic("cannot statfs cgroup root")
+ }
+ isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC
+ })
+ return isUnified
+}
+
+// WithOOMScoreAdj sets the oom score
+func WithOOMScoreAdj(config *runtime.ContainerConfig, restrict bool) oci.SpecOpts {
+ return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
+ if s.Process == nil {
+ s.Process = &runtimespec.Process{}
+ }
+
+ resources := config.GetLinux().GetResources()
+ if resources == nil {
+ return nil
+ }
+ adj := int(resources.GetOomScoreAdj())
+ if restrict {
+ var err error
+ adj, err = restrictOOMScoreAdj(adj)
+ if err != nil {
+ return err
+ }
+ }
+ s.Process.OOMScoreAdj = &adj
+ return nil
+ }
+}
+
+// WithSysctls sets the provided sysctls onto the spec
+func WithSysctls(sysctls map[string]string) oci.SpecOpts {
+ return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
+ if s.Linux == nil {
+ s.Linux = &runtimespec.Linux{}
+ }
+ if s.Linux.Sysctl == nil {
+ s.Linux.Sysctl = make(map[string]string)
+ }
+ for k, v := range sysctls {
+ s.Linux.Sysctl[k] = v
+ }
+ return nil
+ }
+}
+
+// WithPodOOMScoreAdj sets the oom score for the pod sandbox
+func WithPodOOMScoreAdj(adj int, restrict bool) oci.SpecOpts {
+ return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
+ if s.Process == nil {
+ s.Process = &runtimespec.Process{}
+ }
+ if restrict {
+ var err error
+ adj, err = restrictOOMScoreAdj(adj)
+ if err != nil {
+ return err
+ }
+ }
+ s.Process.OOMScoreAdj = &adj
+ return nil
+ }
+}
+
+// WithSupplementalGroups sets the supplemental groups for the process
+func WithSupplementalGroups(groups []int64) oci.SpecOpts {
+ return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
+ if s.Process == nil {
+ s.Process = &runtimespec.Process{}
+ }
+ var guids []uint32
+ for _, g := range groups {
+ guids = append(guids, uint32(g))
+ }
+ s.Process.User.AdditionalGids = mergeGids(s.Process.User.AdditionalGids, guids)
+ return nil
+ }
+}
+
// WithPodNamespaces makes the container join the pod sandbox's
// network, IPC and UTS namespaces (located via the sandbox pid under
// /proc), and the sandbox's PID namespace as well unless the security
// context requests a per-container PID namespace
// (NamespaceMode_CONTAINER).
func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, pid uint32) oci.SpecOpts {
	namespaces := config.GetNamespaceOptions()

	opts := []oci.SpecOpts{
		oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.NetworkNamespace, Path: GetNetworkNamespace(pid)}),
		oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.IPCNamespace, Path: GetIPCNamespace(pid)}),
		oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.UTSNamespace, Path: GetUTSNamespace(pid)}),
	}
	if namespaces.GetPid() != runtime.NamespaceMode_CONTAINER {
		opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.PIDNamespace, Path: GetPIDNamespace(pid)}))
	}
	return oci.Compose(opts...)
}
+
// WithDefaultSandboxShares sets the sandbox's CPU shares to
// DefaultSandboxCPUshares, creating the intermediate Linux resource
// structs on the spec as needed.
func WithDefaultSandboxShares(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
	if s.Linux == nil {
		s.Linux = &runtimespec.Linux{}
	}
	if s.Linux.Resources == nil {
		s.Linux.Resources = &runtimespec.LinuxResources{}
	}
	if s.Linux.Resources.CPU == nil {
		s.Linux.Resources.CPU = &runtimespec.LinuxCPU{}
	}
	// Copy into a local so the spec holds a pointer to a stable value.
	i := uint64(DefaultSandboxCPUshares)
	s.Linux.Resources.CPU.Shares = &i
	return nil
}
+
+// WithoutNamespace removes the provided namespace
+func WithoutNamespace(t runtimespec.LinuxNamespaceType) oci.SpecOpts {
+ return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
+ if s.Linux == nil {
+ return nil
+ }
+ var namespaces []runtimespec.LinuxNamespace
+ for i, ns := range s.Linux.Namespaces {
+ if ns.Type != t {
+ namespaces = append(namespaces, s.Linux.Namespaces[i])
+ }
+ }
+ s.Linux.Namespaces = namespaces
+ return nil
+ }
+}
+
// nullOpt is a no-op SpecOpts, usable as a placeholder where a spec
// option is required but no change should be made.
func nullOpt(_ context.Context, _ oci.Client, _ *containers.Container, _ *runtimespec.Spec) error {
	return nil
}
+
// getCurrentOOMScoreAdj returns the daemon's own oom_score_adj, read
// from /proc/self/oom_score_adj.
func getCurrentOOMScoreAdj() (int, error) {
	b, err := ioutil.ReadFile("/proc/self/oom_score_adj")
	if err != nil {
		return 0, errors.Wrap(err, "could not get the daemon oom_score_adj")
	}
	// The proc file holds the decimal score followed by a newline.
	s := strings.TrimSpace(string(b))
	i, err := strconv.Atoi(s)
	if err != nil {
		return 0, errors.Wrap(err, "could not get the daemon oom_score_adj")
	}
	return i, nil
}
+
+func restrictOOMScoreAdj(preferredOOMScoreAdj int) (int, error) {
+ currentOOMScoreAdj, err := getCurrentOOMScoreAdj()
+ if err != nil {
+ return preferredOOMScoreAdj, err
+ }
+ if preferredOOMScoreAdj < currentOOMScoreAdj {
+ return currentOOMScoreAdj, nil
+ }
+ return preferredOOMScoreAdj, nil
+}
+
// Formats of the /proc/<pid>/ns/* paths used to join another process's
// namespaces by path.
const (
	// netNSFormat is the format of network namespace of a process.
	netNSFormat = "/proc/%v/ns/net"
	// ipcNSFormat is the format of ipc namespace of a process.
	ipcNSFormat = "/proc/%v/ns/ipc"
	// utsNSFormat is the format of uts namespace of a process.
	utsNSFormat = "/proc/%v/ns/uts"
	// pidNSFormat is the format of pid namespace of a process.
	pidNSFormat = "/proc/%v/ns/pid"
)
+
// GetNetworkNamespace returns the network namespace path of a process,
// e.g. "/proc/123/ns/net".
func GetNetworkNamespace(pid uint32) string {
	return fmt.Sprintf(netNSFormat, pid)
}

// GetIPCNamespace returns the ipc namespace path of a process,
// e.g. "/proc/123/ns/ipc".
func GetIPCNamespace(pid uint32) string {
	return fmt.Sprintf(ipcNSFormat, pid)
}

// GetUTSNamespace returns the uts namespace path of a process,
// e.g. "/proc/123/ns/uts".
func GetUTSNamespace(pid uint32) string {
	return fmt.Sprintf(utsNSFormat, pid)
}

// GetPIDNamespace returns the pid namespace path of a process,
// e.g. "/proc/123/ns/pid".
func GetPIDNamespace(pid uint32) string {
	return fmt.Sprintf(pidNSFormat, pid)
}
diff --git a/pkg/containerd/opts/spec_linux_test.go b/pkg/containerd/opts/spec_linux_test.go
new file mode 100644
index 000000000..1c9942f80
--- /dev/null
+++ b/pkg/containerd/opts/spec_linux_test.go
@@ -0,0 +1,47 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package opts
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
// TestMergeGids verifies that mergeGids merges two GID lists into a
// sorted result with duplicates removed.
func TestMergeGids(t *testing.T) {
	gids1 := []uint32{3, 2, 1}
	gids2 := []uint32{2, 3, 4}
	assert.Equal(t, []uint32{1, 2, 3, 4}, mergeGids(gids1, gids2))
}
+
+func TestRestrictOOMScoreAdj(t *testing.T) {
+ current, err := getCurrentOOMScoreAdj()
+ require.NoError(t, err)
+
+ got, err := restrictOOMScoreAdj(current - 1)
+ require.NoError(t, err)
+ assert.Equal(t, got, current)
+
+ got, err = restrictOOMScoreAdj(current)
+ require.NoError(t, err)
+ assert.Equal(t, got, current)
+
+ got, err = restrictOOMScoreAdj(current + 1)
+ require.NoError(t, err)
+ assert.Equal(t, got, current+1)
+}
diff --git a/pkg/containerd/opts/spec_test.go b/pkg/containerd/opts/spec_test.go
new file mode 100644
index 000000000..3e540c3f4
--- /dev/null
+++ b/pkg/containerd/opts/spec_test.go
@@ -0,0 +1,46 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package opts
+
+import (
+ "sort"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
// TestOrderedMounts verifies that sorting mounts via orderedMounts
// orders them shallowest-first (fewest path components first) and that
// the sort is stable: mounts of equal depth keep their input order
// (e.g. "/a" before "/b", "/a/b" before "/b/c").
func TestOrderedMounts(t *testing.T) {
	mounts := []*runtime.Mount{
		{ContainerPath: "/a/b/c"},
		{ContainerPath: "/a/b"},
		{ContainerPath: "/a/b/c/d"},
		{ContainerPath: "/a"},
		{ContainerPath: "/b"},
		{ContainerPath: "/b/c"},
	}
	expected := []*runtime.Mount{
		{ContainerPath: "/a"},
		{ContainerPath: "/b"},
		{ContainerPath: "/a/b"},
		{ContainerPath: "/b/c"},
		{ContainerPath: "/a/b/c"},
		{ContainerPath: "/a/b/c/d"},
	}
	sort.Stable(orderedMounts(mounts))
	assert.Equal(t, expected, mounts)
}
diff --git a/pkg/containerd/opts/spec_windows.go b/pkg/containerd/opts/spec_windows.go
new file mode 100644
index 000000000..50ee19d48
--- /dev/null
+++ b/pkg/containerd/opts/spec_windows.go
@@ -0,0 +1,224 @@
+// +build windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package opts
+
+import (
+ "context"
+ "path/filepath"
+ "sort"
+ "strings"
+
+ "github.com/containerd/containerd/containers"
+ "github.com/containerd/containerd/oci"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/pkg/errors"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ osinterface "github.com/containerd/cri/pkg/os"
+)
+
// WithWindowsNetworkNamespace sets the network namespace that a
// windows container joins, creating the Windows and Network sections
// of the spec as needed.
// TODO(windows): Move this into container/containerd.
func WithWindowsNetworkNamespace(path string) oci.SpecOpts {
	return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
		if s.Windows == nil {
			s.Windows = &runtimespec.Windows{}
		}
		if s.Windows.Network == nil {
			s.Windows.Network = &runtimespec.WindowsNetwork{}
		}
		s.Windows.Network.NetworkNamespace = path
		return nil
	}
}
+
// namedPipePath reports whether p refers to a Windows named pipe,
// i.e. starts with `\\.\pipe\`.
func namedPipePath(p string) bool {
	const pipePrefix = `\\.\pipe\`
	return strings.HasPrefix(p, pipePrefix)
}

// cleanMount normalizes a mount path with filepath.Clean. Named-pipe
// paths are returned untouched, because Clean would corrupt them.
func cleanMount(p string) string {
	if !namedPipePath(p) {
		return filepath.Clean(p)
	}
	return p
}
+
// WithWindowsMounts sorts and adds runtime and CRI mounts to the spec for
// windows container.
//
// Precedence on conflicting container paths: CRI mounts override extra
// mounts, and both override the spec's pre-existing default mounts.
// Mounts are applied parent-first (fewest path components first) so
// deeper mounts are not shadowed by their parents.
func WithWindowsMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount) oci.SpecOpts {
	return func(ctx context.Context, client oci.Client, _ *containers.Container, s *runtimespec.Spec) error {
		// mergeMounts merge CRI mounts with extra mounts. If a mount destination
		// is mounted by both a CRI mount and an extra mount, the CRI mount will
		// be kept.
		var (
			criMounts = config.GetMounts()
			mounts    = append([]*runtime.Mount{}, criMounts...)
		)
		// Copy all mounts from extra mounts, except for mounts overridden by CRI.
		for _, e := range extra {
			found := false
			for _, c := range criMounts {
				if cleanMount(e.ContainerPath) == cleanMount(c.ContainerPath) {
					found = true
					break
				}
			}
			if !found {
				mounts = append(mounts, e)
			}
		}

		// Sort mounts in number of parts. This ensures that high level mounts don't
		// shadow other mounts.
		sort.Sort(orderedMounts(mounts))

		// Copy all mounts from default mounts, except for
		// mounts overridden by supplied mount;
		mountSet := make(map[string]struct{})
		for _, m := range mounts {
			mountSet[cleanMount(m.ContainerPath)] = struct{}{}
		}

		defaultMounts := s.Mounts
		s.Mounts = nil

		for _, m := range defaultMounts {
			dst := cleanMount(m.Destination)
			if _, ok := mountSet[dst]; ok {
				// filter out mount overridden by a supplied mount
				continue
			}
			s.Mounts = append(s.Mounts, m)
		}

		for _, mount := range mounts {
			var (
				dst = mount.GetContainerPath()
				src = mount.GetHostPath()
			)
			// In the case of a named pipe mount on Windows, don't stat the file
			// or do other operations that open it, as that could interfere with
			// the listening process. filepath.Clean also breaks named pipe
			// paths, so don't use it.
			if !namedPipePath(src) {
				if _, err := osi.Stat(src); err != nil {
					// If the source doesn't exist, return an error instead
					// of creating the source. This aligns with Docker's
					// behavior on windows.
					return errors.Wrapf(err, "failed to stat %q", src)
				}
				var err error
				src, err = osi.ResolveSymbolicLink(src)
				if err != nil {
					return errors.Wrapf(err, "failed to resolve symlink %q", src)
				}
				// hcsshim requires clean path, especially '/' -> '\'.
				src = filepath.Clean(src)
				dst = filepath.Clean(dst)
			}

			var options []string
			// NOTE(random-liu): we don't change all mounts to `ro` when root filesystem
			// is readonly. This is different from docker's behavior, but make more sense.
			if mount.GetReadonly() {
				options = append(options, "ro")
			} else {
				options = append(options, "rw")
			}
			s.Mounts = append(s.Mounts, runtimespec.Mount{
				Source:      src,
				Destination: dst,
				Options:     options,
			})
		}
		return nil
	}
}
+
// WithWindowsResources sets the provided resource restrictions for
// windows containers. A nil resources argument is a no-op; zero-valued
// fields in resources are left unset on the spec.
func WithWindowsResources(resources *runtime.WindowsContainerResources) oci.SpecOpts {
	return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
		if resources == nil {
			return nil
		}
		if s.Windows == nil {
			s.Windows = &runtimespec.Windows{}
		}
		if s.Windows.Resources == nil {
			s.Windows.Resources = &runtimespec.WindowsResources{}
		}
		if s.Windows.Resources.CPU == nil {
			s.Windows.Resources.CPU = &runtimespec.WindowsCPUResources{}
		}
		if s.Windows.Resources.Memory == nil {
			s.Windows.Resources.Memory = &runtimespec.WindowsMemoryResources{}
		}

		// NOTE(review): shares and max are narrowed to uint16, so CRI
		// values above 65535 would silently truncate — presumably safe
		// since Windows caps CPU shares/maximum at 10000, but confirm.
		var (
			count  = uint64(resources.GetCpuCount())
			shares = uint16(resources.GetCpuShares())
			max    = uint16(resources.GetCpuMaximum())
			limit  = uint64(resources.GetMemoryLimitInBytes())
		)
		if count != 0 {
			s.Windows.Resources.CPU.Count = &count
		}
		if shares != 0 {
			s.Windows.Resources.CPU.Shares = &shares
		}
		if max != 0 {
			s.Windows.Resources.CPU.Maximum = &max
		}
		if limit != 0 {
			s.Windows.Resources.Memory.Limit = &limit
		}
		return nil
	}
}
+
// WithWindowsDefaultSandboxShares sets the sandbox's CPU shares to
// DefaultSandboxCPUshares, creating the intermediate Windows resource
// structs on the spec as needed.
func WithWindowsDefaultSandboxShares(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
	if s.Windows == nil {
		s.Windows = &runtimespec.Windows{}
	}
	if s.Windows.Resources == nil {
		s.Windows.Resources = &runtimespec.WindowsResources{}
	}
	if s.Windows.Resources.CPU == nil {
		s.Windows.Resources.CPU = &runtimespec.WindowsCPUResources{}
	}
	// Copy into a local so the spec holds a pointer to a stable value.
	i := uint16(DefaultSandboxCPUshares)
	s.Windows.Resources.CPU.Shares = &i
	return nil
}
+
// WithWindowsCredentialSpec assigns `credentialSpec` to the
// `runtime.Spec.Windows.CredentialSpec` field, creating the Windows
// section of the spec if needed.
func WithWindowsCredentialSpec(credentialSpec string) oci.SpecOpts {
	return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
		if s.Windows == nil {
			s.Windows = &runtimespec.Windows{}
		}
		s.Windows.CredentialSpec = credentialSpec
		return nil
	}
}
diff --git a/pkg/containerd/opts/task.go b/pkg/containerd/opts/task.go
new file mode 100644
index 000000000..622e3e9ce
--- /dev/null
+++ b/pkg/containerd/opts/task.go
@@ -0,0 +1,38 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package opts
+
+import (
+ "context"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/runtime/linux/runctypes"
+)
+
// WithContainerdShimCgroup returns a task option that places the
// containerd shim in the given cgroup path.
func WithContainerdShimCgroup(path string) containerd.NewTaskOpts {
	return func(_ context.Context, _ *containerd.Client, r *containerd.TaskInfo) error {
		// Note: this replaces any Options already set on the task.
		r.Options = &runctypes.CreateOptions{
			ShimCgroup: path,
		}
		return nil
	}
}
+
+// TODO: Since Options is an interface, different WithXXX helpers will be
+// needed to set different combinations of CreateOptions.
diff --git a/pkg/containerd/platforms/default_unix.go b/pkg/containerd/platforms/default_unix.go
new file mode 100644
index 000000000..ca7de553c
--- /dev/null
+++ b/pkg/containerd/platforms/default_unix.go
@@ -0,0 +1,28 @@
+// +build !windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package platforms
+
+import (
+ "github.com/containerd/containerd/platforms"
+)
+
// Default returns the current platform's default platform specification.
// On non-windows platforms this simply delegates to containerd's
// platforms.Default matcher.
func Default() platforms.MatchComparer {
	return platforms.Default()
}
diff --git a/pkg/containerd/platforms/default_windows.go b/pkg/containerd/platforms/default_windows.go
new file mode 100644
index 000000000..f8679f21f
--- /dev/null
+++ b/pkg/containerd/platforms/default_windows.go
@@ -0,0 +1,77 @@
+// +build windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package platforms
+
+import (
+ "fmt"
+ "strconv"
+ "strings"
+
+ "github.com/containerd/containerd/platforms"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ "golang.org/x/sys/windows"
+)
+
// matchComparer matches and sorts image platforms on windows, where a
// platform must additionally share the host's OS version prefix.
type matchComparer struct {
	// defaults matches architecture/OS, ignoring OS version.
	defaults platforms.Matcher
	// osVersionPrefix is the host's "major.minor.build" string that a
	// candidate platform's OSVersion must start with.
	osVersionPrefix string
}
+
// Match matches platform with the same windows major, minor
// and build version: the platform must satisfy the default matcher and
// its OSVersion must start with the host's version prefix (the
// revision component is ignored for matching).
//
// NOTE(review): this is a plain string prefix check — an OSVersion
// like "10.0.177630" would also match prefix "10.0.17763"; presumably
// harmless for real build numbers, but a "." boundary would be
// stricter — confirm.
func (m matchComparer) Match(p imagespec.Platform) bool {
	if m.defaults.Match(p) {
		// TODO(windows): Figure out whether OSVersion is deprecated.
		return strings.HasPrefix(p.OSVersion, m.osVersionPrefix)
	}
	return false
}
+
// Less sorts matched platforms in front of other platforms.
// For matched platforms, it puts platforms with larger revision
// number in front. Two unmatched platforms compare as not-less, so a
// stable sort preserves their original relative order.
func (m matchComparer) Less(p1, p2 imagespec.Platform) bool {
	m1, m2 := m.Match(p1), m.Match(p2)
	if m1 && m2 {
		r1, r2 := revision(p1.OSVersion), revision(p2.OSVersion)
		return r1 > r2
	}
	// p1 comes first only when it matches and p2 does not.
	return m1 && !m2
}
+
// revision extracts the revision (fourth dot-separated) component of a
// windows version string such as "10.0.17763.1". A missing or
// non-numeric revision yields 0.
func revision(v string) int {
	// Only the first five fields matter; the revision is the fourth.
	fields := strings.SplitN(v, ".", 5)
	if len(fields) < 4 {
		return 0
	}
	rev, err := strconv.Atoi(fields[3])
	if err != nil {
		return 0
	}
	return rev
}
+
// Default returns the current platform's default platform specification.
// On windows, image platforms must additionally carry an OSVersion
// starting with the host's "major.minor.build" (from
// RtlGetNtVersionNumbers) to match.
func Default() platforms.MatchComparer {
	major, minor, build := windows.RtlGetNtVersionNumbers()
	return matchComparer{
		defaults: platforms.Only(platforms.DefaultSpec()),
		osVersionPrefix: fmt.Sprintf("%d.%d.%d", major, minor, build),
	}
}
diff --git a/pkg/containerd/platforms/default_windows_test.go b/pkg/containerd/platforms/default_windows_test.go
new file mode 100644
index 000000000..0f45c97f7
--- /dev/null
+++ b/pkg/containerd/platforms/default_windows_test.go
@@ -0,0 +1,150 @@
+// +build windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package platforms
+
+import (
+ "sort"
+ "testing"
+
+ "github.com/containerd/containerd/platforms"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ "github.com/stretchr/testify/assert"
+)
+
// TestMatchComparerMatch verifies that matchComparer.Match accepts
// only platforms whose OSVersion begins with the configured
// "major.minor.build" prefix (any revision), and rejects differing
// builds and empty OSVersions.
func TestMatchComparerMatch(t *testing.T) {
	m := matchComparer{
		defaults: platforms.Only(imagespec.Platform{
			Architecture: "amd64",
			OS:           "windows",
		}),
		osVersionPrefix: "10.0.17763",
	}
	for _, test := range []struct {
		platform imagespec.Platform
		match    bool
	}{
		{
			// Same build, revision 1: matches.
			platform: imagespec.Platform{
				Architecture: "amd64",
				OS:           "windows",
				OSVersion:    "10.0.17763.1",
			},
			match: true,
		},
		{
			// Same build, revision 2: matches.
			platform: imagespec.Platform{
				Architecture: "amd64",
				OS:           "windows",
				OSVersion:    "10.0.17763.2",
			},
			match: true,
		},
		{
			// Lower build number: rejected.
			platform: imagespec.Platform{
				Architecture: "amd64",
				OS:           "windows",
				OSVersion:    "10.0.17762.1",
			},
			match: false,
		},
		{
			// Higher build number: rejected.
			platform: imagespec.Platform{
				Architecture: "amd64",
				OS:           "windows",
				OSVersion:    "10.0.17764.1",
			},
			match: false,
		},
		{
			// Missing OSVersion: rejected.
			platform: imagespec.Platform{
				Architecture: "amd64",
				OS:           "windows",
			},
			match: false,
		},
	} {
		assert.Equal(t, test.match, m.Match(test.platform))
	}
}
+
+func TestMatchComparerLess(t *testing.T) {
+ m := matchComparer{
+ defaults: platforms.Only(imagespec.Platform{
+ Architecture: "amd64",
+ OS: "windows",
+ }),
+ osVersionPrefix: "10.0.17763",
+ }
+ platforms := []imagespec.Platform{
+ {
+ Architecture: "amd64",
+ OS: "windows",
+ OSVersion: "10.0.17764.1",
+ },
+ {
+ Architecture: "amd64",
+ OS: "windows",
+ },
+ {
+ Architecture: "amd64",
+ OS: "windows",
+ OSVersion: "10.0.17763.1",
+ },
+ {
+ Architecture: "amd64",
+ OS: "windows",
+ OSVersion: "10.0.17763.2",
+ },
+ {
+ Architecture: "amd64",
+ OS: "windows",
+ OSVersion: "10.0.17762.1",
+ },
+ }
+ expected := []imagespec.Platform{
+ {
+ Architecture: "amd64",
+ OS: "windows",
+ OSVersion: "10.0.17763.2",
+ },
+ {
+ Architecture: "amd64",
+ OS: "windows",
+ OSVersion: "10.0.17763.1",
+ },
+ {
+ Architecture: "amd64",
+ OS: "windows",
+ OSVersion: "10.0.17764.1",
+ },
+ {
+ Architecture: "amd64",
+ OS: "windows",
+ },
+ {
+ Architecture: "amd64",
+ OS: "windows",
+ OSVersion: "10.0.17762.1",
+ },
+ }
+ sort.SliceStable(platforms, func(i, j int) bool {
+ return m.Less(platforms[i], platforms[j])
+ })
+ assert.Equal(t, expected, platforms)
+}
diff --git a/pkg/containerd/util/util.go b/pkg/containerd/util/util.go
new file mode 100644
index 000000000..ec062df0d
--- /dev/null
+++ b/pkg/containerd/util/util.go
@@ -0,0 +1,46 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package util
+
+import (
+ "time"
+
+ "github.com/containerd/containerd/namespaces"
+ "golang.org/x/net/context"
+
+ "github.com/containerd/cri/pkg/constants"
+)
+
// deferCleanupTimeout is the default timeout for containerd cleanup operations
// in defer.
const deferCleanupTimeout = 1 * time.Minute

// DeferContext returns a context for containerd cleanup operations in defer.
// A default timeout (deferCleanupTimeout) is applied to avoid a cleanup
// operation pending forever. Callers must invoke the returned
// CancelFunc to release the timer.
func DeferContext() (context.Context, context.CancelFunc) {
	return context.WithTimeout(NamespacedContext(), deferCleanupTimeout)
}

// NamespacedContext returns a fresh background context with the
// kubernetes containerd namespace set.
func NamespacedContext() context.Context {
	return WithNamespace(context.Background())
}

// WithNamespace adds the kubernetes containerd namespace
// (constants.K8sContainerdNamespace) to the context.
func WithNamespace(ctx context.Context) context.Context {
	return namespaces.WithNamespace(ctx, constants.K8sContainerdNamespace)
}
diff --git a/pkg/cri/cri.go b/pkg/cri/cri.go
new file mode 100644
index 000000000..7d9cc5fc7
--- /dev/null
+++ b/pkg/cri/cri.go
@@ -0,0 +1,192 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package cri
+
+import (
+ "flag"
+ "path/filepath"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/api/services/containers/v1"
+ "github.com/containerd/containerd/api/services/diff/v1"
+ "github.com/containerd/containerd/api/services/images/v1"
+ introspectionapi "github.com/containerd/containerd/api/services/introspection/v1"
+ "github.com/containerd/containerd/api/services/namespaces/v1"
+ "github.com/containerd/containerd/api/services/tasks/v1"
+ "github.com/containerd/containerd/content"
+ "github.com/containerd/containerd/leases"
+ "github.com/containerd/containerd/log"
+ "github.com/containerd/containerd/platforms"
+ "github.com/containerd/containerd/plugin"
+ "github.com/containerd/containerd/services"
+ "github.com/containerd/containerd/snapshots"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "k8s.io/klog/v2"
+
+ criconfig "github.com/containerd/cri/pkg/config"
+ "github.com/containerd/cri/pkg/constants"
+ criplatforms "github.com/containerd/cri/pkg/containerd/platforms"
+ "github.com/containerd/cri/pkg/server"
+)
+
// TODO(random-liu): Use github.com/pkg/errors for our errors.
// Register CRI service plugin
func init() {
	// Register the CRI service as a containerd GRPC plugin. It depends
	// on the service plugins that initCRIService wires into its
	// containerd client.
	config := criconfig.DefaultConfig()
	plugin.Register(&plugin.Registration{
		Type:   plugin.GRPCPlugin,
		ID:     "cri",
		Config: &config,
		Requires: []plugin.Type{
			plugin.ServicePlugin,
		},
		InitFn: initCRIService,
	})
}
+
// initCRIService is the CRI plugin init function. It validates the
// plugin config, sets the klog level from logrus, builds a containerd
// client backed by the registered service plugins, creates the CRI
// service, and starts it in a background goroutine.
func initCRIService(ic *plugin.InitContext) (interface{}, error) {
	ic.Meta.Platforms = []imagespec.Platform{platforms.DefaultSpec()}
	ic.Meta.Exports = map[string]string{"CRIVersion": constants.CRIVersion}
	ctx := ic.Context
	pluginConfig := ic.Config.(*criconfig.PluginConfig)
	if err := criconfig.ValidatePluginConfig(ctx, pluginConfig); err != nil {
		return nil, errors.Wrap(err, "invalid plugin config")
	}

	c := criconfig.Config{
		PluginConfig:       *pluginConfig,
		ContainerdRootDir:  filepath.Dir(ic.Root),
		ContainerdEndpoint: ic.Address,
		RootDir:            ic.Root,
		StateDir:           ic.State,
	}
	log.G(ctx).Infof("Start cri plugin with config %+v", c)

	if err := setGLogLevel(); err != nil {
		return nil, errors.Wrap(err, "failed to set glog level")
	}

	servicesOpts, err := getServicesOpts(ic)
	if err != nil {
		return nil, errors.Wrap(err, "failed to get services")
	}

	log.G(ctx).Info("Connect containerd service")
	// NOTE(review): the empty address plus WithServices presumably makes
	// the client use the in-process services rather than dial a socket —
	// confirm against the containerd client documentation.
	client, err := containerd.New(
		"",
		containerd.WithDefaultNamespace(constants.K8sContainerdNamespace),
		containerd.WithDefaultPlatform(criplatforms.Default()),
		containerd.WithServices(servicesOpts...),
	)
	if err != nil {
		return nil, errors.Wrap(err, "failed to create containerd client")
	}

	s, err := server.NewCRIService(c, client)
	if err != nil {
		return nil, errors.Wrap(err, "failed to create CRI service")
	}

	go func() {
		if err := s.Run(); err != nil {
			log.G(ctx).WithError(err).Fatal("Failed to run CRI service")
		}
		// TODO(random-liu): Whether and how we can stop containerd.
	}()
	return s, nil
}
+
// getServicesOpts get service options from plugin context. It resolves
// every required containerd service plugin (content, images,
// snapshots, containers, tasks, diff, namespaces, leases,
// introspection) and returns the corresponding client options; a
// missing or uninstantiable service is an error.
func getServicesOpts(ic *plugin.InitContext) ([]containerd.ServicesOpt, error) {
	plugins, err := ic.GetByType(plugin.ServicePlugin)
	if err != nil {
		return nil, errors.Wrap(err, "failed to get service plugin")
	}

	opts := []containerd.ServicesOpt{
		containerd.WithEventService(ic.Events),
	}
	// Map each required service ID to the option constructor that wraps
	// its (type-asserted) instance.
	for s, fn := range map[string]func(interface{}) containerd.ServicesOpt{
		services.ContentService: func(s interface{}) containerd.ServicesOpt {
			return containerd.WithContentStore(s.(content.Store))
		},
		services.ImagesService: func(s interface{}) containerd.ServicesOpt {
			return containerd.WithImageService(s.(images.ImagesClient))
		},
		services.SnapshotsService: func(s interface{}) containerd.ServicesOpt {
			return containerd.WithSnapshotters(s.(map[string]snapshots.Snapshotter))
		},
		services.ContainersService: func(s interface{}) containerd.ServicesOpt {
			return containerd.WithContainerService(s.(containers.ContainersClient))
		},
		services.TasksService: func(s interface{}) containerd.ServicesOpt {
			return containerd.WithTaskService(s.(tasks.TasksClient))
		},
		services.DiffService: func(s interface{}) containerd.ServicesOpt {
			return containerd.WithDiffService(s.(diff.DiffClient))
		},
		services.NamespacesService: func(s interface{}) containerd.ServicesOpt {
			return containerd.WithNamespaceService(s.(namespaces.NamespacesClient))
		},
		services.LeasesService: func(s interface{}) containerd.ServicesOpt {
			return containerd.WithLeasesService(s.(leases.Manager))
		},
		services.IntrospectionService: func(s interface{}) containerd.ServicesOpt {
			return containerd.WithIntrospectionService(s.(introspectionapi.IntrospectionClient))
		},
	} {
		p := plugins[s]
		if p == nil {
			return nil, errors.Errorf("service %q not found", s)
		}
		i, err := p.Instance()
		if err != nil {
			return nil, errors.Wrapf(err, "failed to get instance of service %q", s)
		}
		if i == nil {
			return nil, errors.Errorf("instance of service %q not found", s)
		}
		opts = append(opts, fn(i))
	}
	return opts, nil
}
+
// setGLogLevel maps the current logrus level onto klog's "v"
// verbosity flag so that the Kubernetes libraries used by the plugin
// log at a comparable level. Logging is always directed to stderr.
func setGLogLevel() error {
	l := logrus.GetLevel()
	fs := flag.NewFlagSet("klog", flag.PanicOnError)
	klog.InitFlags(fs)
	if err := fs.Set("logtostderr", "true"); err != nil {
		return err
	}
	switch l {
	case logrus.TraceLevel:
		return fs.Set("v", "5")
	case logrus.DebugLevel:
		return fs.Set("v", "4")
	case logrus.InfoLevel:
		return fs.Set("v", "2")
	// glog doesn't support following filters. Defaults to v=0.
	// (Go switch cases do not fall through; these levels simply leave
	// the flag at its default.)
	case logrus.WarnLevel:
	case logrus.ErrorLevel:
	case logrus.FatalLevel:
	case logrus.PanicLevel:
	}
	return nil
}
diff --git a/pkg/ioutil/read_closer.go b/pkg/ioutil/read_closer.go
new file mode 100644
index 000000000..fbc30a6f7
--- /dev/null
+++ b/pkg/ioutil/read_closer.go
@@ -0,0 +1,57 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package ioutil
+
+import "io"
+
// wrapReadCloser wraps a reader with a close function, backed by an
// in-memory pipe. (Comment fixed: this type was previously described
// as "writeCloseInformer".)
type wrapReadCloser struct {
	reader *io.PipeReader
	writer *io.PipeWriter
}

// NewWrapReadCloser creates a wrapReadCloser from a reader.
// NOTE(random-liu): To avoid goroutine leakage, the reader passed in
// must be eventually closed by the caller.
func NewWrapReadCloser(r io.Reader) io.ReadCloser {
	pr, pw := io.Pipe()
	go func() {
		// Pump r into the pipe until r is exhausted or closed, then
		// close both ends so pending Reads observe the shutdown.
		_, _ = io.Copy(pw, r)
		pr.Close()
		pw.Close()
	}()
	return &wrapReadCloser{
		reader: pr,
		writer: pw,
	}
}

// Read reads up to len(p) bytes into p. A closed pipe is reported as
// io.EOF so that callers can treat Close as a normal end of stream.
func (w *wrapReadCloser) Read(p []byte) (int, error) {
	n, err := w.reader.Read(p)
	if err == io.ErrClosedPipe {
		return n, io.EOF
	}
	return n, err
}

// Close closes both ends of the pipe; subsequent Reads return io.EOF.
// It always returns nil.
func (w *wrapReadCloser) Close() error {
	w.reader.Close()
	w.writer.Close()
	return nil
}
diff --git a/pkg/ioutil/read_closer_test.go b/pkg/ioutil/read_closer_test.go
new file mode 100644
index 000000000..b77e20ec9
--- /dev/null
+++ b/pkg/ioutil/read_closer_test.go
@@ -0,0 +1,47 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package ioutil
+
+import (
+ "bytes"
+ "io"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestWrapReadCloser(t *testing.T) {
+ buf := bytes.NewBufferString("abc")
+
+ rc := NewWrapReadCloser(buf)
+ dst := make([]byte, 1)
+ n, err := rc.Read(dst)
+ assert.Equal(t, 1, n)
+ assert.NoError(t, err)
+ assert.Equal(t, []byte("a"), dst)
+
+ n, err = rc.Read(dst)
+ assert.Equal(t, 1, n)
+ assert.NoError(t, err)
+ assert.Equal(t, []byte("b"), dst)
+
+ rc.Close()
+ n, err = rc.Read(dst)
+ assert.Equal(t, 0, n)
+ assert.Equal(t, io.EOF, err)
+ assert.Equal(t, []byte("b"), dst)
+}
diff --git a/pkg/ioutil/write_closer.go b/pkg/ioutil/write_closer.go
new file mode 100644
index 000000000..c816c514a
--- /dev/null
+++ b/pkg/ioutil/write_closer.go
@@ -0,0 +1,102 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package ioutil
+
+import (
+ "io"
+ "sync"
+)
+
+// writeCloseInformer wraps the passed-in write closer with a close channel.
+// Caller could wait on the close channel for the write closer to be
+// closed.
+type writeCloseInformer struct {
+ close chan struct{}
+ wc io.WriteCloser
+}
+
+// NewWriteCloseInformer creates the writeCloseInformer from a write closer.
+func NewWriteCloseInformer(wc io.WriteCloser) (io.WriteCloser, <-chan struct{}) {
+ close := make(chan struct{})
+ return &writeCloseInformer{
+ close: close,
+ wc: wc,
+ }, close
+}
+
+// Write passes through the data into the internal write closer.
+func (w *writeCloseInformer) Write(p []byte) (int, error) {
+ return w.wc.Write(p)
+}
+
+// Close closes the internal write closer and informs the close channel.
+func (w *writeCloseInformer) Close() error {
+ err := w.wc.Close()
+ close(w.close)
+ return err
+}
+
+// nopWriteCloser wraps the passed-in writer with a nop close function.
+type nopWriteCloser struct {
+ w io.Writer
+}
+
+// NewNopWriteCloser creates the nopWriteCloser from a writer.
+func NewNopWriteCloser(w io.Writer) io.WriteCloser {
+ return &nopWriteCloser{w: w}
+}
+
+// Write passes through the data into the internal writer.
+func (n *nopWriteCloser) Write(p []byte) (int, error) {
+ return n.w.Write(p)
+}
+
+// Close is a nop close function.
+func (n *nopWriteCloser) Close() error {
+ return nil
+}
+
+// serialWriteCloser wraps a write closer and makes sure all writes
+// are done in serial.
+// Parallel writes won't interleave with each other. Use case:
+// 1) Pipe: Write content longer than PIPE_BUF.
+// See http://man7.org/linux/man-pages/man7/pipe.7.html
+// 2) <3.14 Linux Kernel: write is not atomic
+// See http://man7.org/linux/man-pages/man2/write.2.html
+type serialWriteCloser struct {
+ mu sync.Mutex
+ wc io.WriteCloser
+}
+
+// NewSerialWriteCloser creates a SerialWriteCloser from a write closer.
+func NewSerialWriteCloser(wc io.WriteCloser) io.WriteCloser {
+ return &serialWriteCloser{wc: wc}
+}
+
+// Write writes a group of byte arrays in order atomically.
+func (s *serialWriteCloser) Write(data []byte) (int, error) {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ return s.wc.Write(data)
+}
+
+// Close closes the write closer.
+func (s *serialWriteCloser) Close() error {
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ return s.wc.Close()
+}
diff --git a/pkg/ioutil/write_closer_test.go b/pkg/ioutil/write_closer_test.go
new file mode 100644
index 000000000..25272a5fd
--- /dev/null
+++ b/pkg/ioutil/write_closer_test.go
@@ -0,0 +1,108 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package ioutil
+
+import (
+ "io/ioutil"
+ "os"
+ "sort"
+ "strconv"
+ "strings"
+ "sync"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+func TestWriteCloseInformer(t *testing.T) {
+ original := &writeCloser{}
+ wci, close := NewWriteCloseInformer(original)
+ data := "test"
+
+ n, err := wci.Write([]byte(data))
+ assert.Equal(t, len(data), n)
+ assert.Equal(t, data, original.buf.String())
+ assert.NoError(t, err)
+
+ select {
+ case <-close:
+ assert.Fail(t, "write closer closed")
+ default:
+ }
+
+ wci.Close()
+ assert.True(t, original.closed)
+
+ select {
+ case <-close:
+ default:
+ assert.Fail(t, "write closer not closed")
+ }
+}
+
+func TestSerialWriteCloser(t *testing.T) {
+ const (
+ // Test 10 times to make sure it always pass.
+ testCount = 10
+
+ goroutine = 10
+ dataLen = 100000
+ )
+ for n := 0; n < testCount; n++ {
+ testData := make([][]byte, goroutine)
+ for i := 0; i < goroutine; i++ {
+ testData[i] = []byte(repeatNumber(i, dataLen) + "\n")
+ }
+
+ f, err := ioutil.TempFile("", "serial-write-closer")
+ require.NoError(t, err)
+ defer os.RemoveAll(f.Name())
+ defer f.Close()
+ wc := NewSerialWriteCloser(f)
+ defer wc.Close()
+
+ // Write data in parallel
+ var wg sync.WaitGroup
+ wg.Add(goroutine)
+ for i := 0; i < goroutine; i++ {
+ go func(id int) {
+ n, err := wc.Write(testData[id])
+ assert.NoError(t, err)
+ assert.Equal(t, dataLen+1, n)
+ wg.Done()
+ }(i)
+ }
+ wg.Wait()
+ wc.Close()
+
+ // Check test result
+ content, err := ioutil.ReadFile(f.Name())
+ require.NoError(t, err)
+ resultData := strings.Split(strings.TrimSpace(string(content)), "\n")
+ require.Len(t, resultData, goroutine)
+ sort.Strings(resultData)
+ for i := 0; i < goroutine; i++ {
+ expected := repeatNumber(i, dataLen)
+ assert.Equal(t, expected, resultData[i])
+ }
+ }
+}
+
+func repeatNumber(num, count int) string {
+ return strings.Repeat(strconv.Itoa(num), count)
+}
diff --git a/pkg/ioutil/writer_group.go b/pkg/ioutil/writer_group.go
new file mode 100644
index 000000000..0ed550497
--- /dev/null
+++ b/pkg/ioutil/writer_group.go
@@ -0,0 +1,105 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package ioutil
+
+import (
+ "errors"
+ "io"
+ "sync"
+)
+
+// WriterGroup is a group of writers. Writers can be dynamically
+// added and removed.
+type WriterGroup struct {
+ mu sync.Mutex
+ writers map[string]io.WriteCloser
+ closed bool
+}
+
+var _ io.Writer = &WriterGroup{}
+
+// NewWriterGroup creates an empty writer group.
+func NewWriterGroup() *WriterGroup {
+ return &WriterGroup{
+ writers: make(map[string]io.WriteCloser),
+ }
+}
+
+// Add adds a writer into the group. The writer will be closed
+// if the writer group is closed.
+func (g *WriterGroup) Add(key string, w io.WriteCloser) {
+ g.mu.Lock()
+ defer g.mu.Unlock()
+ if g.closed {
+ w.Close()
+ return
+ }
+ g.writers[key] = w
+}
+
+// Get gets a writer from the group, returns nil if the writer
+// doesn't exist.
+func (g *WriterGroup) Get(key string) io.WriteCloser {
+ g.mu.Lock()
+ defer g.mu.Unlock()
+ return g.writers[key]
+}
+
+// Remove removes a writer from the group.
+func (g *WriterGroup) Remove(key string) {
+ g.mu.Lock()
+ defer g.mu.Unlock()
+ w, ok := g.writers[key]
+ if !ok {
+ return
+ }
+ w.Close()
+ delete(g.writers, key)
+}
+
+// Write writes data into each writer. If a writer returns error,
+// it will be closed and removed from the writer group. It returns
+// error if writer group is empty.
+func (g *WriterGroup) Write(p []byte) (int, error) {
+ g.mu.Lock()
+ defer g.mu.Unlock()
+ for k, w := range g.writers {
+ n, err := w.Write(p)
+ if err == nil && len(p) == n {
+ continue
+ }
+ // The writer is closed or in bad state, remove it.
+ w.Close()
+ delete(g.writers, k)
+ }
+ if len(g.writers) == 0 {
+ return 0, errors.New("writer group is empty")
+ }
+ return len(p), nil
+}
+
+// Close closes the writer group. Write will return error after
+// closed.
+func (g *WriterGroup) Close() {
+ g.mu.Lock()
+ defer g.mu.Unlock()
+ for _, w := range g.writers {
+ w.Close()
+ }
+ g.writers = nil
+ g.closed = true
+}
diff --git a/pkg/ioutil/writer_group_test.go b/pkg/ioutil/writer_group_test.go
new file mode 100644
index 000000000..289ea25fa
--- /dev/null
+++ b/pkg/ioutil/writer_group_test.go
@@ -0,0 +1,115 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package ioutil
+
+import (
+ "bytes"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+type writeCloser struct {
+ buf bytes.Buffer
+ closed bool
+}
+
+func (wc *writeCloser) Write(p []byte) (int, error) {
+ return wc.buf.Write(p)
+}
+
+func (wc *writeCloser) Close() error {
+ wc.closed = true
+ return nil
+}
+
+func TestEmptyWriterGroup(t *testing.T) {
+ wg := NewWriterGroup()
+ _, err := wg.Write([]byte("test"))
+ assert.Error(t, err)
+}
+
+func TestClosedWriterGroup(t *testing.T) {
+ wg := NewWriterGroup()
+ wc := &writeCloser{}
+ key, data := "test key", "test data"
+
+ wg.Add(key, wc)
+
+ n, err := wg.Write([]byte(data))
+ assert.Equal(t, len(data), n)
+ assert.Equal(t, data, wc.buf.String())
+ assert.NoError(t, err)
+
+ wg.Close()
+ assert.True(t, wc.closed)
+
+ newWC := &writeCloser{}
+ wg.Add(key, newWC)
+ assert.True(t, newWC.closed)
+
+ _, err = wg.Write([]byte(data))
+ assert.Error(t, err)
+}
+
+func TestAddGetRemoveWriter(t *testing.T) {
+ wg := NewWriterGroup()
+ wc1, wc2 := &writeCloser{}, &writeCloser{}
+ key1, key2 := "test key 1", "test key 2"
+
+ wg.Add(key1, wc1)
+ _, err := wg.Write([]byte("test data 1"))
+ assert.NoError(t, err)
+ assert.Equal(t, "test data 1", wc1.buf.String())
+
+ wg.Add(key2, wc2)
+ _, err = wg.Write([]byte("test data 2"))
+ assert.NoError(t, err)
+ assert.Equal(t, "test data 1test data 2", wc1.buf.String())
+ assert.Equal(t, "test data 2", wc2.buf.String())
+
+ assert.Equal(t, wc1, wg.Get(key1))
+
+ wg.Remove(key1)
+ _, err = wg.Write([]byte("test data 3"))
+ assert.NoError(t, err)
+ assert.Equal(t, "test data 1test data 2", wc1.buf.String())
+ assert.Equal(t, "test data 2test data 3", wc2.buf.String())
+
+ assert.Equal(t, nil, wg.Get(key1))
+
+ wg.Close()
+}
+
+func TestReplaceWriter(t *testing.T) {
+ wg := NewWriterGroup()
+ wc1, wc2 := &writeCloser{}, &writeCloser{}
+ key := "test-key"
+
+ wg.Add(key, wc1)
+ _, err := wg.Write([]byte("test data 1"))
+ assert.NoError(t, err)
+ assert.Equal(t, "test data 1", wc1.buf.String())
+
+ wg.Add(key, wc2)
+ _, err = wg.Write([]byte("test data 2"))
+ assert.NoError(t, err)
+ assert.Equal(t, "test data 1", wc1.buf.String())
+ assert.Equal(t, "test data 2", wc2.buf.String())
+
+ wg.Close()
+}
diff --git a/pkg/netns/netns_linux.go b/pkg/netns/netns_linux.go
new file mode 100644
index 000000000..ff879d9d6
--- /dev/null
+++ b/pkg/netns/netns_linux.go
@@ -0,0 +1,220 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+// Copyright 2018 CNI authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+//
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package netns
+
+import (
+ "crypto/rand"
+ "fmt"
+ "os"
+ "path"
+ "runtime"
+ "sync"
+
+ "github.com/containerd/containerd/mount"
+ cnins "github.com/containernetworking/plugins/pkg/ns"
+ "github.com/docker/docker/pkg/symlink"
+ "github.com/pkg/errors"
+ "golang.org/x/sys/unix"
+)
+
+const nsRunDir = "/var/run/netns"
+
+// Some of the following functions are migrated from
+// https://github.com/containernetworking/plugins/blob/master/pkg/testutils/netns_linux.go
+
+// newNS creates a new persistent (bind-mounted) network namespace and returns the
+// path to the network namespace.
+func newNS() (nsPath string, err error) {
+ b := make([]byte, 16)
+ if _, err := rand.Reader.Read(b); err != nil {
+ return "", errors.Wrap(err, "failed to generate random netns name")
+ }
+
+ // Create the directory for mounting network namespaces
+ // This needs to be a shared mountpoint in case it is mounted in to
+ // other namespaces (containers)
+ if err := os.MkdirAll(nsRunDir, 0755); err != nil {
+ return "", err
+ }
+
+ // create an empty file at the mount point
+ nsName := fmt.Sprintf("cni-%x-%x-%x-%x-%x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:])
+ nsPath = path.Join(nsRunDir, nsName)
+ mountPointFd, err := os.Create(nsPath)
+ if err != nil {
+ return "", err
+ }
+ mountPointFd.Close()
+
+ defer func() {
+ // Ensure the mount point is cleaned up on errors
+ if err != nil {
+ os.RemoveAll(nsPath) // nolint: errcheck
+ }
+ }()
+
+ var wg sync.WaitGroup
+ wg.Add(1)
+
+ // do namespace work in a dedicated goroutine, so that we can safely
+ // Lock/Unlock OSThread without upsetting the lock/unlock state of
+ // the caller of this function
+ go (func() {
+ defer wg.Done()
+ runtime.LockOSThread()
+ // Don't unlock. By not unlocking, golang will kill the OS thread when the
+ // goroutine is done (for go1.10+)
+
+ var origNS cnins.NetNS
+ origNS, err = cnins.GetNS(getCurrentThreadNetNSPath())
+ if err != nil {
+ return
+ }
+ defer origNS.Close()
+
+ // create a new netns on the current thread
+ err = unix.Unshare(unix.CLONE_NEWNET)
+ if err != nil {
+ return
+ }
+
+ // Put this thread back to the orig ns, since it might get reused (pre go1.10)
+ defer origNS.Set() // nolint: errcheck
+
+ // bind mount the netns from the current thread (from /proc) onto the
+ // mount point. This causes the namespace to persist, even when there
+ // are no threads in the ns.
+ err = unix.Mount(getCurrentThreadNetNSPath(), nsPath, "none", unix.MS_BIND, "")
+ if err != nil {
+ err = errors.Wrapf(err, "failed to bind mount ns at %s", nsPath)
+ }
+ })()
+ wg.Wait()
+
+ if err != nil {
+ return "", errors.Wrap(err, "failed to create namespace")
+ }
+
+ return nsPath, nil
+}
+
+// unmountNS unmounts the NS held by the netns object. unmountNS is idempotent.
+func unmountNS(path string) error {
+ if _, err := os.Stat(path); err != nil {
+ if os.IsNotExist(err) {
+ return nil
+ }
+ return errors.Wrap(err, "failed to stat netns")
+ }
+ path, err := symlink.FollowSymlinkInScope(path, "/")
+ if err != nil {
+ return errors.Wrap(err, "failed to follow symlink")
+ }
+ if err := mount.Unmount(path, unix.MNT_DETACH); err != nil && !os.IsNotExist(err) {
+ return errors.Wrap(err, "failed to umount netns")
+ }
+ if err := os.RemoveAll(path); err != nil {
+ return errors.Wrap(err, "failed to remove netns")
+ }
+ return nil
+}
+
+// getCurrentThreadNetNSPath copied from pkg/ns
+func getCurrentThreadNetNSPath() string {
+ // /proc/self/ns/net returns the namespace of the main thread, not
+ // of whatever thread this goroutine is running on. Make sure we
+ // use the thread's net namespace since the thread is switching around
+ return fmt.Sprintf("/proc/%d/task/%d/ns/net", os.Getpid(), unix.Gettid())
+}
+
+// NetNS holds network namespace.
+type NetNS struct {
+ path string
+}
+
+// NewNetNS creates a network namespace.
+func NewNetNS() (*NetNS, error) {
+ path, err := newNS()
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to setup netns")
+ }
+ return &NetNS{path: path}, nil
+}
+
+// LoadNetNS loads existing network namespace.
+func LoadNetNS(path string) *NetNS {
+ return &NetNS{path: path}
+}
+
+// Remove removes the network namespace. Remove is idempotent, meaning it might
+// be invoked multiple times and provides consistent result.
+func (n *NetNS) Remove() error {
+ return unmountNS(n.path)
+}
+
+// Closed checks whether the network namespace has been closed.
+func (n *NetNS) Closed() (bool, error) {
+ ns, err := cnins.GetNS(n.path)
+ if err != nil {
+ if _, ok := err.(cnins.NSPathNotExistErr); ok {
+ // The network namespace has already been removed.
+ return true, nil
+ }
+ if _, ok := err.(cnins.NSPathNotNSErr); ok {
+ // The network namespace is not mounted, remove it.
+ if err := os.RemoveAll(n.path); err != nil {
+ return false, errors.Wrap(err, "remove netns")
+ }
+ return true, nil
+ }
+ return false, errors.Wrap(err, "get netns fd")
+ }
+ if err := ns.Close(); err != nil {
+ return false, errors.Wrap(err, "close netns fd")
+ }
+ return false, nil
+}
+
+// GetPath returns network namespace path for sandbox container
+func (n *NetNS) GetPath() string {
+ return n.path
+}
+
+// Do runs a function in the network namespace.
+func (n *NetNS) Do(f func(cnins.NetNS) error) error {
+ ns, err := cnins.GetNS(n.path)
+ if err != nil {
+ return errors.Wrap(err, "get netns fd")
+ }
+ defer ns.Close() // nolint: errcheck
+ return ns.Do(f)
+}
diff --git a/pkg/netns/netns_other.go b/pkg/netns/netns_other.go
new file mode 100644
index 000000000..253987ea6
--- /dev/null
+++ b/pkg/netns/netns_other.go
@@ -0,0 +1,58 @@
+// +build !windows,!linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package netns
+
+import (
+ "github.com/pkg/errors"
+)
+
+var errNotImplementedOnUnix = errors.New("not implemented on unix")
+
+// NetNS holds network namespace.
+type NetNS struct {
+ path string
+}
+
+// NewNetNS creates a network namespace.
+func NewNetNS() (*NetNS, error) {
+ return nil, errNotImplementedOnUnix
+}
+
+// LoadNetNS loads existing network namespace.
+func LoadNetNS(path string) *NetNS {
+ return &NetNS{path: path}
+}
+
+// Remove removes the network namespace. Remove is idempotent, meaning it might
+// be invoked multiple times and provides consistent result.
+func (n *NetNS) Remove() error {
+ return errNotImplementedOnUnix
+}
+
+// Closed checks whether the network namespace has been closed.
+func (n *NetNS) Closed() (bool, error) {
+ return false, errNotImplementedOnUnix
+}
+
+// GetPath returns network namespace path for sandbox container
+func (n *NetNS) GetPath() string {
+ return n.path
+}
+
+// NOTE: Do function is not supported.
diff --git a/pkg/netns/netns_windows.go b/pkg/netns/netns_windows.go
new file mode 100644
index 000000000..62dac4151
--- /dev/null
+++ b/pkg/netns/netns_windows.go
@@ -0,0 +1,78 @@
+// +build windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package netns
+
+import "github.com/Microsoft/hcsshim/hcn"
+
+// NetNS holds network namespace for sandbox
+type NetNS struct {
+ path string
+}
+
+// NewNetNS creates a network namespace for the sandbox
+func NewNetNS() (*NetNS, error) {
+ temp := hcn.HostComputeNamespace{}
+ hcnNamespace, err := temp.Create()
+ if err != nil {
+ return nil, err
+ }
+
+ return &NetNS{path: hcnNamespace.Id}, nil
+}
+
+// LoadNetNS loads existing network namespace.
+func LoadNetNS(path string) *NetNS {
+ return &NetNS{path: path}
+}
+
+// Remove removes the network namespace if it exists and is not closed. Remove is idempotent,
+// meaning it might be invoked multiple times and provides consistent result.
+func (n *NetNS) Remove() error {
+ hcnNamespace, err := hcn.GetNamespaceByID(n.path)
+ if err != nil {
+ if hcn.IsNotFoundError(err) {
+ return nil
+ }
+ return err
+ }
+ err = hcnNamespace.Delete()
+ if err == nil || hcn.IsNotFoundError(err) {
+ return nil
+ }
+ return err
+}
+
+// Closed checks whether the network namespace has been closed.
+func (n *NetNS) Closed() (bool, error) {
+ _, err := hcn.GetNamespaceByID(n.path)
+ if err == nil {
+ return false, nil
+ }
+ if hcn.IsNotFoundError(err) {
+ return true, nil
+ }
+ return false, err
+}
+
+// GetPath returns network namespace path for sandbox container
+func (n *NetNS) GetPath() string {
+ return n.path
+}
+
+// NOTE: Do function is not supported.
diff --git a/pkg/os/mount_linux.go b/pkg/os/mount_linux.go
new file mode 100644
index 000000000..15228e57d
--- /dev/null
+++ b/pkg/os/mount_linux.go
@@ -0,0 +1,37 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package os
+
+import (
+ "github.com/containerd/containerd/mount"
+ "golang.org/x/sys/unix"
+)
+
+// Mount will call unix.Mount to mount the file.
+func (RealOS) Mount(source string, target string, fstype string, flags uintptr, data string) error {
+ return unix.Mount(source, target, fstype, flags, data)
+}
+
+// Unmount will call mount.Unmount to detach-unmount the target.
+func (RealOS) Unmount(target string) error {
+ return mount.Unmount(target, unix.MNT_DETACH)
+}
+
+// LookupMount gets mount info of a given path.
+func (RealOS) LookupMount(path string) (mount.Info, error) {
+ return mount.Lookup(path)
+}
diff --git a/pkg/os/mount_other.go b/pkg/os/mount_other.go
new file mode 100644
index 000000000..3a778d058
--- /dev/null
+++ b/pkg/os/mount_other.go
@@ -0,0 +1,26 @@
+// +build !windows,!linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package os
+
+import "github.com/containerd/containerd/mount"
+
+// LookupMount gets mount info of a given path.
+func (RealOS) LookupMount(path string) (mount.Info, error) {
+ return mount.Lookup(path)
+}
diff --git a/pkg/os/mount_unix.go b/pkg/os/mount_unix.go
new file mode 100644
index 000000000..e81def359
--- /dev/null
+++ b/pkg/os/mount_unix.go
@@ -0,0 +1,33 @@
+// +build !windows,!linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package os
+
+import (
+ "github.com/containerd/containerd/mount"
+)
+
+// Mount is not implemented on this platform; it always returns an error.
+func (RealOS) Mount(source string, target string, fstype string, flags uintptr, data string) error {
+ return mount.ErrNotImplementOnUnix
+}
+
+// Unmount will call mount.Unmount to unmount the target.
+func (RealOS) Unmount(target string) error {
+ return mount.Unmount(target, 0)
+}
diff --git a/pkg/os/os.go b/pkg/os/os.go
new file mode 100644
index 000000000..807a34d92
--- /dev/null
+++ b/pkg/os/os.go
@@ -0,0 +1,102 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package os
+
+import (
+ "io"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+
+ "github.com/docker/docker/pkg/symlink"
+)
+
+// OS collects system level operations that need to be mocked out
+// during tests.
+type OS interface {
+ MkdirAll(path string, perm os.FileMode) error
+ RemoveAll(path string) error
+ Stat(name string) (os.FileInfo, error)
+ ResolveSymbolicLink(name string) (string, error)
+ FollowSymlinkInScope(path, scope string) (string, error)
+ CopyFile(src, dest string, perm os.FileMode) error
+ WriteFile(filename string, data []byte, perm os.FileMode) error
+ Hostname() (string, error)
+}
+
+// RealOS is used to dispatch the real system level operations.
+type RealOS struct{}
+
+// MkdirAll will call os.MkdirAll to create a directory.
+func (RealOS) MkdirAll(path string, perm os.FileMode) error {
+ return os.MkdirAll(path, perm)
+}
+
+// RemoveAll will call os.RemoveAll to remove the path and its children.
+func (RealOS) RemoveAll(path string) error {
+ return os.RemoveAll(path)
+}
+
+// Stat will call os.Stat to get the status of the given file.
+func (RealOS) Stat(name string) (os.FileInfo, error) {
+ return os.Stat(name)
+}
+
+// ResolveSymbolicLink will follow any symbolic links
+func (RealOS) ResolveSymbolicLink(path string) (string, error) {
+ info, err := os.Lstat(path)
+ if err != nil {
+ return "", err
+ }
+ if info.Mode()&os.ModeSymlink != os.ModeSymlink {
+ return path, nil
+ }
+ return filepath.EvalSymlinks(path)
+}
+
+// FollowSymlinkInScope will call symlink.FollowSymlinkInScope.
+func (RealOS) FollowSymlinkInScope(path, scope string) (string, error) {
+ return symlink.FollowSymlinkInScope(path, scope)
+}
+
+// CopyFile will copy src file to dest file
+func (RealOS) CopyFile(src, dest string, perm os.FileMode) error {
+ in, err := os.Open(src)
+ if err != nil {
+ return err
+ }
+ defer in.Close()
+
+ out, err := os.OpenFile(dest, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, perm)
+ if err != nil {
+ return err
+ }
+ defer out.Close()
+
+ _, err = io.Copy(out, in)
+ return err
+}
+
+// WriteFile will call ioutil.WriteFile to write data into a file.
+func (RealOS) WriteFile(filename string, data []byte, perm os.FileMode) error {
+ return ioutil.WriteFile(filename, data, perm)
+}
+
+// Hostname will call os.Hostname to get the hostname of the host.
+func (RealOS) Hostname() (string, error) {
+ return os.Hostname()
+}
diff --git a/pkg/os/os_unix.go b/pkg/os/os_unix.go
new file mode 100644
index 000000000..eaf0984dd
--- /dev/null
+++ b/pkg/os/os_unix.go
@@ -0,0 +1,31 @@
+// +build !windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package os
+
+import (
+ "github.com/containerd/containerd/mount"
+)
+
+// UNIX collects unix system level operations that need to be
+// mocked out during tests.
+type UNIX interface {
+ Mount(source string, target string, fstype string, flags uintptr, data string) error
+ Unmount(target string) error
+ LookupMount(path string) (mount.Info, error)
+}
diff --git a/pkg/os/testing/fake_os.go b/pkg/os/testing/fake_os.go
new file mode 100644
index 000000000..957b77424
--- /dev/null
+++ b/pkg/os/testing/fake_os.go
@@ -0,0 +1,254 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package testing
+
+import (
+ "os"
+ "sync"
+
+ containerdmount "github.com/containerd/containerd/mount"
+
+ osInterface "github.com/containerd/cri/pkg/os"
+)
+
+// CalledDetail records the name and arguments of a called function.
+type CalledDetail struct {
+ // Name of the function called.
+ Name string
+ // Arguments of the function called.
+ Arguments []interface{}
+}
+
+// FakeOS mocks out certain OS calls to avoid perturbing the filesystem
+// If a member of the form `*Fn` is set, that function will be called in place
+// of the real call.
+type FakeOS struct {
+ sync.Mutex
+ MkdirAllFn func(string, os.FileMode) error
+ RemoveAllFn func(string) error
+ StatFn func(string) (os.FileInfo, error)
+ ResolveSymbolicLinkFn func(string) (string, error)
+ FollowSymlinkInScopeFn func(string, string) (string, error)
+ CopyFileFn func(string, string, os.FileMode) error
+ WriteFileFn func(string, []byte, os.FileMode) error
+ MountFn func(source string, target string, fstype string, flags uintptr, data string) error
+ UnmountFn func(target string) error
+ LookupMountFn func(path string) (containerdmount.Info, error)
+ HostnameFn func() (string, error)
+ calls []CalledDetail
+ errors map[string]error
+}
+
+var _ osInterface.OS = &FakeOS{}
+
+// getError returns and consumes the injected error for the named call, if any.
+func (f *FakeOS) getError(op string) error {
+ f.Lock()
+ defer f.Unlock()
+ err, ok := f.errors[op]
+ if ok {
+ delete(f.errors, op)
+ return err
+ }
+ return nil
+}
+
+// InjectError injects an error for the named call.
+func (f *FakeOS) InjectError(fn string, err error) {
+ f.Lock()
+ defer f.Unlock()
+ f.errors[fn] = err
+}
+
+// InjectErrors injects errors for multiple calls.
+func (f *FakeOS) InjectErrors(errs map[string]error) {
+ f.Lock()
+ defer f.Unlock()
+ for fn, err := range errs {
+ f.errors[fn] = err
+ }
+}
+
+// ClearErrors clears all injected errors.
+func (f *FakeOS) ClearErrors() {
+ f.Lock()
+ defer f.Unlock()
+ f.errors = make(map[string]error)
+}
+
+func (f *FakeOS) appendCalls(name string, args ...interface{}) {
+ f.Lock()
+ defer f.Unlock()
+ f.calls = append(f.calls, CalledDetail{Name: name, Arguments: args})
+}
+
+// GetCalls returns a copy of the details of all recorded calls.
+func (f *FakeOS) GetCalls() []CalledDetail {
+ f.Lock()
+ defer f.Unlock()
+ return append([]CalledDetail{}, f.calls...)
+}
+
+// NewFakeOS creates a FakeOS.
+func NewFakeOS() *FakeOS {
+ return &FakeOS{
+ errors: make(map[string]error),
+ }
+}
+
+// MkdirAll is a fake call that invokes MkdirAllFn or just returns nil.
+func (f *FakeOS) MkdirAll(path string, perm os.FileMode) error {
+ f.appendCalls("MkdirAll", path, perm)
+ if err := f.getError("MkdirAll"); err != nil {
+ return err
+ }
+
+ if f.MkdirAllFn != nil {
+ return f.MkdirAllFn(path, perm)
+ }
+ return nil
+}
+
+// RemoveAll is a fake call that invokes RemoveAllFn or just returns nil.
+func (f *FakeOS) RemoveAll(path string) error {
+ f.appendCalls("RemoveAll", path)
+ if err := f.getError("RemoveAll"); err != nil {
+ return err
+ }
+
+ if f.RemoveAllFn != nil {
+ return f.RemoveAllFn(path)
+ }
+ return nil
+}
+
+// Stat is a fake call that invokes StatFn or just returns nil.
+func (f *FakeOS) Stat(name string) (os.FileInfo, error) {
+ f.appendCalls("Stat", name)
+ if err := f.getError("Stat"); err != nil {
+ return nil, err
+ }
+
+ if f.StatFn != nil {
+ return f.StatFn(name)
+ }
+ return nil, nil
+}
+
+// ResolveSymbolicLink is a fake call that invokes ResolveSymbolicLinkFn or returns its input
+func (f *FakeOS) ResolveSymbolicLink(path string) (string, error) {
+ f.appendCalls("ResolveSymbolicLink", path)
+ if err := f.getError("ResolveSymbolicLink"); err != nil {
+ return "", err
+ }
+
+ if f.ResolveSymbolicLinkFn != nil {
+ return f.ResolveSymbolicLinkFn(path)
+ }
+ return path, nil
+}
+
+// FollowSymlinkInScope is a fake call that invokes FollowSymlinkInScope or returns its input
+func (f *FakeOS) FollowSymlinkInScope(path, scope string) (string, error) {
+ f.appendCalls("FollowSymlinkInScope", path, scope)
+ if err := f.getError("FollowSymlinkInScope"); err != nil {
+ return "", err
+ }
+
+ if f.FollowSymlinkInScopeFn != nil {
+ return f.FollowSymlinkInScopeFn(path, scope)
+ }
+ return path, nil
+}
+
+// CopyFile is a fake call that invokes CopyFileFn or just returns nil.
+func (f *FakeOS) CopyFile(src, dest string, perm os.FileMode) error {
+ f.appendCalls("CopyFile", src, dest, perm)
+ if err := f.getError("CopyFile"); err != nil {
+ return err
+ }
+
+ if f.CopyFileFn != nil {
+ return f.CopyFileFn(src, dest, perm)
+ }
+ return nil
+}
+
+// WriteFile is a fake call that invokes WriteFileFn or just returns nil.
+func (f *FakeOS) WriteFile(filename string, data []byte, perm os.FileMode) error {
+ f.appendCalls("WriteFile", filename, data, perm)
+ if err := f.getError("WriteFile"); err != nil {
+ return err
+ }
+
+ if f.WriteFileFn != nil {
+ return f.WriteFileFn(filename, data, perm)
+ }
+ return nil
+}
+
+// Mount is a fake call that invokes MountFn or just returns nil.
+func (f *FakeOS) Mount(source string, target string, fstype string, flags uintptr, data string) error {
+ f.appendCalls("Mount", source, target, fstype, flags, data)
+ if err := f.getError("Mount"); err != nil {
+ return err
+ }
+
+ if f.MountFn != nil {
+ return f.MountFn(source, target, fstype, flags, data)
+ }
+ return nil
+}
+
+// Unmount is a fake call that invokes UnmountFn or just returns nil.
+func (f *FakeOS) Unmount(target string) error {
+ f.appendCalls("Unmount", target)
+ if err := f.getError("Unmount"); err != nil {
+ return err
+ }
+
+ if f.UnmountFn != nil {
+ return f.UnmountFn(target)
+ }
+ return nil
+}
+
+// LookupMount is a fake call that invokes LookupMountFn or just returns an empty mount Info.
+func (f *FakeOS) LookupMount(path string) (containerdmount.Info, error) {
+ f.appendCalls("LookupMount", path)
+ if err := f.getError("LookupMount"); err != nil {
+ return containerdmount.Info{}, err
+ }
+
+ if f.LookupMountFn != nil {
+ return f.LookupMountFn(path)
+ }
+ return containerdmount.Info{}, nil
+}
+
+// Hostname is a fake call that invokes HostnameFn or just returns an empty hostname.
+func (f *FakeOS) Hostname() (string, error) {
+ f.appendCalls("Hostname")
+ if err := f.getError("Hostname"); err != nil {
+ return "", err
+ }
+
+ if f.HostnameFn != nil {
+ return f.HostnameFn()
+ }
+ return "", nil
+}
diff --git a/pkg/os/testing/fake_os_unix.go b/pkg/os/testing/fake_os_unix.go
new file mode 100644
index 000000000..67fa4baca
--- /dev/null
+++ b/pkg/os/testing/fake_os_unix.go
@@ -0,0 +1,23 @@
+// +build !windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package testing
+
+import osInterface "github.com/containerd/cri/pkg/os"
+
+var _ osInterface.UNIX = &FakeOS{}
diff --git a/pkg/registrar/registrar.go b/pkg/registrar/registrar.go
new file mode 100644
index 000000000..b83e7ce85
--- /dev/null
+++ b/pkg/registrar/registrar.go
@@ -0,0 +1,102 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package registrar
+
+import (
+ "sync"
+
+ "github.com/pkg/errors"
+)
+
+// Registrar stores one-to-one name<->key mappings.
+// Names and keys must be unique.
+// Registrar is safe for concurrent access.
+type Registrar struct {
+ lock sync.Mutex
+ nameToKey map[string]string
+ keyToName map[string]string
+}
+
+// NewRegistrar creates a new Registrar with the empty indexes.
+func NewRegistrar() *Registrar {
+ return &Registrar{
+ nameToKey: make(map[string]string),
+ keyToName: make(map[string]string),
+ }
+}
+
+// Reserve registers a name<->key mapping, name or key must not
+// be empty.
+// Reserve is idempotent.
+// Attempting to reserve a conflicting key<->name mapping results
+// in an error.
+// A name<->key reservation is globally unique.
+func (r *Registrar) Reserve(name, key string) error {
+ r.lock.Lock()
+ defer r.lock.Unlock()
+
+ if name == "" || key == "" {
+ return errors.Errorf("invalid name %q or key %q", name, key)
+ }
+
+ if k, exists := r.nameToKey[name]; exists {
+ if k != key {
+ return errors.Errorf("name %q is reserved for %q", name, k)
+ }
+ return nil
+ }
+
+ if n, exists := r.keyToName[key]; exists {
+ if n != name {
+ return errors.Errorf("key %q is reserved for %q", key, n)
+ }
+ return nil
+ }
+
+ r.nameToKey[name] = key
+ r.keyToName[key] = name
+ return nil
+}
+
+// ReleaseByName releases the reserved name<->key mapping by name.
+// Once released, the name and the key can be reserved again.
+func (r *Registrar) ReleaseByName(name string) {
+ r.lock.Lock()
+ defer r.lock.Unlock()
+
+ key, exists := r.nameToKey[name]
+ if !exists {
+ return
+ }
+
+ delete(r.nameToKey, name)
+ delete(r.keyToName, key)
+}
+
+// ReleaseByKey releases the reserved name<->key mapping by key.
+func (r *Registrar) ReleaseByKey(key string) {
+ r.lock.Lock()
+ defer r.lock.Unlock()
+
+ name, exists := r.keyToName[key]
+ if !exists {
+ return
+ }
+
+ delete(r.nameToKey, name)
+ delete(r.keyToName, key)
+}
diff --git a/pkg/registrar/registrar_test.go b/pkg/registrar/registrar_test.go
new file mode 100644
index 000000000..318bfa2fd
--- /dev/null
+++ b/pkg/registrar/registrar_test.go
@@ -0,0 +1,54 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package registrar
+
+import (
+ "testing"
+
+ assertlib "github.com/stretchr/testify/assert"
+)
+
+func TestRegistrar(t *testing.T) {
+ r := NewRegistrar()
+ assert := assertlib.New(t)
+
+ t.Logf("should be able to reserve a name<->key mapping")
+ assert.NoError(r.Reserve("test-name-1", "test-id-1"))
+
+ t.Logf("should be able to reserve a new name<->key mapping")
+ assert.NoError(r.Reserve("test-name-2", "test-id-2"))
+
+ t.Logf("should be able to reserve the same name<->key mapping")
+ assert.NoError(r.Reserve("test-name-1", "test-id-1"))
+
+ t.Logf("should not be able to reserve conflict name<->key mapping")
+ assert.Error(r.Reserve("test-name-1", "test-id-conflict"))
+ assert.Error(r.Reserve("test-name-conflict", "test-id-2"))
+
+ t.Logf("should be able to release name<->key mapping by key")
+ r.ReleaseByKey("test-id-1")
+
+ t.Logf("should be able to release name<->key mapping by name")
+ r.ReleaseByName("test-name-2")
+
+ t.Logf("should be able to reserve new name<->key mapping after release")
+ assert.NoError(r.Reserve("test-name-1", "test-id-new"))
+ assert.NoError(r.Reserve("test-name-new", "test-id-2"))
+
+ t.Logf("should be able to reserve same name/key name<->key")
+ assert.NoError(r.Reserve("same-name-id", "same-name-id"))
+}
diff --git a/pkg/seccomp/fixtures/proc_self_status b/pkg/seccomp/fixtures/proc_self_status
new file mode 100644
index 000000000..0e0084f6c
--- /dev/null
+++ b/pkg/seccomp/fixtures/proc_self_status
@@ -0,0 +1,47 @@
+Name: cat
+State: R (running)
+Tgid: 19383
+Ngid: 0
+Pid: 19383
+PPid: 19275
+TracerPid: 0
+Uid: 1000 1000 1000 1000
+Gid: 1000 1000 1000 1000
+FDSize: 256
+Groups: 24 25 27 29 30 44 46 102 104 108 111 1000 1001
+NStgid: 19383
+NSpid: 19383
+NSpgid: 19383
+NSsid: 19275
+VmPeak: 5944 kB
+VmSize: 5944 kB
+VmLck: 0 kB
+VmPin: 0 kB
+VmHWM: 744 kB
+VmRSS: 744 kB
+VmData: 324 kB
+VmStk: 136 kB
+VmExe: 48 kB
+VmLib: 1776 kB
+VmPTE: 32 kB
+VmPMD: 12 kB
+VmSwap: 0 kB
+Threads: 1
+SigQ: 0/30067
+SigPnd: 0000000000000000
+ShdPnd: 0000000000000000
+SigBlk: 0000000000000000
+SigIgn: 0000000000000080
+SigCgt: 0000000000000000
+CapInh: 0000000000000000
+CapPrm: 0000000000000000
+CapEff: 0000000000000000
+CapBnd: 0000003fffffffff
+CapAmb: 0000000000000000
+Seccomp: 0
+Cpus_allowed: f
+Cpus_allowed_list: 0-3
+Mems_allowed: 00000000,00000001
+Mems_allowed_list: 0
+voluntary_ctxt_switches: 0
+nonvoluntary_ctxt_switches: 1
diff --git a/pkg/seccomp/seccomp_linux.go b/pkg/seccomp/seccomp_linux.go
new file mode 100644
index 000000000..d41a98bf1
--- /dev/null
+++ b/pkg/seccomp/seccomp_linux.go
@@ -0,0 +1,88 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+ Copyright The runc Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package seccomp
+
+import (
+ "bufio"
+ "os"
+ "strings"
+
+ "golang.org/x/sys/unix"
+)
+
+// IsEnabled returns whether the kernel has been configured to support seccomp.
+// From https://github.com/opencontainers/runc/blob/v1.0.0-rc91/libcontainer/seccomp/seccomp_linux.go#L86-L102
+func IsEnabled() bool {
+ // Try to read from /proc/self/status for kernels > 3.8
+ s, err := parseStatusFile("/proc/self/status")
+ if err != nil {
+ // Check if Seccomp is supported, via CONFIG_SECCOMP.
+ if err := unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0); err != unix.EINVAL {
+ // Make sure the kernel has CONFIG_SECCOMP_FILTER.
+ if err := unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0); err != unix.EINVAL {
+ return true
+ }
+ }
+ return false
+ }
+ _, ok := s["Seccomp"]
+ return ok
+}
+
+// parseStatusFile is from https://github.com/opencontainers/runc/blob/v1.0.0-rc91/libcontainer/seccomp/seccomp_linux.go#L243-L268
+func parseStatusFile(path string) (map[string]string, error) {
+ f, err := os.Open(path)
+ if err != nil {
+ return nil, err
+ }
+ defer f.Close()
+
+ s := bufio.NewScanner(f)
+ status := make(map[string]string)
+
+ for s.Scan() {
+ text := s.Text()
+ parts := strings.Split(text, ":")
+
+ if len(parts) <= 1 {
+ continue
+ }
+
+ status[parts[0]] = parts[1]
+ }
+ if err := s.Err(); err != nil {
+ return nil, err
+ }
+
+ return status, nil
+}
diff --git a/pkg/seccomp/seccomp_linux_test.go b/pkg/seccomp/seccomp_linux_test.go
new file mode 100644
index 000000000..850ab97e1
--- /dev/null
+++ b/pkg/seccomp/seccomp_linux_test.go
@@ -0,0 +1,48 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+ Copyright The runc Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package seccomp
+
+import "testing"
+
+// TestParseStatusFile is from https://github.com/opencontainers/runc/blob/v1.0.0-rc91/libcontainer/seccomp/seccomp_linux_test.go
+func TestParseStatusFile(t *testing.T) {
+ s, err := parseStatusFile("fixtures/proc_self_status")
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if _, ok := s["Seccomp"]; !ok {
+
+ t.Fatal("expected to find 'Seccomp' in the map but did not.")
+ }
+}
diff --git a/pkg/seccomp/seccomp_unsupported.go b/pkg/seccomp/seccomp_unsupported.go
new file mode 100644
index 000000000..3f7562605
--- /dev/null
+++ b/pkg/seccomp/seccomp_unsupported.go
@@ -0,0 +1,23 @@
+// +build !linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package seccomp
+
+func IsEnabled() bool {
+ return false
+}
diff --git a/pkg/server/bandwidth/doc.go b/pkg/server/bandwidth/doc.go
new file mode 100644
index 000000000..1fd55229d
--- /dev/null
+++ b/pkg/server/bandwidth/doc.go
@@ -0,0 +1,34 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2015 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package bandwidth provides utilities for bandwidth shaping
+package bandwidth
diff --git a/pkg/server/bandwidth/fake_shaper.go b/pkg/server/bandwidth/fake_shaper.go
new file mode 100644
index 000000000..e987ceca6
--- /dev/null
+++ b/pkg/server/bandwidth/fake_shaper.go
@@ -0,0 +1,72 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2015 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package bandwidth
+
+import (
+ "errors"
+
+ "k8s.io/apimachinery/pkg/api/resource"
+)
+
+// FakeShaper provides an implementation of the bandwidth.Shaper interface.
+// Beware: this implementation has no features besides Reset and GetCIDRs.
+type FakeShaper struct {
+ CIDRs []string
+ ResetCIDRs []string
+}
+
+// Limit is not implemented
+func (f *FakeShaper) Limit(cidr string, egress, ingress *resource.Quantity) error {
+ return errors.New("unimplemented")
+}
+
+// Reset appends a particular CIDR to the set of ResetCIDRs being managed by this shaper
+func (f *FakeShaper) Reset(cidr string) error {
+ f.ResetCIDRs = append(f.ResetCIDRs, cidr)
+ return nil
+}
+
+// ReconcileInterface is not implemented
+func (f *FakeShaper) ReconcileInterface() error {
+ return errors.New("unimplemented")
+}
+
+// ReconcileCIDR is not implemented
+func (f *FakeShaper) ReconcileCIDR(cidr string, egress, ingress *resource.Quantity) error {
+ return errors.New("unimplemented")
+}
+
+// GetCIDRs returns the set of CIDRs that are being managed by this shaper
+func (f *FakeShaper) GetCIDRs() ([]string, error) {
+ return f.CIDRs, nil
+}
diff --git a/pkg/server/bandwidth/interfaces.go b/pkg/server/bandwidth/interfaces.go
new file mode 100644
index 000000000..9eb4bfb60
--- /dev/null
+++ b/pkg/server/bandwidth/interfaces.go
@@ -0,0 +1,56 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2015 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package bandwidth
+
+import "k8s.io/apimachinery/pkg/api/resource"
+
+// Shaper is the interface implemented by structs that provide
+// bandwidth shaping for a network interface.
+type Shaper interface {
+ // Limit the bandwidth for a particular CIDR on a particular interface
+ // * ingress and egress are in bits/second
+ // * cidr is expected to be a valid network CIDR (e.g. '1.2.3.4/32' or '10.20.0.1/16')
+ // 'egress' bandwidth limit applies to all packets on the interface whose source matches 'cidr'
+ // 'ingress' bandwidth limit applies to all packets on the interface whose destination matches 'cidr'
+ // Limits are aggregate limits for the CIDR, not per IP address. CIDRs must be unique, but can be overlapping, traffic
+ // that matches multiple CIDRs counts against all limits.
+ Limit(cidr string, egress, ingress *resource.Quantity) error
+ // Remove a bandwidth limit for a particular CIDR on a particular network interface
+ Reset(cidr string) error
+ // Reconcile the interface managed by this shaper with the state on the ground.
+ ReconcileInterface() error
+ // Reconcile a CIDR managed by this shaper with the state on the ground
+ ReconcileCIDR(cidr string, egress, ingress *resource.Quantity) error
+ // GetCIDRs returns the set of CIDRs that are being managed by this shaper
+ GetCIDRs() ([]string, error)
+}
diff --git a/pkg/server/bandwidth/linux.go b/pkg/server/bandwidth/linux.go
new file mode 100644
index 000000000..e8d710825
--- /dev/null
+++ b/pkg/server/bandwidth/linux.go
@@ -0,0 +1,361 @@
+// +build linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2015 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package bandwidth
+
+import (
+ "bufio"
+ "bytes"
+ "encoding/hex"
+ "fmt"
+ "net"
+ "regexp"
+ "strings"
+
+ "k8s.io/apimachinery/pkg/api/resource"
+ "k8s.io/apimachinery/pkg/util/sets"
+ "k8s.io/utils/exec"
+
+ "k8s.io/klog/v2"
+)
+
+var (
+ classShowMatcher = regexp.MustCompile(`class htb (1:\d+)`)
+ classAndHandleMatcher = regexp.MustCompile(`filter parent 1:.*fh (\d+::\d+).*flowid (\d+:\d+)`)
+)
+
+// tcShaper provides an implementation of the Shaper interface on Linux using the 'tc' tool.
+// In general, using this requires that the caller possess the CAP_NET_ADMIN capability, though if you
+// do this within a container, it only requires the NS_CAPABLE capability for manipulations to that
+// container's network namespace.
+// Uses the hierarchical token bucket queuing discipline (htb), this requires Linux 2.4.20 or newer
+// or a custom kernel with that queuing discipline backported.
+type tcShaper struct {
+ e exec.Interface
+ iface string
+}
+
+// NewTCShaper makes a new tcShaper for the given interface
+func NewTCShaper(iface string) Shaper {
+ shaper := &tcShaper{
+ e: exec.New(),
+ iface: iface,
+ }
+ return shaper
+}
+
+func (t *tcShaper) execAndLog(cmdStr string, args ...string) error {
+ klog.V(6).Infof("Running: %s %s", cmdStr, strings.Join(args, " "))
+ cmd := t.e.Command(cmdStr, args...)
+ out, err := cmd.CombinedOutput()
+ klog.V(6).Infof("Output from tc: %s", string(out))
+ return err
+}
+
+func (t *tcShaper) nextClassID() (int, error) {
+ data, err := t.e.Command("tc", "class", "show", "dev", t.iface).CombinedOutput()
+ if err != nil {
+ return -1, err
+ }
+
+ scanner := bufio.NewScanner(bytes.NewBuffer(data))
+ classes := sets.String{}
+ for scanner.Scan() {
+ line := strings.TrimSpace(scanner.Text())
+ // skip empty lines
+ if len(line) == 0 {
+ continue
+ }
+ // expected tc line:
+ // class htb 1:1 root prio 0 rate 1000Kbit ceil 1000Kbit burst 1600b cburst 1600b
+ matches := classShowMatcher.FindStringSubmatch(line)
+ if len(matches) != 2 {
+ return -1, fmt.Errorf("unexpected output from tc: %s (%v)", scanner.Text(), matches)
+ }
+ classes.Insert(matches[1])
+ }
+
+ // Make sure it doesn't go forever
+ for nextClass := 1; nextClass < 10000; nextClass++ {
+ if !classes.Has(fmt.Sprintf("1:%d", nextClass)) {
+ return nextClass, nil
+ }
+ }
+ // This should really never happen
+ return -1, fmt.Errorf("exhausted class space, please try again")
+}
+
+// hexCIDR converts a CIDR from text to a hex representation.
+// It strips any masked parts of the IP, so 1.2.3.4/16 becomes hex(1.2.0.0)/ffffffff.
+func hexCIDR(cidr string) (string, error) {
+ ip, ipnet, err := net.ParseCIDR(cidr)
+ if err != nil {
+ return "", err
+ }
+ ip = ip.Mask(ipnet.Mask)
+ hexIP := hex.EncodeToString([]byte(ip))
+ hexMask := ipnet.Mask.String()
+ return hexIP + "/" + hexMask, nil
+}
+
+// asciiCIDR converts a CIDR from hex representation to text, the inverse of hexCIDR.
+func asciiCIDR(cidr string) (string, error) {
+ parts := strings.Split(cidr, "/")
+ if len(parts) != 2 {
+ return "", fmt.Errorf("unexpected CIDR format: %s", cidr)
+ }
+ ipData, err := hex.DecodeString(parts[0])
+ if err != nil {
+ return "", err
+ }
+ ip := net.IP(ipData)
+
+ maskData, err := hex.DecodeString(parts[1])
+ if err != nil {
+ return "", err
+ }
+ mask := net.IPMask(maskData)
+ size, _ := mask.Size()
+
+ return fmt.Sprintf("%s/%d", ip.String(), size), nil
+}
+
+func (t *tcShaper) findCIDRClass(cidr string) (classAndHandleList [][]string, found bool, err error) {
+ data, err := t.e.Command("tc", "filter", "show", "dev", t.iface).CombinedOutput()
+ if err != nil {
+ return classAndHandleList, false, err
+ }
+
+ hex, err := hexCIDR(cidr)
+ if err != nil {
+ return classAndHandleList, false, err
+ }
+ spec := fmt.Sprintf("match %s", hex)
+
+ scanner := bufio.NewScanner(bytes.NewBuffer(data))
+ filter := ""
+ for scanner.Scan() {
+ line := strings.TrimSpace(scanner.Text())
+ if len(line) == 0 {
+ continue
+ }
+ if strings.HasPrefix(line, "filter") {
+ filter = line
+ continue
+ }
+ if strings.Contains(line, spec) {
+ // expected tc line:
+ // `filter parent 1: protocol ip pref 1 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1` (old version) or
+ // `filter parent 1: protocol ip pref 1 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 not_in_hw` (new version)
+ matches := classAndHandleMatcher.FindStringSubmatch(filter)
+ if len(matches) != 3 {
+ return classAndHandleList, false, fmt.Errorf("unexpected output from tc: %s %d (%v)", filter, len(matches), matches)
+ }
+ resultTmp := []string{matches[2], matches[1]}
+ classAndHandleList = append(classAndHandleList, resultTmp)
+ }
+ }
+ if len(classAndHandleList) > 0 {
+ return classAndHandleList, true, nil
+ }
+ return classAndHandleList, false, nil
+}
+
+func makeKBitString(rsrc *resource.Quantity) string {
+ return fmt.Sprintf("%dkbit", (rsrc.Value() / 1000))
+}
+
+func (t *tcShaper) makeNewClass(rate string) (int, error) {
+ class, err := t.nextClassID()
+ if err != nil {
+ return -1, err
+ }
+ if err := t.execAndLog("tc", "class", "add",
+ "dev", t.iface,
+ "parent", "1:",
+ "classid", fmt.Sprintf("1:%d", class),
+ "htb", "rate", rate); err != nil {
+ return -1, err
+ }
+ return class, nil
+}
+
+func (t *tcShaper) Limit(cidr string, upload, download *resource.Quantity) (err error) {
+ var downloadClass, uploadClass int
+ if download != nil {
+ if downloadClass, err = t.makeNewClass(makeKBitString(download)); err != nil {
+ return err
+ }
+ if err := t.execAndLog("tc", "filter", "add",
+ "dev", t.iface,
+ "protocol", "ip",
+ "parent", "1:0",
+ "prio", "1", "u32",
+ "match", "ip", "dst", cidr,
+ "flowid", fmt.Sprintf("1:%d", downloadClass)); err != nil {
+ return err
+ }
+ }
+ if upload != nil {
+ if uploadClass, err = t.makeNewClass(makeKBitString(upload)); err != nil {
+ return err
+ }
+ if err := t.execAndLog("tc", "filter", "add",
+ "dev", t.iface,
+ "protocol", "ip",
+ "parent", "1:0",
+ "prio", "1", "u32",
+ "match", "ip", "src", cidr,
+ "flowid", fmt.Sprintf("1:%d", uploadClass)); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// interfaceExists tests whether a qdisc is configured on the interface; if so, it
+// returns true along with the qdisc status line. It returns false, "" otherwise.
+func (t *tcShaper) interfaceExists() (bool, string, error) {
+ data, err := t.e.Command("tc", "qdisc", "show", "dev", t.iface).CombinedOutput()
+ if err != nil {
+ return false, "", err
+ }
+ value := strings.TrimSpace(string(data))
+ if len(value) == 0 {
+ return false, "", nil
+ }
+ // Newer versions of tc and/or the kernel return the following instead of nothing:
+ // qdisc noqueue 0: root refcnt 2
+ fields := strings.Fields(value)
+ if len(fields) > 1 && fields[1] == "noqueue" {
+ return false, "", nil
+ }
+ return true, value, nil
+}
+
+func (t *tcShaper) ReconcileCIDR(cidr string, upload, download *resource.Quantity) error {
+ _, found, err := t.findCIDRClass(cidr)
+ if err != nil {
+ return err
+ }
+ if !found {
+ return t.Limit(cidr, upload, download)
+ }
+ // TODO: actually check bandwidth limits here
+ return nil
+}
+
+func (t *tcShaper) ReconcileInterface() error {
+ exists, output, err := t.interfaceExists()
+ if err != nil {
+ return err
+ }
+ if !exists {
+ klog.V(4).Info("Didn't find bandwidth interface, creating")
+ return t.initializeInterface()
+ }
+ fields := strings.Split(output, " ")
+ if len(fields) < 12 || fields[1] != "htb" || fields[2] != "1:" {
+ if err := t.deleteInterface(fields[2]); err != nil {
+ return err
+ }
+ return t.initializeInterface()
+ }
+ return nil
+}
+
+func (t *tcShaper) initializeInterface() error {
+ return t.execAndLog("tc", "qdisc", "add", "dev", t.iface, "root", "handle", "1:", "htb", "default", "30")
+}
+
+func (t *tcShaper) Reset(cidr string) error {
+ classAndHandle, found, err := t.findCIDRClass(cidr)
+ if err != nil {
+ return err
+ }
+ if !found {
+ return fmt.Errorf("Failed to find cidr: %s on interface: %s", cidr, t.iface)
+ }
+ for i := 0; i < len(classAndHandle); i++ {
+ if err := t.execAndLog("tc", "filter", "del",
+ "dev", t.iface,
+ "parent", "1:",
+ "proto", "ip",
+ "prio", "1",
+ "handle", classAndHandle[i][1], "u32"); err != nil {
+ return err
+ }
+ if err := t.execAndLog("tc", "class", "del",
+ "dev", t.iface,
+ "parent", "1:",
+ "classid", classAndHandle[i][0]); err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+func (t *tcShaper) deleteInterface(class string) error {
+ return t.execAndLog("tc", "qdisc", "delete", "dev", t.iface, "root", "handle", class)
+}
+
+func (t *tcShaper) GetCIDRs() ([]string, error) {
+ data, err := t.e.Command("tc", "filter", "show", "dev", t.iface).CombinedOutput()
+ if err != nil {
+ return nil, err
+ }
+
+ result := []string{}
+ scanner := bufio.NewScanner(bytes.NewBuffer(data))
+ for scanner.Scan() {
+ line := strings.TrimSpace(scanner.Text())
+ if len(line) == 0 {
+ continue
+ }
+ if strings.Contains(line, "match") {
+ parts := strings.Split(line, " ")
+ // expected tc line:
+ // match <cidr> at <number>
+ if len(parts) != 4 {
+ return nil, fmt.Errorf("unexpected output: %v", parts)
+ }
+ cidr, err := asciiCIDR(parts[1])
+ if err != nil {
+ return nil, err
+ }
+ result = append(result, cidr)
+ }
+ }
+ return result, nil
+}
diff --git a/pkg/server/bandwidth/unsupported.go b/pkg/server/bandwidth/unsupported.go
new file mode 100644
index 000000000..12c5ad83b
--- /dev/null
+++ b/pkg/server/bandwidth/unsupported.go
@@ -0,0 +1,69 @@
+// +build !linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2015 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package bandwidth
+
+import (
+ "errors"
+
+ "k8s.io/apimachinery/pkg/api/resource"
+)
+
+type unsupportedShaper struct {
+}
+
+// NewTCShaper makes a new unsupportedShaper for the given interface
+func NewTCShaper(iface string) Shaper {
+ return &unsupportedShaper{}
+}
+
+func (f *unsupportedShaper) Limit(cidr string, egress, ingress *resource.Quantity) error {
+ return errors.New("unimplemented")
+}
+
+func (f *unsupportedShaper) Reset(cidr string) error {
+ return nil
+}
+
+func (f *unsupportedShaper) ReconcileInterface() error {
+ return errors.New("unimplemented")
+}
+
+func (f *unsupportedShaper) ReconcileCIDR(cidr string, egress, ingress *resource.Quantity) error {
+ return errors.New("unimplemented")
+}
+
+func (f *unsupportedShaper) GetCIDRs() ([]string, error) {
+ return []string{}, nil
+}
diff --git a/pkg/server/bandwidth/utils.go b/pkg/server/bandwidth/utils.go
new file mode 100644
index 000000000..f90718b6b
--- /dev/null
+++ b/pkg/server/bandwidth/utils.go
@@ -0,0 +1,82 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2015 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package bandwidth
+
+import (
+ "fmt"
+
+ "k8s.io/apimachinery/pkg/api/resource"
+)
+
+var minRsrc = resource.MustParse("1k")
+var maxRsrc = resource.MustParse("1P")
+
+func validateBandwidthIsReasonable(rsrc *resource.Quantity) error {
+ if rsrc.Value() < minRsrc.Value() {
+ return fmt.Errorf("resource is unreasonably small (< 1kbit)")
+ }
+ if rsrc.Value() > maxRsrc.Value() {
+ return fmt.Errorf("resource is unreasonably large (> 1Pbit)")
+ }
+ return nil
+}
+
+// ExtractPodBandwidthResources extracts the ingress and egress from the given pod annotations
+func ExtractPodBandwidthResources(podAnnotations map[string]string) (ingress, egress *resource.Quantity, err error) {
+ if podAnnotations == nil {
+ return nil, nil, nil
+ }
+ str, found := podAnnotations["kubernetes.io/ingress-bandwidth"]
+ if found {
+ ingressValue, err := resource.ParseQuantity(str)
+ if err != nil {
+ return nil, nil, err
+ }
+ ingress = &ingressValue
+ if err := validateBandwidthIsReasonable(ingress); err != nil {
+ return nil, nil, err
+ }
+ }
+ str, found = podAnnotations["kubernetes.io/egress-bandwidth"]
+ if found {
+ egressValue, err := resource.ParseQuantity(str)
+ if err != nil {
+ return nil, nil, err
+ }
+ egress = &egressValue
+ if err := validateBandwidthIsReasonable(egress); err != nil {
+ return nil, nil, err
+ }
+ }
+ return ingress, egress, nil
+}
diff --git a/pkg/server/cni_conf_syncer.go b/pkg/server/cni_conf_syncer.go
new file mode 100644
index 000000000..03131a9be
--- /dev/null
+++ b/pkg/server/cni_conf_syncer.go
@@ -0,0 +1,121 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "os"
+ "sync"
+
+ cni "github.com/containerd/go-cni"
+ "github.com/fsnotify/fsnotify"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+)
+
+// cniNetConfSyncer is used to reload cni network conf triggered by fs change
+// events.
+type cniNetConfSyncer struct {
+ // only used for lastSyncStatus
+ sync.RWMutex
+ lastSyncStatus error
+
+ watcher *fsnotify.Watcher
+ confDir string
+ netPlugin cni.CNI
+ loadOpts []cni.CNIOpt
+}
+
+// newCNINetConfSyncer creates cni network conf syncer.
+func newCNINetConfSyncer(confDir string, netPlugin cni.CNI, loadOpts []cni.CNIOpt) (*cniNetConfSyncer, error) {
+ watcher, err := fsnotify.NewWatcher()
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to create fsnotify watcher")
+ }
+
+ if err := os.MkdirAll(confDir, 0700); err != nil {
+ return nil, errors.Wrapf(err, "failed to create cni conf dir=%s for watch", confDir)
+ }
+
+ if err := watcher.Add(confDir); err != nil {
+ return nil, errors.Wrapf(err, "failed to watch cni conf dir %s", confDir)
+ }
+
+ syncer := &cniNetConfSyncer{
+ watcher: watcher,
+ confDir: confDir,
+ netPlugin: netPlugin,
+ loadOpts: loadOpts,
+ }
+
+ if err := syncer.netPlugin.Load(syncer.loadOpts...); err != nil {
+ logrus.WithError(err).Error("failed to load cni during init, please check CRI plugin status before setting up network for pods")
+ syncer.updateLastStatus(err)
+ }
+ return syncer, nil
+}
+
+// syncLoop monitors any fs change events from cni conf dir and tries to reload
+// cni configuration.
+func (syncer *cniNetConfSyncer) syncLoop() error {
+ for {
+ select {
+ case event := <-syncer.watcher.Events:
+ // Only reload config when receiving write/rename/remove
+ // events
+ //
+ // TODO(fuweid): Might only reload target cni config
+ // files to prevent no-ops.
+ if event.Op&(fsnotify.Chmod|fsnotify.Create) > 0 {
+ logrus.Debugf("ignore event from cni conf dir: %s", event)
+ continue
+ }
+ logrus.Debugf("receiving change event from cni conf dir: %s", event)
+
+ lerr := syncer.netPlugin.Load(syncer.loadOpts...)
+ if lerr != nil {
+ logrus.WithError(lerr).
+ Errorf("failed to reload cni configuration after receiving fs change event(%s)", event)
+ }
+ syncer.updateLastStatus(lerr)
+
+ case err := <-syncer.watcher.Errors:
+ if err != nil {
+ logrus.WithError(err).Error("failed to continue sync cni conf change")
+ return err
+ }
+ }
+ }
+}
+
+// lastStatus retrieves last sync status.
+func (syncer *cniNetConfSyncer) lastStatus() error {
+ syncer.RLock()
+ defer syncer.RUnlock()
+ return syncer.lastSyncStatus
+}
+
+// updateLastStatus will be called after every single cni load.
+func (syncer *cniNetConfSyncer) updateLastStatus(err error) {
+ syncer.Lock()
+ defer syncer.Unlock()
+ syncer.lastSyncStatus = err
+}
+
+// stop stops watcher in the syncLoop.
+func (syncer *cniNetConfSyncer) stop() error {
+ return syncer.watcher.Close()
+}
diff --git a/pkg/server/container_attach.go b/pkg/server/container_attach.go
new file mode 100644
index 000000000..c8101ff7c
--- /dev/null
+++ b/pkg/server/container_attach.go
@@ -0,0 +1,84 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "io"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/log"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ "k8s.io/client-go/tools/remotecommand"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ cio "github.com/containerd/cri/pkg/server/io"
+)
+
+// Attach prepares a streaming endpoint to attach to a running container, and returns the address.
+func (c *criService) Attach(ctx context.Context, r *runtime.AttachRequest) (*runtime.AttachResponse, error) {
+ cntr, err := c.containerStore.Get(r.GetContainerId())
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to find container in store")
+ }
+ state := cntr.Status.Get().State()
+ if state != runtime.ContainerState_CONTAINER_RUNNING {
+ return nil, errors.Errorf("container is in %s state", criContainerStateToString(state))
+ }
+ return c.streamServer.GetAttach(r)
+}
+
+func (c *criService) attachContainer(ctx context.Context, id string, stdin io.Reader, stdout, stderr io.WriteCloser,
+ tty bool, resize <-chan remotecommand.TerminalSize) error {
+ ctx, cancel := context.WithCancel(ctx)
+ defer cancel()
+ // Get container from our container store.
+ cntr, err := c.containerStore.Get(id)
+ if err != nil {
+ return errors.Wrapf(err, "failed to find container %q in store", id)
+ }
+ id = cntr.ID
+
+ state := cntr.Status.Get().State()
+ if state != runtime.ContainerState_CONTAINER_RUNNING {
+ return errors.Errorf("container is in %s state", criContainerStateToString(state))
+ }
+
+ task, err := cntr.Container.Task(ctx, nil)
+ if err != nil {
+ return errors.Wrap(err, "failed to load task")
+ }
+ handleResizing(ctx, resize, func(size remotecommand.TerminalSize) {
+ if err := task.Resize(ctx, uint32(size.Width), uint32(size.Height)); err != nil {
+ log.G(ctx).WithError(err).Errorf("Failed to resize task %q console", id)
+ }
+ })
+
+ opts := cio.AttachOptions{
+ Stdin: stdin,
+ Stdout: stdout,
+ Stderr: stderr,
+ Tty: tty,
+ StdinOnce: cntr.Config.StdinOnce,
+ CloseStdin: func() error {
+ return task.CloseIO(ctx, containerd.WithStdinCloser)
+ },
+ }
+ // TODO(random-liu): Figure out whether we need to support historical output.
+ cntr.IO.Attach(opts)
+ return nil
+}
diff --git a/pkg/server/container_create.go b/pkg/server/container_create.go
new file mode 100644
index 000000000..01eac7bc8
--- /dev/null
+++ b/pkg/server/container_create.go
@@ -0,0 +1,343 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "path/filepath"
+ "time"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/containers"
+ "github.com/containerd/containerd/log"
+ "github.com/containerd/containerd/oci"
+ "github.com/containerd/typeurl"
+ "github.com/davecgh/go-spew/spew"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ selinux "github.com/opencontainers/selinux/go-selinux"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ customopts "github.com/containerd/cri/pkg/containerd/opts"
+ ctrdutil "github.com/containerd/cri/pkg/containerd/util"
+ cio "github.com/containerd/cri/pkg/server/io"
+ containerstore "github.com/containerd/cri/pkg/store/container"
+ "github.com/containerd/cri/pkg/util"
+)
+
+func init() {
+ typeurl.Register(&containerstore.Metadata{},
+ "github.com/containerd/cri/pkg/store/container", "Metadata")
+}
+
+// CreateContainer creates a new container in the given PodSandbox.
+func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateContainerRequest) (_ *runtime.CreateContainerResponse, retErr error) {
+ config := r.GetConfig()
+ log.G(ctx).Debugf("Container config %+v", config)
+ sandboxConfig := r.GetSandboxConfig()
+ sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId())
+ if err != nil {
+ return nil, errors.Wrapf(err, "failed to find sandbox id %q", r.GetPodSandboxId())
+ }
+ sandboxID := sandbox.ID
+ s, err := sandbox.Container.Task(ctx, nil)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to get sandbox container task")
+ }
+ sandboxPid := s.Pid()
+
+ // Generate unique id and name for the container and reserve the name.
+ // Reserve the container name to avoid concurrent `CreateContainer` request creating
+ // the same container.
+ id := util.GenerateID()
+ metadata := config.GetMetadata()
+ if metadata == nil {
+ return nil, errors.New("container config must include metadata")
+ }
+ containerName := metadata.Name
+ name := makeContainerName(metadata, sandboxConfig.GetMetadata())
+ log.G(ctx).Debugf("Generated id %q for container %q", id, name)
+ if err = c.containerNameIndex.Reserve(name, id); err != nil {
+ return nil, errors.Wrapf(err, "failed to reserve container name %q", name)
+ }
+ defer func() {
+ // Release the name if the function returns with an error.
+ if retErr != nil {
+ c.containerNameIndex.ReleaseByName(name)
+ }
+ }()
+
+ // Create initial internal container metadata.
+ meta := containerstore.Metadata{
+ ID: id,
+ Name: name,
+ SandboxID: sandboxID,
+ Config: config,
+ }
+
+ // Prepare container image snapshot. For container, the image should have
+ // been pulled before creating the container, so do not ensure the image.
+ image, err := c.localResolve(config.GetImage().GetImage())
+ if err != nil {
+ return nil, errors.Wrapf(err, "failed to resolve image %q", config.GetImage().GetImage())
+ }
+ containerdImage, err := c.toContainerdImage(ctx, image)
+ if err != nil {
+ return nil, errors.Wrapf(err, "failed to get image from containerd %q", image.ID)
+ }
+
+ // Run container using the same runtime with sandbox.
+ sandboxInfo, err := sandbox.Container.Info(ctx)
+ if err != nil {
+ return nil, errors.Wrapf(err, "failed to get sandbox %q info", sandboxID)
+ }
+
+ // Create container root directory.
+ containerRootDir := c.getContainerRootDir(id)
+ if err = c.os.MkdirAll(containerRootDir, 0755); err != nil {
+ return nil, errors.Wrapf(err, "failed to create container root directory %q",
+ containerRootDir)
+ }
+ defer func() {
+ if retErr != nil {
+ // Cleanup the container root directory.
+ if err = c.os.RemoveAll(containerRootDir); err != nil {
+ log.G(ctx).WithError(err).Errorf("Failed to remove container root directory %q",
+ containerRootDir)
+ }
+ }
+ }()
+ volatileContainerRootDir := c.getVolatileContainerRootDir(id)
+ if err = c.os.MkdirAll(volatileContainerRootDir, 0755); err != nil {
+ return nil, errors.Wrapf(err, "failed to create volatile container root directory %q",
+ volatileContainerRootDir)
+ }
+ defer func() {
+ if retErr != nil {
+ // Cleanup the volatile container root directory.
+ if err = c.os.RemoveAll(volatileContainerRootDir); err != nil {
+ log.G(ctx).WithError(err).Errorf("Failed to remove volatile container root directory %q",
+ volatileContainerRootDir)
+ }
+ }
+ }()
+
+ var volumeMounts []*runtime.Mount
+ if !c.config.IgnoreImageDefinedVolumes {
+ // Create container image volumes mounts.
+ volumeMounts = c.volumeMounts(containerRootDir, config.GetMounts(), &image.ImageSpec.Config)
+ } else if len(image.ImageSpec.Config.Volumes) != 0 {
+ log.G(ctx).Debugf("Ignoring volumes defined in image %v because IgnoreImageDefinedVolumes is set", image.ID)
+ }
+
+ // Generate container mounts.
+ mounts := c.containerMounts(sandboxID, config)
+
+ ociRuntime, err := c.getSandboxRuntime(sandboxConfig, sandbox.Metadata.RuntimeHandler)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to get sandbox runtime")
+ }
+ log.G(ctx).Debugf("Use OCI runtime %+v for sandbox %q and container %q", ociRuntime, sandboxID, id)
+
+ spec, err := c.containerSpec(id, sandboxID, sandboxPid, sandbox.NetNSPath, containerName, config, sandboxConfig,
+ &image.ImageSpec.Config, append(mounts, volumeMounts...), ociRuntime)
+ if err != nil {
+ return nil, errors.Wrapf(err, "failed to generate container %q spec", id)
+ }
+
+ meta.ProcessLabel = spec.Process.SelinuxLabel
+
+ // handle any KVM based runtime
+ if err := modifyProcessLabel(ociRuntime.Type, spec); err != nil {
+ return nil, err
+ }
+
+ if config.GetLinux().GetSecurityContext().GetPrivileged() {
+ // If privileged, don't set the SELinux label but still record it on the container so
+ // the unused MCS label can be released later
+ spec.Process.SelinuxLabel = ""
+ }
+ defer func() {
+ if retErr != nil {
+ selinux.ReleaseLabel(spec.Process.SelinuxLabel)
+ }
+ }()
+
+ log.G(ctx).Debugf("Container %q spec: %#+v", id, spew.NewFormatter(spec))
+
+ // Set snapshotter before any other options.
+ opts := []containerd.NewContainerOpts{
+ containerd.WithSnapshotter(c.config.ContainerdConfig.Snapshotter),
+ // Prepare container rootfs. This is always writeable even if
+ // the container wants a readonly rootfs since we want to give
+ // the runtime (runc) a chance to modify (e.g. to create mount
+ // points corresponding to spec.Mounts) before making the
+ // rootfs readonly (requested by spec.Root.Readonly).
+ customopts.WithNewSnapshot(id, containerdImage),
+ }
+ if len(volumeMounts) > 0 {
+ mountMap := make(map[string]string)
+ for _, v := range volumeMounts {
+ mountMap[filepath.Clean(v.HostPath)] = v.ContainerPath
+ }
+ opts = append(opts, customopts.WithVolumes(mountMap))
+ }
+ meta.ImageRef = image.ID
+ meta.StopSignal = image.ImageSpec.Config.StopSignal
+
+ // Validate log paths and compose full container log path.
+ if sandboxConfig.GetLogDirectory() != "" && config.GetLogPath() != "" {
+ meta.LogPath = filepath.Join(sandboxConfig.GetLogDirectory(), config.GetLogPath())
+ log.G(ctx).Debugf("Composed container full log path %q using sandbox log dir %q and container log path %q",
+ meta.LogPath, sandboxConfig.GetLogDirectory(), config.GetLogPath())
+ } else {
+ log.G(ctx).Infof("Logging will be disabled due to empty log paths for sandbox (%q) or container (%q)",
+ sandboxConfig.GetLogDirectory(), config.GetLogPath())
+ }
+
+ containerIO, err := cio.NewContainerIO(id,
+ cio.WithNewFIFOs(volatileContainerRootDir, config.GetTty(), config.GetStdin()))
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to create container io")
+ }
+ defer func() {
+ if retErr != nil {
+ if err := containerIO.Close(); err != nil {
+ log.G(ctx).WithError(err).Errorf("Failed to close container io %q", id)
+ }
+ }
+ }()
+
+ specOpts, err := c.containerSpecOpts(config, &image.ImageSpec.Config)
+ if err != nil {
+ return nil, errors.Wrap(err, "")
+ }
+
+ containerLabels := buildLabels(config.Labels, containerKindContainer)
+
+ runtimeOptions, err := getRuntimeOptions(sandboxInfo)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to get runtime options")
+ }
+ opts = append(opts,
+ containerd.WithSpec(spec, specOpts...),
+ containerd.WithRuntime(sandboxInfo.Runtime.Name, runtimeOptions),
+ containerd.WithContainerLabels(containerLabels),
+ containerd.WithContainerExtension(containerMetadataExtension, &meta))
+ var cntr containerd.Container
+ if cntr, err = c.client.NewContainer(ctx, id, opts...); err != nil {
+ return nil, errors.Wrap(err, "failed to create containerd container")
+ }
+ defer func() {
+ if retErr != nil {
+ deferCtx, deferCancel := ctrdutil.DeferContext()
+ defer deferCancel()
+ if err := cntr.Delete(deferCtx, containerd.WithSnapshotCleanup); err != nil {
+ log.G(ctx).WithError(err).Errorf("Failed to delete containerd container %q", id)
+ }
+ }
+ }()
+
+ status := containerstore.Status{CreatedAt: time.Now().UnixNano()}
+ container, err := containerstore.NewContainer(meta,
+ containerstore.WithStatus(status, containerRootDir),
+ containerstore.WithContainer(cntr),
+ containerstore.WithContainerIO(containerIO),
+ )
+ if err != nil {
+ return nil, errors.Wrapf(err, "failed to create internal container object for %q", id)
+ }
+ defer func() {
+ if retErr != nil {
+ // Cleanup container checkpoint on error.
+ if err := container.Delete(); err != nil {
+ log.G(ctx).WithError(err).Errorf("Failed to cleanup container checkpoint for %q", id)
+ }
+ }
+ }()
+
+ // Add container into container store.
+ if err := c.containerStore.Add(container); err != nil {
+ return nil, errors.Wrapf(err, "failed to add container %q into store", id)
+ }
+
+ return &runtime.CreateContainerResponse{ContainerId: id}, nil
+}
+
+// volumeMounts sets up image volumes for a container. It relies on the removal of the
+// container root directory for cleanup. Note that an image volume will be skipped if a
+// criMounts entry specifies the same destination.
+func (c *criService) volumeMounts(containerRootDir string, criMounts []*runtime.Mount, config *imagespec.ImageConfig) []*runtime.Mount {
+ if len(config.Volumes) == 0 {
+ return nil
+ }
+ var mounts []*runtime.Mount
+ for dst := range config.Volumes {
+ if isInCRIMounts(dst, criMounts) {
+ // Skip the image volume, if there is CRI defined volume mapping.
+ // TODO(random-liu): This should be handled by Kubelet in the future.
+ // Kubelet should decide what to use for image volume, and also de-duplicate
+ // the image volume and user mounts.
+ continue
+ }
+ volumeID := util.GenerateID()
+ src := filepath.Join(containerRootDir, "volumes", volumeID)
+ // addOCIBindMounts will create these volumes.
+ mounts = append(mounts, &runtime.Mount{
+ ContainerPath: dst,
+ HostPath: src,
+ SelinuxRelabel: true,
+ })
+ }
+ return mounts
+}
+
+// runtimeSpec returns a default runtime spec used in cri-containerd.
+func (c *criService) runtimeSpec(id string, baseSpecFile string, opts ...oci.SpecOpts) (*runtimespec.Spec, error) {
+ // GenerateSpec needs namespace.
+ ctx := ctrdutil.NamespacedContext()
+ container := &containers.Container{ID: id}
+
+ if baseSpecFile != "" {
+ baseSpec, ok := c.baseOCISpecs[baseSpecFile]
+ if !ok {
+ return nil, errors.Errorf("can't find base OCI spec %q", baseSpecFile)
+ }
+
+ spec := oci.Spec{}
+ if err := util.DeepCopy(&spec, &baseSpec); err != nil {
+ return nil, errors.Wrap(err, "failed to clone OCI spec")
+ }
+
+ // Fix up cgroups path
+ applyOpts := append([]oci.SpecOpts{oci.WithNamespacedCgroup()}, opts...)
+
+ if err := oci.ApplyOpts(ctx, nil, container, &spec, applyOpts...); err != nil {
+ return nil, errors.Wrap(err, "failed to apply OCI options")
+ }
+
+ return &spec, nil
+ }
+
+ spec, err := oci.GenerateSpec(ctx, nil, container, opts...)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to generate spec")
+ }
+
+ return spec, nil
+}
diff --git a/pkg/server/container_create_linux.go b/pkg/server/container_create_linux.go
new file mode 100644
index 000000000..0b930394e
--- /dev/null
+++ b/pkg/server/container_create_linux.go
@@ -0,0 +1,462 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "bufio"
+ "io"
+ "os"
+ "strconv"
+ "strings"
+
+ "github.com/containerd/cgroups"
+ "github.com/containerd/containerd/contrib/apparmor"
+ "github.com/containerd/containerd/contrib/seccomp"
+ "github.com/containerd/containerd/oci"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ selinux "github.com/opencontainers/selinux/go-selinux"
+ "github.com/opencontainers/selinux/go-selinux/label"
+ "github.com/pkg/errors"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/annotations"
+ "github.com/containerd/cri/pkg/config"
+ customopts "github.com/containerd/cri/pkg/containerd/opts"
+)
+
+const (
+	// profileNamePrefix is the prefix for loading profiles on a localhost. Eg. AppArmor localhost/profileName.
+	profileNamePrefix = "localhost/" // TODO (mikebrow): get localhost/ & runtime/default from CRI kubernetes/kubernetes#51747
+	// runtimeDefault indicates that we should use or create a runtime default profile.
+	runtimeDefault = "runtime/default"
+	// dockerDefault indicates that we should use or create a docker default profile.
+	dockerDefault = "docker/default"
+	// appArmorDefaultProfileName is the name to use when creating a default apparmor profile.
+	appArmorDefaultProfileName = "cri-containerd.apparmor.d"
+	// unconfinedProfile is a string indicating one should run a pod/container without a security profile.
+	unconfinedProfile = "unconfined"
+	// seccompDefaultProfile is the default seccomp profile.
+	seccompDefaultProfile = dockerDefault
+)
+
+// containerMounts sets up necessary container system file mounts
+// including /dev/shm, /etc/hosts and /etc/resolv.conf.
+// CRI-supplied mounts always win: a system mount is only added when the
+// container config does not already mount that path. The hostname, hosts
+// and resolv.conf mounts follow the container's read-only rootfs setting.
+func (c *criService) containerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount {
+	var mounts []*runtime.Mount
+	securityContext := config.GetLinux().GetSecurityContext()
+	if !isInCRIMounts(etcHostname, config.GetMounts()) {
+		// /etc/hostname is added since 1.1.6, 1.2.4 and 1.3.
+		// For in-place upgrade, the old sandbox doesn't have the hostname file,
+		// do not mount this in that case.
+		// TODO(random-liu): Remove the check and always mount this when
+		// containerd 1.1 and 1.2 are deprecated.
+		hostpath := c.getSandboxHostname(sandboxID)
+		if _, err := c.os.Stat(hostpath); err == nil {
+			mounts = append(mounts, &runtime.Mount{
+				ContainerPath: etcHostname,
+				HostPath:      hostpath,
+				Readonly:      securityContext.GetReadonlyRootfs(),
+			})
+		}
+	}
+
+	if !isInCRIMounts(etcHosts, config.GetMounts()) {
+		mounts = append(mounts, &runtime.Mount{
+			ContainerPath: etcHosts,
+			HostPath:      c.getSandboxHosts(sandboxID),
+			Readonly:      securityContext.GetReadonlyRootfs(),
+		})
+	}
+
+	// Mount sandbox resolv.config.
+	// TODO: Need to figure out whether we should always mount it as read-only
+	if !isInCRIMounts(resolvConfPath, config.GetMounts()) {
+		mounts = append(mounts, &runtime.Mount{
+			ContainerPath: resolvConfPath,
+			HostPath:      c.getResolvPath(sandboxID),
+			Readonly:      securityContext.GetReadonlyRootfs(),
+		})
+	}
+
+	if !isInCRIMounts(devShm, config.GetMounts()) {
+		sandboxDevShm := c.getSandboxDevShm(sandboxID)
+		// With host IPC, share the node's /dev/shm instead of the sandbox's.
+		if securityContext.GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE {
+			sandboxDevShm = devShm
+		}
+		mounts = append(mounts, &runtime.Mount{
+			ContainerPath: devShm,
+			HostPath:      sandboxDevShm,
+			Readonly:      false,
+		})
+	}
+	return mounts
+}
+
+// containerSpec builds the OCI runtime spec for a container: process args,
+// env, mounts, SELinux labels, devices/capabilities, cgroups, namespaces and
+// passthrough annotations. The accumulated SpecOpts are applied on top of
+// the runtime's base spec (if any) via c.runtimeSpec. On error the SELinux
+// process label allocated here is released via the deferred cleanup.
+func (c *criService) containerSpec(id string, sandboxID string, sandboxPid uint32, netNSPath string, containerName string,
+	config *runtime.ContainerConfig, sandboxConfig *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig,
+	extraMounts []*runtime.Mount, ociRuntime config.Runtime) (_ *runtimespec.Spec, retErr error) {
+
+	specOpts := []oci.SpecOpts{
+		customopts.WithoutRunMount,
+		customopts.WithoutDefaultSecuritySettings,
+		customopts.WithRelativeRoot(relativeRootfsPath),
+		customopts.WithProcessArgs(config, imageConfig),
+		oci.WithDefaultPathEnv,
+		// this will be set based on the security context below
+		oci.WithNewPrivileges,
+	}
+	// CRI working dir overrides the image config's working dir.
+	if config.GetWorkingDir() != "" {
+		specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir()))
+	} else if imageConfig.WorkingDir != "" {
+		specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
+	}
+
+	if config.GetTty() {
+		specOpts = append(specOpts, oci.WithTTY)
+	}
+
+	// Add HOSTNAME env.
+	var (
+		err      error
+		hostname = sandboxConfig.GetHostname()
+	)
+	if hostname == "" {
+		if hostname, err = c.os.Hostname(); err != nil {
+			return nil, err
+		}
+	}
+	specOpts = append(specOpts, oci.WithEnv([]string{hostnameEnv + "=" + hostname}))
+
+	// Apply envs from image config first, so that envs from container config
+	// can override them.
+	env := imageConfig.Env
+	for _, e := range config.GetEnvs() {
+		env = append(env, e.GetKey()+"="+e.GetValue())
+	}
+	specOpts = append(specOpts, oci.WithEnv(env))
+
+	securityContext := config.GetLinux().GetSecurityContext()
+	labelOptions, err := toLabel(securityContext.GetSelinuxOptions())
+	if err != nil {
+		return nil, err
+	}
+	if len(labelOptions) == 0 {
+		// Use pod level SELinux config
+		if sandbox, err := c.sandboxStore.Get(sandboxID); err == nil {
+			labelOptions, err = selinux.DupSecOpt(sandbox.ProcessLabel)
+			if err != nil {
+				return nil, err
+			}
+		}
+	}
+
+	processLabel, mountLabel, err := label.InitLabels(labelOptions)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to init selinux options %+v", securityContext.GetSelinuxOptions())
+	}
+	defer func() {
+		// Release the allocated label if spec construction fails after this point.
+		if retErr != nil {
+			_ = label.ReleaseLabel(processLabel)
+		}
+	}()
+
+	specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel))
+
+	if !c.config.DisableProcMount {
+		// Apply masked paths if specified.
+		// If the container is privileged, this will be cleared later on.
+		if maskedPaths := securityContext.GetMaskedPaths(); maskedPaths != nil {
+			specOpts = append(specOpts, oci.WithMaskedPaths(maskedPaths))
+		}
+
+		// Apply readonly paths if specified.
+		// If the container is privileged, this will be cleared later on.
+		if readonlyPaths := securityContext.GetReadonlyPaths(); readonlyPaths != nil {
+			specOpts = append(specOpts, oci.WithReadonlyPaths(readonlyPaths))
+		}
+	}
+
+	if securityContext.GetPrivileged() {
+		if !sandboxConfig.GetLinux().GetSecurityContext().GetPrivileged() {
+			return nil, errors.New("no privileged container allowed in sandbox")
+		}
+		specOpts = append(specOpts, oci.WithPrivileged)
+		if !ociRuntime.PrivilegedWithoutHostDevices {
+			specOpts = append(specOpts, oci.WithHostDevices, oci.WithAllDevicesAllowed)
+		} else {
+			// add requested devices by the config as host devices are not automatically added
+			specOpts = append(specOpts, customopts.WithDevices(c.os, config), customopts.WithCapabilities(securityContext))
+		}
+	} else { // not privileged
+		specOpts = append(specOpts, customopts.WithDevices(c.os, config), customopts.WithCapabilities(securityContext))
+	}
+
+	// Clear all ambient capabilities. The implication of non-root + caps
+	// is not clearly defined in Kubernetes.
+	// See https://github.com/kubernetes/kubernetes/issues/56374
+	// Keep docker's behavior for now.
+	specOpts = append(specOpts,
+		customopts.WithoutAmbientCaps,
+		customopts.WithSelinuxLabels(processLabel, mountLabel),
+	)
+
+	// TODO: Figure out whether we should set no new privilege for sandbox container by default
+	if securityContext.GetNoNewPrivs() {
+		specOpts = append(specOpts, oci.WithNoNewPrivileges)
+	}
+	// TODO(random-liu): [P1] Set selinux options (privileged or not).
+	if securityContext.GetReadonlyRootfs() {
+		specOpts = append(specOpts, oci.WithRootFSReadonly())
+	}
+
+	if c.config.DisableCgroup {
+		specOpts = append(specOpts, customopts.WithDisabledCgroups)
+	} else {
+		specOpts = append(specOpts, customopts.WithResources(config.GetLinux().GetResources(), c.config.TolerateMissingHugetlbController, c.config.DisableHugetlbController))
+		if sandboxConfig.GetLinux().GetCgroupParent() != "" {
+			cgroupsPath := getCgroupsPath(sandboxConfig.GetLinux().GetCgroupParent(), id)
+			specOpts = append(specOpts, oci.WithCgroup(cgroupsPath))
+		}
+	}
+
+	supplementalGroups := securityContext.GetSupplementalGroups()
+
+	// Forward sandbox- and container-level passthrough annotations configured
+	// for this runtime into the OCI spec.
+	for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations,
+		ociRuntime.PodAnnotations) {
+		specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
+	}
+
+	for pKey, pValue := range getPassthroughAnnotations(config.Annotations,
+		ociRuntime.ContainerAnnotations) {
+		specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
+	}
+
+	specOpts = append(specOpts,
+		customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
+		customopts.WithPodNamespaces(securityContext, sandboxPid),
+		customopts.WithSupplementalGroups(supplementalGroups),
+		customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
+		customopts.WithAnnotation(annotations.SandboxID, sandboxID),
+		customopts.WithAnnotation(annotations.ContainerName, containerName),
+	)
+	// cgroupns is used for hiding /sys/fs/cgroup from containers.
+	// For compatibility, cgroupns is not used when running in cgroup v1 mode or in privileged.
+	// https://github.com/containers/libpod/issues/4363
+	// https://github.com/kubernetes/enhancements/blob/0e409b47497e398b369c281074485c8de129694f/keps/sig-node/20191118-cgroups-v2.md#cgroup-namespace
+	if cgroups.Mode() == cgroups.Unified && !securityContext.GetPrivileged() {
+		specOpts = append(specOpts, oci.WithLinuxNamespace(
+			runtimespec.LinuxNamespace{
+				Type: runtimespec.CgroupNamespace,
+			}))
+	}
+	return c.runtimeSpec(id, ociRuntime.BaseRuntimeSpec, specOpts...)
+}
+
+// containerSpecOpts returns the SpecOpts that must be applied by containerd
+// itself (rather than baked into the generated spec): the container user and
+// additional GIDs, plus the apparmor and seccomp profile options derived
+// from the container's security context.
+func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
+	var specOpts []oci.SpecOpts
+	securityContext := config.GetLinux().GetSecurityContext()
+	// Set container username. This could only be done by containerd, because it needs
+	// access to the container rootfs. Pass user name to containerd, and let it overwrite
+	// the spec for us.
+	userstr, err := generateUserString(
+		securityContext.GetRunAsUsername(),
+		securityContext.GetRunAsUser(),
+		securityContext.GetRunAsGroup())
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to generate user string")
+	}
+	if userstr == "" {
+		// Lastly, since no user override was passed via CRI try to set via OCI
+		// Image
+		userstr = imageConfig.User
+	}
+	if userstr != "" {
+		specOpts = append(specOpts, oci.WithUser(userstr))
+	}
+
+	if securityContext.GetRunAsUsername() != "" {
+		userstr = securityContext.GetRunAsUsername()
+	} else {
+		// Even if RunAsUser is not set, we still call `GetValue` to get uid 0.
+		// Because it is still useful to get additional gids for uid 0.
+		userstr = strconv.FormatInt(securityContext.GetRunAsUser().GetValue(), 10)
+	}
+	specOpts = append(specOpts, customopts.WithAdditionalGIDs(userstr))
+
+	apparmorSpecOpts, err := generateApparmorSpecOpts(
+		securityContext.GetApparmorProfile(),
+		securityContext.GetPrivileged(),
+		c.apparmorEnabled())
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to generate apparmor spec opts")
+	}
+	// A nil opt means "no profile"; only append real opts.
+	if apparmorSpecOpts != nil {
+		specOpts = append(specOpts, apparmorSpecOpts)
+	}
+
+	seccompSpecOpts, err := c.generateSeccompSpecOpts(
+		securityContext.GetSeccompProfilePath(),
+		securityContext.GetPrivileged(),
+		c.seccompEnabled())
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to generate seccomp spec opts")
+	}
+	if seccompSpecOpts != nil {
+		specOpts = append(specOpts, seccompSpecOpts)
+	}
+	return specOpts, nil
+}
+
+// generateSeccompSpecOpts generates containerd SpecOpts for seccomp.
+// A nil opt with nil error means no seccomp profile should be applied.
+func (c *criService) generateSeccompSpecOpts(seccompProf string, privileged, seccompEnabled bool) (oci.SpecOpts, error) {
+	// Privileged containers never get a seccomp profile.
+	if privileged {
+		return nil, nil
+	}
+	profile := seccompProf
+	if profile == "" {
+		profile = c.config.UnsetSeccompProfile
+	}
+	// Both "runtime/default" and "docker/default" resolve to the configured
+	// default profile (docker/default unless configured otherwise).
+	if profile == runtimeDefault || profile == dockerDefault {
+		profile = seccompDefaultProfile
+	}
+	if !seccompEnabled {
+		// Fail loudly when a real profile was requested but seccomp is off.
+		if profile != "" && profile != unconfinedProfile {
+			return nil, errors.New("seccomp is not supported")
+		}
+		return nil, nil
+	}
+	switch profile {
+	case "", unconfinedProfile:
+		// Leave the container unconfined.
+		return nil, nil
+	case dockerDefault:
+		// Note: WithDefaultProfile specOpts must be added after capabilities
+		return seccomp.WithDefaultProfile(), nil
+	default:
+		// Anything else must be a localhost/<name> profile.
+		if !strings.HasPrefix(profile, profileNamePrefix) {
+			return nil, errors.Errorf("invalid seccomp profile %q", profile)
+		}
+		return seccomp.WithProfile(strings.TrimPrefix(profile, profileNamePrefix)), nil
+	}
+}
+
+// generateApparmorSpecOpts generates containerd SpecOpts for apparmor.
+// A nil opt with nil error means no apparmor profile should be applied.
+func generateApparmorSpecOpts(apparmorProf string, privileged, apparmorEnabled bool) (oci.SpecOpts, error) {
+	if !apparmorEnabled {
+		// Should fail loudly if user try to specify apparmor profile
+		// but we don't support it.
+		if apparmorProf != "" && apparmorProf != unconfinedProfile {
+			return nil, errors.New("apparmor is not supported")
+		}
+		return nil, nil
+	}
+
+	// Based on kubernetes#51746, default apparmor profile should be applied
+	// for when apparmor is not specified.
+	if apparmorProf == runtimeDefault || apparmorProf == "" {
+		if privileged {
+			// Do not set apparmor profile when container is privileged
+			return nil, nil
+		}
+		// TODO (mikebrow): delete created apparmor default profile
+		return apparmor.WithDefaultProfile(appArmorDefaultProfileName), nil
+	}
+	if apparmorProf == unconfinedProfile {
+		return nil, nil
+	}
+
+	// Require and Trim default profile name prefix
+	if !strings.HasPrefix(apparmorProf, profileNamePrefix) {
+		return nil, errors.Errorf("invalid apparmor profile %q", apparmorProf)
+	}
+	name := strings.TrimPrefix(apparmorProf, profileNamePrefix)
+	exists, err := appArmorProfileExists(name)
+	if !exists {
+		if err != nil {
+			return nil, errors.Wrap(err, "failed to generate apparmor spec opts")
+		}
+		return nil, errors.Errorf("apparmor profile not found %s", name)
+	}
+	return apparmor.WithProfile(name), nil
+}
+
+// appArmorProfileExists scans apparmor/profiles for the requested profile
+// and reports whether the kernel currently has it loaded.
+func appArmorProfileExists(profile string) (bool, error) {
+	if profile == "" {
+		return false, errors.New("nil apparmor profile is not supported")
+	}
+	profiles, err := os.Open("/sys/kernel/security/apparmor/profiles")
+	if err != nil {
+		return false, err
+	}
+	defer profiles.Close()
+
+	reader := bufio.NewReader(profiles)
+	for {
+		line, err := reader.ReadString('\n')
+		if err == io.EOF {
+			// Exhausted the listing without a match.
+			return false, nil
+		}
+		if err != nil {
+			return false, err
+		}
+		// Loaded profiles are listed as "<name> (<mode>)".
+		if strings.HasPrefix(line, profile+" (") {
+			return true, nil
+		}
+	}
+}
+
+// generateUserString generates valid user string based on OCI Image Spec
+// v1.0.0.
+//
+// CRI defines that the following combinations are valid:
+//
+// (none) -> ""
+// username -> username
+// username, uid -> username
+// username, uid, gid -> username:gid
+// username, gid -> username:gid
+// uid -> uid
+// uid, gid -> uid:gid
+// gid -> error
+//
+// TODO(random-liu): Add group name support in CRI.
+func generateUserString(username string, uid, gid *runtime.Int64Value) (string, error) {
+ var userstr, groupstr string
+ if uid != nil {
+ userstr = strconv.FormatInt(uid.GetValue(), 10)
+ }
+ if username != "" {
+ userstr = username
+ }
+ if gid != nil {
+ groupstr = strconv.FormatInt(gid.GetValue(), 10)
+ }
+ if userstr == "" {
+ if groupstr != "" {
+ return "", errors.Errorf("user group %q is specified without user", groupstr)
+ }
+ return "", nil
+ }
+ if groupstr != "" {
+ userstr = userstr + ":" + groupstr
+ }
+ return userstr, nil
+}
diff --git a/pkg/server/container_create_linux_test.go b/pkg/server/container_create_linux_test.go
new file mode 100644
index 000000000..7187ec383
--- /dev/null
+++ b/pkg/server/container_create_linux_test.go
@@ -0,0 +1,1253 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "path/filepath"
+ "reflect"
+ "strings"
+ "testing"
+
+ "github.com/containerd/containerd/containers"
+ "github.com/containerd/containerd/contrib/apparmor"
+ "github.com/containerd/containerd/contrib/seccomp"
+ "github.com/containerd/containerd/mount"
+ "github.com/containerd/containerd/oci"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs"
+ "github.com/opencontainers/runc/libcontainer/devices"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/opencontainers/selinux/go-selinux"
+ "github.com/pkg/errors"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/annotations"
+ "github.com/containerd/cri/pkg/config"
+ "github.com/containerd/cri/pkg/containerd/opts"
+ ctrdutil "github.com/containerd/cri/pkg/containerd/util"
+ ostesting "github.com/containerd/cri/pkg/os/testing"
+ "github.com/containerd/cri/pkg/util"
+)
+
+// getCreateContainerTestData returns a shared container config, sandbox
+// config, image config and a spec-check function used by the create-container
+// tests. The returned specCheck asserts everything the fixture configures:
+// args, env merging, mounts, resource limits, supplemental groups,
+// no_new_privs, cgroup path, namespaces and pod annotations.
+func getCreateContainerTestData() (*runtime.ContainerConfig, *runtime.PodSandboxConfig,
+	*imagespec.ImageConfig, func(*testing.T, string, string, uint32, *runtimespec.Spec)) {
+	config := &runtime.ContainerConfig{
+		Metadata: &runtime.ContainerMetadata{
+			Name:    "test-name",
+			Attempt: 1,
+		},
+		Image: &runtime.ImageSpec{
+			Image: "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799",
+		},
+		Command:    []string{"test", "command"},
+		Args:       []string{"test", "args"},
+		WorkingDir: "test-cwd",
+		Envs: []*runtime.KeyValue{
+			{Key: "k1", Value: "v1"},
+			{Key: "k2", Value: "v2"},
+			{Key: "k3", Value: "v3=v3bis"},
+			{Key: "k4", Value: "v4=v4bis=foop"},
+		},
+		Mounts: []*runtime.Mount{
+			// everything default
+			{
+				ContainerPath: "container-path-1",
+				HostPath:      "host-path-1",
+			},
+			// readOnly
+			{
+				ContainerPath: "container-path-2",
+				HostPath:      "host-path-2",
+				Readonly:      true,
+			},
+		},
+		Labels:      map[string]string{"a": "b"},
+		Annotations: map[string]string{"ca-c": "ca-d"},
+		Linux: &runtime.LinuxContainerConfig{
+			Resources: &runtime.LinuxContainerResources{
+				CpuPeriod:          100,
+				CpuQuota:           200,
+				CpuShares:          300,
+				MemoryLimitInBytes: 400,
+				OomScoreAdj:        500,
+				CpusetCpus:         "0-1",
+				CpusetMems:         "2-3",
+			},
+			SecurityContext: &runtime.LinuxContainerSecurityContext{
+				SupplementalGroups: []int64{1111, 2222},
+				NoNewPrivs:         true,
+			},
+		},
+	}
+	sandboxConfig := &runtime.PodSandboxConfig{
+		Metadata: &runtime.PodSandboxMetadata{
+			Name:      "test-sandbox-name",
+			Uid:       "test-sandbox-uid",
+			Namespace: "test-sandbox-ns",
+			Attempt:   2,
+		},
+		Annotations: map[string]string{"c": "d"},
+		Linux: &runtime.LinuxPodSandboxConfig{
+			CgroupParent:    "/test/cgroup/parent",
+			SecurityContext: &runtime.LinuxSandboxSecurityContext{},
+		},
+	}
+	imageConfig := &imagespec.ImageConfig{
+		Env:        []string{"ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "ik4=iv4=iv4bis=boop"},
+		Entrypoint: []string{"/entrypoint"},
+		Cmd:        []string{"cmd"},
+		WorkingDir: "/workspace",
+	}
+	specCheck := func(t *testing.T, id string, sandboxID string, sandboxPid uint32, spec *runtimespec.Spec) {
+		assert.Equal(t, relativeRootfsPath, spec.Root.Path)
+		assert.Equal(t, []string{"test", "command", "test", "args"}, spec.Process.Args)
+		assert.Equal(t, "test-cwd", spec.Process.Cwd)
+		assert.Contains(t, spec.Process.Env, "k1=v1", "k2=v2", "k3=v3=v3bis", "ik4=iv4=iv4bis=boop")
+		assert.Contains(t, spec.Process.Env, "ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "k4=v4=v4bis=foop")
+
+		t.Logf("Check cgroups bind mount")
+		checkMount(t, spec.Mounts, "cgroup", "/sys/fs/cgroup", "cgroup", []string{"ro"}, nil)
+
+		t.Logf("Check bind mount")
+		checkMount(t, spec.Mounts, "host-path-1", "container-path-1", "bind", []string{"rbind", "rprivate", "rw"}, nil)
+		checkMount(t, spec.Mounts, "host-path-2", "container-path-2", "bind", []string{"rbind", "rprivate", "ro"}, nil)
+
+		t.Logf("Check resource limits")
+		assert.EqualValues(t, *spec.Linux.Resources.CPU.Period, 100)
+		assert.EqualValues(t, *spec.Linux.Resources.CPU.Quota, 200)
+		assert.EqualValues(t, *spec.Linux.Resources.CPU.Shares, 300)
+		assert.EqualValues(t, spec.Linux.Resources.CPU.Cpus, "0-1")
+		assert.EqualValues(t, spec.Linux.Resources.CPU.Mems, "2-3")
+		assert.EqualValues(t, *spec.Linux.Resources.Memory.Limit, 400)
+		assert.EqualValues(t, *spec.Process.OOMScoreAdj, 500)
+
+		t.Logf("Check supplemental groups")
+		assert.Contains(t, spec.Process.User.AdditionalGids, uint32(1111))
+		assert.Contains(t, spec.Process.User.AdditionalGids, uint32(2222))
+
+		t.Logf("Check no_new_privs")
+		assert.Equal(t, spec.Process.NoNewPrivileges, true)
+
+		t.Logf("Check cgroup path")
+		assert.Equal(t, getCgroupsPath("/test/cgroup/parent", id), spec.Linux.CgroupsPath)
+
+		t.Logf("Check namespaces")
+		assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
+			Type: runtimespec.NetworkNamespace,
+			Path: opts.GetNetworkNamespace(sandboxPid),
+		})
+		assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
+			Type: runtimespec.IPCNamespace,
+			Path: opts.GetIPCNamespace(sandboxPid),
+		})
+		assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
+			Type: runtimespec.UTSNamespace,
+			Path: opts.GetUTSNamespace(sandboxPid),
+		})
+		assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
+			Type: runtimespec.PIDNamespace,
+			Path: opts.GetPIDNamespace(sandboxPid),
+		})
+
+		t.Logf("Check PodSandbox annotations")
+		assert.Contains(t, spec.Annotations, annotations.SandboxID)
+		assert.EqualValues(t, spec.Annotations[annotations.SandboxID], sandboxID)
+
+		assert.Contains(t, spec.Annotations, annotations.ContainerType)
+		assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeContainer)
+	}
+	return config, sandboxConfig, imageConfig, specCheck
+}
+
+// TestContainerCapabilities verifies that CRI add/drop capability requests
+// (including the "ALL" wildcard) are reflected in all four OCI capability
+// sets, and that the ambient set is always cleared.
+func TestContainerCapabilities(t *testing.T) {
+	testID := "test-id"
+	testSandboxID := "sandbox-id"
+	testContainerName := "container-name"
+	testPid := uint32(1234)
+	for desc, test := range map[string]struct {
+		capability *runtime.Capability
+		includes   []string
+		excludes   []string
+	}{
+		"should be able to add/drop capabilities": {
+			capability: &runtime.Capability{
+				AddCapabilities:  []string{"SYS_ADMIN"},
+				DropCapabilities: []string{"CHOWN"},
+			},
+			includes: []string{"CAP_SYS_ADMIN"},
+			excludes: []string{"CAP_CHOWN"},
+		},
+		"should be able to add all capabilities": {
+			capability: &runtime.Capability{
+				AddCapabilities: []string{"ALL"},
+			},
+			includes: oci.GetAllCapabilities(),
+		},
+		"should be able to drop all capabilities": {
+			capability: &runtime.Capability{
+				DropCapabilities: []string{"ALL"},
+			},
+			excludes: oci.GetAllCapabilities(),
+		},
+		"should be able to drop capabilities with add all": {
+			capability: &runtime.Capability{
+				AddCapabilities:  []string{"ALL"},
+				DropCapabilities: []string{"CHOWN"},
+			},
+			includes: util.SubtractStringSlice(oci.GetAllCapabilities(), "CAP_CHOWN"),
+			excludes: []string{"CAP_CHOWN"},
+		},
+		"should be able to add capabilities with drop all": {
+			capability: &runtime.Capability{
+				AddCapabilities:  []string{"SYS_ADMIN"},
+				DropCapabilities: []string{"ALL"},
+			},
+			includes: []string{"CAP_SYS_ADMIN"},
+			excludes: util.SubtractStringSlice(oci.GetAllCapabilities(), "CAP_SYS_ADMIN"),
+		},
+	} {
+		t.Logf("TestCase %q", desc)
+		containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
+		ociRuntime := config.Runtime{}
+		c := newTestCRIService()
+
+		containerConfig.Linux.SecurityContext.Capabilities = test.capability
+		spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
+		require.NoError(t, err)
+
+		// On SELinux-enabled hosts the spec must carry non-empty labels.
+		if selinux.GetEnabled() {
+			assert.NotEqual(t, "", spec.Process.SelinuxLabel)
+			assert.NotEqual(t, "", spec.Linux.MountLabel)
+		}
+
+		specCheck(t, testID, testSandboxID, testPid, spec)
+		for _, include := range test.includes {
+			assert.Contains(t, spec.Process.Capabilities.Bounding, include)
+			assert.Contains(t, spec.Process.Capabilities.Effective, include)
+			assert.Contains(t, spec.Process.Capabilities.Inheritable, include)
+			assert.Contains(t, spec.Process.Capabilities.Permitted, include)
+		}
+		for _, exclude := range test.excludes {
+			assert.NotContains(t, spec.Process.Capabilities.Bounding, exclude)
+			assert.NotContains(t, spec.Process.Capabilities.Effective, exclude)
+			assert.NotContains(t, spec.Process.Capabilities.Inheritable, exclude)
+			assert.NotContains(t, spec.Process.Capabilities.Permitted, exclude)
+		}
+		// Ambient capabilities are always cleared by containerSpec.
+		assert.Empty(t, spec.Process.Capabilities.Ambient)
+	}
+}
+
+func TestContainerSpecTty(t *testing.T) {
+ testID := "test-id"
+ testSandboxID := "sandbox-id"
+ testContainerName := "container-name"
+ testPid := uint32(1234)
+ containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
+ ociRuntime := config.Runtime{}
+ c := newTestCRIService()
+ for _, tty := range []bool{true, false} {
+ containerConfig.Tty = tty
+ spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
+ require.NoError(t, err)
+ specCheck(t, testID, testSandboxID, testPid, spec)
+ assert.Equal(t, tty, spec.Process.Terminal)
+ if tty {
+ assert.Contains(t, spec.Process.Env, "TERM=xterm")
+ } else {
+ assert.NotContains(t, spec.Process.Env, "TERM=xterm")
+ }
+ }
+}
+
+func TestContainerSpecDefaultPath(t *testing.T) {
+ testID := "test-id"
+ testSandboxID := "sandbox-id"
+ testContainerName := "container-name"
+ testPid := uint32(1234)
+ expectedDefault := "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
+ containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
+ ociRuntime := config.Runtime{}
+ c := newTestCRIService()
+ for _, pathenv := range []string{"", "PATH=/usr/local/bin/games"} {
+ expected := expectedDefault
+ if pathenv != "" {
+ imageConfig.Env = append(imageConfig.Env, pathenv)
+ expected = pathenv
+ }
+ spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
+ require.NoError(t, err)
+ specCheck(t, testID, testSandboxID, testPid, spec)
+ assert.Contains(t, spec.Process.Env, expected)
+ }
+}
+
+func TestContainerSpecReadonlyRootfs(t *testing.T) {
+ testID := "test-id"
+ testSandboxID := "sandbox-id"
+ testContainerName := "container-name"
+ testPid := uint32(1234)
+ containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
+ ociRuntime := config.Runtime{}
+ c := newTestCRIService()
+ for _, readonly := range []bool{true, false} {
+ containerConfig.Linux.SecurityContext.ReadonlyRootfs = readonly
+ spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
+ require.NoError(t, err)
+ specCheck(t, testID, testSandboxID, testPid, spec)
+ assert.Equal(t, readonly, spec.Root.Readonly)
+ }
+}
+
+// TestContainerSpecWithExtraMounts verifies the mount precedence rules:
+// CRI mounts override extra mounts, and extra mounts override default
+// system mounts (/sys, /dev).
+func TestContainerSpecWithExtraMounts(t *testing.T) {
+	testID := "test-id"
+	testSandboxID := "sandbox-id"
+	testContainerName := "container-name"
+	testPid := uint32(1234)
+	containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
+	ociRuntime := config.Runtime{}
+	c := newTestCRIService()
+	mountInConfig := &runtime.Mount{
+		// Test cleanpath
+		ContainerPath: "test-container-path/",
+		HostPath:      "test-host-path",
+		Readonly:      false,
+	}
+	containerConfig.Mounts = append(containerConfig.Mounts, mountInConfig)
+	extraMounts := []*runtime.Mount{
+		{
+			ContainerPath: "test-container-path",
+			HostPath:      "test-host-path-extra",
+			Readonly:      true,
+		},
+		{
+			ContainerPath: "/sys",
+			HostPath:      "test-sys-extra",
+			Readonly:      false,
+		},
+		{
+			ContainerPath: "/dev",
+			HostPath:      "test-dev-extra",
+			Readonly:      false,
+		},
+	}
+	spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, containerConfig, sandboxConfig, imageConfig, extraMounts, ociRuntime)
+	require.NoError(t, err)
+	specCheck(t, testID, testSandboxID, testPid, spec)
+	// Bucket the resulting spec mounts by destination for the checks below.
+	var mounts, sysMounts, devMounts []runtimespec.Mount
+	for _, m := range spec.Mounts {
+		if strings.HasPrefix(m.Destination, "test-container-path") {
+			mounts = append(mounts, m)
+		} else if m.Destination == "/sys" {
+			sysMounts = append(sysMounts, m)
+		} else if strings.HasPrefix(m.Destination, "/dev") {
+			devMounts = append(devMounts, m)
+		}
+	}
+	t.Logf("CRI mount should override extra mount")
+	require.Len(t, mounts, 1)
+	assert.Equal(t, "test-host-path", mounts[0].Source)
+	assert.Contains(t, mounts[0].Options, "rw")
+
+	t.Logf("Extra mount should override default mount")
+	require.Len(t, sysMounts, 1)
+	assert.Equal(t, "test-sys-extra", sysMounts[0].Source)
+	assert.Contains(t, sysMounts[0].Options, "rw")
+
+	t.Logf("Dev mount should override all default dev mounts")
+	require.Len(t, devMounts, 1)
+	assert.Equal(t, "test-dev-extra", devMounts[0].Source)
+	assert.Contains(t, devMounts[0].Options, "rw")
+}
+
+// TestContainerAndSandboxPrivileged verifies that a privileged container is
+// only permitted inside a privileged sandbox; all other combinations succeed.
+func TestContainerAndSandboxPrivileged(t *testing.T) {
+	testID := "test-id"
+	testSandboxID := "sandbox-id"
+	testContainerName := "container-name"
+	testPid := uint32(1234)
+	containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
+	ociRuntime := config.Runtime{}
+	c := newTestCRIService()
+	for desc, test := range map[string]struct {
+		containerPrivileged bool
+		sandboxPrivileged   bool
+		expectError         bool
+	}{
+		"privileged container in non-privileged sandbox should fail": {
+			containerPrivileged: true,
+			sandboxPrivileged:   false,
+			expectError:         true,
+		},
+		"privileged container in privileged sandbox should be fine": {
+			containerPrivileged: true,
+			sandboxPrivileged:   true,
+			expectError:         false,
+		},
+		"non-privileged container in privileged sandbox should be fine": {
+			containerPrivileged: false,
+			sandboxPrivileged:   true,
+			expectError:         false,
+		},
+		"non-privileged container in non-privileged sandbox should be fine": {
+			containerPrivileged: false,
+			sandboxPrivileged:   false,
+			expectError:         false,
+		},
+	} {
+		t.Logf("TestCase %q", desc)
+		containerConfig.Linux.SecurityContext.Privileged = test.containerPrivileged
+		sandboxConfig.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
+			Privileged: test.sandboxPrivileged,
+		}
+		_, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
+		if test.expectError {
+			assert.Error(t, err)
+		} else {
+			assert.NoError(t, err)
+		}
+	}
+}
+
+// TestContainerMounts verifies the CRI-level mounts produced by
+// containerMounts (/etc/hostname, /etc/hosts, resolv.conf and /dev/shm):
+// read-only vs read-write rootfs, host-IPC /dev/shm passthrough, skipping
+// mounts already supplied via CRI, and skipping hostname when the sandbox
+// has no hostname file.
+func TestContainerMounts(t *testing.T) {
+ const testSandboxID = "test-id"
+ for desc, test := range map[string]struct {
+ statFn func(string) (os.FileInfo, error)
+ criMounts []*runtime.Mount
+ securityContext *runtime.LinuxContainerSecurityContext
+ expectedMounts []*runtime.Mount
+ }{
+ "should setup ro mount when rootfs is read-only": {
+ securityContext: &runtime.LinuxContainerSecurityContext{
+ ReadonlyRootfs: true,
+ },
+ expectedMounts: []*runtime.Mount{
+ {
+ ContainerPath: "/etc/hostname",
+ HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"),
+ Readonly: true,
+ },
+ {
+ ContainerPath: "/etc/hosts",
+ HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"),
+ Readonly: true,
+ },
+ {
+ ContainerPath: resolvConfPath,
+ HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"),
+ Readonly: true,
+ },
+ {
+ // /dev/shm stays writable even with a read-only rootfs.
+ ContainerPath: "/dev/shm",
+ HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"),
+ Readonly: false,
+ },
+ },
+ },
+ "should setup rw mount when rootfs is read-write": {
+ securityContext: &runtime.LinuxContainerSecurityContext{},
+ expectedMounts: []*runtime.Mount{
+ {
+ ContainerPath: "/etc/hostname",
+ HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"),
+ Readonly: false,
+ },
+ {
+ ContainerPath: "/etc/hosts",
+ HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"),
+ Readonly: false,
+ },
+ {
+ ContainerPath: resolvConfPath,
+ HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"),
+ Readonly: false,
+ },
+ {
+ ContainerPath: "/dev/shm",
+ HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"),
+ Readonly: false,
+ },
+ },
+ },
+ "should use host /dev/shm when host ipc is set": {
+ securityContext: &runtime.LinuxContainerSecurityContext{
+ NamespaceOptions: &runtime.NamespaceOption{Ipc: runtime.NamespaceMode_NODE},
+ },
+ expectedMounts: []*runtime.Mount{
+ {
+ ContainerPath: "/etc/hostname",
+ HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"),
+ Readonly: false,
+ },
+ {
+ ContainerPath: "/etc/hosts",
+ HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"),
+ Readonly: false,
+ },
+ {
+ ContainerPath: resolvConfPath,
+ HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"),
+ Readonly: false,
+ },
+ {
+ // NamespaceMode_NODE means share the host IPC, so the
+ // host's own /dev/shm is bind-mounted.
+ ContainerPath: "/dev/shm",
+ HostPath: "/dev/shm",
+ Readonly: false,
+ },
+ },
+ },
+ "should skip container mounts if already mounted by CRI": {
+ criMounts: []*runtime.Mount{
+ {
+ ContainerPath: "/etc/hostname",
+ HostPath: "/test-etc-hostname",
+ },
+ {
+ ContainerPath: "/etc/hosts",
+ HostPath: "/test-etc-host",
+ },
+ {
+ ContainerPath: resolvConfPath,
+ HostPath: "test-resolv-conf",
+ },
+ {
+ ContainerPath: "/dev/shm",
+ HostPath: "test-dev-shm",
+ },
+ },
+ securityContext: &runtime.LinuxContainerSecurityContext{},
+ expectedMounts: nil,
+ },
+ "should skip hostname mount if the old sandbox doesn't have hostname file": {
+ statFn: func(path string) (os.FileInfo, error) {
+ // containerMounts stats the sandbox hostname file; an error
+ // here simulates a sandbox created before hostname support.
+ assert.Equal(t, filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hostname"), path)
+ return nil, errors.New("random error")
+ },
+ securityContext: &runtime.LinuxContainerSecurityContext{},
+ expectedMounts: []*runtime.Mount{
+ {
+ ContainerPath: "/etc/hosts",
+ HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "hosts"),
+ Readonly: false,
+ },
+ {
+ ContainerPath: resolvConfPath,
+ HostPath: filepath.Join(testRootDir, sandboxesDir, testSandboxID, "resolv.conf"),
+ Readonly: false,
+ },
+ {
+ ContainerPath: "/dev/shm",
+ HostPath: filepath.Join(testStateDir, sandboxesDir, testSandboxID, "shm"),
+ Readonly: false,
+ },
+ },
+ },
+ } {
+ config := &runtime.ContainerConfig{
+ Metadata: &runtime.ContainerMetadata{
+ Name: "test-name",
+ Attempt: 1,
+ },
+ Mounts: test.criMounts,
+ Linux: &runtime.LinuxContainerConfig{
+ SecurityContext: test.securityContext,
+ },
+ }
+ c := newTestCRIService()
+ c.os.(*ostesting.FakeOS).StatFn = test.statFn
+ mounts := c.containerMounts(testSandboxID, config)
+ // desc is passed as the assert message so failures identify the case.
+ assert.Equal(t, test.expectedMounts, mounts, desc)
+ }
+}
+
+// TestPrivilegedBindMount verifies that sysfs and cgroupfs are mounted
+// read-only for ordinary containers and read-write when the container (and
+// its sandbox) are privileged.
+func TestPrivilegedBindMount(t *testing.T) {
+ testPid := uint32(1234)
+ c := newTestCRIService()
+ testSandboxID := "sandbox-id"
+ testContainerName := "container-name"
+ containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
+ ociRuntime := config.Runtime{}
+
+ for desc, test := range map[string]struct {
+ privileged bool
+ expectedSysFSRO bool
+ expectedCgroupFSRO bool
+ }{
+ "sysfs and cgroupfs should mount as 'ro' by default": {
+ expectedSysFSRO: true,
+ expectedCgroupFSRO: true,
+ },
+ "sysfs and cgroupfs should not mount as 'ro' if privileged": {
+ privileged: true,
+ expectedSysFSRO: false,
+ expectedCgroupFSRO: false,
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+
+ // Both container and sandbox must be privileged, otherwise
+ // containerSpec rejects the combination (see
+ // TestContainerAndSandboxPrivileged).
+ containerConfig.Linux.SecurityContext.Privileged = test.privileged
+ sandboxConfig.Linux.SecurityContext.Privileged = test.privileged
+
+ spec, err := c.containerSpec(t.Name(), testSandboxID, testPid, "", testContainerName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
+
+ assert.NoError(t, err)
+ // checkMount(contains, notcontains): assert the option that must be
+ // present and the one that must be absent.
+ if test.expectedSysFSRO {
+ checkMount(t, spec.Mounts, "sysfs", "/sys", "sysfs", []string{"ro"}, []string{"rw"})
+ } else {
+ checkMount(t, spec.Mounts, "sysfs", "/sys", "sysfs", []string{"rw"}, []string{"ro"})
+ }
+ if test.expectedCgroupFSRO {
+ checkMount(t, spec.Mounts, "cgroup", "/sys/fs/cgroup", "cgroup", []string{"ro"}, []string{"rw"})
+ } else {
+ checkMount(t, spec.Mounts, "cgroup", "/sys/fs/cgroup", "cgroup", []string{"rw"}, []string{"ro"})
+ }
+ }
+}
+
+// TestMountPropagation verifies that opts.WithMounts translates CRI mount
+// propagation modes into OCI bind options (rprivate/rslave/rshared), and that
+// it rejects host paths whose actual propagation (reported by the fake
+// LookupMount) is insufficient for the requested mode.
+func TestMountPropagation(t *testing.T) {
+
+ // Fake mount.Info lookups; the "Optional" field encodes the host path's
+ // propagation: "shared:" => shared, "master:" => slave, anything else =>
+ // neither.
+ sharedLookupMountFn := func(string) (mount.Info, error) {
+ return mount.Info{
+ Mountpoint: "host-path",
+ Optional: "shared:",
+ }, nil
+ }
+
+ slaveLookupMountFn := func(string) (mount.Info, error) {
+ return mount.Info{
+ Mountpoint: "host-path",
+ Optional: "master:",
+ }, nil
+ }
+
+ othersLookupMountFn := func(string) (mount.Info, error) {
+ return mount.Info{
+ Mountpoint: "host-path",
+ Optional: "others",
+ }, nil
+ }
+
+ for desc, test := range map[string]struct {
+ criMount *runtime.Mount
+ fakeLookupMountFn func(string) (mount.Info, error)
+ optionsCheck []string
+ expectErr bool
+ }{
+ "HostPath should mount as 'rprivate' if propagation is MountPropagation_PROPAGATION_PRIVATE": {
+ criMount: &runtime.Mount{
+ ContainerPath: "container-path",
+ HostPath: "host-path",
+ Propagation: runtime.MountPropagation_PROPAGATION_PRIVATE,
+ },
+ fakeLookupMountFn: nil,
+ optionsCheck: []string{"rbind", "rprivate"},
+ expectErr: false,
+ },
+ "HostPath should mount as 'rslave' if propagation is MountPropagation_PROPAGATION_HOST_TO_CONTAINER": {
+ criMount: &runtime.Mount{
+ ContainerPath: "container-path",
+ HostPath: "host-path",
+ Propagation: runtime.MountPropagation_PROPAGATION_HOST_TO_CONTAINER,
+ },
+ fakeLookupMountFn: slaveLookupMountFn,
+ optionsCheck: []string{"rbind", "rslave"},
+ expectErr: false,
+ },
+ "HostPath should mount as 'rshared' if propagation is MountPropagation_PROPAGATION_BIDIRECTIONAL": {
+ criMount: &runtime.Mount{
+ ContainerPath: "container-path",
+ HostPath: "host-path",
+ Propagation: runtime.MountPropagation_PROPAGATION_BIDIRECTIONAL,
+ },
+ fakeLookupMountFn: sharedLookupMountFn,
+ optionsCheck: []string{"rbind", "rshared"},
+ expectErr: false,
+ },
+ "HostPath should mount as 'rprivate' if propagation is illegal": {
+ criMount: &runtime.Mount{
+ ContainerPath: "container-path",
+ HostPath: "host-path",
+ // 42 is not a defined MountPropagation value; expect the
+ // private default.
+ Propagation: runtime.MountPropagation(42),
+ },
+ fakeLookupMountFn: nil,
+ optionsCheck: []string{"rbind", "rprivate"},
+ expectErr: false,
+ },
+ "Expect an error if HostPath isn't shared and mount propagation is MountPropagation_PROPAGATION_BIDIRECTIONAL": {
+ criMount: &runtime.Mount{
+ ContainerPath: "container-path",
+ HostPath: "host-path",
+ Propagation: runtime.MountPropagation_PROPAGATION_BIDIRECTIONAL,
+ },
+ fakeLookupMountFn: slaveLookupMountFn,
+ expectErr: true,
+ },
+ "Expect an error if HostPath isn't slave or shared and mount propagation is MountPropagation_PROPAGATION_HOST_TO_CONTAINER": {
+ criMount: &runtime.Mount{
+ ContainerPath: "container-path",
+ HostPath: "host-path",
+ Propagation: runtime.MountPropagation_PROPAGATION_HOST_TO_CONTAINER,
+ },
+ fakeLookupMountFn: othersLookupMountFn,
+ expectErr: true,
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+ c := newTestCRIService()
+ c.os.(*ostesting.FakeOS).LookupMountFn = test.fakeLookupMountFn
+ config, _, _, _ := getCreateContainerTestData()
+
+ var spec runtimespec.Spec
+ spec.Linux = &runtimespec.Linux{}
+
+ // Apply the spec opt directly to a minimal spec; container/client
+ // arguments are unused by WithMounts and passed as nil.
+ err := opts.WithMounts(c.os, config, []*runtime.Mount{test.criMount}, "")(context.Background(), nil, nil, &spec)
+ if test.expectErr {
+ require.Error(t, err)
+ } else {
+ require.NoError(t, err)
+ checkMount(t, spec.Mounts, test.criMount.HostPath, test.criMount.ContainerPath, "bind", test.optionsCheck, nil)
+ }
+ }
+}
+
+// TestPidNamespace verifies the PID namespace entry generated for each CRI
+// namespace mode: NODE and POD modes point the namespace path at the sandbox
+// pid, while CONTAINER mode leaves the path empty (a fresh namespace).
+func TestPidNamespace(t *testing.T) {
+ testID := "test-id"
+ testPid := uint32(1234)
+ testSandboxID := "sandbox-id"
+ testContainerName := "container-name"
+ containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
+ ociRuntime := config.Runtime{}
+ c := newTestCRIService()
+ for desc, test := range map[string]struct {
+ pidNS runtime.NamespaceMode
+ expected runtimespec.LinuxNamespace
+ }{
+ "node namespace mode": {
+ pidNS: runtime.NamespaceMode_NODE,
+ expected: runtimespec.LinuxNamespace{
+ Type: runtimespec.PIDNamespace,
+ Path: opts.GetPIDNamespace(testPid),
+ },
+ },
+ "container namespace mode": {
+ pidNS: runtime.NamespaceMode_CONTAINER,
+ expected: runtimespec.LinuxNamespace{
+ Type: runtimespec.PIDNamespace,
+ },
+ },
+ "pod namespace mode": {
+ pidNS: runtime.NamespaceMode_POD,
+ expected: runtimespec.LinuxNamespace{
+ Type: runtimespec.PIDNamespace,
+ Path: opts.GetPIDNamespace(testPid),
+ },
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+ containerConfig.Linux.SecurityContext.NamespaceOptions = &runtime.NamespaceOption{Pid: test.pidNS}
+ spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
+ require.NoError(t, err)
+ assert.Contains(t, spec.Linux.Namespaces, test.expected)
+ }
+}
+
+// TestNoDefaultRunMount verifies that the generated container spec never
+// mounts anything at /run (no spec mount may have that destination).
+func TestNoDefaultRunMount(t *testing.T) {
+ testID := "test-id"
+ testPid := uint32(1234)
+ testSandboxID := "sandbox-id"
+ testContainerName := "container-name"
+ containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
+ ociRuntime := config.Runtime{}
+ c := newTestCRIService()
+
+ spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
+ assert.NoError(t, err)
+ for _, mount := range spec.Mounts {
+ assert.NotEqual(t, "/run", mount.Destination)
+ }
+}
+
+// TestGenerateSeccompSpecOpts verifies generateSeccompSpecOpts: which seccomp
+// profiles produce a SpecOpt (default/local profiles), which are no-ops
+// (unconfined, privileged, empty), the fallback to the configured unset
+// default profile, and the error cases when seccomp is unsupported or the
+// profile string is invalid.
+func TestGenerateSeccompSpecOpts(t *testing.T) {
+ for desc, test := range map[string]struct {
+ profile string
+ privileged bool
+ disable bool
+ specOpts oci.SpecOpts
+ expectErr bool
+ defaultProfile string
+ }{
+ "should return error if seccomp is specified when seccomp is not supported": {
+ profile: runtimeDefault,
+ disable: true,
+ expectErr: true,
+ },
+ "should not return error if seccomp is not specified when seccomp is not supported": {
+ profile: "",
+ disable: true,
+ },
+ "should not return error if seccomp is unconfined when seccomp is not supported": {
+ profile: unconfinedProfile,
+ disable: true,
+ },
+ "should not set seccomp when privileged is true": {
+ profile: seccompDefaultProfile,
+ privileged: true,
+ },
+ "should not set seccomp when seccomp is unconfined": {
+ profile: unconfinedProfile,
+ },
+ "should not set seccomp when seccomp is not specified": {
+ profile: "",
+ },
+ "should set default seccomp when seccomp is runtime/default": {
+ profile: runtimeDefault,
+ specOpts: seccomp.WithDefaultProfile(),
+ },
+ "should set default seccomp when seccomp is docker/default": {
+ profile: dockerDefault,
+ specOpts: seccomp.WithDefaultProfile(),
+ },
+ "should set specified profile when local profile is specified": {
+ profile: profileNamePrefix + "test-profile",
+ specOpts: seccomp.WithProfile("test-profile"),
+ },
+ "should return error if specified profile is invalid": {
+ profile: "test-profile",
+ expectErr: true,
+ },
+ "should use default profile when seccomp is empty": {
+ defaultProfile: profileNamePrefix + "test-profile",
+ specOpts: seccomp.WithProfile("test-profile"),
+ },
+ "should fallback to docker/default when seccomp is empty and default is runtime/default": {
+ defaultProfile: runtimeDefault,
+ specOpts: seccomp.WithDefaultProfile(),
+ },
+ } {
+ t.Run(fmt.Sprintf("TestCase %q", desc), func(t *testing.T) {
+ cri := &criService{}
+ cri.config.UnsetSeccompProfile = test.defaultProfile
+ specOpts, err := cri.generateSeccompSpecOpts(test.profile, test.privileged, !test.disable)
+ // SpecOpts are funcs; compare identity via their pointers since
+ // funcs are not directly comparable.
+ assert.Equal(t,
+ reflect.ValueOf(test.specOpts).Pointer(),
+ reflect.ValueOf(specOpts).Pointer())
+ if test.expectErr {
+ assert.Error(t, err)
+ } else {
+ assert.NoError(t, err)
+ }
+ })
+ }
+}
+
+// TestGenerateApparmorSpecOpts verifies generateApparmorSpecOpts: when the
+// default profile SpecOpt is applied (empty or runtime/default, unprivileged),
+// when no opt is produced (unconfined, privileged), and the error cases when
+// apparmor is unsupported or the profile string is invalid/undefined.
+func TestGenerateApparmorSpecOpts(t *testing.T) {
+ for desc, test := range map[string]struct {
+ profile string
+ privileged bool
+ disable bool
+ specOpts oci.SpecOpts
+ expectErr bool
+ }{
+ "should return error if apparmor is specified when apparmor is not supported": {
+ profile: runtimeDefault,
+ disable: true,
+ expectErr: true,
+ },
+ "should not return error if apparmor is not specified when apparmor is not supported": {
+ profile: "",
+ disable: true,
+ },
+ "should set default apparmor when apparmor is not specified": {
+ profile: "",
+ specOpts: apparmor.WithDefaultProfile(appArmorDefaultProfileName),
+ },
+ "should not apparmor when apparmor is not specified and privileged is true": {
+ profile: "",
+ privileged: true,
+ },
+ "should not return error if apparmor is unconfined when apparmor is not supported": {
+ profile: unconfinedProfile,
+ disable: true,
+ },
+ "should not apparmor when apparmor is unconfined": {
+ profile: unconfinedProfile,
+ },
+ "should not apparmor when apparmor is unconfined and privileged is true": {
+ profile: unconfinedProfile,
+ privileged: true,
+ },
+ "should set default apparmor when apparmor is runtime/default": {
+ profile: runtimeDefault,
+ specOpts: apparmor.WithDefaultProfile(appArmorDefaultProfileName),
+ },
+ "should not apparmor when apparmor is default and privileged is true": {
+ profile: runtimeDefault,
+ privileged: true,
+ },
+ // TODO (mikebrow) add success with existing defined profile tests
+ "should return error when undefined local profile is specified": {
+ profile: profileNamePrefix + "test-profile",
+ expectErr: true,
+ },
+ "should return error when undefined local profile is specified and privileged is true": {
+ profile: profileNamePrefix + "test-profile",
+ privileged: true,
+ expectErr: true,
+ },
+ "should return error if specified profile is invalid": {
+ profile: "test-profile",
+ expectErr: true,
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+ specOpts, err := generateApparmorSpecOpts(test.profile, test.privileged, !test.disable)
+ // Compare SpecOpt identity by function pointer (funcs are not
+ // directly comparable).
+ assert.Equal(t,
+ reflect.ValueOf(test.specOpts).Pointer(),
+ reflect.ValueOf(specOpts).Pointer())
+ if test.expectErr {
+ assert.Error(t, err)
+ } else {
+ assert.NoError(t, err)
+ }
+ }
+}
+
+// TestMaskedAndReadonlyPaths verifies how containerSpec populates
+// Linux.MaskedPaths / Linux.ReadonlyPaths: OCI defaults when the CRI config
+// leaves them nil, explicit CRI values (including empty slices) otherwise,
+// and always nil for privileged containers.
+func TestMaskedAndReadonlyPaths(t *testing.T) {
+ testID := "test-id"
+ testSandboxID := "sandbox-id"
+ testContainerName := "container-name"
+ testPid := uint32(1234)
+ containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
+ ociRuntime := config.Runtime{}
+ c := newTestCRIService()
+
+ // Generate a plain OCI spec to obtain the default masked/readonly paths
+ // used as the expected values below.
+ defaultSpec, err := oci.GenerateSpec(ctrdutil.NamespacedContext(), nil, &containers.Container{ID: testID})
+ require.NoError(t, err)
+
+ for desc, test := range map[string]struct {
+ disableProcMount bool
+ masked []string
+ readonly []string
+ expectedMasked []string
+ expectedReadonly []string
+ privileged bool
+ }{
+ "should apply default if not specified when disable_proc_mount = true": {
+ disableProcMount: true,
+ masked: nil,
+ readonly: nil,
+ expectedMasked: defaultSpec.Linux.MaskedPaths,
+ expectedReadonly: defaultSpec.Linux.ReadonlyPaths,
+ privileged: false,
+ },
+ "should apply default if not specified when disable_proc_mount = false": {
+ disableProcMount: false,
+ masked: nil,
+ readonly: nil,
+ expectedMasked: defaultSpec.Linux.MaskedPaths,
+ expectedReadonly: defaultSpec.Linux.ReadonlyPaths,
+ privileged: false,
+ },
+ "should be able to specify empty paths": {
+ masked: []string{},
+ readonly: []string{},
+ expectedMasked: []string{},
+ expectedReadonly: []string{},
+ privileged: false,
+ },
+ "should apply CRI specified paths": {
+ masked: []string{"/proc"},
+ readonly: []string{"/sys"},
+ expectedMasked: []string{"/proc"},
+ expectedReadonly: []string{"/sys"},
+ privileged: false,
+ },
+ "default should be nil for privileged": {
+ expectedMasked: nil,
+ expectedReadonly: nil,
+ privileged: true,
+ },
+ "should be able to specify empty paths, esp. if privileged": {
+ masked: []string{},
+ readonly: []string{},
+ expectedMasked: nil,
+ expectedReadonly: nil,
+ privileged: true,
+ },
+ "should not apply CRI specified paths if privileged": {
+ masked: []string{"/proc"},
+ readonly: []string{"/sys"},
+ expectedMasked: nil,
+ expectedReadonly: nil,
+ privileged: true,
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+ c.config.DisableProcMount = test.disableProcMount
+ containerConfig.Linux.SecurityContext.MaskedPaths = test.masked
+ containerConfig.Linux.SecurityContext.ReadonlyPaths = test.readonly
+ containerConfig.Linux.SecurityContext.Privileged = test.privileged
+ sandboxConfig.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
+ Privileged: test.privileged,
+ }
+ spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
+ require.NoError(t, err)
+ if !test.privileged { // specCheck presumes an unprivileged container
+ specCheck(t, testID, testSandboxID, testPid, spec)
+ }
+ assert.Equal(t, test.expectedMasked, spec.Linux.MaskedPaths)
+ assert.Equal(t, test.expectedReadonly, spec.Linux.ReadonlyPaths)
+ }
+}
+
+// TestHostname verifies the HOSTNAME environment variable injected into the
+// container spec: the sandbox hostname when set, or the node hostname (from
+// os.Hostname, faked here) for host-network sandboxes without one.
+func TestHostname(t *testing.T) {
+ testID := "test-id"
+ testSandboxID := "sandbox-id"
+ testContainerName := "container-name"
+ testPid := uint32(1234)
+ containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
+ ociRuntime := config.Runtime{}
+ c := newTestCRIService()
+ // Fake the node hostname so the host-network fallback is deterministic.
+ c.os.(*ostesting.FakeOS).HostnameFn = func() (string, error) {
+ return "real-hostname", nil
+ }
+ for desc, test := range map[string]struct {
+ hostname string
+ networkNs runtime.NamespaceMode
+ expectedEnv string
+ }{
+ "should add HOSTNAME=sandbox.Hostname for pod network namespace": {
+ hostname: "test-hostname",
+ networkNs: runtime.NamespaceMode_POD,
+ expectedEnv: "HOSTNAME=test-hostname",
+ },
+ "should add HOSTNAME=sandbox.Hostname for host network namespace": {
+ hostname: "test-hostname",
+ networkNs: runtime.NamespaceMode_NODE,
+ expectedEnv: "HOSTNAME=test-hostname",
+ },
+ "should add HOSTNAME=os.Hostname for host network namespace if sandbox.Hostname is not set": {
+ hostname: "",
+ networkNs: runtime.NamespaceMode_NODE,
+ expectedEnv: "HOSTNAME=real-hostname",
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+ sandboxConfig.Hostname = test.hostname
+ sandboxConfig.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
+ NamespaceOptions: &runtime.NamespaceOption{Network: test.networkNs},
+ }
+ spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
+ require.NoError(t, err)
+ specCheck(t, testID, testSandboxID, testPid, spec)
+ assert.Contains(t, spec.Process.Env, test.expectedEnv)
+ }
+}
+
+func TestDisableCgroup(t *testing.T) {
+ containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
+ ociRuntime := config.Runtime{}
+ c := newTestCRIService()
+ c.config.DisableCgroup = true
+ spec, err := c.containerSpec("test-id", "sandbox-id", 1234, "", "container-name", containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
+ require.NoError(t, err)
+
+ t.Log("resource limit should not be set")
+ assert.Nil(t, spec.Linux.Resources.Memory)
+ assert.Nil(t, spec.Linux.Resources.CPU)
+
+ t.Log("cgroup path should be empty")
+ assert.Empty(t, spec.Linux.CgroupsPath)
+}
+
+// TestGenerateUserString verifies generateUserString's "user[:group]" output
+// for every combination of username, uid and gid — including the error case
+// where a gid is supplied without a user or uid.
+func TestGenerateUserString(t *testing.T) {
+ type testcase struct {
+ // the name of the test case
+ name string
+
+ // username, uid and gid inputs; nil means "not specified".
+ u string
+ uid, gid *runtime.Int64Value
+
+ // expected user string and whether an error is expected.
+ result string
+ expectedError bool
+ }
+ testcases := []testcase{
+ {
+ name: "Empty",
+ result: "",
+ },
+ {
+ name: "Username Only",
+ u: "testuser",
+ result: "testuser",
+ },
+ {
+ name: "Username, UID",
+ u: "testuser",
+ uid: &runtime.Int64Value{Value: 1},
+ result: "testuser",
+ },
+ {
+ name: "Username, UID, GID",
+ u: "testuser",
+ uid: &runtime.Int64Value{Value: 1},
+ gid: &runtime.Int64Value{Value: 10},
+ result: "testuser:10",
+ },
+ {
+ name: "Username, GID",
+ u: "testuser",
+ gid: &runtime.Int64Value{Value: 10},
+ result: "testuser:10",
+ },
+ {
+ name: "UID only",
+ uid: &runtime.Int64Value{Value: 1},
+ result: "1",
+ },
+ {
+ name: "UID, GID",
+ uid: &runtime.Int64Value{Value: 1},
+ gid: &runtime.Int64Value{Value: 10},
+ result: "1:10",
+ },
+ {
+ // A group without a user/uid cannot be expressed; expect an error.
+ name: "GID only",
+ gid: &runtime.Int64Value{Value: 10},
+ result: "",
+ expectedError: true,
+ },
+ }
+ for _, tc := range testcases {
+ t.Run(tc.name, func(t *testing.T) {
+ r, err := generateUserString(tc.u, tc.uid, tc.gid)
+ if tc.expectedError {
+ assert.Error(t, err)
+ } else {
+ assert.NoError(t, err)
+ }
+ assert.Equal(t, tc.result, r)
+ })
+ }
+}
+
+// TestPrivilegedDevices verifies that host devices are exposed to the
+// container only when it is privileged AND the runtime's
+// privileged_without_host_devices option is off.
+func TestPrivilegedDevices(t *testing.T) {
+ testPid := uint32(1234)
+ c := newTestCRIService()
+ testSandboxID := "sandbox-id"
+ testContainerName := "container-name"
+ containerConfig, sandboxConfig, imageConfig, _ := getCreateContainerTestData()
+
+ for desc, test := range map[string]struct {
+ privileged bool
+ privilegedWithoutHostDevices bool
+ expectHostDevices bool
+ }{
+ "expect no host devices when privileged is false": {
+ privileged: false,
+ privilegedWithoutHostDevices: false,
+ expectHostDevices: false,
+ },
+ "expect no host devices when privileged is false and privilegedWithoutHostDevices is true": {
+ privileged: false,
+ privilegedWithoutHostDevices: true,
+ expectHostDevices: false,
+ },
+ "expect host devices when privileged is true": {
+ privileged: true,
+ privilegedWithoutHostDevices: false,
+ expectHostDevices: true,
+ },
+ "expect no host devices when privileged is true and privilegedWithoutHostDevices is true": {
+ privileged: true,
+ privilegedWithoutHostDevices: true,
+ expectHostDevices: false,
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+
+ containerConfig.Linux.SecurityContext.Privileged = test.privileged
+ sandboxConfig.Linux.SecurityContext.Privileged = test.privileged
+
+ ociRuntime := config.Runtime{
+ PrivilegedWithoutHostDevices: test.privilegedWithoutHostDevices,
+ }
+ spec, err := c.containerSpec(t.Name(), testSandboxID, testPid, "", testContainerName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
+ assert.NoError(t, err)
+
+ // Enumerate the host's devices on the machine running the test to
+ // compute the expected count.
+ hostDevicesRaw, err := devices.HostDevices()
+ assert.NoError(t, err)
+ var hostDevices []*libcontainerconfigs.Device
+ for _, dev := range hostDevicesRaw {
+ // https://github.com/containerd/cri/pull/1521#issuecomment-652807951
+ if dev.DeviceRule.Major != 0 {
+ hostDevices = append(hostDevices, dev)
+ }
+ }
+
+ if test.expectHostDevices {
+ assert.Len(t, spec.Linux.Devices, len(hostDevices))
+ } else {
+ assert.Empty(t, spec.Linux.Devices)
+ }
+ }
+}
+
+// TestBaseOCISpec verifies that a configured base runtime spec
+// (base_runtime_spec) is merged into the generated container spec: fields the
+// CRI config does not set (additional gids, capabilities) survive from the
+// base, while fields the CRI config does set (memory limit) override it.
+func TestBaseOCISpec(t *testing.T) {
+ c := newTestCRIService()
+ baseLimit := int64(100)
+ c.baseOCISpecs = map[string]*oci.Spec{
+ "/etc/containerd/cri-base.json": {
+ Process: &runtimespec.Process{
+ User: runtimespec.User{AdditionalGids: []uint32{9999}},
+ Capabilities: &runtimespec.LinuxCapabilities{
+ Permitted: []string{"CAP_SETUID"},
+ },
+ },
+ Linux: &runtimespec.Linux{
+ Resources: &runtimespec.LinuxResources{
+ Memory: &runtimespec.LinuxMemory{Limit: &baseLimit}, // Will be overwritten by `getCreateContainerTestData`
+ },
+ },
+ },
+ }
+
+ ociRuntime := config.Runtime{}
+ ociRuntime.BaseRuntimeSpec = "/etc/containerd/cri-base.json"
+
+ testID := "test-id"
+ testSandboxID := "sandbox-id"
+ testContainerName := "container-name"
+ testPid := uint32(1234)
+ containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
+
+ spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
+ assert.NoError(t, err)
+
+ specCheck(t, testID, testSandboxID, testPid, spec)
+
+ // 9999 from the base spec plus the gids added by the test data.
+ assert.Contains(t, spec.Process.User.AdditionalGids, uint32(9999))
+ assert.Len(t, spec.Process.User.AdditionalGids, 3)
+
+ assert.Contains(t, spec.Process.Capabilities.Permitted, "CAP_SETUID")
+ assert.Len(t, spec.Process.Capabilities.Permitted, 1)
+
+ // The CRI memory limit overrides the base spec's limit of 100.
+ assert.Equal(t, *spec.Linux.Resources.Memory.Limit, containerConfig.Linux.Resources.MemoryLimitInBytes)
+}
diff --git a/pkg/server/container_create_other.go b/pkg/server/container_create_other.go
new file mode 100644
index 000000000..82bbb87e0
--- /dev/null
+++ b/pkg/server/container_create_other.go
@@ -0,0 +1,44 @@
+// +build !windows,!linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "github.com/containerd/containerd/oci"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/config"
+)
+
+// containerMounts sets up necessary container system file mounts
+// including /dev/shm, /etc/hosts and /etc/resolv.conf.
+// NOTE: on platforms other than linux/windows (see the build tag above)
+// this is a stub and returns no mounts.
+func (c *criService) containerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount {
+ return []*runtime.Mount{}
+}
+
+// containerSpec returns the OCI runtime spec for the container. This
+// non-linux/non-windows stub only loads the configured base runtime spec;
+// the container/sandbox/image configuration arguments are not consulted.
+func (c *criService) containerSpec(id string, sandboxID string, sandboxPid uint32, netNSPath string, containerName string,
+ config *runtime.ContainerConfig, sandboxConfig *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig,
+ extraMounts []*runtime.Mount, ociRuntime config.Runtime) (_ *runtimespec.Spec, retErr error) {
+ return c.runtimeSpec(id, ociRuntime.BaseRuntimeSpec)
+}
+
+// containerSpecOpts returns additional OCI spec options for the container;
+// this non-linux/non-windows stub contributes none.
+func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
+ return []oci.SpecOpts{}, nil
+}
diff --git a/pkg/server/container_create_other_test.go b/pkg/server/container_create_other_test.go
new file mode 100644
index 000000000..891c46765
--- /dev/null
+++ b/pkg/server/container_create_other_test.go
@@ -0,0 +1,40 @@
+// +build !windows,!linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// checkMount is defined by all tests but not used here
+var _ = checkMount
+
+// getCreateContainerTestData returns minimal empty fixtures for the
+// platform-independent container-create tests: zero-valued container,
+// sandbox and image configs plus a no-op spec checker, since the
+// non-linux/non-windows containerSpec stub ignores its configuration inputs.
+func getCreateContainerTestData() (*runtime.ContainerConfig, *runtime.PodSandboxConfig,
+ *imagespec.ImageConfig, func(*testing.T, string, string, uint32, *runtimespec.Spec)) {
+ config := &runtime.ContainerConfig{}
+ sandboxConfig := &runtime.PodSandboxConfig{}
+ imageConfig := &imagespec.ImageConfig{}
+ specCheck := func(t *testing.T, id string, sandboxID string, sandboxPid uint32, spec *runtimespec.Spec) {
+ }
+ return config, sandboxConfig, imageConfig, specCheck
+}
diff --git a/pkg/server/container_create_test.go b/pkg/server/container_create_test.go
new file mode 100644
index 000000000..b1c6cc8b3
--- /dev/null
+++ b/pkg/server/container_create_test.go
@@ -0,0 +1,407 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "context"
+ "path/filepath"
+ "testing"
+
+ "github.com/containerd/containerd/oci"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/config"
+ "github.com/containerd/cri/pkg/constants"
+ "github.com/containerd/cri/pkg/containerd/opts"
+)
+
+// checkMount asserts that mounts contains a mount from src to dest of the
+// given type, whose options include every entry of contains and none of
+// notcontains. It fails the test if no such mount exists.
+func checkMount(t *testing.T, mounts []runtimespec.Mount, src, dest, typ string,
+	contains, notcontains []string) {
+	found := false
+	for _, m := range mounts {
+		if m.Source == src && m.Destination == dest {
+			// testify's assert.Equal signature is (t, expected, actual);
+			// pass the expected type first so failure output is not inverted.
+			assert.Equal(t, typ, m.Type)
+			for _, c := range contains {
+				assert.Contains(t, m.Options, c)
+			}
+			for _, n := range notcontains {
+				assert.NotContains(t, m.Options, n)
+			}
+			found = true
+			break
+		}
+	}
+	assert.True(t, found, "mount from %q to %q not found", src, dest)
+}
+
+// TestGeneralContainerSpec checks that a container spec generated with a
+// default (zero-value) runtime config passes the platform's common spec
+// assertions from getCreateContainerTestData.
+func TestGeneralContainerSpec(t *testing.T) {
+	testID := "test-id"
+	testPid := uint32(1234)
+	containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
+	ociRuntime := config.Runtime{}
+	c := newTestCRIService()
+	testSandboxID := "sandbox-id"
+	testContainerName := "container-name"
+	spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName, containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
+	require.NoError(t, err)
+	specCheck(t, testID, testSandboxID, testPid, spec)
+}
+
+// TestPodAnnotationPassthroughContainerSpec verifies that only pod
+// annotations matching the runtime's PodAnnotations patterns (exact keys
+// or wildcard patterns) are copied into the container's OCI spec
+// annotations; everything else must be filtered out.
+func TestPodAnnotationPassthroughContainerSpec(t *testing.T) {
+	testID := "test-id"
+	testSandboxID := "sandbox-id"
+	testContainerName := "container-name"
+	testPid := uint32(1234)
+
+	for desc, test := range map[string]struct {
+		podAnnotations []string
+		configChange   func(*runtime.PodSandboxConfig)
+		specCheck      func(*testing.T, *runtimespec.Spec)
+	}{
+		"a passthrough annotation should be passed as an OCI annotation": {
+			podAnnotations: []string{"c"},
+			specCheck: func(t *testing.T, spec *runtimespec.Spec) {
+				assert.Equal(t, spec.Annotations["c"], "d")
+			},
+		},
+		"a non-passthrough annotation should not be passed as an OCI annotation": {
+			configChange: func(c *runtime.PodSandboxConfig) {
+				c.Annotations["d"] = "e"
+			},
+			podAnnotations: []string{"c"},
+			specCheck: func(t *testing.T, spec *runtimespec.Spec) {
+				assert.Equal(t, spec.Annotations["c"], "d")
+				_, ok := spec.Annotations["d"]
+				assert.False(t, ok)
+			},
+		},
+		"passthrough annotations should support wildcard match": {
+			configChange: func(c *runtime.PodSandboxConfig) {
+				c.Annotations["t.f"] = "j"
+				c.Annotations["z.g"] = "o"
+				c.Annotations["z"] = "o"
+				c.Annotations["y.ca"] = "b"
+				c.Annotations["y"] = "b"
+			},
+			podAnnotations: []string{"t*", "z.*", "y.c*"},
+			specCheck: func(t *testing.T, spec *runtimespec.Spec) {
+				t.Logf("%+v", spec.Annotations)
+				assert.Equal(t, spec.Annotations["t.f"], "j")
+				assert.Equal(t, spec.Annotations["z.g"], "o")
+				assert.Equal(t, spec.Annotations["y.ca"], "b")
+				// "y" and "z" are prefixes of the patterns but do not match
+				// "y.c*" / "z.*" themselves, so they must not pass through.
+				_, ok := spec.Annotations["y"]
+				assert.False(t, ok)
+				_, ok = spec.Annotations["z"]
+				assert.False(t, ok)
+			},
+		},
+	} {
+		t.Run(desc, func(t *testing.T) {
+			c := newTestCRIService()
+			containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
+			if test.configChange != nil {
+				test.configChange(sandboxConfig)
+			}
+
+			ociRuntime := config.Runtime{
+				PodAnnotations: test.podAnnotations,
+			}
+			spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName,
+				containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
+			assert.NoError(t, err)
+			assert.NotNil(t, spec)
+			specCheck(t, testID, testSandboxID, testPid, spec)
+			if test.specCheck != nil {
+				test.specCheck(t, spec)
+			}
+		})
+	}
+}
+
+// TestContainerSpecCommand verifies how CRI command/args and the image's
+// entrypoint/cmd are merged into the OCI process args: CRI command wins
+// over the image entrypoint, and image cmd is only used when CRI args are
+// absent.
+func TestContainerSpecCommand(t *testing.T) {
+	for desc, test := range map[string]struct {
+		criEntrypoint   []string
+		criArgs         []string
+		imageEntrypoint []string
+		imageArgs       []string
+		expected        []string
+		expectErr       bool
+	}{
+		"should use cri entrypoint if it's specified": {
+			criEntrypoint:   []string{"a", "b"},
+			imageEntrypoint: []string{"c", "d"},
+			imageArgs:       []string{"e", "f"},
+			expected:        []string{"a", "b"},
+		},
+		"should use cri entrypoint if it's specified even if it's empty": {
+			criEntrypoint:   []string{},
+			criArgs:         []string{"a", "b"},
+			imageEntrypoint: []string{"c", "d"},
+			imageArgs:       []string{"e", "f"},
+			expected:        []string{"a", "b"},
+		},
+		"should use cri entrypoint and args if they are specified": {
+			criEntrypoint:   []string{"a", "b"},
+			criArgs:         []string{"c", "d"},
+			imageEntrypoint: []string{"e", "f"},
+			imageArgs:       []string{"g", "h"},
+			expected:        []string{"a", "b", "c", "d"},
+		},
+		"should use image entrypoint if cri entrypoint is not specified": {
+			criArgs:         []string{"a", "b"},
+			imageEntrypoint: []string{"c", "d"},
+			imageArgs:       []string{"e", "f"},
+			expected:        []string{"c", "d", "a", "b"},
+		},
+		"should use image args if both cri entrypoint and args are not specified": {
+			imageEntrypoint: []string{"c", "d"},
+			imageArgs:       []string{"e", "f"},
+			expected:        []string{"c", "d", "e", "f"},
+		},
+		"should return error if both entrypoint and args are empty": {
+			expectErr: true,
+		},
+	} {
+		// Run each case as a subtest (consistent with the other table tests
+		// in this file) so every assertion failure is attributed to its case.
+		t.Run(desc, func(t *testing.T) {
+			config, _, imageConfig, _ := getCreateContainerTestData()
+			config.Command = test.criEntrypoint
+			config.Args = test.criArgs
+			imageConfig.Entrypoint = test.imageEntrypoint
+			imageConfig.Cmd = test.imageArgs
+
+			var spec runtimespec.Spec
+			err := opts.WithProcessArgs(config, imageConfig)(context.Background(), nil, nil, &spec)
+			if test.expectErr {
+				assert.Error(t, err)
+				return
+			}
+			assert.NoError(t, err)
+			assert.Equal(t, test.expected, spec.Process.Args)
+		})
+	}
+}
+
+// TestVolumeMounts verifies that image-config volumes are turned into rw
+// mounts rooted under <containerRootDir>/volumes, that volumes already
+// covered by CRI mounts are skipped, and that path comparison uses cleaned
+// paths.
+func TestVolumeMounts(t *testing.T) {
+	testContainerRootDir := "test-container-root"
+	for desc, test := range map[string]struct {
+		criMounts         []*runtime.Mount
+		imageVolumes      map[string]struct{}
+		expectedMountDest []string
+	}{
+		"should setup rw mount for image volumes": {
+			imageVolumes: map[string]struct{}{
+				"/test-volume-1": {},
+				"/test-volume-2": {},
+			},
+			expectedMountDest: []string{
+				"/test-volume-1",
+				"/test-volume-2",
+			},
+		},
+		"should skip image volumes if already mounted by CRI": {
+			criMounts: []*runtime.Mount{
+				{
+					ContainerPath: "/test-volume-1",
+					HostPath:      "/test-hostpath-1",
+				},
+			},
+			imageVolumes: map[string]struct{}{
+				"/test-volume-1": {},
+				"/test-volume-2": {},
+			},
+			expectedMountDest: []string{
+				"/test-volume-2",
+			},
+		},
+		"should compare and return cleanpath": {
+			criMounts: []*runtime.Mount{
+				{
+					ContainerPath: "/test-volume-1",
+					HostPath:      "/test-hostpath-1",
+				},
+			},
+			imageVolumes: map[string]struct{}{
+				// Trailing slashes: "/test-volume-1/" must be recognized as
+				// already mounted at "/test-volume-1" after path cleaning.
+				"/test-volume-1/": {},
+				"/test-volume-2/": {},
+			},
+			expectedMountDest: []string{
+				"/test-volume-2/",
+			},
+		},
+	} {
+		t.Logf("TestCase %q", desc)
+		config := &imagespec.ImageConfig{
+			Volumes: test.imageVolumes,
+		}
+		c := newTestCRIService()
+		got := c.volumeMounts(testContainerRootDir, test.criMounts, config)
+		assert.Len(t, got, len(test.expectedMountDest))
+		for _, dest := range test.expectedMountDest {
+			found := false
+			for _, m := range got {
+				if m.ContainerPath == dest {
+					found = true
+					// Host side of each generated mount lives directly under
+					// the container root's "volumes" directory.
+					assert.Equal(t,
+						filepath.Dir(m.HostPath),
+						filepath.Join(testContainerRootDir, "volumes"))
+					break
+				}
+			}
+			assert.True(t, found)
+		}
+	}
+}
+
+// TestContainerAnnotationPassthroughContainerSpec verifies that pod- and
+// container-level passthrough annotation patterns are applied
+// independently: exact keys, wildcard patterns, and the empty pattern list
+// (nothing passes through).
+func TestContainerAnnotationPassthroughContainerSpec(t *testing.T) {
+	testID := "test-id"
+	testSandboxID := "sandbox-id"
+	testContainerName := "container-name"
+	testPid := uint32(1234)
+
+	for desc, test := range map[string]struct {
+		podAnnotations       []string
+		containerAnnotations []string
+		podConfigChange      func(*runtime.PodSandboxConfig)
+		configChange         func(*runtime.ContainerConfig)
+		specCheck            func(*testing.T, *runtimespec.Spec)
+	}{
+		"passthrough annotations from pod and container should be passed as an OCI annotation": {
+			podConfigChange: func(p *runtime.PodSandboxConfig) {
+				p.Annotations["pod.annotation.1"] = "1"
+				p.Annotations["pod.annotation.2"] = "2"
+				p.Annotations["pod.annotation.3"] = "3"
+			},
+			configChange: func(c *runtime.ContainerConfig) {
+				c.Annotations["container.annotation.1"] = "1"
+				c.Annotations["container.annotation.2"] = "2"
+				c.Annotations["container.annotation.3"] = "3"
+			},
+			podAnnotations:       []string{"pod.annotation.1"},
+			containerAnnotations: []string{"container.annotation.1"},
+			specCheck: func(t *testing.T, spec *runtimespec.Spec) {
+				assert.Equal(t, "1", spec.Annotations["container.annotation.1"])
+				_, ok := spec.Annotations["container.annotation.2"]
+				assert.False(t, ok)
+				_, ok = spec.Annotations["container.annotation.3"]
+				assert.False(t, ok)
+				assert.Equal(t, "1", spec.Annotations["pod.annotation.1"])
+				_, ok = spec.Annotations["pod.annotation.2"]
+				assert.False(t, ok)
+				_, ok = spec.Annotations["pod.annotation.3"]
+				assert.False(t, ok)
+			},
+		},
+		"passthrough annotations from pod and container should support wildcard": {
+			podConfigChange: func(p *runtime.PodSandboxConfig) {
+				p.Annotations["pod.annotation.1"] = "1"
+				p.Annotations["pod.annotation.2"] = "2"
+				p.Annotations["pod.annotation.3"] = "3"
+			},
+			configChange: func(c *runtime.ContainerConfig) {
+				c.Annotations["container.annotation.1"] = "1"
+				c.Annotations["container.annotation.2"] = "2"
+				c.Annotations["container.annotation.3"] = "3"
+			},
+			podAnnotations:       []string{"pod.annotation.*"},
+			containerAnnotations: []string{"container.annotation.*"},
+			specCheck: func(t *testing.T, spec *runtimespec.Spec) {
+				assert.Equal(t, "1", spec.Annotations["container.annotation.1"])
+				assert.Equal(t, "2", spec.Annotations["container.annotation.2"])
+				assert.Equal(t, "3", spec.Annotations["container.annotation.3"])
+				assert.Equal(t, "1", spec.Annotations["pod.annotation.1"])
+				assert.Equal(t, "2", spec.Annotations["pod.annotation.2"])
+				assert.Equal(t, "3", spec.Annotations["pod.annotation.3"])
+			},
+		},
+		"annotations should not pass through if no passthrough annotations are configured": {
+			podConfigChange: func(p *runtime.PodSandboxConfig) {
+				p.Annotations["pod.annotation.1"] = "1"
+				p.Annotations["pod.annotation.2"] = "2"
+				p.Annotations["pod.annotation.3"] = "3"
+			},
+			configChange: func(c *runtime.ContainerConfig) {
+				c.Annotations["container.annotation.1"] = "1"
+				c.Annotations["container.annotation.2"] = "2"
+				c.Annotations["container.annotation.3"] = "3"
+			},
+			podAnnotations:       []string{},
+			containerAnnotations: []string{},
+			specCheck: func(t *testing.T, spec *runtimespec.Spec) {
+				_, ok := spec.Annotations["container.annotation.1"]
+				assert.False(t, ok)
+				_, ok = spec.Annotations["container.annotation.2"]
+				assert.False(t, ok)
+				_, ok = spec.Annotations["container.annotation.3"]
+				assert.False(t, ok)
+				_, ok = spec.Annotations["pod.annotation.1"]
+				assert.False(t, ok)
+				_, ok = spec.Annotations["pod.annotation.2"]
+				assert.False(t, ok)
+				_, ok = spec.Annotations["pod.annotation.3"]
+				assert.False(t, ok)
+			},
+		},
+	} {
+		t.Run(desc, func(t *testing.T) {
+			c := newTestCRIService()
+			containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
+			if test.configChange != nil {
+				test.configChange(containerConfig)
+			}
+			if test.podConfigChange != nil {
+				test.podConfigChange(sandboxConfig)
+			}
+			ociRuntime := config.Runtime{
+				PodAnnotations:       test.podAnnotations,
+				ContainerAnnotations: test.containerAnnotations,
+			}
+			spec, err := c.containerSpec(testID, testSandboxID, testPid, "", testContainerName,
+				containerConfig, sandboxConfig, imageConfig, nil, ociRuntime)
+			assert.NoError(t, err)
+			assert.NotNil(t, spec)
+			specCheck(t, testID, testSandboxID, testPid, spec)
+			if test.specCheck != nil {
+				test.specCheck(t, spec)
+			}
+		})
+	}
+}
+
+// TestBaseRuntimeSpec verifies that runtimeSpec starts from a registered
+// base OCI spec, applies the given SpecOpts on a copy (the cached base spec
+// must not be mutated), and sets the container's cgroups path.
+func TestBaseRuntimeSpec(t *testing.T) {
+	c := newTestCRIService()
+	c.baseOCISpecs = map[string]*oci.Spec{
+		"/etc/containerd/cri-base.json": {
+			Version:  "1.0.2",
+			Hostname: "old",
+		},
+	}
+
+	out, err := c.runtimeSpec("id1", "/etc/containerd/cri-base.json", oci.WithHostname("new"))
+	assert.NoError(t, err)
+
+	assert.Equal(t, "1.0.2", out.Version)
+	assert.Equal(t, "new", out.Hostname)
+
+	// Make sure original base spec not changed
+	assert.NotEqual(t, out, c.baseOCISpecs["/etc/containerd/cri-base.json"])
+	assert.Equal(t, c.baseOCISpecs["/etc/containerd/cri-base.json"].Hostname, "old")
+
+	assert.Equal(t, filepath.Join("/", constants.K8sContainerdNamespace, "id1"), out.Linux.CgroupsPath)
+}
diff --git a/pkg/server/container_create_windows.go b/pkg/server/container_create_windows.go
new file mode 100644
index 000000000..86a08d89e
--- /dev/null
+++ b/pkg/server/container_create_windows.go
@@ -0,0 +1,117 @@
+// +build windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "github.com/containerd/containerd/oci"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/annotations"
+ "github.com/containerd/cri/pkg/config"
+ customopts "github.com/containerd/cri/pkg/containerd/opts"
+)
+
+// containerMounts returns the extra mounts the CRI plugin would add beyond
+// user-requested ones. There are no such mounts on windows.
+func (c *criService) containerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount {
+	return nil
+}
+
+// containerSpec builds the windows OCI runtime spec for a container from
+// its CRI config, the sandbox config, the image config and the runtime
+// handler config. Options are applied in order, so later opts (e.g. CRI
+// envs, security-context user) override earlier image-config defaults.
+func (c *criService) containerSpec(id string, sandboxID string, sandboxPid uint32, netNSPath string, containerName string,
+	config *runtime.ContainerConfig, sandboxConfig *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig,
+	extraMounts []*runtime.Mount, ociRuntime config.Runtime) (*runtimespec.Spec, error) {
+	specOpts := []oci.SpecOpts{
+		customopts.WithProcessArgs(config, imageConfig),
+	}
+	// CRI working dir takes precedence over the image's working dir.
+	if config.GetWorkingDir() != "" {
+		specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir()))
+	} else if imageConfig.WorkingDir != "" {
+		specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
+	}
+
+	if config.GetTty() {
+		specOpts = append(specOpts, oci.WithTTY)
+	}
+
+	// Apply envs from image config first, so that envs from container config
+	// can override them.
+	env := imageConfig.Env
+	for _, e := range config.GetEnvs() {
+		env = append(env, e.GetKey()+"="+e.GetValue())
+	}
+	specOpts = append(specOpts, oci.WithEnv(env))
+
+	specOpts = append(specOpts,
+		// Clear the root location since hcsshim expects it.
+		// NOTE: readonly rootfs doesn't work on windows.
+		customopts.WithoutRoot,
+		customopts.WithWindowsNetworkNamespace(netNSPath),
+		oci.WithHostname(sandboxConfig.GetHostname()),
+	)
+
+	specOpts = append(specOpts, customopts.WithWindowsMounts(c.os, config, extraMounts))
+
+	// Start with the image config user and override below if RunAsUsername is not "".
+	username := imageConfig.User
+
+	windowsConfig := config.GetWindows()
+	if windowsConfig != nil {
+		specOpts = append(specOpts, customopts.WithWindowsResources(windowsConfig.GetResources()))
+		securityCtx := windowsConfig.GetSecurityContext()
+		if securityCtx != nil {
+			runAsUser := securityCtx.GetRunAsUsername()
+			if runAsUser != "" {
+				username = runAsUser
+			}
+			cs := securityCtx.GetCredentialSpec()
+			if cs != "" {
+				specOpts = append(specOpts, customopts.WithWindowsCredentialSpec(cs))
+			}
+		}
+	}
+
+	// There really isn't a good Windows way to verify that the username is available in the
+	// image as early as here like there is for Linux. Later on in the stack hcsshim
+	// will handle the behavior of erroring out if the user isn't available in the image
+	// when trying to run the init process.
+	specOpts = append(specOpts, oci.WithUser(username))
+
+	// Copy through only the pod/container annotations the runtime handler
+	// explicitly allows.
+	for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations,
+		ociRuntime.PodAnnotations) {
+		specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
+	}
+
+	for pKey, pValue := range getPassthroughAnnotations(config.Annotations,
+		ociRuntime.ContainerAnnotations) {
+		specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
+	}
+
+	// Internal annotations used by the plugin to identify this container
+	// and its sandbox.
+	specOpts = append(specOpts,
+		customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
+		customopts.WithAnnotation(annotations.SandboxID, sandboxID),
+		customopts.WithAnnotation(annotations.ContainerName, containerName),
+	)
+	return c.runtimeSpec(id, ociRuntime.BaseRuntimeSpec, specOpts...)
+}
+
+// containerSpecOpts returns the platform-specific OCI spec options for the
+// container. No extra spec options are needed for windows.
+func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
+	return nil, nil
+}
diff --git a/pkg/server/container_create_windows_test.go b/pkg/server/container_create_windows_test.go
new file mode 100644
index 000000000..42f14d692
--- /dev/null
+++ b/pkg/server/container_create_windows_test.go
@@ -0,0 +1,189 @@
+// +build windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/stretchr/testify/assert"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/annotations"
+ "github.com/containerd/cri/pkg/config"
+)
+
+// getCreateContainerTestData returns a fully-populated windows container
+// config, sandbox config and image config, together with a checker that
+// asserts the generated OCI spec reflects them (args, cwd, envs, mounts,
+// resources, user, credential spec and internal annotations).
+func getCreateContainerTestData() (*runtime.ContainerConfig, *runtime.PodSandboxConfig,
+	*imagespec.ImageConfig, func(*testing.T, string, string, uint32, *runtimespec.Spec)) {
+	config := &runtime.ContainerConfig{
+		Metadata: &runtime.ContainerMetadata{
+			Name:    "test-name",
+			Attempt: 1,
+		},
+		Image: &runtime.ImageSpec{
+			Image: "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799",
+		},
+		Command:    []string{"test", "command"},
+		Args:       []string{"test", "args"},
+		WorkingDir: "test-cwd",
+		Envs: []*runtime.KeyValue{
+			{Key: "k1", Value: "v1"},
+			{Key: "k2", Value: "v2"},
+			{Key: "k3", Value: "v3=v3bis"},
+			{Key: "k4", Value: "v4=v4bis=foop"},
+		},
+		Mounts: []*runtime.Mount{
+			// everything default
+			{
+				ContainerPath: "container-path-1",
+				HostPath:      "host-path-1",
+			},
+			// readOnly
+			{
+				ContainerPath: "container-path-2",
+				HostPath:      "host-path-2",
+				Readonly:      true,
+			},
+		},
+		Labels:      map[string]string{"a": "b"},
+		Annotations: map[string]string{"c": "d"},
+		Windows: &runtime.WindowsContainerConfig{
+			Resources: &runtime.WindowsContainerResources{
+				CpuShares:          100,
+				CpuCount:           200,
+				CpuMaximum:         300,
+				MemoryLimitInBytes: 400,
+			},
+			SecurityContext: &runtime.WindowsContainerSecurityContext{
+				RunAsUsername:  "test-user",
+				CredentialSpec: "{\"test\": \"spec\"}",
+			},
+		},
+	}
+	sandboxConfig := &runtime.PodSandboxConfig{
+		Metadata: &runtime.PodSandboxMetadata{
+			Name:      "test-sandbox-name",
+			Uid:       "test-sandbox-uid",
+			Namespace: "test-sandbox-ns",
+			Attempt:   2,
+		},
+		Hostname:    "test-hostname",
+		Annotations: map[string]string{"c": "d"},
+	}
+	imageConfig := &imagespec.ImageConfig{
+		Env:        []string{"ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "ik4=iv4=iv4bis=boop"},
+		Entrypoint: []string{"/entrypoint"},
+		Cmd:        []string{"cmd"},
+		WorkingDir: "/workspace",
+		User:       "ContainerUser",
+	}
+	specCheck := func(t *testing.T, id string, sandboxID string, sandboxPid uint32, spec *runtimespec.Spec) {
+		assert.Nil(t, spec.Root)
+		assert.Equal(t, "test-hostname", spec.Hostname)
+		assert.Equal(t, []string{"test", "command", "test", "args"}, spec.Process.Args)
+		assert.Equal(t, "test-cwd", spec.Process.Cwd)
+		assert.Contains(t, spec.Process.Env, "k1=v1", "k2=v2", "k3=v3=v3bis", "ik4=iv4=iv4bis=boop")
+		assert.Contains(t, spec.Process.Env, "ik1=iv1", "ik2=iv2", "ik3=iv3=iv3bis", "k4=v4=v4bis=foop")
+
+		t.Logf("Check bind mount")
+		checkMount(t, spec.Mounts, "host-path-1", "container-path-1", "", []string{"rw"}, nil)
+		checkMount(t, spec.Mounts, "host-path-2", "container-path-2", "", []string{"ro"}, nil)
+
+		t.Logf("Check resource limits")
+		assert.EqualValues(t, *spec.Windows.Resources.CPU.Shares, 100)
+		assert.EqualValues(t, *spec.Windows.Resources.CPU.Count, 200)
+		// Previously this assertion was duplicated; each limit is checked once.
+		assert.EqualValues(t, *spec.Windows.Resources.CPU.Maximum, 300)
+		assert.EqualValues(t, *spec.Windows.Resources.Memory.Limit, 400)
+
+		// Also checks if override of the image configs user is behaving.
+		t.Logf("Check username")
+		assert.Contains(t, spec.Process.User.Username, "test-user")
+
+		t.Logf("Check credential spec")
+		assert.Contains(t, spec.Windows.CredentialSpec, "{\"test\": \"spec\"}")
+
+		t.Logf("Check PodSandbox annotations")
+		assert.Contains(t, spec.Annotations, annotations.SandboxID)
+		assert.EqualValues(t, spec.Annotations[annotations.SandboxID], sandboxID)
+
+		assert.Contains(t, spec.Annotations, annotations.ContainerType)
+		assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeContainer)
+	}
+	return config, sandboxConfig, imageConfig, specCheck
+}
+
+// TestContainerWindowsNetworkNamespace checks that the network namespace
+// path handed to containerSpec ends up in spec.Windows.Network.
+func TestContainerWindowsNetworkNamespace(t *testing.T) {
+	testID := "test-id"
+	testSandboxID := "sandbox-id"
+	testContainerName := "container-name"
+	testPid := uint32(1234)
+	nsPath := "test-cni"
+	c := newTestCRIService()
+
+	containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
+	spec, err := c.containerSpec(testID, testSandboxID, testPid, nsPath, testContainerName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{})
+	assert.NoError(t, err)
+	assert.NotNil(t, spec)
+	specCheck(t, testID, testSandboxID, testPid, spec)
+	assert.NotNil(t, spec.Windows)
+	assert.NotNil(t, spec.Windows.Network)
+	assert.Equal(t, nsPath, spec.Windows.Network.NetworkNamespace)
+}
+
+// TestMountCleanPath checks that forward-slash windows paths in CRI mounts
+// are normalized to backslash-separated paths in the generated spec.
+func TestMountCleanPath(t *testing.T) {
+	testID := "test-id"
+	testSandboxID := "sandbox-id"
+	testContainerName := "container-name"
+	testPid := uint32(1234)
+	nsPath := "test-cni"
+	c := newTestCRIService()
+
+	containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
+	containerConfig.Mounts = append(containerConfig.Mounts, &runtime.Mount{
+		ContainerPath: "c:/test/container-path",
+		HostPath:      "c:/test/host-path",
+	})
+	spec, err := c.containerSpec(testID, testSandboxID, testPid, nsPath, testContainerName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{})
+	assert.NoError(t, err)
+	assert.NotNil(t, spec)
+	specCheck(t, testID, testSandboxID, testPid, spec)
+	checkMount(t, spec.Mounts, "c:\\test\\host-path", "c:\\test\\container-path", "", []string{"rw"}, nil)
+}
+
+// TestMountNamedPipe checks that a windows named-pipe path used as both
+// host and container path is passed through to the spec unmodified.
+func TestMountNamedPipe(t *testing.T) {
+	testID := "test-id"
+	testSandboxID := "sandbox-id"
+	testContainerName := "container-name"
+	testPid := uint32(1234)
+	nsPath := "test-cni"
+	c := newTestCRIService()
+
+	containerConfig, sandboxConfig, imageConfig, specCheck := getCreateContainerTestData()
+	containerConfig.Mounts = append(containerConfig.Mounts, &runtime.Mount{
+		ContainerPath: `\\.\pipe\foo`,
+		HostPath:      `\\.\pipe\foo`,
+	})
+	spec, err := c.containerSpec(testID, testSandboxID, testPid, nsPath, testContainerName, containerConfig, sandboxConfig, imageConfig, nil, config.Runtime{})
+	assert.NoError(t, err)
+	assert.NotNil(t, spec)
+	specCheck(t, testID, testSandboxID, testPid, spec)
+	checkMount(t, spec.Mounts, `\\.\pipe\foo`, `\\.\pipe\foo`, "", []string{"rw"}, nil)
+}
diff --git a/pkg/server/container_exec.go b/pkg/server/container_exec.go
new file mode 100644
index 000000000..ae5498ff7
--- /dev/null
+++ b/pkg/server/container_exec.go
@@ -0,0 +1,36 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// Exec prepares a streaming endpoint to execute a command in the container, and returns the address.
+// The container must exist in the store and be in the RUNNING state; the
+// actual exec happens later when the client connects to the returned URL.
+func (c *criService) Exec(ctx context.Context, r *runtime.ExecRequest) (*runtime.ExecResponse, error) {
+	cntr, err := c.containerStore.Get(r.GetContainerId())
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to find container %q in store", r.GetContainerId())
+	}
+	state := cntr.Status.Get().State()
+	if state != runtime.ContainerState_CONTAINER_RUNNING {
+		return nil, errors.Errorf("container is in %s state", criContainerStateToString(state))
+	}
+	return c.streamServer.GetExec(r)
+}
diff --git a/pkg/server/container_execsync.go b/pkg/server/container_execsync.go
new file mode 100644
index 000000000..1c019f651
--- /dev/null
+++ b/pkg/server/container_execsync.go
@@ -0,0 +1,211 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "bytes"
+ "io"
+ "syscall"
+ "time"
+
+ "github.com/containerd/containerd"
+ containerdio "github.com/containerd/containerd/cio"
+ "github.com/containerd/containerd/errdefs"
+ "github.com/containerd/containerd/log"
+ "github.com/containerd/containerd/oci"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ "k8s.io/client-go/tools/remotecommand"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ ctrdutil "github.com/containerd/cri/pkg/containerd/util"
+ cioutil "github.com/containerd/cri/pkg/ioutil"
+ cio "github.com/containerd/cri/pkg/server/io"
+ "github.com/containerd/cri/pkg/util"
+)
+
+// ExecSync executes a command in the container, and returns the stdout output.
+// If command exits with a non-zero exit code, an error is returned.
+func (c *criService) ExecSync(ctx context.Context, r *runtime.ExecSyncRequest) (*runtime.ExecSyncResponse, error) {
+	var stdout, stderr bytes.Buffer
+	// Buffer stdout/stderr in memory; a zero timeout means no deadline.
+	exitCode, err := c.execInContainer(ctx, r.GetContainerId(), execOptions{
+		cmd:     r.GetCmd(),
+		stdout:  cioutil.NewNopWriteCloser(&stdout),
+		stderr:  cioutil.NewNopWriteCloser(&stderr),
+		timeout: time.Duration(r.GetTimeout()) * time.Second,
+	})
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to exec in container")
+	}
+
+	// exitCode is non-nil whenever err is nil (execInContainer returns the
+	// process exit code on success).
+	return &runtime.ExecSyncResponse{
+		Stdout:   stdout.Bytes(),
+		Stderr:   stderr.Bytes(),
+		ExitCode: int32(*exitCode),
+	}, nil
+}
+
+// execOptions specifies how to execute command in container.
+type execOptions struct {
+	// cmd is the command and arguments to run.
+	cmd []string
+	// stdin/stdout/stderr are the streams wired to the exec process; nil
+	// stdout/stderr are replaced with discard loggers.
+	stdin  io.Reader
+	stdout io.WriteCloser
+	stderr io.WriteCloser
+	// tty allocates a terminal for the process.
+	tty bool
+	// resize delivers terminal resize events.
+	resize <-chan remotecommand.TerminalSize
+	// timeout bounds how long the exec may run; zero means no timeout.
+	timeout time.Duration
+}
+
+// execInternal creates an exec process in the container's task, wires up
+// its IO, and blocks until the process exits or opts.timeout elapses. On
+// timeout the process is killed and an error is returned; on normal exit
+// the process's exit code is returned.
+func (c *criService) execInternal(ctx context.Context, container containerd.Container, id string, opts execOptions) (*uint32, error) {
+	// Cancel the context before returning to ensure goroutines are stopped.
+	// This is important, because if `Start` returns error, `Wait` will hang
+	// forever unless we cancel the context.
+	ctx, cancel := context.WithCancel(ctx)
+	defer cancel()
+
+	spec, err := container.Spec(ctx)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to get container spec")
+	}
+	task, err := container.Task(ctx, nil)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to load task")
+	}
+	// The exec process reuses the container's process spec with its own
+	// args/terminal settings.
+	pspec := spec.Process
+
+	pspec.Terminal = opts.tty
+	if opts.tty {
+		if err := oci.WithEnv([]string{"TERM=xterm"})(ctx, nil, nil, spec); err != nil {
+			return nil, errors.Wrap(err, "add TERM env var to spec")
+		}
+	}
+
+	pspec.Args = opts.cmd
+
+	if opts.stdout == nil {
+		opts.stdout = cio.NewDiscardLogger()
+	}
+	if opts.stderr == nil {
+		opts.stderr = cio.NewDiscardLogger()
+	}
+	execID := util.GenerateID()
+	log.G(ctx).Debugf("Generated exec id %q for container %q", execID, id)
+	volatileRootDir := c.getVolatileContainerRootDir(id)
+	var execIO *cio.ExecIO
+	process, err := task.Exec(ctx, execID, pspec,
+		func(id string) (containerdio.IO, error) {
+			var err error
+			execIO, err = cio.NewExecIO(id, volatileRootDir, opts.tty, opts.stdin != nil)
+			return execIO, err
+		},
+	)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to create exec %q", execID)
+	}
+	// Always clean up the exec process on return, killing it if it is still
+	// running; use a fresh context since ctx may already be cancelled.
+	defer func() {
+		deferCtx, deferCancel := ctrdutil.DeferContext()
+		defer deferCancel()
+		if _, err := process.Delete(deferCtx, containerd.WithProcessKill); err != nil {
+			log.G(ctx).WithError(err).Errorf("Failed to delete exec process %q for container %q", execID, id)
+		}
+	}()
+
+	// Wait is set up before Start so the exit event cannot be missed.
+	exitCh, err := process.Wait(ctx)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to wait for process %q", execID)
+	}
+	if err := process.Start(ctx); err != nil {
+		return nil, errors.Wrapf(err, "failed to start exec %q", execID)
+	}
+
+	handleResizing(ctx, opts.resize, func(size remotecommand.TerminalSize) {
+		if err := process.Resize(ctx, uint32(size.Width), uint32(size.Height)); err != nil {
+			log.G(ctx).WithError(err).Errorf("Failed to resize process %q console for container %q", execID, id)
+		}
+	})
+
+	attachDone := execIO.Attach(cio.AttachOptions{
+		Stdin:     opts.stdin,
+		Stdout:    opts.stdout,
+		Stderr:    opts.stderr,
+		Tty:       opts.tty,
+		StdinOnce: true,
+		CloseStdin: func() error {
+			return process.CloseIO(ctx, containerd.WithStdinCloser)
+		},
+	})
+
+	execCtx := ctx
+	if opts.timeout > 0 {
+		var execCtxCancel context.CancelFunc
+		execCtx, execCtxCancel = context.WithTimeout(ctx, opts.timeout)
+		defer execCtxCancel()
+	}
+
+	select {
+	case <-execCtx.Done():
+		// Ignore the not found error because the process may exit itself before killing.
+		if err := process.Kill(ctx, syscall.SIGKILL); err != nil && !errdefs.IsNotFound(err) {
+			return nil, errors.Wrapf(err, "failed to kill exec %q", execID)
+		}
+		// Wait for the process to be killed.
+		exitRes := <-exitCh
+		log.G(ctx).Infof("Timeout received while waiting for exec process kill %q code %d and error %v",
+			execID, exitRes.ExitCode(), exitRes.Error())
+		// Drain the IO pipes before returning so writers are not leaked.
+		<-attachDone
+		log.G(ctx).Debugf("Stream pipe for exec process %q done", execID)
+		return nil, errors.Wrapf(execCtx.Err(), "timeout %v exceeded", opts.timeout)
+	case exitRes := <-exitCh:
+		code, _, err := exitRes.Result()
+		log.G(ctx).Infof("Exec process %q exits with exit code %d and error %v", execID, code, err)
+		if err != nil {
+			return nil, errors.Wrapf(err, "failed while waiting for exec %q", execID)
+		}
+		<-attachDone
+		log.G(ctx).Debugf("Stream pipe for exec process %q done", execID)
+		return &code, nil
+	}
+}
+
+// execInContainer executes a command inside the container synchronously, and
+// redirects stdio stream properly.
+// This function only returns when the exec process exits, this means that:
+// 1) As long as the exec process is running, the goroutine in the cri plugin
+// will be running and wait for the exit code;
+// 2) `kubectl exec -it` will hang until the exec process exits, even after io
+// is detached. This is different from dockershim, which leaves the exec process
+// running in background after io is detached.
+// https://github.com/kubernetes/kubernetes/blob/v1.15.0/pkg/kubelet/dockershim/exec.go#L127
+// For example, if the `kubectl exec -it` process is killed, IO will be closed. In
+// this case, the CRI plugin will still have a goroutine waiting for the exec process
+// to exit and log the exit code, but dockershim won't.
+func (c *criService) execInContainer(ctx context.Context, id string, opts execOptions) (*uint32, error) {
+	// Get container from our container store.
+	cntr, err := c.containerStore.Get(id)
+
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to find container %q in store", id)
+	}
+	// Use the canonical ID from the store from here on (the caller-supplied
+	// id may be a lookup key — presumably a truncated/aliased id; verify
+	// against containerStore.Get's semantics).
+	id = cntr.ID
+
+	// Only running containers can be exec'd into.
+	state := cntr.Status.Get().State()
+	if state != runtime.ContainerState_CONTAINER_RUNNING {
+		return nil, errors.Errorf("container is in %s state", criContainerStateToString(state))
+	}
+
+	return c.execInternal(ctx, cntr.Container, id, opts)
+}
diff --git a/pkg/server/container_list.go b/pkg/server/container_list.go
new file mode 100644
index 000000000..c9e88d13d
--- /dev/null
+++ b/pkg/server/container_list.go
@@ -0,0 +1,112 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "golang.org/x/net/context"
+
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ containerstore "github.com/containerd/cri/pkg/store/container"
+)
+
+// ListContainers lists all containers matching the filter.
+func (c *criService) ListContainers(ctx context.Context, r *runtime.ListContainersRequest) (*runtime.ListContainersResponse, error) {
+ // List all containers from store.
+ containersInStore := c.containerStore.List()
+
+ var containers []*runtime.Container
+ for _, container := range containersInStore {
+ containers = append(containers, toCRIContainer(container))
+ }
+
+ containers = c.filterCRIContainers(containers, r.GetFilter())
+ return &runtime.ListContainersResponse{Containers: containers}, nil
+}
+
+// toCRIContainer converts internal container object into CRI container.
+func toCRIContainer(container containerstore.Container) *runtime.Container {
+ status := container.Status.Get()
+ return &runtime.Container{
+ Id: container.ID,
+ PodSandboxId: container.SandboxID,
+ Metadata: container.Config.GetMetadata(),
+ Image: container.Config.GetImage(),
+ ImageRef: container.ImageRef,
+ State: status.State(),
+ CreatedAt: status.CreatedAt,
+ Labels: container.Config.GetLabels(),
+ Annotations: container.Config.GetAnnotations(),
+ }
+}
+
+func (c *criService) normalizeContainerFilter(filter *runtime.ContainerFilter) {
+ if cntr, err := c.containerStore.Get(filter.GetId()); err == nil {
+ filter.Id = cntr.ID
+ }
+ if sb, err := c.sandboxStore.Get(filter.GetPodSandboxId()); err == nil {
+ filter.PodSandboxId = sb.ID
+ }
+}
+
+// filterCRIContainers filters CRIContainers.
+func (c *criService) filterCRIContainers(containers []*runtime.Container, filter *runtime.ContainerFilter) []*runtime.Container {
+ if filter == nil {
+ return containers
+ }
+
+ // The containerd cri plugin supports short ids so long as there is only one
+ // match. So we do a lookup against the store here if a pod id has been
+ // included in the filter.
+ sb := filter.GetPodSandboxId()
+ if sb != "" {
+ sandbox, err := c.sandboxStore.Get(sb)
+ if err == nil {
+ sb = sandbox.ID
+ }
+ }
+
+ c.normalizeContainerFilter(filter)
+ filtered := []*runtime.Container{}
+ for _, cntr := range containers {
+ if filter.GetId() != "" && filter.GetId() != cntr.Id {
+ continue
+ }
+ if sb != "" && sb != cntr.PodSandboxId {
+ continue
+ }
+ if filter.GetState() != nil && filter.GetState().GetState() != cntr.State {
+ continue
+ }
+ if filter.GetLabelSelector() != nil {
+ match := true
+ for k, v := range filter.GetLabelSelector() {
+ got, ok := cntr.Labels[k]
+ if !ok || got != v {
+ match = false
+ break
+ }
+ }
+ if !match {
+ continue
+ }
+ }
+ filtered = append(filtered, cntr)
+ }
+
+ return filtered
+}
diff --git a/pkg/server/container_list_test.go b/pkg/server/container_list_test.go
new file mode 100644
index 000000000..ccee3ce5a
--- /dev/null
+++ b/pkg/server/container_list_test.go
@@ -0,0 +1,345 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ containerstore "github.com/containerd/cri/pkg/store/container"
+ sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
+)
+
+func TestToCRIContainer(t *testing.T) {
+ config := &runtime.ContainerConfig{
+ Metadata: &runtime.ContainerMetadata{
+ Name: "test-name",
+ Attempt: 1,
+ },
+ Image: &runtime.ImageSpec{Image: "test-image"},
+ Labels: map[string]string{"a": "b"},
+ Annotations: map[string]string{"c": "d"},
+ }
+ createdAt := time.Now().UnixNano()
+ container, err := containerstore.NewContainer(
+ containerstore.Metadata{
+ ID: "test-id",
+ Name: "test-name",
+ SandboxID: "test-sandbox-id",
+ Config: config,
+ ImageRef: "test-image-ref",
+ },
+ containerstore.WithFakeStatus(
+ containerstore.Status{
+ Pid: 1234,
+ CreatedAt: createdAt,
+ StartedAt: time.Now().UnixNano(),
+ FinishedAt: time.Now().UnixNano(),
+ ExitCode: 1,
+ Reason: "test-reason",
+ Message: "test-message",
+ },
+ ),
+ )
+ assert.NoError(t, err)
+ expect := &runtime.Container{
+ Id: "test-id",
+ PodSandboxId: "test-sandbox-id",
+ Metadata: config.GetMetadata(),
+ Image: config.GetImage(),
+ ImageRef: "test-image-ref",
+ State: runtime.ContainerState_CONTAINER_EXITED,
+ CreatedAt: createdAt,
+ Labels: config.GetLabels(),
+ Annotations: config.GetAnnotations(),
+ }
+ c := toCRIContainer(container)
+ assert.Equal(t, expect, c)
+}
+
+func TestFilterContainers(t *testing.T) {
+ c := newTestCRIService()
+
+ testContainers := []*runtime.Container{
+ {
+ Id: "1",
+ PodSandboxId: "s-1",
+ Metadata: &runtime.ContainerMetadata{Name: "name-1", Attempt: 1},
+ State: runtime.ContainerState_CONTAINER_RUNNING,
+ },
+ {
+ Id: "2",
+ PodSandboxId: "s-2",
+ Metadata: &runtime.ContainerMetadata{Name: "name-2", Attempt: 2},
+ State: runtime.ContainerState_CONTAINER_EXITED,
+ Labels: map[string]string{"a": "b"},
+ },
+ {
+ Id: "3",
+ PodSandboxId: "s-2",
+ Metadata: &runtime.ContainerMetadata{Name: "name-2", Attempt: 3},
+ State: runtime.ContainerState_CONTAINER_CREATED,
+ Labels: map[string]string{"c": "d"},
+ },
+ }
+ for desc, test := range map[string]struct {
+ filter *runtime.ContainerFilter
+ expect []*runtime.Container
+ }{
+ "no filter": {
+ expect: testContainers,
+ },
+ "id filter": {
+ filter: &runtime.ContainerFilter{Id: "2"},
+ expect: []*runtime.Container{testContainers[1]},
+ },
+ "state filter": {
+ filter: &runtime.ContainerFilter{
+ State: &runtime.ContainerStateValue{
+ State: runtime.ContainerState_CONTAINER_EXITED,
+ },
+ },
+ expect: []*runtime.Container{testContainers[1]},
+ },
+ "label filter": {
+ filter: &runtime.ContainerFilter{
+ LabelSelector: map[string]string{"a": "b"},
+ },
+ expect: []*runtime.Container{testContainers[1]},
+ },
+ "sandbox id filter": {
+ filter: &runtime.ContainerFilter{PodSandboxId: "s-2"},
+ expect: []*runtime.Container{testContainers[1], testContainers[2]},
+ },
+ "mixed filter not matched": {
+ filter: &runtime.ContainerFilter{
+ Id: "1",
+ PodSandboxId: "s-2",
+ LabelSelector: map[string]string{"a": "b"},
+ },
+ expect: []*runtime.Container{},
+ },
+ "mixed filter matched": {
+ filter: &runtime.ContainerFilter{
+ PodSandboxId: "s-2",
+ State: &runtime.ContainerStateValue{
+ State: runtime.ContainerState_CONTAINER_CREATED,
+ },
+ LabelSelector: map[string]string{"c": "d"},
+ },
+ expect: []*runtime.Container{testContainers[2]},
+ },
+ } {
+ filtered := c.filterCRIContainers(testContainers, test.filter)
+ assert.Equal(t, test.expect, filtered, desc)
+ }
+}
+
+// containerForTest is a helper type for test.
+type containerForTest struct {
+ metadata containerstore.Metadata
+ status containerstore.Status
+}
+
+func (c containerForTest) toContainer() (containerstore.Container, error) {
+ return containerstore.NewContainer(
+ c.metadata,
+ containerstore.WithFakeStatus(c.status),
+ )
+}
+
+func TestListContainers(t *testing.T) {
+ c := newTestCRIService()
+ sandboxesInStore := []sandboxstore.Sandbox{
+ sandboxstore.NewSandbox(
+ sandboxstore.Metadata{
+ ID: "s-1abcdef1234",
+ Name: "sandboxname-1",
+ Config: &runtime.PodSandboxConfig{Metadata: &runtime.PodSandboxMetadata{Name: "podname-1"}},
+ },
+ sandboxstore.Status{
+ State: sandboxstore.StateReady,
+ },
+ ),
+ sandboxstore.NewSandbox(
+ sandboxstore.Metadata{
+ ID: "s-2abcdef1234",
+ Name: "sandboxname-2",
+ Config: &runtime.PodSandboxConfig{Metadata: &runtime.PodSandboxMetadata{Name: "podname-2"}},
+ },
+ sandboxstore.Status{
+ State: sandboxstore.StateNotReady,
+ },
+ ),
+ }
+ createdAt := time.Now().UnixNano()
+ startedAt := time.Now().UnixNano()
+ finishedAt := time.Now().UnixNano()
+ containersInStore := []containerForTest{
+ {
+ metadata: containerstore.Metadata{
+ ID: "c-1container",
+ Name: "name-1",
+ SandboxID: "s-1abcdef1234",
+ Config: &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-1"}},
+ },
+ status: containerstore.Status{CreatedAt: createdAt},
+ },
+ {
+ metadata: containerstore.Metadata{
+ ID: "c-2container",
+ Name: "name-2",
+ SandboxID: "s-1abcdef1234",
+ Config: &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-2"}},
+ },
+ status: containerstore.Status{
+ CreatedAt: createdAt,
+ StartedAt: startedAt,
+ },
+ },
+ {
+ metadata: containerstore.Metadata{
+ ID: "c-3container",
+ Name: "name-3",
+ SandboxID: "s-1abcdef1234",
+ Config: &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-3"}},
+ },
+ status: containerstore.Status{
+ CreatedAt: createdAt,
+ StartedAt: startedAt,
+ FinishedAt: finishedAt,
+ },
+ },
+ {
+ metadata: containerstore.Metadata{
+ ID: "c-4container",
+ Name: "name-4",
+ SandboxID: "s-2abcdef1234",
+ Config: &runtime.ContainerConfig{Metadata: &runtime.ContainerMetadata{Name: "name-4"}},
+ },
+ status: containerstore.Status{
+ CreatedAt: createdAt,
+ },
+ },
+ }
+
+ expectedContainers := []*runtime.Container{
+ {
+ Id: "c-1container",
+ PodSandboxId: "s-1abcdef1234",
+ Metadata: &runtime.ContainerMetadata{Name: "name-1"},
+ State: runtime.ContainerState_CONTAINER_CREATED,
+ CreatedAt: createdAt,
+ },
+ {
+ Id: "c-2container",
+ PodSandboxId: "s-1abcdef1234",
+ Metadata: &runtime.ContainerMetadata{Name: "name-2"},
+ State: runtime.ContainerState_CONTAINER_RUNNING,
+ CreatedAt: createdAt,
+ },
+ {
+ Id: "c-3container",
+ PodSandboxId: "s-1abcdef1234",
+ Metadata: &runtime.ContainerMetadata{Name: "name-3"},
+ State: runtime.ContainerState_CONTAINER_EXITED,
+ CreatedAt: createdAt,
+ },
+ {
+ Id: "c-4container",
+ PodSandboxId: "s-2abcdef1234",
+ Metadata: &runtime.ContainerMetadata{Name: "name-4"},
+ State: runtime.ContainerState_CONTAINER_CREATED,
+ CreatedAt: createdAt,
+ },
+ }
+
+ // Inject test sandbox metadata
+ for _, sb := range sandboxesInStore {
+ assert.NoError(t, c.sandboxStore.Add(sb))
+ }
+
+ // Inject test container metadata
+ for _, cntr := range containersInStore {
+ container, err := cntr.toContainer()
+ assert.NoError(t, err)
+ assert.NoError(t, c.containerStore.Add(container))
+ }
+
+ for testdesc, testdata := range map[string]struct {
+ filter *runtime.ContainerFilter
+ expect []*runtime.Container
+ }{
+ "test without filter": {
+ filter: &runtime.ContainerFilter{},
+ expect: expectedContainers,
+ },
+ "test filter by sandboxid": {
+ filter: &runtime.ContainerFilter{
+ PodSandboxId: "s-1abcdef1234",
+ },
+ expect: expectedContainers[:3],
+ },
+ "test filter by truncated sandboxid": {
+ filter: &runtime.ContainerFilter{
+ PodSandboxId: "s-1",
+ },
+ expect: expectedContainers[:3],
+ },
+ "test filter by containerid": {
+ filter: &runtime.ContainerFilter{
+ Id: "c-1container",
+ },
+ expect: expectedContainers[:1],
+ },
+ "test filter by truncated containerid": {
+ filter: &runtime.ContainerFilter{
+ Id: "c-1",
+ },
+ expect: expectedContainers[:1],
+ },
+ "test filter by containerid and sandboxid": {
+ filter: &runtime.ContainerFilter{
+ Id: "c-1container",
+ PodSandboxId: "s-1abcdef1234",
+ },
+ expect: expectedContainers[:1],
+ },
+ "test filter by truncated containerid and truncated sandboxid": {
+ filter: &runtime.ContainerFilter{
+ Id: "c-1",
+ PodSandboxId: "s-1",
+ },
+ expect: expectedContainers[:1],
+ },
+ } {
+ t.Logf("TestCase: %s", testdesc)
+ resp, err := c.ListContainers(context.Background(), &runtime.ListContainersRequest{Filter: testdata.filter})
+ assert.NoError(t, err)
+ require.NotNil(t, resp)
+ containers := resp.GetContainers()
+ assert.Len(t, containers, len(testdata.expect))
+ for _, cntr := range testdata.expect {
+ assert.Contains(t, containers, cntr)
+ }
+ }
+}
diff --git a/pkg/server/container_log_reopen.go b/pkg/server/container_log_reopen.go
new file mode 100644
index 000000000..b15bb6238
--- /dev/null
+++ b/pkg/server/container_log_reopen.go
@@ -0,0 +1,51 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// ReopenContainerLog asks the cri plugin to reopen the stdout/stderr log file for the container.
+// This is often called after the log file has been rotated.
+func (c *criService) ReopenContainerLog(ctx context.Context, r *runtime.ReopenContainerLogRequest) (*runtime.ReopenContainerLogResponse, error) {
+ container, err := c.containerStore.Get(r.GetContainerId())
+ if err != nil {
+ return nil, errors.Wrapf(err, "an error occurred when try to find container %q", r.GetContainerId())
+ }
+
+ if container.Status.Get().State() != runtime.ContainerState_CONTAINER_RUNNING {
+ return nil, errors.New("container is not running")
+ }
+
+ // Create new container logger and replace the existing ones.
+ stdoutWC, stderrWC, err := c.createContainerLoggers(container.LogPath, container.Config.GetTty())
+ if err != nil {
+ return nil, err
+ }
+ oldStdoutWC, oldStderrWC := container.IO.AddOutput("log", stdoutWC, stderrWC)
+ if oldStdoutWC != nil {
+ oldStdoutWC.Close()
+ }
+ if oldStderrWC != nil {
+ oldStderrWC.Close()
+ }
+ return &runtime.ReopenContainerLogResponse{}, nil
+}
diff --git a/pkg/server/container_remove.go b/pkg/server/container_remove.go
new file mode 100644
index 000000000..6426635dd
--- /dev/null
+++ b/pkg/server/container_remove.go
@@ -0,0 +1,135 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/errdefs"
+ "github.com/containerd/containerd/log"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/store"
+ containerstore "github.com/containerd/cri/pkg/store/container"
+)
+
+// RemoveContainer removes the container.
+func (c *criService) RemoveContainer(ctx context.Context, r *runtime.RemoveContainerRequest) (_ *runtime.RemoveContainerResponse, retErr error) {
+ container, err := c.containerStore.Get(r.GetContainerId())
+ if err != nil {
+ if err != store.ErrNotExist {
+ return nil, errors.Wrapf(err, "an error occurred when try to find container %q", r.GetContainerId())
+ }
+ // Do not return error if container metadata doesn't exist.
+ log.G(ctx).Tracef("RemoveContainer called for container %q that does not exist", r.GetContainerId())
+ return &runtime.RemoveContainerResponse{}, nil
+ }
+ id := container.ID
+
+ // Forcibly stop the containers if they are in running or unknown state
+ state := container.Status.Get().State()
+ if state == runtime.ContainerState_CONTAINER_RUNNING ||
+ state == runtime.ContainerState_CONTAINER_UNKNOWN {
+ logrus.Infof("Forcibly stopping container %q", id)
+ if err := c.stopContainer(ctx, container, 0); err != nil {
+ return nil, errors.Wrapf(err, "failed to forcibly stop container %q", id)
+ }
+
+ }
+
+ // Set removing state to prevent other start/remove operations against this container
+ // while it's being removed.
+ if err := setContainerRemoving(container); err != nil {
+ return nil, errors.Wrapf(err, "failed to set removing state for container %q", id)
+ }
+ defer func() {
+ if retErr != nil {
+ // Reset removing if remove failed.
+ if err := resetContainerRemoving(container); err != nil {
+ log.G(ctx).WithError(err).Errorf("failed to reset removing state for container %q", id)
+ }
+ }
+ }()
+
+ // NOTE(random-liu): Docker set container to "Dead" state when start removing the
+ // container so as to avoid start/restart the container again. However, for current
+ // kubelet implementation, we'll never start a container once we decide to remove it,
+ // so we don't need the "Dead" state for now.
+
+ // Delete containerd container.
+ if err := container.Container.Delete(ctx, containerd.WithSnapshotCleanup); err != nil {
+ if !errdefs.IsNotFound(err) {
+ return nil, errors.Wrapf(err, "failed to delete containerd container %q", id)
+ }
+ log.G(ctx).Tracef("Remove called for containerd container %q that does not exist", id)
+ }
+
+ // Delete container checkpoint.
+ if err := container.Delete(); err != nil {
+ return nil, errors.Wrapf(err, "failed to delete container checkpoint for %q", id)
+ }
+
+ containerRootDir := c.getContainerRootDir(id)
+ if err := ensureRemoveAll(ctx, containerRootDir); err != nil {
+ return nil, errors.Wrapf(err, "failed to remove container root directory %q",
+ containerRootDir)
+ }
+ volatileContainerRootDir := c.getVolatileContainerRootDir(id)
+ if err := ensureRemoveAll(ctx, volatileContainerRootDir); err != nil {
+ return nil, errors.Wrapf(err, "failed to remove volatile container root directory %q",
+ volatileContainerRootDir)
+ }
+
+ c.containerStore.Delete(id)
+
+ c.containerNameIndex.ReleaseByKey(id)
+
+ return &runtime.RemoveContainerResponse{}, nil
+}
+
+// setContainerRemoving sets the container into removing state. In removing state, the
+// container will not be started or removed again.
+func setContainerRemoving(container containerstore.Container) error {
+ return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
+ // Do not remove container if it's still running or unknown.
+ if status.State() == runtime.ContainerState_CONTAINER_RUNNING {
+ return status, errors.New("container is still running, to stop first")
+ }
+ if status.State() == runtime.ContainerState_CONTAINER_UNKNOWN {
+ return status, errors.New("container state is unknown, to stop first")
+ }
+ if status.Starting {
+ return status, errors.New("container is in starting state, can't be removed")
+ }
+ if status.Removing {
+ return status, errors.New("container is already in removing state")
+ }
+ status.Removing = true
+ return status, nil
+ })
+}
+
+// resetContainerRemoving resets the container removing state on remove failure. So
+// that we could remove the container again.
+func resetContainerRemoving(container containerstore.Container) error {
+ return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
+ status.Removing = false
+ return status, nil
+ })
+}
diff --git a/pkg/server/container_remove_test.go b/pkg/server/container_remove_test.go
new file mode 100644
index 000000000..7093f4808
--- /dev/null
+++ b/pkg/server/container_remove_test.go
@@ -0,0 +1,85 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+
+ containerstore "github.com/containerd/cri/pkg/store/container"
+)
+
+// TestSetContainerRemoving tests setContainerRemoving sets removing
+// state correctly.
+func TestSetContainerRemoving(t *testing.T) {
+ testID := "test-id"
+ for desc, test := range map[string]struct {
+ status containerstore.Status
+ expectErr bool
+ }{
+ "should return error when container is in running state": {
+ status: containerstore.Status{
+ CreatedAt: time.Now().UnixNano(),
+ StartedAt: time.Now().UnixNano(),
+ },
+ expectErr: true,
+ },
+ "should return error when container is in starting state": {
+ status: containerstore.Status{
+ CreatedAt: time.Now().UnixNano(),
+ Starting: true,
+ },
+ expectErr: true,
+ },
+ "should return error when container is in removing state": {
+ status: containerstore.Status{
+ CreatedAt: time.Now().UnixNano(),
+ StartedAt: time.Now().UnixNano(),
+ FinishedAt: time.Now().UnixNano(),
+ Removing: true,
+ },
+ expectErr: true,
+ },
+ "should not return error when container is not running and removing": {
+ status: containerstore.Status{
+ CreatedAt: time.Now().UnixNano(),
+ StartedAt: time.Now().UnixNano(),
+ FinishedAt: time.Now().UnixNano(),
+ },
+ expectErr: false,
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+ container, err := containerstore.NewContainer(
+ containerstore.Metadata{ID: testID},
+ containerstore.WithFakeStatus(test.status),
+ )
+ assert.NoError(t, err)
+ err = setContainerRemoving(container)
+ if test.expectErr {
+ assert.Error(t, err)
+ assert.Equal(t, test.status, container.Status.Get(), "metadata should not be updated")
+ } else {
+ assert.NoError(t, err)
+ assert.True(t, container.Status.Get().Removing, "removing should be set")
+ assert.NoError(t, resetContainerRemoving(container))
+ assert.False(t, container.Status.Get().Removing, "removing should be reset")
+ }
+ }
+}
diff --git a/pkg/server/container_start.go b/pkg/server/container_start.go
new file mode 100644
index 000000000..4be89c9a6
--- /dev/null
+++ b/pkg/server/container_start.go
@@ -0,0 +1,223 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "io"
+ "time"
+
+ "github.com/containerd/containerd"
+ containerdio "github.com/containerd/containerd/cio"
+ "github.com/containerd/containerd/errdefs"
+ "github.com/containerd/containerd/log"
+ "github.com/containerd/nri"
+ v1 "github.com/containerd/nri/types/v1"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ ctrdutil "github.com/containerd/cri/pkg/containerd/util"
+ cioutil "github.com/containerd/cri/pkg/ioutil"
+ cio "github.com/containerd/cri/pkg/server/io"
+ containerstore "github.com/containerd/cri/pkg/store/container"
+ sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
+)
+
+// StartContainer starts the container.
+func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (retRes *runtime.StartContainerResponse, retErr error) {
+ cntr, err := c.containerStore.Get(r.GetContainerId())
+ if err != nil {
+ return nil, errors.Wrapf(err, "an error occurred when try to find container %q", r.GetContainerId())
+ }
+
+ id := cntr.ID
+ meta := cntr.Metadata
+ container := cntr.Container
+ config := meta.Config
+
+ // Set starting state to prevent other start/remove operations against this container
+ // while it's being started.
+ if err := setContainerStarting(cntr); err != nil {
+ return nil, errors.Wrapf(err, "failed to set starting state for container %q", id)
+ }
+ defer func() {
+ if retErr != nil {
+ // Set container to exited if fail to start.
+ if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
+ status.Pid = 0
+ status.FinishedAt = time.Now().UnixNano()
+ status.ExitCode = errorStartExitCode
+ status.Reason = errorStartReason
+ status.Message = retErr.Error()
+ return status, nil
+ }); err != nil {
+ log.G(ctx).WithError(err).Errorf("failed to set start failure state for container %q", id)
+ }
+ }
+ if err := resetContainerStarting(cntr); err != nil {
+ log.G(ctx).WithError(err).Errorf("failed to reset starting state for container %q", id)
+ }
+ }()
+
+ // Get sandbox config from sandbox store.
+ sandbox, err := c.sandboxStore.Get(meta.SandboxID)
+ if err != nil {
+ return nil, errors.Wrapf(err, "sandbox %q not found", meta.SandboxID)
+ }
+ sandboxID := meta.SandboxID
+ if sandbox.Status.Get().State != sandboxstore.StateReady {
+ return nil, errors.Errorf("sandbox container %q is not running", sandboxID)
+ }
+
+ ioCreation := func(id string) (_ containerdio.IO, err error) {
+ stdoutWC, stderrWC, err := c.createContainerLoggers(meta.LogPath, config.GetTty())
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to create container loggers")
+ }
+ cntr.IO.AddOutput("log", stdoutWC, stderrWC)
+ cntr.IO.Pipe()
+ return cntr.IO, nil
+ }
+
+ ctrInfo, err := container.Info(ctx)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to get container info")
+ }
+
+ taskOpts := c.taskOpts(ctrInfo.Runtime.Name)
+ task, err := container.NewTask(ctx, ioCreation, taskOpts...)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to create containerd task")
+ }
+ defer func() {
+ if retErr != nil {
+ deferCtx, deferCancel := ctrdutil.DeferContext()
+ defer deferCancel()
+ // It's possible that task is deleted by event monitor.
+ if _, err := task.Delete(deferCtx, WithNRISandboxDelete(sandboxID), containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
+ log.G(ctx).WithError(err).Errorf("Failed to delete containerd task %q", id)
+ }
+ }
+ }()
+
+ // wait is a long running background request, no timeout needed.
+ exitCh, err := task.Wait(ctrdutil.NamespacedContext())
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to wait for containerd task")
+ }
+ nric, err := nri.New()
+ if err != nil {
+ log.G(ctx).WithError(err).Error("unable to create nri client")
+ }
+ if nric != nil {
+ nriSB := &nri.Sandbox{
+ ID: sandboxID,
+ Labels: sandbox.Config.Labels,
+ }
+ if _, err := nric.InvokeWithSandbox(ctx, task, v1.Create, nriSB); err != nil {
+ return nil, errors.Wrap(err, "nri invoke")
+ }
+ }
+
+ // Start containerd task.
+ if err := task.Start(ctx); err != nil {
+ return nil, errors.Wrapf(err, "failed to start containerd task %q", id)
+ }
+
+ // Update container start timestamp.
+ if err := cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
+ status.Pid = task.Pid()
+ status.StartedAt = time.Now().UnixNano()
+ return status, nil
+ }); err != nil {
+ return nil, errors.Wrapf(err, "failed to update container %q state", id)
+ }
+
+ // start the monitor after updating container state, this ensures that
+ // event monitor receives the TaskExit event and update container state
+ // after this.
+ c.eventMonitor.startExitMonitor(context.Background(), id, task.Pid(), exitCh)
+
+ return &runtime.StartContainerResponse{}, nil
+}
+
+// setContainerStarting sets the container into starting state. In starting state, the
+// container will not be removed or started again.
+func setContainerStarting(container containerstore.Container) error {
+ return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
+ // Return error if container is not in created state.
+ if status.State() != runtime.ContainerState_CONTAINER_CREATED {
+ return status, errors.Errorf("container is in %s state", criContainerStateToString(status.State()))
+ }
+ // Do not start the container when there is a removal in progress.
+ if status.Removing {
+ return status, errors.New("container is in removing state, can't be started")
+ }
+ if status.Starting {
+ return status, errors.New("container is already in starting state")
+ }
+ status.Starting = true
+ return status, nil
+ })
+}
+
+// resetContainerStarting resets the container starting state on start failure. So
+// that we could remove the container later.
+func resetContainerStarting(container containerstore.Container) error {
+ return container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
+ status.Starting = false
+ return status, nil
+ })
+}
+
+// createContainerLoggers creates container loggers and returns write closers for stdout and stderr.
+func (c *criService) createContainerLoggers(logPath string, tty bool) (stdout io.WriteCloser, stderr io.WriteCloser, err error) {
+ if logPath != "" {
+ // Only generate container log when log path is specified.
+ f, err := openLogFile(logPath)
+ if err != nil {
+ return nil, nil, errors.Wrap(err, "failed to create and open log file")
+ }
+ defer func() {
+ if err != nil {
+ f.Close()
+ }
+ }()
+ var stdoutCh, stderrCh <-chan struct{}
+ wc := cioutil.NewSerialWriteCloser(f)
+ stdout, stdoutCh = cio.NewCRILogger(logPath, wc, cio.Stdout, c.config.MaxContainerLogLineSize)
+ // Only redirect stderr when there is no tty.
+ if !tty {
+ stderr, stderrCh = cio.NewCRILogger(logPath, wc, cio.Stderr, c.config.MaxContainerLogLineSize)
+ }
+ go func() {
+ if stdoutCh != nil {
+ <-stdoutCh
+ }
+ if stderrCh != nil {
+ <-stderrCh
+ }
+ logrus.Debugf("Finish redirecting log file %q, closing it", logPath)
+ f.Close()
+ }()
+ } else {
+ stdout = cio.NewDiscardLogger()
+ stderr = cio.NewDiscardLogger()
+ }
+ return
+}
diff --git a/pkg/server/container_start_test.go b/pkg/server/container_start_test.go
new file mode 100644
index 000000000..644bb45cd
--- /dev/null
+++ b/pkg/server/container_start_test.go
@@ -0,0 +1,98 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+
+ containerstore "github.com/containerd/cri/pkg/store/container"
+)
+
+// TestSetContainerStarting tests that setContainerStarting sets the starting
+// state correctly.
+func TestSetContainerStarting(t *testing.T) {
+ testID := "test-id"
+ for desc, test := range map[string]struct {
+ status containerstore.Status
+ expectErr bool
+ }{
+
+ "should not return error when container is in created state": {
+ status: containerstore.Status{
+ CreatedAt: time.Now().UnixNano(),
+ },
+ expectErr: false,
+ },
+ "should return error when container is in running state": {
+ status: containerstore.Status{
+ CreatedAt: time.Now().UnixNano(),
+ StartedAt: time.Now().UnixNano(),
+ },
+ expectErr: true,
+ },
+ "should return error when container is in exited state": {
+ status: containerstore.Status{
+ CreatedAt: time.Now().UnixNano(),
+ StartedAt: time.Now().UnixNano(),
+ FinishedAt: time.Now().UnixNano(),
+ },
+ expectErr: true,
+ },
+ "should return error when container is in unknown state": {
+ status: containerstore.Status{
+ CreatedAt: 0,
+ StartedAt: 0,
+ FinishedAt: 0,
+ },
+ expectErr: true,
+ },
+ "should return error when container is in starting state": {
+ status: containerstore.Status{
+ CreatedAt: time.Now().UnixNano(),
+ Starting: true,
+ },
+ expectErr: true,
+ },
+ "should return error when container is in removing state": {
+ status: containerstore.Status{
+ CreatedAt: time.Now().UnixNano(),
+ Removing: true,
+ },
+ expectErr: true,
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+ container, err := containerstore.NewContainer(
+ containerstore.Metadata{ID: testID},
+ containerstore.WithFakeStatus(test.status),
+ )
+ assert.NoError(t, err)
+ err = setContainerStarting(container)
+ if test.expectErr {
+ assert.Error(t, err)
+ assert.Equal(t, test.status, container.Status.Get(), "metadata should not be updated")
+ } else {
+ assert.NoError(t, err)
+ assert.True(t, container.Status.Get().Starting, "starting should be set")
+ assert.NoError(t, resetContainerStarting(container))
+ assert.False(t, container.Status.Get().Starting, "starting should be reset")
+ }
+ }
+}
diff --git a/pkg/server/container_stats.go b/pkg/server/container_stats.go
new file mode 100644
index 000000000..22607cdd8
--- /dev/null
+++ b/pkg/server/container_stats.go
@@ -0,0 +1,47 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ tasks "github.com/containerd/containerd/api/services/tasks/v1"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// ContainerStats returns stats of the container. If the container does not
+// exist, the call returns an error.
+func (c *criService) ContainerStats(ctx context.Context, in *runtime.ContainerStatsRequest) (*runtime.ContainerStatsResponse, error) {
+ cntr, err := c.containerStore.Get(in.GetContainerId())
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to find container")
+ }
+ request := &tasks.MetricsRequest{Filters: []string{"id==" + cntr.ID}}
+ resp, err := c.client.TaskService().Metrics(ctx, request)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to fetch metrics for task")
+ }
+ if len(resp.Metrics) != 1 {
+ return nil, errors.Errorf("unexpected metrics response: %+v", resp.Metrics)
+ }
+
+ cs, err := c.containerMetrics(cntr.Metadata, resp.Metrics[0])
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to decode container metrics")
+ }
+ return &runtime.ContainerStatsResponse{Stats: cs}, nil
+}
diff --git a/pkg/server/container_stats_list.go b/pkg/server/container_stats_list.go
new file mode 100644
index 000000000..0a9be8741
--- /dev/null
+++ b/pkg/server/container_stats_list.go
@@ -0,0 +1,116 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ tasks "github.com/containerd/containerd/api/services/tasks/v1"
+ "github.com/containerd/containerd/api/types"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ containerstore "github.com/containerd/cri/pkg/store/container"
+)
+
+// ListContainerStats returns stats of all running containers.
+func (c *criService) ListContainerStats(
+ ctx context.Context,
+ in *runtime.ListContainerStatsRequest,
+) (*runtime.ListContainerStatsResponse, error) {
+ request, containers, err := c.buildTaskMetricsRequest(in)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to build metrics request")
+ }
+ resp, err := c.client.TaskService().Metrics(ctx, &request)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to fetch metrics for tasks")
+ }
+ criStats, err := c.toCRIContainerStats(resp.Metrics, containers)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to convert to cri containerd stats format")
+ }
+ return criStats, nil
+}
+
+func (c *criService) toCRIContainerStats(
+ stats []*types.Metric,
+ containers []containerstore.Container,
+) (*runtime.ListContainerStatsResponse, error) {
+ statsMap := make(map[string]*types.Metric)
+ for _, stat := range stats {
+ statsMap[stat.ID] = stat
+ }
+ containerStats := new(runtime.ListContainerStatsResponse)
+ for _, cntr := range containers {
+ cs, err := c.containerMetrics(cntr.Metadata, statsMap[cntr.ID])
+ if err != nil {
+ return nil, errors.Wrapf(err, "failed to decode container metrics for %q", cntr.ID)
+ }
+ containerStats.Stats = append(containerStats.Stats, cs)
+ }
+ return containerStats, nil
+}
+
+func (c *criService) normalizeContainerStatsFilter(filter *runtime.ContainerStatsFilter) {
+ if cntr, err := c.containerStore.Get(filter.GetId()); err == nil {
+ filter.Id = cntr.ID
+ }
+ if sb, err := c.sandboxStore.Get(filter.GetPodSandboxId()); err == nil {
+ filter.PodSandboxId = sb.ID
+ }
+}
+
+// buildTaskMetricsRequest constructs a tasks.MetricsRequest based on
+// the information in the stats request and the containerStore
+func (c *criService) buildTaskMetricsRequest(
+ r *runtime.ListContainerStatsRequest,
+) (tasks.MetricsRequest, []containerstore.Container, error) {
+ var req tasks.MetricsRequest
+ if r.GetFilter() == nil {
+ return req, nil, nil
+ }
+ c.normalizeContainerStatsFilter(r.GetFilter())
+ var containers []containerstore.Container
+ for _, cntr := range c.containerStore.List() {
+ if r.GetFilter().GetId() != "" && cntr.ID != r.GetFilter().GetId() {
+ continue
+ }
+ if r.GetFilter().GetPodSandboxId() != "" && cntr.SandboxID != r.GetFilter().GetPodSandboxId() {
+ continue
+ }
+ if r.GetFilter().GetLabelSelector() != nil &&
+ !matchLabelSelector(r.GetFilter().GetLabelSelector(), cntr.Config.GetLabels()) {
+ continue
+ }
+ containers = append(containers, cntr)
+ req.Filters = append(req.Filters, "id=="+cntr.ID)
+ }
+ return req, containers, nil
+}
+
+func matchLabelSelector(selector, labels map[string]string) bool {
+ for k, v := range selector {
+ if val, ok := labels[k]; ok {
+ if v != val {
+ return false
+ }
+ } else {
+ return false
+ }
+ }
+ return true
+}
diff --git a/pkg/server/container_stats_list_linux.go b/pkg/server/container_stats_list_linux.go
new file mode 100644
index 000000000..d160b0928
--- /dev/null
+++ b/pkg/server/container_stats_list_linux.go
@@ -0,0 +1,127 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "fmt"
+
+ "github.com/containerd/containerd/api/types"
+ v1 "github.com/containerd/containerd/metrics/types/v1"
+ v2 "github.com/containerd/containerd/metrics/types/v2"
+ "github.com/containerd/typeurl"
+ "github.com/pkg/errors"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ containerstore "github.com/containerd/cri/pkg/store/container"
+)
+
+func (c *criService) containerMetrics(
+ meta containerstore.Metadata,
+ stats *types.Metric,
+) (*runtime.ContainerStats, error) {
+ var cs runtime.ContainerStats
+ var usedBytes, inodesUsed uint64
+ sn, err := c.snapshotStore.Get(meta.ID)
+ // If snapshotstore doesn't have cached snapshot information
+ // set WritableLayer usage to zero
+ if err == nil {
+ usedBytes = sn.Size
+ inodesUsed = sn.Inodes
+ }
+ cs.WritableLayer = &runtime.FilesystemUsage{
+ Timestamp: sn.Timestamp,
+ FsId: &runtime.FilesystemIdentifier{
+ Mountpoint: c.imageFSPath,
+ },
+ UsedBytes: &runtime.UInt64Value{Value: usedBytes},
+ InodesUsed: &runtime.UInt64Value{Value: inodesUsed},
+ }
+ cs.Attributes = &runtime.ContainerAttributes{
+ Id: meta.ID,
+ Metadata: meta.Config.GetMetadata(),
+ Labels: meta.Config.GetLabels(),
+ Annotations: meta.Config.GetAnnotations(),
+ }
+
+ if stats != nil {
+ s, err := typeurl.UnmarshalAny(stats.Data)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to extract container metrics")
+ }
+ switch metrics := s.(type) {
+ case *v1.Metrics:
+ if metrics.CPU != nil && metrics.CPU.Usage != nil {
+ cs.Cpu = &runtime.CpuUsage{
+ Timestamp: stats.Timestamp.UnixNano(),
+ UsageCoreNanoSeconds: &runtime.UInt64Value{Value: metrics.CPU.Usage.Total},
+ }
+ }
+ if metrics.Memory != nil && metrics.Memory.Usage != nil {
+ cs.Memory = &runtime.MemoryUsage{
+ Timestamp: stats.Timestamp.UnixNano(),
+ WorkingSetBytes: &runtime.UInt64Value{
+ Value: getWorkingSet(metrics.Memory),
+ },
+ }
+ }
+ case *v2.Metrics:
+ if metrics.CPU != nil {
+ cs.Cpu = &runtime.CpuUsage{
+ Timestamp: stats.Timestamp.UnixNano(),
+ UsageCoreNanoSeconds: &runtime.UInt64Value{Value: metrics.CPU.UsageUsec * 1000},
+ }
+ }
+ if metrics.Memory != nil {
+ cs.Memory = &runtime.MemoryUsage{
+ Timestamp: stats.Timestamp.UnixNano(),
+ WorkingSetBytes: &runtime.UInt64Value{
+ Value: getWorkingSetV2(metrics.Memory),
+ },
+ }
+ }
+ default:
+ return &cs, errors.New(fmt.Sprintf("unexpected metrics type: %v", metrics))
+ }
+ }
+
+ return &cs, nil
+}
+
+// getWorkingSet calculates workingset memory from cgroup memory stats.
+// The caller should make sure memory is not nil.
+// workingset = usage - total_inactive_file
+func getWorkingSet(memory *v1.MemoryStat) uint64 {
+ if memory.Usage == nil {
+ return 0
+ }
+ var workingSet uint64
+ if memory.TotalInactiveFile < memory.Usage.Usage {
+ workingSet = memory.Usage.Usage - memory.TotalInactiveFile
+ }
+ return workingSet
+}
+
+// getWorkingSetV2 calculates workingset memory from cgroupv2 memory stats.
+// The caller should make sure memory is not nil.
+// workingset = usage - inactive_file
+func getWorkingSetV2(memory *v2.MemoryStat) uint64 {
+ var workingSet uint64
+ if memory.InactiveFile < memory.Usage {
+ workingSet = memory.Usage - memory.InactiveFile
+ }
+ return workingSet
+}
diff --git a/pkg/server/container_stats_list_linux_test.go b/pkg/server/container_stats_list_linux_test.go
new file mode 100644
index 000000000..a35b5f21a
--- /dev/null
+++ b/pkg/server/container_stats_list_linux_test.go
@@ -0,0 +1,55 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+
+ v1 "github.com/containerd/cgroups/stats/v1"
+ "github.com/stretchr/testify/assert"
+)
+
+func TestGetWorkingSet(t *testing.T) {
+ for desc, test := range map[string]struct {
+ memory *v1.MemoryStat
+ expected uint64
+ }{
+ "nil memory usage": {
+ memory: &v1.MemoryStat{},
+ expected: 0,
+ },
+ "memory usage higher than total_inactive_file": {
+ memory: &v1.MemoryStat{
+ TotalInactiveFile: 1000,
+ Usage: &v1.MemoryEntry{Usage: 2000},
+ },
+ expected: 1000,
+ },
+ "memory usage lower than total_inactive_file": {
+ memory: &v1.MemoryStat{
+ TotalInactiveFile: 2000,
+ Usage: &v1.MemoryEntry{Usage: 1000},
+ },
+ expected: 0,
+ },
+ } {
+ t.Run(desc, func(t *testing.T) {
+ got := getWorkingSet(test.memory)
+ assert.Equal(t, test.expected, got)
+ })
+ }
+}
diff --git a/pkg/server/container_stats_list_other.go b/pkg/server/container_stats_list_other.go
new file mode 100644
index 000000000..b4bc28ff5
--- /dev/null
+++ b/pkg/server/container_stats_list_other.go
@@ -0,0 +1,36 @@
+// +build !windows,!linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "github.com/containerd/containerd/api/types"
+ "github.com/containerd/containerd/errdefs"
+ "github.com/pkg/errors"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ containerstore "github.com/containerd/cri/pkg/store/container"
+)
+
+func (c *criService) containerMetrics(
+ meta containerstore.Metadata,
+ stats *types.Metric,
+) (*runtime.ContainerStats, error) {
+ var cs runtime.ContainerStats
+ return &cs, errors.Wrap(errdefs.ErrNotImplemented, "container metrics")
+}
diff --git a/pkg/server/container_stats_list_windows.go b/pkg/server/container_stats_list_windows.go
new file mode 100644
index 000000000..4bd3b64c1
--- /dev/null
+++ b/pkg/server/container_stats_list_windows.go
@@ -0,0 +1,84 @@
+// +build windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ wstats "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats"
+ "github.com/containerd/containerd/api/types"
+ "github.com/containerd/typeurl"
+ "github.com/pkg/errors"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ containerstore "github.com/containerd/cri/pkg/store/container"
+)
+
+func (c *criService) containerMetrics(
+ meta containerstore.Metadata,
+ stats *types.Metric,
+) (*runtime.ContainerStats, error) {
+ var cs runtime.ContainerStats
+ var usedBytes, inodesUsed uint64
+ sn, err := c.snapshotStore.Get(meta.ID)
+ // If snapshotstore doesn't have cached snapshot information
+ // set WritableLayer usage to zero
+ if err == nil {
+ usedBytes = sn.Size
+ inodesUsed = sn.Inodes
+ }
+ cs.WritableLayer = &runtime.FilesystemUsage{
+ Timestamp: sn.Timestamp,
+ FsId: &runtime.FilesystemIdentifier{
+ Mountpoint: c.imageFSPath,
+ },
+ UsedBytes: &runtime.UInt64Value{Value: usedBytes},
+ InodesUsed: &runtime.UInt64Value{Value: inodesUsed},
+ }
+ cs.Attributes = &runtime.ContainerAttributes{
+ Id: meta.ID,
+ Metadata: meta.Config.GetMetadata(),
+ Labels: meta.Config.GetLabels(),
+ Annotations: meta.Config.GetAnnotations(),
+ }
+
+ if stats != nil {
+ s, err := typeurl.UnmarshalAny(stats.Data)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to extract container metrics")
+ }
+ wstats := s.(*wstats.Statistics).GetWindows()
+ if wstats == nil {
+ return nil, errors.New("windows stats is empty")
+ }
+ if wstats.Processor != nil {
+ cs.Cpu = &runtime.CpuUsage{
+ Timestamp: wstats.Timestamp.UnixNano(),
+ UsageCoreNanoSeconds: &runtime.UInt64Value{Value: wstats.Processor.TotalRuntimeNS},
+ }
+ }
+ if wstats.Memory != nil {
+ cs.Memory = &runtime.MemoryUsage{
+ Timestamp: wstats.Timestamp.UnixNano(),
+ WorkingSetBytes: &runtime.UInt64Value{
+ Value: wstats.Memory.MemoryUsagePrivateWorkingSetBytes,
+ },
+ }
+ }
+ }
+ return &cs, nil
+}
diff --git a/pkg/server/container_status.go b/pkg/server/container_status.go
new file mode 100644
index 000000000..aeeb76db3
--- /dev/null
+++ b/pkg/server/container_status.go
@@ -0,0 +1,173 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "encoding/json"
+
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/store"
+ containerstore "github.com/containerd/cri/pkg/store/container"
+)
+
+// ContainerStatus inspects the container and returns the status.
+func (c *criService) ContainerStatus(ctx context.Context, r *runtime.ContainerStatusRequest) (*runtime.ContainerStatusResponse, error) {
+ container, err := c.containerStore.Get(r.GetContainerId())
+ if err != nil {
+ return nil, errors.Wrapf(err, "an error occurred when trying to find container %q", r.GetContainerId())
+ }
+
+ // TODO(random-liu): Clean up the following logic in CRI.
+ // Current assumption:
+ // * ImageSpec in container config is image ID.
+ // * ImageSpec in container status is image tag.
+ // * ImageRef in container status is repo digest.
+ spec := container.Config.GetImage()
+ imageRef := container.ImageRef
+ image, err := c.imageStore.Get(imageRef)
+ if err != nil {
+ if err != store.ErrNotExist {
+ return nil, errors.Wrapf(err, "failed to get image %q", imageRef)
+ }
+ } else {
+ repoTags, repoDigests := parseImageReferences(image.References)
+ if len(repoTags) > 0 {
+ // Based on current behavior of dockershim, this field should be
+ // image tag.
+ spec = &runtime.ImageSpec{Image: repoTags[0]}
+ }
+ if len(repoDigests) > 0 {
+ // Based on the CRI definition, this field will be consumed by user.
+ imageRef = repoDigests[0]
+ }
+ }
+ status := toCRIContainerStatus(container, spec, imageRef)
+ if status.GetCreatedAt() == 0 {
+ // CRI doesn't allow CreatedAt == 0.
+ info, err := container.Container.Info(ctx)
+ if err != nil {
+ return nil, errors.Wrapf(err, "failed to get CreatedAt in %q state", status.State)
+ }
+ status.CreatedAt = info.CreatedAt.UnixNano()
+ }
+
+ info, err := toCRIContainerInfo(ctx, container, r.GetVerbose())
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to get verbose container info")
+ }
+
+ return &runtime.ContainerStatusResponse{
+ Status: status,
+ Info: info,
+ }, nil
+}
+
+// toCRIContainerStatus converts internal container object to CRI container status.
+func toCRIContainerStatus(container containerstore.Container, spec *runtime.ImageSpec, imageRef string) *runtime.ContainerStatus {
+ meta := container.Metadata
+ status := container.Status.Get()
+ reason := status.Reason
+ if status.State() == runtime.ContainerState_CONTAINER_EXITED && reason == "" {
+ if status.ExitCode == 0 {
+ reason = completeExitReason
+ } else {
+ reason = errorExitReason
+ }
+ }
+
+ return &runtime.ContainerStatus{
+ Id: meta.ID,
+ Metadata: meta.Config.GetMetadata(),
+ State: status.State(),
+ CreatedAt: status.CreatedAt,
+ StartedAt: status.StartedAt,
+ FinishedAt: status.FinishedAt,
+ ExitCode: status.ExitCode,
+ Image: spec,
+ ImageRef: imageRef,
+ Reason: reason,
+ Message: status.Message,
+ Labels: meta.Config.GetLabels(),
+ Annotations: meta.Config.GetAnnotations(),
+ Mounts: meta.Config.GetMounts(),
+ LogPath: meta.LogPath,
+ }
+}
+
+// ContainerInfo is extra information for a container.
+type ContainerInfo struct {
+ // TODO(random-liu): Add sandboxID in CRI container status.
+ SandboxID string `json:"sandboxID"`
+ Pid uint32 `json:"pid"`
+ Removing bool `json:"removing"`
+ SnapshotKey string `json:"snapshotKey"`
+ Snapshotter string `json:"snapshotter"`
+ RuntimeType string `json:"runtimeType"`
+ RuntimeOptions interface{} `json:"runtimeOptions"`
+ Config *runtime.ContainerConfig `json:"config"`
+ RuntimeSpec *runtimespec.Spec `json:"runtimeSpec"`
+}
+
+// toCRIContainerInfo converts internal container object information to CRI container status response info map.
+func toCRIContainerInfo(ctx context.Context, container containerstore.Container, verbose bool) (map[string]string, error) {
+ if !verbose {
+ return nil, nil
+ }
+
+ meta := container.Metadata
+ status := container.Status.Get()
+
+ // TODO(random-liu): Change CRI status info to use array instead of map.
+ ci := &ContainerInfo{
+ SandboxID: container.SandboxID,
+ Pid: status.Pid,
+ Removing: status.Removing,
+ Config: meta.Config,
+ }
+
+ var err error
+ ci.RuntimeSpec, err = container.Container.Spec(ctx)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to get container runtime spec")
+ }
+
+ ctrInfo, err := container.Container.Info(ctx)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to get container info")
+ }
+ ci.SnapshotKey = ctrInfo.SnapshotKey
+ ci.Snapshotter = ctrInfo.Snapshotter
+
+ runtimeOptions, err := getRuntimeOptions(ctrInfo)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to get runtime options")
+ }
+ ci.RuntimeType = ctrInfo.Runtime.Name
+ ci.RuntimeOptions = runtimeOptions
+
+ infoBytes, err := json.Marshal(ci)
+ if err != nil {
+ return nil, errors.Wrapf(err, "failed to marshal info %v", ci)
+ }
+ return map[string]string{
+ "info": string(infoBytes),
+ }, nil
+}
diff --git a/pkg/server/container_status_test.go b/pkg/server/container_status_test.go
new file mode 100644
index 000000000..7ee9ed087
--- /dev/null
+++ b/pkg/server/container_status_test.go
@@ -0,0 +1,227 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ containerstore "github.com/containerd/cri/pkg/store/container"
+ imagestore "github.com/containerd/cri/pkg/store/image"
+)
+
+func getContainerStatusTestData() (*containerstore.Metadata, *containerstore.Status,
+ *imagestore.Image, *runtime.ContainerStatus) {
+ imageID := "sha256:1123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
+ testID := "test-id"
+ config := &runtime.ContainerConfig{
+ Metadata: &runtime.ContainerMetadata{
+ Name: "test-name",
+ Attempt: 1,
+ },
+ Image: &runtime.ImageSpec{Image: "test-image"},
+ Mounts: []*runtime.Mount{{
+ ContainerPath: "test-container-path",
+ HostPath: "test-host-path",
+ }},
+ Labels: map[string]string{"a": "b"},
+ Annotations: map[string]string{"c": "d"},
+ }
+
+ createdAt := time.Now().UnixNano()
+ startedAt := time.Now().UnixNano()
+
+ metadata := &containerstore.Metadata{
+ ID: testID,
+ Name: "test-long-name",
+ SandboxID: "test-sandbox-id",
+ Config: config,
+ ImageRef: imageID,
+ LogPath: "test-log-path",
+ }
+ status := &containerstore.Status{
+ Pid: 1234,
+ CreatedAt: createdAt,
+ StartedAt: startedAt,
+ }
+ image := &imagestore.Image{
+ ID: imageID,
+ References: []string{
+ "gcr.io/library/busybox:latest",
+ "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
+ },
+ }
+ expected := &runtime.ContainerStatus{
+ Id: testID,
+ Metadata: config.GetMetadata(),
+ State: runtime.ContainerState_CONTAINER_RUNNING,
+ CreatedAt: createdAt,
+ StartedAt: startedAt,
+ Image: &runtime.ImageSpec{Image: "gcr.io/library/busybox:latest"},
+ ImageRef: "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
+ Reason: completeExitReason,
+ Labels: config.GetLabels(),
+ Annotations: config.GetAnnotations(),
+ Mounts: config.GetMounts(),
+ LogPath: "test-log-path",
+ }
+
+ return metadata, status, image, expected
+}
+
+func TestToCRIContainerStatus(t *testing.T) {
+ for desc, test := range map[string]struct {
+ finishedAt int64
+ exitCode int32
+ reason string
+ message string
+ expectedState runtime.ContainerState
+ expectedReason string
+ }{
+ "container running": {
+ expectedState: runtime.ContainerState_CONTAINER_RUNNING,
+ },
+ "container exited with reason": {
+ finishedAt: time.Now().UnixNano(),
+ exitCode: 1,
+ reason: "test-reason",
+ message: "test-message",
+ expectedState: runtime.ContainerState_CONTAINER_EXITED,
+ expectedReason: "test-reason",
+ },
+ "container exited with exit code 0 without reason": {
+ finishedAt: time.Now().UnixNano(),
+ exitCode: 0,
+ message: "test-message",
+ expectedState: runtime.ContainerState_CONTAINER_EXITED,
+ expectedReason: completeExitReason,
+ },
+ "container exited with non-zero exit code without reason": {
+ finishedAt: time.Now().UnixNano(),
+ exitCode: 1,
+ message: "test-message",
+ expectedState: runtime.ContainerState_CONTAINER_EXITED,
+ expectedReason: errorExitReason,
+ },
+ } {
+ metadata, status, _, expected := getContainerStatusTestData()
+ // Update status with test case.
+ status.FinishedAt = test.finishedAt
+ status.ExitCode = test.exitCode
+ status.Reason = test.reason
+ status.Message = test.message
+ container, err := containerstore.NewContainer(
+ *metadata,
+ containerstore.WithFakeStatus(*status),
+ )
+ assert.NoError(t, err)
+ // Set expectation based on test case.
+ expected.State = test.expectedState
+ expected.Reason = test.expectedReason
+ expected.FinishedAt = test.finishedAt
+ expected.ExitCode = test.exitCode
+ expected.Message = test.message
+ containerStatus := toCRIContainerStatus(container,
+ expected.Image,
+ expected.ImageRef)
+ assert.Equal(t, expected, containerStatus, desc)
+ }
+}
+
+// TODO(mikebrow): add a fake containerd container.Container.Spec client api so we can test verbose is true option
+func TestToCRIContainerInfo(t *testing.T) {
+ metadata, status, _, _ := getContainerStatusTestData()
+ container, err := containerstore.NewContainer(
+ *metadata,
+ containerstore.WithFakeStatus(*status),
+ )
+ assert.NoError(t, err)
+
+ info, err := toCRIContainerInfo(context.Background(),
+ container,
+ false)
+ assert.NoError(t, err)
+ assert.Nil(t, info)
+}
+
+func TestContainerStatus(t *testing.T) {
+ for desc, test := range map[string]struct {
+ exist bool
+ imageExist bool
+ finishedAt int64
+ reason string
+ expectedState runtime.ContainerState
+ expectErr bool
+ }{
+ "container running": {
+ exist: true,
+ imageExist: true,
+ expectedState: runtime.ContainerState_CONTAINER_RUNNING,
+ },
+ "container exited": {
+ exist: true,
+ imageExist: true,
+ finishedAt: time.Now().UnixNano(),
+ reason: "test-reason",
+ expectedState: runtime.ContainerState_CONTAINER_EXITED,
+ },
+ "container not exist": {
+ exist: false,
+ imageExist: true,
+ expectErr: true,
+ },
+ "image not exist": {
+ exist: false,
+ imageExist: false,
+ expectErr: true,
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+ c := newTestCRIService()
+ metadata, status, image, expected := getContainerStatusTestData()
+ // Update status with test case.
+ status.FinishedAt = test.finishedAt
+ status.Reason = test.reason
+ container, err := containerstore.NewContainer(
+ *metadata,
+ containerstore.WithFakeStatus(*status),
+ )
+ assert.NoError(t, err)
+ if test.exist {
+ assert.NoError(t, c.containerStore.Add(container))
+ }
+ if test.imageExist {
+ c.imageStore, err = imagestore.NewFakeStore([]imagestore.Image{*image})
+ assert.NoError(t, err)
+ }
+ resp, err := c.ContainerStatus(context.Background(), &runtime.ContainerStatusRequest{ContainerId: container.ID})
+ if test.expectErr {
+ assert.Error(t, err)
+ assert.Nil(t, resp)
+ continue
+ }
+ // Set expectation based on test case.
+ expected.FinishedAt = test.finishedAt
+ expected.Reason = test.reason
+ expected.State = test.expectedState
+ assert.Equal(t, expected, resp.GetStatus())
+ }
+}
diff --git a/pkg/server/container_stop.go b/pkg/server/container_stop.go
new file mode 100644
index 000000000..92075d6b6
--- /dev/null
+++ b/pkg/server/container_stop.go
@@ -0,0 +1,186 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "syscall"
+ "time"
+
+ "github.com/containerd/containerd"
+ eventtypes "github.com/containerd/containerd/api/events"
+ "github.com/containerd/containerd/errdefs"
+ "github.com/containerd/containerd/log"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ ctrdutil "github.com/containerd/cri/pkg/containerd/util"
+ "github.com/containerd/cri/pkg/store"
+ containerstore "github.com/containerd/cri/pkg/store/container"
+)
+
+// StopContainer stops a running container with a grace period (i.e., timeout).
+func (c *criService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (*runtime.StopContainerResponse, error) {
+	// Resolve the container in the store before doing anything else.
+	cntr, err := c.containerStore.Get(r.GetContainerId())
+	if err != nil {
+		return nil, errors.Wrapf(err, "an error occurred when try to find container %q", r.GetContainerId())
+	}
+
+	// The CRI request expresses the grace period in seconds.
+	timeout := time.Duration(r.GetTimeout()) * time.Second
+	if err := c.stopContainer(ctx, cntr, timeout); err != nil {
+		return nil, err
+	}
+
+	return &runtime.StopContainerResponse{}, nil
+}
+
+// stopContainer stops a container based on the container metadata.
+//
+// If timeout > 0 the task is first signalled with the container's stop
+// signal (or the image config's StopSignal, falling back to SIGTERM) and
+// given `timeout` to exit; if it is still running afterwards it is
+// SIGKILLed. A timeout of zero or less skips the graceful phase and
+// SIGKILLs immediately.
+func (c *criService) stopContainer(ctx context.Context, container containerstore.Container, timeout time.Duration) error {
+	id := container.ID
+
+	// Return without error if container is not running. This makes sure that
+	// stop only takes real action after the container is started.
+	state := container.Status.Get().State()
+	if state != runtime.ContainerState_CONTAINER_RUNNING &&
+		state != runtime.ContainerState_CONTAINER_UNKNOWN {
+		log.G(ctx).Infof("Container to stop %q must be in running or unknown state, current state %q",
+			id, criContainerStateToString(state))
+		return nil
+	}
+
+	task, err := container.Container.Task(ctx, nil)
+	if err != nil {
+		if !errdefs.IsNotFound(err) {
+			return errors.Wrapf(err, "failed to get task for container %q", id)
+		}
+		// Don't return for unknown state, some cleanup needs to be done.
+		if state == runtime.ContainerState_CONTAINER_UNKNOWN {
+			return cleanupUnknownContainer(ctx, id, container)
+		}
+		return nil
+	}
+
+	// Handle unknown state.
+	if state == runtime.ContainerState_CONTAINER_UNKNOWN {
+		// Start an exit handler for containers in unknown state.
+		// Use a namespaced context detached from the request context so
+		// the Wait is not torn down by request cancellation.
+		waitCtx, waitCancel := context.WithCancel(ctrdutil.NamespacedContext())
+		defer waitCancel()
+		exitCh, err := task.Wait(waitCtx)
+		if err != nil {
+			if !errdefs.IsNotFound(err) {
+				return errors.Wrapf(err, "failed to wait for task for %q", id)
+			}
+			return cleanupUnknownContainer(ctx, id, container)
+		}
+
+		exitCtx, exitCancel := context.WithCancel(context.Background())
+		stopCh := c.eventMonitor.startExitMonitor(exitCtx, id, task.Pid(), exitCh)
+		defer func() {
+			exitCancel()
+			// This ensures that exit monitor is stopped before
+			// `Wait` is cancelled, so no exit event is generated
+			// because of the `Wait` cancellation.
+			<-stopCh
+		}()
+	}
+
+	// We only need to kill the task. The event handler will Delete the
+	// task from containerd after it handles the Exited event.
+	if timeout > 0 {
+		stopSignal := "SIGTERM"
+		if container.StopSignal != "" {
+			stopSignal = container.StopSignal
+		} else {
+			// The image may have been deleted, and the `StopSignal` field is
+			// just introduced to handle that.
+			// However, for containers created before the `StopSignal` field is
+			// introduced, still try to get the stop signal from the image config.
+			// If the image has been deleted, logging an error and using the
+			// default SIGTERM is still better than returning error and leaving
+			// the container unstoppable. (See issue #990)
+			// TODO(random-liu): Remove this logic when containerd 1.2 is deprecated.
+			image, err := c.imageStore.Get(container.ImageRef)
+			if err != nil {
+				if err != store.ErrNotExist {
+					return errors.Wrapf(err, "failed to get image %q", container.ImageRef)
+				}
+				log.G(ctx).Warningf("Image %q not found, stop container with signal %q", container.ImageRef, stopSignal)
+			} else {
+				if image.ImageSpec.Config.StopSignal != "" {
+					stopSignal = image.ImageSpec.Config.StopSignal
+				}
+			}
+		}
+		sig, err := containerd.ParseSignal(stopSignal)
+		if err != nil {
+			return errors.Wrapf(err, "failed to parse stop signal %q", stopSignal)
+		}
+		log.G(ctx).Infof("Stop container %q with signal %v", id, sig)
+		if err = task.Kill(ctx, sig); err != nil && !errdefs.IsNotFound(err) {
+			return errors.Wrapf(err, "failed to stop container %q", id)
+		}
+
+		// Bound the graceful wait by `timeout`, derived from the request
+		// context so a caller cancellation is also observed.
+		sigTermCtx, sigTermCtxCancel := context.WithTimeout(ctx, timeout)
+		defer sigTermCtxCancel()
+		err = c.waitContainerStop(sigTermCtx, container)
+		if err == nil {
+			// Container stopped on first signal no need for SIGKILL
+			return nil
+		}
+		// If the parent context was cancelled or exceeded return immediately
+		if ctx.Err() != nil {
+			return ctx.Err()
+		}
+		// sigTermCtx was exceeded. Send SIGKILL
+		log.G(ctx).Debugf("Stop container %q with signal %v timed out", id, sig)
+	}
+
+	log.G(ctx).Infof("Kill container %q", id)
+	if err = task.Kill(ctx, syscall.SIGKILL); err != nil && !errdefs.IsNotFound(err) {
+		return errors.Wrapf(err, "failed to kill container %q", id)
+	}
+
+	// Wait for a fixed timeout until container stop is observed by event monitor.
+	err = c.waitContainerStop(ctx, container)
+	if err != nil {
+		return errors.Wrapf(err, "an error occurs during waiting for container %q to be killed", id)
+	}
+	return nil
+}
+
+// waitContainerStop blocks until the container is observed stopped, the
+// context is cancelled, or the context deadline is exceeded.
+func (c *criService) waitContainerStop(ctx context.Context, cntr containerstore.Container) error {
+	select {
+	case <-cntr.Stopped():
+		// Stop was observed by the event monitor.
+		return nil
+	case <-ctx.Done():
+		// Cancelled or timed out before the stop was observed.
+		return errors.Wrapf(ctx.Err(), "wait container %q", cntr.ID)
+	}
+}
+
+// cleanupUnknownContainer cleanup stopped container in unknown state.
+func cleanupUnknownContainer(ctx context.Context, id string, cntr containerstore.Container) error {
+	// Fabricate a TaskExit event carrying the unknown exit code and let the
+	// regular exit handler perform the cleanup.
+	exitEvent := &eventtypes.TaskExit{
+		ContainerID: id,
+		ID:          id,
+		Pid:         0,
+		ExitStatus:  unknownExitCode,
+		ExitedAt:    time.Now(),
+	}
+	return handleContainerExit(ctx, exitEvent, cntr)
+}
diff --git a/pkg/server/container_stop_test.go b/pkg/server/container_stop_test.go
new file mode 100644
index 000000000..8394f62e0
--- /dev/null
+++ b/pkg/server/container_stop_test.go
@@ -0,0 +1,85 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "golang.org/x/net/context"
+
+ containerstore "github.com/containerd/cri/pkg/store/container"
+)
+
+// TestWaitContainerStop verifies waitContainerStop's three outcomes:
+// deadline exceeded, context cancelled, and container already stopped.
+func TestWaitContainerStop(t *testing.T) {
+	id := "test-id"
+	for desc, test := range map[string]struct {
+		status    *containerstore.Status
+		cancel    bool
+		timeout   time.Duration
+		expectErr bool
+	}{
+		"should return error if timeout exceeds": {
+			status: &containerstore.Status{
+				CreatedAt: time.Now().UnixNano(),
+				StartedAt: time.Now().UnixNano(),
+			},
+			timeout:   200 * time.Millisecond,
+			expectErr: true,
+		},
+		"should return error if context is cancelled": {
+			status: &containerstore.Status{
+				CreatedAt: time.Now().UnixNano(),
+				StartedAt: time.Now().UnixNano(),
+			},
+			timeout:   time.Hour,
+			cancel:    true,
+			expectErr: true,
+		},
+		"should not return error if container is stopped before timeout": {
+			status: &containerstore.Status{
+				CreatedAt:  time.Now().UnixNano(),
+				StartedAt:  time.Now().UnixNano(),
+				FinishedAt: time.Now().UnixNano(),
+			},
+			timeout:   time.Hour,
+			expectErr: false,
+		},
+	} {
+		test := test
+		// Run each case as a subtest so the per-case context cancel fires at
+		// the end of the case instead of being deferred inside the loop and
+		// accumulating until the whole test returns. Subtests also name the
+		// failing case automatically.
+		t.Run(desc, func(t *testing.T) {
+			c := newTestCRIService()
+			container, err := containerstore.NewContainer(
+				containerstore.Metadata{ID: id},
+				containerstore.WithFakeStatus(*test.status),
+			)
+			assert.NoError(t, err)
+			assert.NoError(t, c.containerStore.Add(container))
+			ctx := context.Background()
+			if test.cancel {
+				cancelledCtx, cancel := context.WithCancel(ctx)
+				cancel()
+				ctx = cancelledCtx
+			}
+			if test.timeout > 0 {
+				timeoutCtx, cancel := context.WithTimeout(ctx, test.timeout)
+				defer cancel()
+				ctx = timeoutCtx
+			}
+			err = c.waitContainerStop(ctx, container)
+			assert.Equal(t, test.expectErr, err != nil, desc)
+		})
+	}
+}
diff --git a/pkg/server/container_update_resources_linux.go b/pkg/server/container_update_resources_linux.go
new file mode 100644
index 000000000..ce3f8a812
--- /dev/null
+++ b/pkg/server/container_update_resources_linux.go
@@ -0,0 +1,148 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ gocontext "context"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/containers"
+ "github.com/containerd/containerd/errdefs"
+ "github.com/containerd/containerd/log"
+ "github.com/containerd/typeurl"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/containerd/opts"
+ ctrdutil "github.com/containerd/cri/pkg/containerd/util"
+ containerstore "github.com/containerd/cri/pkg/store/container"
+ "github.com/containerd/cri/pkg/util"
+)
+
+// UpdateContainerResources updates ContainerConfig of the container.
+func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (retRes *runtime.UpdateContainerResourcesResponse, retErr error) {
+	cntr, err := c.containerStore.Get(r.GetContainerId())
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to find container")
+	}
+	// Perform the update inside a status transaction so that it cannot race
+	// with container start, and so that concurrent resource updates to the
+	// same container are serialized.
+	updateFn := func(status containerstore.Status) (containerstore.Status, error) {
+		return status, c.updateContainerResources(ctx, cntr, r.GetLinux(), status)
+	}
+	if err := cntr.Status.Update(updateFn); err != nil {
+		return nil, errors.Wrap(err, "failed to update resources")
+	}
+	return &runtime.UpdateContainerResourcesResponse{}, nil
+}
+
+// updateContainerResources applies the requested Linux resource update to
+// the container. It is invoked from within a container status transaction
+// (see UpdateContainerResources), so `status` is a consistent snapshot and
+// no concurrent start/update can interleave.
+func (c *criService) updateContainerResources(ctx context.Context,
+	cntr containerstore.Container,
+	resources *runtime.LinuxContainerResources,
+	status containerstore.Status) (retErr error) {
+	id := cntr.ID
+	// Do not update the container when there is a removal in progress.
+	if status.Removing {
+		return errors.Errorf("container %q is in removing state", id)
+	}
+
+	// Update container spec. If the container is not started yet, updating
+	// spec makes sure that the resource limits are correct when start;
+	// if the container is already started, updating spec is still required,
+	// the spec will become our source of truth for resource limits.
+	oldSpec, err := cntr.Container.Spec(ctx)
+	if err != nil {
+		return errors.Wrap(err, "failed to get container spec")
+	}
+	newSpec, err := updateOCILinuxResource(ctx, oldSpec, resources,
+		c.config.TolerateMissingHugetlbController, c.config.DisableHugetlbController)
+	if err != nil {
+		return errors.Wrap(err, "failed to update resource in spec")
+	}
+
+	if err := updateContainerSpec(ctx, cntr.Container, newSpec); err != nil {
+		return err
+	}
+	// Roll the stored spec back on any later failure so it does not diverge
+	// from the resources actually applied to the task.
+	defer func() {
+		if retErr != nil {
+			deferCtx, deferCancel := ctrdutil.DeferContext()
+			defer deferCancel()
+			// Reset spec on error.
+			if err := updateContainerSpec(deferCtx, cntr.Container, oldSpec); err != nil {
+				log.G(ctx).WithError(err).Errorf("Failed to update spec %+v for container %q", oldSpec, id)
+			}
+		}
+	}()
+
+	// If container is not running, only update spec is enough, new resource
+	// limit will be applied when container start.
+	if status.State() != runtime.ContainerState_CONTAINER_RUNNING {
+		return nil
+	}
+
+	task, err := cntr.Container.Task(ctx, nil)
+	if err != nil {
+		if errdefs.IsNotFound(err) {
+			// Task exited already.
+			return nil
+		}
+		return errors.Wrap(err, "failed to get task")
+	}
+	// newSpec.Linux won't be nil
+	if err := task.Update(ctx, containerd.WithResources(newSpec.Linux.Resources)); err != nil {
+		if errdefs.IsNotFound(err) {
+			// Task exited already.
+			return nil
+		}
+		return errors.Wrap(err, "failed to update resources")
+	}
+	return nil
+}
+
+// updateContainerSpec replaces the OCI spec stored in the containerd
+// container record with the given spec.
+func updateContainerSpec(ctx context.Context, cntr containerd.Container, spec *runtimespec.Spec) error {
+	any, err := typeurl.MarshalAny(spec)
+	if err != nil {
+		return errors.Wrapf(err, "failed to marshal spec %+v", spec)
+	}
+	// Swap the spec in via an update transaction on the container record.
+	setSpec := func(ctx gocontext.Context, client *containerd.Client, c *containers.Container) error {
+		c.Spec = any
+		return nil
+	}
+	if err := cntr.Update(ctx, setSpec); err != nil {
+		return errors.Wrap(err, "failed to update container spec")
+	}
+	return nil
+}
+
+// updateOCILinuxResource updates container resource limit. It deep copies
+// the input spec so the caller's spec is never mutated, and returns the
+// updated copy.
+func updateOCILinuxResource(ctx context.Context, spec *runtimespec.Spec, resources *runtime.LinuxContainerResources,
+	tolerateMissingHugetlbController, disableHugetlbController bool) (*runtimespec.Spec, error) {
+	// NOTE: the second parameter was previously named `new`, shadowing the
+	// builtin allocator; renamed to `resources`.
+	// Copy to make sure old spec is not changed.
+	var cloned runtimespec.Spec
+	if err := util.DeepCopy(&cloned, spec); err != nil {
+		return nil, errors.Wrap(err, "failed to deep copy")
+	}
+	if cloned.Linux == nil {
+		cloned.Linux = &runtimespec.Linux{}
+	}
+	// WithResources merges the CRI resource fields onto the cloned OCI spec.
+	if err := opts.WithResources(resources, tolerateMissingHugetlbController, disableHugetlbController)(ctx, nil, nil, &cloned); err != nil {
+		return nil, errors.Wrap(err, "unable to set linux container resources")
+	}
+	return &cloned, nil
+}
diff --git a/pkg/server/container_update_resources_linux_test.go b/pkg/server/container_update_resources_linux_test.go
new file mode 100644
index 000000000..ffbc3f88f
--- /dev/null
+++ b/pkg/server/container_update_resources_linux_test.go
@@ -0,0 +1,162 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "context"
+ "testing"
+
+ "github.com/golang/protobuf/proto"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/stretchr/testify/assert"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// TestUpdateOCILinuxResource verifies how updateOCILinuxResource merges CRI
+// resource requests into an OCI spec: set fields overwrite the spec's
+// values, zero-valued fields keep the spec's existing values, and a missing
+// Linux.Resources.CPU section is created when needed.
+// Note that the expected specs keep the original Process.OOMScoreAdj even
+// though the request carries OomScoreAdj — this helper does not apply it.
+func TestUpdateOCILinuxResource(t *testing.T) {
+	oomscoreadj := new(int)
+	*oomscoreadj = -500
+	for desc, test := range map[string]struct {
+		spec      *runtimespec.Spec
+		resources *runtime.LinuxContainerResources
+		expected  *runtimespec.Spec
+		expectErr bool
+	}{
+		"should be able to update each resource": {
+			spec: &runtimespec.Spec{
+				Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
+				Linux: &runtimespec.Linux{
+					Resources: &runtimespec.LinuxResources{
+						Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)},
+						CPU: &runtimespec.LinuxCPU{
+							Shares: proto.Uint64(1111),
+							Quota:  proto.Int64(2222),
+							Period: proto.Uint64(3333),
+							Cpus:   "0-1",
+							Mems:   "2-3",
+						},
+					},
+				},
+			},
+			resources: &runtime.LinuxContainerResources{
+				CpuPeriod:          6666,
+				CpuQuota:           5555,
+				CpuShares:          4444,
+				MemoryLimitInBytes: 54321,
+				OomScoreAdj:        500,
+				CpusetCpus:         "4-5",
+				CpusetMems:         "6-7",
+			},
+			expected: &runtimespec.Spec{
+				Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
+				Linux: &runtimespec.Linux{
+					Resources: &runtimespec.LinuxResources{
+						Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(54321)},
+						CPU: &runtimespec.LinuxCPU{
+							Shares: proto.Uint64(4444),
+							Quota:  proto.Int64(5555),
+							Period: proto.Uint64(6666),
+							Cpus:   "4-5",
+							Mems:   "6-7",
+						},
+					},
+				},
+			},
+		},
+		// Zero-valued request fields (CpuPeriod, CpusetCpus) must leave the
+		// corresponding spec fields untouched.
+		"should skip empty fields": {
+			spec: &runtimespec.Spec{
+				Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
+				Linux: &runtimespec.Linux{
+					Resources: &runtimespec.LinuxResources{
+						Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)},
+						CPU: &runtimespec.LinuxCPU{
+							Shares: proto.Uint64(1111),
+							Quota:  proto.Int64(2222),
+							Period: proto.Uint64(3333),
+							Cpus:   "0-1",
+							Mems:   "2-3",
+						},
+					},
+				},
+			},
+			resources: &runtime.LinuxContainerResources{
+				CpuQuota:           5555,
+				CpuShares:          4444,
+				MemoryLimitInBytes: 54321,
+				OomScoreAdj:        500,
+				CpusetMems:         "6-7",
+			},
+			expected: &runtimespec.Spec{
+				Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
+				Linux: &runtimespec.Linux{
+					Resources: &runtimespec.LinuxResources{
+						Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(54321)},
+						CPU: &runtimespec.LinuxCPU{
+							Shares: proto.Uint64(4444),
+							Quota:  proto.Int64(5555),
+							Period: proto.Uint64(3333),
+							Cpus:   "0-1",
+							Mems:   "6-7",
+						},
+					},
+				},
+			},
+		},
+		// A spec with no CPU section at all must gain one populated from the
+		// request.
+		"should be able to fill empty fields": {
+			spec: &runtimespec.Spec{
+				Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
+				Linux: &runtimespec.Linux{
+					Resources: &runtimespec.LinuxResources{
+						Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(12345)},
+					},
+				},
+			},
+			resources: &runtime.LinuxContainerResources{
+				CpuPeriod:          6666,
+				CpuQuota:           5555,
+				CpuShares:          4444,
+				MemoryLimitInBytes: 54321,
+				OomScoreAdj:        500,
+				CpusetCpus:         "4-5",
+				CpusetMems:         "6-7",
+			},
+			expected: &runtimespec.Spec{
+				Process: &runtimespec.Process{OOMScoreAdj: oomscoreadj},
+				Linux: &runtimespec.Linux{
+					Resources: &runtimespec.LinuxResources{
+						Memory: &runtimespec.LinuxMemory{Limit: proto.Int64(54321)},
+						CPU: &runtimespec.LinuxCPU{
+							Shares: proto.Uint64(4444),
+							Quota:  proto.Int64(5555),
+							Period: proto.Uint64(6666),
+							Cpus:   "4-5",
+							Mems:   "6-7",
+						},
+					},
+				},
+			},
+		},
+	} {
+		t.Logf("TestCase %q", desc)
+		got, err := updateOCILinuxResource(context.Background(), test.spec, test.resources, false, false)
+		if test.expectErr {
+			assert.Error(t, err)
+		} else {
+			assert.NoError(t, err)
+		}
+		assert.Equal(t, test.expected, got)
+	}
+}
diff --git a/pkg/server/container_update_resources_other.go b/pkg/server/container_update_resources_other.go
new file mode 100644
index 000000000..57975fdc3
--- /dev/null
+++ b/pkg/server/container_update_resources_other.go
@@ -0,0 +1,44 @@
+// +build !windows,!linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ containerstore "github.com/containerd/cri/pkg/store/container"
+)
+
+// UpdateContainerResources updates ContainerConfig of the container.
+// On this platform no resources are actually applied; the status
+// transaction below is a deliberate no-op kept for its locking semantics.
+func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (retRes *runtime.UpdateContainerResourcesResponse, retErr error) {
+	container, err := c.containerStore.Get(r.GetContainerId())
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to find container")
+	}
+	// Update resources in status update transaction, so that:
+	// 1) There won't be race condition with container start.
+	// 2) There won't be concurrent resource update to the same container.
+	if err := container.Status.Update(func(status containerstore.Status) (containerstore.Status, error) {
+		return status, nil
+	}); err != nil {
+		return nil, errors.Wrap(err, "failed to update resources")
+	}
+	return &runtime.UpdateContainerResourcesResponse{}, nil
+}
diff --git a/pkg/server/container_update_resources_windows.go b/pkg/server/container_update_resources_windows.go
new file mode 100644
index 000000000..72d0d459e
--- /dev/null
+++ b/pkg/server/container_update_resources_windows.go
@@ -0,0 +1,31 @@
+// +build windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "github.com/containerd/containerd/errdefs"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// UpdateContainerResources updates ContainerConfig of the container.
+// TODO(windows): Figure out whether windows support this.
+func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (*runtime.UpdateContainerResourcesResponse, error) {
+	// Not supported on Windows; errdefs.ErrNotImplemented is mapped to the
+	// corresponding gRPC code by the errdefs error translation layer.
+	return nil, errdefs.ErrNotImplemented
+}
diff --git a/pkg/server/events.go b/pkg/server/events.go
new file mode 100644
index 000000000..8f35bf0bd
--- /dev/null
+++ b/pkg/server/events.go
@@ -0,0 +1,461 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "sync"
+ "time"
+
+ "github.com/containerd/containerd"
+ eventtypes "github.com/containerd/containerd/api/events"
+ containerdio "github.com/containerd/containerd/cio"
+ "github.com/containerd/containerd/errdefs"
+ "github.com/containerd/containerd/events"
+ "github.com/containerd/typeurl"
+ gogotypes "github.com/gogo/protobuf/types"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ "k8s.io/apimachinery/pkg/util/clock"
+
+ "github.com/containerd/cri/pkg/constants"
+ ctrdutil "github.com/containerd/cri/pkg/containerd/util"
+ "github.com/containerd/cri/pkg/store"
+ containerstore "github.com/containerd/cri/pkg/store/container"
+ sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
+)
+
+const (
+	// backOffInitDuration is the initial retry delay applied to a
+	// container/sandbox whose event handling failed.
+	backOffInitDuration = 1 * time.Second
+	// backOffMaxDuration caps the exponential growth of the retry delay.
+	backOffMaxDuration = 5 * time.Minute
+	// backOffExpireCheckDuration is how often expired backoff queues are
+	// checked and re-dispatched.
+	backOffExpireCheckDuration = 1 * time.Second
+
+	// handleEventTimeout is the timeout for handling 1 event. Event monitor
+	// handles events in serial, if one event blocks the event monitor, no
+	// other events can be handled.
+	// Add a timeout for each event handling, events that timeout will be requeued and
+	// handled again in the future.
+	handleEventTimeout = 10 * time.Second
+
+	// exitChannelSize bounds the buffered exit-event channel so exit
+	// monitors rarely block when forwarding events.
+	exitChannelSize = 1024
+)
+
+// eventMonitor monitors containerd event and updates internal state correspondingly.
+// TODO(random-liu): Handle event for each container in a separate goroutine.
+type eventMonitor struct {
+	// c is the CRI service whose stores the event handlers update.
+	c *criService
+	// ch receives subscribed containerd events (set by subscribe).
+	ch <-chan *events.Envelope
+	// exitCh receives container/sandbox exit events from exit monitors.
+	exitCh chan *eventtypes.TaskExit
+	// errCh receives errors from the containerd event subscription.
+	errCh <-chan error
+	// ctx/cancel bound the lifetime of the subscription and monitor loop.
+	ctx    context.Context
+	cancel context.CancelFunc
+	// backOff queues events whose handling failed for later retry.
+	backOff *backOff
+}
+
+// backOff tracks, per container/sandbox id, a queue of events whose
+// handling failed, retried with exponentially growing delays.
+type backOff struct {
+	// queuePool maps an id to its pending event queue.
+	queuePool map[string]*backOffQueue
+	// tickerMu is mutex used to protect the ticker.
+	tickerMu sync.Mutex
+	// ticker drives the periodic expiry checks.
+	ticker *time.Ticker
+	// minDuration and maxDuration bound a queue's retry delay.
+	minDuration time.Duration
+	maxDuration time.Duration
+	// checkDuration is the expiry polling interval.
+	checkDuration time.Duration
+	// clock abstracts time, presumably so tests can inject a fake clock —
+	// confirm against newBackOff and its tests.
+	clock clock.Clock
+}
+
+// backOffQueue is the pending event queue for a single container/sandbox id.
+type backOffQueue struct {
+	// events are the queued events in arrival order.
+	events []interface{}
+	// expireTime is when the queue becomes eligible for re-dispatch.
+	expireTime time.Time
+	// duration is the queue's current backoff delay.
+	duration time.Duration
+	clock    clock.Clock
+}
+
+// newEventMonitor creates a new event monitor. New event monitor will start
+// subscribing containerd event. All events happen after it should be
+// monitored.
+func newEventMonitor(c *criService) *eventMonitor {
+	ctx, cancel := context.WithCancel(context.Background())
+	return &eventMonitor{
+		c:       c,
+		ctx:     ctx,
+		cancel:  cancel,
+		exitCh:  make(chan *eventtypes.TaskExit, exitChannelSize),
+		backOff: newBackOff(),
+	}
+}
+
+// subscribe starts to subscribe containerd events. It populates em.ch and
+// em.errCh, which the start loop consumes, so it must be called before
+// start.
+func (em *eventMonitor) subscribe(subscriber events.Subscriber) {
+	// note: filters are any match, if you want any match but not in namespace foo
+	// then you have to manually filter namespace foo
+	filters := []string{
+		`topic=="/tasks/oom"`,
+		`topic~="/images/"`,
+	}
+	em.ch, em.errCh = subscriber.Subscribe(em.ctx, filters...)
+}
+
+// startExitMonitor starts an exit monitor for a given container/sandbox.
+// The returned channel is closed when the monitor goroutine finishes,
+// either after forwarding the exit event to em.exitCh or after ctx is
+// cancelled.
+func (em *eventMonitor) startExitMonitor(ctx context.Context, id string, pid uint32, exitCh <-chan containerd.ExitStatus) <-chan struct{} {
+	stopCh := make(chan struct{})
+	go func() {
+		defer close(stopCh)
+		select {
+		case exitRes := <-exitCh:
+			exitStatus, exitedAt, err := exitRes.Result()
+			if err != nil {
+				// Fall back to an unknown exit code and the current time
+				// rather than dropping the exit event.
+				logrus.WithError(err).Errorf("Failed to get task exit status for %q", id)
+				exitStatus = unknownExitCode
+				exitedAt = time.Now()
+			}
+			em.exitCh <- &eventtypes.TaskExit{
+				ContainerID: id,
+				ID:          id,
+				Pid:         pid,
+				ExitStatus:  exitStatus,
+				ExitedAt:    exitedAt,
+			}
+		case <-ctx.Done():
+		}
+	}()
+	return stopCh
+}
+
+// convertEvent decodes a containerd event payload and returns the
+// container/image identifier it refers to together with the decoded event.
+// Only TaskOOM and image events are supported; anything else is an error.
+func convertEvent(e *gogotypes.Any) (string, interface{}, error) {
+	evt, err := typeurl.UnmarshalAny(e)
+	if err != nil {
+		// Note: error message previously read "failed to unmarshalany".
+		return "", nil, errors.Wrap(err, "failed to unmarshal any")
+	}
+
+	switch e := evt.(type) {
+	case *eventtypes.TaskOOM:
+		return e.ContainerID, evt, nil
+	case *eventtypes.ImageCreate:
+		return e.Name, evt, nil
+	case *eventtypes.ImageUpdate:
+		return e.Name, evt, nil
+	case *eventtypes.ImageDelete:
+		return e.Name, evt, nil
+	}
+	return "", nil, errors.New("unsupported event")
+}
+
+// start starts the event monitor which monitors and handles all subscribed events. It returns
+// an error channel for the caller to wait for stop errors from the event monitor.
+// start must be called after subscribe.
+func (em *eventMonitor) start() <-chan error {
+	errCh := make(chan error)
+	if em.ch == nil || em.errCh == nil {
+		panic("event channel is nil")
+	}
+	backOffCheckCh := em.backOff.start()
+	go func() {
+		defer close(errCh)
+		for {
+			select {
+			case e := <-em.exitCh:
+				// Exit events forwarded by exit monitors.
+				logrus.Debugf("Received exit event %+v", e)
+				id := e.ID
+				if em.backOff.isInBackOff(id) {
+					logrus.Infof("Events for %q is in backoff, enqueue event %+v", id, e)
+					em.backOff.enBackOff(id, e)
+					break
+				}
+				if err := em.handleEvent(e); err != nil {
+					// Retry later with backoff instead of dropping the event.
+					logrus.WithError(err).Errorf("Failed to handle exit event %+v for %s", e, id)
+					em.backOff.enBackOff(id, e)
+				}
+			case e := <-em.ch:
+				// Subscribed containerd events (OOM and image events).
+				logrus.Debugf("Received containerd event timestamp - %v, namespace - %q, topic - %q", e.Timestamp, e.Namespace, e.Topic)
+				if e.Namespace != constants.K8sContainerdNamespace {
+					logrus.Debugf("Ignoring events in namespace - %q", e.Namespace)
+					break
+				}
+				id, evt, err := convertEvent(e.Event)
+				if err != nil {
+					logrus.WithError(err).Errorf("Failed to convert event %+v", e)
+					break
+				}
+				// Preserve per-id ordering: while an id is backing off, new
+				// events for it must queue behind the failed ones.
+				if em.backOff.isInBackOff(id) {
+					logrus.Infof("Events for %q is in backoff, enqueue event %+v", id, evt)
+					em.backOff.enBackOff(id, evt)
+					break
+				}
+				if err := em.handleEvent(evt); err != nil {
+					logrus.WithError(err).Errorf("Failed to handle event %+v for %s", evt, id)
+					em.backOff.enBackOff(id, evt)
+				}
+			case err := <-em.errCh:
+				// Close errCh in defer directly if there is no error.
+				if err != nil {
+					logrus.WithError(err).Errorf("Failed to handle event stream")
+					errCh <- err
+				}
+				return
+			case <-backOffCheckCh:
+				// Periodically re-dispatch queues whose backoff has expired.
+				ids := em.backOff.getExpiredIDs()
+				for _, id := range ids {
+					queue := em.backOff.deBackOff(id)
+					for i, any := range queue.events {
+						if err := em.handleEvent(any); err != nil {
+							// Requeue the failed event and everything after
+							// it to keep per-id ordering.
+							logrus.WithError(err).Errorf("Failed to handle backOff event %+v for %s", any, id)
+							em.backOff.reBackOff(id, queue.events[i:], queue.duration)
+							break
+						}
+					}
+				}
+			}
+		}
+	}()
+	return errCh
+}
+
+// stop stops the event monitor. It will close the event channel.
+// Once event monitor is stopped, it can't be started.
+func (em *eventMonitor) stop() {
+	// Stop the backoff ticker first, then cancel the subscription context.
+	em.backOff.stop()
+	em.cancel()
+}
+
+// handleEvent handles a containerd event. Each invocation runs in the CRI
+// containerd namespace under a handleEventTimeout deadline, so one stuck
+// handler cannot block the serial event loop indefinitely.
+func (em *eventMonitor) handleEvent(any interface{}) error {
+	ctx := ctrdutil.NamespacedContext()
+	ctx, cancel := context.WithTimeout(ctx, handleEventTimeout)
+	defer cancel()
+
+	switch e := any.(type) {
+	case *eventtypes.TaskExit:
+		logrus.Infof("TaskExit event %+v", e)
+		// Use ID instead of ContainerID to rule out TaskExit event for exec.
+		cntr, err := em.c.containerStore.Get(e.ID)
+		if err == nil {
+			if err := handleContainerExit(ctx, e, cntr); err != nil {
+				return errors.Wrap(err, "failed to handle container TaskExit event")
+			}
+			return nil
+		} else if err != store.ErrNotExist {
+			return errors.Wrap(err, "can't find container for TaskExit event")
+		}
+		// Not a container; check whether the exit belongs to a sandbox.
+		sb, err := em.c.sandboxStore.Get(e.ID)
+		if err == nil {
+			if err := handleSandboxExit(ctx, e, sb); err != nil {
+				return errors.Wrap(err, "failed to handle sandbox TaskExit event")
+			}
+			return nil
+		} else if err != store.ErrNotExist {
+			return errors.Wrap(err, "can't find sandbox for TaskExit event")
+		}
+		// Neither container nor sandbox: nothing to do.
+		return nil
+	case *eventtypes.TaskOOM:
+		logrus.Infof("TaskOOM event %+v", e)
+		// For TaskOOM, we only care which container it belongs to.
+		cntr, err := em.c.containerStore.Get(e.ContainerID)
+		if err != nil {
+			if err != store.ErrNotExist {
+				return errors.Wrap(err, "can't find container for TaskOOM event")
+			}
+			return nil
+		}
+		// Record the OOM reason on the container status.
+		err = cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
+			status.Reason = oomExitReason
+			return status, nil
+		})
+		if err != nil {
+			return errors.Wrap(err, "failed to update container status for TaskOOM event")
+		}
+	case *eventtypes.ImageCreate:
+		logrus.Infof("ImageCreate event %+v", e)
+		return em.c.updateImage(ctx, e.Name)
+	case *eventtypes.ImageUpdate:
+		logrus.Infof("ImageUpdate event %+v", e)
+		return em.c.updateImage(ctx, e.Name)
+	case *eventtypes.ImageDelete:
+		logrus.Infof("ImageDelete event %+v", e)
+		return em.c.updateImage(ctx, e.Name)
+	}
+
+	return nil
+}
+
+// handleContainerExit handles TaskExit event for container.
+//
+// It deletes the containerd task (killing any remaining process), records
+// the exit code/time on the container status, and signals waiters through
+// the container's stop channel. Errors from a missing task are tolerated
+// so that the status update always happens.
+func handleContainerExit(ctx context.Context, e *eventtypes.TaskExit, cntr containerstore.Container) error {
+ // Attach container IO so that `Delete` could cleanup the stream properly.
+ task, err := cntr.Container.Task(ctx,
+ func(*containerdio.FIFOSet) (containerdio.IO, error) {
+ // We can't directly return cntr.IO here, because
+ // even if cntr.IO is nil, the cio.IO interface
+ // is not.
+ // See https://tour.golang.org/methods/12:
+ // Note that an interface value that holds a nil
+ // concrete value is itself non-nil.
+ if cntr.IO != nil {
+ return cntr.IO, nil
+ }
+ return nil, nil
+ },
+ )
+ if err != nil {
+ if !errdefs.IsNotFound(err) {
+ return errors.Wrapf(err, "failed to load task for container")
+ }
+ // Task already gone — fall through to update the stored status.
+ } else {
+ // TODO(random-liu): [P1] This may block the loop, we may want to spawn a worker
+ if _, err = task.Delete(ctx, WithNRISandboxDelete(cntr.SandboxID), containerd.WithProcessKill); err != nil {
+ if !errdefs.IsNotFound(err) {
+ return errors.Wrap(err, "failed to stop container")
+ }
+ // Move on to make sure container status is updated.
+ }
+ }
+ err = cntr.Status.UpdateSync(func(status containerstore.Status) (containerstore.Status, error) {
+ // If FinishedAt has been set (e.g. with start failure), keep as
+ // it is.
+ if status.FinishedAt != 0 {
+ return status, nil
+ }
+ status.Pid = 0
+ status.FinishedAt = e.ExitedAt.UnixNano()
+ status.ExitCode = int32(e.ExitStatus)
+ // Unknown state can only transit to EXITED state, so we need
+ // to handle unknown state here.
+ if status.Unknown {
+ logrus.Debugf("Container %q transited from UNKNOWN to EXITED", cntr.ID)
+ status.Unknown = false
+ }
+ return status, nil
+ })
+ if err != nil {
+ return errors.Wrap(err, "failed to update container state")
+ }
+ // Using channel to propagate the information of container stop
+ cntr.Stop()
+ return nil
+}
+
+// handleSandboxExit handles TaskExit event for sandbox.
+//
+// Mirrors handleContainerExit for the sandbox container: deletes the task
+// (best effort if it is already gone), marks the sandbox NotReady, and
+// signals waiters through the sandbox stop channel.
+func handleSandboxExit(ctx context.Context, e *eventtypes.TaskExit, sb sandboxstore.Sandbox) error {
+ // No stream attached to sandbox container.
+ task, err := sb.Container.Task(ctx, nil)
+ if err != nil {
+ if !errdefs.IsNotFound(err) {
+ return errors.Wrap(err, "failed to load task for sandbox")
+ }
+ // Task already gone — still update the stored sandbox status below.
+ } else {
+ // TODO(random-liu): [P1] This may block the loop, we may want to spawn a worker
+ if _, err = task.Delete(ctx, WithNRISandboxDelete(sb.ID), containerd.WithProcessKill); err != nil {
+ if !errdefs.IsNotFound(err) {
+ return errors.Wrap(err, "failed to stop sandbox")
+ }
+ // Move on to make sure container status is updated.
+ }
+ }
+ err = sb.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) {
+ status.State = sandboxstore.StateNotReady
+ status.Pid = 0
+ return status, nil
+ })
+ if err != nil {
+ return errors.Wrap(err, "failed to update sandbox state")
+ }
+ // Using channel to propagate the information of sandbox stop
+ sb.Stop()
+ return nil
+}
+
+// newBackOff returns a backOff with an empty queue pool, the package-level
+// default durations, and the real wall clock (tests substitute a fake clock).
+func newBackOff() *backOff {
+ return &backOff{
+ queuePool: map[string]*backOffQueue{},
+ minDuration: backOffInitDuration,
+ maxDuration: backOffMaxDuration,
+ checkDuration: backOffExpireCheckDuration,
+ clock: clock.RealClock{},
+ }
+}
+
+// getExpiredIDs returns the keys of all queues whose backoff deadline has
+// passed; their queued events are ready to be retried.
+func (b *backOff) getExpiredIDs() []string {
+ var ids []string
+ for id, q := range b.queuePool {
+ if q.isExpire() {
+ ids = append(ids, id)
+ }
+ }
+ return ids
+}
+
+// isInBackOff reports whether key currently has a backoff queue, i.e. its
+// events are being deferred rather than handled immediately.
+func (b *backOff) isInBackOff(key string) bool {
+ if _, ok := b.queuePool[key]; ok {
+ return true
+ }
+ return false
+}
+
+// enBackOff start to backOff and put event to the tail of queue.
+// If a queue already exists for key the event is appended to it, keeping
+// the existing expiry; otherwise a new queue starts at minDuration.
+func (b *backOff) enBackOff(key string, evt interface{}) {
+ if queue, ok := b.queuePool[key]; ok {
+ queue.events = append(queue.events, evt)
+ return
+ }
+ b.queuePool[key] = newBackOffQueue([]interface{}{evt}, b.minDuration, b.clock)
+}
+
+// deBackOff removes and returns the whole queue for key.
+// Returns nil when no queue exists for key.
+func (b *backOff) deBackOff(key string) *backOffQueue {
+ queue := b.queuePool[key]
+ delete(b.queuePool, key)
+ return queue
+}
+
+// reBackOff starts to backOff again, re-queuing events with the previous
+// duration doubled (capped at maxDuration) — i.e. exponential backoff.
+func (b *backOff) reBackOff(key string, events []interface{}, oldDuration time.Duration) {
+ duration := 2 * oldDuration
+ if duration > b.maxDuration {
+ duration = b.maxDuration
+ }
+ b.queuePool[key] = newBackOffQueue(events, duration, b.clock)
+}
+
+// start creates the expiry-check ticker and returns its channel; the caller
+// polls it to learn when to scan for expired queues. Guarded by tickerMu so
+// start/stop don't race on b.ticker.
+func (b *backOff) start() <-chan time.Time {
+ b.tickerMu.Lock()
+ defer b.tickerMu.Unlock()
+ b.ticker = time.NewTicker(b.checkDuration)
+ return b.ticker.C
+}
+
+// stop stops the expiry-check ticker if one was started.
+// Safe to call even if start was never called (ticker is nil).
+func (b *backOff) stop() {
+ b.tickerMu.Lock()
+ defer b.tickerMu.Unlock()
+ if b.ticker != nil {
+ b.ticker.Stop()
+ }
+}
+
+// newBackOffQueue builds a queue holding events whose expiry is init from
+// now on clock c. The clock is injected so tests can use a fake clock.
+func newBackOffQueue(events []interface{}, init time.Duration, c clock.Clock) *backOffQueue {
+ return &backOffQueue{
+ events: events,
+ duration: init,
+ expireTime: c.Now().Add(init),
+ clock: c,
+ }
+}
+
+// isExpire reports whether the queue's backoff deadline has been reached.
+func (q *backOffQueue) isExpire() bool {
+ // return time.Now >= expireTime
+ return !q.clock.Now().Before(q.expireTime)
+}
diff --git a/pkg/server/events_test.go b/pkg/server/events_test.go
new file mode 100644
index 000000000..c7b49aa51
--- /dev/null
+++ b/pkg/server/events_test.go
@@ -0,0 +1,134 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+ "time"
+
+ eventtypes "github.com/containerd/containerd/api/events"
+ "github.com/containerd/typeurl"
+ "github.com/stretchr/testify/assert"
+ "k8s.io/apimachinery/pkg/util/clock"
+)
+
+// TestBackOff tests the logic of backOff struct.
+//
+// It walks the full lifecycle with a fake clock: enqueue (enBackOff),
+// membership check (isInBackOff), expiry detection after advancing the
+// clock (getExpiredIDs), dequeue (deBackOff), and re-enqueue with doubled
+// duration (reBackOff).
+func TestBackOff(t *testing.T) {
+ testStartTime := time.Now()
+ // Fake clock lets the test "sleep" past the backoff deadline instantly.
+ testClock := clock.NewFakeClock(testStartTime)
+ inputQueues := map[string]*backOffQueue{
+ "container1": {
+ events: []interface{}{
+ &eventtypes.TaskOOM{ContainerID: "container1"},
+ &eventtypes.TaskOOM{ContainerID: "container1"},
+ },
+ },
+ "container2": {
+ events: []interface{}{
+ &eventtypes.TaskOOM{ContainerID: "container2"},
+ &eventtypes.TaskOOM{ContainerID: "container2"},
+ },
+ },
+ }
+ expectedQueues := map[string]*backOffQueue{
+ "container2": {
+ events: []interface{}{
+ &eventtypes.TaskOOM{ContainerID: "container2"},
+ &eventtypes.TaskOOM{ContainerID: "container2"},
+ },
+ expireTime: testClock.Now().Add(backOffInitDuration),
+ duration: backOffInitDuration,
+ clock: testClock,
+ },
+ "container1": {
+ events: []interface{}{
+ &eventtypes.TaskOOM{ContainerID: "container1"},
+ &eventtypes.TaskOOM{ContainerID: "container1"},
+ },
+ expireTime: testClock.Now().Add(backOffInitDuration),
+ duration: backOffInitDuration,
+ clock: testClock,
+ },
+ }
+
+ t.Logf("Should be able to backOff a event")
+ actual := newBackOff()
+ actual.clock = testClock
+ for k, queue := range inputQueues {
+ for _, event := range queue.events {
+ actual.enBackOff(k, event)
+ }
+ }
+ assert.Equal(t, actual.queuePool, expectedQueues)
+
+ t.Logf("Should be able to check if the container is in backOff state")
+ for k, queue := range inputQueues {
+ for _, e := range queue.events {
+ // Round-trip through typeurl to also exercise convertEvent's
+ // key extraction for TaskOOM events.
+ any, err := typeurl.MarshalAny(e)
+ assert.NoError(t, err)
+ key, _, err := convertEvent(any)
+ assert.NoError(t, err)
+ assert.Equal(t, k, key)
+ assert.Equal(t, actual.isInBackOff(key), true)
+ }
+ }
+
+ t.Logf("Should be able to check that a container isn't in backOff state")
+ notExistKey := "containerNotExist"
+ assert.Equal(t, actual.isInBackOff(notExistKey), false)
+
+ t.Logf("No containers should be expired")
+ assert.Empty(t, actual.getExpiredIDs())
+
+ t.Logf("Should be able to get all keys which are expired for backOff")
+ testClock.Sleep(backOffInitDuration)
+ actKeyList := actual.getExpiredIDs()
+ assert.Equal(t, len(inputQueues), len(actKeyList))
+ for k := range inputQueues {
+ assert.Contains(t, actKeyList, k)
+ }
+
+ t.Logf("Should be able to get out all backOff events")
+ doneQueues := map[string]*backOffQueue{}
+ for k := range inputQueues {
+ actQueue := actual.deBackOff(k)
+ doneQueues[k] = actQueue
+ assert.Equal(t, actQueue, expectedQueues[k])
+ }
+
+ t.Logf("Should not get out the event again after having got out the backOff event")
+ for k := range inputQueues {
+ var expect *backOffQueue
+ actQueue := actual.deBackOff(k)
+ assert.Equal(t, actQueue, expect)
+ }
+
+ t.Logf("Should be able to reBackOff")
+ for k, queue := range doneQueues {
+ // Drop the "handled" head event; re-queue the remainder with
+ // the duration doubled, as reBackOff is specified to do.
+ failEventIndex := 1
+ events := queue.events[failEventIndex:]
+ actual.reBackOff(k, events, queue.duration)
+ actQueue := actual.deBackOff(k)
+ expQueue := &backOffQueue{
+ events: events,
+ expireTime: testClock.Now().Add(2 * queue.duration),
+ duration: 2 * queue.duration,
+ clock: testClock,
+ }
+ assert.Equal(t, actQueue, expQueue)
+ }
+}
diff --git a/pkg/server/helpers.go b/pkg/server/helpers.go
new file mode 100644
index 000000000..34da9a254
--- /dev/null
+++ b/pkg/server/helpers.go
@@ -0,0 +1,390 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "fmt"
+ "path"
+ "path/filepath"
+ "strconv"
+ "strings"
+
+ "github.com/BurntSushi/toml"
+ runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options"
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/containers"
+ "github.com/containerd/containerd/plugin"
+ "github.com/containerd/containerd/reference/docker"
+ "github.com/containerd/containerd/runtime/linux/runctypes"
+ runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
+ "github.com/containerd/typeurl"
+ imagedigest "github.com/opencontainers/go-digest"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ runtimeoptions "github.com/containerd/cri/pkg/api/runtimeoptions/v1"
+ criconfig "github.com/containerd/cri/pkg/config"
+ "github.com/containerd/cri/pkg/store"
+ containerstore "github.com/containerd/cri/pkg/store/container"
+ imagestore "github.com/containerd/cri/pkg/store/image"
+ sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
+)
+
+const (
+ // errorStartReason is the exit reason when fails to start container.
+ errorStartReason = "StartError"
+ // errorStartExitCode is the exit code when fails to start container.
+ // 128 is the same with Docker's behavior.
+ // TODO(windows): Figure out what should be used for windows.
+ errorStartExitCode = 128
+ // completeExitReason is the exit reason when container exits with code 0.
+ completeExitReason = "Completed"
+ // errorExitReason is the exit reason when container exits with code non-zero.
+ errorExitReason = "Error"
+ // oomExitReason is the exit reason when process in container is oom killed.
+ oomExitReason = "OOMKilled"
+
+ // sandboxesDir contains all sandbox root. A sandbox root is the running
+ // directory of the sandbox, all files created for the sandbox will be
+ // placed under this directory.
+ sandboxesDir = "sandboxes"
+ // containersDir contains all container root.
+ containersDir = "containers"
+ // Delimiter used to construct container/sandbox names.
+ nameDelimiter = "_"
+
+ // criContainerdPrefix is common prefix for cri-containerd
+ criContainerdPrefix = "io.cri-containerd"
+ // containerKindLabel is a label key indicating container is sandbox container or application container
+ containerKindLabel = criContainerdPrefix + ".kind"
+ // containerKindSandbox is a label value indicating container is sandbox container
+ containerKindSandbox = "sandbox"
+ // containerKindContainer is a label value indicating container is application container
+ containerKindContainer = "container"
+ // imageLabelKey is the label key indicating the image is managed by cri plugin.
+ imageLabelKey = criContainerdPrefix + ".image"
+ // imageLabelValue is the label value indicating the image is managed by cri plugin.
+ imageLabelValue = "managed"
+ // sandboxMetadataExtension is an extension name that identify metadata of sandbox in CreateContainerRequest
+ sandboxMetadataExtension = criContainerdPrefix + ".sandbox.metadata"
+ // containerMetadataExtension is an extension name that identify metadata of container in CreateContainerRequest
+ containerMetadataExtension = criContainerdPrefix + ".container.metadata"
+
+ // defaultIfName is the default network interface for the pods
+ defaultIfName = "eth0"
+
+ // runtimeRunhcsV1 is the runtime type for runhcs.
+ runtimeRunhcsV1 = "io.containerd.runhcs.v1"
+)
+
+// makeSandboxName generates sandbox name from sandbox metadata. The name
+// generated is unique as long as sandbox metadata is unique.
+// Fields are joined with nameDelimiter; Attempt distinguishes restarts of
+// the same pod.
+func makeSandboxName(s *runtime.PodSandboxMetadata) string {
+ return strings.Join([]string{
+ s.Name, // 0
+ s.Namespace, // 1
+ s.Uid, // 2
+ fmt.Sprintf("%d", s.Attempt), // 3
+ }, nameDelimiter)
+}
+
+// makeContainerName generates container name from sandbox and container metadata.
+// The name generated is unique as long as the sandbox container combination is
+// unique. The field order parallels makeSandboxName with the container name
+// prepended and the container (not pod) attempt appended.
+func makeContainerName(c *runtime.ContainerMetadata, s *runtime.PodSandboxMetadata) string {
+ return strings.Join([]string{
+ c.Name, // 0
+ s.Name, // 1: pod name
+ s.Namespace, // 2: pod namespace
+ s.Uid, // 3: pod uid
+ fmt.Sprintf("%d", c.Attempt), // 4
+ }, nameDelimiter)
+}
+
+// getSandboxRootDir returns the root directory for managing sandbox files,
+// e.g. hosts files. Rooted at config.RootDir, in contrast to the
+// StateDir-based getVolatileSandboxRootDir.
+func (c *criService) getSandboxRootDir(id string) string {
+ return filepath.Join(c.config.RootDir, sandboxesDir, id)
+}
+
+// getVolatileSandboxRootDir returns the root directory for managing volatile sandbox files,
+// e.g. named pipes. Rooted at config.StateDir rather than config.RootDir.
+func (c *criService) getVolatileSandboxRootDir(id string) string {
+ return filepath.Join(c.config.StateDir, sandboxesDir, id)
+}
+
+// getContainerRootDir returns the root directory for managing container files,
+// e.g. state checkpoint. Rooted at config.RootDir; compare
+// getVolatileContainerRootDir.
+func (c *criService) getContainerRootDir(id string) string {
+ return filepath.Join(c.config.RootDir, containersDir, id)
+}
+
+// getVolatileContainerRootDir returns the root directory for managing volatile container files,
+// e.g. named pipes. Rooted at config.StateDir rather than config.RootDir.
+func (c *criService) getVolatileContainerRootDir(id string) string {
+ return filepath.Join(c.config.StateDir, containersDir, id)
+}
+
+// criContainerStateToString formats CRI container state to string using the
+// generated ContainerState_name table.
+func criContainerStateToString(state runtime.ContainerState) string {
+ return runtime.ContainerState_name[int32(state)]
+}
+
+// getRepoDigestAndTag returns image repoDigest and repoTag of the named image reference.
+// repoTag is empty unless the reference carries a tag; repoDigest is taken
+// from the reference when canonical, otherwise synthesized from name+digest
+// (skipped for schema1 images, whose digest is not a real repo digest).
+func getRepoDigestAndTag(namedRef docker.Named, digest imagedigest.Digest, schema1 bool) (string, string) {
+ var repoTag, repoDigest string
+ if _, ok := namedRef.(docker.NamedTagged); ok {
+ repoTag = namedRef.String()
+ }
+ if _, ok := namedRef.(docker.Canonical); ok {
+ repoDigest = namedRef.String()
+ } else if !schema1 {
+ // digest is not actual repo digest for schema1 image.
+ repoDigest = namedRef.Name() + "@" + digest.String()
+ }
+ return repoDigest, repoTag
+}
+
+// localResolve resolves image reference locally and returns corresponding image metadata. It
+// returns store.ErrNotExist if the reference doesn't exist.
+//
+// Resolution order: if refOrID parses as a digest it is used as the image ID
+// directly; otherwise it is normalized as a docker reference and resolved
+// through the image store; if both fail, refOrID is tried as an ID as-is.
+func (c *criService) localResolve(refOrID string) (imagestore.Image, error) {
+ // NOTE(review): the closure parameter `refOrId` is never used — the body
+ // closes over the outer `refOrID` instead. Behavior is unchanged because
+ // the closure is only ever called with refOrID, but the shadow-like
+ // naming is confusing and worth cleaning up.
+ getImageID := func(refOrId string) string {
+ if _, err := imagedigest.Parse(refOrID); err == nil {
+ return refOrID
+ }
+ return func(ref string) string {
+ // ref is not image id, try to resolve it locally.
+ // TODO(random-liu): Handle this error better for debugging.
+ normalized, err := docker.ParseDockerRef(ref)
+ if err != nil {
+ return ""
+ }
+ id, err := c.imageStore.Resolve(normalized.String())
+ if err != nil {
+ return ""
+ }
+ return id
+ }(refOrID)
+ }
+
+ imageID := getImageID(refOrID)
+ if imageID == "" {
+ // Try to treat ref as imageID
+ imageID = refOrID
+ }
+ return c.imageStore.Get(imageID)
+}
+
+// toContainerdImage converts an image object in image store to containerd image handler.
+// The first stored reference is used for the lookup.
+func (c *criService) toContainerdImage(ctx context.Context, image imagestore.Image) (containerd.Image, error) {
+ // image should always have at least one reference.
+ if len(image.References) == 0 {
+ return nil, errors.Errorf("invalid image with no reference %q", image.ID)
+ }
+ return c.client.GetImage(ctx, image.References[0])
+}
+
+// getUserFromImage gets uid or user name of the image user.
+// If user is numeric, it will be treated as uid; or else, it is treated as user name.
+// Exactly one of the results is meaningful: (nil, "") for unset,
+// (&uid, "") for numeric users, (nil, name) otherwise.
+func getUserFromImage(user string) (*int64, string) {
+ // return both empty if user is not specified in the image.
+ if user == "" {
+ return nil, ""
+ }
+ // split instances where the id may contain user:group
+ user = strings.Split(user, ":")[0]
+ // user could be either uid or user name. Try to interpret as numeric uid.
+ uid, err := strconv.ParseInt(user, 10, 64)
+ if err != nil {
+ // If user is non numeric, assume it's user name.
+ return nil, user
+ }
+ // If user is a numeric uid.
+ return &uid, ""
+}
+
+// ensureImageExists returns corresponding metadata of the image reference, if image is not
+// pulled yet, the function will pull the image.
+// The sandbox config is forwarded to PullImage so runtime-specific pull
+// behavior can apply.
+func (c *criService) ensureImageExists(ctx context.Context, ref string, config *runtime.PodSandboxConfig) (*imagestore.Image, error) {
+ image, err := c.localResolve(ref)
+ if err != nil && err != store.ErrNotExist {
+ return nil, errors.Wrapf(err, "failed to get image %q", ref)
+ }
+ if err == nil {
+ // Image already present locally — no pull needed.
+ return &image, nil
+ }
+ // Pull image to ensure the image exists
+ resp, err := c.PullImage(ctx, &runtime.PullImageRequest{Image: &runtime.ImageSpec{Image: ref}, SandboxConfig: config})
+ if err != nil {
+ return nil, errors.Wrapf(err, "failed to pull image %q", ref)
+ }
+ imageID := resp.GetImageRef()
+ newImage, err := c.imageStore.Get(imageID)
+ if err != nil {
+ // It's still possible that someone removed the image right after it is pulled.
+ return nil, errors.Wrapf(err, "failed to get image %q after pulling", imageID)
+ }
+ return &newImage, nil
+}
+
+// isInCRIMounts checks whether a destination is in CRI mount list.
+// Paths are compared after filepath.Clean so trailing slashes and "." are
+// not significant.
+func isInCRIMounts(dst string, mounts []*runtime.Mount) bool {
+ for _, m := range mounts {
+ if filepath.Clean(m.ContainerPath) == filepath.Clean(dst) {
+ return true
+ }
+ }
+ return false
+}
+
+// filterLabel returns a label filter. Use `%q` here because containerd
+// filter needs extra quote to work properly.
+func filterLabel(k, v string) string {
+ return fmt.Sprintf("labels.%q==%q", k, v)
+}
+
+// buildLabels builds the labels from config to be passed to containerd.
+// The kind label is set last, so it overrides any user-supplied label with
+// the same key. The input map is copied, never mutated.
+func buildLabels(configLabels map[string]string, containerType string) map[string]string {
+ labels := make(map[string]string)
+ for k, v := range configLabels {
+ labels[k] = v
+ }
+ labels[containerKindLabel] = containerType
+ return labels
+}
+
+// toRuntimeAuthConfig converts cri plugin auth config to runtime auth config.
+// Straight field-for-field copy of the credentials.
+func toRuntimeAuthConfig(a criconfig.AuthConfig) *runtime.AuthConfig {
+ return &runtime.AuthConfig{
+ Username: a.Username,
+ Password: a.Password,
+ Auth: a.Auth,
+ IdentityToken: a.IdentityToken,
+ }
+}
+
+// parseImageReferences parses a list of arbitrary image references and returns
+// the repotags and repodigests. Unparseable references are silently skipped;
+// canonical (digest) references go to digests, tagged ones to tags, and
+// references that are neither are dropped.
+func parseImageReferences(refs []string) ([]string, []string) {
+ var tags, digests []string
+ for _, ref := range refs {
+ parsed, err := docker.ParseAnyReference(ref)
+ if err != nil {
+ continue
+ }
+ if _, ok := parsed.(docker.Canonical); ok {
+ digests = append(digests, parsed.String())
+ } else if _, ok := parsed.(docker.Tagged); ok {
+ tags = append(tags, parsed.String())
+ }
+ }
+ return tags, digests
+}
+
+// generateRuntimeOptions generates runtime options from cri plugin config.
+// When no options are configured: legacy linux-v1 runtimes get a
+// runctypes.RuncOptions synthesized from the top-level config; every other
+// runtime type gets nil. Otherwise the configured TOML primitive is decoded
+// into the options struct matching the runtime type.
+func generateRuntimeOptions(r criconfig.Runtime, c criconfig.Config) (interface{}, error) {
+ if r.Options == nil {
+ if r.Type != plugin.RuntimeLinuxV1 {
+ return nil, nil
+ }
+ // This is a legacy config, generate runctypes.RuncOptions.
+ return &runctypes.RuncOptions{
+ Runtime: r.Engine,
+ RuntimeRoot: r.Root,
+ SystemdCgroup: c.SystemdCgroup,
+ }, nil
+ }
+ options := getRuntimeOptionsType(r.Type)
+ if err := toml.PrimitiveDecode(*r.Options, options); err != nil {
+ return nil, err
+ }
+ return options, nil
+}
+
+// getRuntimeOptionsType gets empty runtime options by the runtime type name.
+// Unrecognized types fall back to the generic runtimeoptions.Options.
+func getRuntimeOptionsType(t string) interface{} {
+ switch t {
+ case plugin.RuntimeRuncV1:
+ fallthrough
+ case plugin.RuntimeRuncV2:
+ return &runcoptions.Options{}
+ case plugin.RuntimeLinuxV1:
+ return &runctypes.RuncOptions{}
+ case runtimeRunhcsV1:
+ return &runhcsoptions.Options{}
+ default:
+ return &runtimeoptions.Options{}
+ }
+}
+
+// getRuntimeOptions get runtime options from container metadata.
+// Returns (nil, nil) when the container has no runtime options set;
+// otherwise unmarshals the typeurl Any into its concrete options type.
+func getRuntimeOptions(c containers.Container) (interface{}, error) {
+ if c.Runtime.Options == nil {
+ return nil, nil
+ }
+ opts, err := typeurl.UnmarshalAny(c.Runtime.Options)
+ if err != nil {
+ return nil, err
+ }
+ return opts, nil
+}
+
+const (
+ // unknownExitCode is the exit code when exit reason is unknown.
+ unknownExitCode = 255
+ // unknownExitReason is the exit reason when exit reason is unknown.
+ unknownExitReason = "Unknown"
+)
+
+// unknownContainerStatus returns the default container status when its status is unknown.
+// Unknown=true marks the status for later reconciliation (see the UNKNOWN ->
+// EXITED transition in handleContainerExit).
+func unknownContainerStatus() containerstore.Status {
+ return containerstore.Status{
+ CreatedAt: 0,
+ StartedAt: 0,
+ FinishedAt: 0,
+ ExitCode: unknownExitCode,
+ Reason: unknownExitReason,
+ Unknown: true,
+ }
+}
+
+// unknownSandboxStatus returns the default sandbox status when its status is unknown.
+// Only the state field is set; all other fields keep their zero values.
+func unknownSandboxStatus() sandboxstore.Status {
+ return sandboxstore.Status{
+ State: sandboxstore.StateUnknown,
+ }
+}
+
+// getPassthroughAnnotations filters requested pod annotations by comparing
+// against permitted annotations for the given runtime.
+// runtimePodAnnotations entries are glob patterns matched against each
+// annotation key; a key matching any pattern is passed through unchanged.
+func getPassthroughAnnotations(podAnnotations map[string]string,
+ runtimePodAnnotations []string) (passthroughAnnotations map[string]string) {
+ passthroughAnnotations = make(map[string]string)
+
+ for podAnnotationKey, podAnnotationValue := range podAnnotations {
+ for _, pattern := range runtimePodAnnotations {
+ // Use path.Match instead of filepath.Match here.
+ // filepath.Match treated `\\` as path separator
+ // on windows, which is not what we want.
+ if ok, _ := path.Match(pattern, podAnnotationKey); ok {
+ passthroughAnnotations[podAnnotationKey] = podAnnotationValue
+ }
+ }
+ }
+ return passthroughAnnotations
+}
diff --git a/pkg/server/helpers_linux.go b/pkg/server/helpers_linux.go
new file mode 100644
index 000000000..6b8b048dc
--- /dev/null
+++ b/pkg/server/helpers_linux.go
@@ -0,0 +1,290 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "context"
+ "fmt"
+ "os"
+ "path"
+ "path/filepath"
+ "regexp"
+ "sort"
+ "strings"
+ "syscall"
+ "time"
+
+ "github.com/containerd/containerd/log"
+ "github.com/containerd/containerd/mount"
+ "github.com/containerd/cri/pkg/seccomp"
+ "github.com/containerd/cri/pkg/seutil"
+ runcapparmor "github.com/opencontainers/runc/libcontainer/apparmor"
+ "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/opencontainers/selinux/go-selinux/label"
+ "github.com/pkg/errors"
+ "golang.org/x/sys/unix"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+const (
+ // defaultSandboxOOMAdj is default omm adj for sandbox container. (kubernetes#47938).
+ defaultSandboxOOMAdj = -998
+ // defaultShmSize is the default size of the sandbox shm.
+ defaultShmSize = int64(1024 * 1024 * 64)
+ // relativeRootfsPath is the rootfs path relative to bundle path.
+ relativeRootfsPath = "rootfs"
+ // According to http://man7.org/linux/man-pages/man5/resolv.conf.5.html:
+ // "The search list is currently limited to six domains with a total of 256 characters."
+ maxDNSSearches = 6
+ // devShm is the default path of /dev/shm.
+ devShm = "/dev/shm"
+ // etcHosts is the default path of /etc/hosts file.
+ etcHosts = "/etc/hosts"
+ // etcHostname is the default path of /etc/hostname file.
+ etcHostname = "/etc/hostname"
+ // resolvConfPath is the abs path of resolv.conf on host or container.
+ resolvConfPath = "/etc/resolv.conf"
+ // hostnameEnv is the key for HOSTNAME env.
+ hostnameEnv = "HOSTNAME"
+)
+
+// getCgroupsPath generates container cgroups path.
+// A parent ending in ".slice" is treated as a systemd cgroup and produces
+// runc's "slice:prefix:name" form; anything else is a plain cgroupfs path.
+func getCgroupsPath(cgroupsParent, id string) string {
+ base := path.Base(cgroupsParent)
+ if strings.HasSuffix(base, ".slice") {
+ // For a.slice/b.slice/c.slice, base is c.slice.
+ // runc systemd cgroup path format is "slice:prefix:name".
+ return strings.Join([]string{base, "cri-containerd", id}, ":")
+ }
+ return filepath.Join(cgroupsParent, id)
+}
+
+// getSandboxHostname returns the hostname file path inside the sandbox root
+// directory (bind-mounted as the pod's /etc/hostname).
+func (c *criService) getSandboxHostname(id string) string {
+ return filepath.Join(c.getSandboxRootDir(id), "hostname")
+}
+
+// getSandboxHosts returns the hosts file path inside the sandbox root
+// directory (bind-mounted as the pod's /etc/hosts).
+func (c *criService) getSandboxHosts(id string) string {
+ return filepath.Join(c.getSandboxRootDir(id), "hosts")
+}
+
+// getResolvPath returns resolv.conf filepath for specified sandbox,
+// located in the persistent sandbox root directory.
+func (c *criService) getResolvPath(id string) string {
+ return filepath.Join(c.getSandboxRootDir(id), "resolv.conf")
+}
+
+// getSandboxDevShm returns the shm file path inside the sandbox root directory.
+// Note it lives under the volatile (StateDir) root, unlike hostname/hosts.
+func (c *criService) getSandboxDevShm(id string) string {
+ return filepath.Join(c.getVolatileSandboxRootDir(id), "shm")
+}
+
+// toLabel converts CRI SELinux options into the "key:value" label strings
+// expected by label.InitLabels. Returns (nil, nil) for nil options; the
+// level is validated before any labels are built.
+func toLabel(selinuxOptions *runtime.SELinuxOption) ([]string, error) {
+ var labels []string
+
+ if selinuxOptions == nil {
+ return nil, nil
+ }
+ if err := checkSelinuxLevel(selinuxOptions.Level); err != nil {
+ return nil, err
+ }
+ if selinuxOptions.User != "" {
+ labels = append(labels, "user:"+selinuxOptions.User)
+ }
+ if selinuxOptions.Role != "" {
+ labels = append(labels, "role:"+selinuxOptions.Role)
+ }
+ if selinuxOptions.Type != "" {
+ labels = append(labels, "type:"+selinuxOptions.Type)
+ }
+ if selinuxOptions.Level != "" {
+ labels = append(labels, "level:"+selinuxOptions.Level)
+ }
+
+ return labels, nil
+}
+
+// initLabelsFromOpt derives the (process label, mount label) pair for a
+// container from CRI SELinux options via label.InitLabels.
+func initLabelsFromOpt(selinuxOpts *runtime.SELinuxOption) (string, string, error) {
+ labels, err := toLabel(selinuxOpts)
+ if err != nil {
+ return "", "", err
+ }
+ return label.InitLabels(labels)
+}
+
+// checkSelinuxLevel validates the MLS/MCS level string (e.g. "s0:c1,c2" or
+// "s0-s1:c0.c3"). An empty level is accepted; anything not matching the
+// sensitivity[-sensitivity][:categories] pattern is rejected.
+func checkSelinuxLevel(level string) error {
+ if len(level) == 0 {
+ return nil
+ }
+
+ matched, err := regexp.MatchString(`^s\d(-s\d)??(:c\d{1,4}(\.c\d{1,4})?(,c\d{1,4}(\.c\d{1,4})?)*)?$`, level)
+ if err != nil {
+ // Regexp compile error — should not happen for this fixed pattern.
+ return errors.Wrapf(err, "the format of 'level' %q is not correct", level)
+ }
+ if !matched {
+ return fmt.Errorf("the format of 'level' %q is not correct", level)
+ }
+ return nil
+}
+
+// apparmorEnabled reports whether AppArmor should be applied: the kernel
+// supports it and the plugin config hasn't disabled it.
+func (c *criService) apparmorEnabled() bool {
+ return runcapparmor.IsEnabled() && !c.config.DisableApparmor
+}
+
+// seccompEnabled reports whether the kernel supports seccomp.
+func (c *criService) seccompEnabled() bool {
+ return seccomp.IsEnabled()
+}
+
+// openLogFile opens/creates a container log file, append-only, mode 0640.
+func openLogFile(path string) (*os.File, error) {
+ return os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0640)
+}
+
+// unmountRecursive unmounts the target and all mounts underneath, starting with
+// the deepest mount first. Only the failure of the final (shallowest) unmount
+// is returned; submount failures are logged and tolerated.
+func unmountRecursive(ctx context.Context, target string) error {
+ mounts, err := mount.Self()
+ if err != nil {
+ return err
+ }
+
+ var toUnmount []string
+ for _, m := range mounts {
+ p, err := filepath.Rel(target, m.Mountpoint)
+ if err != nil {
+ return err
+ }
+ // A relative path not starting with ".." means the mountpoint is
+ // target itself or lies underneath it.
+ if !strings.HasPrefix(p, "..") {
+ toUnmount = append(toUnmount, m.Mountpoint)
+ }
+ }
+
+ // Make the deepest mount be first
+ sort.Slice(toUnmount, func(i, j int) bool {
+ return len(toUnmount[i]) > len(toUnmount[j])
+ })
+
+ for i, mountPath := range toUnmount {
+ if err := mount.UnmountAll(mountPath, unix.MNT_DETACH); err != nil {
+ if i == len(toUnmount)-1 { // last mount
+ return err
+ }
+ // This is some submount, we can ignore this error for now, the final unmount will fail if this is a real problem
+ log.G(ctx).WithError(err).Debugf("failed to unmount submount %s", mountPath)
+ }
+ }
+ return nil
+}
+
+// ensureRemoveAll wraps `os.RemoveAll` to check for specific errors that can
+// often be remedied.
+// Only use `ensureRemoveAll` if you really want to make every effort to remove
+// a directory.
+//
+// Because of the way `os.Remove` (and by extension `os.RemoveAll`) works, there
+// can be a race between reading directory entries and then actually attempting
+// to remove everything in the directory.
+// These types of errors do not need to be returned since it's ok for the dir to
+// be gone we can just retry the remove operation.
+//
+// This should not return a `os.ErrNotExist` kind of error under any circumstances
+func ensureRemoveAll(ctx context.Context, dir string) error {
+ // Paths already seen with a not-exist error; seeing the same path twice
+ // means retrying won't help, so the error is returned.
+ notExistErr := make(map[string]bool)
+
+ // track retries
+ exitOnErr := make(map[string]int)
+ maxRetry := 50
+
+ // Attempt to unmount anything beneath this dir first.
+ if err := unmountRecursive(ctx, dir); err != nil {
+ log.G(ctx).WithError(err).Debugf("failed to do initial unmount of %s", dir)
+ }
+
+ for {
+ err := os.RemoveAll(dir)
+ if err == nil {
+ return nil
+ }
+
+ // Only *os.PathError failures are retryable; anything else bubbles up.
+ pe, ok := err.(*os.PathError)
+ if !ok {
+ return err
+ }
+
+ if os.IsNotExist(err) {
+ if notExistErr[pe.Path] {
+ return err
+ }
+ notExistErr[pe.Path] = true
+
+ // There is a race where some subdir can be removed but after the
+ // parent dir entries have been read.
+ // So the path could be from `os.Remove(subdir)`
+ // If the reported non-existent path is not the passed in `dir` we
+ // should just retry, but otherwise return with no error.
+ if pe.Path == dir {
+ return nil
+ }
+ continue
+ }
+
+ if pe.Err != syscall.EBUSY {
+ return err
+ }
+ // EBUSY: something is still mounted at pe.Path — lazy-detach it and
+ // retry, giving up on this path after maxRetry attempts.
+ if e := mount.Unmount(pe.Path, unix.MNT_DETACH); e != nil {
+ return errors.Wrapf(e, "error while removing %s", dir)
+ }
+
+ if exitOnErr[pe.Path] == maxRetry {
+ return err
+ }
+ exitOnErr[pe.Path]++
+ time.Sleep(100 * time.Millisecond)
+ }
+}
+
+// vmbasedRuntimes lists runtime-type substrings identifying VM-based
+// runtimes, which need the SELinux kvm process label (see modifyProcessLabel).
+var vmbasedRuntimes = []string{
+ "io.containerd.kata",
+}
+
+// isVMBasedRuntime reports whether runtimeType contains any of the known
+// VM-based runtime identifiers (substring match, not equality).
+func isVMBasedRuntime(runtimeType string) bool {
+ for _, rt := range vmbasedRuntimes {
+ if strings.Contains(runtimeType, rt) {
+ return true
+ }
+ }
+ return false
+}
+
+// modifyProcessLabel rewrites the spec's SELinux process label to the kvm
+// variant for VM-based runtimes; non-VM runtimes are left untouched.
+func modifyProcessLabel(runtimeType string, spec *specs.Spec) error {
+ if !isVMBasedRuntime(runtimeType) {
+ return nil
+ }
+ l, err := getKVMLabel(spec.Process.SelinuxLabel)
+ if err != nil {
+ return errors.Wrap(err, "failed to get selinux kvm label")
+ }
+ spec.Process.SelinuxLabel = l
+ return nil
+}
+
+// getKVMLabel converts label l to its container_kvm_t form; if the policy
+// doesn't define container_kvm_t, the label is cleared ("" disables it).
+func getKVMLabel(l string) (string, error) {
+ if !seutil.HasType("container_kvm_t") {
+ return "", nil
+ }
+ return seutil.ChangeToKVM(l)
+}
diff --git a/pkg/server/helpers_linux_test.go b/pkg/server/helpers_linux_test.go
new file mode 100644
index 000000000..ca19c154a
--- /dev/null
+++ b/pkg/server/helpers_linux_test.go
@@ -0,0 +1,106 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "golang.org/x/net/context"
+ "golang.org/x/sys/unix"
+)
+
+// TestGetCgroupsPath verifies cgroups-path generation for both regular
+// parents ("/a/b" -> "/a/b/<id>") and systemd-style parents
+// ("/a.slice/b.slice" -> "b.slice:cri-containerd:<id>"), including
+// trailing-slash handling and the root cgroup.
+func TestGetCgroupsPath(t *testing.T) {
+	testID := "test-id"
+	for desc, test := range map[string]struct {
+		cgroupsParent string
+		expected      string
+	}{
+		"should support regular cgroup path": {
+			cgroupsParent: "/a/b",
+			expected:      "/a/b/test-id",
+		},
+		"should support systemd cgroup path": {
+			cgroupsParent: "/a.slice/b.slice",
+			expected:      "b.slice:cri-containerd:test-id",
+		},
+		"should support tailing slash for regular cgroup path": {
+			cgroupsParent: "/a/b/",
+			expected:      "/a/b/test-id",
+		},
+		"should support tailing slash for systemd cgroup path": {
+			cgroupsParent: "/a.slice/b.slice/",
+			expected:      "b.slice:cri-containerd:test-id",
+		},
+		"should treat root cgroup as regular cgroup path": {
+			cgroupsParent: "/",
+			expected:      "/test-id",
+		},
+	} {
+		t.Logf("TestCase %q", desc)
+		got := getCgroupsPath(test.cgroupsParent, testID)
+		assert.Equal(t, test.expected, got)
+	}
+}
+
+// TestEnsureRemoveAllWithMount verifies ensureRemoveAll can remove a
+// directory tree that still contains an active bind mount. Requires
+// root because it calls mount(2).
+func TestEnsureRemoveAllWithMount(t *testing.T) {
+	if os.Getuid() != 0 {
+		t.Skip("skipping test that requires root")
+	}
+
+	dir1, err := ioutil.TempDir("", "test-ensure-removeall-with-dir1")
+	if err != nil {
+		t.Fatal(err)
+	}
+	dir2, err := ioutil.TempDir("", "test-ensure-removeall-with-dir2")
+	if err != nil {
+		t.Fatal(err)
+	}
+	// Only dir2 (the bind-mount source) is cleaned up here; dir1 is
+	// expected to be removed by ensureRemoveAll itself.
+	// NOTE(review): dir1 leaks if the test fails before removal — confirm
+	// whether that is acceptable for this suite.
+	defer os.RemoveAll(dir2)
+
+	bindDir := filepath.Join(dir1, "bind")
+	if err := os.MkdirAll(bindDir, 0755); err != nil {
+		t.Fatal(err)
+	}
+
+	if err := unix.Mount(dir2, bindDir, "none", unix.MS_BIND, ""); err != nil {
+		t.Fatal(err)
+	}
+
+	// Run the removal in a goroutine so a hung unmount/retry loop trips
+	// the timeout below instead of blocking the test forever. The write
+	// to err happens before close(done), so reading err after <-done is
+	// race-free.
+	done := make(chan struct{})
+	go func() {
+		err = ensureRemoveAll(context.Background(), dir1)
+		close(done)
+	}()
+
+	select {
+	case <-done:
+		if err != nil {
+			t.Fatal(err)
+		}
+	case <-time.After(5 * time.Second):
+		t.Fatal("timeout waiting for EnsureRemoveAll to finish")
+	}
+
+	if _, err := os.Stat(dir1); !os.IsNotExist(err) {
+		t.Fatalf("expected %q to not exist", dir1)
+	}
+}
diff --git a/pkg/server/helpers_other.go b/pkg/server/helpers_other.go
new file mode 100644
index 000000000..6a67375d7
--- /dev/null
+++ b/pkg/server/helpers_other.go
@@ -0,0 +1,43 @@
+// +build !windows,!linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "context"
+ "os"
+
+ "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+// openLogFile opens/creates a container log file.
+func openLogFile(path string) (*os.File, error) {
+ return os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0640)
+}
+
+// ensureRemoveAll wraps `os.RemoveAll` to check for specific errors that can
+// often be remedied.
+// Only use `ensureRemoveAll` if you really want to make every effort to remove
+// a directory.
+func ensureRemoveAll(ctx context.Context, dir string) error {
+ return os.RemoveAll(dir)
+}
+
+func modifyProcessLabel(runtimeType string, spec *specs.Spec) error {
+ return nil
+}
diff --git a/pkg/server/helpers_selinux_test.go b/pkg/server/helpers_selinux_test.go
new file mode 100644
index 000000000..53ed59c5a
--- /dev/null
+++ b/pkg/server/helpers_selinux_test.go
@@ -0,0 +1,159 @@
+// +build selinux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+
+ "github.com/opencontainers/selinux/go-selinux"
+ "github.com/stretchr/testify/assert"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// TestInitSelinuxOpts verifies label resolution from CRI SELinuxOption:
+// nil/partial options are overlaid with defaults, complete options pass
+// through, and a malformed level is rejected. Expectations are regexps
+// (assert.Regexp) because category pairs (cN,cN) are randomly assigned.
+func TestInitSelinuxOpts(t *testing.T) {
+	if !selinux.GetEnabled() {
+		t.Skip("selinux is not enabled")
+	}
+
+	for desc, test := range map[string]struct {
+		selinuxOpt   *runtime.SELinuxOption
+		processLabel string
+		mountLabel   string
+		expectErr    bool
+	}{
+		"Should return empty strings for processLabel and mountLabel when selinuxOpt is nil": {
+			selinuxOpt:   nil,
+			processLabel: ".*:c[0-9]{1,3},c[0-9]{1,3}",
+			mountLabel:   ".*:c[0-9]{1,3},c[0-9]{1,3}",
+		},
+		"Should overlay fields on processLabel when selinuxOpt has been initialized partially": {
+			selinuxOpt: &runtime.SELinuxOption{
+				User:  "",
+				Role:  "user_r",
+				Type:  "",
+				Level: "s0:c1,c2",
+			},
+			processLabel: "system_u:user_r:(container_file_t|svirt_lxc_net_t):s0:c1,c2",
+			mountLabel:   "system_u:object_r:(container_file_t|svirt_sandbox_file_t):s0:c1,c2",
+		},
+		"Should be resolved correctly when selinuxOpt has been initialized completely": {
+			selinuxOpt: &runtime.SELinuxOption{
+				User:  "user_u",
+				Role:  "user_r",
+				Type:  "user_t",
+				Level: "s0:c1,c2",
+			},
+			processLabel: "user_u:user_r:user_t:s0:c1,c2",
+			mountLabel:   "user_u:object_r:(container_file_t|svirt_sandbox_file_t):s0:c1,c2",
+		},
+		"Should be resolved correctly when selinuxOpt has been initialized with level=''": {
+			selinuxOpt: &runtime.SELinuxOption{
+				User:  "user_u",
+				Role:  "user_r",
+				Type:  "user_t",
+				Level: "",
+			},
+			processLabel: "user_u:user_r:user_t:s0:c[0-9]{1,3},c[0-9]{1,3}",
+			mountLabel:   "user_u:object_r:(container_file_t|svirt_sandbox_file_t):s0",
+		},
+		"Should return error when the format of 'level' is not correct": {
+			selinuxOpt: &runtime.SELinuxOption{
+				User:  "user_u",
+				Role:  "user_r",
+				Type:  "user_t",
+				Level: "s0,c1,c2",
+			},
+			expectErr: true,
+		},
+	} {
+		t.Run(desc, func(t *testing.T) {
+			processLabel, mountLabel, err := initLabelsFromOpt(test.selinuxOpt)
+			if test.expectErr {
+				assert.Error(t, err)
+			} else {
+				assert.Regexp(t, test.processLabel, processLabel)
+				assert.Regexp(t, test.mountLabel, mountLabel)
+			}
+		})
+	}
+}
+
+// TestCheckSelinuxLevel exercises checkSelinuxLevel against valid and
+// invalid MLS/MCS level strings (sensitivity, optional range, comma- or
+// dot-separated category lists).
+func TestCheckSelinuxLevel(t *testing.T) {
+	for desc, test := range map[string]struct {
+		level         string
+		expectNoMatch bool
+	}{
+		"s0": {
+			level: "s0",
+		},
+		"s0-s0": {
+			level: "s0-s0",
+		},
+		"s0:c0": {
+			level: "s0:c0",
+		},
+		"s0:c0.c3": {
+			level: "s0:c0.c3",
+		},
+		"s0:c0,c3": {
+			level: "s0:c0,c3",
+		},
+		"s0-s0:c0,c3": {
+			level: "s0-s0:c0,c3",
+		},
+		"s0-s0:c0,c3.c6": {
+			level: "s0-s0:c0,c3.c6",
+		},
+		"s0-s0:c0,c3.c6,c8.c10": {
+			level: "s0-s0:c0,c3.c6,c8.c10",
+		},
+		"s0-s0:c0,c3.c6,c8,c10": {
+			// NOTE(review): the level here does not match the case name
+			// above ("...c8,c10" vs "...c3.c6") and duplicates the
+			// previous valid case — looks like a copy/paste slip; confirm
+			// the intended level.
+			level: "s0-s0:c0,c3.c6",
+		},
+		"s0,c0,c3": {
+			level:         "s0,c0,c3",
+			expectNoMatch: true,
+		},
+		"s0:c0.c3.c6": {
+			level:         "s0:c0.c3.c6",
+			expectNoMatch: true,
+		},
+		"s0-s0,c0,c3": {
+			level:         "s0-s0,c0,c3",
+			expectNoMatch: true,
+		},
+		"s0-s0:c0.c3.c6": {
+			level:         "s0-s0:c0.c3.c6",
+			expectNoMatch: true,
+		},
+		"s0-s0:c0,c3.c6.c8": {
+			level:         "s0-s0:c0,c3.c6.c8",
+			expectNoMatch: true,
+		},
+	} {
+		t.Run(desc, func(t *testing.T) {
+			err := checkSelinuxLevel(test.level)
+			if test.expectNoMatch {
+				assert.Error(t, err)
+			} else {
+				assert.NoError(t, err)
+			}
+		})
+	}
+}
diff --git a/pkg/server/helpers_test.go b/pkg/server/helpers_test.go
new file mode 100644
index 000000000..4bb38e736
--- /dev/null
+++ b/pkg/server/helpers_test.go
@@ -0,0 +1,498 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "context"
+ "io/ioutil"
+ "testing"
+
+ "github.com/BurntSushi/toml"
+ "github.com/containerd/containerd/oci"
+ "github.com/containerd/containerd/plugin"
+ "github.com/containerd/containerd/reference/docker"
+ "github.com/containerd/containerd/runtime/linux/runctypes"
+ runcoptions "github.com/containerd/containerd/runtime/v2/runc/options"
+ imagedigest "github.com/opencontainers/go-digest"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+
+ criconfig "github.com/containerd/cri/pkg/config"
+ "github.com/containerd/cri/pkg/store"
+ imagestore "github.com/containerd/cri/pkg/store/image"
+)
+
+// TestGetUserFromImage tests the logic of getting image uid or user name of image user.
+// A numeric first segment yields a uid; a non-numeric one yields a name;
+// anything after the first ':' (the gid part) is ignored.
+func TestGetUserFromImage(t *testing.T) {
+	// newI64 builds an *int64 literal for expected uid values.
+	newI64 := func(i int64) *int64 { return &i }
+	for c, test := range map[string]struct {
+		user string
+		uid  *int64
+		name string
+	}{
+		"no gid": {
+			user: "0",
+			uid:  newI64(0),
+		},
+		"uid/gid": {
+			user: "0:1",
+			uid:  newI64(0),
+		},
+		"empty user": {
+			user: "",
+		},
+		// (sic: "spearators" typo is in the original case name; it is a
+		// map key, left unchanged here.)
+		"multiple spearators": {
+			user: "1:2:3",
+			uid:  newI64(1),
+		},
+		"root username": {
+			user: "root:root",
+			name: "root",
+		},
+		"username": {
+			user: "test:test",
+			name: "test",
+		},
+	} {
+		t.Logf("TestCase - %q", c)
+		actualUID, actualName := getUserFromImage(test.user)
+		assert.Equal(t, test.uid, actualUID)
+		assert.Equal(t, test.name, actualName)
+	}
+}
+
+// TestGetRepoDigestAndTag verifies repo digest/tag extraction from a
+// parsed reference, including the schema1 special cases (no digest in
+// the ref means no repo digest for schema1 images).
+func TestGetRepoDigestAndTag(t *testing.T) {
+	digest := imagedigest.Digest("sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582")
+	for desc, test := range map[string]struct {
+		ref                string
+		schema1            bool
+		expectedRepoDigest string
+		expectedRepoTag    string
+	}{
+		"repo tag should be empty if original ref has no tag": {
+			ref:                "gcr.io/library/busybox@" + digest.String(),
+			expectedRepoDigest: "gcr.io/library/busybox@" + digest.String(),
+		},
+		"repo tag should not be empty if original ref has tag": {
+			ref:                "gcr.io/library/busybox:latest",
+			expectedRepoDigest: "gcr.io/library/busybox@" + digest.String(),
+			expectedRepoTag:    "gcr.io/library/busybox:latest",
+		},
+		"repo digest should be empty if original ref is schema1 and has no digest": {
+			ref:                "gcr.io/library/busybox:latest",
+			schema1:            true,
+			expectedRepoDigest: "",
+			expectedRepoTag:    "gcr.io/library/busybox:latest",
+		},
+		"repo digest should not be empty if orignal ref is schema1 but has digest": {
+			ref:                "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59594",
+			schema1:            true,
+			expectedRepoDigest: "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59594",
+			expectedRepoTag:    "",
+		},
+	} {
+		t.Logf("TestCase %q", desc)
+		named, err := docker.ParseDockerRef(test.ref)
+		assert.NoError(t, err)
+		repoDigest, repoTag := getRepoDigestAndTag(named, digest, test.schema1)
+		assert.Equal(t, test.expectedRepoDigest, repoDigest)
+		assert.Equal(t, test.expectedRepoTag, repoTag)
+	}
+}
+
+// TestBuildLabels verifies buildLabels copies config labels, adds the
+// container-kind label, and does not alias the input map (mutating the
+// result must not affect the original).
+func TestBuildLabels(t *testing.T) {
+	configLabels := map[string]string{
+		"a": "b",
+		"c": "d",
+	}
+	newLabels := buildLabels(configLabels, containerKindSandbox)
+	assert.Len(t, newLabels, 3)
+	assert.Equal(t, "b", newLabels["a"])
+	assert.Equal(t, "d", newLabels["c"])
+	assert.Equal(t, containerKindSandbox, newLabels[containerKindLabel])
+
+	// Mutate the returned map to prove it is a copy, not the input.
+	newLabels["a"] = "e"
+	assert.Empty(t, configLabels[containerKindLabel], "should not add new labels into original label")
+	assert.Equal(t, "b", configLabels["a"], "change in new labels should not affect original label")
+}
+
+// TestParseImageReferences verifies refs are partitioned into tagged
+// and digested references; bare digests and arbitrary strings are
+// dropped.
+func TestParseImageReferences(t *testing.T) {
+	refs := []string{
+		"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
+		"gcr.io/library/busybox:1.2",
+		"sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
+		"arbitrary-ref",
+	}
+	expectedTags := []string{
+		"gcr.io/library/busybox:1.2",
+	}
+	expectedDigests := []string{"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"}
+	tags, digests := parseImageReferences(refs)
+	assert.Equal(t, expectedTags, tags)
+	assert.Equal(t, expectedDigests, digests)
+}
+
+// TestLocalResolve verifies localResolve finds a stored image by its ID
+// and by every normalized docker.io reference form, and returns
+// store.ErrNotExist for unknown references. Uses a fake image store.
+func TestLocalResolve(t *testing.T) {
+	image := imagestore.Image{
+		ID:      "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799",
+		ChainID: "test-chain-id-1",
+		References: []string{
+			"docker.io/library/busybox:latest",
+			"docker.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
+		},
+		Size: 10,
+	}
+	c := newTestCRIService()
+	var err error
+	c.imageStore, err = imagestore.NewFakeStore([]imagestore.Image{image})
+	assert.NoError(t, err)
+
+	// All of these should normalize to the same stored image.
+	for _, ref := range []string{
+		"sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113799",
+		"busybox",
+		"busybox:latest",
+		"busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
+		"library/busybox",
+		"library/busybox:latest",
+		"library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
+		"docker.io/busybox",
+		"docker.io/busybox:latest",
+		"docker.io/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
+		"docker.io/library/busybox",
+		"docker.io/library/busybox:latest",
+		"docker.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
+	} {
+		img, err := c.localResolve(ref)
+		assert.NoError(t, err)
+		assert.Equal(t, image, img)
+	}
+	img, err := c.localResolve("randomid")
+	assert.Equal(t, store.ErrNotExist, err)
+	assert.Equal(t, imagestore.Image{}, img)
+}
+
+// TestGenerateRuntimeOptions verifies per-runtime option decoding from
+// TOML config: nil options fall back to legacy RuncOptions built from
+// top-level fields (systemd_cgroup) for the legacy runtime and to nil
+// for runc v1/v2; non-nil option tables decode into the matching
+// options type for each runtime_type.
+func TestGenerateRuntimeOptions(t *testing.T) {
+	// Config with no per-runtime [options] tables.
+	nilOpts := `
+systemd_cgroup = true
+[containerd]
+  no_pivot = true
+  default_runtime_name = "default"
+[containerd.runtimes.legacy]
+  runtime_type = "` + plugin.RuntimeLinuxV1 + `"
+[containerd.runtimes.runc]
+  runtime_type = "` + plugin.RuntimeRuncV1 + `"
+[containerd.runtimes.runcv2]
+  runtime_type = "` + plugin.RuntimeRuncV2 + `"
+`
+	// Same runtimes, each with an explicit [options] table.
+	nonNilOpts := `
+systemd_cgroup = true
+[containerd]
+  no_pivot = true
+  default_runtime_name = "default"
+[containerd.runtimes.legacy]
+  runtime_type = "` + plugin.RuntimeLinuxV1 + `"
+[containerd.runtimes.legacy.options]
+  Runtime = "legacy"
+  RuntimeRoot = "/legacy"
+[containerd.runtimes.runc]
+  runtime_type = "` + plugin.RuntimeRuncV1 + `"
+[containerd.runtimes.runc.options]
+  BinaryName = "runc"
+  Root = "/runc"
+  NoNewKeyring = true
+[containerd.runtimes.runcv2]
+  runtime_type = "` + plugin.RuntimeRuncV2 + `"
+[containerd.runtimes.runcv2.options]
+  BinaryName = "runc"
+  Root = "/runcv2"
+  NoNewKeyring = true
+`
+	var nilOptsConfig, nonNilOptsConfig criconfig.Config
+	_, err := toml.Decode(nilOpts, &nilOptsConfig)
+	require.NoError(t, err)
+	_, err = toml.Decode(nonNilOpts, &nonNilOptsConfig)
+	require.NoError(t, err)
+	require.Len(t, nilOptsConfig.Runtimes, 3)
+	require.Len(t, nonNilOptsConfig.Runtimes, 3)
+
+	for desc, test := range map[string]struct {
+		r               criconfig.Runtime
+		c               criconfig.Config
+		expectedOptions interface{}
+	}{
+		"when options is nil, should return nil option for io.containerd.runc.v1": {
+			r:               nilOptsConfig.Runtimes["runc"],
+			c:               nilOptsConfig,
+			expectedOptions: nil,
+		},
+		"when options is nil, should return nil option for io.containerd.runc.v2": {
+			r:               nilOptsConfig.Runtimes["runcv2"],
+			c:               nilOptsConfig,
+			expectedOptions: nil,
+		},
+		"when options is nil, should use legacy fields for legacy runtime": {
+			r: nilOptsConfig.Runtimes["legacy"],
+			c: nilOptsConfig,
+			expectedOptions: &runctypes.RuncOptions{
+				SystemdCgroup: true,
+			},
+		},
+		"when options is not nil, should be able to decode for io.containerd.runc.v1": {
+			r: nonNilOptsConfig.Runtimes["runc"],
+			c: nonNilOptsConfig,
+			expectedOptions: &runcoptions.Options{
+				BinaryName:   "runc",
+				Root:         "/runc",
+				NoNewKeyring: true,
+			},
+		},
+		"when options is not nil, should be able to decode for io.containerd.runc.v2": {
+			r: nonNilOptsConfig.Runtimes["runcv2"],
+			c: nonNilOptsConfig,
+			expectedOptions: &runcoptions.Options{
+				BinaryName:   "runc",
+				Root:         "/runcv2",
+				NoNewKeyring: true,
+			},
+		},
+		"when options is not nil, should be able to decode for legacy runtime": {
+			r: nonNilOptsConfig.Runtimes["legacy"],
+			c: nonNilOptsConfig,
+			expectedOptions: &runctypes.RuncOptions{
+				Runtime:     "legacy",
+				RuntimeRoot: "/legacy",
+			},
+		},
+	} {
+		t.Run(desc, func(t *testing.T) {
+			opts, err := generateRuntimeOptions(test.r, test.c)
+			assert.NoError(t, err)
+			assert.Equal(t, test.expectedOptions, opts)
+		})
+	}
+}
+
+// TestEnvDeduplication verifies that applying oci.WithEnv repeatedly
+// deduplicates by key: a later value overrides an earlier one while the
+// key keeps its first-seen position, and pre-existing spec env is
+// merged the same way.
+func TestEnvDeduplication(t *testing.T) {
+	for desc, test := range map[string]struct {
+		existing []string
+		kv       [][2]string
+		expected []string
+	}{
+		"single env": {
+			kv: [][2]string{
+				{"a", "b"},
+			},
+			expected: []string{"a=b"},
+		},
+		"multiple envs": {
+			kv: [][2]string{
+				{"a", "b"},
+				{"c", "d"},
+				{"e", "f"},
+			},
+			expected: []string{
+				"a=b",
+				"c=d",
+				"e=f",
+			},
+		},
+		"env override": {
+			kv: [][2]string{
+				{"k1", "v1"},
+				{"k2", "v2"},
+				{"k3", "v3"},
+				{"k3", "v4"},
+				{"k1", "v5"},
+				{"k4", "v6"},
+			},
+			expected: []string{
+				"k1=v5",
+				"k2=v2",
+				"k3=v4",
+				"k4=v6",
+			},
+		},
+		"existing env": {
+			existing: []string{
+				"k1=v1",
+				"k2=v2",
+				"k3=v3",
+			},
+			kv: [][2]string{
+				{"k3", "v4"},
+				{"k2", "v5"},
+				{"k4", "v6"},
+			},
+			expected: []string{
+				"k1=v1",
+				"k2=v5",
+				"k3=v4",
+				"k4=v6",
+			},
+		},
+	} {
+		t.Logf("TestCase %q", desc)
+		var spec runtimespec.Spec
+		if len(test.existing) > 0 {
+			spec.Process = &runtimespec.Process{
+				Env: test.existing,
+			}
+		}
+		for _, kv := range test.kv {
+			// NOTE(review): the error returned by the WithEnv spec opt is
+			// deliberately ignored for these well-formed k=v inputs —
+			// confirm it cannot fail here.
+			oci.WithEnv([]string{kv[0] + "=" + kv[1]})(context.Background(), nil, nil, &spec)
+		}
+		assert.Equal(t, test.expected, spec.Process.Env)
+	}
+}
+
+// TestPassThroughAnnotationsFilter verifies pod-annotation passthrough
+// filtering: exact matches, '*' wildcards (including across the '/'
+// path separator), and match-all patterns.
+func TestPassThroughAnnotationsFilter(t *testing.T) {
+	for desc, test := range map[string]struct {
+		podAnnotations         map[string]string
+		runtimePodAnnotations  []string
+		passthroughAnnotations map[string]string
+	}{
+		"should support direct match": {
+			podAnnotations:         map[string]string{"c": "d", "d": "e"},
+			runtimePodAnnotations:  []string{"c"},
+			passthroughAnnotations: map[string]string{"c": "d"},
+		},
+		"should support wildcard match": {
+			podAnnotations: map[string]string{
+				"t.f":  "j",
+				"z.g":  "o",
+				"z":    "o",
+				"y.ca": "b",
+				"y":    "b",
+			},
+			runtimePodAnnotations: []string{"*.f", "z*g", "y.c*"},
+			passthroughAnnotations: map[string]string{
+				"t.f":  "j",
+				"z.g":  "o",
+				"y.ca": "b",
+			},
+		},
+		"should support wildcard match all": {
+			podAnnotations: map[string]string{
+				"t.f":  "j",
+				"z.g":  "o",
+				"z":    "o",
+				"y.ca": "b",
+				"y":    "b",
+			},
+			runtimePodAnnotations: []string{"*"},
+			passthroughAnnotations: map[string]string{
+				"t.f":  "j",
+				"z.g":  "o",
+				"z":    "o",
+				"y.ca": "b",
+				"y":    "b",
+			},
+		},
+		"should support match including path separator": {
+			podAnnotations: map[string]string{
+				"matchend.com/end":    "1",
+				"matchend.com/end1":   "2",
+				"matchend.com/1end":   "3",
+				"matchmid.com/mid":    "4",
+				"matchmid.com/mi1d":   "5",
+				"matchmid.com/mid1":   "6",
+				"matchhead.com/head":  "7",
+				"matchhead.com/1head": "8",
+				"matchhead.com/head1": "9",
+				"matchall.com/abc":    "10",
+				"matchall.com/def":    "11",
+				"end/matchend":        "12",
+				"end1/matchend":       "13",
+				"1end/matchend":       "14",
+				"mid/matchmid":        "15",
+				"mi1d/matchmid":       "16",
+				"mid1/matchmid":       "17",
+				"head/matchhead":      "18",
+				"1head/matchhead":     "19",
+				"head1/matchhead":     "20",
+				"abc/matchall":        "21",
+				"def/matchall":        "22",
+				"match1/match2":       "23",
+				"nomatch/nomatch":     "24",
+			},
+			runtimePodAnnotations: []string{
+				"matchend.com/end*",
+				"matchmid.com/mi*d",
+				"matchhead.com/*head",
+				"matchall.com/*",
+				"end*/matchend",
+				"mi*d/matchmid",
+				"*head/matchhead",
+				"*/matchall",
+				"match*/match*",
+			},
+			passthroughAnnotations: map[string]string{
+				"matchend.com/end":    "1",
+				"matchend.com/end1":   "2",
+				"matchmid.com/mid":    "4",
+				"matchmid.com/mi1d":   "5",
+				"matchhead.com/head":  "7",
+				"matchhead.com/1head": "8",
+				"matchall.com/abc":    "10",
+				"matchall.com/def":    "11",
+				"end/matchend":        "12",
+				"end1/matchend":       "13",
+				"mid/matchmid":        "15",
+				"mi1d/matchmid":       "16",
+				"head/matchhead":      "18",
+				"1head/matchhead":     "19",
+				"abc/matchall":        "21",
+				"def/matchall":        "22",
+				"match1/match2":       "23",
+			},
+		},
+	} {
+		t.Run(desc, func(t *testing.T) {
+			passthroughAnnotations := getPassthroughAnnotations(test.podAnnotations, test.runtimePodAnnotations)
+			assert.Equal(t, test.passthroughAnnotations, passthroughAnnotations)
+		})
+	}
+}
+
+// TestEnsureRemoveAllNotExist: ensureRemoveAll must succeed (like
+// os.RemoveAll) on a path that does not exist.
+func TestEnsureRemoveAllNotExist(t *testing.T) {
+	// should never return an error for a non-existent path
+	if err := ensureRemoveAll(context.Background(), "/non/existent/path"); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestEnsureRemoveAllWithDir: ensureRemoveAll removes an existing empty
+// directory without error.
+func TestEnsureRemoveAllWithDir(t *testing.T) {
+	dir, err := ioutil.TempDir("", "test-ensure-removeall-with-dir")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := ensureRemoveAll(context.Background(), dir); err != nil {
+		t.Fatal(err)
+	}
+}
+
+// TestEnsureRemoveAllWithFile: ensureRemoveAll also removes a plain
+// file, not just directories.
+func TestEnsureRemoveAllWithFile(t *testing.T) {
+	tmp, err := ioutil.TempFile("", "test-ensure-removeall-with-dir")
+	if err != nil {
+		t.Fatal(err)
+	}
+	tmp.Close()
+	if err := ensureRemoveAll(context.Background(), tmp.Name()); err != nil {
+		t.Fatal(err)
+	}
+}
diff --git a/pkg/server/helpers_windows.go b/pkg/server/helpers_windows.go
new file mode 100644
index 000000000..f88f34bad
--- /dev/null
+++ b/pkg/server/helpers_windows.go
@@ -0,0 +1,170 @@
+// +build windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "context"
+ "os"
+ "path/filepath"
+ "syscall"
+
+ "github.com/opencontainers/runtime-spec/specs-go"
+)
+
+// openLogFile opens/creates a container log file.
+// It specifies `FILE_SHARE_DELETE` option to make sure
+// log files can be rotated by kubelet.
+// TODO(windows): Use golang support after 1.14. (https://github.com/golang/go/issues/32088)
+// openLogFile opens/creates a container log file.
+// It specifies `FILE_SHARE_DELETE` option to make sure
+// log files can be rotated by kubelet.
+// TODO(windows): Use golang support after 1.14. (https://github.com/golang/go/issues/32088)
+func openLogFile(path string) (*os.File, error) {
+	path = fixLongPath(path)
+	if len(path) == 0 {
+		return nil, syscall.ERROR_FILE_NOT_FOUND
+	}
+	pathp, err := syscall.UTF16PtrFromString(path)
+	if err != nil {
+		return nil, err
+	}
+	// OPEN_ALWAYS: create if missing, open otherwise. FILE_APPEND_DATA
+	// makes every write append-only, mirroring O_APPEND on unix.
+	createmode := uint32(syscall.OPEN_ALWAYS)
+	access := uint32(syscall.FILE_APPEND_DATA)
+	sharemode := uint32(syscall.FILE_SHARE_READ | syscall.FILE_SHARE_WRITE | syscall.FILE_SHARE_DELETE)
+	h, err := syscall.CreateFile(pathp, access, sharemode, nil, createmode, syscall.FILE_ATTRIBUTE_NORMAL, 0)
+	if err != nil {
+		return nil, err
+	}
+	// os.NewFile takes ownership of the handle; the caller closes it.
+	return os.NewFile(uintptr(h), path), nil
+}
+
+// Copyright (c) 2009 The Go Authors. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// fixLongPath returns the extended-length (\\?\-prefixed) form of
+// path when needed, in order to avoid the default 260 character file
+// path limit imposed by Windows. If path is not easily converted to
+// the extended-length form (for example, if path is a relative path
+// or contains .. elements), or is short enough, fixLongPath returns
+// path unmodified.
+//
+// See https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath
+//
+// This is copied from https://golang.org/src/path/filepath/path_windows.go.
+// fixLongPath returns the extended-length (\\?\-prefixed) form of
+// path when needed, in order to avoid the default 260 character file
+// path limit imposed by Windows. If path is not easily converted to
+// the extended-length form (for example, if path is a relative path
+// or contains .. elements), or is short enough, fixLongPath returns
+// path unmodified.
+//
+// See https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath
+//
+// This is copied from https://golang.org/src/path/filepath/path_windows.go.
+func fixLongPath(path string) string {
+	// Do nothing (and don't allocate) if the path is "short".
+	// Empirically (at least on the Windows Server 2013 builder),
+	// the kernel is arbitrarily okay with < 248 bytes. That
+	// matches what the docs above say:
+	// "When using an API to create a directory, the specified
+	// path cannot be so long that you cannot append an 8.3 file
+	// name (that is, the directory name cannot exceed MAX_PATH
+	// minus 12)." Since MAX_PATH is 260, 260 - 12 = 248.
+	//
+	// The MSDN docs appear to say that a normal path that is 248 bytes long
+	// will work; empirically the path must be less then 248 bytes long.
+	if len(path) < 248 {
+		// Don't fix. (This is how Go 1.7 and earlier worked,
+		// not automatically generating the \\?\ form)
+		return path
+	}
+
+	// The extended form begins with \\?\, as in
+	// \\?\c:\windows\foo.txt or \\?\UNC\server\share\foo.txt.
+	// The extended form disables evaluation of . and .. path
+	// elements and disables the interpretation of / as equivalent
+	// to \. The conversion here rewrites / to \ and elides
+	// . elements as well as trailing or duplicate separators. For
+	// simplicity it avoids the conversion entirely for relative
+	// paths or paths containing .. elements. For now,
+	// \\server\share paths are not converted to
+	// \\?\UNC\server\share paths because the rules for doing so
+	// are less well-specified.
+	if len(path) >= 2 && path[:2] == `\\` {
+		// Don't canonicalize UNC paths.
+		return path
+	}
+	if !filepath.IsAbs(path) {
+		// Relative path
+		return path
+	}
+
+	const prefix = `\\?`
+
+	// Worst case: prefix + path unchanged + one extra trailing \ for a
+	// drive root; pathbuf never needs to grow beyond this.
+	pathbuf := make([]byte, len(prefix)+len(path)+len(`\`))
+	copy(pathbuf, prefix)
+	n := len(path)
+	r, w := 0, len(prefix)
+	for r < n {
+		switch {
+		case os.IsPathSeparator(path[r]):
+			// empty block
+			r++
+		case path[r] == '.' && (r+1 == n || os.IsPathSeparator(path[r+1])):
+			// /./
+			r++
+		case r+1 < n && path[r] == '.' && path[r+1] == '.' && (r+2 == n || os.IsPathSeparator(path[r+2])):
+			// /../ is currently unhandled
+			return path
+		default:
+			pathbuf[w] = '\\'
+			w++
+			for ; r < n && !os.IsPathSeparator(path[r]); r++ {
+				pathbuf[w] = path[r]
+				w++
+			}
+		}
+	}
+	// A drive's root directory needs a trailing \
+	if w == len(`\\?\c:`) {
+		pathbuf[w] = '\\'
+		w++
+	}
+	return string(pathbuf[:w])
+}
+
+// ensureRemoveAll is a wrapper for os.RemoveAll on Windows.
+// No busy-mount remediation is needed on this platform; the context is
+// unused.
+func ensureRemoveAll(_ context.Context, dir string) error {
+	return os.RemoveAll(dir)
+}
+
+// modifyProcessLabel is a no-op on Windows (no SELinux).
+func modifyProcessLabel(runtimeType string, spec *specs.Spec) error {
+	return nil
+}
diff --git a/pkg/server/image_list.go b/pkg/server/image_list.go
new file mode 100644
index 000000000..dc6aeecc2
--- /dev/null
+++ b/pkg/server/image_list.go
@@ -0,0 +1,38 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// ListImages lists existing images.
+// TODO(random-liu): Add image list filters after CRI defines this more clear, and kubelet
+// actually needs it.
+// ListImages lists existing images.
+// TODO(random-liu): Add image list filters after CRI defines this more clear, and kubelet
+// actually needs it.
+func (c *criService) ListImages(ctx context.Context, r *runtime.ListImagesRequest) (*runtime.ListImagesResponse, error) {
+	// NOTE(review): assumes imageStore.List returns a point-in-time copy
+	// safe to iterate without locking — confirm in the image store.
+	imagesInStore := c.imageStore.List()
+
+	var images []*runtime.Image
+	for _, image := range imagesInStore {
+		// TODO(random-liu): [P0] Make sure corresponding snapshot exists. What if snapshot
+		// doesn't exist?
+		images = append(images, toCRIImage(image))
+	}
+
+	return &runtime.ListImagesResponse{Images: images}, nil
+}
diff --git a/pkg/server/image_list_test.go b/pkg/server/image_list_test.go
new file mode 100644
index 000000000..315470324
--- /dev/null
+++ b/pkg/server/image_list_test.go
@@ -0,0 +1,113 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ imagestore "github.com/containerd/cri/pkg/store/image"
+)
+
// TestListImages verifies that ListImages converts every image in the fake
// image store into its CRI representation: the first tag becomes RepoTags,
// the digest reference becomes RepoDigests, and the image-config user is
// mapped to either Username (names) or Uid (numeric "uid:gid" values).
func TestListImages(t *testing.T) {
	c := newTestCRIService()
	// Three fixture images exercising the three user formats: a name
	// ("root"), a numeric uid:gid pair, and a non-root name ("nobody").
	imagesInStore := []imagestore.Image{
		{
			ID:      "sha256:1123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
			ChainID: "test-chainid-1",
			References: []string{
				"gcr.io/library/busybox:latest",
				"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
			},
			Size: 1000,
			ImageSpec: imagespec.Image{
				Config: imagespec.ImageConfig{
					User: "root",
				},
			},
		},
		{
			ID:      "sha256:2123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
			ChainID: "test-chainid-2",
			References: []string{
				"gcr.io/library/alpine:latest",
				"gcr.io/library/alpine@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
			},
			Size: 2000,
			ImageSpec: imagespec.Image{
				Config: imagespec.ImageConfig{
					User: "1234:1234",
				},
			},
		},
		{
			ID:      "sha256:3123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
			ChainID: "test-chainid-3",
			References: []string{
				"gcr.io/library/ubuntu:latest",
				"gcr.io/library/ubuntu@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
			},
			Size: 3000,
			ImageSpec: imagespec.Image{
				Config: imagespec.ImageConfig{
					User: "nobody",
				},
			},
		},
	}
	// Expected CRI images corresponding 1:1 with the fixtures above. Note
	// the numeric user becomes Uid, the named users become Username.
	expect := []*runtime.Image{
		{
			Id:          "sha256:1123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
			RepoTags:    []string{"gcr.io/library/busybox:latest"},
			RepoDigests: []string{"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"},
			Size_:       uint64(1000),
			Username:    "root",
		},
		{
			Id:          "sha256:2123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
			RepoTags:    []string{"gcr.io/library/alpine:latest"},
			RepoDigests: []string{"gcr.io/library/alpine@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"},
			Size_:       uint64(2000),
			Uid:         &runtime.Int64Value{Value: 1234},
		},
		{
			Id:          "sha256:3123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
			RepoTags:    []string{"gcr.io/library/ubuntu:latest"},
			RepoDigests: []string{"gcr.io/library/ubuntu@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"},
			Size_:       uint64(3000),
			Username:    "nobody",
		},
	}

	var err error
	c.imageStore, err = imagestore.NewFakeStore(imagesInStore)
	assert.NoError(t, err)

	resp, err := c.ListImages(context.Background(), &runtime.ListImagesRequest{})
	assert.NoError(t, err)
	require.NotNil(t, resp)
	images := resp.GetImages()
	// Order of the store listing is not guaranteed, so check membership
	// rather than positional equality.
	assert.Len(t, images, len(expect))
	for _, i := range expect {
		assert.Contains(t, images, i)
	}
}
diff --git a/pkg/server/image_pull.go b/pkg/server/image_pull.go
new file mode 100644
index 000000000..8e2493613
--- /dev/null
+++ b/pkg/server/image_pull.go
@@ -0,0 +1,519 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "crypto/tls"
+ "crypto/x509"
+ "encoding/base64"
+ "fmt"
+ "io/ioutil"
+ "net"
+ "net/http"
+ "net/url"
+ "strings"
+ "time"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/errdefs"
+ containerdimages "github.com/containerd/containerd/images"
+ "github.com/containerd/containerd/labels"
+ "github.com/containerd/containerd/log"
+ distribution "github.com/containerd/containerd/reference/docker"
+ "github.com/containerd/containerd/remotes/docker"
+ "github.com/containerd/imgcrypt"
+ "github.com/containerd/imgcrypt/images/encryption"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ criconfig "github.com/containerd/cri/pkg/config"
+)
+
+// For image management:
+// 1) We have an in-memory metadata index to:
+// a. Maintain ImageID -> RepoTags, ImageID -> RepoDigset relationships; ImageID
+// is the digest of image config, which conforms to oci image spec.
+// b. Cache constant and useful information such as image chainID, config etc.
+// c. An image will be added into the in-memory metadata only when it's successfully
+// pulled and unpacked.
+//
+// 2) We use containerd image metadata store and content store:
+// a. To resolve image reference (digest/tag) locally. During pulling image, we
+// normalize the image reference provided by user, and put it into image metadata
+// store with resolved descriptor. For the other operations, if image id is provided,
+// we'll access the in-memory metadata index directly; if image reference is
+// provided, we'll normalize it, resolve it in containerd image metadata store
+// to get the image id.
+// b. As the backup of in-memory metadata in 1). During startup, the in-memory
+// metadata could be re-constructed from image metadata store + content store.
+//
+// Several problems with current approach:
+// 1) An entry in containerd image metadata store doesn't mean a "READY" (successfully
+// pulled and unpacked) image. E.g. during pulling, the client gets killed. In that case,
+// if we saw an image without snapshots or with in-complete contents during startup,
+// should we re-pull the image? Or should we remove the entry?
+//
+// yanxuean: We can't delete image directly, because we don't know if the image
+// is pulled by us. There are resource leakage.
+//
+// 2) Containerd suggests user to add entry before pulling the image. However if
+// an error occurs during the pulling, should we remove the entry from metadata
+// store? Or should we leave it there until next startup (resource leakage)?
+//
+// 3) The cri plugin only exposes "READY" (successfully pulled and unpacked) images
+// to the user, which are maintained in the in-memory metadata index. However, it's
+// still possible that someone else removes the content or snapshot by-pass the cri plugin,
+// how do we detect that and update the in-memory metadata correspondingly? Always
+// check whether corresponding snapshot is ready when reporting image status?
+//
+// 4) Is the content important if we cached necessary information in-memory
+// after we pull the image? How to manage the disk usage of contents? If some
+// contents are missing but snapshots are ready, is the image still "READY"?
+
// PullImage pulls an image with authentication config.
//
// The flow is: normalize the user-supplied reference, pull+unpack through
// the containerd client, then record the image under up to three references
// (image id, repo tag, repo digest) in both containerd's image store and the
// plugin's in-memory store.
func (c *criService) PullImage(ctx context.Context, r *runtime.PullImageRequest) (*runtime.PullImageResponse, error) {
	imageRef := r.GetImage().GetImage()
	namedRef, err := distribution.ParseDockerRef(imageRef)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to parse image reference %q", imageRef)
	}
	ref := namedRef.String()
	if ref != imageRef {
		log.G(ctx).Debugf("PullImage using normalized image ref: %q", ref)
	}
	var (
		resolver = docker.NewResolver(docker.ResolverOptions{
			Headers: c.config.Registry.Headers,
			Hosts:   c.registryHosts(r.GetAuth()),
		})
		isSchema1 bool
		// imageHandler only observes descriptors during the pull to detect
		// legacy Docker schema-1 manifests; it never adds children.
		imageHandler containerdimages.HandlerFunc = func(_ context.Context,
			desc imagespec.Descriptor) ([]imagespec.Descriptor, error) {
			if desc.MediaType == containerdimages.MediaTypeDockerSchema1Manifest {
				isSchema1 = true
			}
			return nil, nil
		}
	)

	pullOpts := []containerd.RemoteOpt{
		containerd.WithSchema1Conversion,
		containerd.WithResolver(resolver),
		containerd.WithPullSnapshotter(c.config.ContainerdConfig.Snapshotter),
		containerd.WithPullUnpack,
		containerd.WithPullLabel(imageLabelKey, imageLabelValue),
		containerd.WithMaxConcurrentDownloads(c.config.MaxConcurrentDownloads),
		containerd.WithImageHandler(imageHandler),
	}

	pullOpts = append(pullOpts, c.encryptedImagesPullOpts()...)
	if !c.config.ContainerdConfig.DisableSnapshotAnnotations {
		pullOpts = append(pullOpts,
			containerd.WithImageHandlerWrapper(appendInfoHandlerWrapper(ref)))
	}

	if c.config.ContainerdConfig.DiscardUnpackedLayers {
		// Allows GC to clean layers up from the content store after unpacking
		pullOpts = append(pullOpts,
			containerd.WithChildLabelMap(containerdimages.ChildGCLabelsFilterLayers))
	}

	image, err := c.client.Pull(ctx, ref, pullOpts...)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to pull and unpack image %q", ref)
	}

	// The image id (digest of the image config) is the stable identifier
	// returned to the CRI caller below.
	configDesc, err := image.Config(ctx)
	if err != nil {
		return nil, errors.Wrap(err, "get image config descriptor")
	}
	imageID := configDesc.Digest.String()

	repoDigest, repoTag := getRepoDigestAndTag(namedRef, image.Target().Digest, isSchema1)
	for _, r := range []string{imageID, repoTag, repoDigest} {
		if r == "" {
			continue
		}
		if err := c.createImageReference(ctx, r, image.Target()); err != nil {
			return nil, errors.Wrapf(err, "failed to create image reference %q", r)
		}
		// Update image store to reflect the newest state in containerd.
		// No need to use `updateImage`, because the image reference must
		// have been managed by the cri plugin.
		if err := c.imageStore.Update(ctx, r); err != nil {
			return nil, errors.Wrapf(err, "failed to update image store %q", r)
		}
	}

	log.G(ctx).Debugf("Pulled image %q with image id %q, repo tag %q, repo digest %q", imageRef, imageID,
		repoTag, repoDigest)
	// NOTE(random-liu): the actual state in containerd is the source of truth, even we maintain
	// in-memory image store, it's only for in-memory indexing. The image could be removed
	// by someone else anytime, before/during/after we create the metadata. We should always
	// check the actual state in containerd before using the image or returning status of the
	// image.
	return &runtime.PullImageResponse{ImageRef: imageID}, nil
}
+
+// ParseAuth parses AuthConfig and returns username and password/secret required by containerd.
+func ParseAuth(auth *runtime.AuthConfig, host string) (string, string, error) {
+ if auth == nil {
+ return "", "", nil
+ }
+ if auth.ServerAddress != "" {
+ // Do not return the auth info when server address doesn't match.
+ u, err := url.Parse(auth.ServerAddress)
+ if err != nil {
+ return "", "", errors.Wrap(err, "parse server address")
+ }
+ if host != u.Host {
+ return "", "", nil
+ }
+ }
+ if auth.Username != "" {
+ return auth.Username, auth.Password, nil
+ }
+ if auth.IdentityToken != "" {
+ return "", auth.IdentityToken, nil
+ }
+ if auth.Auth != "" {
+ decLen := base64.StdEncoding.DecodedLen(len(auth.Auth))
+ decoded := make([]byte, decLen)
+ _, err := base64.StdEncoding.Decode(decoded, []byte(auth.Auth))
+ if err != nil {
+ return "", "", err
+ }
+ fields := strings.SplitN(string(decoded), ":", 2)
+ if len(fields) != 2 {
+ return "", "", errors.Errorf("invalid decoded auth: %q", decoded)
+ }
+ user, passwd := fields[0], fields[1]
+ return user, strings.Trim(passwd, "\x00"), nil
+ }
+ // TODO(random-liu): Support RegistryToken.
+ // An empty auth config is valid for anonymous registry
+ return "", "", nil
+}
+
// createImageReference creates image reference inside containerd image store.
// Note that because create and update are not finished in one transaction, there could be race. E.g.
// the image reference is deleted by someone else after create returns already exists, but before update
// happens.
func (c *criService) createImageReference(ctx context.Context, name string, desc imagespec.Descriptor) error {
	img := containerdimages.Image{
		Name:   name,
		Target: desc,
		// Add a label to indicate that the image is managed by the cri plugin.
		Labels: map[string]string{imageLabelKey: imageLabelValue},
	}
	// TODO(random-liu): Figure out which is the more performant sequence create then update or
	// update then create.
	oldImg, err := c.client.ImageService().Create(ctx, img)
	// Success or any error other than AlreadyExists is returned as-is
	// (nil on success).
	if err == nil || !errdefs.IsAlreadyExists(err) {
		return err
	}
	// AlreadyExists: if the existing record already points at the same
	// target and carries the managed label, nothing to do.
	// NOTE(review): this relies on Create's return value being meaningful
	// alongside an AlreadyExists error — if it is the zero Image, the digest
	// comparison simply fails and we fall through to Update, which is safe.
	if oldImg.Target.Digest == img.Target.Digest && oldImg.Labels[imageLabelKey] == imageLabelValue {
		return nil
	}
	// Otherwise overwrite only the target and labels of the existing record.
	_, err = c.client.ImageService().Update(ctx, img, "target", "labels")
	return err
}
+
// updateImage updates image store to reflect the newest state of an image reference
// in containerd. If the reference is not managed by the cri plugin, the function also
// generates necessary metadata for the image and make it managed.
func (c *criService) updateImage(ctx context.Context, r string) error {
	img, err := c.client.GetImage(ctx, r)
	if err != nil && !errdefs.IsNotFound(err) {
		return errors.Wrap(err, "get image by reference")
	}
	// The image exists but is not yet managed by the cri plugin: adopt it.
	// Ordering matters — the image-id reference is created and cached first,
	// and only then is the original reference re-created with the managed
	// label, so a crash in between never leaves a managed ref without an id.
	if err == nil && img.Labels()[imageLabelKey] != imageLabelValue {
		// Make sure the image has the image id as its unique
		// identifier that references the image in its lifetime.
		configDesc, err := img.Config(ctx)
		if err != nil {
			return errors.Wrap(err, "get image id")
		}
		id := configDesc.Digest.String()
		if err := c.createImageReference(ctx, id, img.Target()); err != nil {
			return errors.Wrapf(err, "create image id reference %q", id)
		}
		if err := c.imageStore.Update(ctx, id); err != nil {
			return errors.Wrapf(err, "update image store for %q", id)
		}
		// The image id is ready, add the label to mark the image as managed.
		if err := c.createImageReference(ctx, r, img.Target()); err != nil {
			return errors.Wrap(err, "create managed label")
		}
	}
	// If the image is not found, we should continue updating the cache,
	// so that the image can be removed from the cache.
	if err := c.imageStore.Update(ctx, r); err != nil {
		return errors.Wrapf(err, "update image store for %q", r)
	}
	return nil
}
+
+// getTLSConfig returns a TLSConfig configured with a CA/Cert/Key specified by registryTLSConfig
+func (c *criService) getTLSConfig(registryTLSConfig criconfig.TLSConfig) (*tls.Config, error) {
+ var (
+ tlsConfig = &tls.Config{}
+ cert tls.Certificate
+ err error
+ )
+ if registryTLSConfig.CertFile != "" && registryTLSConfig.KeyFile == "" {
+ return nil, errors.Errorf("cert file %q was specified, but no corresponding key file was specified", registryTLSConfig.CertFile)
+ }
+ if registryTLSConfig.CertFile == "" && registryTLSConfig.KeyFile != "" {
+ return nil, errors.Errorf("key file %q was specified, but no corresponding cert file was specified", registryTLSConfig.KeyFile)
+ }
+ if registryTLSConfig.CertFile != "" && registryTLSConfig.KeyFile != "" {
+ cert, err = tls.LoadX509KeyPair(registryTLSConfig.CertFile, registryTLSConfig.KeyFile)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to load cert file")
+ }
+ if len(cert.Certificate) != 0 {
+ tlsConfig.Certificates = []tls.Certificate{cert}
+ }
+ tlsConfig.BuildNameToCertificate() // nolint:staticcheck
+ }
+
+ if registryTLSConfig.CAFile != "" {
+ caCertPool, err := x509.SystemCertPool()
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to get system cert pool")
+ }
+ caCert, err := ioutil.ReadFile(registryTLSConfig.CAFile)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to load CA file")
+ }
+ caCertPool.AppendCertsFromPEM(caCert)
+ tlsConfig.RootCAs = caCertPool
+ }
+
+ tlsConfig.InsecureSkipVerify = registryTLSConfig.InsecureSkipVerify
+ return tlsConfig, nil
+}
+
// registryHosts is the registry hosts to be used by the resolver.
// The returned callback maps a registry host name to the list of endpoint
// hosts (mirrors plus the default) that the docker resolver should try.
func (c *criService) registryHosts(auth *runtime.AuthConfig) docker.RegistryHosts {
	return func(host string) ([]docker.RegistryHost, error) {
		var registries []docker.RegistryHost

		endpoints, err := c.registryEndpoints(host)
		if err != nil {
			return nil, errors.Wrap(err, "get registry endpoints")
		}
		for _, e := range endpoints {
			u, err := url.Parse(e)
			if err != nil {
				return nil, errors.Wrapf(err, "parse registry endpoint %q from mirrors", e)
			}

			var (
				transport = newTransport()
				client    = &http.Client{Transport: transport}
				config    = c.config.Registry.Configs[u.Host]
			)

			if config.TLS != nil {
				transport.TLSClientConfig, err = c.getTLSConfig(*config.TLS)
				if err != nil {
					return nil, errors.Wrapf(err, "get TLSConfig for registry %q", e)
				}
			}

			// Fall back to per-registry configured auth only when the
			// caller supplied none. NOTE(review): `auth` is captured and
			// reassigned here, so the first endpoint with configured auth
			// also supplies credentials for subsequent endpoints in this
			// loop — confirm this sharing is intended.
			if auth == nil && config.Auth != nil {
				auth = toRuntimeAuthConfig(*config.Auth)
			}

			// Default to the Docker registry v2 API path when the mirror
			// endpoint did not specify one.
			if u.Path == "" {
				u.Path = "/v2"
			}

			registries = append(registries, docker.RegistryHost{
				Client: client,
				Authorizer: docker.NewDockerAuthorizer(
					docker.WithAuthClient(client),
					docker.WithAuthCreds(func(host string) (string, string, error) {
						return ParseAuth(auth, host)
					})),
				Host:         u.Host,
				Scheme:       u.Scheme,
				Path:         u.Path,
				Capabilities: docker.HostCapabilityResolve | docker.HostCapabilityPull,
			})
		}
		return registries, nil
	}
}
+
// defaultScheme returns the default scheme for a registry host: plain
// "http" for loopback hosts (localhost, 127.0.0.1, ::1) and "https" for
// everything else.
func defaultScheme(host string) string {
	// Strip an optional port so e.g. "localhost:5000" matches "localhost".
	if h, _, err := net.SplitHostPort(host); err == nil {
		host = h
	}
	switch host {
	case "localhost", "127.0.0.1", "::1":
		return "http"
	}
	return "https"
}
+
+// addDefaultScheme returns the endpoint with default scheme
+func addDefaultScheme(endpoint string) (string, error) {
+ if strings.Contains(endpoint, "://") {
+ return endpoint, nil
+ }
+ ue := "dummy://" + endpoint
+ u, err := url.Parse(ue)
+ if err != nil {
+ return "", err
+ }
+ return fmt.Sprintf("%s://%s", defaultScheme(u.Host), endpoint), nil
+}
+
+// registryEndpoints returns endpoints for a given host.
+// It adds default registry endpoint if it does not exist in the passed-in endpoint list.
+// It also supports wildcard host matching with `*`.
+func (c *criService) registryEndpoints(host string) ([]string, error) {
+ var endpoints []string
+ _, ok := c.config.Registry.Mirrors[host]
+ if ok {
+ endpoints = c.config.Registry.Mirrors[host].Endpoints
+ } else {
+ endpoints = c.config.Registry.Mirrors["*"].Endpoints
+ }
+ defaultHost, err := docker.DefaultHost(host)
+ if err != nil {
+ return nil, errors.Wrap(err, "get default host")
+ }
+ for i := range endpoints {
+ en, err := addDefaultScheme(endpoints[i])
+ if err != nil {
+ return nil, errors.Wrap(err, "parse endpoint url")
+ }
+ endpoints[i] = en
+ }
+ for _, e := range endpoints {
+ u, err := url.Parse(e)
+ if err != nil {
+ return nil, errors.Wrap(err, "parse endpoint url")
+ }
+ if u.Host == host {
+ // Do not add default if the endpoint already exists.
+ return endpoints, nil
+ }
+ }
+ return append(endpoints, defaultScheme(defaultHost)+"://"+defaultHost), nil
+}
+
+// newTransport returns a new HTTP transport used to pull image.
+// TODO(random-liu): Create a library and share this code with `ctr`.
+func newTransport() *http.Transport {
+ return &http.Transport{
+ Proxy: http.ProxyFromEnvironment,
+ DialContext: (&net.Dialer{
+ Timeout: 30 * time.Second,
+ KeepAlive: 30 * time.Second,
+ FallbackDelay: 300 * time.Millisecond,
+ }).DialContext,
+ MaxIdleConns: 10,
+ IdleConnTimeout: 30 * time.Second,
+ TLSHandshakeTimeout: 10 * time.Second,
+ ExpectContinueTimeout: 5 * time.Second,
+ }
+}
+
+// encryptedImagesPullOpts returns the necessary list of pull options required
+// for decryption of encrypted images based on the cri decryption configuration.
+func (c *criService) encryptedImagesPullOpts() []containerd.RemoteOpt {
+ if c.config.ImageDecryption.KeyModel == criconfig.KeyModelNode {
+ ltdd := imgcrypt.Payload{}
+ decUnpackOpt := encryption.WithUnpackConfigApplyOpts(encryption.WithDecryptedUnpack(<dd))
+ opt := containerd.WithUnpackOpts([]containerd.UnpackOpt{decUnpackOpt})
+ return []containerd.RemoteOpt{opt}
+ }
+ return nil
+}
+
// Annotation keys attached to layer descriptors during unpack (see
// appendInfoHandlerWrapper below) and surfaced to snapshotters as labels.
const (
	// targetRefLabel is a label which contains image reference and will be passed
	// to snapshotters.
	targetRefLabel = "containerd.io/snapshot/cri.image-ref"
	// targetDigestLabel is a label which contains layer digest and will be passed
	// to snapshotters.
	targetDigestLabel = "containerd.io/snapshot/cri.layer-digest"
	// targetImageLayersLabel is a label which contains layer digests contained in
	// the target image and will be passed to snapshotters for preparing layers in
	// parallel. Skipping some layers is allowed and only affects performance.
	targetImageLayersLabel = "containerd.io/snapshot/cri.image-layers"
)
+
// appendInfoHandlerWrapper makes a handler which appends some basic information
// of images to each layer descriptor as annotations during unpack. These
// annotations will be passed to snapshotters as labels. These labels will be
// used mainly by stargz-based snapshotters for querying image contents from the
// registry.
func appendInfoHandlerWrapper(ref string) func(f containerdimages.Handler) containerdimages.Handler {
	return func(f containerdimages.Handler) containerdimages.Handler {
		return containerdimages.HandlerFunc(func(ctx context.Context, desc imagespec.Descriptor) ([]imagespec.Descriptor, error) {
			// Delegate to the wrapped handler first; annotations are added
			// to the children it discovers.
			children, err := f.Handle(ctx, desc)
			if err != nil {
				return nil, err
			}
			switch desc.MediaType {
			// Only manifest children (the layer descriptors) are annotated.
			case imagespec.MediaTypeImageManifest, containerdimages.MediaTypeDockerSchema2Manifest:
				for i := range children {
					c := &children[i]
					if containerdimages.IsLayerType(c.MediaType) {
						if c.Annotations == nil {
							c.Annotations = make(map[string]string)
						}
						c.Annotations[targetRefLabel] = ref
						c.Annotations[targetDigestLabel] = c.Digest.String()
						// The layers list starts at this layer (children[i:]),
						// truncated by getLayers to stay within the label
						// size limit enforced by labels.Validate.
						c.Annotations[targetImageLayersLabel] = getLayers(ctx, targetImageLayersLabel, children[i:], labels.Validate)
					}
				}
			}
			return children, nil
		})
	}
}
+
+// getLayers returns comma-separated digests based on the passed list of
+// descriptors. The returned list contains as many digests as possible as well
+// as meets the label validation.
+func getLayers(ctx context.Context, key string, descs []imagespec.Descriptor, validate func(k, v string) error) (layers string) {
+ var item string
+ for _, l := range descs {
+ if containerdimages.IsLayerType(l.MediaType) {
+ item = l.Digest.String()
+ if layers != "" {
+ item = "," + item
+ }
+ // This avoids the label hits the size limitation.
+ if err := validate(key, layers+item); err != nil {
+ log.G(ctx).WithError(err).WithField("label", key).Debugf("%q is omitted in the layers list", l.Digest.String())
+ break
+ }
+ layers += item
+ }
+ }
+ return
+}
diff --git a/pkg/server/image_pull_test.go b/pkg/server/image_pull_test.go
new file mode 100644
index 000000000..551e68bae
--- /dev/null
+++ b/pkg/server/image_pull_test.go
@@ -0,0 +1,379 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "context"
+ "encoding/base64"
+ "fmt"
+ "strings"
+ "testing"
+
+ digest "github.com/opencontainers/go-digest"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ "github.com/stretchr/testify/assert"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ criconfig "github.com/containerd/cri/pkg/config"
+)
+
// TestParseAuth covers every credential source ParseAuth understands —
// nil/empty config, identity token, username/password, base64 "Auth" — plus
// server-address matching and the malformed-auth error path.
func TestParseAuth(t *testing.T) {
	testUser := "username"
	testPasswd := "password"
	// Valid auth is base64("username:password"); the invalid variant uses
	// '@' instead of ':' so the decoded value has no separator.
	testAuthLen := base64.StdEncoding.EncodedLen(len(testUser + ":" + testPasswd))
	testAuth := make([]byte, testAuthLen)
	base64.StdEncoding.Encode(testAuth, []byte(testUser+":"+testPasswd))
	invalidAuth := make([]byte, testAuthLen)
	base64.StdEncoding.Encode(invalidAuth, []byte(testUser+"@"+testPasswd))
	for desc, test := range map[string]struct {
		auth           *runtime.AuthConfig
		host           string
		expectedUser   string
		expectedSecret string
		expectErr      bool
	}{
		"should not return error if auth config is nil": {},
		"should not return error if empty auth is provided for access to anonymous registry": {
			auth:      &runtime.AuthConfig{},
			expectErr: false,
		},
		"should support identity token": {
			auth:           &runtime.AuthConfig{IdentityToken: "abcd"},
			expectedSecret: "abcd",
		},
		"should support username and password": {
			auth: &runtime.AuthConfig{
				Username: testUser,
				Password: testPasswd,
			},
			expectedUser:   testUser,
			expectedSecret: testPasswd,
		},
		"should support auth": {
			auth:           &runtime.AuthConfig{Auth: string(testAuth)},
			expectedUser:   testUser,
			expectedSecret: testPasswd,
		},
		"should return error for invalid auth": {
			auth:      &runtime.AuthConfig{Auth: string(invalidAuth)},
			expectErr: true,
		},
		"should return empty auth if server address doesn't match": {
			auth: &runtime.AuthConfig{
				Username:      testUser,
				Password:      testPasswd,
				ServerAddress: "https://registry-1.io",
			},
			host:           "registry-2.io",
			expectedUser:   "",
			expectedSecret: "",
		},
		"should return auth if server address matches": {
			auth: &runtime.AuthConfig{
				Username:      testUser,
				Password:      testPasswd,
				ServerAddress: "https://registry-1.io",
			},
			host:           "registry-1.io",
			expectedUser:   testUser,
			expectedSecret: testPasswd,
		},
		"should return auth if server address is not specified": {
			auth: &runtime.AuthConfig{
				Username: testUser,
				Password: testPasswd,
			},
			host:           "registry-1.io",
			expectedUser:   testUser,
			expectedSecret: testPasswd,
		},
	} {
		t.Logf("TestCase %q", desc)
		u, s, err := ParseAuth(test.auth, test.host)
		assert.Equal(t, test.expectErr, err != nil)
		assert.Equal(t, test.expectedUser, u)
		assert.Equal(t, test.expectedSecret, s)
	}
}
+
// TestRegistryEndpoints checks the mirror-resolution rules of
// registryEndpoints: exact-host mirrors, the "*" wildcard, host precedence
// over wildcard, default-endpoint appending (only when the host isn't
// already listed), and default-scheme insertion for scheme-less entries.
func TestRegistryEndpoints(t *testing.T) {
	for desc, test := range map[string]struct {
		mirrors  map[string]criconfig.Mirror
		host     string
		expected []string
	}{
		"no mirror configured": {
			mirrors: map[string]criconfig.Mirror{
				"registry-1.io": {
					Endpoints: []string{
						"https://registry-1.io",
						"https://registry-2.io",
					},
				},
			},
			host: "registry-3.io",
			expected: []string{
				"https://registry-3.io",
			},
		},
		"mirror configured": {
			mirrors: map[string]criconfig.Mirror{
				"registry-3.io": {
					Endpoints: []string{
						"https://registry-1.io",
						"https://registry-2.io",
					},
				},
			},
			host: "registry-3.io",
			expected: []string{
				"https://registry-1.io",
				"https://registry-2.io",
				"https://registry-3.io",
			},
		},
		"wildcard mirror configured": {
			mirrors: map[string]criconfig.Mirror{
				"*": {
					Endpoints: []string{
						"https://registry-1.io",
						"https://registry-2.io",
					},
				},
			},
			host: "registry-3.io",
			expected: []string{
				"https://registry-1.io",
				"https://registry-2.io",
				"https://registry-3.io",
			},
		},
		"host should take precedence if both host and wildcard mirrors are configured": {
			mirrors: map[string]criconfig.Mirror{
				"*": {
					Endpoints: []string{
						"https://registry-1.io",
					},
				},
				"registry-3.io": {
					Endpoints: []string{
						"https://registry-2.io",
					},
				},
			},
			host: "registry-3.io",
			expected: []string{
				"https://registry-2.io",
				"https://registry-3.io",
			},
		},
		// When the host itself already appears in the mirror list (any
		// scheme, with or without a path), no default endpoint is appended.
		"default endpoint in list with http": {
			mirrors: map[string]criconfig.Mirror{
				"registry-3.io": {
					Endpoints: []string{
						"https://registry-1.io",
						"https://registry-2.io",
						"http://registry-3.io",
					},
				},
			},
			host: "registry-3.io",
			expected: []string{
				"https://registry-1.io",
				"https://registry-2.io",
				"http://registry-3.io",
			},
		},
		"default endpoint in list with https": {
			mirrors: map[string]criconfig.Mirror{
				"registry-3.io": {
					Endpoints: []string{
						"https://registry-1.io",
						"https://registry-2.io",
						"https://registry-3.io",
					},
				},
			},
			host: "registry-3.io",
			expected: []string{
				"https://registry-1.io",
				"https://registry-2.io",
				"https://registry-3.io",
			},
		},
		"default endpoint in list with path": {
			mirrors: map[string]criconfig.Mirror{
				"registry-3.io": {
					Endpoints: []string{
						"https://registry-1.io",
						"https://registry-2.io",
						"https://registry-3.io/path",
					},
				},
			},
			host: "registry-3.io",
			expected: []string{
				"https://registry-1.io",
				"https://registry-2.io",
				"https://registry-3.io/path",
			},
		},
		// Scheme-less endpoints get https by default, http for loopback.
		"miss scheme endpoint in list with path": {
			mirrors: map[string]criconfig.Mirror{
				"registry-3.io": {
					Endpoints: []string{
						"https://registry-3.io",
						"registry-1.io",
						"127.0.0.1:1234",
					},
				},
			},
			host: "registry-3.io",
			expected: []string{
				"https://registry-3.io",
				"https://registry-1.io",
				"http://127.0.0.1:1234",
			},
		},
	} {
		t.Logf("TestCase %q", desc)
		c := newTestCRIService()
		c.config.Registry.Mirrors = test.mirrors
		got, err := c.registryEndpoints(test.host)
		assert.NoError(t, err)
		assert.Equal(t, test.expected, got)
	}
}
+
// TestDefaultScheme verifies that loopback hosts (localhost, 127.0.0.1,
// ::1 — with or without a port) default to http, and everything else
// defaults to https.
func TestDefaultScheme(t *testing.T) {
	for desc, test := range map[string]struct {
		host     string
		expected string
	}{
		"should use http by default for localhost": {
			host:     "localhost",
			expected: "http",
		},
		"should use http by default for localhost with port": {
			host:     "localhost:8080",
			expected: "http",
		},
		"should use http by default for 127.0.0.1": {
			host:     "127.0.0.1",
			expected: "http",
		},
		"should use http by default for 127.0.0.1 with port": {
			host:     "127.0.0.1:8080",
			expected: "http",
		},
		"should use http by default for ::1": {
			host:     "::1",
			expected: "http",
		},
		"should use http by default for ::1 with port": {
			host:     "[::1]:8080",
			expected: "http",
		},
		"should use https by default for remote host": {
			host:     "remote",
			expected: "https",
		},
		"should use https by default for remote host with port": {
			host:     "remote:8080",
			expected: "https",
		},
		"should use https by default for remote ip": {
			host:     "8.8.8.8",
			expected: "https",
		},
		"should use https by default for remote ip with port": {
			host:     "8.8.8.8:8080",
			expected: "https",
		},
	} {
		t.Logf("TestCase %q", desc)
		got := defaultScheme(test.host)
		assert.Equal(t, test.expected, got)
	}
}
+
+func TestEncryptedImagePullOpts(t *testing.T) {
+ for desc, test := range map[string]struct {
+ keyModel string
+ expectedOpts int
+ }{
+ "node key model should return one unpack opt": {
+ keyModel: criconfig.KeyModelNode,
+ expectedOpts: 1,
+ },
+ "no key model selected should default to node key model": {
+ keyModel: "",
+ expectedOpts: 0,
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+ c := newTestCRIService()
+ c.config.ImageDecryption.KeyModel = test.keyModel
+ got := len(c.encryptedImagesPullOpts())
+ assert.Equal(t, test.expectedOpts, got)
+ }
+}
+
// TestImageLayersLabel verifies getLayers' truncation behavior: all layer
// digests are joined while the label value stays within the validator's
// size budget, and trailing layers are dropped once the budget is hit.
func TestImageLayersLabel(t *testing.T) {
	sampleKey := "sampleKey"
	sampleDigest, err := digest.Parse("sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
	assert.NoError(t, err)
	// Stand-in for labels.Validate with a small, predictable size cap.
	sampleMaxSize := 300
	sampleValidate := func(k, v string) error {
		if (len(k) + len(v)) > sampleMaxSize {
			return fmt.Errorf("invalid: %q: %q", k, v)
		}
		return nil
	}

	tests := []struct {
		name      string
		layersNum int
		wantNum   int
	}{
		{
			name:      "valid number of layers",
			layersNum: 2,
			wantNum:   2,
		},
		{
			name:      "many layers",
			layersNum: 5, // hits sampleMaxSize (300 chars).
			wantNum:   4, // layers should be omitted for avoiding invalid label.
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Every fixture layer reuses the same digest; only the count matters.
			var sampleLayers []imagespec.Descriptor
			for i := 0; i < tt.layersNum; i++ {
				sampleLayers = append(sampleLayers, imagespec.Descriptor{
					MediaType: imagespec.MediaTypeImageLayerGzip,
					Digest:    sampleDigest,
				})
			}
			gotS := getLayers(context.Background(), sampleKey, sampleLayers, sampleValidate)
			got := len(strings.Split(gotS, ","))
			assert.Equal(t, tt.wantNum, got)
		})
	}
}
diff --git a/pkg/server/image_remove.go b/pkg/server/image_remove.go
new file mode 100644
index 000000000..bcd02d758
--- /dev/null
+++ b/pkg/server/image_remove.go
@@ -0,0 +1,65 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "github.com/containerd/containerd/errdefs"
+ "github.com/containerd/containerd/images"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/store"
+)
+
+// RemoveImage removes the image.
+// TODO(random-liu): Update CRI to pass image reference instead of ImageSpec. (See
+// kubernetes/kubernetes#46255)
+// TODO(random-liu): We should change CRI to distinguish image id and image spec.
+// Remove the whole image no matter whether it's an image id or a reference. This is the
+// semantic defined in CRI now.
+func (c *criService) RemoveImage(ctx context.Context, r *runtime.RemoveImageRequest) (*runtime.RemoveImageResponse, error) {
+ image, err := c.localResolve(r.GetImage().GetImage())
+ if err != nil {
+ if err == store.ErrNotExist {
+ // return empty without error when image not found.
+ return &runtime.RemoveImageResponse{}, nil
+ }
+ return nil, errors.Wrapf(err, "can not resolve %q locally", r.GetImage().GetImage())
+ }
+
+ // Remove all image references.
+ for i, ref := range image.References {
+ var opts []images.DeleteOpt
+ if i == len(image.References)-1 {
+ // Delete the last image reference synchronously to trigger garbage collection.
+ // This is best effort. It is possible that the image reference is deleted by
+ // someone else before this point.
+ opts = []images.DeleteOpt{images.SynchronousDelete()}
+ }
+ err = c.client.ImageService().Delete(ctx, ref, opts...)
+ if err == nil || errdefs.IsNotFound(err) {
+ // Update image store to reflect the newest state in containerd.
+ if err := c.imageStore.Update(ctx, ref); err != nil {
+ return nil, errors.Wrapf(err, "failed to update image reference %q for %q", ref, image.ID)
+ }
+ continue
+ }
+ return nil, errors.Wrapf(err, "failed to delete image reference %q for %q", ref, image.ID)
+ }
+ return &runtime.RemoveImageResponse{}, nil
+}
diff --git a/pkg/server/image_status.go b/pkg/server/image_status.go
new file mode 100644
index 000000000..5ada7b007
--- /dev/null
+++ b/pkg/server/image_status.go
@@ -0,0 +1,105 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "encoding/json"
+
+ "github.com/containerd/containerd/log"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/store"
+ imagestore "github.com/containerd/cri/pkg/store/image"
+)
+
+// ImageStatus returns the status of the image, returns nil if the image isn't present.
+// TODO(random-liu): We should change CRI to distinguish image id and image spec. (See
+// kubernetes/kubernetes#46255)
+func (c *criService) ImageStatus(ctx context.Context, r *runtime.ImageStatusRequest) (*runtime.ImageStatusResponse, error) {
+ image, err := c.localResolve(r.GetImage().GetImage())
+ if err != nil {
+ if err == store.ErrNotExist {
+ // return empty without error when image not found.
+ return &runtime.ImageStatusResponse{}, nil
+ }
+ return nil, errors.Wrapf(err, "can not resolve %q locally", r.GetImage().GetImage())
+ }
+ // TODO(random-liu): [P0] Make sure corresponding snapshot exists. What if snapshot
+ // doesn't exist?
+
+ runtimeImage := toCRIImage(image)
+ info, err := c.toCRIImageInfo(ctx, &image, r.GetVerbose())
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to generate image info")
+ }
+
+ return &runtime.ImageStatusResponse{
+ Image: runtimeImage,
+ Info: info,
+ }, nil
+}
+
+// toCRIImage converts internal image object to CRI runtime.Image.
+func toCRIImage(image imagestore.Image) *runtime.Image {
+ repoTags, repoDigests := parseImageReferences(image.References)
+ runtimeImage := &runtime.Image{
+ Id: image.ID,
+ RepoTags: repoTags,
+ RepoDigests: repoDigests,
+ Size_: uint64(image.Size),
+ }
+ uid, username := getUserFromImage(image.ImageSpec.Config.User)
+ if uid != nil {
+ runtimeImage.Uid = &runtime.Int64Value{Value: *uid}
+ }
+ runtimeImage.Username = username
+
+ return runtimeImage
+}
+
+// TODO (mikebrow): discuss moving this struct and / or constants for info map for some or all of these fields to CRI
+type verboseImageInfo struct {
+ ChainID string `json:"chainID"`
+ ImageSpec imagespec.Image `json:"imageSpec"`
+}
+
+// toCRIImageInfo converts internal image object information to CRI image status response info map.
+func (c *criService) toCRIImageInfo(ctx context.Context, image *imagestore.Image, verbose bool) (map[string]string, error) {
+ if !verbose {
+ return nil, nil
+ }
+
+ info := make(map[string]string)
+
+ imi := &verboseImageInfo{
+ ChainID: image.ChainID,
+ ImageSpec: image.ImageSpec,
+ }
+
+ m, err := json.Marshal(imi)
+ if err == nil {
+ info["info"] = string(m)
+ } else {
+ log.G(ctx).WithError(err).Errorf("failed to marshal info %v", imi)
+ info["info"] = err.Error()
+ }
+
+ return info, nil
+}
diff --git a/pkg/server/image_status_test.go b/pkg/server/image_status_test.go
new file mode 100644
index 000000000..5b3bc7af0
--- /dev/null
+++ b/pkg/server/image_status_test.go
@@ -0,0 +1,74 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ imagestore "github.com/containerd/cri/pkg/store/image"
+)
+
+func TestImageStatus(t *testing.T) {
+ testID := "sha256:d848ce12891bf78792cda4a23c58984033b0c397a55e93a1556202222ecc5ed4"
+ image := imagestore.Image{
+ ID: testID,
+ ChainID: "test-chain-id",
+ References: []string{
+ "gcr.io/library/busybox:latest",
+ "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
+ },
+ Size: 1234,
+ ImageSpec: imagespec.Image{
+ Config: imagespec.ImageConfig{
+ User: "user:group",
+ },
+ },
+ }
+ expected := &runtime.Image{
+ Id: testID,
+ RepoTags: []string{"gcr.io/library/busybox:latest"},
+ RepoDigests: []string{"gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582"},
+ Size_: uint64(1234),
+ Username: "user",
+ }
+
+ c := newTestCRIService()
+ t.Logf("should return nil image spec without error for non-exist image")
+ resp, err := c.ImageStatus(context.Background(), &runtime.ImageStatusRequest{
+ Image: &runtime.ImageSpec{Image: testID},
+ })
+ assert.NoError(t, err)
+ require.NotNil(t, resp)
+ assert.Nil(t, resp.GetImage())
+
+ c.imageStore, err = imagestore.NewFakeStore([]imagestore.Image{image})
+ assert.NoError(t, err)
+
+ t.Logf("should return correct image status for exist image")
+ resp, err = c.ImageStatus(context.Background(), &runtime.ImageStatusRequest{
+ Image: &runtime.ImageSpec{Image: testID},
+ })
+ assert.NoError(t, err)
+ assert.NotNil(t, resp)
+ assert.Equal(t, expected, resp.GetImage())
+}
diff --git a/pkg/server/imagefs_info.go b/pkg/server/imagefs_info.go
new file mode 100644
index 000000000..81dbd5cd0
--- /dev/null
+++ b/pkg/server/imagefs_info.go
@@ -0,0 +1,52 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "time"
+
+ "golang.org/x/net/context"
+
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// ImageFsInfo returns information of the filesystem that is used to store images.
+// TODO(windows): Usage for windows is always 0 right now. Support this for windows.
+func (c *criService) ImageFsInfo(ctx context.Context, r *runtime.ImageFsInfoRequest) (*runtime.ImageFsInfoResponse, error) {
+ snapshots := c.snapshotStore.List()
+ timestamp := time.Now().UnixNano()
+ var usedBytes, inodesUsed uint64
+ for _, sn := range snapshots {
+ // Use the oldest timestamp as the timestamp of imagefs info.
+ if sn.Timestamp < timestamp {
+ timestamp = sn.Timestamp
+ }
+ usedBytes += sn.Size
+ inodesUsed += sn.Inodes
+ }
+ // TODO(random-liu): Handle content store
+ return &runtime.ImageFsInfoResponse{
+ ImageFilesystems: []*runtime.FilesystemUsage{
+ {
+ Timestamp: timestamp,
+ FsId: &runtime.FilesystemIdentifier{Mountpoint: c.imageFSPath},
+ UsedBytes: &runtime.UInt64Value{Value: usedBytes},
+ InodesUsed: &runtime.UInt64Value{Value: inodesUsed},
+ },
+ },
+ }, nil
+}
diff --git a/pkg/server/imagefs_info_test.go b/pkg/server/imagefs_info_test.go
new file mode 100644
index 000000000..b36c1aafb
--- /dev/null
+++ b/pkg/server/imagefs_info_test.go
@@ -0,0 +1,70 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+
+ snapshot "github.com/containerd/containerd/snapshots"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ snapshotstore "github.com/containerd/cri/pkg/store/snapshot"
+)
+
+func TestImageFsInfo(t *testing.T) {
+ c := newTestCRIService()
+ snapshots := []snapshotstore.Snapshot{
+ {
+ Key: "key1",
+ Kind: snapshot.KindActive,
+ Size: 10,
+ Inodes: 100,
+ Timestamp: 234567,
+ },
+ {
+ Key: "key2",
+ Kind: snapshot.KindCommitted,
+ Size: 20,
+ Inodes: 200,
+ Timestamp: 123456,
+ },
+ {
+ Key: "key3",
+ Kind: snapshot.KindView,
+ Size: 0,
+ Inodes: 0,
+ Timestamp: 345678,
+ },
+ }
+ expected := &runtime.FilesystemUsage{
+ Timestamp: 123456,
+ FsId: &runtime.FilesystemIdentifier{Mountpoint: testImageFSPath},
+ UsedBytes: &runtime.UInt64Value{Value: 30},
+ InodesUsed: &runtime.UInt64Value{Value: 300},
+ }
+ for _, sn := range snapshots {
+ c.snapshotStore.Add(sn)
+ }
+ resp, err := c.ImageFsInfo(context.Background(), &runtime.ImageFsInfoRequest{})
+ require.NoError(t, err)
+ stats := resp.GetImageFilesystems()
+ assert.Len(t, stats, 1)
+ assert.Equal(t, expected, stats[0])
+}
diff --git a/pkg/server/instrumented_service.go b/pkg/server/instrumented_service.go
new file mode 100644
index 000000000..2c2528ab6
--- /dev/null
+++ b/pkg/server/instrumented_service.go
@@ -0,0 +1,490 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "errors"
+
+ "github.com/containerd/containerd/errdefs"
+ "github.com/containerd/containerd/log"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ ctrdutil "github.com/containerd/cri/pkg/containerd/util"
+)
+
+// instrumentedService wraps service with containerd namespace and logs.
+type instrumentedService struct {
+ c *criService
+}
+
+func newInstrumentedService(c *criService) grpcServices {
+ return &instrumentedService{c: c}
+}
+
+// checkInitialized returns error if the server is not fully initialized.
+// GRPC service request handlers should return error before server is fully
+// initialized.
+// NOTE(random-liu): All following functions MUST check initialized at the beginning.
+func (in *instrumentedService) checkInitialized() error {
+ if in.c.initialized.IsSet() {
+ return nil
+ }
+ return errors.New("server is not initialized yet")
+}
+
+func (in *instrumentedService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandboxRequest) (res *runtime.RunPodSandboxResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+	log.G(ctx).Infof("RunPodSandbox for %+v", r.GetConfig().GetMetadata())
+ defer func() {
+ if err != nil {
+			log.G(ctx).WithError(err).Errorf("RunPodSandbox for %+v failed", r.GetConfig().GetMetadata())
+ } else {
+ log.G(ctx).Infof("RunPodSandbox for %+v returns sandbox id %q", r.GetConfig().GetMetadata(), res.GetPodSandboxId())
+ }
+ }()
+ res, err = in.c.RunPodSandbox(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandboxRequest) (res *runtime.ListPodSandboxResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Tracef("ListPodSandbox with filter %+v", r.GetFilter())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Error("ListPodSandbox failed")
+ } else {
+ log.G(ctx).Tracef("ListPodSandbox returns pod sandboxes %+v", res.GetItems())
+ }
+ }()
+ res, err = in.c.ListPodSandbox(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandboxStatusRequest) (res *runtime.PodSandboxStatusResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Tracef("PodSandboxStatus for %q", r.GetPodSandboxId())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("PodSandboxStatus for %q failed", r.GetPodSandboxId())
+ } else {
+ log.G(ctx).Tracef("PodSandboxStatus for %q returns status %+v", r.GetPodSandboxId(), res.GetStatus())
+ }
+ }()
+ res, err = in.c.PodSandboxStatus(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandboxRequest) (_ *runtime.StopPodSandboxResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Infof("StopPodSandbox for %q", r.GetPodSandboxId())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("StopPodSandbox for %q failed", r.GetPodSandboxId())
+ } else {
+ log.G(ctx).Infof("StopPodSandbox for %q returns successfully", r.GetPodSandboxId())
+ }
+ }()
+ res, err := in.c.StopPodSandbox(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodSandboxRequest) (_ *runtime.RemovePodSandboxResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Infof("RemovePodSandbox for %q", r.GetPodSandboxId())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("RemovePodSandbox for %q failed", r.GetPodSandboxId())
+ } else {
+			log.G(ctx).Infof("RemovePodSandbox for %q returns successfully", r.GetPodSandboxId())
+ }
+ }()
+ res, err := in.c.RemovePodSandbox(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) PortForward(ctx context.Context, r *runtime.PortForwardRequest) (res *runtime.PortForwardResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+	log.G(ctx).Infof("PortForward for %q port %v", r.GetPodSandboxId(), r.GetPort())
+	defer func() {
+		if err != nil {
+			log.G(ctx).WithError(err).Errorf("PortForward for %q failed", r.GetPodSandboxId())
+		} else {
+			log.G(ctx).Infof("PortForward for %q returns URL %q", r.GetPodSandboxId(), res.GetUrl())
+ }
+ }()
+ res, err = in.c.PortForward(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) CreateContainer(ctx context.Context, r *runtime.CreateContainerRequest) (res *runtime.CreateContainerResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Infof("CreateContainer within sandbox %q for container %+v",
+ r.GetPodSandboxId(), r.GetConfig().GetMetadata())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("CreateContainer within sandbox %q for %+v failed",
+ r.GetPodSandboxId(), r.GetConfig().GetMetadata())
+ } else {
+ log.G(ctx).Infof("CreateContainer within sandbox %q for %+v returns container id %q",
+ r.GetPodSandboxId(), r.GetConfig().GetMetadata(), res.GetContainerId())
+ }
+ }()
+ res, err = in.c.CreateContainer(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) StartContainer(ctx context.Context, r *runtime.StartContainerRequest) (_ *runtime.StartContainerResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Infof("StartContainer for %q", r.GetContainerId())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("StartContainer for %q failed", r.GetContainerId())
+ } else {
+ log.G(ctx).Infof("StartContainer for %q returns successfully", r.GetContainerId())
+ }
+ }()
+ res, err := in.c.StartContainer(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) ListContainers(ctx context.Context, r *runtime.ListContainersRequest) (res *runtime.ListContainersResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Tracef("ListContainers with filter %+v", r.GetFilter())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("ListContainers with filter %+v failed", r.GetFilter())
+ } else {
+ log.G(ctx).Tracef("ListContainers with filter %+v returns containers %+v",
+ r.GetFilter(), res.GetContainers())
+ }
+ }()
+ res, err = in.c.ListContainers(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) ContainerStatus(ctx context.Context, r *runtime.ContainerStatusRequest) (res *runtime.ContainerStatusResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Tracef("ContainerStatus for %q", r.GetContainerId())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("ContainerStatus for %q failed", r.GetContainerId())
+ } else {
+ log.G(ctx).Tracef("ContainerStatus for %q returns status %+v", r.GetContainerId(), res.GetStatus())
+ }
+ }()
+ res, err = in.c.ContainerStatus(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) StopContainer(ctx context.Context, r *runtime.StopContainerRequest) (res *runtime.StopContainerResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Infof("StopContainer for %q with timeout %d (s)", r.GetContainerId(), r.GetTimeout())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("StopContainer for %q failed", r.GetContainerId())
+ } else {
+ log.G(ctx).Infof("StopContainer for %q returns successfully", r.GetContainerId())
+ }
+ }()
+ res, err = in.c.StopContainer(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) RemoveContainer(ctx context.Context, r *runtime.RemoveContainerRequest) (res *runtime.RemoveContainerResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Infof("RemoveContainer for %q", r.GetContainerId())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("RemoveContainer for %q failed", r.GetContainerId())
+ } else {
+ log.G(ctx).Infof("RemoveContainer for %q returns successfully", r.GetContainerId())
+ }
+ }()
+ res, err = in.c.RemoveContainer(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) ExecSync(ctx context.Context, r *runtime.ExecSyncRequest) (res *runtime.ExecSyncResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Infof("ExecSync for %q with command %+v and timeout %d (s)", r.GetContainerId(), r.GetCmd(), r.GetTimeout())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("ExecSync for %q failed", r.GetContainerId())
+ } else {
+ log.G(ctx).Infof("ExecSync for %q returns with exit code %d", r.GetContainerId(), res.GetExitCode())
+ log.G(ctx).Debugf("ExecSync for %q outputs - stdout: %q, stderr: %q", r.GetContainerId(),
+ res.GetStdout(), res.GetStderr())
+ }
+ }()
+ res, err = in.c.ExecSync(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) Exec(ctx context.Context, r *runtime.ExecRequest) (res *runtime.ExecResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Infof("Exec for %q with command %+v, tty %v and stdin %v",
+ r.GetContainerId(), r.GetCmd(), r.GetTty(), r.GetStdin())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("Exec for %q failed", r.GetContainerId())
+ } else {
+ log.G(ctx).Infof("Exec for %q returns URL %q", r.GetContainerId(), res.GetUrl())
+ }
+ }()
+ res, err = in.c.Exec(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) Attach(ctx context.Context, r *runtime.AttachRequest) (res *runtime.AttachResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Infof("Attach for %q with tty %v and stdin %v", r.GetContainerId(), r.GetTty(), r.GetStdin())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("Attach for %q failed", r.GetContainerId())
+ } else {
+ log.G(ctx).Infof("Attach for %q returns URL %q", r.GetContainerId(), res.Url)
+ }
+ }()
+ res, err = in.c.Attach(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (res *runtime.UpdateContainerResourcesResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Infof("UpdateContainerResources for %q with %+v", r.GetContainerId(), r.GetLinux())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("UpdateContainerResources for %q failed", r.GetContainerId())
+ } else {
+ log.G(ctx).Infof("UpdateContainerResources for %q returns successfully", r.GetContainerId())
+ }
+ }()
+ res, err = in.c.UpdateContainerResources(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) PullImage(ctx context.Context, r *runtime.PullImageRequest) (res *runtime.PullImageResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Infof("PullImage %q", r.GetImage().GetImage())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("PullImage %q failed", r.GetImage().GetImage())
+ } else {
+ log.G(ctx).Infof("PullImage %q returns image reference %q",
+ r.GetImage().GetImage(), res.GetImageRef())
+ }
+ }()
+ res, err = in.c.PullImage(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) ListImages(ctx context.Context, r *runtime.ListImagesRequest) (res *runtime.ListImagesResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Tracef("ListImages with filter %+v", r.GetFilter())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("ListImages with filter %+v failed", r.GetFilter())
+ } else {
+ log.G(ctx).Tracef("ListImages with filter %+v returns image list %+v",
+ r.GetFilter(), res.GetImages())
+ }
+ }()
+ res, err = in.c.ListImages(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) ImageStatus(ctx context.Context, r *runtime.ImageStatusRequest) (res *runtime.ImageStatusResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Tracef("ImageStatus for %q", r.GetImage().GetImage())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("ImageStatus for %q failed", r.GetImage().GetImage())
+ } else {
+ log.G(ctx).Tracef("ImageStatus for %q returns image status %+v",
+ r.GetImage().GetImage(), res.GetImage())
+ }
+ }()
+ res, err = in.c.ImageStatus(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) RemoveImage(ctx context.Context, r *runtime.RemoveImageRequest) (_ *runtime.RemoveImageResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Infof("RemoveImage %q", r.GetImage().GetImage())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("RemoveImage %q failed", r.GetImage().GetImage())
+ } else {
+ log.G(ctx).Infof("RemoveImage %q returns successfully", r.GetImage().GetImage())
+ }
+ }()
+ res, err := in.c.RemoveImage(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) ImageFsInfo(ctx context.Context, r *runtime.ImageFsInfoRequest) (res *runtime.ImageFsInfoResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Debugf("ImageFsInfo")
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Error("ImageFsInfo failed")
+ } else {
+ log.G(ctx).Debugf("ImageFsInfo returns filesystem info %+v", res.ImageFilesystems)
+ }
+ }()
+ res, err = in.c.ImageFsInfo(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) ContainerStats(ctx context.Context, r *runtime.ContainerStatsRequest) (res *runtime.ContainerStatsResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Debugf("ContainerStats for %q", r.GetContainerId())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("ContainerStats for %q failed", r.GetContainerId())
+ } else {
+ log.G(ctx).Debugf("ContainerStats for %q returns stats %+v", r.GetContainerId(), res.GetStats())
+ }
+ }()
+ res, err = in.c.ContainerStats(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) ListContainerStats(ctx context.Context, r *runtime.ListContainerStatsRequest) (res *runtime.ListContainerStatsResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Tracef("ListContainerStats with filter %+v", r.GetFilter())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Error("ListContainerStats failed")
+ } else {
+ log.G(ctx).Tracef("ListContainerStats returns stats %+v", res.GetStats())
+ }
+ }()
+ res, err = in.c.ListContainerStats(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) Status(ctx context.Context, r *runtime.StatusRequest) (res *runtime.StatusResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Tracef("Status")
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Error("Status failed")
+ } else {
+ log.G(ctx).Tracef("Status returns status %+v", res.GetStatus())
+ }
+ }()
+ res, err = in.c.Status(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) Version(ctx context.Context, r *runtime.VersionRequest) (res *runtime.VersionResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Tracef("Version with client side version %q", r.GetVersion())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Error("Version failed")
+ } else {
+ log.G(ctx).Tracef("Version returns %+v", res)
+ }
+ }()
+ res, err = in.c.Version(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) UpdateRuntimeConfig(ctx context.Context, r *runtime.UpdateRuntimeConfigRequest) (res *runtime.UpdateRuntimeConfigResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Debugf("UpdateRuntimeConfig with config %+v", r.GetRuntimeConfig())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Error("UpdateRuntimeConfig failed")
+ } else {
+			log.G(ctx).Debug("UpdateRuntimeConfig returns successfully")
+ }
+ }()
+ res, err = in.c.UpdateRuntimeConfig(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
+
+func (in *instrumentedService) ReopenContainerLog(ctx context.Context, r *runtime.ReopenContainerLogRequest) (res *runtime.ReopenContainerLogResponse, err error) {
+ if err := in.checkInitialized(); err != nil {
+ return nil, err
+ }
+ log.G(ctx).Debugf("ReopenContainerLog for %q", r.GetContainerId())
+ defer func() {
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("ReopenContainerLog for %q failed", r.GetContainerId())
+ } else {
+ log.G(ctx).Debugf("ReopenContainerLog for %q returns successfully", r.GetContainerId())
+ }
+ }()
+ res, err = in.c.ReopenContainerLog(ctrdutil.WithNamespace(ctx), r)
+ return res, errdefs.ToGRPC(err)
+}
diff --git a/pkg/server/io/container_io.go b/pkg/server/io/container_io.go
new file mode 100644
index 000000000..c66549ca5
--- /dev/null
+++ b/pkg/server/io/container_io.go
@@ -0,0 +1,236 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package io
+
+import (
+ "errors"
+ "io"
+ "strings"
+ "sync"
+
+ "github.com/containerd/containerd/cio"
+ "github.com/sirupsen/logrus"
+
+ cioutil "github.com/containerd/cri/pkg/ioutil"
+ "github.com/containerd/cri/pkg/util"
+)
+
+// streamKey builds the identifier under which a stream is registered in the
+// writer groups: "<container id>-<stream name>-<stream type>".
+func streamKey(id, name string, stream StreamType) string {
+ parts := []string{id, name, string(stream)}
+ return strings.Join(parts, "-")
+}
+
+// ContainerIO holds the container io.
+type ContainerIO struct {
+ // id is the ID of the container this io belongs to.
+ id string
+
+ // fifos describes the fifo files backing stdin/stdout/stderr.
+ fifos *cio.FIFOSet
+ // stdioPipes holds the opened pipe endpoints for the fifos.
+ *stdioPipes
+
+ // stdoutGroup and stderrGroup fan container output out to all writers
+ // registered via Attach/AddOutput.
+ stdoutGroup *cioutil.WriterGroup
+ stderrGroup *cioutil.WriterGroup
+
+ // closer tracks the piping goroutines and the opened fifo handles.
+ closer *wgCloser
+}
+
+var _ cio.IO = &ContainerIO{}
+
+// ContainerIOOpts sets specific information to newly created ContainerIO.
+type ContainerIOOpts func(*ContainerIO) error
+
+// WithFIFOs specifies existing fifos for the container io; the fifos are
+// adopted as-is and not recreated.
+func WithFIFOs(fifos *cio.FIFOSet) ContainerIOOpts {
+ return func(c *ContainerIO) error {
+ c.fifos = fifos
+ return nil
+ }
+}
+
+// WithNewFIFOs creates a fresh fifo set for the container io under root.
+// tty controls whether a terminal fifo layout is used, and stdin controls
+// whether a stdin fifo is created at all.
+func WithNewFIFOs(root string, tty, stdin bool) ContainerIOOpts {
+ return func(c *ContainerIO) error {
+ set, err := newFifos(root, c.id, tty, stdin)
+ if err != nil {
+ return err
+ }
+ c.fifos = set
+ return nil
+ }
+}
+
+// NewContainerIO creates container io. At least one of the opts must provide
+// a fifo set (WithFIFOs or WithNewFIFOs), otherwise an error is returned.
+func NewContainerIO(id string, opts ...ContainerIOOpts) (_ *ContainerIO, err error) {
+ c := &ContainerIO{
+ id: id,
+ stdoutGroup: cioutil.NewWriterGroup(),
+ stderrGroup: cioutil.NewWriterGroup(),
+ }
+ for _, opt := range opts {
+ if err := opt(c); err != nil {
+ return nil, err
+ }
+ }
+ if c.fifos == nil {
+ return nil, errors.New("fifos are not set")
+ }
+ // Create actual fifos.
+ stdio, closer, err := newStdioPipes(c.fifos)
+ if err != nil {
+ return nil, err
+ }
+ c.stdioPipes = stdio
+ c.closer = closer
+ return c, nil
+}
+
+// Config returns the io config describing the fifo paths and terminal mode.
+func (c *ContainerIO) Config() cio.Config {
+ return c.fifos.Config
+}
+
+// Pipe creates container fifos and pipe container output
+// to output stream. One goroutine per stream copies from the fifo into the
+// corresponding writer group; each goroutine is registered on the closer's
+// WaitGroup so Wait() blocks until piping finishes.
+func (c *ContainerIO) Pipe() {
+ wg := c.closer.wg
+ if c.stdout != nil {
+ wg.Add(1)
+ go func() {
+ if _, err := io.Copy(c.stdoutGroup, c.stdout); err != nil {
+ logrus.WithError(err).Errorf("Failed to pipe stdout of container %q", c.id)
+ }
+ // Close the source first, then the writer group so registered
+ // writers see the end of the stream.
+ c.stdout.Close()
+ c.stdoutGroup.Close()
+ wg.Done()
+ logrus.Infof("Finish piping stdout of container %q", c.id)
+ }()
+ }
+
+ // In terminal mode stderr is multiplexed into stdout, so only pipe it
+ // separately for non-terminal containers.
+ if !c.fifos.Terminal && c.stderr != nil {
+ wg.Add(1)
+ go func() {
+ if _, err := io.Copy(c.stderrGroup, c.stderr); err != nil {
+ logrus.WithError(err).Errorf("Failed to pipe stderr of container %q", c.id)
+ }
+ c.stderr.Close()
+ c.stderrGroup.Close()
+ wg.Done()
+ logrus.Infof("Finish piping stderr of container %q", c.id)
+ }()
+ }
+}
+
+// Attach attaches container stdio to the streams in opts and blocks until
+// all attached streams are finished. Each attach gets a unique key so that
+// multiple concurrent attaches do not collide in the writer groups.
+// TODO(random-liu): Use pools.Copy in docker to reduce memory usage?
+func (c *ContainerIO) Attach(opts AttachOptions) {
+ var wg sync.WaitGroup
+ key := util.GenerateID()
+ stdinKey := streamKey(c.id, "attach-"+key, Stdin)
+ stdoutKey := streamKey(c.id, "attach-"+key, Stdout)
+ stderrKey := streamKey(c.id, "attach-"+key, Stderr)
+
+ var stdinStreamRC io.ReadCloser
+ if c.stdin != nil && opts.Stdin != nil {
+ // Create a wrapper of stdin which could be closed. Note that the
+ // wrapper doesn't close the actual stdin, it only stops io.Copy.
+ // The actual stdin will be closed by stream server.
+ stdinStreamRC = cioutil.NewWrapReadCloser(opts.Stdin)
+ wg.Add(1)
+ go func() {
+ if _, err := io.Copy(c.stdin, stdinStreamRC); err != nil {
+ logrus.WithError(err).Errorf("Failed to pipe stdin for container attach %q", c.id)
+ }
+ logrus.Infof("Attach stream %q closed", stdinKey)
+ if opts.StdinOnce && !opts.Tty {
+ // Due to kubectl requirements and current docker behavior, when (opts.StdinOnce &&
+ // opts.Tty) we have to close container stdin and keep stdout and stderr open until
+ // container stops.
+ c.stdin.Close()
+ // Also closes the containerd side.
+ if err := opts.CloseStdin(); err != nil {
+ logrus.WithError(err).Errorf("Failed to close stdin for container %q", c.id)
+ }
+ } else {
+ // Otherwise detach this attach's output streams only, leaving
+ // container stdin open.
+ if opts.Stdout != nil {
+ c.stdoutGroup.Remove(stdoutKey)
+ }
+ if opts.Stderr != nil {
+ c.stderrGroup.Remove(stderrKey)
+ }
+ }
+ wg.Done()
+ }()
+ }
+
+ // attachStream waits until the given output stream is closed, then makes
+ // sure the stdin copy loop is also unblocked.
+ attachStream := func(key string, close <-chan struct{}) {
+ <-close
+ logrus.Infof("Attach stream %q closed", key)
+ // Make sure stdin gets closed.
+ if stdinStreamRC != nil {
+ stdinStreamRC.Close()
+ }
+ wg.Done()
+ }
+
+ if opts.Stdout != nil {
+ wg.Add(1)
+ wc, close := cioutil.NewWriteCloseInformer(opts.Stdout)
+ c.stdoutGroup.Add(stdoutKey, wc)
+ go attachStream(stdoutKey, close)
+ }
+ // With a tty, stderr is merged into stdout, so only wire stderr otherwise.
+ if !opts.Tty && opts.Stderr != nil {
+ wg.Add(1)
+ wc, close := cioutil.NewWriteCloseInformer(opts.Stderr)
+ c.stderrGroup.Add(stderrKey, wc)
+ go attachStream(stderrKey, close)
+ }
+ wg.Wait()
+}
+
+// AddOutput registers stdout/stderr write closers under the given name and
+// returns the write closers previously registered under that name, if any.
+func (c *ContainerIO) AddOutput(name string, stdout, stderr io.WriteCloser) (io.WriteCloser, io.WriteCloser) {
+ var prevStdout, prevStderr io.WriteCloser
+ if stdout != nil {
+ k := streamKey(c.id, name, Stdout)
+ prevStdout = c.stdoutGroup.Get(k)
+ c.stdoutGroup.Add(k, stdout)
+ }
+ if stderr != nil {
+ k := streamKey(c.id, name, Stderr)
+ prevStderr = c.stderrGroup.Get(k)
+ c.stderrGroup.Add(k, stderr)
+ }
+ return prevStdout, prevStderr
+}
+
+// Cancel cancels container io by cancelling the context the fifos were
+// opened with.
+func (c *ContainerIO) Cancel() {
+ c.closer.Cancel()
+}
+
+// Wait blocks until all piping goroutines started by Pipe have finished.
+func (c *ContainerIO) Wait() {
+ c.closer.Wait()
+}
+
+// Close closes all opened fifo handles and then the fifo set itself.
+func (c *ContainerIO) Close() error {
+ c.closer.Close()
+ if c.fifos != nil {
+ return c.fifos.Close()
+ }
+ return nil
+}
diff --git a/pkg/server/io/exec_io.go b/pkg/server/io/exec_io.go
new file mode 100644
index 000000000..4a695030d
--- /dev/null
+++ b/pkg/server/io/exec_io.go
@@ -0,0 +1,146 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package io
+
+import (
+ "io"
+ "sync"
+
+ "github.com/containerd/containerd/cio"
+ "github.com/sirupsen/logrus"
+
+ cioutil "github.com/containerd/cri/pkg/ioutil"
+)
+
+// ExecIO holds the exec io.
+type ExecIO struct {
+ // id is the exec ID this io belongs to.
+ id string
+ // fifos describes the fifo files backing the exec stdio.
+ fifos *cio.FIFOSet
+ // stdioPipes holds the opened pipe endpoints for the fifos.
+ *stdioPipes
+ // closer tracks the attach goroutines and the opened fifo handles.
+ closer *wgCloser
+}
+
+var _ cio.IO = &ExecIO{}
+
+// NewExecIO creates exec io: it creates a fresh fifo set under root for the
+// given id and opens the corresponding pipes. tty selects terminal layout,
+// stdin controls whether a stdin fifo is created.
+func NewExecIO(id, root string, tty, stdin bool) (*ExecIO, error) {
+ fifos, err := newFifos(root, id, tty, stdin)
+ if err != nil {
+ return nil, err
+ }
+ stdio, closer, err := newStdioPipes(fifos)
+ if err != nil {
+ return nil, err
+ }
+ return &ExecIO{
+ id: id,
+ fifos: fifos,
+ stdioPipes: stdio,
+ closer: closer,
+ }, nil
+}
+
+// Config returns the io config describing the fifo paths and terminal mode.
+func (e *ExecIO) Config() cio.Config {
+ return e.fifos.Config
+}
+
+// Attach attaches exec stdio. The logic is similar with container io attach.
+// It returns a channel that is closed once all attached streams are done;
+// unlike ContainerIO.Attach it does not block the caller.
+func (e *ExecIO) Attach(opts AttachOptions) <-chan struct{} {
+ var wg sync.WaitGroup
+ var stdinStreamRC io.ReadCloser
+ if e.stdin != nil && opts.Stdin != nil {
+ // Wrap stdin so that closing the wrapper stops io.Copy without
+ // closing the caller's stream.
+ stdinStreamRC = cioutil.NewWrapReadCloser(opts.Stdin)
+ wg.Add(1)
+ go func() {
+ if _, err := io.Copy(e.stdin, stdinStreamRC); err != nil {
+ logrus.WithError(err).Errorf("Failed to redirect stdin for container exec %q", e.id)
+ }
+ logrus.Infof("Container exec %q stdin closed", e.id)
+ if opts.StdinOnce && !opts.Tty {
+ e.stdin.Close()
+ if err := opts.CloseStdin(); err != nil {
+ logrus.WithError(err).Errorf("Failed to close stdin for container exec %q", e.id)
+ }
+ } else {
+ // Keep stdin open; tear down the output pipes instead.
+ if e.stdout != nil {
+ e.stdout.Close()
+ }
+ if e.stderr != nil {
+ e.stderr.Close()
+ }
+ }
+ wg.Done()
+ }()
+ }
+
+ // attachOutput drains one exec output fifo into the caller's stream and
+ // unblocks the stdin copy loop when the output ends.
+ attachOutput := func(t StreamType, stream io.WriteCloser, out io.ReadCloser) {
+ if _, err := io.Copy(stream, out); err != nil {
+ logrus.WithError(err).Errorf("Failed to pipe %q for container exec %q", t, e.id)
+ }
+ out.Close()
+ stream.Close()
+ if stdinStreamRC != nil {
+ stdinStreamRC.Close()
+ }
+ e.closer.wg.Done()
+ wg.Done()
+ logrus.Infof("Finish piping %q of container exec %q", t, e.id)
+ }
+
+ if opts.Stdout != nil {
+ wg.Add(1)
+ // Closer should wait for this routine to be over.
+ e.closer.wg.Add(1)
+ go attachOutput(Stdout, opts.Stdout, e.stdout)
+ }
+
+ // With a tty, stderr is merged into stdout, so only wire stderr otherwise.
+ if !opts.Tty && opts.Stderr != nil {
+ wg.Add(1)
+ // Closer should wait for this routine to be over.
+ e.closer.wg.Add(1)
+ go attachOutput(Stderr, opts.Stderr, e.stderr)
+ }
+
+ done := make(chan struct{})
+ go func() {
+ wg.Wait()
+ close(done)
+ }()
+ return done
+}
+
+// Cancel cancels exec io by cancelling the context the fifos were opened with.
+func (e *ExecIO) Cancel() {
+ e.closer.Cancel()
+}
+
+// Wait blocks until all attach goroutines registered on the closer finish.
+func (e *ExecIO) Wait() {
+ e.closer.Wait()
+}
+
+// Close closes all opened fifo handles and then the fifo set itself.
+// Both closer and fifos are nil-checked so a partially constructed ExecIO
+// can be closed safely.
+func (e *ExecIO) Close() error {
+ if e.closer != nil {
+ e.closer.Close()
+ }
+ if e.fifos != nil {
+ return e.fifos.Close()
+ }
+ return nil
+}
diff --git a/pkg/server/io/helpers.go b/pkg/server/io/helpers.go
new file mode 100644
index 000000000..59d41411f
--- /dev/null
+++ b/pkg/server/io/helpers.go
@@ -0,0 +1,144 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package io
+
+import (
+ "io"
+ "os"
+ "path/filepath"
+ "sync"
+ "syscall"
+
+ "github.com/containerd/containerd/cio"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// AttachOptions specifies how to attach to a container.
+type AttachOptions struct {
+ // Stdin, if set, is streamed into the container's stdin.
+ Stdin io.Reader
+ // Stdout/Stderr, if set, receive the container's output streams.
+ Stdout io.WriteCloser
+ Stderr io.WriteCloser
+ // Tty indicates a terminal session; stderr is then merged into stdout.
+ Tty bool
+ // StdinOnce indicates stdin should be closed after the first attach ends.
+ StdinOnce bool
+ // CloseStdin is the function to close container stdin.
+ CloseStdin func() error
+}
+
+// StreamType is the type of the stream, stdout/stderr.
+type StreamType string
+
+const (
+ // Stdin stream type. Stdin has no CRI log-stream constant, so it is
+ // defined locally.
+ Stdin StreamType = "stdin"
+ // Stdout stream type, reusing the CRI-defined constant.
+ Stdout StreamType = StreamType(runtime.Stdout)
+ // Stderr stream type, reusing the CRI-defined constant.
+ Stderr StreamType = StreamType(runtime.Stderr)
+)
+
+// wgCloser bundles the lifetime of a set of fifo handles: a WaitGroup for
+// goroutines using them, the handles themselves, and the context they were
+// opened under.
+type wgCloser struct {
+ ctx context.Context
+ wg *sync.WaitGroup
+ // set holds every opened fifo handle so Close can release them all.
+ set []io.Closer
+ cancel context.CancelFunc
+}
+
+// Wait blocks until every goroutine registered on the WaitGroup is done.
+func (g *wgCloser) Wait() {
+ g.wg.Wait()
+}
+
+// Close closes every tracked fifo handle.
+func (g *wgCloser) Close() {
+ for _, f := range g.set {
+ f.Close()
+ }
+}
+
+// Cancel cancels the context the fifos were opened with.
+func (g *wgCloser) Cancel() {
+ g.cancel()
+}
+
+// newFifos creates fifos directory for a container. Fifos are placed under
+// an "io" subdirectory of root; when stdin is false the stdin fifo path is
+// cleared so no stdin pipe is opened later.
+func newFifos(root, id string, tty, stdin bool) (*cio.FIFOSet, error) {
+ root = filepath.Join(root, "io")
+ if err := os.MkdirAll(root, 0700); err != nil {
+ return nil, err
+ }
+ fifos, err := cio.NewFIFOSetInDir(root, id, tty)
+ if err != nil {
+ return nil, err
+ }
+ if !stdin {
+ fifos.Stdin = ""
+ }
+ return fifos, nil
+}
+
+// stdioPipes holds the opened pipe endpoints from the plugin's point of view:
+// the plugin writes stdin and reads stdout/stderr.
+type stdioPipes struct {
+ stdin io.WriteCloser
+ stdout io.ReadCloser
+ stderr io.ReadCloser
+}
+
+// newStdioPipes creates actual fifos for stdio. Each non-empty path in fifos
+// is opened with the appropriate direction; on any error all pipes opened so
+// far are closed and the context is cancelled before returning.
+func newStdioPipes(fifos *cio.FIFOSet) (_ *stdioPipes, _ *wgCloser, err error) {
+ var (
+ f io.ReadWriteCloser
+ set []io.Closer
+ ctx, cancel = context.WithCancel(context.Background())
+ p = &stdioPipes{}
+ )
+ // err is a named return, so this cleanup runs on every failure path.
+ defer func() {
+ if err != nil {
+ for _, f := range set {
+ f.Close()
+ }
+ cancel()
+ }
+ }()
+
+ // stdin is opened write-only: the plugin writes into the container.
+ if fifos.Stdin != "" {
+ if f, err = openPipe(ctx, fifos.Stdin, syscall.O_WRONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
+ return nil, nil, err
+ }
+ p.stdin = f
+ set = append(set, f)
+ }
+
+ // stdout/stderr are opened read-only: the plugin reads container output.
+ if fifos.Stdout != "" {
+ if f, err = openPipe(ctx, fifos.Stdout, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
+ return nil, nil, err
+ }
+ p.stdout = f
+ set = append(set, f)
+ }
+
+ if fifos.Stderr != "" {
+ if f, err = openPipe(ctx, fifos.Stderr, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
+ return nil, nil, err
+ }
+ p.stderr = f
+ set = append(set, f)
+ }
+
+ return p, &wgCloser{
+ wg: &sync.WaitGroup{},
+ set: set,
+ ctx: ctx,
+ cancel: cancel,
+ }, nil
+}
diff --git a/pkg/server/io/helpers_unix.go b/pkg/server/io/helpers_unix.go
new file mode 100644
index 000000000..2780b958a
--- /dev/null
+++ b/pkg/server/io/helpers_unix.go
@@ -0,0 +1,31 @@
+// +build !windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package io
+
+import (
+ "io"
+ "os"
+
+ "github.com/containerd/fifo"
+ "golang.org/x/net/context"
+)
+
+// openPipe opens (creating if needed, per flag) a fifo at fn by delegating
+// to containerd's fifo package; ctx is forwarded to fifo.OpenFifo.
+func openPipe(ctx context.Context, fn string, flag int, perm os.FileMode) (io.ReadWriteCloser, error) {
+ return fifo.OpenFifo(ctx, fn, flag, perm)
+}
diff --git a/pkg/server/io/helpers_windows.go b/pkg/server/io/helpers_windows.go
new file mode 100644
index 000000000..f64807edf
--- /dev/null
+++ b/pkg/server/io/helpers_windows.go
@@ -0,0 +1,81 @@
+// +build windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package io
+
+import (
+ "io"
+ "net"
+ "os"
+ "sync"
+
+ winio "github.com/Microsoft/go-winio"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+)
+
+// pipe adapts a named pipe listener to io.ReadWriteCloser: it listens on a
+// Windows named pipe and accepts a single connection in the background.
+type pipe struct {
+ l net.Listener
+ // con is the accepted connection; nil until the background Accept returns.
+ con net.Conn
+ // conErr records an Accept failure, surfaced on later Read/Write/Close.
+ conErr error
+ // conWg is done once the background Accept has completed either way.
+ conWg sync.WaitGroup
+}
+
+// openPipe creates a named pipe server at fn and returns a ReadWriteCloser
+// whose Read/Write block until a client connects. flag/perm are accepted for
+// interface parity with the unix implementation but are not used here.
+func openPipe(ctx context.Context, fn string, flag int, perm os.FileMode) (io.ReadWriteCloser, error) {
+ l, err := winio.ListenPipe(fn, nil)
+ if err != nil {
+ return nil, err
+ }
+ p := &pipe{l: l}
+ p.conWg.Add(1)
+ go func() {
+ defer p.conWg.Done()
+ c, err := l.Accept()
+ if err != nil {
+ p.conErr = err
+ return
+ }
+ p.con = c
+ }()
+ return p, nil
+}
+
+// Write blocks until a client has connected, then writes to the connection.
+func (p *pipe) Write(b []byte) (int, error) {
+ p.conWg.Wait()
+ if p.conErr != nil {
+ return 0, errors.Wrap(p.conErr, "connection error")
+ }
+ return p.con.Write(b)
+}
+
+// Read blocks until a client has connected, then reads from the connection.
+func (p *pipe) Read(b []byte) (int, error) {
+ p.conWg.Wait()
+ if p.conErr != nil {
+ return 0, errors.Wrap(p.conErr, "connection error")
+ }
+ return p.con.Read(b)
+}
+
+// Close closes the listener (unblocking a pending Accept) and then the
+// accepted connection, if any.
+func (p *pipe) Close() error {
+ p.l.Close()
+ p.conWg.Wait()
+ if p.con != nil {
+ return p.con.Close()
+ }
+ return p.conErr
+}
diff --git a/pkg/server/io/logger.go b/pkg/server/io/logger.go
new file mode 100644
index 000000000..f13b6f8bf
--- /dev/null
+++ b/pkg/server/io/logger.go
@@ -0,0 +1,196 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package io
+
+import (
+ "bufio"
+ "bytes"
+ "fmt"
+ "io"
+ "io/ioutil"
+ "time"
+
+ "github.com/sirupsen/logrus"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ cioutil "github.com/containerd/cri/pkg/ioutil"
+)
+
+const (
+ // delimiter is the field separator used in the CRI logging format.
+ delimiter = ' '
+ // eol is the end-of-line character. (Comment previously misnamed it "eof".)
+ eol = '\n'
+ // timestampFormat is the timestamp format used in CRI logging format.
+ timestampFormat = time.RFC3339Nano
+ // defaultBufSize is the default size of the read buffer in bytes.
+ defaultBufSize = 4096
+)
+
+// NewDiscardLogger creates logger which discards all the input; useful when
+// a stream must be drained but its content is not wanted.
+func NewDiscardLogger() io.WriteCloser {
+ return cioutil.NewNopWriteCloser(ioutil.Discard)
+}
+
+// NewCRILogger returns a write closer which redirect container log into
+// log file, and decorate the log line into CRI defined format. It also
+// returns a channel which indicates whether the logger is stopped.
+// maxLen is the max length limit of a line. A line longer than the
+// limit will be cut into multiple lines.
+func NewCRILogger(path string, w io.Writer, stream StreamType, maxLen int) (io.WriteCloser, <-chan struct{}) {
+ logrus.Debugf("Start writing stream %q to log file %q", stream, path)
+ // The pipe decouples the writer handed to the caller from the goroutine
+ // that formats and writes log lines; closing the returned WriteCloser
+ // ends redirectLogs, which then closes the stop channel.
+ prc, pwc := io.Pipe()
+ stop := make(chan struct{})
+ go func() {
+ redirectLogs(path, prc, w, stream, maxLen)
+ close(stop)
+ }()
+ return pwc, stop
+}
+
+// bufio.ReadLine in golang eats both read errors and tailing newlines
+// (See https://golang.org/pkg/bufio/#Reader.ReadLine). When reading
+// to io.EOF, it is impossible for the caller to figure out whether
+// there is a newline at the end, for example:
+// 1) When reading "CONTENT\n", it returns "CONTENT" without error;
+// 2) When reading "CONTENT", it also returns "CONTENT" without error.
+//
+// To differentiate these 2 cases, we need to write a readLine function
+// ourselves to not ignore the error.
+//
+// The code is similar with https://golang.org/src/bufio/bufio.go?s=9537:9604#L359.
+// The only difference is that it returns all errors from `ReadSlice`.
+//
+// readLine returns err != nil if and only if line does not end with a new line.
+// readLine reads one line from b, reporting isPrefix when the line exceeded
+// the buffer. It returns err != nil if and only if the line does not end
+// with a newline (see the rationale comment above).
+func readLine(b *bufio.Reader) (line []byte, isPrefix bool, err error) {
+ line, err = b.ReadSlice('\n')
+ if err == bufio.ErrBufferFull {
+ // Handle the case where "\r\n" straddles the buffer.
+ if len(line) > 0 && line[len(line)-1] == '\r' {
+ // Unread the last '\r'
+ if err := b.UnreadByte(); err != nil {
+ panic(fmt.Sprintf("invalid unread %v", err))
+ }
+ line = line[:len(line)-1]
+ }
+ return line, true, nil
+ }
+
+ if len(line) == 0 {
+ if err != nil {
+ line = nil
+ }
+ return
+ }
+
+ if line[len(line)-1] == '\n' {
+ // "ReadSlice returns err != nil if and only if line does not end in delim"
+ // (See https://golang.org/pkg/bufio/#Reader.ReadSlice).
+ if err != nil {
+ panic(fmt.Sprintf("full read with unexpected error %v", err))
+ }
+ // Strip the trailing "\n", and a "\r" before it if present.
+ drop := 1
+ if len(line) > 1 && line[len(line)-2] == '\r' {
+ drop = 2
+ }
+ line = line[:len(line)-drop]
+ }
+ return
+}
+
+// redirectLogs reads rc line by line and writes each line to w in the CRI
+// log format ("timestamp stream tag content\n"), splitting lines longer than
+// maxLen into partial entries. It runs until rc is exhausted or errors.
+func redirectLogs(path string, rc io.ReadCloser, w io.Writer, s StreamType, maxLen int) {
+ defer rc.Close()
+ var (
+ stream = []byte(s)
+ delimiter = []byte{delimiter}
+ partial = []byte(runtime.LogTagPartial)
+ full = []byte(runtime.LogTagFull)
+ buf [][]byte
+ length int
+ bufSize = defaultBufSize
+ )
+ // Make sure bufSize <= maxLen
+ if maxLen > 0 && maxLen < bufSize {
+ bufSize = maxLen
+ }
+ r := bufio.NewReaderSize(rc, bufSize)
+ // writeLine emits one CRI-formatted log entry with the given tag.
+ writeLine := func(tag, line []byte) {
+ timestamp := time.Now().AppendFormat(nil, timestampFormat)
+ data := bytes.Join([][]byte{timestamp, stream, tag, line}, delimiter)
+ data = append(data, eol)
+ if _, err := w.Write(data); err != nil {
+ logrus.WithError(err).Errorf("Fail to write %q log to log file %q", s, path)
+ // Continue on write error to drain the container output.
+ }
+ }
+ for {
+ var stop bool
+ newLine, isPrefix, err := readLine(r)
+ // NOTE(random-liu): readLine can return actual content even if there is an error.
+ if len(newLine) > 0 {
+ // Buffer returned by ReadLine will change after
+ // next read, copy it.
+ l := make([]byte, len(newLine))
+ copy(l, newLine)
+ buf = append(buf, l)
+ length += len(l)
+ }
+ if err != nil {
+ if err == io.EOF {
+ logrus.Debugf("Getting EOF from stream %q while redirecting to log file %q", s, path)
+ } else {
+ logrus.WithError(err).Errorf("An error occurred when redirecting stream %q to log file %q", s, path)
+ }
+ if length == 0 {
+ // No content left to write, break.
+ break
+ }
+ // Stop after writing the content left in buffer.
+ stop = true
+ }
+ if maxLen > 0 && length > maxLen {
+ exceedLen := length - maxLen
+ last := buf[len(buf)-1]
+ if exceedLen > len(last) {
+ // exceedLen must <= len(last), or else the buffer
+ // should have been written in the previous iteration.
+ panic("exceed length should <= last buffer size")
+ }
+ // Flush exactly maxLen bytes as a partial line, keeping the
+ // overflow as the start of the next line.
+ buf[len(buf)-1] = last[:len(last)-exceedLen]
+ writeLine(partial, bytes.Join(buf, nil))
+ buf = [][]byte{last[len(last)-exceedLen:]}
+ length = exceedLen
+ }
+ if isPrefix {
+ continue
+ }
+ if stop {
+ // readLine only returns error when the message doesn't
+ // end with a newline, in that case it should be treated
+ // as a partial line.
+ writeLine(partial, bytes.Join(buf, nil))
+ } else {
+ writeLine(full, bytes.Join(buf, nil))
+ }
+ buf = nil
+ length = 0
+ if stop {
+ break
+ }
+ }
+ logrus.Debugf("Finish redirecting stream %q to log file %q", s, path)
+}
diff --git a/pkg/server/io/logger_test.go b/pkg/server/io/logger_test.go
new file mode 100644
index 000000000..6c91a049a
--- /dev/null
+++ b/pkg/server/io/logger_test.go
@@ -0,0 +1,258 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package io
+
+import (
+ "bytes"
+ "io/ioutil"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ cioutil "github.com/containerd/cri/pkg/ioutil"
+)
+
+func TestRedirectLogs(t *testing.T) {
+ // defaultBufSize is even number
+ const maxLen = defaultBufSize * 4
+ for desc, test := range map[string]struct {
+ input string
+ stream StreamType
+ maxLen int
+ tag []runtime.LogTag
+ content []string
+ }{
+ "stdout log": {
+ input: "test stdout log 1\ntest stdout log 2\n",
+ stream: Stdout,
+ maxLen: maxLen,
+ tag: []runtime.LogTag{
+ runtime.LogTagFull,
+ runtime.LogTagFull,
+ },
+ content: []string{
+ "test stdout log 1",
+ "test stdout log 2",
+ },
+ },
+ "stderr log": {
+ input: "test stderr log 1\ntest stderr log 2\n",
+ stream: Stderr,
+ maxLen: maxLen,
+ tag: []runtime.LogTag{
+ runtime.LogTagFull,
+ runtime.LogTagFull,
+ },
+ content: []string{
+ "test stderr log 1",
+ "test stderr log 2",
+ },
+ },
+ "log ends without newline": {
+ input: "test stderr log 1\ntest stderr log 2",
+ stream: Stderr,
+ maxLen: maxLen,
+ tag: []runtime.LogTag{
+ runtime.LogTagFull,
+ runtime.LogTagPartial,
+ },
+ content: []string{
+ "test stderr log 1",
+ "test stderr log 2",
+ },
+ },
+ "log length equal to buffer size": {
+ input: strings.Repeat("a", defaultBufSize) + "\n" + strings.Repeat("a", defaultBufSize) + "\n",
+ stream: Stdout,
+ maxLen: maxLen,
+ tag: []runtime.LogTag{
+ runtime.LogTagFull,
+ runtime.LogTagFull,
+ },
+ content: []string{
+ strings.Repeat("a", defaultBufSize),
+ strings.Repeat("a", defaultBufSize),
+ },
+ },
+ "log length longer than buffer size": {
+ input: strings.Repeat("a", defaultBufSize*2+10) + "\n" + strings.Repeat("a", defaultBufSize*2+20) + "\n",
+ stream: Stdout,
+ maxLen: maxLen,
+ tag: []runtime.LogTag{
+ runtime.LogTagFull,
+ runtime.LogTagFull,
+ },
+ content: []string{
+ strings.Repeat("a", defaultBufSize*2+10),
+ strings.Repeat("a", defaultBufSize*2+20),
+ },
+ },
+ "log length equal to max length": {
+ input: strings.Repeat("a", maxLen) + "\n" + strings.Repeat("a", maxLen) + "\n",
+ stream: Stdout,
+ maxLen: maxLen,
+ tag: []runtime.LogTag{
+ runtime.LogTagFull,
+ runtime.LogTagFull,
+ },
+ content: []string{
+ strings.Repeat("a", maxLen),
+ strings.Repeat("a", maxLen),
+ },
+ },
+ "log length exceed max length by 1": {
+ input: strings.Repeat("a", maxLen+1) + "\n" + strings.Repeat("a", maxLen+1) + "\n",
+ stream: Stdout,
+ maxLen: maxLen,
+ tag: []runtime.LogTag{
+ runtime.LogTagPartial,
+ runtime.LogTagFull,
+ runtime.LogTagPartial,
+ runtime.LogTagFull,
+ },
+ content: []string{
+ strings.Repeat("a", maxLen),
+ "a",
+ strings.Repeat("a", maxLen),
+ "a",
+ },
+ },
+ "log length longer than max length": {
+ input: strings.Repeat("a", maxLen*2) + "\n" + strings.Repeat("a", maxLen*2+1) + "\n",
+ stream: Stdout,
+ maxLen: maxLen,
+ tag: []runtime.LogTag{
+ runtime.LogTagPartial,
+ runtime.LogTagFull,
+ runtime.LogTagPartial,
+ runtime.LogTagPartial,
+ runtime.LogTagFull,
+ },
+ content: []string{
+ strings.Repeat("a", maxLen),
+ strings.Repeat("a", maxLen),
+ strings.Repeat("a", maxLen),
+ strings.Repeat("a", maxLen),
+ "a",
+ },
+ },
+ "max length shorter than buffer size": {
+ input: strings.Repeat("a", defaultBufSize*3/2+10) + "\n" + strings.Repeat("a", defaultBufSize*3/2+20) + "\n",
+ stream: Stdout,
+ maxLen: defaultBufSize / 2,
+ tag: []runtime.LogTag{
+ runtime.LogTagPartial,
+ runtime.LogTagPartial,
+ runtime.LogTagPartial,
+ runtime.LogTagFull,
+ runtime.LogTagPartial,
+ runtime.LogTagPartial,
+ runtime.LogTagPartial,
+ runtime.LogTagFull,
+ },
+ content: []string{
+ strings.Repeat("a", defaultBufSize*1/2),
+ strings.Repeat("a", defaultBufSize*1/2),
+ strings.Repeat("a", defaultBufSize*1/2),
+ strings.Repeat("a", 10),
+ strings.Repeat("a", defaultBufSize*1/2),
+ strings.Repeat("a", defaultBufSize*1/2),
+ strings.Repeat("a", defaultBufSize*1/2),
+ strings.Repeat("a", 20),
+ },
+ },
+ "log length longer than max length, and (maxLen % defaultBufSize != 0)": {
+ input: strings.Repeat("a", defaultBufSize*2+10) + "\n" + strings.Repeat("a", defaultBufSize*2+20) + "\n",
+ stream: Stdout,
+ maxLen: defaultBufSize * 3 / 2,
+ tag: []runtime.LogTag{
+ runtime.LogTagPartial,
+ runtime.LogTagFull,
+ runtime.LogTagPartial,
+ runtime.LogTagFull,
+ },
+ content: []string{
+ strings.Repeat("a", defaultBufSize*3/2),
+ strings.Repeat("a", defaultBufSize*1/2+10),
+ strings.Repeat("a", defaultBufSize*3/2),
+ strings.Repeat("a", defaultBufSize*1/2+20),
+ },
+ },
+ "no limit if max length is 0": {
+ input: strings.Repeat("a", defaultBufSize*10+10) + "\n" + strings.Repeat("a", defaultBufSize*10+20) + "\n",
+ stream: Stdout,
+ maxLen: 0,
+ tag: []runtime.LogTag{
+ runtime.LogTagFull,
+ runtime.LogTagFull,
+ },
+ content: []string{
+ strings.Repeat("a", defaultBufSize*10+10),
+ strings.Repeat("a", defaultBufSize*10+20),
+ },
+ },
+ "no limit if max length is negative": {
+ input: strings.Repeat("a", defaultBufSize*10+10) + "\n" + strings.Repeat("a", defaultBufSize*10+20) + "\n",
+ stream: Stdout,
+ maxLen: -1,
+ tag: []runtime.LogTag{
+ runtime.LogTagFull,
+ runtime.LogTagFull,
+ },
+ content: []string{
+ strings.Repeat("a", defaultBufSize*10+10),
+ strings.Repeat("a", defaultBufSize*10+20),
+ },
+ },
+ "log length longer than buffer size with tailing \\r\\n": {
+ input: strings.Repeat("a", defaultBufSize-1) + "\r\n" + strings.Repeat("a", defaultBufSize-1) + "\r\n",
+ stream: Stdout,
+ maxLen: -1,
+ tag: []runtime.LogTag{
+ runtime.LogTagFull,
+ runtime.LogTagFull,
+ },
+ content: []string{
+ strings.Repeat("a", defaultBufSize-1),
+ strings.Repeat("a", defaultBufSize-1),
+ },
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+ rc := ioutil.NopCloser(strings.NewReader(test.input))
+ buf := bytes.NewBuffer(nil)
+ wc := cioutil.NewNopWriteCloser(buf)
+ redirectLogs("test-path", rc, wc, test.stream, test.maxLen)
+ output := buf.String()
+ lines := strings.Split(output, "\n")
+ lines = lines[:len(lines)-1] // Discard empty string after last \n
+ assert.Len(t, lines, len(test.content))
+ for i := range lines {
+ fields := strings.SplitN(lines[i], string([]byte{delimiter}), 4)
+ require.Len(t, fields, 4)
+ _, err := time.Parse(timestampFormat, fields[0])
+ assert.NoError(t, err)
+ assert.EqualValues(t, test.stream, fields[1])
+ assert.Equal(t, string(test.tag[i]), fields[2])
+ assert.Equal(t, test.content[i], fields[3])
+ }
+ }
+}
diff --git a/pkg/server/opts.go b/pkg/server/opts.go
new file mode 100644
index 000000000..58520dbc3
--- /dev/null
+++ b/pkg/server/opts.go
@@ -0,0 +1,51 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "context"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/log"
+ "github.com/containerd/nri"
+ v1 "github.com/containerd/nri/types/v1"
+)
+
+// WithNRISandboxDelete calls delete for a sandbox'd task. NRI failures are
+// logged but never propagated, so task deletion proceeds regardless.
+func WithNRISandboxDelete(sandboxID string) containerd.ProcessDeleteOpts {
+ return func(ctx context.Context, p containerd.Process) error {
+ // Only tasks (not plain processes) carry sandbox-level NRI state.
+ task, ok := p.(containerd.Task)
+ if !ok {
+ return nil
+ }
+ nric, err := nri.New()
+ if err != nil {
+ log.G(ctx).WithError(err).Error("unable to create nri client")
+ return nil
+ }
+ // nri.New may return (nil, nil) when NRI is not configured.
+ if nric == nil {
+ return nil
+ }
+ sb := &nri.Sandbox{
+ ID: sandboxID,
+ }
+ if _, err := nric.InvokeWithSandbox(ctx, task, v1.Delete, sb); err != nil {
+ log.G(ctx).WithError(err).Errorf("Failed to delete nri for %q", task.ID())
+ }
+ return nil
+ }
+}
diff --git a/pkg/server/restart.go b/pkg/server/restart.go
new file mode 100644
index 000000000..2480bd5ea
--- /dev/null
+++ b/pkg/server/restart.go
@@ -0,0 +1,483 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ goruntime "runtime"
+ "time"
+
+ "github.com/containerd/containerd"
+ containerdio "github.com/containerd/containerd/cio"
+ "github.com/containerd/containerd/errdefs"
+ containerdimages "github.com/containerd/containerd/images"
+ "github.com/containerd/containerd/log"
+ "github.com/containerd/containerd/platforms"
+ "github.com/containerd/typeurl"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ ctrdutil "github.com/containerd/cri/pkg/containerd/util"
+ "github.com/containerd/cri/pkg/netns"
+ cio "github.com/containerd/cri/pkg/server/io"
+ containerstore "github.com/containerd/cri/pkg/store/container"
+ sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
+)
+
+// NOTE: The recovery logic makes the following assumptions about the period when the cri plugin is down:
+// 1) Files (e.g. root directory, netns) and checkpoints maintained by the plugin MUST NOT be
+// touched. Otherwise, recovery logic for those containers/sandboxes may return an error.
+// 2) Containerd containers may be deleted, but SHOULD NOT be added. Otherwise, recovery logic
+// for the newly added container/sandbox will return an error, because there is no corresponding root
+// directory created.
+// 3) Containerd container tasks may exit or be stopped or deleted. Even though the current logic can
+// tolerate tasks being created or started, we prefer that not to happen.
+
+// recover recovers system state from containerd and status checkpoint.
+// Recovery is best effort per item: a sandbox/container that fails to load is
+// logged and skipped, but a failure to register a successfully loaded item in
+// the stores (store add / name reservation) aborts recovery with an error.
+func (c *criService) recover(ctx context.Context) error {
+ // Recover all sandboxes.
+ sandboxes, err := c.client.Containers(ctx, filterLabel(containerKindLabel, containerKindSandbox))
+ if err != nil {
+ return errors.Wrap(err, "failed to list sandbox containers")
+ }
+ for _, sandbox := range sandboxes {
+ sb, err := c.loadSandbox(ctx, sandbox)
+ if err != nil {
+ // Skip this sandbox but continue recovering the rest.
+ log.G(ctx).WithError(err).Errorf("Failed to load sandbox %q", sandbox.ID())
+ continue
+ }
+ log.G(ctx).Debugf("Loaded sandbox %+v", sb)
+ if err := c.sandboxStore.Add(sb); err != nil {
+ return errors.Wrapf(err, "failed to add sandbox %q to store", sandbox.ID())
+ }
+ if err := c.sandboxNameIndex.Reserve(sb.Name, sb.ID); err != nil {
+ return errors.Wrapf(err, "failed to reserve sandbox name %q", sb.Name)
+ }
+ }
+
+ // Recover all containers.
+ containers, err := c.client.Containers(ctx, filterLabel(containerKindLabel, containerKindContainer))
+ if err != nil {
+ return errors.Wrap(err, "failed to list containers")
+ }
+ for _, container := range containers {
+ cntr, err := c.loadContainer(ctx, container)
+ if err != nil {
+ // Skip this container but continue recovering the rest.
+ log.G(ctx).WithError(err).Errorf("Failed to load container %q", container.ID())
+ continue
+ }
+ log.G(ctx).Debugf("Loaded container %+v", cntr)
+ if err := c.containerStore.Add(cntr); err != nil {
+ return errors.Wrapf(err, "failed to add container %q to store", container.ID())
+ }
+ if err := c.containerNameIndex.Reserve(cntr.Name, cntr.ID); err != nil {
+ return errors.Wrapf(err, "failed to reserve container name %q", cntr.Name)
+ }
+ }
+
+ // Recover all images.
+ cImages, err := c.client.ListImages(ctx)
+ if err != nil {
+ return errors.Wrap(err, "failed to list images")
+ }
+ c.loadImages(ctx, cImages)
+
+ // It's possible that containerd containers are deleted unexpectedly. In that case,
+ // we can't even get metadata, we should cleanup orphaned sandbox/container directories
+ // with best effort.
+
+ // Cleanup orphaned sandbox and container directories without corresponding containerd container.
+ for _, cleanup := range []struct {
+ cntrs []containerd.Container
+ base string
+ errMsg string
+ }{
+ {
+ cntrs: sandboxes,
+ base: filepath.Join(c.config.RootDir, sandboxesDir),
+ errMsg: "failed to cleanup orphaned sandbox directories",
+ },
+ {
+ cntrs: sandboxes,
+ base: filepath.Join(c.config.StateDir, sandboxesDir),
+ errMsg: "failed to cleanup orphaned volatile sandbox directories",
+ },
+ {
+ cntrs: containers,
+ base: filepath.Join(c.config.RootDir, containersDir),
+ errMsg: "failed to cleanup orphaned container directories",
+ },
+ {
+ cntrs: containers,
+ base: filepath.Join(c.config.StateDir, containersDir),
+ errMsg: "failed to cleanup orphaned volatile container directories",
+ },
+ } {
+ if err := cleanupOrphanedIDDirs(ctx, cleanup.cntrs, cleanup.base); err != nil {
+ return errors.Wrap(err, cleanup.errMsg)
+ }
+ }
+ return nil
+}
+
+// loadContainerTimeout is the default timeout for loading a container/sandbox.
+// A single hanging container/sandbox (e.g. containerd#2438) should not affect
+// other containers/sandboxes.
+// Most CRI container/sandbox related operations are per container; the ones
+// which handle multiple containers at a time are:
+// * ListPodSandboxes: Doesn't talk with containerd services.
+// * ListContainers: Doesn't talk with containerd services.
+// * ListContainerStats: Not in critical code path, a default timeout will
+// be applied at CRI level.
+// * Recovery logic: We should set a timeout for each container/sandbox recovery.
+// * Event monitor: We should set a timeout for each container/sandbox event handling.
+const loadContainerTimeout = 10 * time.Second
+
+// loadContainer loads a container from containerd metadata and the on-disk
+// status checkpoint, reconciling the checkpointed CRI state with the live
+// containerd task state. If reconciliation fails, the container is still
+// returned, but its status is marked Unknown.
+func (c *criService) loadContainer(ctx context.Context, cntr containerd.Container) (containerstore.Container, error) {
+ ctx, cancel := context.WithTimeout(ctx, loadContainerTimeout)
+ defer cancel()
+ id := cntr.ID()
+ containerDir := c.getContainerRootDir(id)
+ volatileContainerDir := c.getVolatileContainerRootDir(id)
+ var container containerstore.Container
+ // Load container metadata.
+ exts, err := cntr.Extensions(ctx)
+ if err != nil {
+ return container, errors.Wrap(err, "failed to get container extensions")
+ }
+ ext, ok := exts[containerMetadataExtension]
+ if !ok {
+ return container, errors.Errorf("metadata extension %q not found", containerMetadataExtension)
+ }
+ data, err := typeurl.UnmarshalAny(&ext)
+ if err != nil {
+ return container, errors.Wrapf(err, "failed to unmarshal metadata extension %q", ext)
+ }
+ meta := data.(*containerstore.Metadata)
+
+ // Load status from checkpoint. A checkpoint load failure is non-fatal;
+ // fall back to an unknown status.
+ status, err := containerstore.LoadStatus(containerDir, id)
+ if err != nil {
+ log.G(ctx).WithError(err).Warnf("Failed to load container status for %q", id)
+ status = unknownContainerStatus()
+ }
+
+ var containerIO *cio.ContainerIO
+ // Reconcile the checkpointed status with the live task state. Any error
+ // returned from this closure only marks the status unknown below; it does
+ // not fail the load.
+ err = func() error {
+ // Load up-to-date status from containerd.
+ t, err := cntr.Task(ctx, func(fifos *containerdio.FIFOSet) (_ containerdio.IO, err error) {
+ stdoutWC, stderrWC, err := c.createContainerLoggers(meta.LogPath, meta.Config.GetTty())
+ if err != nil {
+ return nil, err
+ }
+ // Close the log writers if wiring up container IO fails below.
+ defer func() {
+ if err != nil {
+ if stdoutWC != nil {
+ stdoutWC.Close()
+ }
+ if stderrWC != nil {
+ stderrWC.Close()
+ }
+ }
+ }()
+ containerIO, err = cio.NewContainerIO(id,
+ cio.WithFIFOs(fifos),
+ )
+ if err != nil {
+ return nil, err
+ }
+ containerIO.AddOutput("log", stdoutWC, stderrWC)
+ containerIO.Pipe()
+ return containerIO, nil
+ })
+ if err != nil && !errdefs.IsNotFound(err) {
+ return errors.Wrap(err, "failed to load task")
+ }
+ var s containerd.Status
+ var notFound bool
+ if errdefs.IsNotFound(err) {
+ // Task is not found.
+ notFound = true
+ } else {
+ // Task is found. Get task status.
+ s, err = t.Status(ctx)
+ if err != nil {
+ // It's still possible that task is deleted during this window.
+ if !errdefs.IsNotFound(err) {
+ return errors.Wrap(err, "failed to get task status")
+ }
+ notFound = true
+ }
+ }
+ if notFound {
+ // Task is not created or has been deleted, use the checkpointed status
+ // to generate container status.
+ switch status.State() {
+ case runtime.ContainerState_CONTAINER_CREATED:
+ // NOTE: Another possibility is that we've tried to start the container, but
+ // containerd got restarted during that. In that case, we still
+ // treat the container as `CREATED`.
+ containerIO, err = cio.NewContainerIO(id,
+ cio.WithNewFIFOs(volatileContainerDir, meta.Config.GetTty(), meta.Config.GetStdin()),
+ )
+ if err != nil {
+ return errors.Wrap(err, "failed to create container io")
+ }
+ case runtime.ContainerState_CONTAINER_RUNNING:
+ // Container was in running state, but its task has been deleted,
+ // set unknown exited state. Container io is not needed in this case.
+ status.FinishedAt = time.Now().UnixNano()
+ status.ExitCode = unknownExitCode
+ status.Reason = unknownExitReason
+ default:
+ // Container is in exited/unknown state, return the status as it is.
+ }
+ } else {
+ // Task status is found. Update container status based on the up-to-date task status.
+ switch s.Status {
+ case containerd.Created:
+ // Task has been created, but not started yet. This could only happen if containerd
+ // gets restarted during container start.
+ // Container must be in `CREATED` state.
+ if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
+ return errors.Wrap(err, "failed to delete task")
+ }
+ if status.State() != runtime.ContainerState_CONTAINER_CREATED {
+ return errors.Errorf("unexpected container state for created task: %q", status.State())
+ }
+ case containerd.Running:
+ // Task is running. Container must be in `RUNNING` state, based on our assumption that
+ // "task should not be started when containerd is down".
+ switch status.State() {
+ case runtime.ContainerState_CONTAINER_EXITED:
+ return errors.Errorf("unexpected container state for running task: %q", status.State())
+ case runtime.ContainerState_CONTAINER_RUNNING:
+ default:
+ // This may happen if containerd gets restarted after task is started, but
+ // before status is checkpointed.
+ status.StartedAt = time.Now().UnixNano()
+ status.Pid = t.Pid()
+ }
+ // Wait for the task for exit monitor.
+ // wait is a long running background request, no timeout needed.
+ exitCh, err := t.Wait(ctrdutil.NamespacedContext())
+ if err != nil {
+ if !errdefs.IsNotFound(err) {
+ return errors.Wrap(err, "failed to wait for task")
+ }
+ // Container was in running state, but its task has been deleted,
+ // set unknown exited state.
+ status.FinishedAt = time.Now().UnixNano()
+ status.ExitCode = unknownExitCode
+ status.Reason = unknownExitReason
+ } else {
+ // Start exit monitor.
+ c.eventMonitor.startExitMonitor(context.Background(), id, status.Pid, exitCh)
+ }
+ case containerd.Stopped:
+ // Task is stopped. Update status and delete the task.
+ if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
+ return errors.Wrap(err, "failed to delete task")
+ }
+ status.FinishedAt = s.ExitTime.UnixNano()
+ status.ExitCode = int32(s.ExitStatus)
+ default:
+ return errors.Errorf("unexpected task status %q", s.Status)
+ }
+ }
+ return nil
+ }()
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("Failed to load container status for %q", id)
+ // Only set the unknown field in this case, because other fields may
+ // contain useful information loaded from the checkpoint.
+ status.Unknown = true
+ }
+ opts := []containerstore.Opts{
+ containerstore.WithStatus(status, containerDir),
+ containerstore.WithContainer(cntr),
+ }
+ // containerIO could be nil for container in unknown state.
+ if containerIO != nil {
+ opts = append(opts, containerstore.WithContainerIO(containerIO))
+ }
+ return containerstore.NewContainer(*meta, opts...)
+}
+
+// loadSandbox loads a sandbox from containerd metadata and reconciles its
+// status with the live task state. Status reconciliation errors are logged
+// and leave the sandbox in the unknown status; only metadata errors fail the
+// load.
+func (c *criService) loadSandbox(ctx context.Context, cntr containerd.Container) (sandboxstore.Sandbox, error) {
+ ctx, cancel := context.WithTimeout(ctx, loadContainerTimeout)
+ defer cancel()
+ var sandbox sandboxstore.Sandbox
+ // Load sandbox metadata.
+ exts, err := cntr.Extensions(ctx)
+ if err != nil {
+ return sandbox, errors.Wrap(err, "failed to get sandbox container extensions")
+ }
+ ext, ok := exts[sandboxMetadataExtension]
+ if !ok {
+ return sandbox, errors.Errorf("metadata extension %q not found", sandboxMetadataExtension)
+ }
+ data, err := typeurl.UnmarshalAny(&ext)
+ if err != nil {
+ return sandbox, errors.Wrapf(err, "failed to unmarshal metadata extension %q", ext)
+ }
+ meta := data.(*sandboxstore.Metadata)
+
+ // Derive the sandbox status from the live task state; errors here fall
+ // through to the unknown status.
+ s, err := func() (sandboxstore.Status, error) {
+ status := unknownSandboxStatus()
+ // Load sandbox created timestamp.
+ info, err := cntr.Info(ctx)
+ if err != nil {
+ return status, errors.Wrap(err, "failed to get sandbox container info")
+ }
+ status.CreatedAt = info.CreatedAt
+
+ // Load sandbox state.
+ t, err := cntr.Task(ctx, nil)
+ if err != nil && !errdefs.IsNotFound(err) {
+ return status, errors.Wrap(err, "failed to load task")
+ }
+ var taskStatus containerd.Status
+ var notFound bool
+ if errdefs.IsNotFound(err) {
+ // Task is not found.
+ notFound = true
+ } else {
+ // Task is found. Get task status.
+ taskStatus, err = t.Status(ctx)
+ if err != nil {
+ // It's still possible that task is deleted during this window.
+ if !errdefs.IsNotFound(err) {
+ return status, errors.Wrap(err, "failed to get task status")
+ }
+ notFound = true
+ }
+ }
+ if notFound {
+ // Task does not exist, set sandbox state as NOTREADY.
+ status.State = sandboxstore.StateNotReady
+ } else {
+ if taskStatus.Status == containerd.Running {
+ // Wait for the task for sandbox monitor.
+ // wait is a long running background request, no timeout needed.
+ exitCh, err := t.Wait(ctrdutil.NamespacedContext())
+ if err != nil {
+ if !errdefs.IsNotFound(err) {
+ return status, errors.Wrap(err, "failed to wait for task")
+ }
+ status.State = sandboxstore.StateNotReady
+ } else {
+ // Task is running, set sandbox state as READY.
+ status.State = sandboxstore.StateReady
+ status.Pid = t.Pid()
+ c.eventMonitor.startExitMonitor(context.Background(), meta.ID, status.Pid, exitCh)
+ }
+ } else {
+ // Task is not running. Delete the task and set sandbox state as NOTREADY.
+ if _, err := t.Delete(ctx, containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
+ return status, errors.Wrap(err, "failed to delete task")
+ }
+ status.State = sandboxstore.StateNotReady
+ }
+ }
+ return status, nil
+ }()
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("Failed to load sandbox status for %q", cntr.ID())
+ }
+
+ sandbox = sandboxstore.NewSandbox(*meta, s)
+ sandbox.Container = cntr
+
+ // Load network namespace.
+ if goruntime.GOOS != "windows" &&
+ meta.Config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
+ // Don't need to load netns for host network sandbox.
+ return sandbox, nil
+ }
+ sandbox.NetNS = netns.LoadNetNS(meta.NetNSPath)
+
+ // It doesn't matter whether task is running or not. If it is running, sandbox
+ // status will be `READY`; if it is not running, sandbox status will be `NOT_READY`,
+ // kubelet will stop the sandbox which will properly cleanup everything.
+ return sandbox, nil
+}
+
+// loadImages loads images from containerd into the CRI image store.
+// Images whose content is incomplete or unreadable are skipped with a log
+// message; loading is best effort and never returns an error.
+func (c *criService) loadImages(ctx context.Context, cImages []containerd.Image) {
+ snapshotter := c.config.ContainerdConfig.Snapshotter
+ for _, i := range cImages {
+ // Verify that all content referenced by the image target is present.
+ ok, _, _, _, err := containerdimages.Check(ctx, i.ContentStore(), i.Target(), platforms.Default())
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("Failed to check image content readiness for %q", i.Name())
+ continue
+ }
+ if !ok {
+ log.G(ctx).Warnf("The image content readiness for %q is not ok", i.Name())
+ continue
+ }
+ // Checking existence of top-level snapshot for each image being recovered.
+ unpacked, err := i.IsUnpacked(ctx, snapshotter)
+ if err != nil {
+ log.G(ctx).WithError(err).Warnf("Failed to check whether image is unpacked for image %s", i.Name())
+ continue
+ }
+ if !unpacked {
+ // Not fatal: the image is still loaded, just not unpacked yet.
+ log.G(ctx).Warnf("The image %s is not unpacked.", i.Name())
+ // TODO(random-liu): Consider whether we should try unpack here.
+ }
+ if err := c.updateImage(ctx, i.Name()); err != nil {
+ log.G(ctx).WithError(err).Warnf("Failed to update reference for image %q", i.Name())
+ continue
+ }
+ log.G(ctx).Debugf("Loaded image %q", i.Name())
+ }
+}
+
+// cleanupOrphanedIDDirs removes id-named directories under base that have no
+// corresponding container in cntrs. A missing base directory is not an error;
+// individual removal failures are logged and do not abort the cleanup.
+func cleanupOrphanedIDDirs(ctx context.Context, cntrs []containerd.Container, base string) error {
+ // Cleanup orphaned id directories.
+ dirs, err := ioutil.ReadDir(base)
+ if err != nil && !os.IsNotExist(err) {
+ return errors.Wrap(err, "failed to read base directory")
+ }
+ // Index live container ids for O(1) lookup while scanning directories.
+ idsMap := make(map[string]containerd.Container)
+ for _, cntr := range cntrs {
+ idsMap[cntr.ID()] = cntr
+ }
+ for _, d := range dirs {
+ if !d.IsDir() {
+ log.G(ctx).Warnf("Invalid file %q found in base directory %q", d.Name(), base)
+ continue
+ }
+ if _, ok := idsMap[d.Name()]; ok {
+ // Do not remove id directory if corresponding container is found.
+ continue
+ }
+ dir := filepath.Join(base, d.Name())
+ if err := ensureRemoveAll(ctx, dir); err != nil {
+ log.G(ctx).WithError(err).Warnf("Failed to remove id directory %q", dir)
+ } else {
+ log.G(ctx).Debugf("Cleanup orphaned id directory %q", dir)
+ }
+ }
+ return nil
+}
diff --git a/pkg/server/sandbox_list.go b/pkg/server/sandbox_list.go
new file mode 100644
index 000000000..d2528b267
--- /dev/null
+++ b/pkg/server/sandbox_list.go
@@ -0,0 +1,101 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
+)
+
+// ListPodSandbox returns the list of pod sandboxes known to the sandbox
+// store, converted to CRI PodSandbox messages and filtered by the request's
+// optional filter. It never talks to containerd services.
+func (c *criService) ListPodSandbox(ctx context.Context, r *runtime.ListPodSandboxRequest) (*runtime.ListPodSandboxResponse, error) {
+ // List all sandboxes from store.
+ sandboxesInStore := c.sandboxStore.List()
+ var sandboxes []*runtime.PodSandbox
+ for _, sandboxInStore := range sandboxesInStore {
+ sandboxes = append(sandboxes, toCRISandbox(
+ sandboxInStore.Metadata,
+ sandboxInStore.Status.Get(),
+ ))
+ }
+
+ sandboxes = c.filterCRISandboxes(sandboxes, r.GetFilter())
+ return &runtime.ListPodSandboxResponse{Items: sandboxes}, nil
+}
+
+// toCRISandbox converts internal sandbox metadata and status into a CRI
+// PodSandbox. Any state other than StateReady (including unknown) maps to
+// SANDBOX_NOTREADY, since CRI only has two sandbox states.
+func toCRISandbox(meta sandboxstore.Metadata, status sandboxstore.Status) *runtime.PodSandbox {
+ // Set sandbox state to NOTREADY by default.
+ state := runtime.PodSandboxState_SANDBOX_NOTREADY
+ if status.State == sandboxstore.StateReady {
+ state = runtime.PodSandboxState_SANDBOX_READY
+ }
+ return &runtime.PodSandbox{
+ Id: meta.ID,
+ Metadata: meta.Config.GetMetadata(),
+ State: state,
+ CreatedAt: status.CreatedAt.UnixNano(),
+ Labels: meta.Config.GetLabels(),
+ Annotations: meta.Config.GetAnnotations(),
+ RuntimeHandler: meta.RuntimeHandler,
+ }
+}
+
+// normalizePodSandboxFilter resolves the filter's sandbox id (which may be a
+// partial/truncated id) to the full id via a sandbox store lookup. If no
+// match is found the filter is left unchanged.
+func (c *criService) normalizePodSandboxFilter(filter *runtime.PodSandboxFilter) {
+ if sb, err := c.sandboxStore.Get(filter.GetId()); err == nil {
+ filter.Id = sb.ID
+ }
+}
+
+// filterCRISandboxes returns the subset of sandboxes matching filter.
+// A nil filter returns the input unchanged; otherwise a new (possibly empty,
+// never nil) slice is built. Id, state, and label conditions are ANDed, and
+// the label selector requires every key/value pair to match exactly.
+func (c *criService) filterCRISandboxes(sandboxes []*runtime.PodSandbox, filter *runtime.PodSandboxFilter) []*runtime.PodSandbox {
+ if filter == nil {
+ return sandboxes
+ }
+
+ // Resolve a possibly truncated id before matching.
+ c.normalizePodSandboxFilter(filter)
+ filtered := []*runtime.PodSandbox{}
+ for _, s := range sandboxes {
+ // Filter by id
+ if filter.GetId() != "" && filter.GetId() != s.Id {
+ continue
+ }
+ // Filter by state
+ if filter.GetState() != nil && filter.GetState().GetState() != s.State {
+ continue
+ }
+ // Filter by label
+ if filter.GetLabelSelector() != nil {
+ match := true
+ for k, v := range filter.GetLabelSelector() {
+ got, ok := s.Labels[k]
+ if !ok || got != v {
+ match = false
+ break
+ }
+ }
+ if !match {
+ continue
+ }
+ }
+ filtered = append(filtered, s)
+ }
+
+ return filtered
+}
diff --git a/pkg/server/sandbox_list_test.go b/pkg/server/sandbox_list_test.go
new file mode 100644
index 000000000..af67bbfeb
--- /dev/null
+++ b/pkg/server/sandbox_list_test.go
@@ -0,0 +1,208 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
+)
+
+// TestToCRISandbox verifies that toCRISandbox copies metadata fields through
+// unchanged and maps every internal sandbox state (ready / not ready /
+// unknown) to the correct CRI PodSandboxState.
+func TestToCRISandbox(t *testing.T) {
+ config := &runtime.PodSandboxConfig{
+ Metadata: &runtime.PodSandboxMetadata{
+ Name: "test-name",
+ Uid: "test-uid",
+ Namespace: "test-ns",
+ Attempt: 1,
+ },
+ Labels: map[string]string{"a": "b"},
+ Annotations: map[string]string{"c": "d"},
+ }
+ createdAt := time.Now()
+ meta := sandboxstore.Metadata{
+ ID: "test-id",
+ Name: "test-name",
+ Config: config,
+ NetNSPath: "test-netns",
+ RuntimeHandler: "test-runtime-handler",
+ }
+ // State is filled in per test case below.
+ expect := &runtime.PodSandbox{
+ Id: "test-id",
+ Metadata: config.GetMetadata(),
+ CreatedAt: createdAt.UnixNano(),
+ Labels: config.GetLabels(),
+ Annotations: config.GetAnnotations(),
+ RuntimeHandler: "test-runtime-handler",
+ }
+ for desc, test := range map[string]struct {
+ state sandboxstore.State
+ expectedState runtime.PodSandboxState
+ }{
+ "sandbox state ready": {
+ state: sandboxstore.StateReady,
+ expectedState: runtime.PodSandboxState_SANDBOX_READY,
+ },
+ "sandbox state not ready": {
+ state: sandboxstore.StateNotReady,
+ expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY,
+ },
+ "sandbox state unknown": {
+ state: sandboxstore.StateUnknown,
+ expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY,
+ },
+ } {
+ status := sandboxstore.Status{
+ CreatedAt: createdAt,
+ State: test.state,
+ }
+ expect.State = test.expectedState
+ s := toCRISandbox(meta, status)
+ assert.Equal(t, expect, s, desc)
+ }
+}
+
+// TestFilterSandboxes exercises filterCRISandboxes against three sandboxes
+// (differing in id, state, and labels) for id, truncated-id, state, label,
+// and mixed filters, including a mixed filter that matches nothing.
+func TestFilterSandboxes(t *testing.T) {
+ c := newTestCRIService()
+ sandboxes := []sandboxstore.Sandbox{
+ sandboxstore.NewSandbox(
+ sandboxstore.Metadata{
+ ID: "1abcdef",
+ Name: "sandboxname-1",
+ Config: &runtime.PodSandboxConfig{
+ Metadata: &runtime.PodSandboxMetadata{
+ Name: "podname-1",
+ Uid: "uid-1",
+ Namespace: "ns-1",
+ Attempt: 1,
+ },
+ },
+ RuntimeHandler: "test-runtime-handler",
+ },
+ sandboxstore.Status{
+ CreatedAt: time.Now(),
+ State: sandboxstore.StateReady,
+ },
+ ),
+ sandboxstore.NewSandbox(
+ sandboxstore.Metadata{
+ ID: "2abcdef",
+ Name: "sandboxname-2",
+ Config: &runtime.PodSandboxConfig{
+ Metadata: &runtime.PodSandboxMetadata{
+ Name: "podname-2",
+ Uid: "uid-2",
+ Namespace: "ns-2",
+ Attempt: 2,
+ },
+ Labels: map[string]string{"a": "b"},
+ },
+ RuntimeHandler: "test-runtime-handler",
+ },
+ sandboxstore.Status{
+ CreatedAt: time.Now(),
+ State: sandboxstore.StateNotReady,
+ },
+ ),
+ sandboxstore.NewSandbox(
+ sandboxstore.Metadata{
+ ID: "3abcdef",
+ Name: "sandboxname-3",
+ Config: &runtime.PodSandboxConfig{
+ Metadata: &runtime.PodSandboxMetadata{
+ Name: "podname-2",
+ Uid: "uid-2",
+ Namespace: "ns-2",
+ Attempt: 2,
+ },
+ Labels: map[string]string{"c": "d"},
+ },
+ RuntimeHandler: "test-runtime-handler",
+ },
+ sandboxstore.Status{
+ CreatedAt: time.Now(),
+ State: sandboxstore.StateReady,
+ },
+ ),
+ }
+
+ // Create PodSandbox
+ testSandboxes := []*runtime.PodSandbox{}
+ for _, sb := range sandboxes {
+ testSandboxes = append(testSandboxes, toCRISandbox(sb.Metadata, sb.Status.Get()))
+ }
+
+ // Inject test sandbox metadata so truncated-id lookups can resolve.
+ for _, sb := range sandboxes {
+ assert.NoError(t, c.sandboxStore.Add(sb))
+ }
+
+ for desc, test := range map[string]struct {
+ filter *runtime.PodSandboxFilter
+ expect []*runtime.PodSandbox
+ }{
+ "no filter": {
+ expect: testSandboxes,
+ },
+ "id filter": {
+ filter: &runtime.PodSandboxFilter{Id: "2abcdef"},
+ expect: []*runtime.PodSandbox{testSandboxes[1]},
+ },
+ "truncid filter": {
+ filter: &runtime.PodSandboxFilter{Id: "2"},
+ expect: []*runtime.PodSandbox{testSandboxes[1]},
+ },
+ "state filter": {
+ filter: &runtime.PodSandboxFilter{
+ State: &runtime.PodSandboxStateValue{
+ State: runtime.PodSandboxState_SANDBOX_READY,
+ },
+ },
+ expect: []*runtime.PodSandbox{testSandboxes[0], testSandboxes[2]},
+ },
+ "label filter": {
+ filter: &runtime.PodSandboxFilter{
+ LabelSelector: map[string]string{"a": "b"},
+ },
+ expect: []*runtime.PodSandbox{testSandboxes[1]},
+ },
+ "mixed filter not matched": {
+ filter: &runtime.PodSandboxFilter{
+ Id: "1",
+ LabelSelector: map[string]string{"a": "b"},
+ },
+ expect: []*runtime.PodSandbox{},
+ },
+ "mixed filter matched": {
+ filter: &runtime.PodSandboxFilter{
+ State: &runtime.PodSandboxStateValue{
+ State: runtime.PodSandboxState_SANDBOX_READY,
+ },
+ LabelSelector: map[string]string{"c": "d"},
+ },
+ expect: []*runtime.PodSandbox{testSandboxes[2]},
+ },
+ } {
+ t.Logf("TestCase: %s", desc)
+ filtered := c.filterCRISandboxes(testSandboxes, test.filter)
+ assert.Equal(t, test.expect, filtered, desc)
+ }
+}
diff --git a/pkg/server/sandbox_portforward.go b/pkg/server/sandbox_portforward.go
new file mode 100644
index 000000000..6d382ba2b
--- /dev/null
+++ b/pkg/server/sandbox_portforward.go
@@ -0,0 +1,38 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
+)
+
+// PortForward prepares a streaming endpoint to forward ports from a PodSandbox, and returns the address.
+// The sandbox must exist and be in the READY state; the actual data copying
+// happens later when the stream server calls back into portForward.
+func (c *criService) PortForward(ctx context.Context, r *runtime.PortForwardRequest) (retRes *runtime.PortForwardResponse, retErr error) {
+ sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId())
+ if err != nil {
+ return nil, errors.Wrapf(err, "failed to find sandbox %q", r.GetPodSandboxId())
+ }
+ if sandbox.Status.Get().State != sandboxstore.StateReady {
+ return nil, errors.New("sandbox container is not running")
+ }
+ // TODO(random-liu): Verify that ports are exposed.
+ return c.streamServer.GetPortForward(r)
+}
diff --git a/pkg/server/sandbox_portforward_linux.go b/pkg/server/sandbox_portforward_linux.go
new file mode 100644
index 000000000..32b062456
--- /dev/null
+++ b/pkg/server/sandbox_portforward_linux.go
@@ -0,0 +1,126 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "fmt"
+ "io"
+ "net"
+ "time"
+
+ "github.com/containerd/containerd/log"
+ "github.com/containernetworking/plugins/pkg/ns"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// portForward uses netns to enter the sandbox network namespace, and forwards
+// a stream inside the namespace to a specific port. It keeps forwarding until
+// it exits or the client disconnects. For host-network sandboxes the dial is
+// performed directly in the host namespace.
+// NOTE: the error string below says "netwok" — a typo in a runtime message;
+// left as-is because this is a documentation-only pass.
+func (c *criService) portForward(ctx context.Context, id string, port int32, stream io.ReadWriteCloser) error {
+ s, err := c.sandboxStore.Get(id)
+ if err != nil {
+ return errors.Wrapf(err, "failed to find sandbox %q in store", id)
+ }
+
+ var netNSDo func(func(ns.NetNS) error) error
+ // netNSPath is the network namespace path for logging.
+ var netNSPath string
+ securityContext := s.Config.GetLinux().GetSecurityContext()
+ hostNet := securityContext.GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE
+ if !hostNet {
+ if closed, err := s.NetNS.Closed(); err != nil {
+ return errors.Wrapf(err, "failed to check netwok namespace closed for sandbox %q", id)
+ } else if closed {
+ return errors.Errorf("network namespace for sandbox %q is closed", id)
+ }
+ netNSDo = s.NetNS.Do
+ netNSPath = s.NetNS.GetPath()
+ } else {
+ // Run the function directly for host network.
+ netNSDo = func(do func(_ ns.NetNS) error) error {
+ return do(nil)
+ }
+ netNSPath = "host"
+ }
+
+ log.G(ctx).Infof("Executing port forwarding in network namespace %q", netNSPath)
+ err = netNSDo(func(_ ns.NetNS) error {
+ defer stream.Close()
+ // TODO: hardcoded to tcp4 because localhost resolves to ::1 by default if the system has IPv6 enabled.
+ // Theoretically happy eyeballs will try IPv6 first and fallback to IPv4
+ // but resolving localhost doesn't seem to return an IPv4 address, thus failing the connection.
+ conn, err := net.Dial("tcp4", fmt.Sprintf("localhost:%d", port))
+ if err != nil {
+ return errors.Wrapf(err, "failed to dial %d", port)
+ }
+ defer conn.Close()
+
+ // Buffered so both copy goroutines can report without blocking.
+ errCh := make(chan error, 2)
+ // Copy from the namespace port connection to the client stream
+ go func() {
+ log.G(ctx).Debugf("PortForward copying data from namespace %q port %d to the client stream", id, port)
+ _, err := io.Copy(stream, conn)
+ errCh <- err
+ }()
+
+ // Copy from the client stream to the namespace port connection
+ go func() {
+ log.G(ctx).Debugf("PortForward copying data from client stream to namespace %q port %d", id, port)
+ _, err := io.Copy(conn, stream)
+ errCh <- err
+ }()
+
+ // Wait until the first error is returned by one of the connections
+ // we use errFwd to store the result of the port forwarding operation
+ // if the context is cancelled close everything and return
+ var errFwd error
+ select {
+ case errFwd = <-errCh:
+ log.G(ctx).Debugf("PortForward stop forwarding in one direction in network namespace %q port %d: %v", id, port, errFwd)
+ case <-ctx.Done():
+ log.G(ctx).Debugf("PortForward cancelled in network namespace %q port %d: %v", id, port, ctx.Err())
+ return ctx.Err()
+ }
+ // give the other direction a chance to terminate gracefully or timeout
+ // after 1s
+ // https://linux.die.net/man/1/socat
+ const timeout = time.Second
+ select {
+ case e := <-errCh:
+ if errFwd == nil {
+ errFwd = e
+ }
+ log.G(ctx).Debugf("PortForward stopped forwarding in both directions in network namespace %q port %d: %v", id, port, e)
+ case <-time.After(timeout):
+ log.G(ctx).Debugf("PortForward timed out waiting to close the connection in network namespace %q port %d", id, port)
+ case <-ctx.Done():
+ log.G(ctx).Debugf("PortForward cancelled in network namespace %q port %d: %v", id, port, ctx.Err())
+ errFwd = ctx.Err()
+ }
+
+ return errFwd
+ })
+
+ if err != nil {
+ return errors.Wrapf(err, "failed to execute portforward in network namespace %q", netNSPath)
+ }
+ log.G(ctx).Infof("Finish port forwarding for %q port %d", id, port)
+
+ return nil
+}
diff --git a/pkg/server/sandbox_portforward_other.go b/pkg/server/sandbox_portforward_other.go
new file mode 100644
index 000000000..1b88170ed
--- /dev/null
+++ b/pkg/server/sandbox_portforward_other.go
@@ -0,0 +1,33 @@
+// +build !windows,!linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "io"
+
+ "github.com/containerd/containerd/errdefs"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+)
+
+// portForward is a stub for platforms other than linux and windows; port
+// forwarding is not supported there and this always fails with an
+// errdefs.ErrNotImplemented error.
+func (c *criService) portForward(ctx context.Context, id string, port int32, stream io.ReadWriteCloser) error {
+	return errors.Wrap(errdefs.ErrNotImplemented, "port forward")
+}
diff --git a/pkg/server/sandbox_portforward_windows.go b/pkg/server/sandbox_portforward_windows.go
new file mode 100644
index 000000000..3c328a314
--- /dev/null
+++ b/pkg/server/sandbox_portforward_windows.go
@@ -0,0 +1,80 @@
+// +build windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ "k8s.io/utils/exec"
+
+ "github.com/containerd/cri/pkg/ioutil"
+ sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
+)
+
+// portForward forwards the client stream to the given port inside the
+// sandbox by executing wincat.exe in the sandbox container; it keeps
+// forwarding until wincat exits or the client disconnects.
+// NOTE(review): the non-windows implementations declare stream as
+// io.ReadWriteCloser — confirm whether these signatures should match.
+func (c *criService) portForward(ctx context.Context, id string, port int32, stream io.ReadWriter) error {
+	// Wrap the stream and the stderr buffer in no-op write closers so the
+	// exec machinery's Close calls do not close the underlying stream.
+	stdout := ioutil.NewNopWriteCloser(stream)
+	stderrBuffer := new(bytes.Buffer)
+	stderr := ioutil.NewNopWriteCloser(stderrBuffer)
+	// localhost is resolved to 127.0.0.1 in ipv4, and ::1 in ipv6.
+	// Explicitly using ipv4 IP address in here to avoid flakiness.
+	cmd := []string{"wincat.exe", "127.0.0.1", fmt.Sprint(port)}
+	err := c.execInSandbox(ctx, id, cmd, stream, stdout, stderr)
+	if err != nil {
+		// Include captured stderr so wincat failures are diagnosable.
+		return errors.Wrapf(err, "failed to execute port forward in sandbox: %s", stderrBuffer.String())
+	}
+	return nil
+}
+
+// execInSandbox runs cmd synchronously inside the sandbox container
+// identified by sandboxID, wired to the given stdio. The sandbox must be
+// in ready state. A non-zero exit code is reported as *exec.CodeExitError.
+func (c *criService) execInSandbox(ctx context.Context, sandboxID string, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser) error {
+	// Get sandbox from our sandbox store.
+	sb, err := c.sandboxStore.Get(sandboxID)
+	if err != nil {
+		return errors.Wrapf(err, "failed to find sandbox %q in store", sandboxID)
+	}
+
+	// Check the sandbox state; exec is only allowed while it is running.
+	state := sb.Status.Get().State
+	if state != sandboxstore.StateReady {
+		return errors.Errorf("sandbox is in %s state", fmt.Sprint(state))
+	}
+
+	opts := execOptions{
+		cmd:    cmd,
+		stdin:  stdin,
+		stdout: stdout,
+		stderr: stderr,
+		tty:    false,
+		resize: nil,
+	}
+	exitCode, err := c.execInternal(ctx, sb.Container, sandboxID, opts)
+	if err != nil {
+		return errors.Wrap(err, "failed to exec in sandbox")
+	}
+	if *exitCode == 0 {
+		return nil
+	}
+	// Surface a non-zero exit as a typed error so callers can inspect the code.
+	return &exec.CodeExitError{
+		Err:  errors.Errorf("error executing command %v, exit code %d", cmd, *exitCode),
+		Code: int(*exitCode),
+	}
+}
diff --git a/pkg/server/sandbox_remove.go b/pkg/server/sandbox_remove.go
new file mode 100644
index 000000000..2c2deb2e0
--- /dev/null
+++ b/pkg/server/sandbox_remove.go
@@ -0,0 +1,115 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/errdefs"
+ "github.com/containerd/containerd/log"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/store"
+ sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
+)
+
+// RemovePodSandbox removes the sandbox. If there are running containers in the
+// sandbox, they should be forcibly removed.
+func (c *criService) RemovePodSandbox(ctx context.Context, r *runtime.RemovePodSandboxRequest) (*runtime.RemovePodSandboxResponse, error) {
+	sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId())
+	if err != nil {
+		if err != store.ErrNotExist {
+			return nil, errors.Wrapf(err, "an error occurred when try to find sandbox %q",
+				r.GetPodSandboxId())
+		}
+		// Do not return error if the id doesn't exist.
+		log.G(ctx).Tracef("RemovePodSandbox called for sandbox %q that does not exist",
+			r.GetPodSandboxId())
+		return &runtime.RemovePodSandboxResponse{}, nil
+	}
+	// Use the full sandbox id.
+	id := sandbox.ID
+
+	// Forcibly stop the sandbox first if it is still ready or in an
+	// unknown state.
+	state := sandbox.Status.Get().State
+	if state == sandboxstore.StateReady || state == sandboxstore.StateUnknown {
+		logrus.Infof("Forcibly stopping sandbox %q", id)
+		if err := c.stopPodSandbox(ctx, sandbox); err != nil {
+			return nil, errors.Wrapf(err, "failed to forcibly stop sandbox %q", id)
+		}
+	}
+
+	// Return error if sandbox network namespace is not closed yet.
+	if sandbox.NetNS != nil {
+		nsPath := sandbox.NetNS.GetPath()
+		if closed, err := sandbox.NetNS.Closed(); err != nil {
+			return nil, errors.Wrapf(err, "failed to check sandbox network namespace %q closed", nsPath)
+		} else if !closed {
+			return nil, errors.Errorf("sandbox network namespace %q is not fully closed", nsPath)
+		}
+	}
+
+	// Remove all containers inside the sandbox.
+	// NOTE(random-liu): container could still be created after this point, Kubelet should
+	// not rely on this behavior.
+	// TODO(random-liu): Introduce an intermediate state to avoid container creation after
+	// this point.
+	cntrs := c.containerStore.List()
+	for _, cntr := range cntrs {
+		if cntr.SandboxID != id {
+			continue
+		}
+		_, err = c.RemoveContainer(ctx, &runtime.RemoveContainerRequest{ContainerId: cntr.ID})
+		if err != nil {
+			return nil, errors.Wrapf(err, "failed to remove container %q", cntr.ID)
+		}
+	}
+
+	// Cleanup the sandbox root directories.
+	sandboxRootDir := c.getSandboxRootDir(id)
+	if err := ensureRemoveAll(ctx, sandboxRootDir); err != nil {
+		return nil, errors.Wrapf(err, "failed to remove sandbox root directory %q",
+			sandboxRootDir)
+	}
+	volatileSandboxRootDir := c.getVolatileSandboxRootDir(id)
+	if err := ensureRemoveAll(ctx, volatileSandboxRootDir); err != nil {
+		return nil, errors.Wrapf(err, "failed to remove volatile sandbox root directory %q",
+			volatileSandboxRootDir)
+	}
+
+	// Delete sandbox container. A missing container is tolerated; it may
+	// already have been cleaned up.
+	if err := sandbox.Container.Delete(ctx, containerd.WithSnapshotCleanup); err != nil {
+		if !errdefs.IsNotFound(err) {
+			return nil, errors.Wrapf(err, "failed to delete sandbox container %q", id)
+		}
+		log.G(ctx).Tracef("Remove called for sandbox container %q that does not exist", id)
+	}
+
+	// Remove sandbox from sandbox store. Note that once the sandbox is successfully
+	// deleted:
+	// 1) ListPodSandbox will not include this sandbox.
+	// 2) PodSandboxStatus and StopPodSandbox will return error.
+	// 3) On-going operations which have held the reference will not be affected.
+	c.sandboxStore.Delete(id)
+
+	// Release the sandbox name reserved for the sandbox.
+	c.sandboxNameIndex.ReleaseByKey(id)
+
+	return &runtime.RemovePodSandboxResponse{}, nil
+}
diff --git a/pkg/server/sandbox_run.go b/pkg/server/sandbox_run.go
new file mode 100644
index 000000000..e0b207d7a
--- /dev/null
+++ b/pkg/server/sandbox_run.go
@@ -0,0 +1,548 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "encoding/json"
+ "math"
+ goruntime "runtime"
+ "strings"
+
+ "github.com/containerd/containerd"
+ containerdio "github.com/containerd/containerd/cio"
+ "github.com/containerd/containerd/errdefs"
+ "github.com/containerd/containerd/log"
+ cni "github.com/containerd/go-cni"
+ "github.com/containerd/nri"
+ v1 "github.com/containerd/nri/types/v1"
+ "github.com/containerd/typeurl"
+ "github.com/davecgh/go-spew/spew"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/annotations"
+ criconfig "github.com/containerd/cri/pkg/config"
+ customopts "github.com/containerd/cri/pkg/containerd/opts"
+ ctrdutil "github.com/containerd/cri/pkg/containerd/util"
+ "github.com/containerd/cri/pkg/netns"
+ "github.com/containerd/cri/pkg/server/bandwidth"
+ sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
+ "github.com/containerd/cri/pkg/util"
+ selinux "github.com/opencontainers/selinux/go-selinux"
+)
+
+func init() {
+	// Register the sandbox metadata type with typeurl so it can be stored
+	// in (and decoded from) the containerd container extension — see the
+	// WithContainerExtension(sandboxMetadataExtension, ...) opt below.
+	typeurl.Register(&sandboxstore.Metadata{},
+		"github.com/containerd/cri/pkg/store/sandbox", "Metadata")
+}
+
+// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
+// the sandbox is in ready state.
+func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandboxRequest) (_ *runtime.RunPodSandboxResponse, retErr error) {
+	config := r.GetConfig()
+	log.G(ctx).Debugf("Sandbox config %+v", config)
+
+	// Generate unique id and name for the sandbox and reserve the name.
+	id := util.GenerateID()
+	metadata := config.GetMetadata()
+	if metadata == nil {
+		return nil, errors.New("sandbox config must include metadata")
+	}
+	name := makeSandboxName(metadata)
+	log.G(ctx).Debugf("Generated id %q for sandbox %q", id, name)
+	// Reserve the sandbox name to avoid concurrent `RunPodSandbox` request starting the
+	// same sandbox.
+	if err := c.sandboxNameIndex.Reserve(name, id); err != nil {
+		return nil, errors.Wrapf(err, "failed to reserve sandbox name %q", name)
+	}
+	defer func() {
+		// Release the name if the function returns with an error.
+		if retErr != nil {
+			c.sandboxNameIndex.ReleaseByName(name)
+		}
+	}()
+
+	// Create initial internal sandbox object.
+	sandbox := sandboxstore.NewSandbox(
+		sandboxstore.Metadata{
+			ID:             id,
+			Name:           name,
+			Config:         config,
+			RuntimeHandler: r.GetRuntimeHandler(),
+		},
+		sandboxstore.Status{
+			State: sandboxstore.StateUnknown,
+		},
+	)
+
+	// Ensure sandbox container image snapshot.
+	image, err := c.ensureImageExists(ctx, c.config.SandboxImage, config)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to get sandbox image %q", c.config.SandboxImage)
+	}
+	containerdImage, err := c.toContainerdImage(ctx, *image)
+	if err != nil {
+		return nil, errors.Wrapf(err, "failed to get image from containerd %q", image.ID)
+	}
+
+	ociRuntime, err := c.getSandboxRuntime(config, r.GetRuntimeHandler())
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to get sandbox runtime")
+	}
+	log.G(ctx).Debugf("Use OCI %+v for sandbox %q", ociRuntime, id)
+
+	podNetwork := true
+	// Pod network is always needed on windows.
+	if goruntime.GOOS != "windows" &&
+		config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
+		// Pod network is not needed on linux with host network.
+		podNetwork = false
+	}
+	if podNetwork {
+		// If it is not in host network namespace then create a namespace and set the sandbox
+		// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
+		// namespaces. If the pod is in host network namespace then both are empty and should not
+		// be used.
+		sandbox.NetNS, err = netns.NewNetNS()
+		if err != nil {
+			return nil, errors.Wrapf(err, "failed to create network namespace for sandbox %q", id)
+		}
+		sandbox.NetNSPath = sandbox.NetNS.GetPath()
+		defer func() {
+			if retErr != nil {
+				// Teardown network if an error is returned.
+				if err := c.teardownPodNetwork(ctx, sandbox); err != nil {
+					log.G(ctx).WithError(err).Errorf("Failed to destroy network for sandbox %q", id)
+				}
+
+				if err := sandbox.NetNS.Remove(); err != nil {
+					log.G(ctx).WithError(err).Errorf("Failed to remove network namespace %s for sandbox %q", sandbox.NetNSPath, id)
+				}
+				sandbox.NetNSPath = ""
+			}
+		}()
+
+		// Setup network for sandbox.
+		// Certain VM based solutions like clear containers (Issue containerd/cri-containerd#524)
+		// rely on the assumption that CRI shim will not be querying the network namespace to check the
+		// network states such as IP.
+		// In future runtime implementation should avoid relying on CRI shim implementation details.
+		// In this case however caching the IP will add a subtle performance enhancement by avoiding
+		// calls to network namespace of the pod to query the IP of the veth interface on every
+		// SandboxStatus request.
+		if err := c.setupPodNetwork(ctx, &sandbox); err != nil {
+			return nil, errors.Wrapf(err, "failed to setup network for sandbox %q", id)
+		}
+	}
+
+	// Create sandbox container.
+	// NOTE: sandboxContainerSpec SHOULD NOT have side
+	// effects, e.g. accessing/creating files, so that we can test
+	// it safely.
+	spec, err := c.sandboxContainerSpec(id, config, &image.ImageSpec.Config, sandbox.NetNSPath, ociRuntime.PodAnnotations)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to generate sandbox container spec")
+	}
+	log.G(ctx).Debugf("Sandbox container %q spec: %#+v", id, spew.NewFormatter(spec))
+	sandbox.ProcessLabel = spec.Process.SelinuxLabel
+	defer func() {
+		if retErr != nil {
+			selinux.ReleaseLabel(sandbox.ProcessLabel)
+		}
+	}()
+
+	// handle any KVM based runtime
+	if err := modifyProcessLabel(ociRuntime.Type, spec); err != nil {
+		return nil, err
+	}
+
+	if config.GetLinux().GetSecurityContext().GetPrivileged() {
+		// If privileged don't set selinux label, but we still record the MCS label so that
+		// the unused label can be freed later.
+		spec.Process.SelinuxLabel = ""
+	}
+
+	// Generate spec options that will be applied to the spec later.
+	specOpts, err := c.sandboxContainerSpecOpts(config, &image.ImageSpec.Config)
+	if err != nil {
+		// Fixed typo in error message ("sanbdox" -> "sandbox").
+		return nil, errors.Wrap(err, "failed to generate sandbox container spec options")
+	}
+
+	sandboxLabels := buildLabels(config.Labels, containerKindSandbox)
+
+	runtimeOpts, err := generateRuntimeOptions(ociRuntime, c.config)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to generate runtime options")
+	}
+	opts := []containerd.NewContainerOpts{
+		containerd.WithSnapshotter(c.config.ContainerdConfig.Snapshotter),
+		customopts.WithNewSnapshot(id, containerdImage),
+		containerd.WithSpec(spec, specOpts...),
+		containerd.WithContainerLabels(sandboxLabels),
+		containerd.WithContainerExtension(sandboxMetadataExtension, &sandbox.Metadata),
+		containerd.WithRuntime(ociRuntime.Type, runtimeOpts)}
+
+	container, err := c.client.NewContainer(ctx, id, opts...)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to create containerd container")
+	}
+	defer func() {
+		if retErr != nil {
+			deferCtx, deferCancel := ctrdutil.DeferContext()
+			defer deferCancel()
+			if err := container.Delete(deferCtx, containerd.WithSnapshotCleanup); err != nil {
+				log.G(ctx).WithError(err).Errorf("Failed to delete containerd container %q", id)
+			}
+		}
+	}()
+
+	// Create sandbox container root directories.
+	sandboxRootDir := c.getSandboxRootDir(id)
+	if err := c.os.MkdirAll(sandboxRootDir, 0755); err != nil {
+		return nil, errors.Wrapf(err, "failed to create sandbox root directory %q",
+			sandboxRootDir)
+	}
+	defer func() {
+		if retErr != nil {
+			// Cleanup the sandbox root directory.
+			if err := c.os.RemoveAll(sandboxRootDir); err != nil {
+				log.G(ctx).WithError(err).Errorf("Failed to remove sandbox root directory %q",
+					sandboxRootDir)
+			}
+		}
+	}()
+	volatileSandboxRootDir := c.getVolatileSandboxRootDir(id)
+	if err := c.os.MkdirAll(volatileSandboxRootDir, 0755); err != nil {
+		return nil, errors.Wrapf(err, "failed to create volatile sandbox root directory %q",
+			volatileSandboxRootDir)
+	}
+	defer func() {
+		if retErr != nil {
+			// Cleanup the volatile sandbox root directory.
+			if err := c.os.RemoveAll(volatileSandboxRootDir); err != nil {
+				log.G(ctx).WithError(err).Errorf("Failed to remove volatile sandbox root directory %q",
+					volatileSandboxRootDir)
+			}
+		}
+	}()
+
+	// Setup files required for the sandbox.
+	if err = c.setupSandboxFiles(id, config); err != nil {
+		return nil, errors.Wrapf(err, "failed to setup sandbox files")
+	}
+	defer func() {
+		if retErr != nil {
+			if err = c.cleanupSandboxFiles(id, config); err != nil {
+				log.G(ctx).WithError(err).Errorf("Failed to cleanup sandbox files in %q",
+					sandboxRootDir)
+			}
+		}
+	}()
+
+	// Update sandbox created timestamp.
+	info, err := container.Info(ctx)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to get sandbox container info")
+	}
+
+	// Create sandbox task in containerd.
+	log.G(ctx).Tracef("Create sandbox container (id=%q, name=%q).",
+		id, name)
+
+	taskOpts := c.taskOpts(ociRuntime.Type)
+	// We don't need stdio for sandbox container.
+	task, err := container.NewTask(ctx, containerdio.NullIO, taskOpts...)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to create containerd task")
+	}
+	defer func() {
+		if retErr != nil {
+			deferCtx, deferCancel := ctrdutil.DeferContext()
+			defer deferCancel()
+			// Cleanup the sandbox container if an error is returned.
+			if _, err := task.Delete(deferCtx, WithNRISandboxDelete(id), containerd.WithProcessKill); err != nil && !errdefs.IsNotFound(err) {
+				log.G(ctx).WithError(err).Errorf("Failed to delete sandbox container %q", id)
+			}
+		}
+	}()
+
+	// wait is a long running background request, no timeout needed.
+	exitCh, err := task.Wait(ctrdutil.NamespacedContext())
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to wait for sandbox container task")
+	}
+
+	nric, err := nri.New()
+	if err != nil {
+		return nil, errors.Wrap(err, "unable to create nri client")
+	}
+	if nric != nil {
+		nriSB := &nri.Sandbox{
+			ID:     id,
+			Labels: config.Labels,
+		}
+		if _, err := nric.InvokeWithSandbox(ctx, task, v1.Create, nriSB); err != nil {
+			return nil, errors.Wrap(err, "nri invoke")
+		}
+	}
+
+	if err := task.Start(ctx); err != nil {
+		return nil, errors.Wrapf(err, "failed to start sandbox container task %q", id)
+	}
+
+	if err := sandbox.Status.Update(func(status sandboxstore.Status) (sandboxstore.Status, error) {
+		// Set the pod sandbox as ready after successfully start sandbox container.
+		status.Pid = task.Pid()
+		status.State = sandboxstore.StateReady
+		status.CreatedAt = info.CreatedAt
+		return status, nil
+	}); err != nil {
+		return nil, errors.Wrap(err, "failed to update sandbox status")
+	}
+
+	// Add sandbox into sandbox store; its status was just set to ready above.
+	sandbox.Container = container
+
+	if err := c.sandboxStore.Add(sandbox); err != nil {
+		return nil, errors.Wrapf(err, "failed to add sandbox %+v into store", sandbox)
+	}
+
+	// start the monitor after adding sandbox into the store, this ensures
+	// that sandbox is in the store, when event monitor receives the TaskExit event.
+	//
+	// TaskOOM from containerd may come before sandbox is added to store,
+	// but we don't care about sandbox TaskOOM right now, so it is fine.
+	c.eventMonitor.startExitMonitor(context.Background(), id, task.Pid(), exitCh)
+
+	return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil
+}
+
+// setupPodNetwork sets up the network for a pod via CNI and caches the
+// selected pod IPs and the raw CNI result on the sandbox object.
+func (c *criService) setupPodNetwork(ctx context.Context, sandbox *sandboxstore.Sandbox) error {
+	var (
+		id     = sandbox.ID
+		config = sandbox.Config
+		path   = sandbox.NetNSPath
+	)
+	if c.netPlugin == nil {
+		return errors.New("cni config not initialized")
+	}
+
+	opts, err := cniNamespaceOpts(id, config)
+	if err != nil {
+		return errors.Wrap(err, "get cni namespace options")
+	}
+
+	result, err := c.netPlugin.Setup(ctx, id, path, opts...)
+	if err != nil {
+		return err
+	}
+	logDebugCNIResult(ctx, id, result)
+	// Check if the default interface has IP config; only then is the
+	// setup considered successful.
+	if configs, ok := result.Interfaces[defaultIfName]; ok && len(configs.IPConfigs) > 0 {
+		sandbox.IP, sandbox.AdditionalIPs = selectPodIPs(configs.IPConfigs)
+		sandbox.CNIResult = result
+		return nil
+	}
+	return errors.Errorf("failed to find network info for sandbox %q", id)
+}
+
+// cniNamespaceOpts gets CNI namespace options from the sandbox config.
+// Labels are always included; port mappings, bandwidth, and DNS
+// capabilities are only added when the config specifies them.
+func cniNamespaceOpts(id string, config *runtime.PodSandboxConfig) ([]cni.NamespaceOpts, error) {
+	opts := []cni.NamespaceOpts{
+		cni.WithLabels(toCNILabels(id, config)),
+	}
+
+	portMappings := toCNIPortMappings(config.GetPortMappings())
+	if len(portMappings) > 0 {
+		opts = append(opts, cni.WithCapabilityPortMap(portMappings))
+	}
+
+	// Will return an error if the bandwidth limitation has the wrong unit
+	// or an unreasonable value see validateBandwidthIsReasonable()
+	bandWidth, err := toCNIBandWidth(config.Annotations)
+	if err != nil {
+		return nil, err
+	}
+	if bandWidth != nil {
+		opts = append(opts, cni.WithCapabilityBandWidth(*bandWidth))
+	}
+
+	dns := toCNIDNS(config.GetDnsConfig())
+	if dns != nil {
+		opts = append(opts, cni.WithCapabilityDNS(*dns))
+	}
+
+	return opts, nil
+}
+
+// toCNILabels adds pod metadata into CNI labels (presumably surfaced to
+// CNI plugins as CNI_ARGS — confirm against the go-cni implementation).
+func toCNILabels(id string, config *runtime.PodSandboxConfig) map[string]string {
+	return map[string]string{
+		"K8S_POD_NAMESPACE":          config.GetMetadata().GetNamespace(),
+		"K8S_POD_NAME":               config.GetMetadata().GetName(),
+		"K8S_POD_INFRA_CONTAINER_ID": id,
+		"IgnoreUnknown":              "1",
+	}
+}
+
+// toCNIBandWidth converts CRI bandwidth annotations to CNI bandwidth
+// capability args. It returns (nil, nil) when the pod carries no
+// bandwidth annotations.
+func toCNIBandWidth(annotations map[string]string) (*cni.BandWidth, error) {
+	ingress, egress, err := bandwidth.ExtractPodBandwidthResources(annotations)
+	if err != nil {
+		return nil, errors.Wrap(err, "reading pod bandwidth annotations")
+	}
+
+	if ingress == nil && egress == nil {
+		return nil, nil
+	}
+
+	bandWidth := &cni.BandWidth{}
+
+	// Bursts are pinned at the maximum supported value; only the rates
+	// come from the pod annotations.
+	if ingress != nil {
+		bandWidth.IngressRate = uint64(ingress.Value())
+		bandWidth.IngressBurst = math.MaxUint32
+	}
+
+	if egress != nil {
+		bandWidth.EgressRate = uint64(egress.Value())
+		bandWidth.EgressBurst = math.MaxUint32
+	}
+
+	return bandWidth, nil
+}
+
+// toCNIPortMappings converts CRI port mappings to CNI port mapping
+// capability args. Mappings without a positive host port are skipped,
+// since there is nothing to map on the host side.
+func toCNIPortMappings(criPortMappings []*runtime.PortMapping) []cni.PortMapping {
+	var portMappings []cni.PortMapping
+	for _, mapping := range criPortMappings {
+		if mapping.HostPort <= 0 {
+			continue
+		}
+		portMappings = append(portMappings, cni.PortMapping{
+			HostPort:      mapping.HostPort,
+			ContainerPort: mapping.ContainerPort,
+			Protocol:      strings.ToLower(mapping.Protocol.String()),
+			HostIP:        mapping.HostIp,
+		})
+	}
+	return portMappings
+}
+
+// toCNIDNS converts CRI DNSConfig to CNI DNS capability args. It returns
+// nil when the pod has no DNS config, in which case the caller omits the
+// DNS capability entirely.
+func toCNIDNS(dns *runtime.DNSConfig) *cni.DNS {
+	if dns == nil {
+		return nil
+	}
+	return &cni.DNS{
+		Servers:  dns.GetServers(),
+		Searches: dns.GetSearches(),
+		Options:  dns.GetOptions(),
+	}
+}
+
+// selectPodIPs selects an ip from the ip list. It prefers ipv4 more than ipv6
+// and returns the additional ips. It returns empty results when ipConfigs
+// is empty instead of panicking.
+// TODO(random-liu): Revisit the ip order in the ipv6 beta stage. (cri#1278)
+func selectPodIPs(ipConfigs []*cni.IPConfig) (string, []string) {
+	var (
+		additionalIPs []string
+		ip            string
+	)
+	for _, c := range ipConfigs {
+		// The first ipv4 address becomes the primary pod IP; every other
+		// address is reported as an additional IP.
+		if c.IP.To4() != nil && ip == "" {
+			ip = c.IP.String()
+		} else {
+			additionalIPs = append(additionalIPs, c.IP.String())
+		}
+	}
+	if ip != "" {
+		return ip, additionalIPs
+	}
+	// No ipv4 address found; fall back to the first ip (ipv6). Guard
+	// against an empty list to avoid an index-out-of-range panic.
+	if len(additionalIPs) == 0 {
+		return "", nil
+	}
+	if len(additionalIPs) == 1 {
+		return additionalIPs[0], nil
+	}
+	return additionalIPs[0], additionalIPs[1:]
+}
+
+// untrustedWorkload returns true if the sandbox contains untrusted workload,
+// i.e. the pod carries the annotations.UntrustedWorkload annotation set to
+// "true".
+func untrustedWorkload(config *runtime.PodSandboxConfig) bool {
+	return config.GetAnnotations()[annotations.UntrustedWorkload] == "true"
+}
+
+// hostAccessingSandbox returns true if the sandbox configuration
+// requires additional host access for the sandbox, i.e. it shares the
+// host network, PID, or IPC namespace.
+func hostAccessingSandbox(config *runtime.PodSandboxConfig) bool {
+	securityContext := config.GetLinux().GetSecurityContext()
+
+	namespaceOptions := securityContext.GetNamespaceOptions()
+	if namespaceOptions.GetNetwork() == runtime.NamespaceMode_NODE ||
+		namespaceOptions.GetPid() == runtime.NamespaceMode_NODE ||
+		namespaceOptions.GetIpc() == runtime.NamespaceMode_NODE {
+		return true
+	}
+
+	return false
+}
+
+// getSandboxRuntime returns the runtime configuration for sandbox.
+// If the sandbox contains untrusted workload, runtime for untrusted workload will be returned,
+// or else default runtime will be returned.
+func (c *criService) getSandboxRuntime(config *runtime.PodSandboxConfig, runtimeHandler string) (criconfig.Runtime, error) {
+	if untrustedWorkload(config) {
+		// If the untrusted annotation is provided, runtimeHandler MUST be empty.
+		if runtimeHandler != "" && runtimeHandler != criconfig.RuntimeUntrusted {
+			return criconfig.Runtime{}, errors.New("untrusted workload with explicit runtime handler is not allowed")
+		}
+
+		// If the untrusted workload is requesting access to the host/node, this request will fail.
+		//
+		// Note: If the workload is marked untrusted but requests privileged, this can be granted, as the
+		// runtime may support this. For example, in a virtual-machine isolated runtime, privileged
+		// is a supported option, granting the workload to access the entire guest VM instead of host.
+		// TODO(windows): Deprecate this so that we don't need to handle it for windows.
+		if hostAccessingSandbox(config) {
+			return criconfig.Runtime{}, errors.New("untrusted workload with host access is not allowed")
+		}
+
+		runtimeHandler = criconfig.RuntimeUntrusted
+	}
+
+	// An empty handler selects the configured default runtime.
+	if runtimeHandler == "" {
+		runtimeHandler = c.config.ContainerdConfig.DefaultRuntimeName
+	}
+
+	handler, ok := c.config.ContainerdConfig.Runtimes[runtimeHandler]
+	if !ok {
+		return criconfig.Runtime{}, errors.Errorf("no runtime for %q is configured", runtimeHandler)
+	}
+	return handler, nil
+}
+
+// logDebugCNIResult logs the JSON-marshalled CNI result for the sandbox at
+// debug level. It returns early when debug logging is disabled so the
+// marshal cost is only paid when the output would actually be emitted.
+func logDebugCNIResult(ctx context.Context, sandboxID string, result *cni.CNIResult) {
+	if logrus.GetLevel() < logrus.DebugLevel {
+		return
+	}
+	cniResult, err := json.Marshal(result)
+	if err != nil {
+		// err is attached via WithError; no need to repeat it with %v in
+		// the message (the original logged it twice).
+		log.G(ctx).WithError(err).Errorf("Failed to marshal CNI result for sandbox %q", sandboxID)
+		return
+	}
+	log.G(ctx).Debugf("cni result for sandbox %q: %s", sandboxID, string(cniResult))
+}
diff --git a/pkg/server/sandbox_run_linux.go b/pkg/server/sandbox_run_linux.go
new file mode 100644
index 000000000..f886d2ae2
--- /dev/null
+++ b/pkg/server/sandbox_run_linux.go
@@ -0,0 +1,308 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "fmt"
+ "os"
+ "strings"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/oci"
+ "github.com/containerd/containerd/plugin"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ selinux "github.com/opencontainers/selinux/go-selinux"
+ "github.com/pkg/errors"
+ "golang.org/x/sys/unix"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/annotations"
+ customopts "github.com/containerd/cri/pkg/containerd/opts"
+ osinterface "github.com/containerd/cri/pkg/os"
+)
+
+func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxConfig,
+ imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (_ *runtimespec.Spec, retErr error) {
+ // Creates a spec Generator with the default spec.
+ // TODO(random-liu): [P1] Compare the default settings with docker and containerd default.
+ specOpts := []oci.SpecOpts{
+ customopts.WithoutRunMount,
+ customopts.WithoutDefaultSecuritySettings,
+ customopts.WithRelativeRoot(relativeRootfsPath),
+ oci.WithEnv(imageConfig.Env),
+ oci.WithRootFSReadonly(),
+ oci.WithHostname(config.GetHostname()),
+ }
+ if imageConfig.WorkingDir != "" {
+ specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
+ }
+
+ if len(imageConfig.Entrypoint) == 0 && len(imageConfig.Cmd) == 0 {
+ // Pause image must have entrypoint or cmd.
+ return nil, errors.Errorf("invalid empty entrypoint and cmd in image config %+v", imageConfig)
+ }
+ specOpts = append(specOpts, oci.WithProcessArgs(append(imageConfig.Entrypoint, imageConfig.Cmd...)...))
+
+ // Set cgroups parent.
+ if c.config.DisableCgroup {
+ specOpts = append(specOpts, customopts.WithDisabledCgroups)
+ } else {
+ if config.GetLinux().GetCgroupParent() != "" {
+ cgroupsPath := getCgroupsPath(config.GetLinux().GetCgroupParent(), id)
+ specOpts = append(specOpts, oci.WithCgroup(cgroupsPath))
+ }
+ }
+
+ // When the cgroup parent is not set, containerd-shim will create the container in a child cgroup
+ // of the cgroup that the shim itself is in.
+ // TODO(random-liu): [P2] Set default cgroup path if cgroup parent is not specified.
+
+ // Set namespace options.
+ var (
+ securityContext = config.GetLinux().GetSecurityContext()
+ nsOptions = securityContext.GetNamespaceOptions()
+ )
+ if nsOptions.GetNetwork() == runtime.NamespaceMode_NODE {
+ specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.NetworkNamespace))
+ specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UTSNamespace))
+ } else {
+ specOpts = append(specOpts, oci.WithLinuxNamespace(
+ runtimespec.LinuxNamespace{
+ Type: runtimespec.NetworkNamespace,
+ Path: nsPath,
+ }))
+ }
+ if nsOptions.GetPid() == runtime.NamespaceMode_NODE {
+ specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.PIDNamespace))
+ }
+ if nsOptions.GetIpc() == runtime.NamespaceMode_NODE {
+ specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.IPCNamespace))
+ }
+
+ // It's fine to generate the spec before the sandbox /dev/shm
+ // is actually created.
+ sandboxDevShm := c.getSandboxDevShm(id)
+ if nsOptions.GetIpc() == runtime.NamespaceMode_NODE {
+ sandboxDevShm = devShm
+ }
+ specOpts = append(specOpts, oci.WithMounts([]runtimespec.Mount{
+ {
+ Source: sandboxDevShm,
+ Destination: devShm,
+ Type: "bind",
+ Options: []string{"rbind", "ro"},
+ },
+ // Add resolv.conf for katacontainers to setup the DNS of pod VM properly.
+ {
+ Source: c.getResolvPath(id),
+ Destination: resolvConfPath,
+ Type: "bind",
+ Options: []string{"rbind", "ro"},
+ },
+ }))
+
+ processLabel, mountLabel, err := initLabelsFromOpt(securityContext.GetSelinuxOptions())
+ if err != nil {
+ return nil, errors.Wrapf(err, "failed to init selinux options %+v", securityContext.GetSelinuxOptions())
+ }
+ defer func() {
+ if retErr != nil {
+ selinux.ReleaseLabel(processLabel)
+ }
+ }()
+
+ supplementalGroups := securityContext.GetSupplementalGroups()
+ specOpts = append(specOpts,
+ customopts.WithSelinuxLabels(processLabel, mountLabel),
+ customopts.WithSupplementalGroups(supplementalGroups),
+ )
+
+ // Add sysctls
+ sysctls := config.GetLinux().GetSysctls()
+ specOpts = append(specOpts, customopts.WithSysctls(sysctls))
+
+ // Note: LinuxSandboxSecurityContext does not currently provide an apparmor profile
+
+ if !c.config.DisableCgroup {
+ specOpts = append(specOpts, customopts.WithDefaultSandboxShares)
+ }
+ specOpts = append(specOpts, customopts.WithPodOOMScoreAdj(int(defaultSandboxOOMAdj), c.config.RestrictOOMScoreAdj))
+
+ for pKey, pValue := range getPassthroughAnnotations(config.Annotations,
+ runtimePodAnnotations) {
+ specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
+ }
+
+ specOpts = append(specOpts,
+ customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeSandbox),
+ customopts.WithAnnotation(annotations.SandboxID, id),
+ customopts.WithAnnotation(annotations.SandboxLogDir, config.GetLogDirectory()),
+ )
+
+ return c.runtimeSpec(id, "", specOpts...)
+}
+
+// sandboxContainerSpecOpts generates OCI spec options for
+// the sandbox container.
+func (c *criService) sandboxContainerSpecOpts(config *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
+ var (
+ securityContext = config.GetLinux().GetSecurityContext()
+ specOpts []oci.SpecOpts
+ )
+ seccompSpecOpts, err := c.generateSeccompSpecOpts(
+ securityContext.GetSeccompProfilePath(),
+ securityContext.GetPrivileged(),
+ c.seccompEnabled())
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to generate seccomp spec opts")
+ }
+ if seccompSpecOpts != nil {
+ specOpts = append(specOpts, seccompSpecOpts)
+ }
+
+ userstr, err := generateUserString(
+ "",
+ securityContext.GetRunAsUser(),
+ securityContext.GetRunAsGroup(),
+ )
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to generate user string")
+ }
+ if userstr == "" {
+ // Lastly, since no user override was passed via CRI try to set via OCI
+ // Image
+ userstr = imageConfig.User
+ }
+ if userstr != "" {
+ specOpts = append(specOpts, oci.WithUser(userstr))
+ }
+ return specOpts, nil
+}
+
+// setupSandboxFiles sets up necessary sandbox files including /dev/shm, /etc/hosts,
+// /etc/resolv.conf and /etc/hostname.
+func (c *criService) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
+ sandboxEtcHostname := c.getSandboxHostname(id)
+ hostname := config.GetHostname()
+ if hostname == "" {
+ var err error
+ hostname, err = c.os.Hostname()
+ if err != nil {
+ return errors.Wrap(err, "failed to get hostname")
+ }
+ }
+ if err := c.os.WriteFile(sandboxEtcHostname, []byte(hostname+"\n"), 0644); err != nil {
+ return errors.Wrapf(err, "failed to write hostname to %q", sandboxEtcHostname)
+ }
+
+ // TODO(random-liu): Consider whether we should maintain /etc/hosts and /etc/resolv.conf in kubelet.
+ sandboxEtcHosts := c.getSandboxHosts(id)
+ if err := c.os.CopyFile(etcHosts, sandboxEtcHosts, 0644); err != nil {
+ return errors.Wrapf(err, "failed to generate sandbox hosts file %q", sandboxEtcHosts)
+ }
+
+ // Set DNS options. Maintain a resolv.conf for the sandbox.
+ var err error
+ resolvContent := ""
+ if dnsConfig := config.GetDnsConfig(); dnsConfig != nil {
+ resolvContent, err = parseDNSOptions(dnsConfig.Servers, dnsConfig.Searches, dnsConfig.Options)
+ if err != nil {
+ return errors.Wrapf(err, "failed to parse sandbox DNSConfig %+v", dnsConfig)
+ }
+ }
+ resolvPath := c.getResolvPath(id)
+ if resolvContent == "" {
+ // copy host's resolv.conf to resolvPath
+ err = c.os.CopyFile(resolvConfPath, resolvPath, 0644)
+ if err != nil {
+ return errors.Wrapf(err, "failed to copy host's resolv.conf to %q", resolvPath)
+ }
+ } else {
+ err = c.os.WriteFile(resolvPath, []byte(resolvContent), 0644)
+ if err != nil {
+ return errors.Wrapf(err, "failed to write resolv content to %q", resolvPath)
+ }
+ }
+
+ // Setup sandbox /dev/shm.
+ if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE {
+ if _, err := c.os.Stat(devShm); err != nil {
+ return errors.Wrapf(err, "host %q is not available for host ipc", devShm)
+ }
+ } else {
+ sandboxDevShm := c.getSandboxDevShm(id)
+ if err := c.os.MkdirAll(sandboxDevShm, 0700); err != nil {
+ return errors.Wrap(err, "failed to create sandbox shm")
+ }
+ shmproperty := fmt.Sprintf("mode=1777,size=%d", defaultShmSize)
+ if err := c.os.(osinterface.UNIX).Mount("shm", sandboxDevShm, "tmpfs", uintptr(unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV), shmproperty); err != nil {
+ return errors.Wrap(err, "failed to mount sandbox shm")
+ }
+ }
+
+ return nil
+}
+
+// parseDNSOptions parses DNS options into resolv.conf format content;
+// if no option is specified, it returns empty content with no error.
+func parseDNSOptions(servers, searches, options []string) (string, error) {
+ resolvContent := ""
+
+ if len(searches) > maxDNSSearches {
+ return "", errors.Errorf("DNSOption.Searches has more than %d domains", maxDNSSearches)
+ }
+
+ if len(searches) > 0 {
+ resolvContent += fmt.Sprintf("search %s\n", strings.Join(searches, " "))
+ }
+
+ if len(servers) > 0 {
+ resolvContent += fmt.Sprintf("nameserver %s\n", strings.Join(servers, "\nnameserver "))
+ }
+
+ if len(options) > 0 {
+ resolvContent += fmt.Sprintf("options %s\n", strings.Join(options, " "))
+ }
+
+ return resolvContent, nil
+}
+
+// cleanupSandboxFiles unmounts some sandbox files; we rely on the removal of the sandbox root directory to
+// remove these files. Unmount should *NOT* return error if the mount point is already unmounted.
+func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
+ if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetIpc() != runtime.NamespaceMode_NODE {
+ path, err := c.os.FollowSymlinkInScope(c.getSandboxDevShm(id), "/")
+ if err != nil {
+ return errors.Wrap(err, "failed to follow symlink")
+ }
+ if err := c.os.(osinterface.UNIX).Unmount(path); err != nil && !os.IsNotExist(err) {
+ return errors.Wrapf(err, "failed to unmount %q", path)
+ }
+ }
+ return nil
+}
+
+// taskOpts generates task options for a (sandbox) container.
+func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
+ // TODO(random-liu): Remove this after shim v1 is deprecated.
+ var taskOpts []containerd.NewTaskOpts
+ if c.config.NoPivot && (runtimeType == plugin.RuntimeRuncV1 || runtimeType == plugin.RuntimeRuncV2) {
+ taskOpts = append(taskOpts, containerd.WithNoPivotRoot)
+ }
+ return taskOpts
+}
diff --git a/pkg/server/sandbox_run_linux_test.go b/pkg/server/sandbox_run_linux_test.go
new file mode 100644
index 000000000..166cd6f9e
--- /dev/null
+++ b/pkg/server/sandbox_run_linux_test.go
@@ -0,0 +1,431 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "os"
+ "path/filepath"
+ "testing"
+
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/opencontainers/selinux/go-selinux"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/annotations"
+ "github.com/containerd/cri/pkg/containerd/opts"
+ ostesting "github.com/containerd/cri/pkg/os/testing"
+)
+
+func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, *runtimespec.Spec)) {
+ config := &runtime.PodSandboxConfig{
+ Metadata: &runtime.PodSandboxMetadata{
+ Name: "test-name",
+ Uid: "test-uid",
+ Namespace: "test-ns",
+ Attempt: 1,
+ },
+ Hostname: "test-hostname",
+ LogDirectory: "test-log-directory",
+ Labels: map[string]string{"a": "b"},
+ Annotations: map[string]string{"c": "d"},
+ Linux: &runtime.LinuxPodSandboxConfig{
+ CgroupParent: "/test/cgroup/parent",
+ },
+ }
+ imageConfig := &imagespec.ImageConfig{
+ Env: []string{"a=b", "c=d"},
+ Entrypoint: []string{"/pause"},
+ Cmd: []string{"forever"},
+ WorkingDir: "/workspace",
+ }
+ specCheck := func(t *testing.T, id string, spec *runtimespec.Spec) {
+ assert.Equal(t, "test-hostname", spec.Hostname)
+ assert.Equal(t, getCgroupsPath("/test/cgroup/parent", id), spec.Linux.CgroupsPath)
+ assert.Equal(t, relativeRootfsPath, spec.Root.Path)
+ assert.Equal(t, true, spec.Root.Readonly)
+ assert.Contains(t, spec.Process.Env, "a=b", "c=d")
+ assert.Equal(t, []string{"/pause", "forever"}, spec.Process.Args)
+ assert.Equal(t, "/workspace", spec.Process.Cwd)
+ assert.EqualValues(t, *spec.Linux.Resources.CPU.Shares, opts.DefaultSandboxCPUshares)
+ assert.EqualValues(t, *spec.Process.OOMScoreAdj, defaultSandboxOOMAdj)
+
+ t.Logf("Check PodSandbox annotations")
+ assert.Contains(t, spec.Annotations, annotations.SandboxID)
+ assert.EqualValues(t, spec.Annotations[annotations.SandboxID], id)
+
+ assert.Contains(t, spec.Annotations, annotations.ContainerType)
+ assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeSandbox)
+
+ assert.Contains(t, spec.Annotations, annotations.SandboxLogDir)
+ assert.EqualValues(t, spec.Annotations[annotations.SandboxLogDir], "test-log-directory")
+
+ if selinux.GetEnabled() {
+ assert.NotEqual(t, "", spec.Process.SelinuxLabel)
+ assert.NotEqual(t, "", spec.Linux.MountLabel)
+ }
+ }
+ return config, imageConfig, specCheck
+}
+
+func TestLinuxSandboxContainerSpec(t *testing.T) {
+ testID := "test-id"
+ nsPath := "test-cni"
+ for desc, test := range map[string]struct {
+ configChange func(*runtime.PodSandboxConfig)
+ specCheck func(*testing.T, *runtimespec.Spec)
+ expectErr bool
+ }{
+ "spec should reflect original config": {
+ specCheck: func(t *testing.T, spec *runtimespec.Spec) {
+ // runtime spec should have expected namespaces enabled by default.
+ require.NotNil(t, spec.Linux)
+ assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
+ Type: runtimespec.NetworkNamespace,
+ Path: nsPath,
+ })
+ assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
+ Type: runtimespec.UTSNamespace,
+ })
+ assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
+ Type: runtimespec.PIDNamespace,
+ })
+ assert.Contains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
+ Type: runtimespec.IPCNamespace,
+ })
+ },
+ },
+ "host namespace": {
+ configChange: func(c *runtime.PodSandboxConfig) {
+ c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
+ NamespaceOptions: &runtime.NamespaceOption{
+ Network: runtime.NamespaceMode_NODE,
+ Pid: runtime.NamespaceMode_NODE,
+ Ipc: runtime.NamespaceMode_NODE,
+ },
+ }
+ },
+ specCheck: func(t *testing.T, spec *runtimespec.Spec) {
+ // runtime spec should disable expected namespaces in host mode.
+ require.NotNil(t, spec.Linux)
+ assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
+ Type: runtimespec.NetworkNamespace,
+ })
+ assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
+ Type: runtimespec.UTSNamespace,
+ })
+ assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
+ Type: runtimespec.PIDNamespace,
+ })
+ assert.NotContains(t, spec.Linux.Namespaces, runtimespec.LinuxNamespace{
+ Type: runtimespec.IPCNamespace,
+ })
+ },
+ },
+ "should set supplemental groups correctly": {
+ configChange: func(c *runtime.PodSandboxConfig) {
+ c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
+ SupplementalGroups: []int64{1111, 2222},
+ }
+ },
+ specCheck: func(t *testing.T, spec *runtimespec.Spec) {
+ require.NotNil(t, spec.Process)
+ assert.Contains(t, spec.Process.User.AdditionalGids, uint32(1111))
+ assert.Contains(t, spec.Process.User.AdditionalGids, uint32(2222))
+ },
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+ c := newTestCRIService()
+ config, imageConfig, specCheck := getRunPodSandboxTestData()
+ if test.configChange != nil {
+ test.configChange(config)
+ }
+ spec, err := c.sandboxContainerSpec(testID, config, imageConfig, nsPath, nil)
+ if test.expectErr {
+ assert.Error(t, err)
+ assert.Nil(t, spec)
+ continue
+ }
+ assert.NoError(t, err)
+ assert.NotNil(t, spec)
+ specCheck(t, testID, spec)
+ if test.specCheck != nil {
+ test.specCheck(t, spec)
+ }
+ }
+}
+
+func TestSetupSandboxFiles(t *testing.T) {
+ const (
+ testID = "test-id"
+ realhostname = "test-real-hostname"
+ )
+ for desc, test := range map[string]struct {
+ dnsConfig *runtime.DNSConfig
+ hostname string
+ ipcMode runtime.NamespaceMode
+ expectedCalls []ostesting.CalledDetail
+ }{
+ "should check host /dev/shm existence when ipc mode is NODE": {
+ ipcMode: runtime.NamespaceMode_NODE,
+ expectedCalls: []ostesting.CalledDetail{
+ {
+ Name: "Hostname",
+ },
+ {
+ Name: "WriteFile",
+ Arguments: []interface{}{
+ filepath.Join(testRootDir, sandboxesDir, testID, "hostname"),
+ []byte(realhostname + "\n"),
+ os.FileMode(0644),
+ },
+ },
+ {
+ Name: "CopyFile",
+ Arguments: []interface{}{
+ "/etc/hosts",
+ filepath.Join(testRootDir, sandboxesDir, testID, "hosts"),
+ os.FileMode(0644),
+ },
+ },
+ {
+ Name: "CopyFile",
+ Arguments: []interface{}{
+ "/etc/resolv.conf",
+ filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"),
+ os.FileMode(0644),
+ },
+ },
+ {
+ Name: "Stat",
+ Arguments: []interface{}{"/dev/shm"},
+ },
+ },
+ },
+ "should create new /etc/resolv.conf if DNSOptions is set": {
+ dnsConfig: &runtime.DNSConfig{
+ Servers: []string{"8.8.8.8"},
+ Searches: []string{"114.114.114.114"},
+ Options: []string{"timeout:1"},
+ },
+ ipcMode: runtime.NamespaceMode_NODE,
+ expectedCalls: []ostesting.CalledDetail{
+ {
+ Name: "Hostname",
+ },
+ {
+ Name: "WriteFile",
+ Arguments: []interface{}{
+ filepath.Join(testRootDir, sandboxesDir, testID, "hostname"),
+ []byte(realhostname + "\n"),
+ os.FileMode(0644),
+ },
+ },
+ {
+ Name: "CopyFile",
+ Arguments: []interface{}{
+ "/etc/hosts",
+ filepath.Join(testRootDir, sandboxesDir, testID, "hosts"),
+ os.FileMode(0644),
+ },
+ },
+ {
+ Name: "WriteFile",
+ Arguments: []interface{}{
+ filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"),
+ []byte(`search 114.114.114.114
+nameserver 8.8.8.8
+options timeout:1
+`), os.FileMode(0644),
+ },
+ },
+ {
+ Name: "Stat",
+ Arguments: []interface{}{"/dev/shm"},
+ },
+ },
+ },
+ "should create sandbox shm when ipc namespace mode is not NODE": {
+ ipcMode: runtime.NamespaceMode_POD,
+ expectedCalls: []ostesting.CalledDetail{
+ {
+ Name: "Hostname",
+ },
+ {
+ Name: "WriteFile",
+ Arguments: []interface{}{
+ filepath.Join(testRootDir, sandboxesDir, testID, "hostname"),
+ []byte(realhostname + "\n"),
+ os.FileMode(0644),
+ },
+ },
+ {
+ Name: "CopyFile",
+ Arguments: []interface{}{
+ "/etc/hosts",
+ filepath.Join(testRootDir, sandboxesDir, testID, "hosts"),
+ os.FileMode(0644),
+ },
+ },
+ {
+ Name: "CopyFile",
+ Arguments: []interface{}{
+ "/etc/resolv.conf",
+ filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"),
+ os.FileMode(0644),
+ },
+ },
+ {
+ Name: "MkdirAll",
+ Arguments: []interface{}{
+ filepath.Join(testStateDir, sandboxesDir, testID, "shm"),
+ os.FileMode(0700),
+ },
+ },
+ {
+ Name: "Mount",
+ // Ignore arguments which are too complex to check.
+ },
+ },
+ },
+ "should create /etc/hostname when hostname is set": {
+ hostname: "test-hostname",
+ ipcMode: runtime.NamespaceMode_NODE,
+ expectedCalls: []ostesting.CalledDetail{
+ {
+ Name: "WriteFile",
+ Arguments: []interface{}{
+ filepath.Join(testRootDir, sandboxesDir, testID, "hostname"),
+ []byte("test-hostname\n"),
+ os.FileMode(0644),
+ },
+ },
+ {
+ Name: "CopyFile",
+ Arguments: []interface{}{
+ "/etc/hosts",
+ filepath.Join(testRootDir, sandboxesDir, testID, "hosts"),
+ os.FileMode(0644),
+ },
+ },
+ {
+ Name: "CopyFile",
+ Arguments: []interface{}{
+ "/etc/resolv.conf",
+ filepath.Join(testRootDir, sandboxesDir, testID, "resolv.conf"),
+ os.FileMode(0644),
+ },
+ },
+ {
+ Name: "Stat",
+ Arguments: []interface{}{"/dev/shm"},
+ },
+ },
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+ c := newTestCRIService()
+ c.os.(*ostesting.FakeOS).HostnameFn = func() (string, error) {
+ return realhostname, nil
+ }
+ cfg := &runtime.PodSandboxConfig{
+ Hostname: test.hostname,
+ DnsConfig: test.dnsConfig,
+ Linux: &runtime.LinuxPodSandboxConfig{
+ SecurityContext: &runtime.LinuxSandboxSecurityContext{
+ NamespaceOptions: &runtime.NamespaceOption{
+ Ipc: test.ipcMode,
+ },
+ },
+ },
+ }
+ c.setupSandboxFiles(testID, cfg)
+ calls := c.os.(*ostesting.FakeOS).GetCalls()
+ assert.Len(t, calls, len(test.expectedCalls))
+ for i, expected := range test.expectedCalls {
+ if expected.Arguments == nil {
+ // Ignore arguments.
+ expected.Arguments = calls[i].Arguments
+ }
+ assert.Equal(t, expected, calls[i])
+ }
+ }
+}
+
+func TestParseDNSOption(t *testing.T) {
+ for desc, test := range map[string]struct {
+ servers []string
+ searches []string
+ options []string
+ expectedContent string
+ expectErr bool
+ }{
+ "empty dns options should return empty content": {},
+ "non-empty dns options should return correct content": {
+ servers: []string{"8.8.8.8", "server.google.com"},
+ searches: []string{"114.114.114.114"},
+ options: []string{"timeout:1"},
+ expectedContent: `search 114.114.114.114
+nameserver 8.8.8.8
+nameserver server.google.com
+options timeout:1
+`,
+ },
+ "should return error if dns search exceeds limit(6)": {
+ searches: []string{
+ "server0.google.com",
+ "server1.google.com",
+ "server2.google.com",
+ "server3.google.com",
+ "server4.google.com",
+ "server5.google.com",
+ "server6.google.com",
+ },
+ expectErr: true,
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+ resolvContent, err := parseDNSOptions(test.servers, test.searches, test.options)
+ if test.expectErr {
+ assert.Error(t, err)
+ continue
+ }
+ assert.NoError(t, err)
+ assert.Equal(t, resolvContent, test.expectedContent)
+ }
+}
+
+func TestSandboxDisableCgroup(t *testing.T) {
+ config, imageConfig, _ := getRunPodSandboxTestData()
+ c := newTestCRIService()
+ c.config.DisableCgroup = true
+ spec, err := c.sandboxContainerSpec("test-id", config, imageConfig, "test-cni", []string{})
+ require.NoError(t, err)
+
+ t.Log("resource limit should not be set")
+ assert.Nil(t, spec.Linux.Resources.Memory)
+ assert.Nil(t, spec.Linux.Resources.CPU)
+
+ t.Log("cgroup path should be empty")
+ assert.Empty(t, spec.Linux.CgroupsPath)
+}
+
+// TODO(random-liu): [P1] Add unit test for different error cases to make sure
+// the function cleans up on error properly.
diff --git a/pkg/server/sandbox_run_other.go b/pkg/server/sandbox_run_other.go
new file mode 100644
index 000000000..61d3904f7
--- /dev/null
+++ b/pkg/server/sandbox_run_other.go
@@ -0,0 +1,55 @@
+// +build !windows,!linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/oci"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxConfig,
+ imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (_ *runtimespec.Spec, retErr error) {
+ return c.runtimeSpec(id, "")
+}
+
+// sandboxContainerSpecOpts generates OCI spec options for
+// the sandbox container.
+func (c *criService) sandboxContainerSpecOpts(config *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
+ return []oci.SpecOpts{}, nil
+}
+
+// setupSandboxFiles sets up necessary sandbox files including /dev/shm, /etc/hosts,
+// /etc/resolv.conf and /etc/hostname.
+func (c *criService) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
+ return nil
+}
+
+// cleanupSandboxFiles unmount some sandbox files, we rely on the removal of sandbox root directory to
+// remove these files. Unmount should *NOT* return error if the mount point is already unmounted.
+func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
+ return nil
+}
+
+// taskOpts generates task options for a (sandbox) container.
+func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
+ return []containerd.NewTaskOpts{}
+}
diff --git a/pkg/server/sandbox_run_other_test.go b/pkg/server/sandbox_run_other_test.go
new file mode 100644
index 000000000..daf903908
--- /dev/null
+++ b/pkg/server/sandbox_run_other_test.go
@@ -0,0 +1,35 @@
+// +build !windows,!linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, *runtimespec.Spec)) {
+ config := &runtime.PodSandboxConfig{}
+ imageConfig := &imagespec.ImageConfig{}
+ specCheck := func(t *testing.T, id string, spec *runtimespec.Spec) {
+ }
+ return config, imageConfig, specCheck
+}
diff --git a/pkg/server/sandbox_run_test.go b/pkg/server/sandbox_run_test.go
new file mode 100644
index 000000000..70f01ce03
--- /dev/null
+++ b/pkg/server/sandbox_run_test.go
@@ -0,0 +1,500 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "net"
+ "testing"
+
+ cni "github.com/containerd/go-cni"
+ "github.com/containerd/typeurl"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/stretchr/testify/assert"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/annotations"
+ criconfig "github.com/containerd/cri/pkg/config"
+ sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
+)
+
+// TestSandboxContainerSpec verifies that sandboxContainerSpec builds a valid OCI
+// spec for the sandbox container and honors passthrough pod annotations.
+func TestSandboxContainerSpec(t *testing.T) {
+ testID := "test-id"
+ nsPath := "test-cni"
+ for desc, test := range map[string]struct {
+ configChange func(*runtime.PodSandboxConfig)
+ podAnnotations []string
+ imageConfigChange func(*imagespec.ImageConfig)
+ specCheck func(*testing.T, *runtimespec.Spec)
+ expectErr bool
+ }{
+ "should return error when entrypoint and cmd are empty": {
+ imageConfigChange: func(c *imagespec.ImageConfig) {
+ c.Entrypoint = nil
+ c.Cmd = nil
+ },
+ expectErr: true,
+ },
+ // NOTE(review): the two cases below assume getRunPodSandboxTestData supplies the
+ // annotation "c"="d" in the base config — confirm against the per-platform test data.
+ "a passthrough annotation should be passed as an OCI annotation": {
+ podAnnotations: []string{"c"},
+ specCheck: func(t *testing.T, spec *runtimespec.Spec) {
+ assert.Equal(t, spec.Annotations["c"], "d")
+ },
+ },
+ "a non-passthrough annotation should not be passed as an OCI annotation": {
+ configChange: func(c *runtime.PodSandboxConfig) {
+ c.Annotations["d"] = "e"
+ },
+ podAnnotations: []string{"c"},
+ specCheck: func(t *testing.T, spec *runtimespec.Spec) {
+ assert.Equal(t, spec.Annotations["c"], "d")
+ _, ok := spec.Annotations["d"]
+ assert.False(t, ok)
+ },
+ },
+ "passthrough annotations should support wildcard match": {
+ configChange: func(c *runtime.PodSandboxConfig) {
+ c.Annotations["t.f"] = "j"
+ c.Annotations["z.g"] = "o"
+ c.Annotations["z"] = "o"
+ c.Annotations["y.ca"] = "b"
+ c.Annotations["y"] = "b"
+ },
+ podAnnotations: []string{"t*", "z.*", "y.c*"},
+ specCheck: func(t *testing.T, spec *runtimespec.Spec) {
+ assert.Equal(t, spec.Annotations["t.f"], "j")
+ assert.Equal(t, spec.Annotations["z.g"], "o")
+ assert.Equal(t, spec.Annotations["y.ca"], "b")
+ _, ok := spec.Annotations["y"]
+ assert.False(t, ok)
+ _, ok = spec.Annotations["z"]
+ assert.False(t, ok)
+ },
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+ c := newTestCRIService()
+ config, imageConfig, specCheck := getRunPodSandboxTestData()
+ if test.configChange != nil {
+ test.configChange(config)
+ }
+
+ if test.imageConfigChange != nil {
+ test.imageConfigChange(imageConfig)
+ }
+ spec, err := c.sandboxContainerSpec(testID, config, imageConfig, nsPath,
+ test.podAnnotations)
+ // Error cases must yield a nil spec and skip all spec validation.
+ if test.expectErr {
+ assert.Error(t, err)
+ assert.Nil(t, spec)
+ continue
+ }
+ assert.NoError(t, err)
+ assert.NotNil(t, spec)
+ // Run the platform-wide checks first, then the per-case ones.
+ specCheck(t, testID, spec)
+ if test.specCheck != nil {
+ test.specCheck(t, spec)
+ }
+ }
+}
+
+// TestTypeurlMarshalUnmarshalSandboxMeta verifies that sandbox metadata
+// round-trips through typeurl marshal/unmarshal without losing information.
+func TestTypeurlMarshalUnmarshalSandboxMeta(t *testing.T) {
+ for desc, test := range map[string]struct {
+ configChange func(*runtime.PodSandboxConfig)
+ }{
+ "should marshal original config": {},
+ "should marshal Linux": {
+ configChange: func(c *runtime.PodSandboxConfig) {
+ if c.Linux == nil {
+ c.Linux = &runtime.LinuxPodSandboxConfig{}
+ }
+ c.Linux.SecurityContext = &runtime.LinuxSandboxSecurityContext{
+ NamespaceOptions: &runtime.NamespaceOption{
+ Network: runtime.NamespaceMode_NODE,
+ Pid: runtime.NamespaceMode_NODE,
+ Ipc: runtime.NamespaceMode_NODE,
+ },
+ SupplementalGroups: []int64{1111, 2222},
+ }
+ },
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+ meta := &sandboxstore.Metadata{
+ ID: "1",
+ Name: "sandbox_1",
+ NetNSPath: "/home/cloud",
+ }
+ meta.Config, _, _ = getRunPodSandboxTestData()
+ if test.configChange != nil {
+ test.configChange(meta.Config)
+ }
+
+ // Marshal to an Any and back; the decoded value must equal the original metadata.
+ any, err := typeurl.MarshalAny(meta)
+ assert.NoError(t, err)
+ data, err := typeurl.UnmarshalAny(any)
+ assert.NoError(t, err)
+ assert.IsType(t, &sandboxstore.Metadata{}, data)
+ curMeta, ok := data.(*sandboxstore.Metadata)
+ assert.True(t, ok)
+ assert.Equal(t, meta, curMeta)
+ }
+}
+
+// TestToCNIPortMappings verifies conversion of CRI port mappings to CNI port
+// mappings, including skipping entries without a host port.
+func TestToCNIPortMappings(t *testing.T) {
+ for desc, test := range map[string]struct {
+ criPortMappings []*runtime.PortMapping
+ cniPortMappings []cni.PortMapping
+ }{
+ "empty CRI port mapping should map to empty CNI port mapping": {},
+ "CRI port mapping should be converted to CNI port mapping properly": {
+ criPortMappings: []*runtime.PortMapping{
+ {
+ Protocol: runtime.Protocol_UDP,
+ ContainerPort: 1234,
+ HostPort: 5678,
+ HostIp: "123.124.125.126",
+ },
+ {
+ Protocol: runtime.Protocol_TCP,
+ ContainerPort: 4321,
+ HostPort: 8765,
+ HostIp: "126.125.124.123",
+ },
+ {
+ Protocol: runtime.Protocol_SCTP,
+ ContainerPort: 1234,
+ HostPort: 5678,
+ HostIp: "123.124.125.126",
+ },
+ },
+ cniPortMappings: []cni.PortMapping{
+ {
+ HostPort: 5678,
+ ContainerPort: 1234,
+ Protocol: "udp",
+ HostIP: "123.124.125.126",
+ },
+ {
+ HostPort: 8765,
+ ContainerPort: 4321,
+ Protocol: "tcp",
+ HostIP: "126.125.124.123",
+ },
+ {
+ HostPort: 5678,
+ ContainerPort: 1234,
+ Protocol: "sctp",
+ HostIP: "123.124.125.126",
+ },
+ },
+ },
+ "CRI port mapping without host port should be skipped": {
+ criPortMappings: []*runtime.PortMapping{
+ {
+ Protocol: runtime.Protocol_UDP,
+ ContainerPort: 1234,
+ HostIp: "123.124.125.126",
+ },
+ {
+ Protocol: runtime.Protocol_TCP,
+ ContainerPort: 4321,
+ HostPort: 8765,
+ HostIp: "126.125.124.123",
+ },
+ },
+ cniPortMappings: []cni.PortMapping{
+ {
+ HostPort: 8765,
+ ContainerPort: 4321,
+ Protocol: "tcp",
+ HostIP: "126.125.124.123",
+ },
+ },
+ },
+ // NOTE(review): this case contains no unsupported-protocol entry (only TCP),
+ // so the skip path is not actually exercised — consider adding an entry with
+ // an out-of-range protocol value to match the case name.
+ "CRI port mapping with unsupported protocol should be skipped": {
+ criPortMappings: []*runtime.PortMapping{
+ {
+ Protocol: runtime.Protocol_TCP,
+ ContainerPort: 4321,
+ HostPort: 8765,
+ HostIp: "126.125.124.123",
+ },
+ },
+ cniPortMappings: []cni.PortMapping{
+ {
+ HostPort: 8765,
+ ContainerPort: 4321,
+ Protocol: "tcp",
+ HostIP: "126.125.124.123",
+ },
+ },
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+ assert.Equal(t, test.cniPortMappings, toCNIPortMappings(test.criPortMappings))
+ }
+}
+
+// TestSelectPodIP verifies that selectPodIPs prefers the first IPv4 address as
+// the primary pod IP and returns the remaining addresses as additional IPs.
+func TestSelectPodIP(t *testing.T) {
+ for desc, test := range map[string]struct {
+ ips []string
+ expectedIP string
+ expectedAdditionalIPs []string
+ }{
+ "ipv4 should be picked even if ipv6 comes first": {
+ ips: []string{"2001:db8:85a3::8a2e:370:7334", "192.168.17.43"},
+ expectedIP: "192.168.17.43",
+ expectedAdditionalIPs: []string{"2001:db8:85a3::8a2e:370:7334"},
+ },
+ "ipv4 should be picked when there is only ipv4": {
+ ips: []string{"192.168.17.43"},
+ expectedIP: "192.168.17.43",
+ expectedAdditionalIPs: nil,
+ },
+ "ipv6 should be picked when there is no ipv4": {
+ ips: []string{"2001:db8:85a3::8a2e:370:7334"},
+ expectedIP: "2001:db8:85a3::8a2e:370:7334",
+ expectedAdditionalIPs: nil,
+ },
+ "the first ipv4 should be picked when there are multiple ipv4": { // unlikely to happen
+ ips: []string{"2001:db8:85a3::8a2e:370:7334", "192.168.17.43", "2001:db8:85a3::8a2e:370:7335", "192.168.17.45"},
+ expectedIP: "192.168.17.43",
+ expectedAdditionalIPs: []string{"2001:db8:85a3::8a2e:370:7334", "2001:db8:85a3::8a2e:370:7335", "192.168.17.45"},
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+ // Build the cni.IPConfig list from the plain string fixtures.
+ var ipConfigs []*cni.IPConfig
+ for _, ip := range test.ips {
+ ipConfigs = append(ipConfigs, &cni.IPConfig{
+ IP: net.ParseIP(ip),
+ })
+ }
+ ip, additionalIPs := selectPodIPs(ipConfigs)
+ assert.Equal(t, test.expectedIP, ip)
+ assert.Equal(t, test.expectedAdditionalIPs, additionalIPs)
+ }
+}
+
+// TestHostAccessingSandbox verifies that hostAccessingSandbox reports true only
+// for sandboxes that request host (NODE) network, PID and IPC namespaces, and
+// false for nil, privileged, and non-privileged configs.
+func TestHostAccessingSandbox(t *testing.T) {
+ privilegedContext := &runtime.PodSandboxConfig{
+ Linux: &runtime.LinuxPodSandboxConfig{
+ SecurityContext: &runtime.LinuxSandboxSecurityContext{
+ Privileged: true,
+ },
+ },
+ }
+ nonPrivilegedContext := &runtime.PodSandboxConfig{
+ Linux: &runtime.LinuxPodSandboxConfig{
+ SecurityContext: &runtime.LinuxSandboxSecurityContext{
+ Privileged: false,
+ },
+ },
+ }
+ hostNamespace := &runtime.PodSandboxConfig{
+ Linux: &runtime.LinuxPodSandboxConfig{
+ SecurityContext: &runtime.LinuxSandboxSecurityContext{
+ Privileged: false,
+ NamespaceOptions: &runtime.NamespaceOption{
+ Network: runtime.NamespaceMode_NODE,
+ Pid: runtime.NamespaceMode_NODE,
+ Ipc: runtime.NamespaceMode_NODE,
+ },
+ },
+ },
+ }
+ tests := []struct {
+ name string
+ config *runtime.PodSandboxConfig
+ want bool
+ }{
+ {"Security Context is nil", nil, false},
+ {"Security Context is privileged", privilegedContext, false},
+ {"Security Context is not privileged", nonPrivilegedContext, false},
+ {"Security Context namespace host access", hostNamespace, true},
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ if got := hostAccessingSandbox(tt.config); got != tt.want {
+ t.Errorf("hostAccessingSandbox() = %v, want %v", got, tt.want)
+ }
+ })
+ }
+}
+
+// TestGetSandboxRuntime verifies runtime selection for a sandbox based on the
+// untrusted-workload annotation and the CRI runtime handler, including the
+// error cases where they conflict or the requested runtime is not configured.
+func TestGetSandboxRuntime(t *testing.T) {
+ untrustedWorkloadRuntime := criconfig.Runtime{
+ Type: "io.containerd.runtime.v1.linux",
+ Engine: "untrusted-workload-runtime",
+ Root: "",
+ }
+
+ defaultRuntime := criconfig.Runtime{
+ Type: "io.containerd.runtime.v1.linux",
+ Engine: "default-runtime",
+ Root: "",
+ }
+
+ fooRuntime := criconfig.Runtime{
+ Type: "io.containerd.runtime.v1.linux",
+ Engine: "foo-bar",
+ Root: "",
+ }
+
+ for desc, test := range map[string]struct {
+ sandboxConfig *runtime.PodSandboxConfig
+ runtimeHandler string
+ runtimes map[string]criconfig.Runtime
+ expectErr bool
+ expectedRuntime criconfig.Runtime
+ }{
+ "should return error if untrusted workload requires host access": {
+ sandboxConfig: &runtime.PodSandboxConfig{
+ Linux: &runtime.LinuxPodSandboxConfig{
+ SecurityContext: &runtime.LinuxSandboxSecurityContext{
+ Privileged: false,
+ NamespaceOptions: &runtime.NamespaceOption{
+ Network: runtime.NamespaceMode_NODE,
+ Pid: runtime.NamespaceMode_NODE,
+ Ipc: runtime.NamespaceMode_NODE,
+ },
+ },
+ },
+ Annotations: map[string]string{
+ annotations.UntrustedWorkload: "true",
+ },
+ },
+ runtimes: map[string]criconfig.Runtime{
+ criconfig.RuntimeDefault: defaultRuntime,
+ criconfig.RuntimeUntrusted: untrustedWorkloadRuntime,
+ },
+ expectErr: true,
+ },
+ "should use untrusted workload runtime for untrusted workload": {
+ sandboxConfig: &runtime.PodSandboxConfig{
+ Annotations: map[string]string{
+ annotations.UntrustedWorkload: "true",
+ },
+ },
+ runtimes: map[string]criconfig.Runtime{
+ criconfig.RuntimeDefault: defaultRuntime,
+ criconfig.RuntimeUntrusted: untrustedWorkloadRuntime,
+ },
+ expectedRuntime: untrustedWorkloadRuntime,
+ },
+ "should use default runtime for regular workload": {
+ sandboxConfig: &runtime.PodSandboxConfig{},
+ runtimes: map[string]criconfig.Runtime{
+ criconfig.RuntimeDefault: defaultRuntime,
+ },
+ expectedRuntime: defaultRuntime,
+ },
+ "should use default runtime for trusted workload": {
+ sandboxConfig: &runtime.PodSandboxConfig{
+ Annotations: map[string]string{
+ annotations.UntrustedWorkload: "false",
+ },
+ },
+ runtimes: map[string]criconfig.Runtime{
+ criconfig.RuntimeDefault: defaultRuntime,
+ criconfig.RuntimeUntrusted: untrustedWorkloadRuntime,
+ },
+ expectedRuntime: defaultRuntime,
+ },
+ "should return error if untrusted workload runtime is required but not configured": {
+ sandboxConfig: &runtime.PodSandboxConfig{
+ Annotations: map[string]string{
+ annotations.UntrustedWorkload: "true",
+ },
+ },
+ runtimes: map[string]criconfig.Runtime{
+ criconfig.RuntimeDefault: defaultRuntime,
+ },
+ expectErr: true,
+ },
+ "should use 'untrusted' runtime for untrusted workload": {
+ sandboxConfig: &runtime.PodSandboxConfig{
+ Annotations: map[string]string{
+ annotations.UntrustedWorkload: "true",
+ },
+ },
+ runtimes: map[string]criconfig.Runtime{
+ criconfig.RuntimeDefault: defaultRuntime,
+ criconfig.RuntimeUntrusted: untrustedWorkloadRuntime,
+ },
+ expectedRuntime: untrustedWorkloadRuntime,
+ },
+ "should use 'untrusted' runtime for untrusted workload & handler": {
+ sandboxConfig: &runtime.PodSandboxConfig{
+ Annotations: map[string]string{
+ annotations.UntrustedWorkload: "true",
+ },
+ },
+ runtimeHandler: "untrusted",
+ runtimes: map[string]criconfig.Runtime{
+ criconfig.RuntimeDefault: defaultRuntime,
+ criconfig.RuntimeUntrusted: untrustedWorkloadRuntime,
+ },
+ expectedRuntime: untrustedWorkloadRuntime,
+ },
+ "should return an error if untrusted annotation with conflicting handler": {
+ sandboxConfig: &runtime.PodSandboxConfig{
+ Annotations: map[string]string{
+ annotations.UntrustedWorkload: "true",
+ },
+ },
+ runtimeHandler: "foo",
+ runtimes: map[string]criconfig.Runtime{
+ criconfig.RuntimeDefault: defaultRuntime,
+ criconfig.RuntimeUntrusted: untrustedWorkloadRuntime,
+ "foo": fooRuntime,
+ },
+ expectErr: true,
+ },
+ "should use correct runtime for a runtime handler": {
+ sandboxConfig: &runtime.PodSandboxConfig{},
+ runtimeHandler: "foo",
+ runtimes: map[string]criconfig.Runtime{
+ criconfig.RuntimeDefault: defaultRuntime,
+ criconfig.RuntimeUntrusted: untrustedWorkloadRuntime,
+ "foo": fooRuntime,
+ },
+ expectedRuntime: fooRuntime,
+ },
+ "should return error if runtime handler is required but not configured": {
+ sandboxConfig: &runtime.PodSandboxConfig{},
+ runtimeHandler: "bar",
+ runtimes: map[string]criconfig.Runtime{
+ criconfig.RuntimeDefault: defaultRuntime,
+ "foo": fooRuntime,
+ },
+ expectErr: true,
+ },
+ } {
+ t.Run(desc, func(t *testing.T) {
+ // Build a service whose configured runtimes come from the test case.
+ cri := newTestCRIService()
+ cri.config = criconfig.Config{
+ PluginConfig: criconfig.DefaultConfig(),
+ }
+ cri.config.ContainerdConfig.DefaultRuntimeName = criconfig.RuntimeDefault
+ cri.config.ContainerdConfig.Runtimes = test.runtimes
+ r, err := cri.getSandboxRuntime(test.sandboxConfig, test.runtimeHandler)
+ assert.Equal(t, test.expectErr, err != nil)
+ // On error, r is expected to be the zero-value Runtime.
+ assert.Equal(t, test.expectedRuntime, r)
+ })
+ }
+}
diff --git a/pkg/server/sandbox_run_windows.go b/pkg/server/sandbox_run_windows.go
new file mode 100644
index 000000000..85105c299
--- /dev/null
+++ b/pkg/server/sandbox_run_windows.go
@@ -0,0 +1,91 @@
+// +build windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/oci"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/pkg/errors"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/annotations"
+ customopts "github.com/containerd/cri/pkg/containerd/opts"
+)
+
+// sandboxContainerSpec returns the OCI runtime spec for the windows sandbox
+// (pause) container, built from the pod config, the pause image config, the
+// network namespace path and any passthrough pod annotations.
+func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxConfig,
+ imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (*runtimespec.Spec, error) {
+ // Creates a spec Generator with the default spec.
+ specOpts := []oci.SpecOpts{
+ oci.WithEnv(imageConfig.Env),
+ oci.WithHostname(config.GetHostname()),
+ }
+ if imageConfig.WorkingDir != "" {
+ specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
+ }
+
+ if len(imageConfig.Entrypoint) == 0 && len(imageConfig.Cmd) == 0 {
+ // Pause image must have entrypoint or cmd.
+ return nil, errors.Errorf("invalid empty entrypoint and cmd in image config %+v", imageConfig)
+ }
+ specOpts = append(specOpts, oci.WithProcessArgs(append(imageConfig.Entrypoint, imageConfig.Cmd...)...))
+
+ specOpts = append(specOpts,
+ // Clear the root location since hcsshim expects it.
+ // NOTE: readonly rootfs doesn't work on windows.
+ customopts.WithoutRoot,
+ customopts.WithWindowsNetworkNamespace(nsPath),
+ )
+
+ specOpts = append(specOpts, customopts.WithWindowsDefaultSandboxShares)
+
+ // Forward passthrough pod annotations into the OCI spec.
+ for pKey, pValue := range getPassthroughAnnotations(config.Annotations,
+ runtimePodAnnotations) {
+ specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
+ }
+
+ // Internal annotations identifying this container as the pod sandbox.
+ specOpts = append(specOpts,
+ customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeSandbox),
+ customopts.WithAnnotation(annotations.SandboxID, id),
+ customopts.WithAnnotation(annotations.SandboxLogDir, config.GetLogDirectory()),
+ )
+
+ return c.runtimeSpec(id, "", specOpts...)
+}
+
+// sandboxContainerSpecOpts returns additional spec options for the sandbox
+// container. There are no windows-specific options yet, so it returns nil.
+func (c *criService) sandboxContainerSpecOpts(config *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
+ return nil, nil
+}
+
+// setupSandboxFiles sets up files required by the sandbox. No sandbox files
+// are needed on windows, so this is a no-op.
+func (c *criService) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
+ return nil
+}
+
+// cleanupSandboxFiles cleans up files set up for the sandbox. No sandbox files
+// are needed on windows, so this is a no-op.
+func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
+ return nil
+}
+
+// taskOpts generates task options for a (sandbox) container. No task options
+// are needed on windows, so it returns nil.
+func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
+ return nil
+}
diff --git a/pkg/server/sandbox_run_windows_test.go b/pkg/server/sandbox_run_windows_test.go
new file mode 100644
index 000000000..96d2c1c74
--- /dev/null
+++ b/pkg/server/sandbox_run_windows_test.go
@@ -0,0 +1,86 @@
+// +build windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/stretchr/testify/assert"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/annotations"
+ "github.com/containerd/cri/pkg/containerd/opts"
+)
+
+// getRunPodSandboxTestData returns a fake pod sandbox config, a fake pause
+// image config, and a spec checker that validates the generated windows spec
+// against that data.
+func getRunPodSandboxTestData() (*runtime.PodSandboxConfig, *imagespec.ImageConfig, func(*testing.T, string, *runtimespec.Spec)) {
+ config := &runtime.PodSandboxConfig{
+ Metadata: &runtime.PodSandboxMetadata{
+ Name: "test-name",
+ Uid: "test-uid",
+ Namespace: "test-ns",
+ Attempt: 1,
+ },
+ Hostname: "test-hostname",
+ LogDirectory: "test-log-directory",
+ Labels: map[string]string{"a": "b"},
+ Annotations: map[string]string{"c": "d"},
+ }
+ imageConfig := &imagespec.ImageConfig{
+ Env: []string{"a=b", "c=d"},
+ Entrypoint: []string{"/pause"},
+ Cmd: []string{"forever"},
+ WorkingDir: "/workspace",
+ }
+ // specCheck asserts the spec fields derived from the configs above, plus the
+ // internal sandbox annotations set by sandboxContainerSpec.
+ specCheck := func(t *testing.T, id string, spec *runtimespec.Spec) {
+ assert.Equal(t, "test-hostname", spec.Hostname)
+ assert.Nil(t, spec.Root)
+ assert.Contains(t, spec.Process.Env, "a=b", "c=d")
+ assert.Equal(t, []string{"/pause", "forever"}, spec.Process.Args)
+ assert.Equal(t, "/workspace", spec.Process.Cwd)
+ assert.EqualValues(t, *spec.Windows.Resources.CPU.Shares, opts.DefaultSandboxCPUshares)
+
+ t.Logf("Check PodSandbox annotations")
+ assert.Contains(t, spec.Annotations, annotations.SandboxID)
+ assert.EqualValues(t, spec.Annotations[annotations.SandboxID], id)
+
+ assert.Contains(t, spec.Annotations, annotations.ContainerType)
+ assert.EqualValues(t, spec.Annotations[annotations.ContainerType], annotations.ContainerTypeSandbox)
+
+ assert.Contains(t, spec.Annotations, annotations.SandboxLogDir)
+ assert.EqualValues(t, spec.Annotations[annotations.SandboxLogDir], "test-log-directory")
+ }
+ return config, imageConfig, specCheck
+}
+
+// TestSandboxWindowsNetworkNamespace verifies that the CNI network namespace
+// path is propagated into the windows section of the generated sandbox spec.
+func TestSandboxWindowsNetworkNamespace(t *testing.T) {
+ testID := "test-id"
+ nsPath := "test-cni"
+ c := newTestCRIService()
+
+ config, imageConfig, specCheck := getRunPodSandboxTestData()
+ spec, err := c.sandboxContainerSpec(testID, config, imageConfig, nsPath, nil)
+ assert.NoError(t, err)
+ assert.NotNil(t, spec)
+ specCheck(t, testID, spec)
+ assert.NotNil(t, spec.Windows)
+ assert.NotNil(t, spec.Windows.Network)
+ assert.Equal(t, nsPath, spec.Windows.Network.NetworkNamespace)
+}
diff --git a/pkg/server/sandbox_status.go b/pkg/server/sandbox_status.go
new file mode 100644
index 000000000..5644ab1be
--- /dev/null
+++ b/pkg/server/sandbox_status.go
@@ -0,0 +1,217 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "encoding/json"
+ goruntime "runtime"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/errdefs"
+ cni "github.com/containerd/go-cni"
+ runtimespec "github.com/opencontainers/runtime-spec/specs-go"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
+)
+
+// PodSandboxStatus returns the status of the PodSandbox. When the request is
+// verbose, extra sandbox info is attached as a JSON "info" map entry.
+func (c *criService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandboxStatusRequest) (*runtime.PodSandboxStatusResponse, error) {
+ sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId())
+ if err != nil {
+ return nil, errors.Wrap(err, "an error occurred when try to find sandbox")
+ }
+
+ // Resolve the sandbox IPs; these are empty when the sandbox uses the node
+ // network or its network namespace is closed (see getIPs).
+ ip, additionalIPs, err := c.getIPs(sandbox)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to get sandbox ip")
+ }
+ status := toCRISandboxStatus(sandbox.Metadata, sandbox.Status.Get(), ip, additionalIPs)
+ if status.GetCreatedAt() == 0 {
+ // CRI doesn't allow CreatedAt == 0.
+ info, err := sandbox.Container.Info(ctx)
+ if err != nil {
+ return nil, errors.Wrapf(err, "failed to get CreatedAt for sandbox container in %q state", status.State)
+ }
+ status.CreatedAt = info.CreatedAt.UnixNano()
+ }
+ if !r.GetVerbose() {
+ return &runtime.PodSandboxStatusResponse{Status: status}, nil
+ }
+
+ // Generate verbose information.
+ info, err := toCRISandboxInfo(ctx, sandbox)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to get verbose sandbox container info")
+ }
+
+ return &runtime.PodSandboxStatusResponse{
+ Status: status,
+ Info: info,
+ }, nil
+}
+
+// getIPs returns the sandbox's primary IP and additional IPs. It returns empty
+// values (without error) when the sandbox uses the node network or when its
+// network namespace has already been closed.
+func (c *criService) getIPs(sandbox sandboxstore.Sandbox) (string, []string, error) {
+ config := sandbox.Config
+
+ if goruntime.GOOS != "windows" &&
+ config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
+ // For sandboxes using the node network we are not
+ // responsible for reporting the IP.
+ return "", nil, nil
+ }
+
+ // A closed network namespace means the IPs are no longer valid.
+ if closed, err := sandbox.NetNS.Closed(); err != nil {
+ return "", nil, errors.Wrap(err, "check network namespace closed")
+ } else if closed {
+ return "", nil, nil
+ }
+
+ return sandbox.IP, sandbox.AdditionalIPs, nil
+}
+
+// toCRISandboxStatus converts sandbox metadata into CRI pod sandbox status.
+func toCRISandboxStatus(meta sandboxstore.Metadata, status sandboxstore.Status, ip string, additionalIPs []string) *runtime.PodSandboxStatus {
+ // Set sandbox state to NOTREADY by default; only StateReady maps to READY.
+ state := runtime.PodSandboxState_SANDBOX_NOTREADY
+ if status.State == sandboxstore.StateReady {
+ state = runtime.PodSandboxState_SANDBOX_READY
+ }
+ // Surface the namespace options from the original sandbox config.
+ nsOpts := meta.Config.GetLinux().GetSecurityContext().GetNamespaceOptions()
+ var ips []*runtime.PodIP
+ for _, additionalIP := range additionalIPs {
+ ips = append(ips, &runtime.PodIP{Ip: additionalIP})
+ }
+ return &runtime.PodSandboxStatus{
+ Id: meta.ID,
+ Metadata: meta.Config.GetMetadata(),
+ State: state,
+ CreatedAt: status.CreatedAt.UnixNano(),
+ Network: &runtime.PodSandboxNetworkStatus{
+ Ip: ip,
+ AdditionalIps: ips,
+ },
+ Linux: &runtime.LinuxPodSandboxStatus{
+ Namespaces: &runtime.Namespace{
+ Options: &runtime.NamespaceOption{
+ Network: nsOpts.GetNetwork(),
+ Pid: nsOpts.GetPid(),
+ Ipc: nsOpts.GetIpc(),
+ },
+ },
+ },
+ Labels: meta.Config.GetLabels(),
+ Annotations: meta.Config.GetAnnotations(),
+ RuntimeHandler: meta.RuntimeHandler,
+ }
+}
+
+// SandboxInfo is extra information for sandbox. It is serialized to JSON and
+// returned in the verbose PodSandboxStatus "info" map.
+// TODO (mikebrow): discuss predefining constants structures for some or all of these field names in CRI
+type SandboxInfo struct {
+ Pid uint32 `json:"pid"`
+ Status string `json:"processStatus"`
+ NetNSClosed bool `json:"netNamespaceClosed"`
+ Image string `json:"image"`
+ SnapshotKey string `json:"snapshotKey"`
+ Snapshotter string `json:"snapshotter"`
+ // Note: a new field `RuntimeHandler` has been added into the CRI PodSandboxStatus struct, and
+ // should be set. This `RuntimeHandler` field will be deprecated after containerd 1.3 (tracked
+ // in https://github.com/containerd/cri/issues/1064).
+ RuntimeHandler string `json:"runtimeHandler"` // see the Note above
+ RuntimeType string `json:"runtimeType"`
+ RuntimeOptions interface{} `json:"runtimeOptions"`
+ Config *runtime.PodSandboxConfig `json:"config"`
+ RuntimeSpec *runtimespec.Spec `json:"runtimeSpec"`
+ // CNIResult records the CNI network setup result for the sandbox, if any.
+ CNIResult *cni.CNIResult `json:"cniResult"`
+}
+
+// toCRISandboxInfo converts internal container object information to CRI sandbox status response info map.
+func toCRISandboxInfo(ctx context.Context, sandbox sandboxstore.Sandbox) (map[string]string, error) {
+ container := sandbox.Container
+ // A missing task is tolerated here; such a sandbox is reported as "deleted" below.
+ task, err := container.Task(ctx, nil)
+ if err != nil && !errdefs.IsNotFound(err) {
+ return nil, errors.Wrap(err, "failed to get sandbox container task")
+ }
+
+ var processStatus containerd.ProcessStatus
+ if task != nil {
+ taskStatus, err := task.Status(ctx)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to get task status")
+ }
+
+ processStatus = taskStatus.Status
+ }
+
+ si := &SandboxInfo{
+ Pid: sandbox.Status.Get().Pid,
+ RuntimeHandler: sandbox.RuntimeHandler,
+ Status: string(processStatus),
+ Config: sandbox.Config,
+ CNIResult: sandbox.CNIResult,
+ }
+
+ if si.Status == "" {
+ // If processStatus is empty, it means that the task is deleted. Apply "deleted"
+ // status which does not exist in containerd.
+ si.Status = "deleted"
+ }
+
+ if sandbox.NetNS != nil {
+ // Add network closed information if sandbox is not using host network.
+ closed, err := sandbox.NetNS.Closed()
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to check network namespace closed")
+ }
+ si.NetNSClosed = closed
+ }
+
+ spec, err := container.Spec(ctx)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to get sandbox container runtime spec")
+ }
+ si.RuntimeSpec = spec
+
+ ctrInfo, err := container.Info(ctx)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to get sandbox container info")
+ }
+ // Do not use config.SandboxImage because the configuration might
+ // be changed during restart. It may not reflect the actual image
+ // used by the sandbox container.
+ si.Image = ctrInfo.Image
+ si.SnapshotKey = ctrInfo.SnapshotKey
+ si.Snapshotter = ctrInfo.Snapshotter
+
+ runtimeOptions, err := getRuntimeOptions(ctrInfo)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to get runtime options")
+ }
+ si.RuntimeType = ctrInfo.Runtime.Name
+ si.RuntimeOptions = runtimeOptions
+
+ // Serialize everything into the single "info" entry expected by CRI clients.
+ infoBytes, err := json.Marshal(si)
+ if err != nil {
+ return nil, errors.Wrapf(err, "failed to marshal info %v", si)
+ }
+ return map[string]string{
+ "info": string(infoBytes),
+ }, nil
+}
diff --git a/pkg/server/sandbox_status_test.go b/pkg/server/sandbox_status_test.go
new file mode 100644
index 000000000..894f945f5
--- /dev/null
+++ b/pkg/server/sandbox_status_test.go
@@ -0,0 +1,116 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
+)
+
+// TestPodSandboxStatus verifies that toCRISandboxStatus maps sandbox metadata,
+// IPs, and each internal sandbox state to the expected CRI status.
+func TestPodSandboxStatus(t *testing.T) {
+ const (
+ id = "test-id"
+ ip = "10.10.10.10"
+ )
+ additionalIPs := []string{"8.8.8.8", "2001:db8:85a3::8a2e:370:7334"}
+ createdAt := time.Now()
+ config := &runtime.PodSandboxConfig{
+ Metadata: &runtime.PodSandboxMetadata{
+ Name: "test-name",
+ Uid: "test-uid",
+ Namespace: "test-ns",
+ Attempt: 1,
+ },
+ Linux: &runtime.LinuxPodSandboxConfig{
+ SecurityContext: &runtime.LinuxSandboxSecurityContext{
+ NamespaceOptions: &runtime.NamespaceOption{
+ Network: runtime.NamespaceMode_NODE,
+ Pid: runtime.NamespaceMode_CONTAINER,
+ Ipc: runtime.NamespaceMode_POD,
+ },
+ },
+ },
+ Labels: map[string]string{"a": "b"},
+ Annotations: map[string]string{"c": "d"},
+ }
+ metadata := sandboxstore.Metadata{
+ ID: id,
+ Name: "test-name",
+ Config: config,
+ RuntimeHandler: "test-runtime-handler",
+ }
+
+ // expected is shared across cases; only State is overwritten per case below.
+ expected := &runtime.PodSandboxStatus{
+ Id: id,
+ Metadata: config.GetMetadata(),
+ CreatedAt: createdAt.UnixNano(),
+ Network: &runtime.PodSandboxNetworkStatus{
+ Ip: ip,
+ AdditionalIps: []*runtime.PodIP{
+ {
+ Ip: additionalIPs[0],
+ },
+ {
+ Ip: additionalIPs[1],
+ },
+ },
+ },
+ Linux: &runtime.LinuxPodSandboxStatus{
+ Namespaces: &runtime.Namespace{
+ Options: &runtime.NamespaceOption{
+ Network: runtime.NamespaceMode_NODE,
+ Pid: runtime.NamespaceMode_CONTAINER,
+ Ipc: runtime.NamespaceMode_POD,
+ },
+ },
+ },
+ Labels: config.GetLabels(),
+ Annotations: config.GetAnnotations(),
+ RuntimeHandler: "test-runtime-handler",
+ }
+ for desc, test := range map[string]struct {
+ state sandboxstore.State
+ expectedState runtime.PodSandboxState
+ }{
+ "sandbox state ready": {
+ state: sandboxstore.StateReady,
+ expectedState: runtime.PodSandboxState_SANDBOX_READY,
+ },
+ "sandbox state not ready": {
+ state: sandboxstore.StateNotReady,
+ expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY,
+ },
+ "sandbox state unknown": {
+ state: sandboxstore.StateUnknown,
+ expectedState: runtime.PodSandboxState_SANDBOX_NOTREADY,
+ },
+ } {
+ t.Logf("TestCase: %s", desc)
+ status := sandboxstore.Status{
+ CreatedAt: createdAt,
+ State: test.state,
+ }
+ expected.State = test.expectedState
+ got := toCRISandboxStatus(metadata, status, ip, additionalIPs)
+ assert.Equal(t, expected, got)
+ }
+}
diff --git a/pkg/server/sandbox_stop.go b/pkg/server/sandbox_stop.go
new file mode 100644
index 000000000..9b6e0a6ec
--- /dev/null
+++ b/pkg/server/sandbox_stop.go
@@ -0,0 +1,195 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "syscall"
+ "time"
+
+ eventtypes "github.com/containerd/containerd/api/events"
+ "github.com/containerd/containerd/errdefs"
+ "github.com/containerd/containerd/log"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ ctrdutil "github.com/containerd/cri/pkg/containerd/util"
+ sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
+)
+
+// StopPodSandbox stops the sandbox. If there are any running containers in the
+// sandbox, they should be forcibly terminated.
+func (c *criService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandboxRequest) (*runtime.StopPodSandboxResponse, error) {
+	// Resolve the sandbox first; stopping an unknown sandbox is an error
+	// the caller must be told about.
+	sandbox, err := c.sandboxStore.Get(r.GetPodSandboxId())
+	if err != nil {
+		return nil, errors.Wrapf(err, "an error occurred when trying to find sandbox %q",
+			r.GetPodSandboxId())
+	}
+
+	if err := c.stopPodSandbox(ctx, sandbox); err != nil {
+		return nil, err
+	}
+
+	return &runtime.StopPodSandboxResponse{}, nil
+}
+
+// stopPodSandbox stops every container belonging to the sandbox, cleans up
+// sandbox files, stops the sandbox container itself when it is still running
+// (or in unknown state), and finally tears down the pod network. The steps
+// run in this fixed order; each failure aborts the remainder.
+func (c *criService) stopPodSandbox(ctx context.Context, sandbox sandboxstore.Sandbox) error {
+	// Use the full sandbox id.
+	id := sandbox.ID
+
+	// Stop all containers inside the sandbox. This terminates the container forcibly,
+	// and container may still be created, so production should not rely on this behavior.
+	// TODO(random-liu): Introduce a state in sandbox to avoid future container creation.
+	containers := c.containerStore.List()
+	for _, container := range containers {
+		if container.SandboxID != id {
+			continue
+		}
+		// Forcibly stop the container. Do not use `StopContainer`, because it introduces a race
+		// if a container is removed after list.
+		// Timeout 0 means no grace period.
+		if err := c.stopContainer(ctx, container, 0); err != nil {
+			return errors.Wrapf(err, "failed to stop container %q", container.ID)
+		}
+	}
+
+	if err := c.cleanupSandboxFiles(id, sandbox.Config); err != nil {
+		return errors.Wrap(err, "failed to cleanup sandbox files")
+	}
+
+	// Only stop sandbox container when it's running or unknown.
+	state := sandbox.Status.Get().State
+	if state == sandboxstore.StateReady || state == sandboxstore.StateUnknown {
+		if err := c.stopSandboxContainer(ctx, sandbox); err != nil {
+			return errors.Wrapf(err, "failed to stop sandbox container %q in %q state", id, state)
+		}
+	}
+
+	// Teardown network for sandbox.
+	if sandbox.NetNS != nil {
+		// Use empty netns path if netns is not available. This is defined in:
+		// https://github.com/containernetworking/cni/blob/v0.7.0-alpha1/SPEC.md
+		if closed, err := sandbox.NetNS.Closed(); err != nil {
+			return errors.Wrap(err, "failed to check network namespace closed")
+		} else if closed {
+			sandbox.NetNSPath = ""
+		}
+		if err := c.teardownPodNetwork(ctx, sandbox); err != nil {
+			return errors.Wrapf(err, "failed to destroy network for sandbox %q", id)
+		}
+		if err := sandbox.NetNS.Remove(); err != nil {
+			return errors.Wrapf(err, "failed to remove network namespace for sandbox %q", id)
+		}
+	}
+
+	log.G(ctx).Infof("TearDown network for sandbox %q successfully", id)
+
+	return nil
+}
+
+// stopSandboxContainer kills the sandbox container.
+// `task.Delete` is not called here because it will be called when
+// the event monitor handles the `TaskExit` event.
+// For sandboxes in unknown state, an exit monitor is started first so the
+// eventual exit is observed and cleaned up correctly.
+func (c *criService) stopSandboxContainer(ctx context.Context, sandbox sandboxstore.Sandbox) error {
+	id := sandbox.ID
+	container := sandbox.Container
+	state := sandbox.Status.Get().State
+	task, err := container.Task(ctx, nil)
+	if err != nil {
+		if !errdefs.IsNotFound(err) {
+			return errors.Wrap(err, "failed to get sandbox container")
+		}
+		// Task is already gone.
+		// Don't return for unknown state, some cleanup needs to be done.
+		if state == sandboxstore.StateUnknown {
+			return cleanupUnknownSandbox(ctx, id, sandbox)
+		}
+		return nil
+	}
+
+	// Handle unknown state.
+	// The cleanup logic is the same with container unknown state.
+	if state == sandboxstore.StateUnknown {
+		// Start an exit handler for containers in unknown state.
+		// Note: waits on a namespaced background context, not ctx.
+		waitCtx, waitCancel := context.WithCancel(ctrdutil.NamespacedContext())
+		defer waitCancel()
+		exitCh, err := task.Wait(waitCtx)
+		if err != nil {
+			if !errdefs.IsNotFound(err) {
+				return errors.Wrap(err, "failed to wait for task")
+			}
+			// Task disappeared between Task() and Wait(); fall back to
+			// the unknown-state cleanup path.
+			return cleanupUnknownSandbox(ctx, id, sandbox)
+		}
+
+		exitCtx, exitCancel := context.WithCancel(context.Background())
+		stopCh := c.eventMonitor.startExitMonitor(exitCtx, id, task.Pid(), exitCh)
+		defer func() {
+			exitCancel()
+			// This ensures that exit monitor is stopped before
+			// `Wait` is cancelled, so no exit event is generated
+			// because of the `Wait` cancellation.
+			<-stopCh
+		}()
+	}
+
+	// Kill the sandbox container.
+	if err = task.Kill(ctx, syscall.SIGKILL); err != nil && !errdefs.IsNotFound(err) {
+		return errors.Wrap(err, "failed to kill sandbox container")
+	}
+
+	// Block until the sandbox is marked stopped (or ctx expires).
+	return c.waitSandboxStop(ctx, sandbox)
+}
+
+// waitSandboxStop blocks until the sandbox reports stopped, or returns the
+// context error (wrapped with the sandbox id) if ctx is cancelled or its
+// deadline expires first.
+func (c *criService) waitSandboxStop(ctx context.Context, sandbox sandboxstore.Sandbox) error {
+	stopped := sandbox.Stopped()
+	select {
+	case <-stopped:
+		return nil
+	case <-ctx.Done():
+		return errors.Wrapf(ctx.Err(), "wait sandbox container %q", sandbox.ID)
+	}
+}
+
+// teardownPodNetwork removes the network from the pod
+func (c *criService) teardownPodNetwork(ctx context.Context, sandbox sandboxstore.Sandbox) error {
+ if c.netPlugin == nil {
+ return errors.New("cni config not initialized")
+ }
+
+ var (
+ id = sandbox.ID
+ path = sandbox.NetNSPath
+ config = sandbox.Config
+ )
+ opts, err := cniNamespaceOpts(id, config)
+ if err != nil {
+ return errors.Wrap(err, "get cni namespace options")
+ }
+
+ return c.netPlugin.Remove(ctx, id, path, opts...)
+}
+
+// cleanupUnknownSandbox cleanup stopped sandbox in unknown state.
+func cleanupUnknownSandbox(ctx context.Context, id string, sandbox sandboxstore.Sandbox) error {
+	// Reuse handleSandboxExit to do the cleanup, feeding it a synthetic
+	// TaskExit event with an unknown exit code.
+	exitEvent := &eventtypes.TaskExit{
+		ContainerID: id,
+		ID:          id,
+		Pid:         0,
+		ExitStatus:  unknownExitCode,
+		ExitedAt:    time.Now(),
+	}
+	return handleSandboxExit(ctx, exitEvent, sandbox)
+}
diff --git a/pkg/server/sandbox_stop_test.go b/pkg/server/sandbox_stop_test.go
new file mode 100644
index 000000000..59083e5b3
--- /dev/null
+++ b/pkg/server/sandbox_stop_test.go
@@ -0,0 +1,73 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/assert"
+ "golang.org/x/net/context"
+
+ sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
+)
+
+// TestWaitSandboxStop verifies waitSandboxStop honors timeout, cancellation,
+// and an already-stopped sandbox. Each case runs as a t.Run subtest so the
+// deferred context cancellations fire per case instead of accumulating
+// until the whole test function returns.
+func TestWaitSandboxStop(t *testing.T) {
+	id := "test-id"
+	for desc, test := range map[string]struct {
+		state     sandboxstore.State
+		cancel    bool
+		timeout   time.Duration
+		expectErr bool
+	}{
+		"should return error if timeout exceeds": {
+			state:     sandboxstore.StateReady,
+			timeout:   200 * time.Millisecond,
+			expectErr: true,
+		},
+		"should return error if context is cancelled": {
+			state:     sandboxstore.StateReady,
+			timeout:   time.Hour,
+			cancel:    true,
+			expectErr: true,
+		},
+		"should not return error if sandbox is stopped before timeout": {
+			state:     sandboxstore.StateNotReady,
+			timeout:   time.Hour,
+			expectErr: false,
+		},
+	} {
+		desc, test := desc, test
+		t.Run(desc, func(t *testing.T) {
+			c := newTestCRIService()
+			sandbox := sandboxstore.NewSandbox(
+				sandboxstore.Metadata{ID: id},
+				sandboxstore.Status{State: test.state},
+			)
+			ctx := context.Background()
+			if test.cancel {
+				cancelledCtx, cancel := context.WithCancel(ctx)
+				cancel()
+				ctx = cancelledCtx
+			}
+			if test.timeout > 0 {
+				timeoutCtx, cancel := context.WithTimeout(ctx, test.timeout)
+				defer cancel()
+				ctx = timeoutCtx
+			}
+			err := c.waitSandboxStop(ctx, sandbox)
+			assert.Equal(t, test.expectErr, err != nil, desc)
+		})
+	}
+}
diff --git a/pkg/server/service.go b/pkg/server/service.go
new file mode 100644
index 000000000..94e02591a
--- /dev/null
+++ b/pkg/server/service.go
@@ -0,0 +1,325 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "encoding/json"
+ "fmt"
+ "io"
+ "net/http"
+ "os"
+ "path/filepath"
+ "time"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/oci"
+ "github.com/containerd/containerd/plugin"
+ "github.com/containerd/cri/pkg/streaming"
+ cni "github.com/containerd/go-cni"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+ "google.golang.org/grpc"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/store/label"
+
+ "github.com/containerd/cri/pkg/atomic"
+ criconfig "github.com/containerd/cri/pkg/config"
+ ctrdutil "github.com/containerd/cri/pkg/containerd/util"
+ osinterface "github.com/containerd/cri/pkg/os"
+ "github.com/containerd/cri/pkg/registrar"
+ containerstore "github.com/containerd/cri/pkg/store/container"
+ imagestore "github.com/containerd/cri/pkg/store/image"
+ sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
+ snapshotstore "github.com/containerd/cri/pkg/store/snapshot"
+)
+
+// grpcServices are all the grpc services provided by cri containerd:
+// the CRI runtime service and the CRI image service.
+type grpcServices interface {
+	runtime.RuntimeServiceServer
+	runtime.ImageServiceServer
+}
+
+// CRIService is the interface implemented by the CRI remote service server.
+type CRIService interface {
+	// Run starts the CRI service and blocks until a critical sub-service exits.
+	Run() error
+	// io.Closer is used by containerd to gracefully stop cri service.
+	io.Closer
+	plugin.Service
+	grpcServices
+}
+
+// criService implements CRIService.
+type criService struct {
+	// config contains all configurations.
+	config criconfig.Config
+	// imageFSPath is the path to image filesystem.
+	imageFSPath string
+	// os is an interface for all required os operations.
+	os osinterface.OS
+	// sandboxStore stores all resources associated with sandboxes.
+	sandboxStore *sandboxstore.Store
+	// sandboxNameIndex stores all sandbox names and makes sure each name
+	// is unique.
+	sandboxNameIndex *registrar.Registrar
+	// containerStore stores all resources associated with containers.
+	containerStore *containerstore.Store
+	// containerNameIndex stores all container names and makes sure each
+	// name is unique.
+	containerNameIndex *registrar.Registrar
+	// imageStore stores all resources associated with images.
+	imageStore *imagestore.Store
+	// snapshotStore stores information of all snapshots.
+	snapshotStore *snapshotstore.Store
+	// netPlugin is used to setup and teardown network when run/stop pod sandbox.
+	netPlugin cni.CNI
+	// client is an instance of the containerd client
+	client *containerd.Client
+	// streamServer is the streaming server that serves container streaming requests.
+	streamServer streaming.Server
+	// eventMonitor is the monitor that monitors containerd events.
+	eventMonitor *eventMonitor
+	// initialized indicates whether the server is initialized. All GRPC services
+	// should return error before the server is initialized.
+	initialized atomic.Bool
+	// cniNetConfMonitor is used to reload cni network conf if there is
+	// any valid fs change events from cni network conf dir.
+	cniNetConfMonitor *cniNetConfSyncer
+	// baseOCISpecs contains cached OCI specs loaded via `Runtime.BaseRuntimeSpec`,
+	// keyed by spec file path.
+	baseOCISpecs map[string]*oci.Spec
+}
+
+// NewCRIService returns a new instance of CRIService built on top of the
+// given containerd client and CRI plugin config. Initialization order below
+// matters: platform init creates the CNI plugin, which the cni conf monitor
+// created afterwards depends on.
+func NewCRIService(config criconfig.Config, client *containerd.Client) (CRIService, error) {
+	var err error
+	labels := label.NewStore()
+	c := &criService{
+		config:             config,
+		client:             client,
+		os:                 osinterface.RealOS{},
+		sandboxStore:       sandboxstore.NewStore(labels),
+		containerStore:     containerstore.NewStore(labels),
+		imageStore:         imagestore.NewStore(client),
+		snapshotStore:      snapshotstore.NewStore(),
+		sandboxNameIndex:   registrar.NewRegistrar(),
+		containerNameIndex: registrar.NewRegistrar(),
+		initialized:        atomic.NewBool(false),
+	}
+
+	// Fail early if the configured snapshotter is not loaded in containerd.
+	if client.SnapshotService(c.config.ContainerdConfig.Snapshotter) == nil {
+		return nil, errors.Errorf("failed to find snapshotter %q", c.config.ContainerdConfig.Snapshotter)
+	}
+
+	c.imageFSPath = imageFSPath(config.ContainerdRootDir, config.ContainerdConfig.Snapshotter)
+	logrus.Infof("Get image filesystem path %q", c.imageFSPath)
+
+	// Platform-specific setup (e.g. CNI plugin creation); must run before
+	// the cni conf monitor below, which reads c.netPlugin.
+	if err := c.initPlatform(); err != nil {
+		return nil, errors.Wrap(err, "initialize platform")
+	}
+
+	// prepare streaming server
+	c.streamServer, err = newStreamServer(c, config.StreamServerAddress, config.StreamServerPort, config.StreamIdleTimeout)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to create stream server")
+	}
+
+	c.eventMonitor = newEventMonitor(c)
+
+	c.cniNetConfMonitor, err = newCNINetConfSyncer(c.config.NetworkPluginConfDir, c.netPlugin, c.cniLoadOptions())
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to create cni conf monitor")
+	}
+
+	// Preload base OCI specs
+	c.baseOCISpecs, err = loadBaseOCISpecs(&config)
+	if err != nil {
+		return nil, err
+	}
+
+	return c, nil
+}
+
+// Register registers all required services onto a specific grpc server.
+// This is used by containerd cri plugin. Both the runtime and image
+// services are registered via the shared register helper.
+func (c *criService) Register(s *grpc.Server) error {
+	return c.register(s)
+}
+
+// RegisterTCP registers all required services onto a GRPC server on TCP,
+// unless the TCP service is disabled in config. This is used by the
+// containerd CRI plugin.
+func (c *criService) RegisterTCP(s *grpc.Server) error {
+	if c.config.DisableTCPService {
+		// TCP serving is explicitly disabled; registering nothing is not an error.
+		return nil
+	}
+	return c.register(s)
+}
+
+// Run starts the CRI service. It blocks until one of the critical
+// sub-services (event monitor, stream server, cni conf monitor) exits,
+// then shuts the whole service down and returns the first error observed.
+func (c *criService) Run() error {
+	logrus.Info("Start subscribing containerd event")
+	c.eventMonitor.subscribe(c.client)
+
+	logrus.Info("Start recovering state")
+	if err := c.recover(ctrdutil.NamespacedContext()); err != nil {
+		return errors.Wrap(err, "failed to recover state")
+	}
+
+	// Start event handler.
+	logrus.Info("Start event monitor")
+	eventMonitorErrCh := c.eventMonitor.start()
+
+	// Start snapshot stats syncer, it doesn't need to be stopped.
+	logrus.Info("Start snapshots syncer")
+	snapshotsSyncer := newSnapshotsSyncer(
+		c.snapshotStore,
+		c.client.SnapshotService(c.config.ContainerdConfig.Snapshotter),
+		time.Duration(c.config.StatsCollectPeriod)*time.Second,
+	)
+	snapshotsSyncer.start()
+
+	// Start CNI network conf syncer
+	logrus.Info("Start cni network conf syncer")
+	cniNetConfMonitorErrCh := make(chan error, 1)
+	go func() {
+		defer close(cniNetConfMonitorErrCh)
+		cniNetConfMonitorErrCh <- c.cniNetConfMonitor.syncLoop()
+	}()
+
+	// Start streaming server.
+	logrus.Info("Start streaming server")
+	streamServerErrCh := make(chan error)
+	go func() {
+		defer close(streamServerErrCh)
+		if err := c.streamServer.Start(true); err != nil && err != http.ErrServerClosed {
+			logrus.WithError(err).Error("Failed to start streaming server")
+			streamServerErrCh <- err
+		}
+	}()
+
+	// Set the server as initialized. GRPC services could start serving traffic.
+	c.initialized.Set()
+
+	var eventMonitorErr, streamServerErr, cniNetConfMonitorErr error
+	// Stop the whole CRI service if any of the critical service exits.
+	select {
+	case eventMonitorErr = <-eventMonitorErrCh:
+	case streamServerErr = <-streamServerErrCh:
+	case cniNetConfMonitorErr = <-cniNetConfMonitorErrCh:
+	}
+	if err := c.Close(); err != nil {
+		return errors.Wrap(err, "failed to stop cri service")
+	}
+	// If the error is set above, err from channel must be nil here, because
+	// the channel is supposed to be closed. Or else, we wait and set it.
+	if err := <-eventMonitorErrCh; err != nil {
+		eventMonitorErr = err
+	}
+	logrus.Info("Event monitor stopped")
+	// There is a race condition with http.Server.Serve.
+	// When `Close` is called at the same time with `Serve`, `Close`
+	// may finish first, and `Serve` may still block.
+	// See https://github.com/golang/go/issues/20239.
+	// Here we set a 2 second timeout for the stream server wait,
+	// if it timeout, an error log is generated.
+	// TODO(random-liu): Get rid of this after https://github.com/golang/go/issues/20239
+	// is fixed.
+	const streamServerStopTimeout = 2 * time.Second
+	select {
+	case err := <-streamServerErrCh:
+		if err != nil {
+			streamServerErr = err
+		}
+		logrus.Info("Stream server stopped")
+	case <-time.After(streamServerStopTimeout):
+		logrus.Errorf("Stream server is not stopped in %q", streamServerStopTimeout)
+	}
+	if eventMonitorErr != nil {
+		return errors.Wrap(eventMonitorErr, "event monitor error")
+	}
+	if streamServerErr != nil {
+		return errors.Wrap(streamServerErr, "stream server error")
+	}
+	if cniNetConfMonitorErr != nil {
+		return errors.Wrap(cniNetConfMonitorErr, "cni network conf monitor error")
+	}
+	return nil
+}
+
+// Close stops the CRI service. The cni conf monitor and event monitor are
+// stopped best-effort first; only a stream server stop failure is returned.
+// TODO(random-liu): Make close synchronous.
+func (c *criService) Close() error {
+	logrus.Info("Stop CRI service")
+	// A cni conf monitor stop failure is logged but does not abort the
+	// rest of the shutdown.
+	if err := c.cniNetConfMonitor.stop(); err != nil {
+		logrus.WithError(err).Error("failed to stop cni network conf monitor")
+	}
+	c.eventMonitor.stop()
+	if err := c.streamServer.Stop(); err != nil {
+		return errors.Wrap(err, "failed to stop stream server")
+	}
+	return nil
+}
+
+// register wires the instrumented runtime and image services into s.
+func (c *criService) register(s *grpc.Server) error {
+	svc := newInstrumentedService(c)
+	runtime.RegisterRuntimeServiceServer(s, svc)
+	runtime.RegisterImageServiceServer(s, svc)
+	return nil
+}
+
+// imageFSPath returns containerd image filesystem path.
+// Note that if containerd changes directory layout, we also needs to change this.
+func imageFSPath(rootDir, snapshotter string) string {
+	// Layout: <root>/<snapshot-plugin-type>.<snapshotter-name>
+	dirName := fmt.Sprintf("%s.%s", plugin.SnapshotPlugin, snapshotter)
+	return filepath.Join(rootDir, dirName)
+}
+
+func loadOCISpec(filename string) (*oci.Spec, error) {
+ file, err := os.Open(filename)
+ if err != nil {
+ return nil, errors.Wrapf(err, "failed to open base OCI spec: %s", filename)
+ }
+ defer file.Close()
+
+ spec := oci.Spec{}
+ if err := json.NewDecoder(file).Decode(&spec); err != nil {
+ return nil, errors.Wrap(err, "failed to parse base OCI spec file")
+ }
+
+ return &spec, nil
+}
+
+// loadBaseOCISpecs preloads every distinct `base_runtime_spec` file referenced
+// by the configured runtimes, keyed by file path.
+func loadBaseOCISpecs(config *criconfig.Config) (map[string]*oci.Spec, error) {
+	loaded := map[string]*oci.Spec{}
+	for _, cfg := range config.Runtimes {
+		filename := cfg.BaseRuntimeSpec
+		if filename == "" {
+			continue
+		}
+
+		// Don't load same file twice.
+		if _, done := loaded[filename]; done {
+			continue
+		}
+
+		spec, err := loadOCISpec(filename)
+		if err != nil {
+			return nil, errors.Wrapf(err, "failed to load base OCI spec from file: %s", filename)
+		}
+
+		loaded[filename] = spec
+	}
+
+	return loaded, nil
+}
diff --git a/pkg/server/service_linux.go b/pkg/server/service_linux.go
new file mode 100644
index 000000000..03b28f0ae
--- /dev/null
+++ b/pkg/server/service_linux.go
@@ -0,0 +1,70 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "github.com/containerd/containerd/sys"
+ cni "github.com/containerd/go-cni"
+ "github.com/opencontainers/selinux/go-selinux"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+)
+
+// networkAttachCount is the minimum number of networks the PodSandbox
+// attaches to.
+const networkAttachCount = 2
+
+// initPlatform handles linux specific initialization for the CRI service:
+// rootless-mode warnings, SELinux enablement, and CNI plugin creation.
+func (c *criService) initPlatform() error {
+	var err error
+
+	if sys.RunningInUserNS() {
+		// Rootless mode: warn unless the config already disables the
+		// features that typically cannot work inside a user namespace.
+		if !(c.config.DisableCgroup && !c.apparmorEnabled() && c.config.RestrictOOMScoreAdj) {
+			logrus.Warn("Running containerd in a user namespace typically requires disable_cgroup, disable_apparmor, restrict_oom_score_adj set to be true")
+		}
+	}
+
+	if c.config.EnableSelinux {
+		if !selinux.GetEnabled() {
+			logrus.Warn("Selinux is not supported")
+		}
+		// Only override the category range when a positive value is configured.
+		if r := c.config.SelinuxCategoryRange; r > 0 {
+			selinux.CategoryRange = uint32(r)
+		}
+	} else {
+		selinux.SetDisabled()
+	}
+
+	// Pod needs to attach to at least loopback network and a non host network,
+	// hence networkAttachCount is 2. If there are more network configs the
+	// pod will be attached to all the networks but we will only use the ip
+	// of the default network interface as the pod IP.
+	c.netPlugin, err = cni.New(cni.WithMinNetworkCount(networkAttachCount),
+		cni.WithPluginConfDir(c.config.NetworkPluginConfDir),
+		cni.WithPluginMaxConfNum(c.config.NetworkPluginMaxConfNum),
+		cni.WithPluginDir([]string{c.config.NetworkPluginBinDir}))
+	if err != nil {
+		return errors.Wrap(err, "failed to initialize cni")
+	}
+
+	return nil
+}
+
+// cniLoadOptions returns the CNI config load options used on Linux: the
+// implicit loopback network plus the default conf-dir configs.
+func (c *criService) cniLoadOptions() []cni.CNIOpt {
+	opts := []cni.CNIOpt{
+		cni.WithLoNetwork,
+		cni.WithDefaultConf,
+	}
+	return opts
+}
diff --git a/pkg/server/service_other.go b/pkg/server/service_other.go
new file mode 100644
index 000000000..c17f7ccae
--- /dev/null
+++ b/pkg/server/service_other.go
@@ -0,0 +1,33 @@
+// +build !windows,!linux
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ cni "github.com/containerd/go-cni"
+)
+
+// initPlatform handles platform specific initialization for the CRI service.
+// No extra setup is needed on platforms other than Linux and Windows.
+func (c *criService) initPlatform() error {
+	return nil
+}
+
+// cniLoadOptions returns cni load options for platforms other than Linux
+// and Windows; no options apply.
+func (c *criService) cniLoadOptions() []cni.CNIOpt {
+	return []cni.CNIOpt{}
+}
diff --git a/pkg/server/service_test.go b/pkg/server/service_test.go
new file mode 100644
index 000000000..a39f0b9ce
--- /dev/null
+++ b/pkg/server/service_test.go
@@ -0,0 +1,102 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "encoding/json"
+ "io/ioutil"
+ "os"
+ "testing"
+
+ "github.com/containerd/containerd/oci"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+
+ criconfig "github.com/containerd/cri/pkg/config"
+ ostesting "github.com/containerd/cri/pkg/os/testing"
+ "github.com/containerd/cri/pkg/registrar"
+ servertesting "github.com/containerd/cri/pkg/server/testing"
+ containerstore "github.com/containerd/cri/pkg/store/container"
+ imagestore "github.com/containerd/cri/pkg/store/image"
+ "github.com/containerd/cri/pkg/store/label"
+ sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
+ snapshotstore "github.com/containerd/cri/pkg/store/snapshot"
+)
+
+const (
+	// testRootDir is the fake root directory used by the test service.
+	testRootDir = "/test/root"
+	// testStateDir is the fake state directory used by the test service.
+	testStateDir = "/test/state"
+	// Use an image id as test sandbox image to avoid image name resolve.
+	// TODO(random-liu): Change this to image name after we have complete image
+	// management unit test framework.
+	testSandboxImage = "sha256:c75bebcdd211f41b3a460c7bf82970ed6c75acaab9cd4c9a4e125b03ca113798"
+	// testImageFSPath is the fake image filesystem path reported by the service.
+	testImageFSPath = "/test/image/fs/path"
+)
+
+// newTestCRIService creates a fake criService for test.
+func newTestCRIService() *criService {
+	labels := label.NewStore()
+	cfg := criconfig.Config{
+		RootDir:  testRootDir,
+		StateDir: testStateDir,
+		PluginConfig: criconfig.PluginConfig{
+			SandboxImage: testSandboxImage,
+		},
+	}
+	return &criService{
+		config:             cfg,
+		imageFSPath:        testImageFSPath,
+		os:                 ostesting.NewFakeOS(),
+		netPlugin:          servertesting.NewFakeCNIPlugin(),
+		sandboxStore:       sandboxstore.NewStore(labels),
+		sandboxNameIndex:   registrar.NewRegistrar(),
+		containerStore:     containerstore.NewStore(labels),
+		containerNameIndex: registrar.NewRegistrar(),
+		imageStore:         imagestore.NewStore(nil),
+		snapshotStore:      snapshotstore.NewStore(),
+	}
+}
+
+// TestLoadBaseOCISpec round-trips a spec through a temp file and verifies
+// loadBaseOCISpecs indexes it by file path with its fields intact.
+func TestLoadBaseOCISpec(t *testing.T) {
+	want := oci.Spec{Version: "1.0.2", Hostname: "default"}
+
+	tmpFile, err := ioutil.TempFile("", "spec-test-")
+	require.NoError(t, err)
+	defer func() {
+		assert.NoError(t, tmpFile.Close())
+		assert.NoError(t, os.RemoveAll(tmpFile.Name()))
+	}()
+
+	assert.NoError(t, json.NewEncoder(tmpFile).Encode(&want))
+
+	config := criconfig.Config{}
+	config.Runtimes = map[string]criconfig.Runtime{
+		"runc": {BaseRuntimeSpec: tmpFile.Name()},
+	}
+
+	specs, err := loadBaseOCISpecs(&config)
+	assert.NoError(t, err)
+	assert.Len(t, specs, 1)
+
+	got, ok := specs[tmpFile.Name()]
+	assert.True(t, ok, "expected spec with file name %q", tmpFile.Name())
+	assert.Equal(t, "1.0.2", got.Version)
+	assert.Equal(t, "default", got.Hostname)
+}
diff --git a/pkg/server/service_windows.go b/pkg/server/service_windows.go
new file mode 100644
index 000000000..5f764d452
--- /dev/null
+++ b/pkg/server/service_windows.go
@@ -0,0 +1,52 @@
+// +build windows
+
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ cni "github.com/containerd/go-cni"
+ "github.com/pkg/errors"
+)
+
+// windowsNetworkAttachCount is the minimum number of networks the PodSandbox
+// attaches to.
+const windowsNetworkAttachCount = 1
+
+// initPlatform handles windows specific initialization for the CRI service:
+// it only needs to create the CNI plugin.
+func (c *criService) initPlatform() error {
+	var err error
+	// For windows, the loopback network is added as default.
+	// There is no need to explicitly add one hence networkAttachCount is 1.
+	// If there are more network configs the pod will be attached to all the
+	// networks but we will only use the ip of the default network interface
+	// as the pod IP.
+	c.netPlugin, err = cni.New(cni.WithMinNetworkCount(windowsNetworkAttachCount),
+		cni.WithPluginConfDir(c.config.NetworkPluginConfDir),
+		cni.WithPluginMaxConfNum(c.config.NetworkPluginMaxConfNum),
+		cni.WithPluginDir([]string{c.config.NetworkPluginBinDir}))
+	if err != nil {
+		return errors.Wrap(err, "failed to initialize cni")
+	}
+
+	return nil
+}
+
+// cniLoadOptions returns the CNI config load options used on Windows:
+// only the default conf-dir configs.
+func (c *criService) cniLoadOptions() []cni.CNIOpt {
+	opts := []cni.CNIOpt{cni.WithDefaultConf}
+	return opts
+}
diff --git a/pkg/server/snapshots.go b/pkg/server/snapshots.go
new file mode 100644
index 000000000..0c1670750
--- /dev/null
+++ b/pkg/server/snapshots.go
@@ -0,0 +1,120 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "context"
+ "time"
+
+ "github.com/containerd/containerd/errdefs"
+ snapshot "github.com/containerd/containerd/snapshots"
+ "github.com/pkg/errors"
+ "github.com/sirupsen/logrus"
+
+ ctrdutil "github.com/containerd/cri/pkg/containerd/util"
+ snapshotstore "github.com/containerd/cri/pkg/store/snapshot"
+)
+
+// snapshotsSyncer syncs snapshot stats periodically. imagefs info and container stats
+// should both use cached result here.
+// TODO(random-liu): Benchmark with high workload. We may need a statsSyncer instead if
+// benchmark result shows that container cpu/memory stats also need to be cached.
+type snapshotsSyncer struct {
+	// store caches the latest snapshot stats.
+	store *snapshotstore.Store
+	// snapshotter is the source of truth queried on every sync.
+	snapshotter snapshot.Snapshotter
+	// syncPeriod is the interval between two syncs.
+	syncPeriod time.Duration
+}
+
+// newSnapshotsSyncer creates a snapshot syncer that refreshes store from
+// snapshotter every period.
+func newSnapshotsSyncer(store *snapshotstore.Store, snapshotter snapshot.Snapshotter,
+	period time.Duration) *snapshotsSyncer {
+	s := &snapshotsSyncer{
+		snapshotter: snapshotter,
+		store:       store,
+		syncPeriod:  period,
+	}
+	return s
+}
+
+// start starts the snapshots syncer. No stop function is needed because
+// the syncer doesn't update any persistent states, it's fine to let it
+// exit with the process.
+func (s *snapshotsSyncer) start() {
+	tick := time.NewTicker(s.syncPeriod)
+	go func() {
+		defer tick.Stop()
+		// TODO(random-liu): This is expensive. We should do benchmark to
+		// check the resource usage and optimize this.
+		for {
+			// Sync first, then wait: the first sync happens immediately
+			// at startup rather than after the first period elapses.
+			if err := s.sync(); err != nil {
+				logrus.WithError(err).Error("Failed to sync snapshot stats")
+			}
+			<-tick.C
+		}
+	}()
+}
+
+// sync updates all snapshots stats: it walks the snapshotter, refreshes
+// usage for new/active snapshots, bumps timestamps on unchanged ones, and
+// finally evicts store entries not seen in this pass.
+func (s *snapshotsSyncer) sync() error {
+	ctx := ctrdutil.NamespacedContext()
+	// start marks this pass; entries with older timestamps are evicted below.
+	start := time.Now().UnixNano()
+	var snapshots []snapshot.Info
+	// Do not call `Usage` directly in collect function, because
+	// `Usage` takes time, we don't want `Walk` to hold read lock
+	// of snapshot metadata store for too long time.
+	// TODO(random-liu): Set timeout for the following 2 contexts.
+	if err := s.snapshotter.Walk(ctx, func(ctx context.Context, info snapshot.Info) error {
+		snapshots = append(snapshots, info)
+		return nil
+	}); err != nil {
+		return errors.Wrap(err, "walk all snapshots failed")
+	}
+	for _, info := range snapshots {
+		sn, err := s.store.Get(info.Name)
+		if err == nil {
+			// Only update timestamp for non-active snapshot.
+			if sn.Kind == info.Kind && sn.Kind != snapshot.KindActive {
+				sn.Timestamp = time.Now().UnixNano()
+				s.store.Add(sn)
+				continue
+			}
+		}
+		// Get newest stats if the snapshot is new or active.
+		sn = snapshotstore.Snapshot{
+			Key:       info.Name,
+			Kind:      info.Kind,
+			Timestamp: time.Now().UnixNano(),
+		}
+		usage, err := s.snapshotter.Usage(ctx, info.Name)
+		if err != nil {
+			if !errdefs.IsNotFound(err) {
+				logrus.WithError(err).Errorf("Failed to get usage for snapshot %q", info.Name)
+			}
+			// Skip this snapshot; it will be retried on the next sync.
+			continue
+		}
+		sn.Size = uint64(usage.Size)
+		sn.Inodes = uint64(usage.Inodes)
+		s.store.Add(sn)
+	}
+	for _, sn := range s.store.List() {
+		if sn.Timestamp >= start {
+			continue
+		}
+		// Delete the snapshot stats if it's not updated this time.
+		s.store.Delete(sn.Key)
+	}
+	return nil
+}
diff --git a/pkg/server/status.go b/pkg/server/status.go
new file mode 100644
index 000000000..4283a5190
--- /dev/null
+++ b/pkg/server/status.go
@@ -0,0 +1,83 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "encoding/json"
+ "fmt"
+ goruntime "runtime"
+
+ "github.com/containerd/containerd/log"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// networkNotReadyReason is the reason reported when network is not ready.
+const networkNotReadyReason = "NetworkPluginNotReady"
+
+// Status returns the status of the runtime.
+func (c *criService) Status(ctx context.Context, r *runtime.StatusRequest) (*runtime.StatusResponse, error) {
+ // As a containerd plugin, if CRI plugin is serving request,
+ // containerd must be ready.
+ runtimeCondition := &runtime.RuntimeCondition{
+ Type: runtime.RuntimeReady,
+ Status: true,
+ }
+ networkCondition := &runtime.RuntimeCondition{
+ Type: runtime.NetworkReady,
+ Status: true,
+ }
+ // Check the status of the cni initialization
+ if err := c.netPlugin.Status(); err != nil {
+ networkCondition.Status = false
+ networkCondition.Reason = networkNotReadyReason
+ networkCondition.Message = fmt.Sprintf("Network plugin returns error: %v", err)
+ }
+
+ resp := &runtime.StatusResponse{
+ Status: &runtime.RuntimeStatus{Conditions: []*runtime.RuntimeCondition{
+ runtimeCondition,
+ networkCondition,
+ }},
+ }
+ if r.Verbose {
+ configByt, err := json.Marshal(c.config)
+ if err != nil {
+ return nil, err
+ }
+ resp.Info = make(map[string]string)
+ resp.Info["config"] = string(configByt)
+ versionByt, err := json.Marshal(goruntime.Version())
+ if err != nil {
+ return nil, err
+ }
+ resp.Info["golang"] = string(versionByt)
+
+ cniConfig, err := json.Marshal(c.netPlugin.GetConfig())
+ if err != nil {
+ log.G(ctx).WithError(err).Errorf("Failed to marshal CNI config %v", err)
+ }
+ resp.Info["cniconfig"] = string(cniConfig)
+
+ lastCNILoadStatus := "OK"
+ if lerr := c.cniNetConfMonitor.lastStatus(); lerr != nil {
+ lastCNILoadStatus = lerr.Error()
+ }
+ resp.Info["lastCNILoadStatus"] = lastCNILoadStatus
+ }
+ return resp, nil
+}
diff --git a/pkg/server/streaming.go b/pkg/server/streaming.go
new file mode 100644
index 000000000..d0089cc89
--- /dev/null
+++ b/pkg/server/streaming.go
@@ -0,0 +1,239 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "context"
+ "crypto/tls"
+ "io"
+ "math"
+ "net"
+ "os"
+ "time"
+
+ "github.com/pkg/errors"
+ k8snet "k8s.io/apimachinery/pkg/util/net"
+ "k8s.io/apimachinery/pkg/util/runtime"
+ "k8s.io/client-go/tools/remotecommand"
+ k8scert "k8s.io/client-go/util/cert"
+ "k8s.io/utils/exec"
+
+ ctrdutil "github.com/containerd/cri/pkg/containerd/util"
+ "github.com/containerd/cri/pkg/streaming"
+)
+
+type streamListenerMode int
+
+const (
+ x509KeyPairTLS streamListenerMode = iota
+ selfSignTLS
+ withoutTLS
+)
+
+func getStreamListenerMode(c *criService) (streamListenerMode, error) {
+ if c.config.EnableTLSStreaming {
+ if c.config.X509KeyPairStreaming.TLSCertFile != "" && c.config.X509KeyPairStreaming.TLSKeyFile != "" {
+ return x509KeyPairTLS, nil
+ }
+ if c.config.X509KeyPairStreaming.TLSCertFile != "" && c.config.X509KeyPairStreaming.TLSKeyFile == "" {
+ return -1, errors.New("must set X509KeyPairStreaming.TLSKeyFile")
+ }
+ if c.config.X509KeyPairStreaming.TLSCertFile == "" && c.config.X509KeyPairStreaming.TLSKeyFile != "" {
+ return -1, errors.New("must set X509KeyPairStreaming.TLSCertFile")
+ }
+ return selfSignTLS, nil
+ }
+ if c.config.X509KeyPairStreaming.TLSCertFile != "" {
+ return -1, errors.New("X509KeyPairStreaming.TLSCertFile is set but EnableTLSStreaming is not set")
+ }
+ if c.config.X509KeyPairStreaming.TLSKeyFile != "" {
+ return -1, errors.New("X509KeyPairStreaming.TLSKeyFile is set but EnableTLSStreaming is not set")
+ }
+ return withoutTLS, nil
+}
+
+func newStreamServer(c *criService, addr, port, streamIdleTimeout string) (streaming.Server, error) {
+ if addr == "" {
+ a, err := k8snet.ResolveBindAddress(nil)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to get stream server address")
+ }
+ addr = a.String()
+ }
+ config := streaming.DefaultConfig
+ if streamIdleTimeout != "" {
+ var err error
+ config.StreamIdleTimeout, err = time.ParseDuration(streamIdleTimeout)
+ if err != nil {
+ return nil, errors.Wrap(err, "invalid stream idle timeout")
+ }
+ }
+ config.Addr = net.JoinHostPort(addr, port)
+ run := newStreamRuntime(c)
+ tlsMode, err := getStreamListenerMode(c)
+ if err != nil {
+ return nil, errors.Wrapf(err, "invalid stream server configuration")
+ }
+ switch tlsMode {
+ case x509KeyPairTLS:
+ tlsCert, err := tls.LoadX509KeyPair(c.config.X509KeyPairStreaming.TLSCertFile, c.config.X509KeyPairStreaming.TLSKeyFile)
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to load x509 key pair for stream server")
+ }
+ config.TLSConfig = &tls.Config{
+ Certificates: []tls.Certificate{tlsCert},
+ }
+ return streaming.NewServer(config, run)
+ case selfSignTLS:
+ tlsCert, err := newTLSCert()
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to generate tls certificate for stream server")
+ }
+ config.TLSConfig = &tls.Config{
+ Certificates: []tls.Certificate{tlsCert},
+ InsecureSkipVerify: true,
+ }
+ return streaming.NewServer(config, run)
+ case withoutTLS:
+ return streaming.NewServer(config, run)
+ default:
+ return nil, errors.New("invalid configuration for the stream listener")
+ }
+}
+
// streamRuntime implements streaming.Runtime by delegating exec, attach
// and port-forward operations to the CRI service.
type streamRuntime struct {
	c *criService
}

// newStreamRuntime creates a streaming runtime backed by the given CRI service.
func newStreamRuntime(c *criService) streaming.Runtime {
	return &streamRuntime{c: c}
}
+
+// Exec executes a command inside the container. exec.ExitError is returned if the command
+// returns non-zero exit code.
+func (s *streamRuntime) Exec(containerID string, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser,
+ tty bool, resize <-chan remotecommand.TerminalSize) error {
+ exitCode, err := s.c.execInContainer(ctrdutil.NamespacedContext(), containerID, execOptions{
+ cmd: cmd,
+ stdin: stdin,
+ stdout: stdout,
+ stderr: stderr,
+ tty: tty,
+ resize: resize,
+ })
+ if err != nil {
+ return errors.Wrap(err, "failed to exec in container")
+ }
+ if *exitCode == 0 {
+ return nil
+ }
+ return &exec.CodeExitError{
+ Err: errors.Errorf("error executing command %v, exit code %d", cmd, *exitCode),
+ Code: int(*exitCode),
+ }
+}
+
// Attach attaches the given stdio streams and optional terminal resize
// channel to the running container identified by containerID.
func (s *streamRuntime) Attach(containerID string, in io.Reader, out, err io.WriteCloser, tty bool,
	resize <-chan remotecommand.TerminalSize) error {
	return s.c.attachContainer(ctrdutil.NamespacedContext(), containerID, in, out, err, tty, resize)
}
+
+func (s *streamRuntime) PortForward(podSandboxID string, port int32, stream io.ReadWriteCloser) error {
+ if port <= 0 || port > math.MaxUint16 {
+ return errors.Errorf("invalid port %d", port)
+ }
+ ctx := ctrdutil.NamespacedContext()
+ return s.c.portForward(ctx, podSandboxID, port, stream)
+}
+
+// handleResizing spawns a goroutine that processes the resize channel, calling resizeFunc for each
+// remotecommand.TerminalSize received from the channel.
+func handleResizing(ctx context.Context, resize <-chan remotecommand.TerminalSize, resizeFunc func(size remotecommand.TerminalSize)) {
+ if resize == nil {
+ return
+ }
+
+ go func() {
+ defer runtime.HandleCrash()
+
+ for {
+ select {
+ case <-ctx.Done():
+ return
+ case size, ok := <-resize:
+ if !ok {
+ return
+ }
+ if size.Height < 1 || size.Width < 1 {
+ continue
+ }
+ resizeFunc(size)
+ }
+ }
+ }()
+}
+
+// newTLSCert returns a self CA signed tls.certificate.
+// TODO (mikebrow): replace / rewrite this function to support using CA
+// signing of the certificate. Requires a security plan for kubernetes regarding
+// CRI connections / streaming, etc. For example, kubernetes could configure or
+// require a CA service and pass a configuration down through CRI.
+func newTLSCert() (tls.Certificate, error) {
+ fail := func(err error) (tls.Certificate, error) { return tls.Certificate{}, err }
+
+ hostName, err := os.Hostname()
+ if err != nil {
+ return fail(errors.Wrap(err, "failed to get hostname"))
+ }
+
+ addrs, err := net.InterfaceAddrs()
+ if err != nil {
+ return fail(errors.Wrap(err, "failed to get host IP addresses"))
+ }
+
+ var alternateIPs []net.IP
+ var alternateDNS []string
+ for _, addr := range addrs {
+ var ip net.IP
+
+ switch v := addr.(type) {
+ case *net.IPNet:
+ ip = v.IP
+ case *net.IPAddr:
+ ip = v.IP
+ default:
+ continue
+ }
+
+ alternateIPs = append(alternateIPs, ip)
+ alternateDNS = append(alternateDNS, ip.String())
+ }
+
+ // Generate a self signed certificate key (CA is self)
+ certPem, keyPem, err := k8scert.GenerateSelfSignedCertKey(hostName, alternateIPs, alternateDNS)
+ if err != nil {
+ return fail(errors.Wrap(err, "certificate key could not be created"))
+ }
+
+ // Load the tls certificate
+ tlsCert, err := tls.X509KeyPair(certPem, keyPem)
+ if err != nil {
+ return fail(errors.Wrap(err, "certificate could not be loaded"))
+ }
+
+ return tlsCert, nil
+}
diff --git a/pkg/server/streaming_test.go b/pkg/server/streaming_test.go
new file mode 100644
index 000000000..0e6f8d2a5
--- /dev/null
+++ b/pkg/server/streaming_test.go
@@ -0,0 +1,153 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "testing"
+
+ "github.com/containerd/cri/pkg/config"
+ "github.com/stretchr/testify/assert"
+)
+
// TestValidateStreamServer exercises getStreamListenerMode across every
// combination of EnableTLSStreaming and the X509 key pair fields: the
// three valid modes plus the five inconsistent configurations that must
// be rejected.
func TestValidateStreamServer(t *testing.T) {
	for desc, test := range map[string]struct {
		*criService
		tlsMode   streamListenerMode
		expectErr bool
	}{
		"should pass with default withoutTLS": {
			criService: &criService{
				config: config.Config{
					PluginConfig: config.DefaultConfig(),
				},
			},
			tlsMode:   withoutTLS,
			expectErr: false,
		},
		"should pass with x509KeyPairTLS": {
			criService: &criService{
				config: config.Config{
					PluginConfig: config.PluginConfig{
						EnableTLSStreaming: true,
						X509KeyPairStreaming: config.X509KeyPairStreaming{
							TLSKeyFile:  "non-empty",
							TLSCertFile: "non-empty",
						},
					},
				},
			},
			tlsMode:   x509KeyPairTLS,
			expectErr: false,
		},
		"should pass with selfSign": {
			criService: &criService{
				config: config.Config{
					PluginConfig: config.PluginConfig{
						EnableTLSStreaming: true,
					},
				},
			},
			tlsMode:   selfSignTLS,
			expectErr: false,
		},
		"should return error with X509 keypair but not EnableTLSStreaming": {
			criService: &criService{
				config: config.Config{
					PluginConfig: config.PluginConfig{
						EnableTLSStreaming: false,
						X509KeyPairStreaming: config.X509KeyPairStreaming{
							TLSKeyFile:  "non-empty",
							TLSCertFile: "non-empty",
						},
					},
				},
			},
			tlsMode:   -1,
			expectErr: true,
		},
		"should return error with X509 TLSCertFile empty": {
			criService: &criService{
				config: config.Config{
					PluginConfig: config.PluginConfig{
						EnableTLSStreaming: true,
						X509KeyPairStreaming: config.X509KeyPairStreaming{
							TLSKeyFile:  "non-empty",
							TLSCertFile: "",
						},
					},
				},
			},
			tlsMode:   -1,
			expectErr: true,
		},
		"should return error with X509 TLSKeyFile empty": {
			criService: &criService{
				config: config.Config{
					PluginConfig: config.PluginConfig{
						EnableTLSStreaming: true,
						X509KeyPairStreaming: config.X509KeyPairStreaming{
							TLSKeyFile:  "",
							TLSCertFile: "non-empty",
						},
					},
				},
			},
			tlsMode:   -1,
			expectErr: true,
		},
		"should return error without EnableTLSStreaming and only TLSCertFile set": {
			criService: &criService{
				config: config.Config{
					PluginConfig: config.PluginConfig{
						EnableTLSStreaming: false,
						X509KeyPairStreaming: config.X509KeyPairStreaming{
							TLSKeyFile:  "",
							TLSCertFile: "non-empty",
						},
					},
				},
			},
			tlsMode:   -1,
			expectErr: true,
		},
		"should return error without EnableTLSStreaming and only TLSKeyFile set": {
			criService: &criService{
				config: config.Config{
					PluginConfig: config.PluginConfig{
						EnableTLSStreaming: false,
						X509KeyPairStreaming: config.X509KeyPairStreaming{
							TLSKeyFile:  "non-empty",
							TLSCertFile: "",
						},
					},
				},
			},
			tlsMode:   -1,
			expectErr: true,
		},
	} {
		t.Run(desc, func(t *testing.T) {
			tlsMode, err := getStreamListenerMode(test.criService)
			if test.expectErr {
				assert.Error(t, err)
				return
			}
			assert.NoError(t, err)
			assert.Equal(t, test.tlsMode, tlsMode)
		})
	}
}
diff --git a/pkg/server/testing/fake_cni_plugin.go b/pkg/server/testing/fake_cni_plugin.go
new file mode 100644
index 000000000..71a930f59
--- /dev/null
+++ b/pkg/server/testing/fake_cni_plugin.go
@@ -0,0 +1,59 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package testing
+
+import (
+ "context"
+
+ cni "github.com/containerd/go-cni"
+)
+
// FakeCNIPlugin is a fake plugin used for test.
type FakeCNIPlugin struct {
	// StatusErr is returned by Status; set it to simulate an unready network.
	StatusErr error
	// LoadErr is returned by Load; set it to simulate a config load failure.
	LoadErr error
}

// NewFakeCNIPlugin create a FakeCNIPlugin.
func NewFakeCNIPlugin() *FakeCNIPlugin {
	return &FakeCNIPlugin{}
}

// Setup setups the network of PodSandbox. The fake always succeeds and
// returns a nil result.
func (f *FakeCNIPlugin) Setup(ctx context.Context, id, path string, opts ...cni.NamespaceOpts) (*cni.CNIResult, error) {
	return nil, nil
}

// Remove teardown the network of PodSandbox. The fake always succeeds.
func (f *FakeCNIPlugin) Remove(ctx context.Context, id, path string, opts ...cni.NamespaceOpts) error {
	return nil
}

// Status get the status of the plugin. It returns the injected StatusErr.
func (f *FakeCNIPlugin) Status() error {
	return f.StatusErr
}

// Load loads the network config. It returns the injected LoadErr.
func (f *FakeCNIPlugin) Load(opts ...cni.CNIOpt) error {
	return f.LoadErr
}

// GetConfig returns a copy of the CNI plugin configurations as parsed by CNI.
// The fake holds no configuration and always returns nil.
func (f *FakeCNIPlugin) GetConfig() *cni.ConfigResult {
	return nil
}
diff --git a/pkg/server/update_runtime_config.go b/pkg/server/update_runtime_config.go
new file mode 100644
index 000000000..6c725e234
--- /dev/null
+++ b/pkg/server/update_runtime_config.go
@@ -0,0 +1,128 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "net"
+ "os"
+ "path/filepath"
+ "strings"
+ "text/template"
+
+ "github.com/containerd/containerd/log"
+ "github.com/pkg/errors"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
// cniConfigTemplate contains the values containerd will overwrite
// in the cni config template.
// NOTE: field names are referenced from user-supplied templates
// (e.g. {{.PodCIDR}}), so renaming any field is a breaking change.
type cniConfigTemplate struct {
	// PodCIDR is the cidr for pods on the node.
	PodCIDR string
	// PodCIDRRanges is the cidr ranges for pods on the node.
	PodCIDRRanges []string
	// Routes is a list of routes configured.
	Routes []string
}

const (
	// cniConfigFileName is the name of cni config file generated by containerd.
	cniConfigFileName = "10-containerd-net.conflist"
	// zeroCIDRv6 is the null route for IPv6.
	zeroCIDRv6 = "::/0"
	// zeroCIDRv4 is the null route for IPv4.
	zeroCIDRv4 = "0.0.0.0/0"
)
+
+// UpdateRuntimeConfig updates the runtime config. Currently only handles podCIDR updates.
+func (c *criService) UpdateRuntimeConfig(ctx context.Context, r *runtime.UpdateRuntimeConfigRequest) (*runtime.UpdateRuntimeConfigResponse, error) {
+ podCIDRs := r.GetRuntimeConfig().GetNetworkConfig().GetPodCidr()
+ if podCIDRs == "" {
+ return &runtime.UpdateRuntimeConfigResponse{}, nil
+ }
+ cidrs := strings.Split(podCIDRs, ",")
+ for i := range cidrs {
+ cidrs[i] = strings.TrimSpace(cidrs[i])
+ }
+ routes, err := getRoutes(cidrs)
+ if err != nil {
+ return nil, errors.Wrap(err, "get routes")
+ }
+
+ confTemplate := c.config.NetworkPluginConfTemplate
+ if confTemplate == "" {
+ log.G(ctx).Info("No cni config template is specified, wait for other system components to drop the config.")
+ return &runtime.UpdateRuntimeConfigResponse{}, nil
+ }
+ if err := c.netPlugin.Status(); err == nil {
+ log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate)
+ return &runtime.UpdateRuntimeConfigResponse{}, nil
+ } else if err := c.netPlugin.Load(c.cniLoadOptions()...); err == nil {
+ log.G(ctx).Infof("CNI config is successfully loaded, skip generating cni config from template %q", confTemplate)
+ return &runtime.UpdateRuntimeConfigResponse{}, nil
+ }
+ log.G(ctx).Infof("Generating cni config from template %q", confTemplate)
+ // generate cni config file from the template with updated pod cidr.
+ t, err := template.ParseFiles(confTemplate)
+ if err != nil {
+ return nil, errors.Wrapf(err, "failed to parse cni config template %q", confTemplate)
+ }
+ if err := os.MkdirAll(c.config.NetworkPluginConfDir, 0755); err != nil {
+ return nil, errors.Wrapf(err, "failed to create cni config directory: %q", c.config.NetworkPluginConfDir)
+ }
+ confFile := filepath.Join(c.config.NetworkPluginConfDir, cniConfigFileName)
+ f, err := os.OpenFile(confFile, os.O_WRONLY|os.O_CREATE, 0644)
+ if err != nil {
+ return nil, errors.Wrapf(err, "failed to open cni config file %q", confFile)
+ }
+ defer f.Close()
+ if err := t.Execute(f, cniConfigTemplate{
+ PodCIDR: cidrs[0],
+ PodCIDRRanges: cidrs,
+ Routes: routes,
+ }); err != nil {
+ return nil, errors.Wrapf(err, "failed to generate cni config file %q", confFile)
+ }
+ return &runtime.UpdateRuntimeConfigResponse{}, nil
+}
+
+// getRoutes generates required routes for the passed in cidrs.
+func getRoutes(cidrs []string) ([]string, error) {
+ var (
+ routes []string
+ hasV4, hasV6 bool
+ )
+ for _, c := range cidrs {
+ _, cidr, err := net.ParseCIDR(c)
+ if err != nil {
+ return nil, err
+ }
+ if cidr.IP.To4() != nil {
+ hasV4 = true
+ } else {
+ hasV6 = true
+ }
+ }
+ if hasV4 {
+ routes = append(routes, zeroCIDRv4)
+ }
+ if hasV6 {
+ routes = append(routes, zeroCIDRv6)
+ }
+ return routes, nil
+}
diff --git a/pkg/server/update_runtime_config_test.go b/pkg/server/update_runtime_config_test.go
new file mode 100644
index 000000000..2e62c8fd9
--- /dev/null
+++ b/pkg/server/update_runtime_config_test.go
@@ -0,0 +1,140 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "testing"
+
+ "github.com/pkg/errors"
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ criconfig "github.com/containerd/cri/pkg/config"
+ servertesting "github.com/containerd/cri/pkg/server/testing"
+)
+
// TestUpdateRuntimeConfig verifies cni config generation: the config file
// is written only when a template is configured, a pod CIDR is provided,
// and the network plugin is not already ready.
func TestUpdateRuntimeConfig(t *testing.T) {
	const (
		// testTemplate exercises all three template fields
		// (PodCIDR, PodCIDRRanges, Routes).
		testTemplate = `
{
  "name": "test-pod-network",
  "cniVersion": "0.3.1",
  "plugins": [
    {
      "type": "ptp",
      "mtu": 1460,
      "ipam": {
        "type": "host-local",
        "subnet": "{{.PodCIDR}}",
        "ranges": [{{range $i, $range := .PodCIDRRanges}}{{if $i}}, {{end}}[{"subnet": "{{$range}}"}]{{end}}],
        "routes": [{{range $i, $route := .Routes}}{{if $i}}, {{end}}{"dst": "{{$route}}"}{{end}}]
      }
    },
  ]
}`
		// testCIDR is a dual-stack pod CIDR: IPv4 first, then IPv6.
		testCIDR = "10.0.0.0/24, 2001:4860:4860::/64"
		// expected is testTemplate rendered with testCIDR.
		expected = `
{
  "name": "test-pod-network",
  "cniVersion": "0.3.1",
  "plugins": [
    {
      "type": "ptp",
      "mtu": 1460,
      "ipam": {
        "type": "host-local",
        "subnet": "10.0.0.0/24",
        "ranges": [[{"subnet": "10.0.0.0/24"}], [{"subnet": "2001:4860:4860::/64"}]],
        "routes": [{"dst": "0.0.0.0/0"}, {"dst": "::/0"}]
      }
    },
  ]
}`
	)

	for name, test := range map[string]struct {
		noTemplate      bool
		emptyCIDR       bool
		networkReady    bool
		expectCNIConfig bool
	}{
		"should not generate cni config if cidr is empty": {
			emptyCIDR:       true,
			expectCNIConfig: false,
		},
		"should not generate cni config if template file is not specified": {
			noTemplate:      true,
			expectCNIConfig: false,
		},
		"should not generate cni config if network is ready": {
			networkReady:    true,
			expectCNIConfig: false,
		},
		"should generate cni config if template is specified and cidr is provided": {
			expectCNIConfig: true,
		},
	} {
		t.Run(name, func(t *testing.T) {
			testDir, err := ioutil.TempDir(os.TempDir(), "test-runtime-config")
			require.NoError(t, err)
			defer os.RemoveAll(testDir)
			templateName := filepath.Join(testDir, "template")
			err = ioutil.WriteFile(templateName, []byte(testTemplate), 0666)
			require.NoError(t, err)
			confDir := filepath.Join(testDir, "net.d")
			confName := filepath.Join(confDir, cniConfigFileName)

			c := newTestCRIService()
			c.config.CniConfig = criconfig.CniConfig{
				NetworkPluginConfDir:      confDir,
				NetworkPluginConfTemplate: templateName,
			}
			req := &runtime.UpdateRuntimeConfigRequest{
				RuntimeConfig: &runtime.RuntimeConfig{
					NetworkConfig: &runtime.NetworkConfig{
						PodCidr: testCIDR,
					},
				},
			}
			if test.noTemplate {
				c.config.CniConfig.NetworkPluginConfTemplate = ""
			}
			if test.emptyCIDR {
				req.RuntimeConfig.NetworkConfig.PodCidr = ""
			}
			if !test.networkReady {
				// Fail both the status check and the config reload so the
				// service falls through to template generation.
				c.netPlugin.(*servertesting.FakeCNIPlugin).StatusErr = errors.New("random error")
				c.netPlugin.(*servertesting.FakeCNIPlugin).LoadErr = errors.New("random error")
			}
			_, err = c.UpdateRuntimeConfig(context.Background(), req)
			assert.NoError(t, err)
			if !test.expectCNIConfig {
				// No config file should have been written.
				_, err := os.Stat(confName)
				assert.Error(t, err)
			} else {
				got, err := ioutil.ReadFile(confName)
				assert.NoError(t, err)
				assert.Equal(t, expected, string(got))
			}
		})
	}
}
diff --git a/pkg/server/version.go b/pkg/server/version.go
new file mode 100644
index 000000000..c1dea50c1
--- /dev/null
+++ b/pkg/server/version.go
@@ -0,0 +1,42 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package server
+
+import (
+ "github.com/containerd/containerd/version"
+ "golang.org/x/net/context"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/constants"
+)
+
const (
	// containerName is the runtime name reported to CRI clients.
	containerName = "containerd"
	// kubeAPIVersion is the api version of kubernetes.
	// TODO(random-liu): Change this to actual CRI version.
	kubeAPIVersion = "0.1.0"
)

// Version returns the runtime name, runtime version and runtime API version.
// All values are static; the request carries no inputs that affect the result.
func (c *criService) Version(ctx context.Context, r *runtime.VersionRequest) (*runtime.VersionResponse, error) {
	return &runtime.VersionResponse{
		Version:           kubeAPIVersion,
		RuntimeName:       containerName,
		RuntimeVersion:    version.Version,
		RuntimeApiVersion: constants.CRIVersion,
	}, nil
}
diff --git a/pkg/seutil/seutil.go b/pkg/seutil/seutil.go
new file mode 100644
index 000000000..f453a7775
--- /dev/null
+++ b/pkg/seutil/seutil.go
@@ -0,0 +1,71 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package seutil
+
+import (
+ "bufio"
+ "os"
+
+ "github.com/opencontainers/selinux/go-selinux"
+)
+
// seTypes is the set of customizable SELinux type names, loaded once at
// package init. It is read-only afterwards (see HasType).
var seTypes map[string]struct{}

// typePath is the well-known location of the customizable types list on
// targeted-policy SELinux systems.
const typePath = "/etc/selinux/targeted/contexts/customizable_types"

// init populates seTypes with one entry per line of typePath.
// Failures are silent by design: if SELinux is disabled or the file is
// missing/unreadable, the set stays empty and HasType reports false for
// every name.
func init() {
	seTypes = make(map[string]struct{})
	if !selinux.GetEnabled() {
		return
	}
	f, err := os.Open(typePath)
	if err != nil {
		return
	}
	defer f.Close()
	s := bufio.NewScanner(f)
	// NOTE(review): scanner errors are ignored; a partial read yields a
	// partial set.
	for s.Scan() {
		seTypes[s.Text()] = struct{}{}
	}
}
+
// HasType returns true if the underlying system has the
// provided selinux type enabled.
// The lookup is against the set loaded once at init; the map is never
// mutated afterwards, so concurrent reads are safe.
func HasType(name string) bool {
	_, ok := seTypes[name]
	return ok
}
+
+// ChangeToKVM process label
+func ChangeToKVM(l string) (string, error) {
+ if l == "" || !selinux.GetEnabled() {
+ return "", nil
+ }
+ proc, _ := selinux.KVMContainerLabels()
+ selinux.ReleaseLabel(proc)
+
+ current, err := selinux.NewContext(l)
+ if err != nil {
+ return "", err
+ }
+ next, err := selinux.NewContext(proc)
+ if err != nil {
+ return "", err
+ }
+ current["type"] = next["type"]
+ return current.Get(), nil
+}
diff --git a/pkg/store/container/container.go b/pkg/store/container/container.go
new file mode 100644
index 000000000..53c0745a5
--- /dev/null
+++ b/pkg/store/container/container.go
@@ -0,0 +1,177 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package container
+
+import (
+ "sync"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/cri/pkg/store/label"
+ "github.com/docker/docker/pkg/truncindex"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ cio "github.com/containerd/cri/pkg/server/io"
+ "github.com/containerd/cri/pkg/store"
+)
+
// Container contains all resources associated with the container. All methods to
// mutate the internal state are thread-safe.
type Container struct {
	// Metadata is the metadata of the container, it is **immutable** after created.
	Metadata
	// Status stores the status of the container.
	Status StatusStorage
	// Container is the containerd container client.
	Container containerd.Container
	// Container IO.
	// IO could only be nil when the container is in unknown state.
	IO *cio.ContainerIO
	// StopCh is used to propagate the stop information of the container.
	// The embedding exposes Stop/stop-channel helpers directly on Container.
	*store.StopCh
}
+
// Opts sets specific information to newly created Container.
type Opts func(*Container) error

// WithContainer adds the containerd Container to the internal data store.
func WithContainer(cntr containerd.Container) Opts {
	return func(c *Container) error {
		c.Container = cntr
		return nil
	}
}

// WithContainerIO adds IO into the container.
func WithContainerIO(io *cio.ContainerIO) Opts {
	return func(c *Container) error {
		c.IO = io
		return nil
	}
}

// WithStatus adds status to the container.
// The status is persisted under root via StoreStatus. If the stored state
// is already CONTAINER_EXITED, the container is marked stopped via the
// embedded StopCh so that the normal stop path is not required.
func WithStatus(status Status, root string) Opts {
	return func(c *Container) error {
		s, err := StoreStatus(root, c.ID, status)
		if err != nil {
			return err
		}
		c.Status = s
		if s.Get().State() == runtime.ContainerState_CONTAINER_EXITED {
			c.Stop()
		}
		return nil
	}
}
+
+// NewContainer creates an internally used container type.
+func NewContainer(metadata Metadata, opts ...Opts) (Container, error) {
+ c := Container{
+ Metadata: metadata,
+ StopCh: store.NewStopCh(),
+ }
+ for _, o := range opts {
+ if err := o(&c); err != nil {
+ return Container{}, err
+ }
+ }
+ return c, nil
+}
+
// Delete deletes checkpoint for the container.
// It delegates to StatusStorage.Delete; in-memory state is untouched.
func (c *Container) Delete() error {
	return c.Status.Delete()
}
+
// Store stores all Containers.
type Store struct {
	// lock guards containers, idIndex and labels.
	lock sync.RWMutex
	// containers maps full container id to its Container.
	containers map[string]Container
	// idIndex resolves unique id prefixes to full ids.
	idIndex *truncindex.TruncIndex
	// labels tracks process labels, reserved in Add and released in Delete.
	labels *label.Store
}

// NewStore creates a container store.
func NewStore(labels *label.Store) *Store {
	return &Store{
		containers: make(map[string]Container),
		idIndex:    truncindex.NewTruncIndex([]string{}),
		labels:     labels,
	}
}
+
+// Add a container into the store. Returns store.ErrAlreadyExist if the
+// container already exists.
+func (s *Store) Add(c Container) error {
+ s.lock.Lock()
+ defer s.lock.Unlock()
+ if _, ok := s.containers[c.ID]; ok {
+ return store.ErrAlreadyExist
+ }
+ if err := s.labels.Reserve(c.ProcessLabel); err != nil {
+ return err
+ }
+ if err := s.idIndex.Add(c.ID); err != nil {
+ return err
+ }
+ s.containers[c.ID] = c
+ return nil
+}
+
+// Get returns the container with specified id. Returns store.ErrNotExist
+// if the container doesn't exist.
+func (s *Store) Get(id string) (Container, error) {
+ s.lock.RLock()
+ defer s.lock.RUnlock()
+ id, err := s.idIndex.Get(id)
+ if err != nil {
+ if err == truncindex.ErrNotExist {
+ err = store.ErrNotExist
+ }
+ return Container{}, err
+ }
+ if c, ok := s.containers[id]; ok {
+ return c, nil
+ }
+ return Container{}, store.ErrNotExist
+}
+
+// List lists all containers.
+func (s *Store) List() []Container {
+ s.lock.RLock()
+ defer s.lock.RUnlock()
+ var containers []Container
+ for _, c := range s.containers {
+ containers = append(containers, c)
+ }
+ return containers
+}
+
+// Delete deletes the container from store with specified id.
+func (s *Store) Delete(id string) {
+ s.lock.Lock()
+ defer s.lock.Unlock()
+ id, err := s.idIndex.Get(id)
+ if err != nil {
+ // Note: The idIndex.Delete and delete doesn't handle truncated index.
+ // So we need to return if there are error.
+ return
+ }
+ s.labels.Release(s.containers[id].ProcessLabel)
+ s.idIndex.Delete(id) // nolint: errcheck
+ delete(s.containers, id)
+}
diff --git a/pkg/store/container/container_test.go b/pkg/store/container/container_test.go
new file mode 100644
index 000000000..a88bc02c2
--- /dev/null
+++ b/pkg/store/container/container_test.go
@@ -0,0 +1,247 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package container
+
+import (
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/containerd/cri/pkg/store/label"
+ "github.com/opencontainers/selinux/go-selinux"
+ assertlib "github.com/stretchr/testify/assert"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ cio "github.com/containerd/cri/pkg/server/io"
+ "github.com/containerd/cri/pkg/store"
+)
+
// TestContainerStore exercises the full Store lifecycle: Add, Get (with
// truncated IDs), List, duplicate-Add rejection, Delete, and SELinux label
// reserve/release accounting via the label store hooks.
func TestContainerStore(t *testing.T) {
	metadatas := map[string]Metadata{
		"1": {
			ID:        "1",
			Name:      "Container-1",
			SandboxID: "Sandbox-1",
			Config: &runtime.ContainerConfig{
				Metadata: &runtime.ContainerMetadata{
					Name:    "TestPod-1",
					Attempt: 1,
				},
			},
			ImageRef:     "TestImage-1",
			StopSignal:   "SIGTERM",
			LogPath:      "/test/log/path/1",
			ProcessLabel: "junk:junk:junk:c1,c2",
		},
		"2abcd": {
			ID:        "2abcd",
			Name:      "Container-2abcd",
			SandboxID: "Sandbox-2abcd",
			Config: &runtime.ContainerConfig{
				Metadata: &runtime.ContainerMetadata{
					Name:    "TestPod-2abcd",
					Attempt: 2,
				},
			},
			StopSignal:   "SIGTERM",
			ImageRef:     "TestImage-2",
			LogPath:      "/test/log/path/2",
			ProcessLabel: "junk:junk:junk:c1,c2",
		},
		"4a333": {
			ID:        "4a333",
			Name:      "Container-4a333",
			SandboxID: "Sandbox-4a333",
			Config: &runtime.ContainerConfig{
				Metadata: &runtime.ContainerMetadata{
					Name:    "TestPod-4a333",
					Attempt: 3,
				},
			},
			StopSignal:   "SIGTERM",
			ImageRef:     "TestImage-3",
			LogPath:      "/test/log/path/3",
			ProcessLabel: "junk:junk:junk:c1,c3",
		},
		"4abcd": {
			ID:        "4abcd",
			Name:      "Container-4abcd",
			SandboxID: "Sandbox-4abcd",
			Config: &runtime.ContainerConfig{
				Metadata: &runtime.ContainerMetadata{
					Name:    "TestPod-4abcd",
					Attempt: 1,
				},
			},
			StopSignal:   "SIGTERM",
			ImageRef:     "TestImage-4abcd",
			ProcessLabel: "junk:junk:junk:c1,c4",
		},
	}
	statuses := map[string]Status{
		"1": {
			Pid:        1,
			CreatedAt:  time.Now().UnixNano(),
			StartedAt:  time.Now().UnixNano(),
			FinishedAt: time.Now().UnixNano(),
			ExitCode:   1,
			Reason:     "TestReason-1",
			Message:    "TestMessage-1",
		},
		"2abcd": {
			Pid:        2,
			CreatedAt:  time.Now().UnixNano(),
			StartedAt:  time.Now().UnixNano(),
			FinishedAt: time.Now().UnixNano(),
			ExitCode:   2,
			Reason:     "TestReason-2abcd",
			Message:    "TestMessage-2abcd",
		},
		"4a333": {
			Pid:        3,
			CreatedAt:  time.Now().UnixNano(),
			StartedAt:  time.Now().UnixNano(),
			FinishedAt: time.Now().UnixNano(),
			ExitCode:   3,
			Reason:     "TestReason-4a333",
			Message:    "TestMessage-4a333",
			Starting:   true,
		},
		"4abcd": {
			Pid:        4,
			CreatedAt:  time.Now().UnixNano(),
			StartedAt:  time.Now().UnixNano(),
			FinishedAt: time.Now().UnixNano(),
			ExitCode:   4,
			Reason:     "TestReason-4abcd",
			Message:    "TestMessage-4abcd",
			Removing:   true,
		},
	}
	assert := assertlib.New(t)
	containers := map[string]Container{}
	for id := range metadatas {
		container, err := NewContainer(
			metadatas[id],
			WithFakeStatus(statuses[id]),
		)
		assert.NoError(err)
		containers[id] = container
	}

	s := NewStore(label.NewStore())
	// Track which SELinux categories ("c1,c2", ...) are currently reserved
	// by hooking the label store's Reserver/Releaser callbacks.
	reserved := map[string]bool{}
	s.labels.Reserver = func(label string) {
		reserved[strings.SplitN(label, ":", 4)[3]] = true
	}
	s.labels.Releaser = func(label string) {
		reserved[strings.SplitN(label, ":", 4)[3]] = false
	}

	t.Logf("should be able to add container")
	for _, c := range containers {
		assert.NoError(s.Add(c))
	}

	t.Logf("should be able to get container")
	// genTruncIndex returns roughly the first half of the id, to exercise
	// truncated-id resolution in Store.Get.
	genTruncIndex := func(normalName string) string { return normalName[:(len(normalName)+1)/2] }
	for id, c := range containers {
		got, err := s.Get(genTruncIndex(id))
		assert.NoError(err)
		assert.Equal(c, got)
	}

	t.Logf("should be able to list containers")
	cs := s.List()
	assert.Len(cs, len(containers))

	if selinux.GetEnabled() {
		t.Logf("should have reserved labels (requires -tag selinux)")
		assert.Equal(map[string]bool{
			"c1,c2": true,
			"c1,c3": true,
			"c1,c4": true,
		}, reserved)
	}

	cntrNum := len(containers)
	for testID, v := range containers {
		truncID := genTruncIndex(testID)

		t.Logf("add should return already exists error for duplicated container")
		assert.Equal(store.ErrAlreadyExist, s.Add(v))

		t.Logf("should be able to delete container")
		s.Delete(truncID)
		cntrNum--
		cs = s.List()
		assert.Len(cs, cntrNum)

		t.Logf("get should return not exist error after deletion")
		c, err := s.Get(truncID)
		assert.Equal(Container{}, c)
		assert.Equal(store.ErrNotExist, err)
	}

	if selinux.GetEnabled() {
		t.Logf("should have released all labels (requires -tag selinux)")
		assert.Equal(map[string]bool{
			"c1,c2": false,
			"c1,c3": false,
			"c1,c4": false,
		}, reserved)
	}
}
+
+func TestWithContainerIO(t *testing.T) {
+ meta := Metadata{
+ ID: "1",
+ Name: "Container-1",
+ SandboxID: "Sandbox-1",
+ Config: &runtime.ContainerConfig{
+ Metadata: &runtime.ContainerMetadata{
+ Name: "TestPod-1",
+ Attempt: 1,
+ },
+ },
+ ImageRef: "TestImage-1",
+ StopSignal: "SIGTERM",
+ LogPath: "/test/log/path",
+ }
+ status := Status{
+ Pid: 1,
+ CreatedAt: time.Now().UnixNano(),
+ StartedAt: time.Now().UnixNano(),
+ FinishedAt: time.Now().UnixNano(),
+ ExitCode: 1,
+ Reason: "TestReason-1",
+ Message: "TestMessage-1",
+ }
+ assert := assertlib.New(t)
+
+ c, err := NewContainer(meta, WithFakeStatus(status))
+ assert.NoError(err)
+ assert.Nil(c.IO)
+
+ c, err = NewContainer(
+ meta,
+ WithFakeStatus(status),
+ WithContainerIO(&cio.ContainerIO{}),
+ )
+ assert.NoError(err)
+ assert.NotNil(c.IO)
+}
diff --git a/pkg/store/container/fake_status.go b/pkg/store/container/fake_status.go
new file mode 100644
index 000000000..756588152
--- /dev/null
+++ b/pkg/store/container/fake_status.go
@@ -0,0 +1,62 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package container
+
+import "sync"
+
+// WithFakeStatus adds fake status to the container.
+func WithFakeStatus(status Status) Opts {
+ return func(c *Container) error {
+ c.Status = &fakeStatusStorage{status: status}
+ if status.FinishedAt != 0 {
+ // Fake the TaskExit event
+ c.Stop()
+ }
+ return nil
+ }
+}
+
// fakeStatusStorage is a fake status storage for testing. Unlike the real
// statusStorage it keeps the status purely in memory with no on-disk
// checkpoint.
type fakeStatusStorage struct {
	sync.RWMutex
	// status is the in-memory status, guarded by the embedded RWMutex.
	status Status
}
+
+func (f *fakeStatusStorage) Get() Status {
+ f.RLock()
+ defer f.RUnlock()
+ return f.status
+}
+
// UpdateSync updates the fake status. There is no on-disk checkpoint to
// synchronize, so it simply delegates to Update.
func (f *fakeStatusStorage) UpdateSync(u UpdateFunc) error {
	return f.Update(u)
}
+
+func (f *fakeStatusStorage) Update(u UpdateFunc) error {
+ f.Lock()
+ defer f.Unlock()
+ newStatus, err := u(f.status)
+ if err != nil {
+ return err
+ }
+ f.status = newStatus
+ return nil
+}
+
// Delete is a no-op for the fake storage; there is nothing on disk to
// remove, so it always succeeds.
func (f *fakeStatusStorage) Delete() error {
	return nil
}
diff --git a/pkg/store/container/metadata.go b/pkg/store/container/metadata.go
new file mode 100644
index 000000000..ff9b5f2a3
--- /dev/null
+++ b/pkg/store/container/metadata.go
@@ -0,0 +1,89 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package container
+
+import (
+ "encoding/json"
+
+ "github.com/pkg/errors"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
// NOTE(random-liu):
// 1) Metadata is immutable after created.
// 2) Metadata is checkpointed as containerd container label.

// metadataVersion is current version of container metadata.
const metadataVersion = "v1" // nolint

// versionedMetadata is the internal versioned container metadata.
// nolint
type versionedMetadata struct {
	// Version indicates the version of the versioned container metadata.
	Version string
	// Metadata's type is metadataInternal. If not there will be a recursive call in MarshalJSON.
	Metadata metadataInternal
}

// metadataInternal is for internal use. It is an alias of Metadata without
// the custom JSON methods, so that marshaling it inside versionedMetadata
// does not recurse back into Metadata.MarshalJSON.
type metadataInternal Metadata
+
// Metadata is the unversioned container metadata. It is serialized through
// versionedMetadata (see MarshalJSON/UnmarshalJSON) so old checkpoints can
// be detected by version.
type Metadata struct {
	// ID is the container id.
	ID string
	// Name is the container name.
	Name string
	// SandboxID is the sandbox id the container belongs to.
	SandboxID string
	// Config is the CRI container config.
	// NOTE(random-liu): Resource limits are updatable, the source
	// of truth for resource limits are in containerd.
	Config *runtime.ContainerConfig
	// ImageRef is the reference of image used by the container.
	ImageRef string
	// LogPath is the container log path.
	LogPath string
	// StopSignal is the system call signal that will be sent to the container to exit.
	// TODO(random-liu): Add integration test for stop signal.
	StopSignal string
	// ProcessLabel is the SELinux process label for the container
	ProcessLabel string
}
+
+// MarshalJSON encodes Metadata into bytes in json format.
+func (c *Metadata) MarshalJSON() ([]byte, error) {
+ return json.Marshal(&versionedMetadata{
+ Version: metadataVersion,
+ Metadata: metadataInternal(*c),
+ })
+}
+
+// UnmarshalJSON decodes Metadata from bytes.
+func (c *Metadata) UnmarshalJSON(data []byte) error {
+ versioned := &versionedMetadata{}
+ if err := json.Unmarshal(data, versioned); err != nil {
+ return err
+ }
+ // Handle old version after upgrade.
+ switch versioned.Version {
+ case metadataVersion:
+ *c = Metadata(versioned.Metadata)
+ return nil
+ }
+ return errors.Errorf("unsupported version: %q", versioned.Version)
+}
diff --git a/pkg/store/container/metadata_test.go b/pkg/store/container/metadata_test.go
new file mode 100644
index 000000000..297bc094e
--- /dev/null
+++ b/pkg/store/container/metadata_test.go
@@ -0,0 +1,81 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package container
+
+import (
+ "encoding/json"
+ "testing"
+
+ assertlib "github.com/stretchr/testify/assert"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
// TestMetadataMarshalUnmarshal checks that the custom MarshalJSON /
// UnmarshalJSON round-trip is consistent with plain json.Marshal/Unmarshal
// of the versioned envelope, and that unknown versions are rejected.
func TestMetadataMarshalUnmarshal(t *testing.T) {
	meta := &Metadata{
		ID:        "test-id",
		Name:      "test-name",
		SandboxID: "test-sandbox-id",
		Config: &runtime.ContainerConfig{
			Metadata: &runtime.ContainerMetadata{
				Name:    "test-name",
				Attempt: 1,
			},
		},
		ImageRef: "test-image-ref",
		LogPath:  "/test/log/path",
	}

	assert := assertlib.New(t)
	newMeta := &Metadata{}
	newVerMeta := &versionedMetadata{}

	t.Logf("should be able to do json.marshal")
	data, err := json.Marshal(meta)
	assert.NoError(err)
	// Marshaling the versioned envelope directly must produce identical
	// bytes to the custom MarshalJSON path.
	data1, err := json.Marshal(&versionedMetadata{
		Version:  metadataVersion,
		Metadata: metadataInternal(*meta),
	})
	assert.NoError(err)
	assert.Equal(data, data1)

	t.Logf("should be able to do MarshalJSON")
	data, err = meta.MarshalJSON()
	assert.NoError(err)
	assert.NoError(newMeta.UnmarshalJSON(data))
	assert.Equal(meta, newMeta)

	t.Logf("should be able to do MarshalJSON and json.Unmarshal")
	data, err = meta.MarshalJSON()
	assert.NoError(err)
	assert.NoError(json.Unmarshal(data, newVerMeta))
	assert.Equal(meta, (*Metadata)(&newVerMeta.Metadata))

	t.Logf("should be able to do json.Marshal and UnmarshalJSON")
	data, err = json.Marshal(meta)
	assert.NoError(err)
	assert.NoError(newMeta.UnmarshalJSON(data))
	assert.Equal(meta, newMeta)

	t.Logf("should json.Unmarshal fail for unsupported version")
	unsupported, err := json.Marshal(&versionedMetadata{
		Version:  "random-test-version",
		Metadata: metadataInternal(*meta),
	})
	assert.NoError(err)
	assert.Error(json.Unmarshal(unsupported, &newMeta))
}
diff --git a/pkg/store/container/status.go b/pkg/store/container/status.go
new file mode 100644
index 000000000..655f58806
--- /dev/null
+++ b/pkg/store/container/status.go
@@ -0,0 +1,247 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package container
+
+import (
+ "encoding/json"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "sync"
+
+ "github.com/containerd/continuity"
+ "github.com/pkg/errors"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// The container state machine in the CRI plugin:
+//
+// + +
+// | |
+// | Create | Load
+// | |
+// +----v----+ |
+// | | |
+// | CREATED <---------+-----------+
+// | | | |
+// +----+----- | |
+// | | |
+// | Start | |
+// | | |
+// +----v----+ | |
+// Exec +--------+ | | |
+// Attach | | RUNNING <---------+ |
+// LogReopen +--------> | | |
+// +----+----+ | |
+// | | |
+// | Stop/Exit | |
+// | | |
+// +----v----+ | |
+// | <---------+ +----v----+
+// | EXITED | | |
+// | <----------------+ UNKNOWN |
+// +----+----+ Stop | |
+// | +---------+
+// | Remove
+// v
+// DELETED
+
// statusVersion is current version of container status.
const statusVersion = "v1" // nolint

// versionedStatus is the internal used versioned container status.
// nolint
type versionedStatus struct {
	// Version indicates the version of the versioned container status.
	Version string
	// Status is embedded so its fields are (un)marshaled at the top level
	// of the checkpoint JSON, alongside Version.
	Status
}
+
// Status is the status of a container.
// NOTE(review): timestamps appear to be Unix nanoseconds (callers use
// time.Now().UnixNano()); a zero value means "not yet reached" — see State().
type Status struct {
	// Pid is the init process id of the container.
	Pid uint32
	// CreatedAt is the created timestamp.
	CreatedAt int64
	// StartedAt is the started timestamp.
	StartedAt int64
	// FinishedAt is the finished timestamp.
	FinishedAt int64
	// ExitCode is the container exit code.
	ExitCode int32
	// CamelCase string explaining why container is in its current state.
	Reason string
	// Human-readable message indicating details about why container is in its
	// current state.
	Message string
	// Starting indicates that the container is in starting state.
	// This field doesn't need to be checkpointed.
	Starting bool `json:"-"`
	// Removing indicates that the container is in removing state.
	// This field doesn't need to be checkpointed.
	Removing bool `json:"-"`
	// Unknown indicates that the container status is not fully loaded.
	// This field doesn't need to be checkpointed.
	Unknown bool `json:"-"`
}
+
+// State returns current state of the container based on the container status.
+func (s Status) State() runtime.ContainerState {
+ if s.Unknown {
+ return runtime.ContainerState_CONTAINER_UNKNOWN
+ }
+ if s.FinishedAt != 0 {
+ return runtime.ContainerState_CONTAINER_EXITED
+ }
+ if s.StartedAt != 0 {
+ return runtime.ContainerState_CONTAINER_RUNNING
+ }
+ if s.CreatedAt != 0 {
+ return runtime.ContainerState_CONTAINER_CREATED
+ }
+ return runtime.ContainerState_CONTAINER_UNKNOWN
+}
+
+// encode encodes Status into bytes in json format.
+func (s *Status) encode() ([]byte, error) {
+ return json.Marshal(&versionedStatus{
+ Version: statusVersion,
+ Status: *s,
+ })
+}
+
+// decode decodes Status from bytes.
+func (s *Status) decode(data []byte) error {
+ versioned := &versionedStatus{}
+ if err := json.Unmarshal(data, versioned); err != nil {
+ return err
+ }
+ // Handle old version after upgrade.
+ switch versioned.Version {
+ case statusVersion:
+ *s = versioned.Status
+ return nil
+ }
+ return errors.New("unsupported version")
+}
+
// UpdateFunc is function used to update the container status. If there
// is an error, the update will be rolled back.
type UpdateFunc func(Status) (Status, error)

// StatusStorage manages the container status with a storage backend.
type StatusStorage interface {
	// Get a container status.
	Get() Status
	// UpdateSync updates the container status and the on disk checkpoint.
	// Note that the update MUST be applied in one transaction.
	UpdateSync(UpdateFunc) error
	// Update the container status. Note that the update MUST be applied
	// in one transaction.
	Update(UpdateFunc) error
	// Delete the container status.
	// Note:
	// * Delete should be idempotent.
	// * The status must be deleted in one transaction.
	Delete() error
}
+
+// StoreStatus creates the storage containing the passed in container status with the
+// specified id.
+// The status MUST be created in one transaction.
+func StoreStatus(root, id string, status Status) (StatusStorage, error) {
+ data, err := status.encode()
+ if err != nil {
+ return nil, errors.Wrap(err, "failed to encode status")
+ }
+ path := filepath.Join(root, "status")
+ if err := continuity.AtomicWriteFile(path, data, 0600); err != nil {
+ return nil, errors.Wrapf(err, "failed to checkpoint status to %q", path)
+ }
+ return &statusStorage{
+ path: path,
+ status: status,
+ }, nil
+}
+
+// LoadStatus loads container status from checkpoint. There shouldn't be threads
+// writing to the file during loading.
+func LoadStatus(root, id string) (Status, error) {
+ path := filepath.Join(root, "status")
+ data, err := ioutil.ReadFile(path)
+ if err != nil {
+ return Status{}, errors.Wrapf(err, "failed to read status from %q", path)
+ }
+ var status Status
+ if err := status.decode(data); err != nil {
+ return Status{}, errors.Wrapf(err, "failed to decode status %q", data)
+ }
+ return status, nil
+}
+
// statusStorage is the StatusStorage implementation backed by an on-disk
// json checkpoint file.
type statusStorage struct {
	sync.RWMutex
	// path is the location of the checkpoint file.
	path   string
	// status is the in-memory copy of the checkpoint; guarded by the
	// embedded RWMutex.
	status Status
}
+
+// Get a copy of container status.
+func (s *statusStorage) Get() Status {
+ s.RLock()
+ defer s.RUnlock()
+ return s.status
+}
+
+// UpdateSync updates the container status and the on disk checkpoint.
+func (s *statusStorage) UpdateSync(u UpdateFunc) error {
+ s.Lock()
+ defer s.Unlock()
+ newStatus, err := u(s.status)
+ if err != nil {
+ return err
+ }
+ data, err := newStatus.encode()
+ if err != nil {
+ return errors.Wrap(err, "failed to encode status")
+ }
+ if err := continuity.AtomicWriteFile(s.path, data, 0600); err != nil {
+ return errors.Wrapf(err, "failed to checkpoint status to %q", s.path)
+ }
+ s.status = newStatus
+ return nil
+}
+
+// Update the container status.
+func (s *statusStorage) Update(u UpdateFunc) error {
+ s.Lock()
+ defer s.Unlock()
+ newStatus, err := u(s.status)
+ if err != nil {
+ return err
+ }
+ s.status = newStatus
+ return nil
+}
+
+// Delete deletes the container status from disk atomically.
+func (s *statusStorage) Delete() error {
+ temp := filepath.Dir(s.path) + ".del-" + filepath.Base(s.path)
+ if err := os.Rename(s.path, temp); err != nil && !os.IsNotExist(err) {
+ return err
+ }
+ return os.RemoveAll(temp)
+}
diff --git a/pkg/store/container/status_test.go b/pkg/store/container/status_test.go
new file mode 100644
index 000000000..702cc262d
--- /dev/null
+++ b/pkg/store/container/status_test.go
@@ -0,0 +1,195 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package container
+
+import (
+ "encoding/json"
+ "errors"
+ "io/ioutil"
+ "os"
+ "path/filepath"
+ "testing"
+ "time"
+
+ assertlib "github.com/stretchr/testify/assert"
+ requirelib "github.com/stretchr/testify/require"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
// TestContainerState table-tests Status.State() for each reachable CRI
// container state, including the two distinct UNKNOWN cases.
func TestContainerState(t *testing.T) {
	for c, test := range map[string]struct {
		status Status
		state  runtime.ContainerState
	}{
		"unknown state": {
			status: Status{
				Unknown: true,
			},
			state: runtime.ContainerState_CONTAINER_UNKNOWN,
		},
		"unknown state because there is no timestamp set": {
			status: Status{},
			state:  runtime.ContainerState_CONTAINER_UNKNOWN,
		},
		"created state": {
			status: Status{
				CreatedAt: time.Now().UnixNano(),
			},
			state: runtime.ContainerState_CONTAINER_CREATED,
		},
		"running state": {
			status: Status{
				CreatedAt: time.Now().UnixNano(),
				StartedAt: time.Now().UnixNano(),
			},
			state: runtime.ContainerState_CONTAINER_RUNNING,
		},
		"exited state": {
			status: Status{
				CreatedAt:  time.Now().UnixNano(),
				FinishedAt: time.Now().UnixNano(),
			},
			state: runtime.ContainerState_CONTAINER_EXITED,
		},
	} {
		t.Logf("TestCase %q", c)
		assertlib.Equal(t, test.state, test.status.State())
	}
}
+
// TestStatusEncodeDecode verifies the encode/decode round-trip, that the
// `json:"-"` fields (Starting/Removing/Unknown) are NOT persisted, and
// that decode rejects an unsupported version.
func TestStatusEncodeDecode(t *testing.T) {
	s := &Status{
		Pid:        1234,
		CreatedAt:  time.Now().UnixNano(),
		StartedAt:  time.Now().UnixNano(),
		FinishedAt: time.Now().UnixNano(),
		ExitCode:   1,
		Reason:     "test-reason",
		Message:    "test-message",
		Removing:   true,
		Starting:   true,
		Unknown:    true,
	}
	assert := assertlib.New(t)
	data, err := s.encode()
	assert.NoError(err)
	newS := &Status{}
	assert.NoError(newS.decode(data))
	s.Removing = false // Removing should not be encoded.
	s.Starting = false // Starting should not be encoded.
	s.Unknown = false  // Unknown should not be encoded.
	assert.Equal(s, newS)

	unsupported, err := json.Marshal(&versionedStatus{
		Version: "random-test-version",
		Status:  *s,
	})
	assert.NoError(err)
	assert.Error(newS.decode(unsupported))
}
+
// TestStatus exercises the on-disk statusStorage: store/load round-trip,
// rollback on failed updates, the in-memory-only Update vs the
// checkpointing UpdateSync, snapshot isolation of Get, and idempotent
// Delete. The steps are order-dependent and share one storage instance.
func TestStatus(t *testing.T) {
	testID := "test-id"
	testStatus := Status{
		CreatedAt: time.Now().UnixNano(),
	}
	updateStatus := Status{
		CreatedAt: time.Now().UnixNano(),
		StartedAt: time.Now().UnixNano(),
	}
	updateErr := errors.New("update error")
	assert := assertlib.New(t)
	require := requirelib.New(t)

	tempDir, err := ioutil.TempDir(os.TempDir(), "status-test")
	require.NoError(err)
	defer os.RemoveAll(tempDir)
	statusFile := filepath.Join(tempDir, "status")

	t.Logf("simple store and get")
	s, err := StoreStatus(tempDir, testID, testStatus)
	assert.NoError(err)
	old := s.Get()
	assert.Equal(testStatus, old)
	_, err = os.Stat(statusFile)
	assert.NoError(err)
	loaded, err := LoadStatus(tempDir, testID)
	require.NoError(err)
	assert.Equal(testStatus, loaded)

	t.Logf("failed update should not take effect")
	err = s.Update(func(o Status) (Status, error) {
		o = updateStatus
		return o, updateErr
	})
	assert.Equal(updateErr, err)
	assert.Equal(testStatus, s.Get())
	loaded, err = LoadStatus(tempDir, testID)
	require.NoError(err)
	assert.Equal(testStatus, loaded)

	t.Logf("successful update should take effect but not checkpoint")
	err = s.Update(func(o Status) (Status, error) {
		o = updateStatus
		return o, nil
	})
	assert.NoError(err)
	assert.Equal(updateStatus, s.Get())
	// The on-disk checkpoint must still hold the old status: Update is
	// memory-only.
	loaded, err = LoadStatus(tempDir, testID)
	require.NoError(err)
	assert.Equal(testStatus, loaded)
	// Recover status.
	assert.NoError(s.Update(func(o Status) (Status, error) {
		o = testStatus
		return o, nil
	}))

	t.Logf("failed update sync should not take effect")
	err = s.UpdateSync(func(o Status) (Status, error) {
		o = updateStatus
		return o, updateErr
	})
	assert.Equal(updateErr, err)
	assert.Equal(testStatus, s.Get())
	loaded, err = LoadStatus(tempDir, testID)
	require.NoError(err)
	assert.Equal(testStatus, loaded)

	t.Logf("successful update sync should take effect and checkpoint")
	err = s.UpdateSync(func(o Status) (Status, error) {
		o = updateStatus
		return o, nil
	})
	assert.NoError(err)
	assert.Equal(updateStatus, s.Get())
	loaded, err = LoadStatus(tempDir, testID)
	require.NoError(err)
	assert.Equal(updateStatus, loaded)

	t.Logf("successful update should not affect existing snapshot")
	assert.Equal(testStatus, old)

	t.Logf("delete status")
	assert.NoError(s.Delete())
	_, err = LoadStatus(tempDir, testID)
	assert.Error(err)
	_, err = os.Stat(statusFile)
	assert.True(os.IsNotExist(err))

	t.Logf("delete status should be idempotent")
	assert.NoError(s.Delete())
}
diff --git a/pkg/store/errors.go b/pkg/store/errors.go
new file mode 100644
index 000000000..d8398e45f
--- /dev/null
+++ b/pkg/store/errors.go
@@ -0,0 +1,33 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package store
+
+import "github.com/containerd/containerd/errdefs"
+
var (
	// ErrAlreadyExist is the error returned when data added in the store
	// already exists.
	//
	// This error has been DEPRECATED and will be removed in 1.5. Please switch
	// usage directly to `errdefs.ErrAlreadyExists`.
	// Being a direct alias, it works with errdefs helpers such as
	// errdefs.ToGRPC (see errors_test.go).
	ErrAlreadyExist = errdefs.ErrAlreadyExists
	// ErrNotExist is the error returned when data is not in the store.
	//
	// This error has been DEPRECATED and will be removed in 1.5. Please switch
	// usage directly to `errdefs.ErrNotFound`.
	// Being a direct alias, it works with errdefs helpers such as
	// errdefs.ToGRPC (see errors_test.go).
	ErrNotExist = errdefs.ErrNotFound
)
diff --git a/pkg/store/errors_test.go b/pkg/store/errors_test.go
new file mode 100644
index 000000000..4171e0b37
--- /dev/null
+++ b/pkg/store/errors_test.go
@@ -0,0 +1,48 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package store
+
+import (
+ "testing"
+
+ "google.golang.org/grpc/codes"
+ "google.golang.org/grpc/status"
+
+ "github.com/containerd/containerd/errdefs"
+)
+
+func TestStoreErrAlreadyExistGRPCStatus(t *testing.T) {
+ err := errdefs.ToGRPC(ErrAlreadyExist)
+ s, ok := status.FromError(err)
+ if !ok {
+ t.Fatalf("failed to convert err: %v to status: %d", err, codes.AlreadyExists)
+ }
+ if s.Code() != codes.AlreadyExists {
+ t.Fatalf("expected code: %d got: %d", codes.AlreadyExists, s.Code())
+ }
+}
+
+func TestStoreErrNotExistGRPCStatus(t *testing.T) {
+ err := errdefs.ToGRPC(ErrNotExist)
+ s, ok := status.FromError(err)
+ if !ok {
+ t.Fatalf("failed to convert err: %v to status: %d", err, codes.NotFound)
+ }
+ if s.Code() != codes.NotFound {
+ t.Fatalf("expected code: %d got: %d", codes.NotFound, s.Code())
+ }
+}
diff --git a/pkg/store/image/fake_image.go b/pkg/store/image/fake_image.go
new file mode 100644
index 000000000..e1d6b7cd4
--- /dev/null
+++ b/pkg/store/image/fake_image.go
@@ -0,0 +1,34 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package image
+
+import "github.com/pkg/errors"
+
+// NewFakeStore returns an image store with predefined images.
+// Update is not allowed for this fake store.
+func NewFakeStore(images []Image) (*Store, error) {
+ s := NewStore(nil)
+ for _, i := range images {
+ for _, ref := range i.References {
+ s.refCache[ref] = i.ID
+ }
+ if err := s.store.add(i); err != nil {
+ return nil, errors.Wrapf(err, "add image %+v", i)
+ }
+ }
+ return s, nil
+}
diff --git a/pkg/store/image/image.go b/pkg/store/image/image.go
new file mode 100644
index 000000000..208d490db
--- /dev/null
+++ b/pkg/store/image/image.go
@@ -0,0 +1,256 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package image
+
+import (
+ "context"
+ "encoding/json"
+ "sync"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/containerd/content"
+ "github.com/containerd/containerd/errdefs"
+ imagedigest "github.com/opencontainers/go-digest"
+ "github.com/opencontainers/go-digest/digestset"
+ imageidentity "github.com/opencontainers/image-spec/identity"
+ imagespec "github.com/opencontainers/image-spec/specs-go/v1"
+ "github.com/pkg/errors"
+
+ storeutil "github.com/containerd/cri/pkg/store"
+ "github.com/containerd/cri/pkg/util"
+)
+
+// Image contains all resources associated with the image. All fields
+// MUST NOT be mutated directly after creation.
+type Image struct {
+ // ID of the image. Normally the digest of the image config.
+ ID string
+ // References are references to the image, e.g. RepoTag and RepoDigest.
+ References []string
+ // ChainID is the chainID of the image.
+ ChainID string
+ // Size is the compressed size of the image.
+ Size int64
+ // ImageSpec is the oci image structure which describes basic information about the image.
+ ImageSpec imagespec.Image
+}
+
+// Store stores all images.
+type Store struct {
+ lock sync.RWMutex
+ // refCache is a containerd image reference to image id cache.
+ refCache map[string]string
+ // client is the containerd client.
+ client *containerd.Client
+ // store is the internal image store indexed by image id.
+ store *store
+}
+
+// NewStore creates an image store.
+func NewStore(client *containerd.Client) *Store {
+ return &Store{
+ refCache: make(map[string]string),
+ client: client,
+ store: &store{
+ images: make(map[string]Image),
+ digestSet: digestset.NewSet(),
+ },
+ }
+}
+
+// Update updates cache for a reference.
+func (s *Store) Update(ctx context.Context, ref string) error {
+ s.lock.Lock()
+ defer s.lock.Unlock()
+ i, err := s.client.GetImage(ctx, ref)
+ if err != nil && !errdefs.IsNotFound(err) {
+ return errors.Wrap(err, "get image from containerd")
+ }
+ var img *Image
+ if err == nil {
+ img, err = getImage(ctx, i)
+ if err != nil {
+ return errors.Wrap(err, "get image info from containerd")
+ }
+ }
+ return s.update(ref, img)
+}
+
+// update updates the internal cache. img == nil means that
+// the image does not exist in containerd.
+func (s *Store) update(ref string, img *Image) error {
+ oldID, oldExist := s.refCache[ref]
+ if img == nil {
+ // The image reference doesn't exist in containerd.
+ if oldExist {
+ // Remove the reference from the store.
+ s.store.delete(oldID, ref)
+ delete(s.refCache, ref)
+ }
+ return nil
+ }
+ if oldExist {
+ if oldID == img.ID {
+ return nil
+ }
+ // Updated. Remove tag from old image.
+ s.store.delete(oldID, ref)
+ }
+ // New image. Add new image.
+ s.refCache[ref] = img.ID
+ return s.store.add(*img)
+}
+
+// getImage gets image information from containerd.
+func getImage(ctx context.Context, i containerd.Image) (*Image, error) {
+ // Get image information.
+ diffIDs, err := i.RootFS(ctx)
+ if err != nil {
+ return nil, errors.Wrap(err, "get image diffIDs")
+ }
+ chainID := imageidentity.ChainID(diffIDs)
+
+ size, err := i.Size(ctx)
+ if err != nil {
+ return nil, errors.Wrap(err, "get image compressed resource size")
+ }
+
+ desc, err := i.Config(ctx)
+ if err != nil {
+ return nil, errors.Wrap(err, "get image config descriptor")
+ }
+ id := desc.Digest.String()
+
+ rb, err := content.ReadBlob(ctx, i.ContentStore(), desc)
+ if err != nil {
+ return nil, errors.Wrap(err, "read image config from content store")
+ }
+ var ociimage imagespec.Image
+ if err := json.Unmarshal(rb, &ociimage); err != nil {
+ return nil, errors.Wrapf(err, "unmarshal image config %s", rb)
+ }
+
+ return &Image{
+ ID: id,
+ References: []string{i.Name()},
+ ChainID: chainID.String(),
+ Size: size,
+ ImageSpec: ociimage,
+ }, nil
+}
+
+// Resolve resolves an image reference to an image id.
+func (s *Store) Resolve(ref string) (string, error) {
+ s.lock.RLock()
+ defer s.lock.RUnlock()
+ id, ok := s.refCache[ref]
+ if !ok {
+ return "", storeutil.ErrNotExist
+ }
+ return id, nil
+}
+
+// Get gets image metadata by image id. The id can be truncated.
+// Returns various validation errors if the image id is invalid.
+// Returns storeutil.ErrNotExist if the image doesn't exist.
+func (s *Store) Get(id string) (Image, error) {
+ return s.store.get(id)
+}
+
+// List lists all images.
+func (s *Store) List() []Image {
+ return s.store.list()
+}
+
+type store struct {
+ lock sync.RWMutex
+ images map[string]Image
+ digestSet *digestset.Set
+}
+
+func (s *store) list() []Image {
+ s.lock.RLock()
+ defer s.lock.RUnlock()
+ var images []Image
+ for _, i := range s.images {
+ images = append(images, i)
+ }
+ return images
+}
+
+func (s *store) add(img Image) error {
+ s.lock.Lock()
+ defer s.lock.Unlock()
+ if _, err := s.digestSet.Lookup(img.ID); err != nil {
+ if err != digestset.ErrDigestNotFound {
+ return err
+ }
+ if err := s.digestSet.Add(imagedigest.Digest(img.ID)); err != nil {
+ return err
+ }
+ }
+
+ i, ok := s.images[img.ID]
+ if !ok {
+ // If the image doesn't exist, add it.
+ s.images[img.ID] = img
+ return nil
+ }
+ // Or else, merge the references.
+ i.References = util.MergeStringSlices(i.References, img.References)
+ s.images[img.ID] = i
+ return nil
+}
+
+func (s *store) get(id string) (Image, error) {
+ s.lock.RLock()
+ defer s.lock.RUnlock()
+ digest, err := s.digestSet.Lookup(id)
+ if err != nil {
+ if err == digestset.ErrDigestNotFound {
+ err = storeutil.ErrNotExist
+ }
+ return Image{}, err
+ }
+ if i, ok := s.images[digest.String()]; ok {
+ return i, nil
+ }
+ return Image{}, storeutil.ErrNotExist
+}
+
+func (s *store) delete(id, ref string) {
+ s.lock.Lock()
+ defer s.lock.Unlock()
+ digest, err := s.digestSet.Lookup(id)
+ if err != nil {
+ // Note: The digestSet.Remove and delete don't handle truncated index.
+ // So we need to return if there is an error.
+ return
+ }
+ i, ok := s.images[digest.String()]
+ if !ok {
+ return
+ }
+ i.References = util.SubtractStringSlice(i.References, ref)
+ if len(i.References) != 0 {
+ s.images[digest.String()] = i
+ return
+ }
+ // Remove the image if it is not referenced any more.
+ s.digestSet.Remove(digest) // nolint: errcheck
+ delete(s.images, digest.String())
+}
diff --git a/pkg/store/image/image_test.go b/pkg/store/image/image_test.go
new file mode 100644
index 000000000..5635469f3
--- /dev/null
+++ b/pkg/store/image/image_test.go
@@ -0,0 +1,248 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package image
+
+import (
+ "sort"
+ "strings"
+ "testing"
+
+ "github.com/opencontainers/go-digest/digestset"
+ assertlib "github.com/stretchr/testify/assert"
+
+ storeutil "github.com/containerd/cri/pkg/store"
+)
+
+func TestInternalStore(t *testing.T) {
+ images := []Image{
+ {
+ ID: "sha256:1123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
+ ChainID: "test-chain-id-1",
+ References: []string{"ref-1"},
+ Size: 10,
+ },
+ {
+ ID: "sha256:2123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
+ ChainID: "test-chain-id-2abcd",
+ References: []string{"ref-2abcd"},
+ Size: 20,
+ },
+ {
+ ID: "sha256:3123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
+ References: []string{"ref-4a333"},
+ ChainID: "test-chain-id-4a333",
+ Size: 30,
+ },
+ {
+ ID: "sha256:4123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef",
+ References: []string{"ref-4abcd"},
+ ChainID: "test-chain-id-4abcd",
+ Size: 40,
+ },
+ }
+ assert := assertlib.New(t)
+ genTruncIndex := func(normalName string) string { return normalName[:(len(normalName)+1)/2] }
+
+ s := &store{
+ images: make(map[string]Image),
+ digestSet: digestset.NewSet(),
+ }
+
+ t.Logf("should be able to add image")
+ for _, img := range images {
+ err := s.add(img)
+ assert.NoError(err)
+ }
+
+ t.Logf("should be able to get image")
+ for _, v := range images {
+ truncID := genTruncIndex(v.ID)
+ got, err := s.get(truncID)
+ assert.NoError(err, "truncID:%s, fullID:%s", truncID, v.ID)
+ assert.Equal(v, got)
+ }
+
+ t.Logf("should be able to get image by truncated imageId without algorithm")
+ for _, v := range images {
+ truncID := genTruncIndex(v.ID[strings.Index(v.ID, ":")+1:])
+ got, err := s.get(truncID)
+ assert.NoError(err, "truncID:%s, fullID:%s", truncID, v.ID)
+ assert.Equal(v, got)
+ }
+
+ t.Logf("should not be able to get image by ambiguous prefix")
+ ambiguousPrefixs := []string{"sha256", "sha256:"}
+ for _, v := range ambiguousPrefixs {
+ _, err := s.get(v)
+ assert.NotEqual(nil, err)
+ }
+
+ t.Logf("should be able to list images")
+ imgs := s.list()
+ assert.Len(imgs, len(images))
+
+ imageNum := len(images)
+ for _, v := range images {
+ truncID := genTruncIndex(v.ID)
+ oldRef := v.References[0]
+ newRef := oldRef + "new"
+
+ t.Logf("should be able to add new references")
+ newImg := v
+ newImg.References = []string{newRef}
+ err := s.add(newImg)
+ assert.NoError(err)
+ got, err := s.get(truncID)
+ assert.NoError(err)
+ assert.Len(got.References, 2)
+ assert.Contains(got.References, oldRef, newRef)
+
+ t.Logf("should not be able to add duplicated references")
+ err = s.add(newImg)
+ assert.NoError(err)
+ got, err = s.get(truncID)
+ assert.NoError(err)
+ assert.Len(got.References, 2)
+ assert.Contains(got.References, oldRef, newRef)
+
+ t.Logf("should be able to delete image references")
+ s.delete(truncID, oldRef)
+ got, err = s.get(truncID)
+ assert.NoError(err)
+ assert.Equal([]string{newRef}, got.References)
+
+ t.Logf("should be able to delete image")
+ s.delete(truncID, newRef)
+ got, err = s.get(truncID)
+ assert.Equal(storeutil.ErrNotExist, err)
+ assert.Equal(Image{}, got)
+
+ imageNum--
+ imgs = s.list()
+ assert.Len(imgs, imageNum)
+ }
+}
+
+func TestImageStore(t *testing.T) {
+ id := "sha256:1123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
+ newID := "sha256:9923456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef"
+ image := Image{
+ ID: id,
+ ChainID: "test-chain-id-1",
+ References: []string{"ref-1"},
+ Size: 10,
+ }
+ assert := assertlib.New(t)
+
+ equal := func(i1, i2 Image) {
+ sort.Strings(i1.References)
+ sort.Strings(i2.References)
+ assert.Equal(i1, i2)
+ }
+ for desc, test := range map[string]struct {
+ ref string
+ image *Image
+ expected []Image
+ }{
+ "nothing should happen if a non-exist ref disappear": {
+ ref: "ref-2",
+ image: nil,
+ expected: []Image{image},
+ },
+ "new ref for an existing image": {
+ ref: "ref-2",
+ image: &Image{
+ ID: id,
+ ChainID: "test-chain-id-1",
+ References: []string{"ref-2"},
+ Size: 10,
+ },
+ expected: []Image{
+ {
+ ID: id,
+ ChainID: "test-chain-id-1",
+ References: []string{"ref-1", "ref-2"},
+ Size: 10,
+ },
+ },
+ },
+ "new ref for a new image": {
+ ref: "ref-2",
+ image: &Image{
+ ID: newID,
+ ChainID: "test-chain-id-2",
+ References: []string{"ref-2"},
+ Size: 20,
+ },
+ expected: []Image{
+ image,
+ {
+ ID: newID,
+ ChainID: "test-chain-id-2",
+ References: []string{"ref-2"},
+ Size: 20,
+ },
+ },
+ },
+ "existing ref point to a new image": {
+ ref: "ref-1",
+ image: &Image{
+ ID: newID,
+ ChainID: "test-chain-id-2",
+ References: []string{"ref-1"},
+ Size: 20,
+ },
+ expected: []Image{
+ {
+ ID: newID,
+ ChainID: "test-chain-id-2",
+ References: []string{"ref-1"},
+ Size: 20,
+ },
+ },
+ },
+ "existing ref disappear": {
+ ref: "ref-1",
+ image: nil,
+ expected: []Image{},
+ },
+ } {
+ t.Logf("TestCase %q", desc)
+ s, err := NewFakeStore([]Image{image})
+ assert.NoError(err)
+ assert.NoError(s.update(test.ref, test.image))
+
+ assert.Len(s.List(), len(test.expected))
+ for _, expect := range test.expected {
+ got, err := s.Get(expect.ID)
+ assert.NoError(err)
+ equal(got, expect)
+ for _, ref := range expect.References {
+ id, err := s.Resolve(ref)
+ assert.NoError(err)
+ assert.Equal(expect.ID, id)
+ }
+ }
+
+ if test.image == nil {
+ // Shouldn't be able to index by removed ref.
+ id, err := s.Resolve(test.ref)
+ assert.Equal(storeutil.ErrNotExist, err)
+ assert.Empty(id)
+ }
+ }
+}
diff --git a/pkg/store/label/label.go b/pkg/store/label/label.go
new file mode 100644
index 000000000..c8c5ff924
--- /dev/null
+++ b/pkg/store/label/label.go
@@ -0,0 +1,90 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package label
+
+import (
+ "sync"
+
+ "github.com/opencontainers/selinux/go-selinux"
+)
+
+type Store struct {
+ sync.Mutex
+ levels map[string]int
+ Releaser func(string)
+ Reserver func(string)
+}
+
+func NewStore() *Store {
+ return &Store{
+ levels: map[string]int{},
+ Releaser: selinux.ReleaseLabel,
+ Reserver: selinux.ReserveLabel,
+ }
+}
+
+func (s *Store) Reserve(label string) error {
+ s.Lock()
+ defer s.Unlock()
+
+ context, err := selinux.NewContext(label)
+ if err != nil {
+ return err
+ }
+
+ level := context["level"]
+ // no reason to count empty
+ if level == "" {
+ return nil
+ }
+
+ if _, ok := s.levels[level]; !ok {
+ s.Reserver(label)
+ }
+
+ s.levels[level]++
+ return nil
+}
+
+func (s *Store) Release(label string) {
+ s.Lock()
+ defer s.Unlock()
+
+ context, err := selinux.NewContext(label)
+ if err != nil {
+ return
+ }
+
+ level := context["level"]
+ if level == "" {
+ return
+ }
+
+ count, ok := s.levels[level]
+ if !ok {
+ return
+ }
+ switch {
+ case count == 1:
+ s.Releaser(label)
+ delete(s.levels, level)
+ case count < 1:
+ delete(s.levels, level)
+ case count > 1:
+ s.levels[level] = count - 1
+ }
+}
diff --git a/pkg/store/label/label_test.go b/pkg/store/label/label_test.go
new file mode 100644
index 000000000..cc2c214bf
--- /dev/null
+++ b/pkg/store/label/label_test.go
@@ -0,0 +1,116 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package label
+
+import (
+ "testing"
+
+ "github.com/opencontainers/selinux/go-selinux"
+ "github.com/stretchr/testify/assert"
+)
+
+func TestAddThenRemove(t *testing.T) {
+ if !selinux.GetEnabled() {
+ t.Skip("selinux is not enabled")
+ }
+
+ assert := assert.New(t)
+ store := NewStore()
+ releaseCount := 0
+ reserveCount := 0
+ store.Releaser = func(label string) {
+ assert.Contains(label, ":c1,c2")
+ releaseCount++
+ assert.Equal(1, releaseCount)
+ }
+ store.Reserver = func(label string) {
+ assert.Contains(label, ":c1,c2")
+ reserveCount++
+ assert.Equal(1, reserveCount)
+ }
+
+ t.Log("should count to two level")
+ assert.NoError(store.Reserve("junk:junk:junk:c1,c2"))
+ assert.NoError(store.Reserve("junk2:junk2:junk2:c1,c2"))
+
+ t.Log("should have one item")
+ assert.Equal(1, len(store.levels))
+
+ t.Log("c1,c2 count should be 2")
+ assert.Equal(2, store.levels["c1,c2"])
+
+ store.Release("junk:junk:junk:c1,c2")
+ store.Release("junk2:junk2:junk2:c1,c2")
+
+ t.Log("should have 0 items")
+ assert.Equal(0, len(store.levels))
+
+ t.Log("should have reserved")
+ assert.Equal(1, reserveCount)
+
+ t.Log("should have released")
+ assert.Equal(1, releaseCount)
+}
+
+func TestJunkData(t *testing.T) {
+ if !selinux.GetEnabled() {
+ t.Skip("selinux is not enabled")
+ }
+
+ assert := assert.New(t)
+ store := NewStore()
+ releaseCount := 0
+ store.Releaser = func(label string) {
+ releaseCount++
+ }
+ reserveCount := 0
+ store.Reserver = func(label string) {
+ reserveCount++
+ }
+
+ t.Log("should ignore empty label")
+ assert.NoError(store.Reserve(""))
+ assert.Equal(0, len(store.levels))
+ store.Release("")
+ assert.Equal(0, len(store.levels))
+ assert.Equal(0, releaseCount)
+ assert.Equal(0, reserveCount)
+
+ t.Log("should fail on bad label")
+ assert.Error(store.Reserve("junkjunkjunkc1c2"))
+ assert.Equal(0, len(store.levels))
+ store.Release("junkjunkjunkc1c2")
+ assert.Equal(0, len(store.levels))
+ assert.Equal(0, releaseCount)
+ assert.Equal(0, reserveCount)
+
+ t.Log("should not release unknown label")
+ store.Release("junk2:junk2:junk2:c1,c2")
+ assert.Equal(0, len(store.levels))
+ assert.Equal(0, releaseCount)
+ assert.Equal(0, reserveCount)
+
+ t.Log("should release once even if too many deletes")
+ assert.NoError(store.Reserve("junk2:junk2:junk2:c1,c2"))
+ assert.Equal(1, len(store.levels))
+ assert.Equal(1, store.levels["c1,c2"])
+ store.Release("junk2:junk2:junk2:c1,c2")
+ store.Release("junk2:junk2:junk2:c1,c2")
+ assert.Equal(0, len(store.levels))
+ assert.Equal(1, releaseCount)
+ assert.Equal(1, reserveCount)
+}
diff --git a/pkg/store/sandbox/metadata.go b/pkg/store/sandbox/metadata.go
new file mode 100644
index 000000000..eb3aa8e83
--- /dev/null
+++ b/pkg/store/sandbox/metadata.go
@@ -0,0 +1,89 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package sandbox
+
+import (
+ "encoding/json"
+
+ cni "github.com/containerd/go-cni"
+ "github.com/pkg/errors"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// NOTE(random-liu):
+// 1) Metadata is immutable after created.
+// 2) Metadata is checkpointed as containerd container label.
+
+// metadataVersion is current version of sandbox metadata.
+const metadataVersion = "v1" // nolint
+
+// versionedMetadata is the internal versioned sandbox metadata.
+// nolint
+type versionedMetadata struct {
+ // Version indicates the version of the versioned sandbox metadata.
+ Version string
+ // Metadata's type is metadataInternal. If not, there will be a recursive call in MarshalJSON.
+ Metadata metadataInternal
+}
+
+// metadataInternal is for internal use.
+type metadataInternal Metadata
+
+// Metadata is the unversioned sandbox metadata.
+type Metadata struct {
+ // ID is the sandbox id.
+ ID string
+ // Name is the sandbox name.
+ Name string
+ // Config is the CRI sandbox config.
+ Config *runtime.PodSandboxConfig
+ // NetNSPath is the network namespace used by the sandbox.
+ NetNSPath string
+ // IP of Pod if it is attached to non host network
+ IP string
+ // AdditionalIPs of the Pod if it is attached to non host network
+ AdditionalIPs []string
+ // RuntimeHandler is the runtime handler name of the pod.
+ RuntimeHandler string
+ // CNIResult is the resulting configuration for the attached network namespace interfaces.
+ CNIResult *cni.CNIResult
+ // ProcessLabel is the SELinux process label for the container
+ ProcessLabel string
+}
+
+// MarshalJSON encodes Metadata into bytes in json format.
+func (c *Metadata) MarshalJSON() ([]byte, error) {
+ return json.Marshal(&versionedMetadata{
+ Version: metadataVersion,
+ Metadata: metadataInternal(*c),
+ })
+}
+
+// UnmarshalJSON decodes Metadata from bytes.
+func (c *Metadata) UnmarshalJSON(data []byte) error {
+ versioned := &versionedMetadata{}
+ if err := json.Unmarshal(data, versioned); err != nil {
+ return err
+ }
+ // Handle old version after upgrade.
+ switch versioned.Version {
+ case metadataVersion:
+ *c = Metadata(versioned.Metadata)
+ return nil
+ }
+ return errors.Errorf("unsupported version: %q", versioned.Version)
+}
diff --git a/pkg/store/sandbox/metadata_test.go b/pkg/store/sandbox/metadata_test.go
new file mode 100644
index 000000000..d0a51d90e
--- /dev/null
+++ b/pkg/store/sandbox/metadata_test.go
@@ -0,0 +1,79 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package sandbox
+
+import (
+ "encoding/json"
+ "testing"
+
+ assertlib "github.com/stretchr/testify/assert"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+func TestMetadataMarshalUnmarshal(t *testing.T) {
+ meta := &Metadata{
+ ID: "test-id",
+ Name: "test-name",
+ Config: &runtime.PodSandboxConfig{
+ Metadata: &runtime.PodSandboxMetadata{
+ Name: "test-name",
+ Uid: "test-uid",
+ Namespace: "test-namespace",
+ Attempt: 1,
+ },
+ },
+ }
+ assert := assertlib.New(t)
+ newMeta := &Metadata{}
+ newVerMeta := &versionedMetadata{}
+
+ t.Logf("should be able to do json.marshal")
+ data, err := json.Marshal(meta)
+ assert.NoError(err)
+ data1, err := json.Marshal(&versionedMetadata{
+ Version: metadataVersion,
+ Metadata: metadataInternal(*meta),
+ })
+ assert.NoError(err)
+ assert.Equal(data, data1)
+
+ t.Logf("should be able to do MarshalJSON")
+ data, err = meta.MarshalJSON()
+ assert.NoError(err)
+ assert.NoError(newMeta.UnmarshalJSON(data))
+ assert.Equal(meta, newMeta)
+
+ t.Logf("should be able to do MarshalJSON and json.Unmarshal")
+ data, err = meta.MarshalJSON()
+ assert.NoError(err)
+ assert.NoError(json.Unmarshal(data, newVerMeta))
+ assert.Equal(meta, (*Metadata)(&newVerMeta.Metadata))
+
+ t.Logf("should be able to do json.Marshal and UnmarshalJSON")
+ data, err = json.Marshal(meta)
+ assert.NoError(err)
+ assert.NoError(newMeta.UnmarshalJSON(data))
+ assert.Equal(meta, newMeta)
+
+ t.Logf("should json.Unmarshal fail for unsupported version")
+ unsupported, err := json.Marshal(&versionedMetadata{
+ Version: "random-test-version",
+ Metadata: metadataInternal(*meta),
+ })
+ assert.NoError(err)
+ assert.Error(json.Unmarshal(unsupported, &newMeta))
+}
diff --git a/pkg/store/sandbox/sandbox.go b/pkg/store/sandbox/sandbox.go
new file mode 100644
index 000000000..223e88369
--- /dev/null
+++ b/pkg/store/sandbox/sandbox.go
@@ -0,0 +1,137 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package sandbox
+
+import (
+ "sync"
+
+ "github.com/containerd/containerd"
+ "github.com/containerd/cri/pkg/store/label"
+ "github.com/docker/docker/pkg/truncindex"
+
+ "github.com/containerd/cri/pkg/netns"
+ "github.com/containerd/cri/pkg/store"
+)
+
+// Sandbox contains all resources associated with the sandbox. All methods to
+// mutate the internal state are thread safe.
+type Sandbox struct {
+ // Metadata is the metadata of the sandbox, it is immutable after created.
+ Metadata
+ // Status stores the status of the sandbox.
+ Status StatusStorage
+ // Container is the containerd sandbox container client.
+ Container containerd.Container
+ // CNI network namespace client.
+ // For hostnetwork pod, this is always nil;
+ // For non hostnetwork pod, this should never be nil.
+ NetNS *netns.NetNS
+ // StopCh is used to propagate the stop information of the sandbox.
+ *store.StopCh
+}
+
+// NewSandbox creates an internally used sandbox type. This function reminds
+// the caller that a sandbox must have a status.
+func NewSandbox(metadata Metadata, status Status) Sandbox {
+ s := Sandbox{
+ Metadata: metadata,
+ Status: StoreStatus(status),
+ StopCh: store.NewStopCh(),
+ }
+ if status.State == StateNotReady {
+ s.Stop()
+ }
+ return s
+}
+
+// Store stores all sandboxes.
+type Store struct {
+ lock sync.RWMutex
+ sandboxes map[string]Sandbox
+ idIndex *truncindex.TruncIndex
+ labels *label.Store
+}
+
+// NewStore creates a sandbox store.
+func NewStore(labels *label.Store) *Store {
+ return &Store{
+ sandboxes: make(map[string]Sandbox),
+ idIndex: truncindex.NewTruncIndex([]string{}),
+ labels: labels,
+ }
+}
+
+// Add a sandbox into the store.
+func (s *Store) Add(sb Sandbox) error {
+ s.lock.Lock()
+ defer s.lock.Unlock()
+ if _, ok := s.sandboxes[sb.ID]; ok {
+ return store.ErrAlreadyExist
+ }
+ if err := s.labels.Reserve(sb.ProcessLabel); err != nil {
+ return err
+ }
+ if err := s.idIndex.Add(sb.ID); err != nil {
+ return err
+ }
+ s.sandboxes[sb.ID] = sb
+ return nil
+}
+
+// Get returns the sandbox with specified id.
+// Returns store.ErrNotExist if the sandbox doesn't exist.
+func (s *Store) Get(id string) (Sandbox, error) {
+ s.lock.RLock()
+ defer s.lock.RUnlock()
+ id, err := s.idIndex.Get(id)
+ if err != nil {
+ if err == truncindex.ErrNotExist {
+ err = store.ErrNotExist
+ }
+ return Sandbox{}, err
+ }
+ if sb, ok := s.sandboxes[id]; ok {
+ return sb, nil
+ }
+ return Sandbox{}, store.ErrNotExist
+}
+
+// List lists all sandboxes.
+func (s *Store) List() []Sandbox {
+ s.lock.RLock()
+ defer s.lock.RUnlock()
+ var sandboxes []Sandbox
+ for _, sb := range s.sandboxes {
+ sandboxes = append(sandboxes, sb)
+ }
+ return sandboxes
+}
+
+// Delete deletes the sandbox with specified id.
+func (s *Store) Delete(id string) {
+ s.lock.Lock()
+ defer s.lock.Unlock()
+ id, err := s.idIndex.Get(id)
+ if err != nil {
+ // Note: The idIndex.Delete and delete don't handle truncated index.
+ // So we need to return if there is an error.
+ return
+ }
+ s.labels.Release(s.sandboxes[id].ProcessLabel)
+ s.idIndex.Delete(id) // nolint: errcheck
+ delete(s.sandboxes, id)
+}
diff --git a/pkg/store/sandbox/sandbox_test.go b/pkg/store/sandbox/sandbox_test.go
new file mode 100644
index 000000000..4c922eeb8
--- /dev/null
+++ b/pkg/store/sandbox/sandbox_test.go
@@ -0,0 +1,156 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package sandbox
+
+import (
+ "testing"
+
+ "github.com/containerd/cri/pkg/store/label"
+ assertlib "github.com/stretchr/testify/assert"
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/store"
+)
+
+func TestSandboxStore(t *testing.T) {
+ sandboxes := map[string]Sandbox{
+ "1": NewSandbox(
+ Metadata{
+ ID: "1",
+ Name: "Sandbox-1",
+ Config: &runtime.PodSandboxConfig{
+ Metadata: &runtime.PodSandboxMetadata{
+ Name: "TestPod-1",
+ Uid: "TestUid-1",
+ Namespace: "TestNamespace-1",
+ Attempt: 1,
+ },
+ },
+ NetNSPath: "TestNetNS-1",
+ },
+ Status{State: StateReady},
+ ),
+ "2abcd": NewSandbox(
+ Metadata{
+ ID: "2abcd",
+ Name: "Sandbox-2abcd",
+ Config: &runtime.PodSandboxConfig{
+ Metadata: &runtime.PodSandboxMetadata{
+ Name: "TestPod-2abcd",
+ Uid: "TestUid-2abcd",
+ Namespace: "TestNamespace-2abcd",
+ Attempt: 2,
+ },
+ },
+ NetNSPath: "TestNetNS-2",
+ },
+ Status{State: StateNotReady},
+ ),
+ "4a333": NewSandbox(
+ Metadata{
+ ID: "4a333",
+ Name: "Sandbox-4a333",
+ Config: &runtime.PodSandboxConfig{
+ Metadata: &runtime.PodSandboxMetadata{
+ Name: "TestPod-4a333",
+ Uid: "TestUid-4a333",
+ Namespace: "TestNamespace-4a333",
+ Attempt: 3,
+ },
+ },
+ NetNSPath: "TestNetNS-3",
+ },
+ Status{State: StateNotReady},
+ ),
+ "4abcd": NewSandbox(
+ Metadata{
+ ID: "4abcd",
+ Name: "Sandbox-4abcd",
+ Config: &runtime.PodSandboxConfig{
+ Metadata: &runtime.PodSandboxMetadata{
+ Name: "TestPod-4abcd",
+ Uid: "TestUid-4abcd",
+ Namespace: "TestNamespace-4abcd",
+ Attempt: 1,
+ },
+ },
+ NetNSPath: "TestNetNS-4abcd",
+ },
+ Status{State: StateReady},
+ ),
+ }
+ unknown := NewSandbox(
+ Metadata{
+ ID: "3defg",
+ Name: "Sandbox-3defg",
+ Config: &runtime.PodSandboxConfig{
+ Metadata: &runtime.PodSandboxMetadata{
+ Name: "TestPod-3defg",
+ Uid: "TestUid-3defg",
+ Namespace: "TestNamespace-3defg",
+ Attempt: 1,
+ },
+ },
+ NetNSPath: "TestNetNS-3defg",
+ },
+ Status{State: StateUnknown},
+ )
+ assert := assertlib.New(t)
+ s := NewStore(label.NewStore())
+
+ t.Logf("should be able to add sandbox")
+ for _, sb := range sandboxes {
+ assert.NoError(s.Add(sb))
+ }
+ assert.NoError(s.Add(unknown))
+
+ t.Logf("should be able to get sandbox")
+ genTruncIndex := func(normalName string) string { return normalName[:(len(normalName)+1)/2] }
+ for id, sb := range sandboxes {
+ got, err := s.Get(genTruncIndex(id))
+ assert.NoError(err)
+ assert.Equal(sb, got)
+ }
+
+ t.Logf("should be able to get sandbox in unknown state with Get")
+ got, err := s.Get(unknown.ID)
+ assert.NoError(err)
+ assert.Equal(unknown, got)
+
+ t.Logf("should be able to list sandboxes")
+ sbNum := len(sandboxes) + 1
+ sbs := s.List()
+ assert.Len(sbs, sbNum)
+
+ for testID, v := range sandboxes {
+ truncID := genTruncIndex(testID)
+
+ t.Logf("add should return already exists error for duplicated sandbox")
+ assert.Equal(store.ErrAlreadyExist, s.Add(v))
+
+ t.Logf("should be able to delete sandbox")
+ s.Delete(truncID)
+ sbNum--
+ sbs = s.List()
+ assert.Len(sbs, sbNum)
+
+ t.Logf("get should return not exist error after deletion")
+ sb, err := s.Get(truncID)
+ assert.Equal(Sandbox{}, sb)
+ assert.Equal(store.ErrNotExist, err)
+ }
+}
diff --git a/pkg/store/sandbox/status.go b/pkg/store/sandbox/status.go
new file mode 100644
index 000000000..e9198eb97
--- /dev/null
+++ b/pkg/store/sandbox/status.go
@@ -0,0 +1,151 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package sandbox
+
+import (
+ "strconv"
+ "sync"
+ "time"
+
+ runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+)
+
+// The sandbox state machine in the CRI plugin:
+// + +
+// | |
+// | Create(Run) | Load
+// | |
+// Start | |
+// (failed) | |
+// +------------------+ +-----------+
+// | | | |
+// | | | |
+// | | | |
+// | | Start(Run) | |
+// | | | |
+// | PortForward +----v----+ | |
+// | +------+ | | |
+// | | | READY <---------+ |
+// | +------> | | |
+// | +----+----+ | |
+// | | | |
+// | | Stop/Exit | |
+// | | | |
+// | +----v----+ | |
+// | | <---------+ +----v----+
+// | | NOTREADY| | |
+// | | <----------------+ UNKNOWN |
+// | +----+----+ Stop | |
+// | | +---------+
+// | | Remove
+// | v
+// +-------------> DELETED
+
+// State is the sandbox state we use in containerd/cri.
+// It includes unknown, which is an internal state not defined in CRI.
+// The state mapping from internal states to CRI states:
+// * ready -> ready
+// * not ready -> not ready
+// * unknown -> not ready
+type State uint32
+
+const (
+	// StateReady is the ready state; it means the sandbox container
+	// is running.
+ StateReady State = iota
+	// StateNotReady is the not-ready state; it ONLY means the sandbox
+	// container is not running.
+ // StopPodSandbox should still be called for NOTREADY sandbox to
+ // cleanup resources other than sandbox container, e.g. network namespace.
+ // This is an assumption made in CRI.
+ StateNotReady
+ // StateUnknown is unknown state. Sandbox only goes
+ // into unknown state when its status fails to be loaded.
+ StateUnknown
+)
+
+// String returns the string representation of the state
+func (s State) String() string {
+ switch s {
+ case StateReady:
+ return runtime.PodSandboxState_SANDBOX_READY.String()
+ case StateNotReady:
+ return runtime.PodSandboxState_SANDBOX_NOTREADY.String()
+ case StateUnknown:
+ // PodSandboxState doesn't have an unknown state, but State does, so return a string using the same convention
+ return "SANDBOX_UNKNOWN"
+ default:
+ return "invalid sandbox state value: " + strconv.Itoa(int(s))
+ }
+}
+
+// Status is the status of a sandbox.
+type Status struct {
+ // Pid is the init process id of the sandbox container.
+ Pid uint32
+ // CreatedAt is the created timestamp.
+ CreatedAt time.Time
+ // State is the state of the sandbox.
+ State State
+}
+
+// UpdateFunc is function used to update the sandbox status. If there
+// is an error, the update will be rolled back.
+type UpdateFunc func(Status) (Status, error)
+
+// StatusStorage manages the sandbox status.
+// The status storage for sandbox is different from container status storage,
+// because we don't checkpoint sandbox status. If we need checkpoint in the
+// future, we should combine this with container status storage.
+type StatusStorage interface {
+ // Get a sandbox status.
+ Get() Status
+ // Update the sandbox status. Note that the update MUST be applied
+ // in one transaction.
+ Update(UpdateFunc) error
+}
+
+// StoreStatus creates the storage containing the passed in sandbox status with the
+// specified id.
+// The status MUST be created in one transaction.
+func StoreStatus(status Status) StatusStorage {
+ return &statusStorage{status: status}
+}
+
+type statusStorage struct {
+ sync.RWMutex
+ status Status
+}
+
+// Get a copy of sandbox status.
+func (s *statusStorage) Get() Status {
+ s.RLock()
+ defer s.RUnlock()
+ return s.status
+}
+
+// Update the sandbox status.
+func (s *statusStorage) Update(u UpdateFunc) error {
+ s.Lock()
+ defer s.Unlock()
+ newStatus, err := u(s.status)
+ if err != nil {
+ return err
+ }
+ s.status = newStatus
+ return nil
+}
diff --git a/pkg/store/sandbox/status_test.go b/pkg/store/sandbox/status_test.go
new file mode 100644
index 000000000..ad27db0c6
--- /dev/null
+++ b/pkg/store/sandbox/status_test.go
@@ -0,0 +1,69 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package sandbox
+
+import (
+ "errors"
+ "testing"
+ "time"
+
+ assertlib "github.com/stretchr/testify/assert"
+)
+
+func TestStatus(t *testing.T) {
+ testStatus := Status{
+ Pid: 123,
+ CreatedAt: time.Now(),
+ State: StateUnknown,
+ }
+ updateStatus := Status{
+ Pid: 456,
+ CreatedAt: time.Now(),
+ State: StateReady,
+ }
+ updateErr := errors.New("update error")
+ assert := assertlib.New(t)
+
+ t.Logf("simple store and get")
+ s := StoreStatus(testStatus)
+ old := s.Get()
+ assert.Equal(testStatus, old)
+
+ t.Logf("failed update should not take effect")
+ err := s.Update(func(o Status) (Status, error) {
+ o = updateStatus
+ return o, updateErr
+ })
+ assert.Equal(updateErr, err)
+ assert.Equal(testStatus, s.Get())
+
+ t.Logf("successful update should take effect but not checkpoint")
+ err = s.Update(func(o Status) (Status, error) {
+ o = updateStatus
+ return o, nil
+ })
+ assert.NoError(err)
+ assert.Equal(updateStatus, s.Get())
+}
+
+func TestStateStringConversion(t *testing.T) {
+ assert := assertlib.New(t)
+ assert.Equal("SANDBOX_READY", StateReady.String())
+ assert.Equal("SANDBOX_NOTREADY", StateNotReady.String())
+ assert.Equal("SANDBOX_UNKNOWN", StateUnknown.String())
+ assert.Equal("invalid sandbox state value: 123", State(123).String())
+}
diff --git a/pkg/store/snapshot/snapshot.go b/pkg/store/snapshot/snapshot.go
new file mode 100644
index 000000000..ce05f0e04
--- /dev/null
+++ b/pkg/store/snapshot/snapshot.go
@@ -0,0 +1,87 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package snapshot
+
+import (
+ "sync"
+
+ snapshot "github.com/containerd/containerd/snapshots"
+
+ "github.com/containerd/cri/pkg/store"
+)
+
+// Snapshot contains the information about the snapshot.
+type Snapshot struct {
+ // Key is the key of the snapshot
+ Key string
+ // Kind is the kind of the snapshot (active, committed, view)
+ Kind snapshot.Kind
+ // Size is the size of the snapshot in bytes.
+ Size uint64
+ // Inodes is the number of inodes used by the snapshot
+ Inodes uint64
+	// Timestamp is the latest update time (in nanoseconds) of the snapshot
+ // information.
+ Timestamp int64
+}
+
+// Store stores all snapshots.
+type Store struct {
+ lock sync.RWMutex
+ snapshots map[string]Snapshot
+}
+
+// NewStore creates a snapshot store.
+func NewStore() *Store {
+ return &Store{snapshots: make(map[string]Snapshot)}
+}
+
+// Add a snapshot into the store.
+func (s *Store) Add(snapshot Snapshot) {
+ s.lock.Lock()
+ defer s.lock.Unlock()
+ s.snapshots[snapshot.Key] = snapshot
+}
+
+// Get returns the snapshot with specified key. Returns store.ErrNotExist if the
+// snapshot doesn't exist.
+func (s *Store) Get(key string) (Snapshot, error) {
+ s.lock.RLock()
+ defer s.lock.RUnlock()
+ if sn, ok := s.snapshots[key]; ok {
+ return sn, nil
+ }
+ return Snapshot{}, store.ErrNotExist
+}
+
+// List lists all snapshots.
+func (s *Store) List() []Snapshot {
+ s.lock.RLock()
+ defer s.lock.RUnlock()
+ var snapshots []Snapshot
+ for _, sn := range s.snapshots {
+ snapshots = append(snapshots, sn)
+ }
+ return snapshots
+}
+
+// Delete deletes the snapshot with specified key.
+func (s *Store) Delete(key string) {
+ s.lock.Lock()
+ defer s.lock.Unlock()
+ delete(s.snapshots, key)
+}
diff --git a/pkg/store/snapshot/snapshot_test.go b/pkg/store/snapshot/snapshot_test.go
new file mode 100644
index 000000000..f15107e11
--- /dev/null
+++ b/pkg/store/snapshot/snapshot_test.go
@@ -0,0 +1,84 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package snapshot
+
+import (
+ "testing"
+ "time"
+
+ snapshot "github.com/containerd/containerd/snapshots"
+ assertlib "github.com/stretchr/testify/assert"
+
+ "github.com/containerd/cri/pkg/store"
+)
+
+func TestSnapshotStore(t *testing.T) {
+ snapshots := map[string]Snapshot{
+ "key1": {
+ Key: "key1",
+ Kind: snapshot.KindActive,
+ Size: 10,
+ Inodes: 100,
+ Timestamp: time.Now().UnixNano(),
+ },
+ "key2": {
+ Key: "key2",
+ Kind: snapshot.KindCommitted,
+ Size: 20,
+ Inodes: 200,
+ Timestamp: time.Now().UnixNano(),
+ },
+ "key3": {
+ Key: "key3",
+ Kind: snapshot.KindView,
+ Size: 0,
+ Inodes: 0,
+ Timestamp: time.Now().UnixNano(),
+ },
+ }
+ assert := assertlib.New(t)
+
+ s := NewStore()
+
+ t.Logf("should be able to add snapshot")
+ for _, sn := range snapshots {
+ s.Add(sn)
+ }
+
+ t.Logf("should be able to get snapshot")
+ for id, sn := range snapshots {
+ got, err := s.Get(id)
+ assert.NoError(err)
+ assert.Equal(sn, got)
+ }
+
+ t.Logf("should be able to list snapshot")
+ sns := s.List()
+ assert.Len(sns, 3)
+
+ testKey := "key2"
+
+ t.Logf("should be able to delete snapshot")
+ s.Delete(testKey)
+ sns = s.List()
+ assert.Len(sns, 2)
+
+ t.Logf("get should return empty struct and ErrNotExist after deletion")
+ sn, err := s.Get(testKey)
+ assert.Equal(Snapshot{}, sn)
+ assert.Equal(store.ErrNotExist, err)
+}
diff --git a/pkg/store/util.go b/pkg/store/util.go
new file mode 100644
index 000000000..73626b1af
--- /dev/null
+++ b/pkg/store/util.go
@@ -0,0 +1,42 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package store
+
+import "sync"
+
+// StopCh is used to propagate the stop information of a container.
+type StopCh struct {
+ ch chan struct{}
+ once sync.Once
+}
+
+// NewStopCh creates a stop channel. The channel is open by default.
+func NewStopCh() *StopCh {
+ return &StopCh{ch: make(chan struct{})}
+}
+
+// Stop closes the stopCh of the container.
+func (s *StopCh) Stop() {
+ s.once.Do(func() {
+ close(s.ch)
+ })
+}
+
+// Stopped returns the stopCh of the container as a read-only channel.
+func (s *StopCh) Stopped() <-chan struct{} {
+ return s.ch
+}
diff --git a/pkg/streaming/errors.go b/pkg/streaming/errors.go
new file mode 100644
index 000000000..874064054
--- /dev/null
+++ b/pkg/streaming/errors.go
@@ -0,0 +1,72 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package streaming
+
+import (
+ "net/http"
+ "strconv"
+
+ "google.golang.org/grpc/codes"
+ grpcstatus "google.golang.org/grpc/status"
+)
+
+// NewErrorStreamingDisabled creates an error for disabled streaming method.
+func NewErrorStreamingDisabled(method string) error {
+ return grpcstatus.Errorf(codes.NotFound, "streaming method %s disabled", method)
+}
+
+// NewErrorTooManyInFlight creates an error for exceeding the maximum number of in-flight requests.
+func NewErrorTooManyInFlight() error {
+ return grpcstatus.Error(codes.ResourceExhausted, "maximum number of in-flight requests exceeded")
+}
+
+// WriteError translates a CRI streaming error into an appropriate HTTP response.
+func WriteError(err error, w http.ResponseWriter) error {
+ s, _ := grpcstatus.FromError(err)
+ var status int
+ switch s.Code() {
+ case codes.NotFound:
+ status = http.StatusNotFound
+ case codes.ResourceExhausted:
+ // We only expect to hit this if there is a DoS, so we just wait the full TTL.
+ // If this is ever hit in steady-state operations, consider increasing the maxInFlight requests,
+ // or plumbing through the time to next expiration.
+ w.Header().Set("Retry-After", strconv.Itoa(int(cacheTTL.Seconds())))
+ status = http.StatusTooManyRequests
+ default:
+ status = http.StatusInternalServerError
+ }
+ w.WriteHeader(status)
+ _, writeErr := w.Write([]byte(err.Error()))
+ return writeErr
+}
diff --git a/pkg/streaming/portforward/constants.go b/pkg/streaming/portforward/constants.go
new file mode 100644
index 000000000..217a0b114
--- /dev/null
+++ b/pkg/streaming/portforward/constants.go
@@ -0,0 +1,40 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2015 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package portforward contains server-side logic for handling port forwarding requests.
+package portforward
+
+// ProtocolV1Name is the name of the subprotocol used for port forwarding.
+const ProtocolV1Name = "portforward.k8s.io"
+
+// SupportedProtocols are the supported port forwarding protocols.
+var SupportedProtocols = []string{ProtocolV1Name}
diff --git a/pkg/streaming/portforward/httpstream.go b/pkg/streaming/portforward/httpstream.go
new file mode 100644
index 000000000..f961cdb6f
--- /dev/null
+++ b/pkg/streaming/portforward/httpstream.go
@@ -0,0 +1,315 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package portforward
+
+import (
+ "errors"
+ "fmt"
+ "net/http"
+ "strconv"
+ "sync"
+ "time"
+
+ api "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/types"
+ "k8s.io/apimachinery/pkg/util/httpstream"
+ "k8s.io/apimachinery/pkg/util/httpstream/spdy"
+ utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+
+ "k8s.io/klog/v2"
+)
+
+func handleHTTPStreams(req *http.Request, w http.ResponseWriter, portForwarder PortForwarder, podName string, uid types.UID, supportedPortForwardProtocols []string, idleTimeout, streamCreationTimeout time.Duration) error {
+ _, err := httpstream.Handshake(req, w, supportedPortForwardProtocols)
+ // negotiated protocol isn't currently used server side, but could be in the future
+ if err != nil {
+ // Handshake writes the error to the client
+ return err
+ }
+ streamChan := make(chan httpstream.Stream, 1)
+
+ klog.V(5).Infof("Upgrading port forward response")
+ upgrader := spdy.NewResponseUpgrader()
+ conn := upgrader.UpgradeResponse(w, req, httpStreamReceived(streamChan))
+ if conn == nil {
+ return errors.New("unable to upgrade httpstream connection")
+ }
+ defer conn.Close()
+
+ klog.V(5).Infof("(conn=%p) setting port forwarding streaming connection idle timeout to %v", conn, idleTimeout)
+ conn.SetIdleTimeout(idleTimeout)
+
+ h := &httpStreamHandler{
+ conn: conn,
+ streamChan: streamChan,
+ streamPairs: make(map[string]*httpStreamPair),
+ streamCreationTimeout: streamCreationTimeout,
+ pod: podName,
+ uid: uid,
+ forwarder: portForwarder,
+ }
+ h.run()
+
+ return nil
+}
+
+// httpStreamReceived is the httpstream.NewStreamHandler for port
+// forward streams. It checks each stream's port and stream type headers,
+// rejecting any streams with missing or invalid values. Each valid
+// stream is sent to the streams channel.
+func httpStreamReceived(streams chan httpstream.Stream) func(httpstream.Stream, <-chan struct{}) error {
+ return func(stream httpstream.Stream, replySent <-chan struct{}) error {
+ // make sure it has a valid port header
+ portString := stream.Headers().Get(api.PortHeader)
+ if len(portString) == 0 {
+ return fmt.Errorf("%q header is required", api.PortHeader)
+ }
+ port, err := strconv.ParseUint(portString, 10, 16)
+ if err != nil {
+ return fmt.Errorf("unable to parse %q as a port: %v", portString, err)
+ }
+ if port < 1 {
+ return fmt.Errorf("port %q must be > 0", portString)
+ }
+
+ // make sure it has a valid stream type header
+ streamType := stream.Headers().Get(api.StreamType)
+ if len(streamType) == 0 {
+ return fmt.Errorf("%q header is required", api.StreamType)
+ }
+ if streamType != api.StreamTypeError && streamType != api.StreamTypeData {
+ return fmt.Errorf("invalid stream type %q", streamType)
+ }
+
+ streams <- stream
+ return nil
+ }
+}
+
+// httpStreamHandler is capable of processing multiple port forward
+// requests over a single httpstream.Connection.
+type httpStreamHandler struct {
+ conn httpstream.Connection
+ streamChan chan httpstream.Stream
+ streamPairsLock sync.RWMutex
+ streamPairs map[string]*httpStreamPair
+ streamCreationTimeout time.Duration
+ pod string
+ uid types.UID
+ forwarder PortForwarder
+}
+
+// getStreamPair returns a httpStreamPair for requestID. This creates a
+// new pair if one does not yet exist for the requestID. The returned bool is
+// true if the pair was created.
+func (h *httpStreamHandler) getStreamPair(requestID string) (*httpStreamPair, bool) {
+ h.streamPairsLock.Lock()
+ defer h.streamPairsLock.Unlock()
+
+ if p, ok := h.streamPairs[requestID]; ok {
+ klog.V(5).Infof("(conn=%p, request=%s) found existing stream pair", h.conn, requestID)
+ return p, false
+ }
+
+ klog.V(5).Infof("(conn=%p, request=%s) creating new stream pair", h.conn, requestID)
+
+ p := newPortForwardPair(requestID)
+ h.streamPairs[requestID] = p
+
+ return p, true
+}
+
+// monitorStreamPair waits for the pair to receive both its error and data
+// streams, or for the timeout to expire (whichever happens first), and then
+// removes the pair.
+func (h *httpStreamHandler) monitorStreamPair(p *httpStreamPair, timeout <-chan time.Time) {
+ select {
+ case <-timeout:
+ err := fmt.Errorf("(conn=%v, request=%s) timed out waiting for streams", h.conn, p.requestID)
+ utilruntime.HandleError(err)
+ p.printError(err.Error())
+ case <-p.complete:
+ klog.V(5).Infof("(conn=%v, request=%s) successfully received error and data streams", h.conn, p.requestID)
+ }
+ h.removeStreamPair(p.requestID)
+}
+
+// removeStreamPair removes the stream pair identified by requestID from streamPairs.
+func (h *httpStreamHandler) removeStreamPair(requestID string) {
+ h.streamPairsLock.Lock()
+ defer h.streamPairsLock.Unlock()
+
+ delete(h.streamPairs, requestID)
+}
+
+// requestID returns the request id for stream.
+func (h *httpStreamHandler) requestID(stream httpstream.Stream) string {
+ requestID := stream.Headers().Get(api.PortForwardRequestIDHeader)
+ if len(requestID) == 0 {
+ klog.V(5).Infof("(conn=%p) stream received without %s header", h.conn, api.PortForwardRequestIDHeader)
+ // If we get here, it's because the connection came from an older client
+ // that isn't generating the request id header
+ // (https://github.com/kubernetes/kubernetes/blob/843134885e7e0b360eb5441e85b1410a8b1a7a0c/pkg/client/unversioned/portforward/portforward.go#L258-L287)
+ //
+ // This is a best-effort attempt at supporting older clients.
+ //
+ // When there aren't concurrent new forwarded connections, each connection
+ // will have a pair of streams (data, error), and the stream IDs will be
+ // consecutive odd numbers, e.g. 1 and 3 for the first connection. Convert
+ // the stream ID into a pseudo-request id by taking the stream type and
+ // using id = stream.Identifier() when the stream type is error,
+ // and id = stream.Identifier() - 2 when it's data.
+ //
+ // NOTE: this only works when there are not concurrent new streams from
+ // multiple forwarded connections; it's a best-effort attempt at supporting
+ // old clients that don't generate request ids. If there are concurrent
+ // new connections, it's possible that 1 connection gets streams whose IDs
+ // are not consecutive (e.g. 5 and 9 instead of 5 and 7).
+ streamType := stream.Headers().Get(api.StreamType)
+ switch streamType {
+ case api.StreamTypeError:
+ requestID = strconv.Itoa(int(stream.Identifier()))
+ case api.StreamTypeData:
+ requestID = strconv.Itoa(int(stream.Identifier()) - 2)
+ }
+
+ klog.V(5).Infof("(conn=%p) automatically assigning request ID=%q from stream type=%s, stream ID=%d", h.conn, requestID, streamType, stream.Identifier())
+ }
+ return requestID
+}
+
+// run is the main loop for the httpStreamHandler. It processes new
+// streams, invoking portForward for each complete stream pair. The loop exits
+// when the httpstream.Connection is closed.
+func (h *httpStreamHandler) run() {
+ klog.V(5).Infof("(conn=%p) waiting for port forward streams", h.conn)
+Loop:
+ for {
+ select {
+ case <-h.conn.CloseChan():
+ klog.V(5).Infof("(conn=%p) upgraded connection closed", h.conn)
+ break Loop
+ case stream := <-h.streamChan:
+ requestID := h.requestID(stream)
+ streamType := stream.Headers().Get(api.StreamType)
+ klog.V(5).Infof("(conn=%p, request=%s) received new stream of type %s", h.conn, requestID, streamType)
+
+ p, created := h.getStreamPair(requestID)
+ if created {
+ go h.monitorStreamPair(p, time.After(h.streamCreationTimeout))
+ }
+ if complete, err := p.add(stream); err != nil {
+ msg := fmt.Sprintf("error processing stream for request %s: %v", requestID, err)
+ utilruntime.HandleError(errors.New(msg))
+ p.printError(msg)
+ } else if complete {
+ go h.portForward(p)
+ }
+ }
+ }
+}
+
+// portForward invokes the httpStreamHandler's forwarder.PortForward
+// function for the given stream pair.
+func (h *httpStreamHandler) portForward(p *httpStreamPair) {
+ defer p.dataStream.Close()
+ defer p.errorStream.Close()
+
+ portString := p.dataStream.Headers().Get(api.PortHeader)
+ port, _ := strconv.ParseInt(portString, 10, 32)
+
+ klog.V(5).Infof("(conn=%p, request=%s) invoking forwarder.PortForward for port %s", h.conn, p.requestID, portString)
+ err := h.forwarder.PortForward(h.pod, h.uid, int32(port), p.dataStream)
+ klog.V(5).Infof("(conn=%p, request=%s) done invoking forwarder.PortForward for port %s", h.conn, p.requestID, portString)
+
+ if err != nil {
+ msg := fmt.Errorf("error forwarding port %d to pod %s, uid %v: %v", port, h.pod, h.uid, err)
+ utilruntime.HandleError(msg)
+ fmt.Fprint(p.errorStream, msg.Error())
+ }
+}
+
+// httpStreamPair represents the error and data streams for a port
+// forwarding request.
+type httpStreamPair struct {
+ lock sync.RWMutex
+ requestID string
+ dataStream httpstream.Stream
+ errorStream httpstream.Stream
+ complete chan struct{}
+}
+
+// newPortForwardPair creates a new httpStreamPair.
+func newPortForwardPair(requestID string) *httpStreamPair {
+ return &httpStreamPair{
+ requestID: requestID,
+ complete: make(chan struct{}),
+ }
+}
+
+// add adds the stream to the httpStreamPair. If the pair already
+// contains a stream for the new stream's type, an error is returned. add
+// returns true if both the data and error streams for this pair have been
+// received.
+func (p *httpStreamPair) add(stream httpstream.Stream) (bool, error) {
+ p.lock.Lock()
+ defer p.lock.Unlock()
+
+ switch stream.Headers().Get(api.StreamType) {
+ case api.StreamTypeError:
+ if p.errorStream != nil {
+ return false, errors.New("error stream already assigned")
+ }
+ p.errorStream = stream
+ case api.StreamTypeData:
+ if p.dataStream != nil {
+ return false, errors.New("data stream already assigned")
+ }
+ p.dataStream = stream
+ }
+
+ complete := p.errorStream != nil && p.dataStream != nil
+ if complete {
+ close(p.complete)
+ }
+ return complete, nil
+}
+
+// printError writes s to p.errorStream if p.errorStream has been set.
+func (p *httpStreamPair) printError(s string) {
+ p.lock.RLock()
+ defer p.lock.RUnlock()
+ if p.errorStream != nil {
+ fmt.Fprint(p.errorStream, s)
+ }
+}
diff --git a/pkg/streaming/portforward/portforward.go b/pkg/streaming/portforward/portforward.go
new file mode 100644
index 000000000..89a39ea66
--- /dev/null
+++ b/pkg/streaming/portforward/portforward.go
@@ -0,0 +1,69 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package portforward
+
+import (
+ "io"
+ "net/http"
+ "time"
+
+ "k8s.io/apimachinery/pkg/types"
+ "k8s.io/apimachinery/pkg/util/runtime"
+ "k8s.io/apiserver/pkg/util/wsstream"
+)
+
+// PortForwarder knows how to forward content from a data stream to/from a port
+// in a pod.
+type PortForwarder interface {
+	// PortForward copies data between a data stream and a port in a pod.
+ PortForward(name string, uid types.UID, port int32, stream io.ReadWriteCloser) error
+}
+
+// ServePortForward handles a port forwarding request. A single request is
+// kept alive as long as the client is still alive and the connection has not
+// been timed out due to idleness. This function handles multiple forwarded
+// connections; i.e., multiple `curl http://localhost:8888/` requests will be
+// handled by a single invocation of ServePortForward.
+func ServePortForward(w http.ResponseWriter, req *http.Request, portForwarder PortForwarder, podName string, uid types.UID, portForwardOptions *V4Options, idleTimeout time.Duration, streamCreationTimeout time.Duration, supportedProtocols []string) {
+ var err error
+ if wsstream.IsWebSocketRequest(req) {
+ err = handleWebSocketStreams(req, w, portForwarder, podName, uid, portForwardOptions, supportedProtocols, idleTimeout, streamCreationTimeout)
+ } else {
+ err = handleHTTPStreams(req, w, portForwarder, podName, uid, supportedProtocols, idleTimeout, streamCreationTimeout)
+ }
+
+ if err != nil {
+ runtime.HandleError(err)
+ return
+ }
+}
diff --git a/pkg/streaming/portforward/websocket.go b/pkg/streaming/portforward/websocket.go
new file mode 100644
index 000000000..1b1c0151a
--- /dev/null
+++ b/pkg/streaming/portforward/websocket.go
@@ -0,0 +1,213 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package portforward
+
+import (
+ "encoding/binary"
+ "fmt"
+ "io"
+ "net/http"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+
+ "k8s.io/klog/v2"
+
+ api "k8s.io/api/core/v1"
+ "k8s.io/apimachinery/pkg/types"
+ "k8s.io/apimachinery/pkg/util/runtime"
+ "k8s.io/apiserver/pkg/server/httplog"
+ "k8s.io/apiserver/pkg/util/wsstream"
+)
+
+const (
+ // Per-port channel layout: each forwarded port gets a read/write data
+ // channel (even index) followed by a write-only error channel (odd index).
+ dataChannel = iota
+ errorChannel
+
+ // Websocket subprotocol names for the v4 binary and base64 framings.
+ v4BinaryWebsocketProtocol = "v4." + wsstream.ChannelWebSocketProtocol
+ v4Base64WebsocketProtocol = "v4." + wsstream.Base64ChannelWebSocketProtocol
+)
+
+// V4Options contains details about which streams are required for port
+// forwarding.
+// All fields included in V4Options need to be expressed explicitly in the
+// CRI (k8s.io/cri-api/pkg/apis/{version}/api.proto) PortForwardRequest.
+type V4Options struct {
+ // Ports is the list of pod ports the client wants forwarded.
+ Ports []int32
+}
+
+// NewV4Options creates a new options from the Request.
+//
+// For non-websocket (SPDY) requests an empty V4Options is returned, since
+// ports are not carried in the request URL. Websocket requests must supply
+// the ports up front via the port query parameter(s); each parameter value
+// may itself be a comma-separated list of ports.
+func NewV4Options(req *http.Request) (*V4Options, error) {
+ if !wsstream.IsWebSocketRequest(req) {
+ return &V4Options{}, nil
+ }
+
+ portStrings := req.URL.Query()[api.PortHeader]
+ if len(portStrings) == 0 {
+ return nil, fmt.Errorf("query parameter %q is required", api.PortHeader)
+ }
+
+ ports := make([]int32, 0, len(portStrings))
+ for _, portString := range portStrings {
+ if len(portString) == 0 {
+ return nil, fmt.Errorf("query parameter %q cannot be empty", api.PortHeader)
+ }
+ for _, p := range strings.Split(portString, ",") {
+ // Parsed with a 16-bit limit, so each port must fit in 1..65535.
+ port, err := strconv.ParseUint(p, 10, 16)
+ if err != nil {
+ return nil, fmt.Errorf("unable to parse %q as a port: %v", portString, err)
+ }
+ if port < 1 {
+ return nil, fmt.Errorf("port %q must be > 0", portString)
+ }
+ ports = append(ports, int32(port))
+ }
+ }
+
+ return &V4Options{
+ Ports: ports,
+ }, nil
+}
+
+// BuildV4Options returns a V4Options based on the given information.
+// The returned error is always nil; the error result presumably exists for
+// symmetry with NewV4Options at call sites.
+func BuildV4Options(ports []int32) (*V4Options, error) {
+ return &V4Options{Ports: ports}, nil
+}
+
+// handleWebSocketStreams handles requests to forward ports to a pod via
+// a PortForwarder. A pair of streams are created per port (DATA n,
+// ERROR n+1). The associated port is written to each stream as an unsigned 16
+// bit integer in little endian format.
+func handleWebSocketStreams(req *http.Request, w http.ResponseWriter, portForwarder PortForwarder, podName string, uid types.UID, opts *V4Options, supportedPortForwardProtocols []string, idleTimeout, streamCreationTimeout time.Duration) error {
+ // Two channels per requested port: read/write data, write-only error.
+ channels := make([]wsstream.ChannelType, 0, len(opts.Ports)*2)
+ for i := 0; i < len(opts.Ports); i++ {
+ channels = append(channels, wsstream.ReadWriteChannel, wsstream.WriteChannel)
+ }
+ // Accept binary framing for the unversioned and v4 binary subprotocols,
+ // and base64 framing for the v4 base64 subprotocol.
+ conn := wsstream.NewConn(map[string]wsstream.ChannelProtocolConfig{
+ "": {
+ Binary: true,
+ Channels: channels,
+ },
+ v4BinaryWebsocketProtocol: {
+ Binary: true,
+ Channels: channels,
+ },
+ v4Base64WebsocketProtocol: {
+ Binary: false,
+ Channels: channels,
+ },
+ })
+ conn.SetIdleTimeout(idleTimeout)
+ _, streams, err := conn.Open(httplog.Unlogged(req, w), req)
+ if err != nil {
+ err = fmt.Errorf("unable to upgrade websocket connection: %v", err)
+ return err
+ }
+ defer conn.Close()
+ streamPairs := make([]*websocketStreamPair, len(opts.Ports))
+ for i := range streamPairs {
+ streamPair := websocketStreamPair{
+ port: opts.Ports[i],
+ dataStream: streams[i*2+dataChannel],
+ errorStream: streams[i*2+errorChannel],
+ }
+ streamPairs[i] = &streamPair
+
+ // Announce the port on both streams so the client can associate them.
+ portBytes := make([]byte, 2)
+ // port is always positive so conversion is allowable
+ binary.LittleEndian.PutUint16(portBytes, uint16(streamPair.port))
+ // NOTE(review): Write errors are ignored here; this mirrors the
+ // upstream kubelet behavior this code was adapted from.
+ streamPair.dataStream.Write(portBytes)
+ streamPair.errorStream.Write(portBytes)
+ }
+ h := &websocketStreamHandler{
+ conn: conn,
+ streamPairs: streamPairs,
+ pod: podName,
+ uid: uid,
+ forwarder: portForwarder,
+ }
+ // Blocks until forwarding for every port has finished.
+ h.run()
+
+ return nil
+}
+
+// websocketStreamPair represents the error and data streams for a port
+// forwarding request.
+type websocketStreamPair struct {
+ // port is the pod port being forwarded.
+ port int32
+ // dataStream carries the bidirectional forwarded traffic.
+ dataStream io.ReadWriteCloser
+ // errorStream is used to report forwarding errors back to the client.
+ errorStream io.WriteCloser
+}
+
+// websocketStreamHandler is capable of processing a single port forward
+// request over a websocket connection
+type websocketStreamHandler struct {
+ // conn is the underlying websocket connection (used here for logging).
+ conn *wsstream.Conn
+ // streamPairs holds one data/error stream pair per forwarded port.
+ streamPairs []*websocketStreamPair
+ // pod and uid identify the target pod for the forwarder.
+ pod string
+ uid types.UID
+ // forwarder performs the actual port forwarding.
+ forwarder PortForwarder
+}
+
+// run invokes the websocketStreamHandler's forwarder.PortForward
+// function for the given stream pair.
+// One goroutine is started per stream pair; run blocks until all of them
+// have completed.
+func (h *websocketStreamHandler) run() {
+ wg := sync.WaitGroup{}
+ wg.Add(len(h.streamPairs))
+
+ for _, pair := range h.streamPairs {
+ // Copy the loop variable so each goroutine captures its own pair
+ // (required on Go versions before 1.22).
+ p := pair
+ go func() {
+ defer wg.Done()
+ h.portForward(p)
+ }()
+ }
+
+ wg.Wait()
+}
+
+// portForward forwards a single port's traffic through h.forwarder and
+// closes both streams when forwarding ends. Any error is reported to the
+// runtime error handler and written to the client on the error stream.
+func (h *websocketStreamHandler) portForward(p *websocketStreamPair) {
+ defer p.dataStream.Close()
+ defer p.errorStream.Close()
+
+ klog.V(5).Infof("(conn=%p) invoking forwarder.PortForward for port %d", h.conn, p.port)
+ err := h.forwarder.PortForward(h.pod, h.uid, p.port, p.dataStream)
+ klog.V(5).Infof("(conn=%p) done invoking forwarder.PortForward for port %d", h.conn, p.port)
+
+ if err != nil {
+ msg := fmt.Errorf("error forwarding port %d to pod %s, uid %v: %v", p.port, h.pod, h.uid, err)
+ runtime.HandleError(msg)
+ fmt.Fprint(p.errorStream, msg.Error())
+ }
+}
diff --git a/pkg/streaming/remotecommand/attach.go b/pkg/streaming/remotecommand/attach.go
new file mode 100644
index 000000000..2b127bdf9
--- /dev/null
+++ b/pkg/streaming/remotecommand/attach.go
@@ -0,0 +1,75 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package remotecommand
+
+import (
+ "fmt"
+ "io"
+ "net/http"
+ "time"
+
+ apierrors "k8s.io/apimachinery/pkg/api/errors"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/types"
+ "k8s.io/apimachinery/pkg/util/runtime"
+ "k8s.io/client-go/tools/remotecommand"
+)
+
+// Attacher knows how to attach to a running container in a pod.
+type Attacher interface {
+ // AttachContainer attaches to the running container in the pod, copying data between in/out/err
+ // and the container's stdin/stdout/stderr. The resize channel delivers
+ // remotecommand.TerminalSize updates for the container's terminal.
+ AttachContainer(name string, uid types.UID, container string, in io.Reader, out, err io.WriteCloser, tty bool, resize <-chan remotecommand.TerminalSize) error
+}
+
+// ServeAttach handles requests to attach to a container. After creating/receiving the required
+// streams, it delegates the actual attaching to attacher.
+func ServeAttach(w http.ResponseWriter, req *http.Request, attacher Attacher, podName string, uid types.UID, container string, streamOpts *Options, idleTimeout, streamCreationTimeout time.Duration, supportedProtocols []string) {
+ ctx, ok := createStreams(req, w, streamOpts, supportedProtocols, idleTimeout, streamCreationTimeout)
+ if !ok {
+ // error is handled by createStreams
+ return
+ }
+ defer ctx.conn.Close()
+
+ err := attacher.AttachContainer(podName, uid, container, ctx.stdinStream, ctx.stdoutStream, ctx.stderrStream, ctx.tty, ctx.resizeChan)
+ if err != nil {
+ // Report the failure both locally and to the client's error stream.
+ err = fmt.Errorf("error attaching to container: %v", err)
+ runtime.HandleError(err)
+ ctx.writeStatus(apierrors.NewInternalError(err))
+ } else {
+ // Signal clean completion on the error stream.
+ ctx.writeStatus(&apierrors.StatusError{ErrStatus: metav1.Status{
+ Status: metav1.StatusSuccess,
+ }})
+ }
+}
diff --git a/pkg/streaming/remotecommand/doc.go b/pkg/streaming/remotecommand/doc.go
new file mode 100644
index 000000000..6034cdc8e
--- /dev/null
+++ b/pkg/streaming/remotecommand/doc.go
@@ -0,0 +1,34 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package remotecommand contains functions related to executing commands in and attaching to pods.
+package remotecommand
diff --git a/pkg/streaming/remotecommand/exec.go b/pkg/streaming/remotecommand/exec.go
new file mode 100644
index 000000000..5111521ba
--- /dev/null
+++ b/pkg/streaming/remotecommand/exec.go
@@ -0,0 +1,95 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package remotecommand
+
+import (
+ "fmt"
+ "io"
+ "net/http"
+ "time"
+
+ apierrors "k8s.io/apimachinery/pkg/api/errors"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/types"
+ remotecommandconsts "k8s.io/apimachinery/pkg/util/remotecommand"
+ "k8s.io/apimachinery/pkg/util/runtime"
+ "k8s.io/client-go/tools/remotecommand"
+ utilexec "k8s.io/utils/exec"
+)
+
+// Executor knows how to execute a command in a container in a pod.
+type Executor interface {
+ // ExecInContainer executes a command in a container in the pod, copying data
+ // between in/out/err and the container's stdin/stdout/stderr.
+ // NOTE(review): the semantics of a zero timeout (no timeout?) are defined
+ // by the implementation — confirm against the concrete Executor.
+ ExecInContainer(name string, uid types.UID, container string, cmd []string, in io.Reader, out, err io.WriteCloser, tty bool, resize <-chan remotecommand.TerminalSize, timeout time.Duration) error
+}
+
+// ServeExec handles requests to execute a command in a container. After
+// creating/receiving the required streams, it delegates the actual execution
+// to the executor.
+func ServeExec(w http.ResponseWriter, req *http.Request, executor Executor, podName string, uid types.UID, container string, cmd []string, streamOpts *Options, idleTimeout, streamCreationTimeout time.Duration, supportedProtocols []string) {
+ ctx, ok := createStreams(req, w, streamOpts, supportedProtocols, idleTimeout, streamCreationTimeout)
+ if !ok {
+ // error is handled by createStreams
+ return
+ }
+ defer ctx.conn.Close()
+
+ // A timeout of 0 is passed through to the executor unchanged.
+ err := executor.ExecInContainer(podName, uid, container, cmd, ctx.stdinStream, ctx.stdoutStream, ctx.stderrStream, ctx.tty, ctx.resizeChan, 0)
+ if err != nil {
+ if exitErr, ok := err.(utilexec.ExitError); ok && exitErr.Exited() {
+ // The command ran but exited non-zero: report the exit code to the
+ // client as a structured StatusFailure rather than an internal error.
+ rc := exitErr.ExitStatus()
+ ctx.writeStatus(&apierrors.StatusError{ErrStatus: metav1.Status{
+ Status: metav1.StatusFailure,
+ Reason: remotecommandconsts.NonZeroExitCodeReason,
+ Details: &metav1.StatusDetails{
+ Causes: []metav1.StatusCause{
+ {
+ Type: remotecommandconsts.ExitCodeCauseType,
+ Message: fmt.Sprintf("%d", rc),
+ },
+ },
+ },
+ Message: fmt.Sprintf("command terminated with non-zero exit code: %v", exitErr),
+ }})
+ } else {
+ // Any other failure is an internal error.
+ err = fmt.Errorf("error executing command in container: %v", err)
+ runtime.HandleError(err)
+ ctx.writeStatus(apierrors.NewInternalError(err))
+ }
+ } else {
+ ctx.writeStatus(&apierrors.StatusError{ErrStatus: metav1.Status{
+ Status: metav1.StatusSuccess,
+ }})
+ }
+}
diff --git a/pkg/streaming/remotecommand/httpstream.go b/pkg/streaming/remotecommand/httpstream.go
new file mode 100644
index 000000000..0417a1a9e
--- /dev/null
+++ b/pkg/streaming/remotecommand/httpstream.go
@@ -0,0 +1,463 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package remotecommand
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io"
+ "net/http"
+ "time"
+
+ api "k8s.io/api/core/v1"
+ apierrors "k8s.io/apimachinery/pkg/api/errors"
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+ "k8s.io/apimachinery/pkg/util/httpstream"
+ "k8s.io/apimachinery/pkg/util/httpstream/spdy"
+ remotecommandconsts "k8s.io/apimachinery/pkg/util/remotecommand"
+ "k8s.io/apimachinery/pkg/util/runtime"
+ "k8s.io/apiserver/pkg/util/wsstream"
+ "k8s.io/client-go/tools/remotecommand"
+
+ "k8s.io/klog/v2"
+)
+
+// Options contains details about which streams are required for
+// remote command execution.
+type Options struct {
+ // Stdin, Stdout and Stderr indicate which standard streams the client
+ // requested; TTY indicates a terminal was requested.
+ Stdin bool
+ Stdout bool
+ Stderr bool
+ TTY bool
+}
+
+// NewOptions creates a new Options from the Request.
+// Each stream is requested by setting the corresponding form value to "1".
+// At least one of stdin/stdout/stderr must be requested; tty together with
+// stderr is not supported, in which case stderr is silently dropped.
+func NewOptions(req *http.Request) (*Options, error) {
+ tty := req.FormValue(api.ExecTTYParam) == "1"
+ stdin := req.FormValue(api.ExecStdinParam) == "1"
+ stdout := req.FormValue(api.ExecStdoutParam) == "1"
+ stderr := req.FormValue(api.ExecStderrParam) == "1"
+ if tty && stderr {
+ // TODO: make this an error before we reach this method
+ klog.V(4).Infof("Access to exec with tty and stderr is not supported, bypassing stderr")
+ stderr = false
+ }
+
+ if !stdin && !stdout && !stderr {
+ return nil, fmt.Errorf("you must specify at least 1 of stdin, stdout, stderr")
+ }
+
+ return &Options{
+ Stdin: stdin,
+ Stdout: stdout,
+ Stderr: stderr,
+ TTY: tty,
+ }, nil
+}
+
+// context contains the connection and streams used when
+// forwarding an attach or execute session into a container.
+// (Unrelated to the standard library's context package, which this file
+// does not import.)
+type context struct {
+ // conn is the upgraded connection; closing it tears down all streams.
+ conn io.Closer
+ stdinStream io.ReadCloser
+ stdoutStream io.WriteCloser
+ stderrStream io.WriteCloser
+ // writeStatus reports the session result to the client's error stream.
+ writeStatus func(status *apierrors.StatusError) error
+ // resizeStream carries JSON-encoded terminal size updates; resizeChan is
+ // the decoded form consumed by the executor/attacher.
+ resizeStream io.ReadCloser
+ resizeChan chan remotecommand.TerminalSize
+ tty bool
+}
+
+// streamAndReply holds both a Stream and a channel that is closed when the stream's reply frame is
+// enqueued. Consumers can wait for replySent to be closed prior to proceeding, to ensure that the
+// replyFrame is enqueued before the connection's goaway frame is sent (e.g. if a stream was
+// received and right after, the connection gets closed).
+type streamAndReply struct {
+ httpstream.Stream
+ // replySent is closed once the stream's reply frame has been enqueued.
+ replySent <-chan struct{}
+}
+
+// waitStreamReply waits until either replySent or stop is closed. If replySent is closed, it sends
+// an empty struct to the notify channel.
+// The send blocks until the consumer (the protocol handler's select loop)
+// receives it, or is abandoned when stop is closed.
+func waitStreamReply(replySent <-chan struct{}, notify chan<- struct{}, stop <-chan struct{}) {
+ select {
+ case <-replySent:
+ notify <- struct{}{}
+ case <-stop:
+ }
+}
+
+// createStreams builds the session context for an exec/attach request,
+// choosing websocket channels or SPDY streams based on the request. It
+// returns (nil, false) on failure; the error response has already been
+// written by the chosen stream creator in that case.
+func createStreams(req *http.Request, w http.ResponseWriter, opts *Options, supportedStreamProtocols []string, idleTimeout, streamCreationTimeout time.Duration) (*context, bool) {
+ var ctx *context
+ var ok bool
+ if wsstream.IsWebSocketRequest(req) {
+ ctx, ok = createWebSocketStreams(req, w, opts, idleTimeout)
+ } else {
+ ctx, ok = createHTTPStreamStreams(req, w, opts, supportedStreamProtocols, idleTimeout, streamCreationTimeout)
+ }
+ if !ok {
+ return nil, false
+ }
+
+ // If the client opened a resize stream, decode terminal size updates
+ // from it in the background for the duration of the session.
+ if ctx.resizeStream != nil {
+ ctx.resizeChan = make(chan remotecommand.TerminalSize)
+ go handleResizeEvents(ctx.resizeStream, ctx.resizeChan)
+ }
+
+ return ctx, true
+}
+
+// createHTTPStreamStreams negotiates a stream protocol with the client,
+// upgrades the connection to SPDY, and waits for the client to create the
+// expected set of streams before returning the assembled context.
+func createHTTPStreamStreams(req *http.Request, w http.ResponseWriter, opts *Options, supportedStreamProtocols []string, idleTimeout, streamCreationTimeout time.Duration) (*context, bool) {
+ protocol, err := httpstream.Handshake(req, w, supportedStreamProtocols)
+ if err != nil {
+ http.Error(w, err.Error(), http.StatusBadRequest)
+ return nil, false
+ }
+
+ streamCh := make(chan streamAndReply)
+
+ upgrader := spdy.NewResponseUpgrader()
+ conn := upgrader.UpgradeResponse(w, req, func(stream httpstream.Stream, replySent <-chan struct{}) error {
+ streamCh <- streamAndReply{Stream: stream, replySent: replySent}
+ return nil
+ })
+ // from this point on, we can no longer call methods on response
+ if conn == nil {
+ // The upgrader is responsible for notifying the client of any errors that
+ // occurred during upgrading. All we can do is return here at this point
+ // if we weren't successful in upgrading.
+ return nil, false
+ }
+
+ conn.SetIdleTimeout(idleTimeout)
+
+ // Pick the handler matching the negotiated protocol version; an empty
+ // protocol means the client did not negotiate, so fall back to V1.
+ var handler protocolHandler
+ switch protocol {
+ case remotecommandconsts.StreamProtocolV4Name:
+ handler = &v4ProtocolHandler{}
+ case remotecommandconsts.StreamProtocolV3Name:
+ handler = &v3ProtocolHandler{}
+ case remotecommandconsts.StreamProtocolV2Name:
+ handler = &v2ProtocolHandler{}
+ case "":
+ klog.V(4).Infof("Client did not request protocol negotiation. Falling back to %q", remotecommandconsts.StreamProtocolV1Name)
+ fallthrough
+ case remotecommandconsts.StreamProtocolV1Name:
+ handler = &v1ProtocolHandler{}
+ }
+
+ // count the streams client asked for, starting with 1 (presumably the
+ // always-present error stream)
+ expectedStreams := 1
+ if opts.Stdin {
+ expectedStreams++
+ }
+ if opts.Stdout {
+ expectedStreams++
+ }
+ if opts.Stderr {
+ expectedStreams++
+ }
+ // The resize stream only counts for protocol versions that support it.
+ if opts.TTY && handler.supportsTerminalResizing() {
+ expectedStreams++
+ }
+
+ expired := time.NewTimer(streamCreationTimeout)
+ defer expired.Stop()
+
+ ctx, err := handler.waitForStreams(streamCh, expectedStreams, expired.C)
+ if err != nil {
+ runtime.HandleError(err)
+ return nil, false
+ }
+
+ ctx.conn = conn
+ ctx.tty = opts.TTY
+
+ return ctx, true
+}
+
+// protocolHandler abstracts the per-version differences in how exec/attach
+// streams are collected from the client.
+type protocolHandler interface {
+ // waitForStreams waits for the expected streams or a timeout, returning a
+ // remoteCommandContext if all the streams were received, or an error if not.
+ waitForStreams(streams <-chan streamAndReply, expectedStreams int, expired <-chan time.Time) (*context, error)
+ // supportsTerminalResizing returns true if the protocol handler supports terminal resizing
+ supportsTerminalResizing() bool
+}
+
+// v4ProtocolHandler implements the V4 protocol version for streaming command execution. It only differs
+// from v3 in the error stream format, using a json-marshaled metav1.Status which carries
+// the process' exit code.
+type v4ProtocolHandler struct{}
+
+// waitForStreams collects the client's streams, keyed by their streamType
+// header, until expectedStreams reply frames have been counted or the
+// expired timer fires.
+func (*v4ProtocolHandler) waitForStreams(streams <-chan streamAndReply, expectedStreams int, expired <-chan time.Time) (*context, error) {
+ ctx := &context{}
+ receivedStreams := 0
+ replyChan := make(chan struct{})
+ // Closing stop releases any waitStreamReply goroutines still blocked.
+ stop := make(chan struct{})
+ defer close(stop)
+WaitForStreams:
+ for {
+ select {
+ case stream := <-streams:
+ streamType := stream.Headers().Get(api.StreamType)
+ switch streamType {
+ case api.StreamTypeError:
+ ctx.writeStatus = v4WriteStatusFunc(stream) // write json errors
+ go waitStreamReply(stream.replySent, replyChan, stop)
+ case api.StreamTypeStdin:
+ ctx.stdinStream = stream
+ go waitStreamReply(stream.replySent, replyChan, stop)
+ case api.StreamTypeStdout:
+ ctx.stdoutStream = stream
+ go waitStreamReply(stream.replySent, replyChan, stop)
+ case api.StreamTypeStderr:
+ ctx.stderrStream = stream
+ go waitStreamReply(stream.replySent, replyChan, stop)
+ case api.StreamTypeResize:
+ ctx.resizeStream = stream
+ go waitStreamReply(stream.replySent, replyChan, stop)
+ default:
+ runtime.HandleError(fmt.Errorf("unexpected stream type: %q", streamType))
+ }
+ case <-replyChan:
+ // A stream's reply frame was enqueued; count it toward the total.
+ receivedStreams++
+ if receivedStreams == expectedStreams {
+ break WaitForStreams
+ }
+ case <-expired:
+ // TODO find a way to return the error to the user. Maybe use a separate
+ // stream to report errors?
+ return nil, errors.New("timed out waiting for client to create streams")
+ }
+ }
+
+ return ctx, nil
+}
+
+// supportsTerminalResizing returns true because v4ProtocolHandler supports it
+func (*v4ProtocolHandler) supportsTerminalResizing() bool { return true }
+
+// v3ProtocolHandler implements the V3 protocol version for streaming command execution.
+// It uses the v1 plain-text error stream format but, unlike v2, supports a
+// resize stream.
+type v3ProtocolHandler struct{}
+
+// waitForStreams collects the client's streams, keyed by their streamType
+// header, until expectedStreams reply frames have been counted or the
+// expired timer fires.
+func (*v3ProtocolHandler) waitForStreams(streams <-chan streamAndReply, expectedStreams int, expired <-chan time.Time) (*context, error) {
+ ctx := &context{}
+ receivedStreams := 0
+ replyChan := make(chan struct{})
+ stop := make(chan struct{})
+ defer close(stop)
+WaitForStreams:
+ for {
+ select {
+ case stream := <-streams:
+ streamType := stream.Headers().Get(api.StreamType)
+ switch streamType {
+ case api.StreamTypeError:
+ ctx.writeStatus = v1WriteStatusFunc(stream)
+ go waitStreamReply(stream.replySent, replyChan, stop)
+ case api.StreamTypeStdin:
+ ctx.stdinStream = stream
+ go waitStreamReply(stream.replySent, replyChan, stop)
+ case api.StreamTypeStdout:
+ ctx.stdoutStream = stream
+ go waitStreamReply(stream.replySent, replyChan, stop)
+ case api.StreamTypeStderr:
+ ctx.stderrStream = stream
+ go waitStreamReply(stream.replySent, replyChan, stop)
+ case api.StreamTypeResize:
+ ctx.resizeStream = stream
+ go waitStreamReply(stream.replySent, replyChan, stop)
+ default:
+ runtime.HandleError(fmt.Errorf("unexpected stream type: %q", streamType))
+ }
+ case <-replyChan:
+ receivedStreams++
+ if receivedStreams == expectedStreams {
+ break WaitForStreams
+ }
+ case <-expired:
+ // TODO find a way to return the error to the user. Maybe use a separate
+ // stream to report errors?
+ return nil, errors.New("timed out waiting for client to create streams")
+ }
+ }
+
+ return ctx, nil
+}
+
+// supportsTerminalResizing returns true because v3ProtocolHandler supports it
+func (*v3ProtocolHandler) supportsTerminalResizing() bool { return true }
+
+// v2ProtocolHandler implements the V2 protocol version for streaming command execution.
+// It has no resize stream; errors are written as plain text (v1 format).
+type v2ProtocolHandler struct{}
+
+// waitForStreams collects the client's streams, keyed by their streamType
+// header, until expectedStreams reply frames have been counted or the
+// expired timer fires.
+func (*v2ProtocolHandler) waitForStreams(streams <-chan streamAndReply, expectedStreams int, expired <-chan time.Time) (*context, error) {
+ ctx := &context{}
+ receivedStreams := 0
+ replyChan := make(chan struct{})
+ stop := make(chan struct{})
+ defer close(stop)
+WaitForStreams:
+ for {
+ select {
+ case stream := <-streams:
+ streamType := stream.Headers().Get(api.StreamType)
+ switch streamType {
+ case api.StreamTypeError:
+ ctx.writeStatus = v1WriteStatusFunc(stream)
+ go waitStreamReply(stream.replySent, replyChan, stop)
+ case api.StreamTypeStdin:
+ ctx.stdinStream = stream
+ go waitStreamReply(stream.replySent, replyChan, stop)
+ case api.StreamTypeStdout:
+ ctx.stdoutStream = stream
+ go waitStreamReply(stream.replySent, replyChan, stop)
+ case api.StreamTypeStderr:
+ ctx.stderrStream = stream
+ go waitStreamReply(stream.replySent, replyChan, stop)
+ default:
+ runtime.HandleError(fmt.Errorf("unexpected stream type: %q", streamType))
+ }
+ case <-replyChan:
+ receivedStreams++
+ if receivedStreams == expectedStreams {
+ break WaitForStreams
+ }
+ case <-expired:
+ // TODO find a way to return the error to the user. Maybe use a separate
+ // stream to report errors?
+ return nil, errors.New("timed out waiting for client to create streams")
+ }
+ }
+
+ return ctx, nil
+}
+
+// supportsTerminalResizing returns false because v2ProtocolHandler doesn't support it.
+func (*v2ProtocolHandler) supportsTerminalResizing() bool { return false }
+
+// v1ProtocolHandler implements the V1 protocol version for streaming command execution.
+// It has no resize stream, writes errors as plain text, and carries two
+// historical quirks retained for 1.0.x kubelet compatibility: the error
+// stream is Reset on return, and the stdin stream is closed before returning.
+type v1ProtocolHandler struct{}
+
+// waitForStreams collects the client's streams, keyed by their streamType
+// header, until expectedStreams reply frames have been counted or the
+// expired timer fires.
+func (*v1ProtocolHandler) waitForStreams(streams <-chan streamAndReply, expectedStreams int, expired <-chan time.Time) (*context, error) {
+ ctx := &context{}
+ receivedStreams := 0
+ replyChan := make(chan struct{})
+ stop := make(chan struct{})
+ defer close(stop)
+WaitForStreams:
+ for {
+ select {
+ case stream := <-streams:
+ streamType := stream.Headers().Get(api.StreamType)
+ switch streamType {
+ case api.StreamTypeError:
+ ctx.writeStatus = v1WriteStatusFunc(stream)
+
+ // This defer statement shouldn't be here, but due to previous refactoring, it ended up in
+ // here. This is what 1.0.x kubelets do, so we're retaining that behavior. This is fixed in
+ // the v2ProtocolHandler.
+ defer stream.Reset()
+
+ go waitStreamReply(stream.replySent, replyChan, stop)
+ case api.StreamTypeStdin:
+ ctx.stdinStream = stream
+ go waitStreamReply(stream.replySent, replyChan, stop)
+ case api.StreamTypeStdout:
+ ctx.stdoutStream = stream
+ go waitStreamReply(stream.replySent, replyChan, stop)
+ case api.StreamTypeStderr:
+ ctx.stderrStream = stream
+ go waitStreamReply(stream.replySent, replyChan, stop)
+ default:
+ runtime.HandleError(fmt.Errorf("unexpected stream type: %q", streamType))
+ }
+ case <-replyChan:
+ receivedStreams++
+ if receivedStreams == expectedStreams {
+ break WaitForStreams
+ }
+ case <-expired:
+ // TODO find a way to return the error to the user. Maybe use a separate
+ // stream to report errors?
+ return nil, errors.New("timed out waiting for client to create streams")
+ }
+ }
+
+ // NOTE(review): v1 closes the stdin stream before returning; this mirrors
+ // the upstream kubelet behavior this code was adapted from.
+ if ctx.stdinStream != nil {
+ ctx.stdinStream.Close()
+ }
+
+ return ctx, nil
+}
+
+// supportsTerminalResizing returns false because v1ProtocolHandler doesn't support it.
+func (*v1ProtocolHandler) supportsTerminalResizing() bool { return false }
+
+// handleResizeEvents decodes JSON-encoded TerminalSize updates from stream
+// and forwards them on channel until a decode error (including EOF) occurs,
+// at which point the channel is closed.
+func handleResizeEvents(stream io.Reader, channel chan<- remotecommand.TerminalSize) {
+ defer runtime.HandleCrash()
+ defer close(channel)
+
+ decoder := json.NewDecoder(stream)
+ for {
+ size := remotecommand.TerminalSize{}
+ if err := decoder.Decode(&size); err != nil {
+ // Any decode failure terminates the resize loop.
+ break
+ }
+ channel <- size
+ }
+}
+
+// v1WriteStatusFunc returns a WriteStatusFunc that writes the status error
+// text to the pre-v4 plain-text error stream. Success writes nothing.
+func v1WriteStatusFunc(stream io.Writer) func(status *apierrors.StatusError) error {
+ return func(status *apierrors.StatusError) error {
+ if status.Status().Status == metav1.StatusSuccess {
+ return nil // success is signaled by writing nothing to the error stream
+ }
+ _, err := stream.Write([]byte(status.Error()))
+ return err
+ }
+}
+
+// v4WriteStatusFunc returns a WriteStatusFunc that marshals a given api Status
+// as json in the error channel.
+// Unlike v1, success is reported explicitly as a marshaled Status object.
+func v4WriteStatusFunc(stream io.Writer) func(status *apierrors.StatusError) error {
+ return func(status *apierrors.StatusError) error {
+ bs, err := json.Marshal(status.Status())
+ if err != nil {
+ return err
+ }
+ _, err = stream.Write(bs)
+ return err
+ }
+}
diff --git a/pkg/streaming/remotecommand/websocket.go b/pkg/streaming/remotecommand/websocket.go
new file mode 100644
index 000000000..99c0af7c3
--- /dev/null
+++ b/pkg/streaming/remotecommand/websocket.go
@@ -0,0 +1,148 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package remotecommand
+
+import (
+ "fmt"
+ "net/http"
+ "time"
+
+ "k8s.io/apimachinery/pkg/util/runtime"
+ "k8s.io/apiserver/pkg/server/httplog"
+ "k8s.io/apiserver/pkg/util/wsstream"
+)
+
+const (
+	// Channel indices used by the websocket channel protocol. The ordering is
+	// part of the wire protocol shared with clients and must not change.
+	stdinChannel = iota
+	stdoutChannel
+	stderrChannel
+	errorChannel
+	resizeChannel
+
+	// Subprotocol names offered during the websocket handshake. The "v4."
+	// variants report errors as structured JSON statuses on the error channel
+	// (see v4WriteStatusFunc usage below).
+	preV4BinaryWebsocketProtocol = wsstream.ChannelWebSocketProtocol
+	preV4Base64WebsocketProtocol = wsstream.Base64ChannelWebSocketProtocol
+	v4BinaryWebsocketProtocol    = "v4." + wsstream.ChannelWebSocketProtocol
+	v4Base64WebsocketProtocol    = "v4." + wsstream.Base64ChannelWebSocketProtocol
+)
+
+// createChannels returns the standard channel types for a shell connection (STDIN 0, STDOUT 1, STDERR 2)
+// along with the approximate duplex value. It also creates the error (3) and resize (4) channels.
+func createChannels(opts *Options) []wsstream.ChannelType {
+	// stdin/stdout/stderr are only opened when requested; the error channel is
+	// always writable and the resize channel always readable.
+	return []wsstream.ChannelType{
+		stdinChannel:  readChannel(opts.Stdin),
+		stdoutChannel: writeChannel(opts.Stdout),
+		stderrChannel: writeChannel(opts.Stderr),
+		errorChannel:  wsstream.WriteChannel,
+		resizeChannel: wsstream.ReadChannel,
+	}
+}
+
+// readChannel maps the "stream requested" flag to a channel type: a real read
+// channel when real is true, otherwise an ignored channel.
+func readChannel(real bool) wsstream.ChannelType {
+	if !real {
+		return wsstream.IgnoreChannel
+	}
+	return wsstream.ReadChannel
+}
+
+// writeChannel maps the "stream requested" flag to a channel type: a real
+// write channel when real is true, otherwise an ignored channel.
+func writeChannel(real bool) wsstream.ChannelType {
+	if !real {
+		return wsstream.IgnoreChannel
+	}
+	return wsstream.WriteChannel
+}
+
+// createWebSocketStreams returns a context containing the websocket connection and
+// streams needed to perform an exec or an attach.
+//
+// It returns (nil, false) when the websocket upgrade fails; the error is
+// reported via runtime.HandleError rather than returned.
+func createWebSocketStreams(req *http.Request, w http.ResponseWriter, opts *Options, idleTimeout time.Duration) (*context, bool) {
+	channels := createChannels(opts)
+	conn := wsstream.NewConn(map[string]wsstream.ChannelProtocolConfig{
+		// The empty string is the fallback config used when the client asks
+		// for no named subprotocol; it behaves like the pre-v4 binary protocol.
+		"": {
+			Binary:   true,
+			Channels: channels,
+		},
+		preV4BinaryWebsocketProtocol: {
+			Binary:   true,
+			Channels: channels,
+		},
+		preV4Base64WebsocketProtocol: {
+			Binary:   false,
+			Channels: channels,
+		},
+		v4BinaryWebsocketProtocol: {
+			Binary:   true,
+			Channels: channels,
+		},
+		v4Base64WebsocketProtocol: {
+			Binary:   false,
+			Channels: channels,
+		},
+	})
+	conn.SetIdleTimeout(idleTimeout)
+	negotiatedProtocol, streams, err := conn.Open(httplog.Unlogged(req, w), req)
+	if err != nil {
+		runtime.HandleError(fmt.Errorf("unable to upgrade websocket connection: %v", err))
+		return nil, false
+	}
+
+	// Send an empty message to the lowest writable channel to notify the client the connection is established
+	// TODO: make generic to SPDY and WebSockets and do it outside of this method?
+	switch {
+	case opts.Stdout:
+		streams[stdoutChannel].Write([]byte{})
+	case opts.Stderr:
+		streams[stderrChannel].Write([]byte{})
+	default:
+		streams[errorChannel].Write([]byte{})
+	}
+
+	ctx := &context{
+		conn:         conn,
+		stdinStream:  streams[stdinChannel],
+		stdoutStream: streams[stdoutChannel],
+		stderrStream: streams[stderrChannel],
+		tty:          opts.TTY,
+		resizeStream: streams[resizeChannel],
+	}
+
+	// v4 clients understand structured (JSON) status messages on the error
+	// channel; older clients receive plain error text.
+	switch negotiatedProtocol {
+	case v4BinaryWebsocketProtocol, v4Base64WebsocketProtocol:
+		ctx.writeStatus = v4WriteStatusFunc(streams[errorChannel])
+	default:
+		ctx.writeStatus = v1WriteStatusFunc(streams[errorChannel])
+	}
+
+	return ctx, true
+}
diff --git a/pkg/streaming/request_cache.go b/pkg/streaming/request_cache.go
new file mode 100644
index 000000000..36d6921f6
--- /dev/null
+++ b/pkg/streaming/request_cache.go
@@ -0,0 +1,162 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package streaming
+
+import (
+ "container/list"
+ "crypto/rand"
+ "encoding/base64"
+ "fmt"
+ "math"
+ "sync"
+ "time"
+
+ "k8s.io/apimachinery/pkg/util/clock"
+)
+
+var (
+	// cacheTTL is the timeout after which tokens become invalid.
+	cacheTTL = 1 * time.Minute
+	// maxInFlight is the maximum number of in-flight requests to allow.
+	maxInFlight = 1000
+	// tokenLen is the length of the random base64 encoded token identifying the request.
+	tokenLen = 8
+)
+
+// requestCache caches streaming (exec/attach/port-forward) requests and generates a single-use
+// random token for their retrieval. The requestCache is used for building streaming URLs without
+// the need to encode every request parameter in the URL.
+//
+// All fields are guarded by lock; every method acquires it before touching
+// tokens or ll.
+type requestCache struct {
+	// clock is used to obtain the current time
+	clock clock.Clock
+
+	// tokens maps the generated token to the request for fast retrieval.
+	tokens map[string]*list.Element
+	// ll maintains an age-ordered request list for faster garbage collection of expired requests.
+	// Newest entries are pushed to the front; gc() expires from the back.
+	ll *list.List
+
+	lock sync.Mutex
+}
+
+// request represents an *ExecRequest, *AttachRequest, or *PortForwardRequest.
+type request interface{}
+
+// cacheEntry is a single cached request together with its token and expiry time.
+type cacheEntry struct {
+	token      string
+	req        request
+	expireTime time.Time
+}
+
+// newRequestCache constructs an empty requestCache backed by the real clock.
+func newRequestCache() *requestCache {
+	c := &requestCache{
+		tokens: make(map[string]*list.Element),
+		ll:     list.New(),
+		clock:  clock.RealClock{},
+	}
+	return c
+}
+
+// Insert the given request into the cache and returns the token used for fetching it out.
+// It returns ErrorTooManyInFlight when the cache is at capacity, or an error
+// if a unique token cannot be generated.
+func (c *requestCache) Insert(req request) (token string, err error) {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+
+	// Remove expired entries.
+	c.gc()
+	// If the cache is full, reject the request. Use >= rather than == so the
+	// check remains correct even if maxInFlight is ever lowered while entries
+	// are already cached.
+	if c.ll.Len() >= maxInFlight {
+		return "", NewErrorTooManyInFlight()
+	}
+	token, err = c.uniqueToken()
+	if err != nil {
+		return "", err
+	}
+	// Newest entries live at the front of the list; gc() expires from the back.
+	ele := c.ll.PushFront(&cacheEntry{token, req, c.clock.Now().Add(cacheTTL)})
+
+	c.tokens[token] = ele
+	return token, nil
+}
+
+// Consume removes the token from the cache and returns the cached request if
+// it is present and not yet expired. Tokens are strictly single-use: the entry
+// is removed even when it has already expired.
+func (c *requestCache) Consume(token string) (req request, found bool) {
+	c.lock.Lock()
+	defer c.lock.Unlock()
+
+	ele, ok := c.tokens[token]
+	if !ok {
+		return nil, false
+	}
+	// Drop the entry unconditionally before checking expiry.
+	delete(c.tokens, token)
+	c.ll.Remove(ele)
+
+	entry := ele.Value.(*cacheEntry)
+	if c.clock.Now().After(entry.expireTime) {
+		// Entry already expired.
+		return nil, false
+	}
+	return entry.req, true
+}
+
+// uniqueToken generates a random URL-safe token and ensures uniqueness.
+// It retries up to maxTries times on collision before giving up.
+func (c *requestCache) uniqueToken() (string, error) {
+	const maxTries = 10
+	// Number of random bytes needed so the base64 encoding is at least
+	// tokenLen characters (each base64 character encodes 6 bits).
+	tokenSize := math.Ceil(float64(tokenLen) * 6 / 8)
+	rawToken := make([]byte, int(tokenSize))
+	for i := 0; i < maxTries; i++ {
+		if _, err := rand.Read(rawToken); err != nil {
+			return "", err
+		}
+		encoded := base64.RawURLEncoding.EncodeToString(rawToken)
+		token := encoded[:tokenLen]
+		// Check uniqueness against the truncated token, since that is the
+		// value actually stored in (and looked up from) c.tokens. The original
+		// checked the full encoded string instead.
+		if _, exists := c.tokens[token]; !exists {
+			return token, nil
+		}
+	}
+	return "", fmt.Errorf("failed to generate unique token")
+}
+
+// gc removes expired entries from the back (oldest end) of the list, stopping
+// at the first entry that is still live. Must be write-locked prior to calling.
+func (c *requestCache) gc() {
+	now := c.clock.Now()
+	for back := c.ll.Back(); back != nil; back = c.ll.Back() {
+		entry := back.Value.(*cacheEntry)
+		if !now.After(entry.expireTime) {
+			// The list is age-ordered, so every remaining entry is live too.
+			return
+		}
+		c.ll.Remove(back)
+		delete(c.tokens, entry.token)
+	}
+}
diff --git a/pkg/streaming/server.go b/pkg/streaming/server.go
new file mode 100644
index 000000000..589c9a8ca
--- /dev/null
+++ b/pkg/streaming/server.go
@@ -0,0 +1,399 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package streaming
+
+import (
+ "crypto/tls"
+ "errors"
+ "io"
+ "net"
+ "net/http"
+ "net/url"
+ "path"
+ "time"
+
+ "google.golang.org/grpc/codes"
+ "google.golang.org/grpc/status"
+
+ restful "github.com/emicklei/go-restful"
+
+ "k8s.io/apimachinery/pkg/types"
+ remotecommandconsts "k8s.io/apimachinery/pkg/util/remotecommand"
+ "k8s.io/client-go/tools/remotecommand"
+ runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+
+ "github.com/containerd/cri/pkg/streaming/portforward"
+ remotecommandserver "github.com/containerd/cri/pkg/streaming/remotecommand"
+)
+
+// Server is the library interface to serve the stream requests.
+type Server interface {
+	http.Handler
+
+	// Get the serving URL for the requests.
+	// Requests must not be nil. Responses may be nil iff an error is returned.
+	GetExec(*runtimeapi.ExecRequest) (*runtimeapi.ExecResponse, error)
+	GetAttach(req *runtimeapi.AttachRequest) (*runtimeapi.AttachResponse, error)
+	GetPortForward(*runtimeapi.PortForwardRequest) (*runtimeapi.PortForwardResponse, error)
+
+	// Start the server.
+	// addr is the address to serve on (address:port). stayUp indicates whether the server should
+	// listen until Stop() is called, or automatically stop after all expected connections are
+	// closed. Calling Get{Exec,Attach,PortForward} increments the expected connection count.
+	// Function does not return until the server is stopped.
+	Start(stayUp bool) error
+	// Stop the server, and terminate any open connections.
+	Stop() error
+}
+
+// Runtime is the interface to execute the commands and provide the streams.
+type Runtime interface {
+	// Exec executes cmd in the container, wiring up the provided streams.
+	Exec(containerID string, cmd []string, in io.Reader, out, err io.WriteCloser, tty bool, resize <-chan remotecommand.TerminalSize) error
+	// Attach attaches the provided streams to the container's main process.
+	Attach(containerID string, in io.Reader, out, err io.WriteCloser, tty bool, resize <-chan remotecommand.TerminalSize) error
+	// PortForward forwards traffic for the sandbox port over stream.
+	PortForward(podSandboxID string, port int32, stream io.ReadWriteCloser) error
+}
+
+// Config defines the options used for running the stream server.
+type Config struct {
+	// The host:port address the server will listen on.
+	Addr string
+	// The optional base URL for constructing streaming URLs. If empty, the baseURL will be
+	// constructed from the serve address.
+	// Note that for port "0", the URL port will be set to actual port in use.
+	BaseURL *url.URL
+
+	// How long to leave idle connections open for.
+	StreamIdleTimeout time.Duration
+	// How long to wait for clients to create streams. Only used for SPDY streaming.
+	StreamCreationTimeout time.Duration
+
+	// The streaming protocols the server supports (understands and permits). See
+	// k8s.io/kubernetes/pkg/kubelet/server/remotecommand/constants.go for available protocols.
+	// Only used for SPDY streaming.
+	SupportedRemoteCommandProtocols []string
+
+	// The streaming protocols the server supports (understands and permits). See
+	// k8s.io/kubernetes/pkg/kubelet/server/portforward/constants.go for available protocols.
+	// Only used for SPDY streaming.
+	SupportedPortForwardProtocols []string
+
+	// The config for serving over TLS. If nil, TLS will not be used.
+	TLSConfig *tls.Config
+}
+
+// DefaultConfig provides default values for server Config. The DefaultConfig is partial, so
+// some fields like Addr must still be provided.
+var DefaultConfig = Config{
+	StreamIdleTimeout:               4 * time.Hour,
+	StreamCreationTimeout:           remotecommandconsts.DefaultStreamCreationTimeout,
+	SupportedRemoteCommandProtocols: remotecommandconsts.SupportedStreamingProtocols,
+	SupportedPortForwardProtocols:   portforward.SupportedProtocols,
+}
+
+// NewServer creates a new Server for stream requests. It wires up the
+// exec/attach/portforward endpoints on a go-restful container but does not
+// start listening; call Start for that.
+// TODO(tallclair): Add auth(n/z) interface & handling.
+func NewServer(config Config, runtime Runtime) (Server, error) {
+	s := &server{
+		config:  config,
+		runtime: &criAdapter{runtime},
+		cache:   newRequestCache(),
+	}
+
+	// Derive the base URL from the serve address when none was provided; the
+	// scheme follows whether TLS is configured.
+	if s.config.BaseURL == nil {
+		s.config.BaseURL = &url.URL{
+			Scheme: "http",
+			Host:   s.config.Addr,
+		}
+		if s.config.TLSConfig != nil {
+			s.config.BaseURL.Scheme = "https"
+		}
+	}
+
+	ws := &restful.WebService{}
+	endpoints := []struct {
+		path    string
+		handler restful.RouteFunction
+	}{
+		{"/exec/{token}", s.serveExec},
+		{"/attach/{token}", s.serveAttach},
+		{"/portforward/{token}", s.servePortForward},
+	}
+	// If serving relative to a base path, set that here.
+	pathPrefix := path.Dir(s.config.BaseURL.Path)
+	for _, e := range endpoints {
+		// Each endpoint is registered under both GET and POST.
+		for _, method := range []string{"GET", "POST"} {
+			ws.Route(ws.
+				Method(method).
+				Path(path.Join(pathPrefix, e.path)).
+				To(e.handler))
+		}
+	}
+	handler := restful.NewContainer()
+	handler.Add(ws)
+	s.handler = handler
+	s.server = &http.Server{
+		Addr:      s.config.Addr,
+		Handler:   s.handler,
+		TLSConfig: s.config.TLSConfig,
+	}
+
+	return s, nil
+}
+
+// server implements Server by caching streaming requests (single-use tokens)
+// and serving them over HTTP(S) via the go-restful handler built in NewServer.
+type server struct {
+	config  Config
+	runtime *criAdapter
+	handler http.Handler
+	cache   *requestCache
+	server  *http.Server
+}
+
+// validateExecRequest rejects exec requests that are missing a container ID,
+// combine TTY with stderr, or open no streams at all.
+func validateExecRequest(req *runtimeapi.ExecRequest) error {
+	switch {
+	case req.ContainerId == "":
+		return status.Errorf(codes.InvalidArgument, "missing required container_id")
+	case req.Tty && req.Stderr:
+		// If TTY is set, stderr cannot be true because multiplexing is not
+		// supported.
+		return status.Errorf(codes.InvalidArgument, "tty and stderr cannot both be true")
+	case !req.Stdin && !req.Stdout && !req.Stderr:
+		return status.Errorf(codes.InvalidArgument, "one of stdin, stdout, or stderr must be set")
+	}
+	return nil
+}
+
+// GetExec validates the exec request, caches it, and returns a single-use
+// serving URL for it.
+func (s *server) GetExec(req *runtimeapi.ExecRequest) (*runtimeapi.ExecResponse, error) {
+	if err := validateExecRequest(req); err != nil {
+		return nil, err
+	}
+	token, err := s.cache.Insert(req)
+	if err != nil {
+		return nil, err
+	}
+	resp := &runtimeapi.ExecResponse{Url: s.buildURL("exec", token)}
+	return resp, nil
+}
+
+// validateAttachRequest rejects attach requests that are missing a container
+// ID, combine TTY with stderr, or open no streams at all.
+func validateAttachRequest(req *runtimeapi.AttachRequest) error {
+	switch {
+	case req.ContainerId == "":
+		return status.Errorf(codes.InvalidArgument, "missing required container_id")
+	case req.Tty && req.Stderr:
+		// If TTY is set, stderr cannot be true because multiplexing is not
+		// supported.
+		return status.Errorf(codes.InvalidArgument, "tty and stderr cannot both be true")
+	case !req.Stdin && !req.Stdout && !req.Stderr:
+		return status.Errorf(codes.InvalidArgument, "one of stdin, stdout, and stderr must be set")
+	}
+	return nil
+}
+
+// GetAttach validates the attach request, caches it, and returns a single-use
+// serving URL for it.
+func (s *server) GetAttach(req *runtimeapi.AttachRequest) (*runtimeapi.AttachResponse, error) {
+	if err := validateAttachRequest(req); err != nil {
+		return nil, err
+	}
+	token, err := s.cache.Insert(req)
+	if err != nil {
+		return nil, err
+	}
+	resp := &runtimeapi.AttachResponse{Url: s.buildURL("attach", token)}
+	return resp, nil
+}
+
+// GetPortForward validates the port-forward request, caches it, and returns a
+// single-use serving URL for it.
+func (s *server) GetPortForward(req *runtimeapi.PortForwardRequest) (*runtimeapi.PortForwardResponse, error) {
+	if req.PodSandboxId == "" {
+		return nil, status.Errorf(codes.InvalidArgument, "missing required pod_sandbox_id")
+	}
+	token, err := s.cache.Insert(req)
+	if err != nil {
+		return nil, err
+	}
+	resp := &runtimeapi.PortForwardResponse{Url: s.buildURL("portforward", token)}
+	return resp, nil
+}
+
+// Start listens on config.Addr and serves until the server is stopped.
+// Only stayUp=true is currently supported.
+func (s *server) Start(stayUp bool) error {
+	if !stayUp {
+		// TODO(tallclair): Implement this.
+		return errors.New("stayUp=false is not yet implemented")
+	}
+
+	listener, err := net.Listen("tcp", s.config.Addr)
+	if err != nil {
+		return err
+	}
+	// Use the actual address as baseURL host. This handles the "0" port case.
+	s.config.BaseURL.Host = listener.Addr().String()
+	if s.config.TLSConfig != nil {
+		return s.server.ServeTLS(listener, "", "") // Use certs from TLSConfig.
+	}
+	return s.server.Serve(listener)
+}
+
+// Stop closes the underlying HTTP server, terminating any open connections.
+func (s *server) Stop() error {
+	return s.server.Close()
+}
+
+// ServeHTTP delegates to the go-restful handler built in NewServer.
+func (s *server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+	s.handler.ServeHTTP(w, r)
+}
+
+// buildURL returns the full streaming URL for the given method ("exec",
+// "attach", or "portforward") and single-use token, relative to the base URL.
+func (s *server) buildURL(method, token string) string {
+	return s.config.BaseURL.ResolveReference(&url.URL{
+		Path: path.Join(method, token),
+	}).String()
+}
+
+// serveExec resolves the single-use token to a cached exec request and serves
+// the streaming exec. Unknown, consumed, or mismatched tokens yield 404.
+func (s *server) serveExec(req *restful.Request, resp *restful.Response) {
+	token := req.PathParameter("token")
+	cachedRequest, ok := s.cache.Consume(token)
+	if !ok {
+		http.NotFound(resp.ResponseWriter, req.Request)
+		return
+	}
+	exec, ok := cachedRequest.(*runtimeapi.ExecRequest)
+	if !ok {
+		// The token resolved to a different request type (attach/port-forward).
+		http.NotFound(resp.ResponseWriter, req.Request)
+		return
+	}
+
+	streamOpts := &remotecommandserver.Options{
+		Stdin:  exec.Stdin,
+		Stdout: exec.Stdout,
+		Stderr: exec.Stderr,
+		TTY:    exec.Tty,
+	}
+
+	remotecommandserver.ServeExec(
+		resp.ResponseWriter,
+		req.Request,
+		s.runtime,
+		"", // unused: podName
+		"", // unused: podUID
+		exec.ContainerId,
+		exec.Cmd,
+		streamOpts,
+		s.config.StreamIdleTimeout,
+		s.config.StreamCreationTimeout,
+		s.config.SupportedRemoteCommandProtocols)
+}
+
+// serveAttach resolves the single-use token to a cached attach request and
+// serves the streaming attach. Unknown, consumed, or mismatched tokens yield 404.
+func (s *server) serveAttach(req *restful.Request, resp *restful.Response) {
+	token := req.PathParameter("token")
+	cachedRequest, ok := s.cache.Consume(token)
+	if !ok {
+		http.NotFound(resp.ResponseWriter, req.Request)
+		return
+	}
+	attach, ok := cachedRequest.(*runtimeapi.AttachRequest)
+	if !ok {
+		// The token resolved to a different request type (exec/port-forward).
+		http.NotFound(resp.ResponseWriter, req.Request)
+		return
+	}
+
+	streamOpts := &remotecommandserver.Options{
+		Stdin:  attach.Stdin,
+		Stdout: attach.Stdout,
+		Stderr: attach.Stderr,
+		TTY:    attach.Tty,
+	}
+	remotecommandserver.ServeAttach(
+		resp.ResponseWriter,
+		req.Request,
+		s.runtime,
+		"", // unused: podName
+		"", // unused: podUID
+		attach.ContainerId,
+		streamOpts,
+		s.config.StreamIdleTimeout,
+		s.config.StreamCreationTimeout,
+		s.config.SupportedRemoteCommandProtocols)
+}
+
+// servePortForward resolves the single-use token to a cached port-forward
+// request and serves it. The sandbox ID is passed in the podName position of
+// ServePortForward (see criAdapter).
+func (s *server) servePortForward(req *restful.Request, resp *restful.Response) {
+	token := req.PathParameter("token")
+	cachedRequest, ok := s.cache.Consume(token)
+	if !ok {
+		http.NotFound(resp.ResponseWriter, req.Request)
+		return
+	}
+	pf, ok := cachedRequest.(*runtimeapi.PortForwardRequest)
+	if !ok {
+		// The token resolved to a different request type (exec/attach).
+		http.NotFound(resp.ResponseWriter, req.Request)
+		return
+	}
+
+	portForwardOptions, err := portforward.BuildV4Options(pf.Port)
+	if err != nil {
+		resp.WriteError(http.StatusBadRequest, err)
+		return
+	}
+
+	portforward.ServePortForward(
+		resp.ResponseWriter,
+		req.Request,
+		s.runtime,
+		pf.PodSandboxId,
+		"", // unused: podUID
+		portForwardOptions,
+		s.config.StreamIdleTimeout,
+		s.config.StreamCreationTimeout,
+		s.config.SupportedPortForwardProtocols)
+}
+
+// criAdapter wraps the Runtime functions to conform to the remotecommand interfaces.
+// The adapter binds the container ID to the container name argument, and the pod sandbox ID to the pod name.
+type criAdapter struct {
+	Runtime
+}
+
+// Compile-time interface conformance checks.
+var _ remotecommandserver.Executor = &criAdapter{}
+var _ remotecommandserver.Attacher = &criAdapter{}
+var _ portforward.PortForwarder = &criAdapter{}
+
+// ExecInContainer implements remotecommandserver.Executor. The container
+// argument carries the CRI container ID; podName, podUID, and timeout are unused.
+func (a *criAdapter) ExecInContainer(podName string, podUID types.UID, container string, cmd []string, in io.Reader, out, err io.WriteCloser, tty bool, resize <-chan remotecommand.TerminalSize, timeout time.Duration) error {
+	return a.Runtime.Exec(container, cmd, in, out, err, tty, resize)
+}
+
+// AttachContainer implements remotecommandserver.Attacher. The container
+// argument carries the CRI container ID; podName and podUID are unused.
+func (a *criAdapter) AttachContainer(podName string, podUID types.UID, container string, in io.Reader, out, err io.WriteCloser, tty bool, resize <-chan remotecommand.TerminalSize) error {
+	return a.Runtime.Attach(container, in, out, err, tty, resize)
+}
+
+// PortForward implements portforward.PortForwarder. podName actually carries
+// the pod sandbox ID (servePortForward passes pf.PodSandboxId in that
+// position); podUID is unused.
+func (a *criAdapter) PortForward(podName string, podUID types.UID, port int32, stream io.ReadWriteCloser) error {
+	return a.Runtime.PortForward(podName, port, stream)
+}
diff --git a/pkg/util/deep_copy.go b/pkg/util/deep_copy.go
new file mode 100644
index 000000000..d0e0bf37e
--- /dev/null
+++ b/pkg/util/deep_copy.go
@@ -0,0 +1,42 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package util
+
+import (
+ "encoding/json"
+
+ "github.com/pkg/errors"
+)
+
+// DeepCopy makes a deep copy from src into dst.
+//
+// The copy is performed by JSON round-tripping src into dst, so both values
+// must be JSON-serializable; values JSON cannot represent (and unexported
+// fields) are not copied. dst must be a non-nil pointer.
+func DeepCopy(dst interface{}, src interface{}) error {
+	if dst == nil {
+		return errors.New("dst cannot be nil")
+	}
+	if src == nil {
+		return errors.New("src cannot be nil")
+	}
+	// Named "data" rather than "bytes" so the stdlib bytes package is not shadowed.
+	data, err := json.Marshal(src)
+	if err != nil {
+		return errors.Wrap(err, "unable to marshal src")
+	}
+	err = json.Unmarshal(data, dst)
+	if err != nil {
+		return errors.Wrap(err, "unable to unmarshal into dst")
+	}
+	return nil
+}
diff --git a/pkg/util/deep_copy_test.go b/pkg/util/deep_copy_test.go
new file mode 100644
index 000000000..4ca1ebb16
--- /dev/null
+++ b/pkg/util/deep_copy_test.go
@@ -0,0 +1,63 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package util
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+// A is a test fixture exercising DeepCopy across scalar, slice, map, and
+// nested-pointer fields.
+type A struct {
+	String  string
+	Int     int
+	Strings []string
+	Ints    map[string]int
+	As      map[string]*A
+}
+
+// TestCopy verifies that DeepCopy copies src into dst, overwriting dst's
+// pre-existing conflicting values.
+func TestCopy(t *testing.T) {
+	src := &A{
+		String:  "Hello World",
+		Int:     5,
+		Strings: []string{"A", "B"},
+		Ints:    map[string]int{"A": 1, "B": 2, "C": 4},
+		As: map[string]*A{
+			"One": {String: "2"},
+			"Two": {String: "3"},
+		},
+	}
+	// dst starts with values that conflict with src and must be overwritten.
+	dst := &A{
+		Strings: []string{"C"},
+		Ints:    map[string]int{"B": 3, "C": 4},
+		As:      map[string]*A{"One": {String: "1", Int: 5}},
+	}
+	expected := &A{
+		String:  "Hello World",
+		Int:     5,
+		Strings: []string{"A", "B"},
+		Ints:    map[string]int{"A": 1, "B": 2, "C": 4},
+		As: map[string]*A{
+			"One": {String: "2"},
+			"Two": {String: "3"},
+		},
+	}
+	// Sanity check: dst must differ from the expected result before copying.
+	assert.NotEqual(t, expected, dst)
+	err := DeepCopy(dst, src)
+	assert.NoError(t, err)
+	assert.Equal(t, expected, dst)
+}
diff --git a/pkg/util/id.go b/pkg/util/id.go
new file mode 100644
index 000000000..90f762ba7
--- /dev/null
+++ b/pkg/util/id.go
@@ -0,0 +1,29 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package util
+
+import (
+ "encoding/hex"
+ "math/rand"
+)
+
+// GenerateID generates a random unique id.
+//
+// NOTE(review): this uses math/rand, which is not cryptographically secure and
+// produces a deterministic sequence unless seeded elsewhere — confirm these IDs
+// never need to be unpredictable, or consider crypto/rand.
+func GenerateID() string {
+	// 32 random bytes -> 64 hex characters.
+	b := make([]byte, 32)
+	rand.Read(b)
+	return hex.EncodeToString(b)
+}
diff --git a/pkg/util/image.go b/pkg/util/image.go
new file mode 100644
index 000000000..d3abcd31a
--- /dev/null
+++ b/pkg/util/image.go
@@ -0,0 +1,33 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package util
+
+import (
+ "github.com/containerd/containerd/reference/docker"
+)
+
+// NormalizeImageRef normalizes the image reference following the docker convention. This is added
+// mainly for backward compatibility.
+// The reference returned can only be either tagged or digested. For reference contains both tag
+// and digest, the function returns digested reference, e.g. docker.io/library/busybox:latest@
+// sha256:7cc4b5aefd1d0cadf8d97d4350462ba51c694ebca145b08d7d41b41acc8db5aa will be returned as
+// docker.io/library/busybox@sha256:7cc4b5aefd1d0cadf8d97d4350462ba51c694ebca145b08d7d41b41acc8db5aa.
+//
+// This is now a thin wrapper around the containerd implementation.
+//
+// Deprecated: use github.com/containerd/containerd/reference/docker.ParseDockerRef() instead
+func NormalizeImageRef(ref string) (docker.Named, error) {
+	return docker.ParseDockerRef(ref)
+}
diff --git a/pkg/util/image_test.go b/pkg/util/image_test.go
new file mode 100644
index 000000000..f4d911b1c
--- /dev/null
+++ b/pkg/util/image_test.go
@@ -0,0 +1,84 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package util
+
+import (
+ "testing"
+
+ "github.com/containerd/containerd/reference"
+ "github.com/stretchr/testify/assert"
+)
+
+// TestNormalizeImageRef is a table test covering default registry/repository/tag
+// completion and the tag-plus-digest case (digest wins). Each normalized output
+// must also parse as a containerd reference.
+func TestNormalizeImageRef(t *testing.T) {
+	for _, test := range []struct {
+		input  string
+		expect string
+	}{
+		{ // has nothing
+			input:  "busybox",
+			expect: "docker.io/library/busybox:latest",
+		},
+		{ // only has tag
+			input:  "busybox:latest",
+			expect: "docker.io/library/busybox:latest",
+		},
+		{ // only has digest
+			input:  "busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
+			expect: "docker.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
+		},
+		{ // only has path
+			input:  "library/busybox",
+			expect: "docker.io/library/busybox:latest",
+		},
+		{ // only has hostname
+			input:  "docker.io/busybox",
+			expect: "docker.io/library/busybox:latest",
+		},
+		{ // has no tag
+			input:  "docker.io/library/busybox",
+			expect: "docker.io/library/busybox:latest",
+		},
+		{ // has no path
+			input:  "docker.io/busybox:latest",
+			expect: "docker.io/library/busybox:latest",
+		},
+		{ // has no hostname
+			input:  "library/busybox:latest",
+			expect: "docker.io/library/busybox:latest",
+		},
+		{ // full reference
+			input:  "docker.io/library/busybox:latest",
+			expect: "docker.io/library/busybox:latest",
+		},
+		{ // gcr reference
+			input:  "gcr.io/library/busybox",
+			expect: "gcr.io/library/busybox:latest",
+		},
+		{ // both tag and digest
+			input:  "gcr.io/library/busybox:latest@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
+			expect: "gcr.io/library/busybox@sha256:e6693c20186f837fc393390135d8a598a96a833917917789d63766cab6c59582",
+		},
+	} {
+		t.Logf("TestCase %q", test.input)
+		normalized, err := NormalizeImageRef(test.input)
+		assert.NoError(t, err)
+		output := normalized.String()
+		assert.Equal(t, test.expect, output)
+		_, err = reference.Parse(output)
+		assert.NoError(t, err, "%q should be containerd supported reference", output)
+	}
+}
diff --git a/pkg/util/strings.go b/pkg/util/strings.go
new file mode 100644
index 000000000..df1a5c91a
--- /dev/null
+++ b/pkg/util/strings.go
@@ -0,0 +1,59 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package util
+
+import "strings"
+
+// InStringSlice reports whether str appears in ss.
+// Comparison is case insensitive.
+func InStringSlice(ss []string, str string) bool {
+	for i := range ss {
+		if strings.EqualFold(ss[i], str) {
+			return true
+		}
+	}
+	return false
+}
+
+// SubtractStringSlice returns a new slice with every occurrence of str removed
+// from ss. Comparison is case insensitive; the input slice is not modified.
+func SubtractStringSlice(ss []string, str string) []string {
+	var kept []string
+	for i := range ss {
+		if !strings.EqualFold(ss[i], str) {
+			kept = append(kept, ss[i])
+		}
+	}
+	return kept
+}
+
+// MergeStringSlices merges 2 string slices into one and remove duplicated elements.
+// The order of the returned elements is unspecified (map iteration order).
+func MergeStringSlices(a []string, b []string) []string {
+	set := make(map[string]struct{}, len(a)+len(b))
+	for _, group := range [][]string{a, b} {
+		for _, s := range group {
+			set[s] = struct{}{}
+		}
+	}
+	var merged []string
+	for s := range set {
+		merged = append(merged, s)
+	}
+	return merged
+}
diff --git a/pkg/util/strings_test.go b/pkg/util/strings_test.go
new file mode 100644
index 000000000..1679b5ab7
--- /dev/null
+++ b/pkg/util/strings_test.go
@@ -0,0 +1,59 @@
+/*
+ Copyright The containerd Authors.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+*/
+
+package util
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestInStringSlice(t *testing.T) {
+	ss := []string{"ABC", "def", "ghi"}
+
+	// Present elements match regardless of case.
+	for _, probe := range []string{"ABC", "abc", "def", "DEF"} {
+		assert.True(t, InStringSlice(ss, probe))
+	}
+	// Absent elements never match, in either case.
+	for _, probe := range []string{"hij", "HIJ"} {
+		assert.False(t, InStringSlice(ss, probe))
+	}
+	assert.False(t, InStringSlice(nil, "HIJ"))
+}
+
+func TestSubtractStringSlice(t *testing.T) {
+	ss := []string{"ABC", "def", "ghi"}
+
+	// Removal is case insensitive: either case of the needle removes the element.
+	for _, needle := range []string{"abc", "ABC"} {
+		assert.Equal(t, []string{"def", "ghi"}, SubtractStringSlice(ss, needle))
+	}
+	for _, needle := range []string{"def", "DEF"} {
+		assert.Equal(t, []string{"ABC", "ghi"}, SubtractStringSlice(ss, needle))
+	}
+	// A needle not in the slice leaves it unchanged.
+	for _, needle := range []string{"hij", "HIJ"} {
+		assert.Equal(t, []string{"ABC", "def", "ghi"}, SubtractStringSlice(ss, needle))
+	}
+	assert.Empty(t, SubtractStringSlice(nil, "hij"))
+	assert.Empty(t, SubtractStringSlice([]string{}, "hij"))
+}
+
+func TestMergeStringSlices(t *testing.T) {
+	result := MergeStringSlices([]string{"abc", "def", "ghi"}, []string{"def", "jkl", "mno"})
+
+	// The union contains each distinct element exactly once; order is not
+	// asserted, only length and membership.
+	expect := []string{"abc", "def", "ghi", "jkl", "mno"}
+	assert.Len(t, result, len(expect))
+	for _, s := range expect {
+		assert.Contains(t, result, s)
+	}
+}