Update cri and cgroup packages

This change includes a cri master bump and a cgroup bump for windows support
with cgroup stats and reusing the cgroup metric types.

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
This commit is contained in:
Michael Crosby
2019-09-25 16:43:05 -04:00
parent 1009023783
commit 8ff5827e98
85 changed files with 8617 additions and 2124 deletions

View File

@@ -26,17 +26,33 @@ import (
"strconv"
"strings"
v1 "github.com/containerd/cgroups/stats/v1"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
func NewBlkio(root string) *blkioController {
return &blkioController{
root: filepath.Join(root, string(Blkio)),
// NewBlkio returns a Blkio controller given the root folder of cgroups.
// It may optionally accept other configuration options, such as ProcRoot(path)
func NewBlkio(root string, options ...func(controller *blkioController)) *blkioController {
ctrl := &blkioController{
root: filepath.Join(root, string(Blkio)),
procRoot: "/proc",
}
for _, opt := range options {
opt(ctrl)
}
return ctrl
}
// ProcRoot overrides the default location of the "/proc" filesystem
func ProcRoot(path string) func(controller *blkioController) {
return func(c *blkioController) {
c.procRoot = path
}
}
type blkioController struct {
root string
root string
procRoot string
}
func (b *blkioController) Name() Name {
@@ -72,8 +88,8 @@ func (b *blkioController) Update(path string, resources *specs.LinuxResources) e
return b.Create(path, resources)
}
func (b *blkioController) Stat(path string, stats *Metrics) error {
stats.Blkio = &BlkIOStat{}
func (b *blkioController) Stat(path string, stats *v1.Metrics) error {
stats.Blkio = &v1.BlkIOStat{}
settings := []blkioStatSettings{
{
name: "throttle.io_serviced",
@@ -122,7 +138,7 @@ func (b *blkioController) Stat(path string, stats *Metrics) error {
},
)
}
f, err := os.Open("/proc/diskstats")
f, err := os.Open(filepath.Join(b.procRoot, "diskstats"))
if err != nil {
return err
}
@@ -141,7 +157,7 @@ func (b *blkioController) Stat(path string, stats *Metrics) error {
return nil
}
func (b *blkioController) readEntry(devices map[deviceKey]string, path, name string, entry *[]*BlkIOEntry) error {
func (b *blkioController) readEntry(devices map[deviceKey]string, path, name string, entry *[]*v1.BlkIOEntry) error {
f, err := os.Open(filepath.Join(b.Path(path), fmt.Sprintf("blkio.%s", name)))
if err != nil {
return err
@@ -180,7 +196,7 @@ func (b *blkioController) readEntry(devices map[deviceKey]string, path, name str
if err != nil {
return err
}
*entry = append(*entry, &BlkIOEntry{
*entry = append(*entry, &v1.BlkIOEntry{
Device: devices[deviceKey{major, minor}],
Major: major,
Minor: minor,
@@ -268,7 +284,7 @@ type blkioSettings struct {
type blkioStatSettings struct {
name string
entry *[]*BlkIOEntry
entry *[]*v1.BlkIOEntry
}
func uintf(v interface{}) []byte {

View File

@@ -25,6 +25,7 @@ import (
"strings"
"sync"
v1 "github.com/containerd/cgroups/stats/v1"
specs "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
)
@@ -246,7 +247,7 @@ func (c *cgroup) Delete() error {
}
// Stat returns the current metrics for the cgroup
func (c *cgroup) Stat(handlers ...ErrorHandler) (*Metrics, error) {
func (c *cgroup) Stat(handlers ...ErrorHandler) (*v1.Metrics, error) {
c.mu.Lock()
defer c.mu.Unlock()
if c.err != nil {
@@ -256,10 +257,10 @@ func (c *cgroup) Stat(handlers ...ErrorHandler) (*Metrics, error) {
handlers = append(handlers, errPassthrough)
}
var (
stats = &Metrics{
CPU: &CPUStat{
Throttling: &Throttle{},
Usage: &CPUUsage{},
stats = &v1.Metrics{
CPU: &v1.CPUStat{
Throttling: &v1.Throttle{},
Usage: &v1.CPUUsage{},
},
}
wg = &sync.WaitGroup{}

View File

@@ -19,6 +19,7 @@ package cgroups
import (
"os"
v1 "github.com/containerd/cgroups/stats/v1"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
@@ -68,7 +69,7 @@ type Cgroup interface {
// subsystems are moved one at a time
MoveTo(Cgroup) error
// Stat returns the stats for all subsystems in the cgroup
Stat(...ErrorHandler) (*Metrics, error)
Stat(...ErrorHandler) (*v1.Metrics, error)
// Update updates all the subsystems with the provided resource changes
Update(resources *specs.LinuxResources) error
// Processes returns all the processes in a select subsystem for the cgroup

View File

@@ -24,6 +24,7 @@ import (
"path/filepath"
"strconv"
v1 "github.com/containerd/cgroups/stats/v1"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
@@ -100,7 +101,7 @@ func (c *cpuController) Update(path string, resources *specs.LinuxResources) err
return c.Create(path, resources)
}
func (c *cpuController) Stat(path string, stats *Metrics) error {
func (c *cpuController) Stat(path string, stats *v1.Metrics) error {
f, err := os.Open(filepath.Join(c.Path(path), "cpu.stat"))
if err != nil {
return err

View File

@@ -22,6 +22,8 @@ import (
"path/filepath"
"strconv"
"strings"
v1 "github.com/containerd/cgroups/stats/v1"
)
const nanosecondsInSecond = 1000000000
@@ -46,7 +48,7 @@ func (c *cpuacctController) Path(path string) string {
return filepath.Join(c.root, path)
}
func (c *cpuacctController) Stat(path string, stats *Metrics) error {
func (c *cpuacctController) Stat(path string, stats *v1.Metrics) error {
user, kernel, err := c.getUsage(path)
if err != nil {
return err

13
vendor/github.com/containerd/cgroups/go.mod generated vendored Normal file
View File

@@ -0,0 +1,13 @@
module github.com/containerd/cgroups
go 1.12
require (
github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e
github.com/docker/go-units v0.4.0
github.com/godbus/dbus v0.0.0-20190422162347-ade71ed3457e
github.com/gogo/protobuf v1.2.1
github.com/opencontainers/runtime-spec v0.1.2-0.20190507144316-5b71a03e2700
github.com/pkg/errors v0.8.1
golang.org/x/sys v0.0.0-20190514135907-3a4b5fb9f71f
)

View File

@@ -23,6 +23,7 @@ import (
"strconv"
"strings"
v1 "github.com/containerd/cgroups/stats/v1"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
@@ -67,7 +68,7 @@ func (h *hugetlbController) Create(path string, resources *specs.LinuxResources)
return nil
}
func (h *hugetlbController) Stat(path string, stats *Metrics) error {
func (h *hugetlbController) Stat(path string, stats *v1.Metrics) error {
for _, size := range h.sizes {
s, err := h.readSizeStat(path, size)
if err != nil {
@@ -78,8 +79,8 @@ func (h *hugetlbController) Stat(path string, stats *Metrics) error {
return nil
}
func (h *hugetlbController) readSizeStat(path, size string) (*HugetlbStat, error) {
s := HugetlbStat{
func (h *hugetlbController) readSizeStat(path, size string) (*v1.HugetlbStat, error) {
s := v1.HugetlbStat{
Pagesize: size,
}
for _, t := range []struct {

View File

@@ -27,19 +27,48 @@ import (
"strings"
"syscall"
"golang.org/x/sys/unix"
v1 "github.com/containerd/cgroups/stats/v1"
specs "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/sys/unix"
)
func NewMemory(root string) *memoryController {
return &memoryController{
root: filepath.Join(root, string(Memory)),
// NewMemory returns a Memory controller given the root folder of cgroups.
// It may optionally accept other configuration options, such as IgnoreModules(...)
func NewMemory(root string, options ...func(*memoryController)) *memoryController {
mc := &memoryController{
root: filepath.Join(root, string(Memory)),
ignored: map[string]struct{}{},
}
for _, opt := range options {
opt(mc)
}
return mc
}
// IgnoreModules configure the memory controller to not read memory metrics for some
// module names (e.g. passing "memsw" would avoid all the memory.memsw.* entries)
func IgnoreModules(names ...string) func(*memoryController) {
return func(mc *memoryController) {
for _, name := range names {
mc.ignored[name] = struct{}{}
}
}
}
// OptionalSwap allows the memory controller to not fail if cgroups is not accounting
// Swap memory (there are no memory.memsw.* entries)
func OptionalSwap() func(*memoryController) {
return func(mc *memoryController) {
_, err := os.Stat(filepath.Join(mc.root, "memory.memsw.usage_in_bytes"))
if os.IsNotExist(err) {
mc.ignored["memsw"] = struct{}{}
}
}
}
type memoryController struct {
root string
root string
ignored map[string]struct{}
}
func (m *memoryController) Name() Name {
@@ -97,24 +126,24 @@ func (m *memoryController) Update(path string, resources *specs.LinuxResources)
return m.set(path, settings)
}
func (m *memoryController) Stat(path string, stats *Metrics) error {
func (m *memoryController) Stat(path string, stats *v1.Metrics) error {
f, err := os.Open(filepath.Join(m.Path(path), "memory.stat"))
if err != nil {
return err
}
defer f.Close()
stats.Memory = &MemoryStat{
Usage: &MemoryEntry{},
Swap: &MemoryEntry{},
Kernel: &MemoryEntry{},
KernelTCP: &MemoryEntry{},
stats.Memory = &v1.MemoryStat{
Usage: &v1.MemoryEntry{},
Swap: &v1.MemoryEntry{},
Kernel: &v1.MemoryEntry{},
KernelTCP: &v1.MemoryEntry{},
}
if err := m.parseStats(f, stats.Memory); err != nil {
return err
}
for _, t := range []struct {
module string
entry *MemoryEntry
entry *v1.MemoryEntry
}{
{
module: "",
@@ -133,6 +162,9 @@ func (m *memoryController) Stat(path string, stats *Metrics) error {
entry: stats.Memory.KernelTCP,
},
} {
if _, ok := m.ignored[t.module]; ok {
continue
}
for _, tt := range []struct {
name string
value *uint64
@@ -197,7 +229,7 @@ func writeEventFD(root string, cfd, efd uintptr) error {
return err
}
func (m *memoryController) parseStats(r io.Reader, stat *MemoryStat) error {
func (m *memoryController) parseStats(r io.Reader, stat *v1.MemoryStat) error {
var (
raw = make(map[string]uint64)
sc = bufio.NewScanner(r)
@@ -282,7 +314,7 @@ func getMemorySettings(resources *specs.LinuxResources) []memorySettings {
value: mem.Limit,
},
{
name: "soft_limit_in_bytes",
name: "soft_limit_in_bytes",
value: mem.Reservation,
},
{

View File

@@ -23,6 +23,7 @@ import (
"strconv"
"strings"
v1 "github.com/containerd/cgroups/stats/v1"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
@@ -62,7 +63,7 @@ func (p *pidsController) Update(path string, resources *specs.LinuxResources) er
return p.Create(path, resources)
}
func (p *pidsController) Stat(path string, stats *Metrics) error {
func (p *pidsController) Stat(path string, stats *v1.Metrics) error {
current, err := readUint(filepath.Join(p.Path(path), "pids.current"))
if err != nil {
return err
@@ -77,7 +78,7 @@ func (p *pidsController) Stat(path string, stats *Metrics) error {
return err
}
}
stats.Pids = &PidsStat{
stats.Pids = &v1.PidsStat{
Current: current,
Limit: max,
}

View File

@@ -24,6 +24,7 @@ import (
"strconv"
"strings"
v1 "github.com/containerd/cgroups/stats/v1"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
@@ -80,7 +81,7 @@ func (p *rdmaController) Update(path string, resources *specs.LinuxResources) er
return p.Create(path, resources)
}
func parseRdmaKV(raw string, entry *RdmaEntry) {
func parseRdmaKV(raw string, entry *v1.RdmaEntry) {
var value uint64
var err error
@@ -103,13 +104,13 @@ func parseRdmaKV(raw string, entry *RdmaEntry) {
}
}
func toRdmaEntry(strEntries []string) []*RdmaEntry {
var rdmaEntries []*RdmaEntry
func toRdmaEntry(strEntries []string) []*v1.RdmaEntry {
var rdmaEntries []*v1.RdmaEntry
for i := range strEntries {
parts := strings.Fields(strEntries[i])
switch len(parts) {
case 3:
entry := new(RdmaEntry)
entry := new(v1.RdmaEntry)
entry.Device = parts[0]
parseRdmaKV(parts[1], entry)
parseRdmaKV(parts[2], entry)
@@ -122,7 +123,7 @@ func toRdmaEntry(strEntries []string) []*RdmaEntry {
return rdmaEntries
}
func (p *rdmaController) Stat(path string, stats *Metrics) error {
func (p *rdmaController) Stat(path string, stats *v1.Metrics) error {
currentData, err := ioutil.ReadFile(filepath.Join(p.Path(path), "rdma.current"))
if err != nil {
@@ -145,7 +146,7 @@ func (p *rdmaController) Stat(path string, stats *Metrics) error {
currentEntries := toRdmaEntry(currentPerDevices)
maxEntries := toRdmaEntry(maxPerDevices)
stats.Rdma = &RdmaStat{
stats.Rdma = &v1.RdmaStat{
Current: currentEntries,
Limit: maxEntries,
}

17
vendor/github.com/containerd/cgroups/stats/v1/doc.go generated vendored Normal file
View File

@@ -0,0 +1,17 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1

View File

@@ -19,6 +19,7 @@ package cgroups
import (
"fmt"
v1 "github.com/containerd/cgroups/stats/v1"
specs "github.com/opencontainers/runtime-spec/specs-go"
)
@@ -85,7 +86,7 @@ type deleter interface {
type stater interface {
Subsystem
Stat(path string, stats *Metrics) error
Stat(path string, stats *v1.Metrics) error
}
type updater interface {

View File

@@ -82,7 +82,7 @@ specifications as appropriate.
backport version of `libseccomp-dev` is required. See [travis.yml](.travis.yml) for an example on trusty.
* **btrfs development library.** Required by containerd btrfs support. `btrfs-tools`(Ubuntu, Debian) / `btrfs-progs-devel`(Fedora, CentOS, RHEL)
2. Install **`socat`** (required by portforward).
2. Install and setup a go 1.10 development environment.
2. Install and setup a go 1.12.9 development environment. (Note: You can check the travis logs for a recent pull request to confirm the version(s) of golang currently being used to build and test master.)
3. Make a local clone of this repository.
4. Install binary dependencies by running the following command from your cloned `cri/` project directory:
```bash

View File

@@ -40,6 +40,7 @@ import (
criconfig "github.com/containerd/cri/pkg/config"
"github.com/containerd/cri/pkg/constants"
criplatforms "github.com/containerd/cri/pkg/containerd/platforms"
"github.com/containerd/cri/pkg/server"
)
@@ -89,6 +90,7 @@ func initCRIService(ic *plugin.InitContext) (interface{}, error) {
client, err := containerd.New(
"",
containerd.WithDefaultNamespace(constants.K8sContainerdNamespace),
containerd.WithDefaultPlatform(criplatforms.Default()),
containerd.WithServices(servicesOpts...),
)
if err != nil {

View File

@@ -21,11 +21,9 @@ import (
"time"
"github.com/BurntSushi/toml"
"github.com/containerd/containerd"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/plugin"
"github.com/pkg/errors"
"k8s.io/kubernetes/pkg/kubelet/server/streaming"
)
// Runtime struct to contain the type(ID), engine, and root variables for a default runtime
@@ -40,6 +38,10 @@ type Runtime struct {
// PodAnnotations is a list of pod annotations passed to both pod sandbox as well as
// container OCI annotations.
PodAnnotations []string `toml:"pod_annotations" json:"PodAnnotations"`
// ContainerAnnotations is a list of container annotations passed through to the OCI config of the containers.
// Container annotations in CRI are usually generated by other Kubernetes node components (i.e., not users).
// Currently, only device plugins populate the annotations.
ContainerAnnotations []string `toml:"container_annotations" json:"ContainerAnnotations"`
// Root is the directory used by containerd for runtime state.
// DEPRECATED: use Options instead. Remove when shim v1 is deprecated.
// This only works for runtime type "io.containerd.runtime.v1.linux".
@@ -132,7 +134,7 @@ type Registry struct {
// Mirrors are namespace to mirror mapping for all namespaces.
Mirrors map[string]Mirror `toml:"mirrors" json:"mirrors"`
// Configs are configs for each registry.
// The key is the FDQN or IP of the registry.
// The key is the domain name or IP of the registry.
Configs map[string]RegistryConfig `toml:"configs" json:"configs"`
// Auths are registry endpoint to auth config mapping. The registry endpoint must
@@ -228,51 +230,6 @@ type Config struct {
StateDir string `json:"stateDir"`
}
// DefaultConfig returns default configurations of cri plugin.
func DefaultConfig() PluginConfig {
return PluginConfig{
CniConfig: CniConfig{
NetworkPluginBinDir: "/opt/cni/bin",
NetworkPluginConfDir: "/etc/cni/net.d",
NetworkPluginMaxConfNum: 1, // only one CNI plugin config file will be loaded
NetworkPluginConfTemplate: "",
},
ContainerdConfig: ContainerdConfig{
Snapshotter: containerd.DefaultSnapshotter,
DefaultRuntimeName: "runc",
NoPivot: false,
Runtimes: map[string]Runtime{
"runc": {
Type: "io.containerd.runc.v1",
},
},
},
DisableTCPService: true,
StreamServerAddress: "127.0.0.1",
StreamServerPort: "0",
StreamIdleTimeout: streaming.DefaultConfig.StreamIdleTimeout.String(), // 4 hour
EnableSelinux: false,
EnableTLSStreaming: false,
X509KeyPairStreaming: X509KeyPairStreaming{
TLSKeyFile: "",
TLSCertFile: "",
},
SandboxImage: "k8s.gcr.io/pause:3.1",
StatsCollectPeriod: 10,
SystemdCgroup: false,
MaxContainerLogLineSize: 16 * 1024,
Registry: Registry{
Mirrors: map[string]Mirror{
"docker.io": {
Endpoints: []string{"https://registry-1.docker.io"},
},
},
},
MaxConcurrentDownloads: 3,
DisableProcMount: false,
}
}
const (
// RuntimeUntrusted is the implicit runtime defined for ContainerdConfig.UntrustedWorkloadRuntime
RuntimeUntrusted = "untrusted"

View File

@@ -0,0 +1,69 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"github.com/containerd/containerd"
"k8s.io/kubernetes/pkg/kubelet/server/streaming"
)
// DefaultConfig returns default configurations of cri plugin.
func DefaultConfig() PluginConfig {
return PluginConfig{
CniConfig: CniConfig{
NetworkPluginBinDir: "/opt/cni/bin",
NetworkPluginConfDir: "/etc/cni/net.d",
NetworkPluginMaxConfNum: 1, // only one CNI plugin config file will be loaded
NetworkPluginConfTemplate: "",
},
ContainerdConfig: ContainerdConfig{
Snapshotter: containerd.DefaultSnapshotter,
DefaultRuntimeName: "runc",
NoPivot: false,
Runtimes: map[string]Runtime{
"runc": {
Type: "io.containerd.runc.v1",
},
},
},
DisableTCPService: true,
StreamServerAddress: "127.0.0.1",
StreamServerPort: "0",
StreamIdleTimeout: streaming.DefaultConfig.StreamIdleTimeout.String(), // 4 hour
EnableSelinux: false,
EnableTLSStreaming: false,
X509KeyPairStreaming: X509KeyPairStreaming{
TLSKeyFile: "",
TLSCertFile: "",
},
SandboxImage: "k8s.gcr.io/pause:3.1",
StatsCollectPeriod: 10,
SystemdCgroup: false,
MaxContainerLogLineSize: 16 * 1024,
Registry: Registry{
Mirrors: map[string]Mirror{
"docker.io": {
Endpoints: []string{"https://registry-1.docker.io"},
},
},
},
MaxConcurrentDownloads: 3,
DisableProcMount: false,
}
}

View File

@@ -0,0 +1,70 @@
// +build windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package config
import (
"os"
"path/filepath"
"github.com/containerd/containerd"
"k8s.io/kubernetes/pkg/kubelet/server/streaming"
)
// DefaultConfig returns default configurations of cri plugin.
func DefaultConfig() PluginConfig {
return PluginConfig{
CniConfig: CniConfig{
NetworkPluginBinDir: filepath.Join(os.Getenv("ProgramFiles"), "containerd", "cni", "bin"),
NetworkPluginConfDir: filepath.Join(os.Getenv("ProgramFiles"), "containerd", "cni", "conf"),
NetworkPluginMaxConfNum: 1,
NetworkPluginConfTemplate: "",
},
ContainerdConfig: ContainerdConfig{
Snapshotter: containerd.DefaultSnapshotter,
DefaultRuntimeName: "runhcs-wcow-process",
NoPivot: false,
Runtimes: map[string]Runtime{
"runhcs-wcow-process": {
Type: "io.containerd.runhcs.v1",
},
},
},
DisableTCPService: true,
StreamServerAddress: "127.0.0.1",
StreamServerPort: "0",
StreamIdleTimeout: streaming.DefaultConfig.StreamIdleTimeout.String(), // 4 hour
EnableTLSStreaming: false,
X509KeyPairStreaming: X509KeyPairStreaming{
TLSKeyFile: "",
TLSCertFile: "",
},
SandboxImage: "mcr.microsoft.com/k8s/core/pause:1.2.0",
StatsCollectPeriod: 10,
MaxContainerLogLineSize: 16 * 1024,
Registry: Registry{
Mirrors: map[string]Mirror{
"docker.io": {
Endpoints: []string{"https://registry-1.docker.io"},
},
},
},
MaxConcurrentDownloads: 3,
// TODO(windows): Add platform specific config, so that most common defaults can be shared.
}
}

View File

@@ -18,99 +18,22 @@ package opts
import (
"context"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"github.com/containerd/containerd/containers"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/oci"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
"github.com/opencontainers/runc/libcontainer/devices"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
osinterface "github.com/containerd/cri/pkg/os"
"github.com/containerd/cri/pkg/util"
)
const (
// DefaultSandboxCPUshares is default cpu shares for sandbox container.
DefaultSandboxCPUshares = 2
)
// WithAdditionalGIDs adds any additional groups listed for a particular user in the
// /etc/groups file of the image's root filesystem to the OCI spec's additionalGids array.
func WithAdditionalGIDs(userstr string) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
if s.Process == nil {
s.Process = &runtimespec.Process{}
}
gids := s.Process.User.AdditionalGids
if err := oci.WithAdditionalGIDs(userstr)(ctx, client, c, s); err != nil {
return err
}
// Merge existing gids and new gids.
s.Process.User.AdditionalGids = mergeGids(s.Process.User.AdditionalGids, gids)
return nil
}
}
func mergeGids(gids1, gids2 []uint32) []uint32 {
gidsMap := make(map[uint32]struct{})
for _, gid1 := range gids1 {
gidsMap[gid1] = struct{}{}
}
for _, gid2 := range gids2 {
gidsMap[gid2] = struct{}{}
}
var gids []uint32
for gid := range gidsMap {
gids = append(gids, gid)
}
sort.Slice(gids, func(i, j int) bool { return gids[i] < gids[j] })
return gids
}
// WithoutRunMount removes the `/run` inside the spec
func WithoutRunMount(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error {
var (
mounts []runtimespec.Mount
current = s.Mounts
)
for _, m := range current {
if filepath.Clean(m.Destination) == "/run" {
continue
}
mounts = append(mounts, m)
}
s.Mounts = mounts
return nil
}
// WithoutDefaultSecuritySettings removes the default security settings generated on a spec
func WithoutDefaultSecuritySettings(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Process == nil {
s.Process = &runtimespec.Process{}
}
// Make sure no default seccomp/apparmor is specified
s.Process.ApparmorProfile = ""
if s.Linux != nil {
s.Linux.Seccomp = nil
}
// Remove default rlimits (See issue #515)
s.Process.Rlimits = nil
return nil
}
// DefaultSandboxCPUshares is default cpu shares for sandbox container.
// TODO(windows): Revisit cpu shares for windows (https://github.com/containerd/cri/issues/1297)
const DefaultSandboxCPUshares = 2
// WithRelativeRoot sets the root for the container
func WithRelativeRoot(root string) oci.SpecOpts {
@@ -123,6 +46,12 @@ func WithRelativeRoot(root string) oci.SpecOpts {
}
}
// WithoutRoot sets the root to nil for the container.
func WithoutRoot(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
s.Root = nil
return nil
}
// WithProcessArgs sets the process args on the spec based on the image and runtime config
func WithProcessArgs(config *runtime.ContainerConfig, image *imagespec.ImageConfig) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
@@ -145,141 +74,6 @@ func WithProcessArgs(config *runtime.ContainerConfig, image *imagespec.ImageConf
}
}
// WithMounts sorts and adds runtime and CRI mounts to the spec
func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount, mountLabel string) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, _ *containers.Container, s *runtimespec.Spec) (err error) {
// mergeMounts merge CRI mounts with extra mounts. If a mount destination
// is mounted by both a CRI mount and an extra mount, the CRI mount will
// be kept.
var (
criMounts = config.GetMounts()
mounts = append([]*runtime.Mount{}, criMounts...)
)
// Copy all mounts from extra mounts, except for mounts overriden by CRI.
for _, e := range extra {
found := false
for _, c := range criMounts {
if filepath.Clean(e.ContainerPath) == filepath.Clean(c.ContainerPath) {
found = true
break
}
}
if !found {
mounts = append(mounts, e)
}
}
// ---
// Sort mounts in number of parts. This ensures that high level mounts don't
// shadow other mounts.
sort.Sort(orderedMounts(mounts))
// Mount cgroup into the container as readonly, which inherits docker's behavior.
s.Mounts = append(s.Mounts, runtimespec.Mount{
Source: "cgroup",
Destination: "/sys/fs/cgroup",
Type: "cgroup",
Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
})
// Copy all mounts from default mounts, except for
// - mounts overriden by supplied mount;
// - all mounts under /dev if a supplied /dev is present.
mountSet := make(map[string]struct{})
for _, m := range mounts {
mountSet[filepath.Clean(m.ContainerPath)] = struct{}{}
}
defaultMounts := s.Mounts
s.Mounts = nil
for _, m := range defaultMounts {
dst := filepath.Clean(m.Destination)
if _, ok := mountSet[dst]; ok {
// filter out mount overridden by a supplied mount
continue
}
if _, mountDev := mountSet["/dev"]; mountDev && strings.HasPrefix(dst, "/dev/") {
// filter out everything under /dev if /dev is a supplied mount
continue
}
s.Mounts = append(s.Mounts, m)
}
for _, mount := range mounts {
var (
dst = mount.GetContainerPath()
src = mount.GetHostPath()
)
// Create the host path if it doesn't exist.
// TODO(random-liu): Add CRI validation test for this case.
if _, err := osi.Stat(src); err != nil {
if !os.IsNotExist(err) {
return errors.Wrapf(err, "failed to stat %q", src)
}
if err := osi.MkdirAll(src, 0755); err != nil {
return errors.Wrapf(err, "failed to mkdir %q", src)
}
}
// TODO(random-liu): Add cri-containerd integration test or cri validation test
// for this.
src, err := osi.ResolveSymbolicLink(src)
if err != nil {
return errors.Wrapf(err, "failed to resolve symlink %q", src)
}
if s.Linux == nil {
s.Linux = &runtimespec.Linux{}
}
options := []string{"rbind"}
switch mount.GetPropagation() {
case runtime.MountPropagation_PROPAGATION_PRIVATE:
options = append(options, "rprivate")
// Since default root propogation in runc is rprivate ignore
// setting the root propagation
case runtime.MountPropagation_PROPAGATION_BIDIRECTIONAL:
if err := ensureShared(src, osi.LookupMount); err != nil {
return err
}
options = append(options, "rshared")
s.Linux.RootfsPropagation = "rshared"
case runtime.MountPropagation_PROPAGATION_HOST_TO_CONTAINER:
if err := ensureSharedOrSlave(src, osi.LookupMount); err != nil {
return err
}
options = append(options, "rslave")
if s.Linux.RootfsPropagation != "rshared" &&
s.Linux.RootfsPropagation != "rslave" {
s.Linux.RootfsPropagation = "rslave"
}
default:
log.G(ctx).Warnf("Unknown propagation mode for hostPath %q", mount.HostPath)
options = append(options, "rprivate")
}
// NOTE(random-liu): we don't change all mounts to `ro` when root filesystem
// is readonly. This is different from docker's behavior, but make more sense.
if mount.GetReadonly() {
options = append(options, "ro")
} else {
options = append(options, "rw")
}
if mount.GetSelinuxRelabel() {
if err := label.Relabel(src, mountLabel, true); err != nil && err != unix.ENOTSUP {
return errors.Wrapf(err, "relabel %q with %q failed", src, mountLabel)
}
}
s.Mounts = append(s.Mounts, runtimespec.Mount{
Source: src,
Destination: dst,
Type: "bind",
Options: options,
})
}
return nil
}
}
// mounts defines how to sort runtime.Mount.
// This is the same with the Docker implementation:
// https://github.com/moby/moby/blob/17.05.x/daemon/volumes.go#L26
@@ -307,325 +101,6 @@ func (m orderedMounts) parts(i int) int {
return strings.Count(filepath.Clean(m[i].ContainerPath), string(os.PathSeparator))
}
// Ensure mount point on which path is mounted, is shared.
func ensureShared(path string, lookupMount func(string) (mount.Info, error)) error {
mountInfo, err := lookupMount(path)
if err != nil {
return err
}
// Make sure source mount point is shared.
optsSplit := strings.Split(mountInfo.Optional, " ")
for _, opt := range optsSplit {
if strings.HasPrefix(opt, "shared:") {
return nil
}
}
return errors.Errorf("path %q is mounted on %q but it is not a shared mount", path, mountInfo.Mountpoint)
}
// ensure mount point on which path is mounted, is either shared or slave.
func ensureSharedOrSlave(path string, lookupMount func(string) (mount.Info, error)) error {
mountInfo, err := lookupMount(path)
if err != nil {
return err
}
// Make sure source mount point is shared.
optsSplit := strings.Split(mountInfo.Optional, " ")
for _, opt := range optsSplit {
if strings.HasPrefix(opt, "shared:") {
return nil
} else if strings.HasPrefix(opt, "master:") {
return nil
}
}
return errors.Errorf("path %q is mounted on %q but it is not a shared or slave mount", path, mountInfo.Mountpoint)
}
// WithPrivilegedDevices allows all host devices inside the container
func WithPrivilegedDevices(_ context.Context, _ oci.Client, _ *containers.Container, s *runtimespec.Spec) error {
if s.Linux == nil {
s.Linux = &runtimespec.Linux{}
}
if s.Linux.Resources == nil {
s.Linux.Resources = &runtimespec.LinuxResources{}
}
hostDevices, err := devices.HostDevices()
if err != nil {
return err
}
for _, hostDevice := range hostDevices {
rd := runtimespec.LinuxDevice{
Path: hostDevice.Path,
Type: string(hostDevice.Type),
Major: hostDevice.Major,
Minor: hostDevice.Minor,
UID: &hostDevice.Uid,
GID: &hostDevice.Gid,
}
if hostDevice.Major == 0 && hostDevice.Minor == 0 {
// Invalid device, most likely a symbolic link, skip it.
continue
}
addDevice(s, rd)
}
s.Linux.Resources.Devices = []runtimespec.LinuxDeviceCgroup{
{
Allow: true,
Access: "rwm",
},
}
return nil
}
func addDevice(s *runtimespec.Spec, rd runtimespec.LinuxDevice) {
for i, dev := range s.Linux.Devices {
if dev.Path == rd.Path {
s.Linux.Devices[i] = rd
return
}
}
s.Linux.Devices = append(s.Linux.Devices, rd)
}
// WithDevices sets the provided devices onto the container spec
func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
if s.Linux == nil {
s.Linux = &runtimespec.Linux{}
}
if s.Linux.Resources == nil {
s.Linux.Resources = &runtimespec.LinuxResources{}
}
for _, device := range config.GetDevices() {
path, err := osi.ResolveSymbolicLink(device.HostPath)
if err != nil {
return err
}
dev, err := devices.DeviceFromPath(path, device.Permissions)
if err != nil {
return err
}
rd := runtimespec.LinuxDevice{
Path: device.ContainerPath,
Type: string(dev.Type),
Major: dev.Major,
Minor: dev.Minor,
UID: &dev.Uid,
GID: &dev.Gid,
}
addDevice(s, rd)
s.Linux.Resources.Devices = append(s.Linux.Resources.Devices, runtimespec.LinuxDeviceCgroup{
Allow: true,
Type: string(dev.Type),
Major: &dev.Major,
Minor: &dev.Minor,
Access: dev.Permissions,
})
}
return nil
}
}
// WithCapabilities sets the provided capabilties from the security context
func WithCapabilities(sc *runtime.LinuxContainerSecurityContext) oci.SpecOpts {
capabilities := sc.GetCapabilities()
if capabilities == nil {
return nullOpt
}
var opts []oci.SpecOpts
// Add/drop all capabilities if "all" is specified, so that
// following individual add/drop could still work. E.g.
// AddCapabilities: []string{"ALL"}, DropCapabilities: []string{"CHOWN"}
// will be all capabilities without `CAP_CHOWN`.
if util.InStringSlice(capabilities.GetAddCapabilities(), "ALL") {
opts = append(opts, oci.WithAllCapabilities)
}
if util.InStringSlice(capabilities.GetDropCapabilities(), "ALL") {
opts = append(opts, oci.WithCapabilities(nil))
}
var caps []string
for _, c := range capabilities.GetAddCapabilities() {
if strings.ToUpper(c) == "ALL" {
continue
}
// Capabilities in CRI doesn't have `CAP_` prefix, so add it.
caps = append(caps, "CAP_"+strings.ToUpper(c))
}
opts = append(opts, oci.WithAddedCapabilities(caps))
caps = []string{}
for _, c := range capabilities.GetDropCapabilities() {
if strings.ToUpper(c) == "ALL" {
continue
}
caps = append(caps, "CAP_"+strings.ToUpper(c))
}
opts = append(opts, oci.WithDroppedCapabilities(caps))
return oci.Compose(opts...)
}
// WithoutAmbientCaps removes the ambient caps from the spec
func WithoutAmbientCaps(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Process == nil {
s.Process = &runtimespec.Process{}
}
if s.Process.Capabilities == nil {
s.Process.Capabilities = &runtimespec.LinuxCapabilities{}
}
s.Process.Capabilities.Ambient = nil
return nil
}
// WithDisabledCgroups clears the Cgroups Path from the spec
func WithDisabledCgroups(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Linux == nil {
s.Linux = &runtimespec.Linux{}
}
s.Linux.CgroupsPath = ""
return nil
}
// WithSelinuxLabels sets the mount and process labels
func WithSelinuxLabels(process, mount string) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
if s.Linux == nil {
s.Linux = &runtimespec.Linux{}
}
if s.Process == nil {
s.Process = &runtimespec.Process{}
}
s.Linux.MountLabel = mount
s.Process.SelinuxLabel = process
return nil
}
}
// WithResources sets the provided resource restrictions
func WithResources(resources *runtime.LinuxContainerResources) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
if resources == nil {
return nil
}
if s.Linux == nil {
s.Linux = &runtimespec.Linux{}
}
if s.Linux.Resources == nil {
s.Linux.Resources = &runtimespec.LinuxResources{}
}
if s.Linux.Resources.CPU == nil {
s.Linux.Resources.CPU = &runtimespec.LinuxCPU{}
}
if s.Linux.Resources.Memory == nil {
s.Linux.Resources.Memory = &runtimespec.LinuxMemory{}
}
var (
p = uint64(resources.GetCpuPeriod())
q = resources.GetCpuQuota()
shares = uint64(resources.GetCpuShares())
limit = resources.GetMemoryLimitInBytes()
)
if p != 0 {
s.Linux.Resources.CPU.Period = &p
}
if q != 0 {
s.Linux.Resources.CPU.Quota = &q
}
if shares != 0 {
s.Linux.Resources.CPU.Shares = &shares
}
if cpus := resources.GetCpusetCpus(); cpus != "" {
s.Linux.Resources.CPU.Cpus = cpus
}
if mems := resources.GetCpusetMems(); mems != "" {
s.Linux.Resources.CPU.Mems = resources.GetCpusetMems()
}
if limit != 0 {
s.Linux.Resources.Memory.Limit = &limit
}
return nil
}
}
// WithOOMScoreAdj sets the oom score
func WithOOMScoreAdj(config *runtime.ContainerConfig, restrict bool) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Process == nil {
s.Process = &runtimespec.Process{}
}
resources := config.GetLinux().GetResources()
if resources == nil {
return nil
}
adj := int(resources.GetOomScoreAdj())
if restrict {
var err error
adj, err = restrictOOMScoreAdj(adj)
if err != nil {
return err
}
}
s.Process.OOMScoreAdj = &adj
return nil
}
}
// WithSysctls sets the provided sysctls onto the spec
func WithSysctls(sysctls map[string]string) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Linux == nil {
s.Linux = &runtimespec.Linux{}
}
if s.Linux.Sysctl == nil {
s.Linux.Sysctl = make(map[string]string)
}
for k, v := range sysctls {
s.Linux.Sysctl[k] = v
}
return nil
}
}
// WithPodOOMScoreAdj sets the oom score for the pod sandbox
func WithPodOOMScoreAdj(adj int, restrict bool) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Process == nil {
s.Process = &runtimespec.Process{}
}
if restrict {
var err error
adj, err = restrictOOMScoreAdj(adj)
if err != nil {
return err
}
}
s.Process.OOMScoreAdj = &adj
return nil
}
}
// WithSupplementalGroups sets the supplemental groups for the process
func WithSupplementalGroups(groups []int64) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Process == nil {
s.Process = &runtimespec.Process{}
}
var guids []uint32
for _, g := range groups {
guids = append(guids, uint32(g))
}
s.Process.User.AdditionalGids = mergeGids(s.Process.User.AdditionalGids, guids)
return nil
}
}
// WithAnnotation sets the provided annotation
func WithAnnotation(k, v string) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
@@ -636,110 +111,3 @@ func WithAnnotation(k, v string) oci.SpecOpts {
return nil
}
}
// WithPodNamespaces sets the pod namespaces for the container
func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, pid uint32) oci.SpecOpts {
namespaces := config.GetNamespaceOptions()
opts := []oci.SpecOpts{
oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.NetworkNamespace, Path: GetNetworkNamespace(pid)}),
oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.IPCNamespace, Path: GetIPCNamespace(pid)}),
oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.UTSNamespace, Path: GetUTSNamespace(pid)}),
}
if namespaces.GetPid() != runtime.NamespaceMode_CONTAINER {
opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.PIDNamespace, Path: GetPIDNamespace(pid)}))
}
return oci.Compose(opts...)
}
// WithDefaultSandboxShares sets the default sandbox CPU shares
func WithDefaultSandboxShares(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Linux == nil {
s.Linux = &runtimespec.Linux{}
}
if s.Linux.Resources == nil {
s.Linux.Resources = &runtimespec.LinuxResources{}
}
if s.Linux.Resources.CPU == nil {
s.Linux.Resources.CPU = &runtimespec.LinuxCPU{}
}
i := uint64(DefaultSandboxCPUshares)
s.Linux.Resources.CPU.Shares = &i
return nil
}
// WithoutNamespace removes the provided namespace
func WithoutNamespace(t runtimespec.LinuxNamespaceType) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Linux == nil {
return nil
}
var namespaces []runtimespec.LinuxNamespace
for i, ns := range s.Linux.Namespaces {
if ns.Type != t {
namespaces = append(namespaces, s.Linux.Namespaces[i])
}
}
s.Linux.Namespaces = namespaces
return nil
}
}
func nullOpt(_ context.Context, _ oci.Client, _ *containers.Container, _ *runtimespec.Spec) error {
return nil
}
func getCurrentOOMScoreAdj() (int, error) {
b, err := ioutil.ReadFile("/proc/self/oom_score_adj")
if err != nil {
return 0, errors.Wrap(err, "could not get the daemon oom_score_adj")
}
s := strings.TrimSpace(string(b))
i, err := strconv.Atoi(s)
if err != nil {
return 0, errors.Wrap(err, "could not get the daemon oom_score_adj")
}
return i, nil
}
func restrictOOMScoreAdj(preferredOOMScoreAdj int) (int, error) {
currentOOMScoreAdj, err := getCurrentOOMScoreAdj()
if err != nil {
return preferredOOMScoreAdj, err
}
if preferredOOMScoreAdj < currentOOMScoreAdj {
return currentOOMScoreAdj, nil
}
return preferredOOMScoreAdj, nil
}
const (
// netNSFormat is the format of network namespace of a process.
netNSFormat = "/proc/%v/ns/net"
// ipcNSFormat is the format of ipc namespace of a process.
ipcNSFormat = "/proc/%v/ns/ipc"
// utsNSFormat is the format of uts namespace of a process.
utsNSFormat = "/proc/%v/ns/uts"
// pidNSFormat is the format of pid namespace of a process.
pidNSFormat = "/proc/%v/ns/pid"
)
// GetNetworkNamespace returns the network namespace of a process.
func GetNetworkNamespace(pid uint32) string {
return fmt.Sprintf(netNSFormat, pid)
}
// GetIPCNamespace returns the ipc namespace of a process.
func GetIPCNamespace(pid uint32) string {
return fmt.Sprintf(ipcNSFormat, pid)
}
// GetUTSNamespace returns the uts namespace of a process.
func GetUTSNamespace(pid uint32) string {
return fmt.Sprintf(utsNSFormat, pid)
}
// GetPIDNamespace returns the pid namespace of a process.
func GetPIDNamespace(pid uint32) string {
return fmt.Sprintf(pidNSFormat, pid)
}

View File

@@ -0,0 +1,668 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package opts
import (
"context"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"sort"
"strconv"
"strings"
"github.com/containerd/containerd/containers"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/mount"
"github.com/containerd/containerd/oci"
"github.com/opencontainers/runc/libcontainer/devices"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
osinterface "github.com/containerd/cri/pkg/os"
"github.com/containerd/cri/pkg/util"
)
// WithAdditionalGIDs adds any additional groups listed for a particular user in the
// /etc/groups file of the image's root filesystem to the OCI spec's additionalGids array.
func WithAdditionalGIDs(userstr string) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
if s.Process == nil {
s.Process = &runtimespec.Process{}
}
gids := s.Process.User.AdditionalGids
if err := oci.WithAdditionalGIDs(userstr)(ctx, client, c, s); err != nil {
return err
}
// Merge existing gids and new gids.
s.Process.User.AdditionalGids = mergeGids(s.Process.User.AdditionalGids, gids)
return nil
}
}
func mergeGids(gids1, gids2 []uint32) []uint32 {
gidsMap := make(map[uint32]struct{})
for _, gid1 := range gids1 {
gidsMap[gid1] = struct{}{}
}
for _, gid2 := range gids2 {
gidsMap[gid2] = struct{}{}
}
var gids []uint32
for gid := range gidsMap {
gids = append(gids, gid)
}
sort.Slice(gids, func(i, j int) bool { return gids[i] < gids[j] })
return gids
}
// WithoutRunMount removes the `/run` inside the spec
func WithoutRunMount(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error {
var (
mounts []runtimespec.Mount
current = s.Mounts
)
for _, m := range current {
if filepath.Clean(m.Destination) == "/run" {
continue
}
mounts = append(mounts, m)
}
s.Mounts = mounts
return nil
}
// WithoutDefaultSecuritySettings removes the default security settings generated on a spec
func WithoutDefaultSecuritySettings(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Process == nil {
s.Process = &runtimespec.Process{}
}
// Make sure no default seccomp/apparmor is specified
s.Process.ApparmorProfile = ""
if s.Linux != nil {
s.Linux.Seccomp = nil
}
// Remove default rlimits (See issue #515)
s.Process.Rlimits = nil
return nil
}
// WithMounts sorts and adds runtime and CRI mounts to the spec
func WithMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount, mountLabel string) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, _ *containers.Container, s *runtimespec.Spec) (err error) {
// mergeMounts merge CRI mounts with extra mounts. If a mount destination
// is mounted by both a CRI mount and an extra mount, the CRI mount will
// be kept.
var (
criMounts = config.GetMounts()
mounts = append([]*runtime.Mount{}, criMounts...)
)
// Copy all mounts from extra mounts, except for mounts overridden by CRI.
for _, e := range extra {
found := false
for _, c := range criMounts {
if filepath.Clean(e.ContainerPath) == filepath.Clean(c.ContainerPath) {
found = true
break
}
}
if !found {
mounts = append(mounts, e)
}
}
// Sort mounts in number of parts. This ensures that high level mounts don't
// shadow other mounts.
sort.Sort(orderedMounts(mounts))
// Mount cgroup into the container as readonly, which inherits docker's behavior.
s.Mounts = append(s.Mounts, runtimespec.Mount{
Source: "cgroup",
Destination: "/sys/fs/cgroup",
Type: "cgroup",
Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
})
// Copy all mounts from default mounts, except for
// - mounts overridden by supplied mount;
// - all mounts under /dev if a supplied /dev is present.
mountSet := make(map[string]struct{})
for _, m := range mounts {
mountSet[filepath.Clean(m.ContainerPath)] = struct{}{}
}
defaultMounts := s.Mounts
s.Mounts = nil
for _, m := range defaultMounts {
dst := filepath.Clean(m.Destination)
if _, ok := mountSet[dst]; ok {
// filter out mount overridden by a supplied mount
continue
}
if _, mountDev := mountSet["/dev"]; mountDev && strings.HasPrefix(dst, "/dev/") {
// filter out everything under /dev if /dev is a supplied mount
continue
}
s.Mounts = append(s.Mounts, m)
}
for _, mount := range mounts {
var (
dst = mount.GetContainerPath()
src = mount.GetHostPath()
)
// Create the host path if it doesn't exist.
// TODO(random-liu): Add CRI validation test for this case.
if _, err := osi.Stat(src); err != nil {
if !os.IsNotExist(err) {
return errors.Wrapf(err, "failed to stat %q", src)
}
if err := osi.MkdirAll(src, 0755); err != nil {
return errors.Wrapf(err, "failed to mkdir %q", src)
}
}
// TODO(random-liu): Add cri-containerd integration test or cri validation test
// for this.
src, err := osi.ResolveSymbolicLink(src)
if err != nil {
return errors.Wrapf(err, "failed to resolve symlink %q", src)
}
if s.Linux == nil {
s.Linux = &runtimespec.Linux{}
}
options := []string{"rbind"}
switch mount.GetPropagation() {
case runtime.MountPropagation_PROPAGATION_PRIVATE:
options = append(options, "rprivate")
// Since default root propagation in runc is rprivate ignore
// setting the root propagation
case runtime.MountPropagation_PROPAGATION_BIDIRECTIONAL:
if err := ensureShared(src, osi.(osinterface.UNIX).LookupMount); err != nil {
return err
}
options = append(options, "rshared")
s.Linux.RootfsPropagation = "rshared"
case runtime.MountPropagation_PROPAGATION_HOST_TO_CONTAINER:
if err := ensureSharedOrSlave(src, osi.(osinterface.UNIX).LookupMount); err != nil {
return err
}
options = append(options, "rslave")
if s.Linux.RootfsPropagation != "rshared" &&
s.Linux.RootfsPropagation != "rslave" {
s.Linux.RootfsPropagation = "rslave"
}
default:
log.G(ctx).Warnf("Unknown propagation mode for hostPath %q", mount.HostPath)
options = append(options, "rprivate")
}
// NOTE(random-liu): we don't change all mounts to `ro` when root filesystem
// is readonly. This is different from docker's behavior, but make more sense.
if mount.GetReadonly() {
options = append(options, "ro")
} else {
options = append(options, "rw")
}
if mount.GetSelinuxRelabel() {
if err := label.Relabel(src, mountLabel, true); err != nil && err != unix.ENOTSUP {
return errors.Wrapf(err, "relabel %q with %q failed", src, mountLabel)
}
}
s.Mounts = append(s.Mounts, runtimespec.Mount{
Source: src,
Destination: dst,
Type: "bind",
Options: options,
})
}
return nil
}
}
// Ensure mount point on which path is mounted, is shared.
func ensureShared(path string, lookupMount func(string) (mount.Info, error)) error {
mountInfo, err := lookupMount(path)
if err != nil {
return err
}
// Make sure source mount point is shared.
optsSplit := strings.Split(mountInfo.Optional, " ")
for _, opt := range optsSplit {
if strings.HasPrefix(opt, "shared:") {
return nil
}
}
return errors.Errorf("path %q is mounted on %q but it is not a shared mount", path, mountInfo.Mountpoint)
}
// ensure mount point on which path is mounted, is either shared or slave.
func ensureSharedOrSlave(path string, lookupMount func(string) (mount.Info, error)) error {
mountInfo, err := lookupMount(path)
if err != nil {
return err
}
// Make sure source mount point is shared.
optsSplit := strings.Split(mountInfo.Optional, " ")
for _, opt := range optsSplit {
if strings.HasPrefix(opt, "shared:") {
return nil
} else if strings.HasPrefix(opt, "master:") {
return nil
}
}
return errors.Errorf("path %q is mounted on %q but it is not a shared or slave mount", path, mountInfo.Mountpoint)
}
// WithPrivilegedDevices allows all host devices inside the container
func WithPrivilegedDevices(_ context.Context, _ oci.Client, _ *containers.Container, s *runtimespec.Spec) error {
if s.Linux == nil {
s.Linux = &runtimespec.Linux{}
}
if s.Linux.Resources == nil {
s.Linux.Resources = &runtimespec.LinuxResources{}
}
hostDevices, err := devices.HostDevices()
if err != nil {
return err
}
for _, hostDevice := range hostDevices {
rd := runtimespec.LinuxDevice{
Path: hostDevice.Path,
Type: string(hostDevice.Type),
Major: hostDevice.Major,
Minor: hostDevice.Minor,
UID: &hostDevice.Uid,
GID: &hostDevice.Gid,
}
if hostDevice.Major == 0 && hostDevice.Minor == 0 {
// Invalid device, most likely a symbolic link, skip it.
continue
}
addDevice(s, rd)
}
s.Linux.Resources.Devices = []runtimespec.LinuxDeviceCgroup{
{
Allow: true,
Access: "rwm",
},
}
return nil
}
func addDevice(s *runtimespec.Spec, rd runtimespec.LinuxDevice) {
for i, dev := range s.Linux.Devices {
if dev.Path == rd.Path {
s.Linux.Devices[i] = rd
return
}
}
s.Linux.Devices = append(s.Linux.Devices, rd)
}
// WithDevices sets the provided devices onto the container spec
func WithDevices(osi osinterface.OS, config *runtime.ContainerConfig) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
if s.Linux == nil {
s.Linux = &runtimespec.Linux{}
}
if s.Linux.Resources == nil {
s.Linux.Resources = &runtimespec.LinuxResources{}
}
for _, device := range config.GetDevices() {
path, err := osi.ResolveSymbolicLink(device.HostPath)
if err != nil {
return err
}
dev, err := devices.DeviceFromPath(path, device.Permissions)
if err != nil {
return err
}
rd := runtimespec.LinuxDevice{
Path: device.ContainerPath,
Type: string(dev.Type),
Major: dev.Major,
Minor: dev.Minor,
UID: &dev.Uid,
GID: &dev.Gid,
}
addDevice(s, rd)
s.Linux.Resources.Devices = append(s.Linux.Resources.Devices, runtimespec.LinuxDeviceCgroup{
Allow: true,
Type: string(dev.Type),
Major: &dev.Major,
Minor: &dev.Minor,
Access: dev.Permissions,
})
}
return nil
}
}
// WithCapabilities sets the provided capabilties from the security context
func WithCapabilities(sc *runtime.LinuxContainerSecurityContext) oci.SpecOpts {
capabilities := sc.GetCapabilities()
if capabilities == nil {
return nullOpt
}
var opts []oci.SpecOpts
// Add/drop all capabilities if "all" is specified, so that
// following individual add/drop could still work. E.g.
// AddCapabilities: []string{"ALL"}, DropCapabilities: []string{"CHOWN"}
// will be all capabilities without `CAP_CHOWN`.
if util.InStringSlice(capabilities.GetAddCapabilities(), "ALL") {
opts = append(opts, oci.WithAllCapabilities)
}
if util.InStringSlice(capabilities.GetDropCapabilities(), "ALL") {
opts = append(opts, oci.WithCapabilities(nil))
}
var caps []string
for _, c := range capabilities.GetAddCapabilities() {
if strings.ToUpper(c) == "ALL" {
continue
}
// Capabilities in CRI doesn't have `CAP_` prefix, so add it.
caps = append(caps, "CAP_"+strings.ToUpper(c))
}
opts = append(opts, oci.WithAddedCapabilities(caps))
caps = []string{}
for _, c := range capabilities.GetDropCapabilities() {
if strings.ToUpper(c) == "ALL" {
continue
}
caps = append(caps, "CAP_"+strings.ToUpper(c))
}
opts = append(opts, oci.WithDroppedCapabilities(caps))
return oci.Compose(opts...)
}
// WithoutAmbientCaps removes the ambient caps from the spec
func WithoutAmbientCaps(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Process == nil {
s.Process = &runtimespec.Process{}
}
if s.Process.Capabilities == nil {
s.Process.Capabilities = &runtimespec.LinuxCapabilities{}
}
s.Process.Capabilities.Ambient = nil
return nil
}
// WithDisabledCgroups clears the Cgroups Path from the spec
func WithDisabledCgroups(_ context.Context, _ oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Linux == nil {
s.Linux = &runtimespec.Linux{}
}
s.Linux.CgroupsPath = ""
return nil
}
// WithSelinuxLabels sets the mount and process labels
func WithSelinuxLabels(process, mount string) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
if s.Linux == nil {
s.Linux = &runtimespec.Linux{}
}
if s.Process == nil {
s.Process = &runtimespec.Process{}
}
s.Linux.MountLabel = mount
s.Process.SelinuxLabel = process
return nil
}
}
// WithResources sets the provided resource restrictions
func WithResources(resources *runtime.LinuxContainerResources) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) {
if resources == nil {
return nil
}
if s.Linux == nil {
s.Linux = &runtimespec.Linux{}
}
if s.Linux.Resources == nil {
s.Linux.Resources = &runtimespec.LinuxResources{}
}
if s.Linux.Resources.CPU == nil {
s.Linux.Resources.CPU = &runtimespec.LinuxCPU{}
}
if s.Linux.Resources.Memory == nil {
s.Linux.Resources.Memory = &runtimespec.LinuxMemory{}
}
var (
p = uint64(resources.GetCpuPeriod())
q = resources.GetCpuQuota()
shares = uint64(resources.GetCpuShares())
limit = resources.GetMemoryLimitInBytes()
)
if p != 0 {
s.Linux.Resources.CPU.Period = &p
}
if q != 0 {
s.Linux.Resources.CPU.Quota = &q
}
if shares != 0 {
s.Linux.Resources.CPU.Shares = &shares
}
if cpus := resources.GetCpusetCpus(); cpus != "" {
s.Linux.Resources.CPU.Cpus = cpus
}
if mems := resources.GetCpusetMems(); mems != "" {
s.Linux.Resources.CPU.Mems = resources.GetCpusetMems()
}
if limit != 0 {
s.Linux.Resources.Memory.Limit = &limit
}
return nil
}
}
// WithOOMScoreAdj sets the oom score
func WithOOMScoreAdj(config *runtime.ContainerConfig, restrict bool) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Process == nil {
s.Process = &runtimespec.Process{}
}
resources := config.GetLinux().GetResources()
if resources == nil {
return nil
}
adj := int(resources.GetOomScoreAdj())
if restrict {
var err error
adj, err = restrictOOMScoreAdj(adj)
if err != nil {
return err
}
}
s.Process.OOMScoreAdj = &adj
return nil
}
}
// WithSysctls sets the provided sysctls onto the spec
func WithSysctls(sysctls map[string]string) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Linux == nil {
s.Linux = &runtimespec.Linux{}
}
if s.Linux.Sysctl == nil {
s.Linux.Sysctl = make(map[string]string)
}
for k, v := range sysctls {
s.Linux.Sysctl[k] = v
}
return nil
}
}
// WithPodOOMScoreAdj sets the oom score for the pod sandbox
func WithPodOOMScoreAdj(adj int, restrict bool) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Process == nil {
s.Process = &runtimespec.Process{}
}
if restrict {
var err error
adj, err = restrictOOMScoreAdj(adj)
if err != nil {
return err
}
}
s.Process.OOMScoreAdj = &adj
return nil
}
}
// WithSupplementalGroups sets the supplemental groups for the process
func WithSupplementalGroups(groups []int64) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Process == nil {
s.Process = &runtimespec.Process{}
}
var guids []uint32
for _, g := range groups {
guids = append(guids, uint32(g))
}
s.Process.User.AdditionalGids = mergeGids(s.Process.User.AdditionalGids, guids)
return nil
}
}
// WithPodNamespaces sets the pod namespaces for the container
func WithPodNamespaces(config *runtime.LinuxContainerSecurityContext, pid uint32) oci.SpecOpts {
namespaces := config.GetNamespaceOptions()
opts := []oci.SpecOpts{
oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.NetworkNamespace, Path: GetNetworkNamespace(pid)}),
oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.IPCNamespace, Path: GetIPCNamespace(pid)}),
oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.UTSNamespace, Path: GetUTSNamespace(pid)}),
}
if namespaces.GetPid() != runtime.NamespaceMode_CONTAINER {
opts = append(opts, oci.WithLinuxNamespace(runtimespec.LinuxNamespace{Type: runtimespec.PIDNamespace, Path: GetPIDNamespace(pid)}))
}
return oci.Compose(opts...)
}
// WithDefaultSandboxShares sets the default sandbox CPU shares
func WithDefaultSandboxShares(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Linux == nil {
s.Linux = &runtimespec.Linux{}
}
if s.Linux.Resources == nil {
s.Linux.Resources = &runtimespec.LinuxResources{}
}
if s.Linux.Resources.CPU == nil {
s.Linux.Resources.CPU = &runtimespec.LinuxCPU{}
}
i := uint64(DefaultSandboxCPUshares)
s.Linux.Resources.CPU.Shares = &i
return nil
}
// WithoutNamespace removes the provided namespace
func WithoutNamespace(t runtimespec.LinuxNamespaceType) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Linux == nil {
return nil
}
var namespaces []runtimespec.LinuxNamespace
for i, ns := range s.Linux.Namespaces {
if ns.Type != t {
namespaces = append(namespaces, s.Linux.Namespaces[i])
}
}
s.Linux.Namespaces = namespaces
return nil
}
}
func nullOpt(_ context.Context, _ oci.Client, _ *containers.Container, _ *runtimespec.Spec) error {
return nil
}
func getCurrentOOMScoreAdj() (int, error) {
b, err := ioutil.ReadFile("/proc/self/oom_score_adj")
if err != nil {
return 0, errors.Wrap(err, "could not get the daemon oom_score_adj")
}
s := strings.TrimSpace(string(b))
i, err := strconv.Atoi(s)
if err != nil {
return 0, errors.Wrap(err, "could not get the daemon oom_score_adj")
}
return i, nil
}
func restrictOOMScoreAdj(preferredOOMScoreAdj int) (int, error) {
currentOOMScoreAdj, err := getCurrentOOMScoreAdj()
if err != nil {
return preferredOOMScoreAdj, err
}
if preferredOOMScoreAdj < currentOOMScoreAdj {
return currentOOMScoreAdj, nil
}
return preferredOOMScoreAdj, nil
}
const (
// netNSFormat is the format of network namespace of a process.
netNSFormat = "/proc/%v/ns/net"
// ipcNSFormat is the format of ipc namespace of a process.
ipcNSFormat = "/proc/%v/ns/ipc"
// utsNSFormat is the format of uts namespace of a process.
utsNSFormat = "/proc/%v/ns/uts"
// pidNSFormat is the format of pid namespace of a process.
pidNSFormat = "/proc/%v/ns/pid"
)
// GetNetworkNamespace returns the network namespace of a process.
func GetNetworkNamespace(pid uint32) string {
return fmt.Sprintf(netNSFormat, pid)
}
// GetIPCNamespace returns the ipc namespace of a process.
func GetIPCNamespace(pid uint32) string {
return fmt.Sprintf(ipcNSFormat, pid)
}
// GetUTSNamespace returns the uts namespace of a process.
func GetUTSNamespace(pid uint32) string {
return fmt.Sprintf(utsNSFormat, pid)
}
// GetPIDNamespace returns the pid namespace of a process.
func GetPIDNamespace(pid uint32) string {
return fmt.Sprintf(pidNSFormat, pid)
}

View File

@@ -0,0 +1,190 @@
// +build windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package opts
import (
"context"
"path/filepath"
"sort"
"github.com/containerd/containerd/containers"
"github.com/containerd/containerd/oci"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
osinterface "github.com/containerd/cri/pkg/os"
)
// WithWindowsNetworkNamespace sets windows network namespace for container.
// TODO(windows): Move this into container/containerd.
func WithWindowsNetworkNamespace(path string) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Windows == nil {
s.Windows = &runtimespec.Windows{}
}
if s.Windows.Network == nil {
s.Windows.Network = &runtimespec.WindowsNetwork{}
}
s.Windows.Network.NetworkNamespace = path
return nil
}
}
// WithWindowsMounts sorts and adds runtime and CRI mounts to the spec for
// windows container.
func WithWindowsMounts(osi osinterface.OS, config *runtime.ContainerConfig, extra []*runtime.Mount) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, _ *containers.Container, s *runtimespec.Spec) error {
// mergeMounts merge CRI mounts with extra mounts. If a mount destination
// is mounted by both a CRI mount and an extra mount, the CRI mount will
// be kept.
var (
criMounts = config.GetMounts()
mounts = append([]*runtime.Mount{}, criMounts...)
)
// Copy all mounts from extra mounts, except for mounts overridden by CRI.
for _, e := range extra {
found := false
for _, c := range criMounts {
if filepath.Clean(e.ContainerPath) == filepath.Clean(c.ContainerPath) {
found = true
break
}
}
if !found {
mounts = append(mounts, e)
}
}
// Sort mounts in number of parts. This ensures that high level mounts don't
// shadow other mounts.
sort.Sort(orderedMounts(mounts))
// Copy all mounts from default mounts, except for
// - mounts overridden by supplied mount;
// - all mounts under /dev if a supplied /dev is present.
mountSet := make(map[string]struct{})
for _, m := range mounts {
mountSet[filepath.Clean(m.ContainerPath)] = struct{}{}
}
defaultMounts := s.Mounts
s.Mounts = nil
for _, m := range defaultMounts {
dst := filepath.Clean(m.Destination)
if _, ok := mountSet[dst]; ok {
// filter out mount overridden by a supplied mount
continue
}
s.Mounts = append(s.Mounts, m)
}
for _, mount := range mounts {
var (
dst = mount.GetContainerPath()
src = mount.GetHostPath()
)
// TODO(windows): Support special mount sources, e.g. named pipe.
// Create the host path if it doesn't exist.
if _, err := osi.Stat(src); err != nil {
// If the source doesn't exist, return an error instead
// of creating the source. This aligns with Docker's
// behavior on windows.
return errors.Wrapf(err, "failed to stat %q", src)
}
src, err := osi.ResolveSymbolicLink(src)
if err != nil {
return errors.Wrapf(err, "failed to resolve symlink %q", src)
}
var options []string
// NOTE(random-liu): we don't change all mounts to `ro` when root filesystem
// is readonly. This is different from docker's behavior, but make more sense.
if mount.GetReadonly() {
options = append(options, "ro")
} else {
options = append(options, "rw")
}
s.Mounts = append(s.Mounts, runtimespec.Mount{
Source: src,
Destination: dst,
Options: options,
})
}
return nil
}
}
// WithWindowsResources sets the provided resource restrictions for windows.
func WithWindowsResources(resources *runtime.WindowsContainerResources) oci.SpecOpts {
return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if resources == nil {
return nil
}
if s.Windows == nil {
s.Windows = &runtimespec.Windows{}
}
if s.Windows.Resources == nil {
s.Windows.Resources = &runtimespec.WindowsResources{}
}
if s.Windows.Resources.CPU == nil {
s.Windows.Resources.CPU = &runtimespec.WindowsCPUResources{}
}
if s.Windows.Resources.Memory == nil {
s.Windows.Resources.Memory = &runtimespec.WindowsMemoryResources{}
}
var (
count = uint64(resources.GetCpuCount())
shares = uint16(resources.GetCpuShares())
max = uint16(resources.GetCpuMaximum())
limit = uint64(resources.GetMemoryLimitInBytes())
)
if count != 0 {
s.Windows.Resources.CPU.Count = &count
}
if shares != 0 {
s.Windows.Resources.CPU.Shares = &shares
}
if max != 0 {
s.Windows.Resources.CPU.Maximum = &max
}
if limit != 0 {
s.Windows.Resources.Memory.Limit = &limit
}
return nil
}
}
// WithWindowsDefaultSandboxShares sets the default sandbox CPU shares
func WithWindowsDefaultSandboxShares(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) error {
if s.Windows == nil {
s.Windows = &runtimespec.Windows{}
}
if s.Windows.Resources == nil {
s.Windows.Resources = &runtimespec.WindowsResources{}
}
if s.Windows.Resources.CPU == nil {
s.Windows.Resources.CPU = &runtimespec.WindowsCPUResources{}
}
i := uint16(DefaultSandboxCPUshares)
s.Windows.Resources.CPU.Shares = &i
return nil
}

View File

@@ -0,0 +1,28 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package platforms
import (
"github.com/containerd/containerd/platforms"
)
// Default returns the current platform's default platform specification.
func Default() platforms.MatchComparer {
return platforms.Default()
}

View File

@@ -0,0 +1,77 @@
// +build windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package platforms
import (
"fmt"
"strconv"
"strings"
"github.com/containerd/containerd/platforms"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
"golang.org/x/sys/windows"
)
type matchComparer struct {
defaults platforms.Matcher
osVersionPrefix string
}
// Match matches platform with the same windows major, minor
// and build version.
func (m matchComparer) Match(p imagespec.Platform) bool {
if m.defaults.Match(p) {
// TODO(windows): Figure out whether OSVersion is deprecated.
return strings.HasPrefix(p.OSVersion, m.osVersionPrefix)
}
return false
}
// Less sorts matched platforms in front of other platforms.
// For matched platforms, it puts platforms with larger revision
// number in front.
func (m matchComparer) Less(p1, p2 imagespec.Platform) bool {
m1, m2 := m.Match(p1), m.Match(p2)
if m1 && m2 {
r1, r2 := revision(p1.OSVersion), revision(p2.OSVersion)
return r1 > r2
}
return m1 && !m2
}
func revision(v string) int {
parts := strings.Split(v, ".")
if len(parts) < 4 {
return 0
}
r, err := strconv.Atoi(parts[3])
if err != nil {
return 0
}
return r
}
// Default returns the current platform's default platform specification.
func Default() platforms.MatchComparer {
major, minor, build := windows.RtlGetNtVersionNumbers()
return matchComparer{
defaults: platforms.Only(platforms.DefaultSpec()),
osVersionPrefix: fmt.Sprintf("%d.%d.%d", major, minor, build),
}
}

View File

@@ -1,5 +1,7 @@
// +build !windows
/*
Copyright 2018 The Containerd Authors.
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -0,0 +1,78 @@
// +build windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package netns
import "github.com/Microsoft/hcsshim/hcn"
// NetNS holds network namespace for sandbox
type NetNS struct {
path string
}
// NewNetNS creates a network namespace for the sandbox
func NewNetNS() (*NetNS, error) {
temp := hcn.HostComputeNamespace{}
hcnNamespace, err := temp.Create()
if err != nil {
return nil, err
}
return &NetNS{path: hcnNamespace.Id}, nil
}
// LoadNetNS loads existing network namespace.
func LoadNetNS(path string) *NetNS {
return &NetNS{path: path}
}
// Remove removes network namepace if it exists and not closed. Remove is idempotent,
// meaning it might be invoked multiple times and provides consistent result.
func (n *NetNS) Remove() error {
hcnNamespace, err := hcn.GetNamespaceByID(n.path)
if err != nil {
if hcn.IsNotFoundError(err) {
return nil
}
return err
}
err = hcnNamespace.Delete()
if err == nil || hcn.IsNotFoundError(err) {
return nil
}
return err
}
// Closed checks whether the network namespace has been closed.
func (n *NetNS) Closed() (bool, error) {
_, err := hcn.GetNamespaceByID(n.path)
if err == nil {
return false, nil
}
if hcn.IsNotFoundError(err) {
return true, nil
}
return false, err
}
// GetPath returns network namespace path for sandbox container
func (n *NetNS) GetPath() string {
return n.path
}
// NOTE: Do function is not supported.

View File

@@ -22,11 +22,7 @@ import (
"os"
"path/filepath"
"github.com/containerd/containerd/mount"
"github.com/containerd/fifo"
"github.com/docker/docker/pkg/symlink"
"golang.org/x/net/context"
"golang.org/x/sys/unix"
)
// OS collects system level operations that need to be mocked out
@@ -34,15 +30,11 @@ import (
type OS interface {
MkdirAll(path string, perm os.FileMode) error
RemoveAll(path string) error
OpenFifo(ctx context.Context, fn string, flag int, perm os.FileMode) (io.ReadWriteCloser, error)
Stat(name string) (os.FileInfo, error)
ResolveSymbolicLink(name string) (string, error)
FollowSymlinkInScope(path, scope string) (string, error)
CopyFile(src, dest string, perm os.FileMode) error
WriteFile(filename string, data []byte, perm os.FileMode) error
Mount(source string, target string, fstype string, flags uintptr, data string) error
Unmount(target string) error
LookupMount(path string) (mount.Info, error)
Hostname() (string, error)
}
@@ -59,11 +51,6 @@ func (RealOS) RemoveAll(path string) error {
return os.RemoveAll(path)
}
// OpenFifo will call fifo.OpenFifo to open a fifo.
func (RealOS) OpenFifo(ctx context.Context, fn string, flag int, perm os.FileMode) (io.ReadWriteCloser, error) {
return fifo.OpenFifo(ctx, fn, flag, perm)
}
// Stat will call os.Stat to get the status of the given file.
func (RealOS) Stat(name string) (os.FileInfo, error) {
return os.Stat(name)
@@ -109,33 +96,6 @@ func (RealOS) WriteFile(filename string, data []byte, perm os.FileMode) error {
return ioutil.WriteFile(filename, data, perm)
}
// Mount will call unix.Mount to mount the file.
func (RealOS) Mount(source string, target string, fstype string, flags uintptr, data string) error {
return unix.Mount(source, target, fstype, flags, data)
}
// Unmount will call Unmount to unmount the file.
func (RealOS) Unmount(target string) error {
return Unmount(target)
}
// LookupMount gets mount info of a given path.
func (RealOS) LookupMount(path string) (mount.Info, error) {
return mount.Lookup(path)
}
// Unmount unmounts the target. It does not return an error in case the target is not mounted.
// In case the target does not exist, the appropriate error is returned.
func Unmount(target string) error {
err := unix.Unmount(target, unix.MNT_DETACH)
if err == unix.EINVAL {
// ignore "not mounted" error
err = nil
}
return err
}
// Hostname will call os.Hostname to get the hostname of the host.
func (RealOS) Hostname() (string, error) {
return os.Hostname()

59
vendor/github.com/containerd/cri/pkg/os/os_unix.go generated vendored Normal file
View File

@@ -0,0 +1,59 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package os
import (
"github.com/containerd/containerd/mount"
"golang.org/x/sys/unix"
)
// UNIX collects unix system level operations that need to be
// mocked out during tests.
type UNIX interface {
Mount(source string, target string, fstype string, flags uintptr, data string) error
Unmount(target string) error
LookupMount(path string) (mount.Info, error)
}
// Mount will call unix.Mount to mount the file.
func (RealOS) Mount(source string, target string, fstype string, flags uintptr, data string) error {
return unix.Mount(source, target, fstype, flags, data)
}
// Unmount will call Unmount to unmount the file.
func (RealOS) Unmount(target string) error {
return Unmount(target)
}
// LookupMount gets mount info of a given path.
func (RealOS) LookupMount(path string) (mount.Info, error) {
return mount.Lookup(path)
}
// Unmount unmounts the target. It does not return an error in case the target is not mounted.
// In case the target does not exist, the appropriate error is returned.
func Unmount(target string) error {
err := unix.Unmount(target, unix.MNT_DETACH)
if err == unix.EINVAL {
// ignore "not mounted" error
err = nil
}
return err
}

View File

@@ -18,23 +18,12 @@ package server
import (
"path/filepath"
"strconv"
"strings"
"time"
"github.com/containerd/containerd"
"github.com/containerd/containerd/containers"
"github.com/containerd/containerd/contrib/apparmor"
"github.com/containerd/containerd/contrib/seccomp"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/oci"
"github.com/containerd/cri/pkg/annotations"
"github.com/containerd/cri/pkg/config"
customopts "github.com/containerd/cri/pkg/containerd/opts"
ctrdutil "github.com/containerd/cri/pkg/containerd/util"
cio "github.com/containerd/cri/pkg/server/io"
containerstore "github.com/containerd/cri/pkg/store/container"
"github.com/containerd/cri/pkg/util"
"github.com/containerd/typeurl"
"github.com/davecgh/go-spew/spew"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
@@ -42,21 +31,12 @@ import (
"github.com/pkg/errors"
"golang.org/x/net/context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)
const (
// profileNamePrefix is the prefix for loading profiles on a localhost. Eg. AppArmor localhost/profileName.
profileNamePrefix = "localhost/" // TODO (mikebrow): get localhost/ & runtime/default from CRI kubernetes/kubernetes#51747
// runtimeDefault indicates that we should use or create a runtime default profile.
runtimeDefault = "runtime/default"
// dockerDefault indicates that we should use or create a docker default profile.
dockerDefault = "docker/default"
// appArmorDefaultProfileName is name to use when creating a default apparmor profile.
appArmorDefaultProfileName = "cri-containerd.apparmor.d"
// unconfinedProfile is a string indicating one should run a pod/containerd without a security profile
unconfinedProfile = "unconfined"
// seccompDefaultProfile is the default seccomp profile.
seccompDefaultProfile = dockerDefault
customopts "github.com/containerd/cri/pkg/containerd/opts"
ctrdutil "github.com/containerd/cri/pkg/containerd/util"
cio "github.com/containerd/cri/pkg/server/io"
containerstore "github.com/containerd/cri/pkg/store/container"
"github.com/containerd/cri/pkg/util"
)
func init() {
@@ -156,10 +136,10 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
}()
// Create container volumes mounts.
volumeMounts := c.generateVolumeMounts(containerRootDir, config.GetMounts(), &image.ImageSpec.Config)
volumeMounts := c.volumeMounts(containerRootDir, config.GetMounts(), &image.ImageSpec.Config)
// Generate container runtime spec.
mounts := c.generateContainerMounts(sandboxID, config)
// Generate container mounts.
mounts := c.containerMounts(sandboxID, config)
ociRuntime, err := c.getSandboxRuntime(sandboxConfig, sandbox.Metadata.RuntimeHandler)
if err != nil {
@@ -167,7 +147,7 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
}
log.G(ctx).Debugf("Use OCI runtime %+v for sandbox %q and container %q", ociRuntime, sandboxID, id)
spec, err := c.generateContainerSpec(id, sandboxID, sandboxPid, config, sandboxConfig,
spec, err := c.containerSpec(id, sandboxID, sandboxPid, sandbox.NetNSPath, config, sandboxConfig,
&image.ImageSpec.Config, append(mounts, volumeMounts...), ociRuntime)
if err != nil {
return nil, errors.Wrapf(err, "failed to generate container %q spec", id)
@@ -185,7 +165,6 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
// rootfs readonly (requested by spec.Root.Readonly).
customopts.WithNewSnapshot(id, containerdImage),
}
if len(volumeMounts) > 0 {
mountMap := make(map[string]string)
for _, v := range volumeMounts {
@@ -219,58 +198,11 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
}
}()
var specOpts []oci.SpecOpts
securityContext := config.GetLinux().GetSecurityContext()
// Set container username. This could only be done by containerd, because it needs
// access to the container rootfs. Pass user name to containerd, and let it overwrite
// the spec for us.
userstr, err := generateUserString(
securityContext.GetRunAsUsername(),
securityContext.GetRunAsUser(),
securityContext.GetRunAsGroup())
specOpts, err := c.containerSpecOpts(config, &image.ImageSpec.Config)
if err != nil {
return nil, errors.Wrap(err, "failed to generate user string")
}
if userstr == "" {
// Lastly, since no user override was passed via CRI try to set via OCI
// Image
userstr = image.ImageSpec.Config.User
}
if userstr != "" {
specOpts = append(specOpts, oci.WithUser(userstr))
return nil, errors.Wrap(err, "")
}
if securityContext.GetRunAsUsername() != "" {
userstr = securityContext.GetRunAsUsername()
} else {
// Even if RunAsUser is not set, we still call `GetValue` to get uid 0.
// Because it is still useful to get additional gids for uid 0.
userstr = strconv.FormatInt(securityContext.GetRunAsUser().GetValue(), 10)
}
specOpts = append(specOpts, customopts.WithAdditionalGIDs(userstr))
apparmorSpecOpts, err := generateApparmorSpecOpts(
securityContext.GetApparmorProfile(),
securityContext.GetPrivileged(),
c.apparmorEnabled)
if err != nil {
return nil, errors.Wrap(err, "failed to generate apparmor spec opts")
}
if apparmorSpecOpts != nil {
specOpts = append(specOpts, apparmorSpecOpts)
}
seccompSpecOpts, err := generateSeccompSpecOpts(
securityContext.GetSeccompProfilePath(),
securityContext.GetPrivileged(),
c.seccompEnabled)
if err != nil {
return nil, errors.Wrap(err, "failed to generate seccomp spec opts")
}
if seccompSpecOpts != nil {
specOpts = append(specOpts, seccompSpecOpts)
}
containerLabels := buildLabels(config.Labels, containerKindContainer)
runtimeOptions, err := getRuntimeOptions(sandboxInfo)
@@ -322,129 +254,10 @@ func (c *criService) CreateContainer(ctx context.Context, r *runtime.CreateConta
return &runtime.CreateContainerResponse{ContainerId: id}, nil
}
func (c *criService) generateContainerSpec(id string, sandboxID string, sandboxPid uint32, config *runtime.ContainerConfig,
sandboxConfig *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig, extraMounts []*runtime.Mount,
ociRuntime config.Runtime) (*runtimespec.Spec, error) {
specOpts := []oci.SpecOpts{
customopts.WithoutRunMount,
customopts.WithoutDefaultSecuritySettings,
customopts.WithRelativeRoot(relativeRootfsPath),
customopts.WithProcessArgs(config, imageConfig),
oci.WithDefaultPathEnv,
// this will be set based on the security context below
oci.WithNewPrivileges,
}
if config.GetWorkingDir() != "" {
specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir()))
} else if imageConfig.WorkingDir != "" {
specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
}
if config.GetTty() {
specOpts = append(specOpts, oci.WithTTY)
}
// Add HOSTNAME env.
var (
err error
hostname = sandboxConfig.GetHostname()
)
if hostname == "" {
if hostname, err = c.os.Hostname(); err != nil {
return nil, err
}
}
specOpts = append(specOpts, oci.WithEnv([]string{hostnameEnv + "=" + hostname}))
// Apply envs from image config first, so that envs from container config
// can override them.
env := imageConfig.Env
for _, e := range config.GetEnvs() {
env = append(env, e.GetKey()+"="+e.GetValue())
}
specOpts = append(specOpts, oci.WithEnv(env))
securityContext := config.GetLinux().GetSecurityContext()
selinuxOpt := securityContext.GetSelinuxOptions()
processLabel, mountLabel, err := initSelinuxOpts(selinuxOpt)
if err != nil {
return nil, errors.Wrapf(err, "failed to init selinux options %+v", securityContext.GetSelinuxOptions())
}
specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel))
if !c.config.DisableProcMount {
// Apply masked paths if specified.
// If the container is privileged, this will be cleared later on.
specOpts = append(specOpts, oci.WithMaskedPaths(securityContext.GetMaskedPaths()))
// Apply readonly paths if specified.
// If the container is privileged, this will be cleared later on.
specOpts = append(specOpts, oci.WithReadonlyPaths(securityContext.GetReadonlyPaths()))
}
if securityContext.GetPrivileged() {
if !sandboxConfig.GetLinux().GetSecurityContext().GetPrivileged() {
return nil, errors.New("no privileged container allowed in sandbox")
}
specOpts = append(specOpts, oci.WithPrivileged)
if !ociRuntime.PrivilegedWithoutHostDevices {
specOpts = append(specOpts, customopts.WithPrivilegedDevices)
}
} else { // not privileged
specOpts = append(specOpts, customopts.WithDevices(c.os, config), customopts.WithCapabilities(securityContext))
}
// Clear all ambient capabilities. The implication of non-root + caps
// is not clearly defined in Kubernetes.
// See https://github.com/kubernetes/kubernetes/issues/56374
// Keep docker's behavior for now.
specOpts = append(specOpts,
customopts.WithoutAmbientCaps,
customopts.WithSelinuxLabels(processLabel, mountLabel),
)
// TODO: Figure out whether we should set no new privilege for sandbox container by default
if securityContext.GetNoNewPrivs() {
specOpts = append(specOpts, oci.WithNoNewPrivileges)
}
// TODO(random-liu): [P1] Set selinux options (privileged or not).
if securityContext.GetReadonlyRootfs() {
specOpts = append(specOpts, oci.WithRootFSReadonly())
}
if c.config.DisableCgroup {
specOpts = append(specOpts, customopts.WithDisabledCgroups)
} else {
specOpts = append(specOpts, customopts.WithResources(config.GetLinux().GetResources()))
if sandboxConfig.GetLinux().GetCgroupParent() != "" {
cgroupsPath := getCgroupsPath(sandboxConfig.GetLinux().GetCgroupParent(), id)
specOpts = append(specOpts, oci.WithCgroup(cgroupsPath))
}
}
supplementalGroups := securityContext.GetSupplementalGroups()
for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations,
ociRuntime.PodAnnotations) {
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
}
specOpts = append(specOpts,
customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
customopts.WithPodNamespaces(securityContext, sandboxPid),
customopts.WithSupplementalGroups(supplementalGroups),
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
customopts.WithAnnotation(annotations.SandboxID, sandboxID),
)
return runtimeSpec(id, specOpts...)
}
// generateVolumeMounts sets up image volumes for container. Rely on the removal of container
// volumeMounts sets up image volumes for container. Rely on the removal of container
// root directory to do cleanup. Note that image volume will be skipped, if there is criMounts
// specified with the same destination.
func (c *criService) generateVolumeMounts(containerRootDir string, criMounts []*runtime.Mount, config *imagespec.ImageConfig) []*runtime.Mount {
func (c *criService) volumeMounts(containerRootDir string, criMounts []*runtime.Mount, config *imagespec.ImageConfig) []*runtime.Mount {
if len(config.Volumes) == 0 {
return nil
}
@@ -470,59 +283,6 @@ func (c *criService) generateVolumeMounts(containerRootDir string, criMounts []*
return mounts
}
// generateContainerMounts sets up necessary container mounts including /dev/shm, /etc/hosts
// and /etc/resolv.conf.
func (c *criService) generateContainerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount {
var mounts []*runtime.Mount
securityContext := config.GetLinux().GetSecurityContext()
if !isInCRIMounts(etcHostname, config.GetMounts()) {
// /etc/hostname is added since 1.1.6, 1.2.4 and 1.3.
// For in-place upgrade, the old sandbox doesn't have the hostname file,
// do not mount this in that case.
// TODO(random-liu): Remove the check and always mount this when
// containerd 1.1 and 1.2 are deprecated.
hostpath := c.getSandboxHostname(sandboxID)
if _, err := c.os.Stat(hostpath); err == nil {
mounts = append(mounts, &runtime.Mount{
ContainerPath: etcHostname,
HostPath: hostpath,
Readonly: securityContext.GetReadonlyRootfs(),
})
}
}
if !isInCRIMounts(etcHosts, config.GetMounts()) {
mounts = append(mounts, &runtime.Mount{
ContainerPath: etcHosts,
HostPath: c.getSandboxHosts(sandboxID),
Readonly: securityContext.GetReadonlyRootfs(),
})
}
// Mount sandbox resolv.config.
// TODO: Need to figure out whether we should always mount it as read-only
if !isInCRIMounts(resolvConfPath, config.GetMounts()) {
mounts = append(mounts, &runtime.Mount{
ContainerPath: resolvConfPath,
HostPath: c.getResolvPath(sandboxID),
Readonly: securityContext.GetReadonlyRootfs(),
})
}
if !isInCRIMounts(devShm, config.GetMounts()) {
sandboxDevShm := c.getSandboxDevShm(sandboxID)
if securityContext.GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE {
sandboxDevShm = devShm
}
mounts = append(mounts, &runtime.Mount{
ContainerPath: devShm,
HostPath: sandboxDevShm,
Readonly: false,
})
}
return mounts
}
// runtimeSpec returns a default runtime spec used in cri-containerd.
func runtimeSpec(id string, opts ...oci.SpecOpts) (*runtimespec.Spec, error) {
// GenerateSpec needs namespace.
@@ -533,105 +293,3 @@ func runtimeSpec(id string, opts ...oci.SpecOpts) (*runtimespec.Spec, error) {
}
return spec, nil
}
// generateSeccompSpecOpts generates containerd SpecOpts for seccomp.
func generateSeccompSpecOpts(seccompProf string, privileged, seccompEnabled bool) (oci.SpecOpts, error) {
if privileged {
// Do not set seccomp profile when container is privileged
return nil, nil
}
// Set seccomp profile
if seccompProf == runtimeDefault || seccompProf == dockerDefault {
// use correct default profile (Eg. if not configured otherwise, the default is docker/default)
seccompProf = seccompDefaultProfile
}
if !seccompEnabled {
if seccompProf != "" && seccompProf != unconfinedProfile {
return nil, errors.New("seccomp is not supported")
}
return nil, nil
}
switch seccompProf {
case "", unconfinedProfile:
// Do not set seccomp profile.
return nil, nil
case dockerDefault:
// Note: WithDefaultProfile specOpts must be added after capabilities
return seccomp.WithDefaultProfile(), nil
default:
// Require and Trim default profile name prefix
if !strings.HasPrefix(seccompProf, profileNamePrefix) {
return nil, errors.Errorf("invalid seccomp profile %q", seccompProf)
}
return seccomp.WithProfile(strings.TrimPrefix(seccompProf, profileNamePrefix)), nil
}
}
// generateApparmorSpecOpts generates containerd SpecOpts for apparmor.
func generateApparmorSpecOpts(apparmorProf string, privileged, apparmorEnabled bool) (oci.SpecOpts, error) {
if !apparmorEnabled {
// Should fail loudly if user try to specify apparmor profile
// but we don't support it.
if apparmorProf != "" && apparmorProf != unconfinedProfile {
return nil, errors.New("apparmor is not supported")
}
return nil, nil
}
switch apparmorProf {
// Based on kubernetes#51746, default apparmor profile should be applied
// for when apparmor is not specified.
case runtimeDefault, "":
if privileged {
// Do not set apparmor profile when container is privileged
return nil, nil
}
// TODO (mikebrow): delete created apparmor default profile
return apparmor.WithDefaultProfile(appArmorDefaultProfileName), nil
case unconfinedProfile:
return nil, nil
default:
// Require and Trim default profile name prefix
if !strings.HasPrefix(apparmorProf, profileNamePrefix) {
return nil, errors.Errorf("invalid apparmor profile %q", apparmorProf)
}
return apparmor.WithProfile(strings.TrimPrefix(apparmorProf, profileNamePrefix)), nil
}
}
// generateUserString generates valid user string based on OCI Image Spec
// v1.0.0.
//
// CRI defines that the following combinations are valid:
//
// (none) -> ""
// username -> username
// username, uid -> username
// username, uid, gid -> username:gid
// username, gid -> username:gid
// uid -> uid
// uid, gid -> uid:gid
// gid -> error
//
// TODO(random-liu): Add group name support in CRI.
func generateUserString(username string, uid, gid *runtime.Int64Value) (string, error) {
var userstr, groupstr string
if uid != nil {
userstr = strconv.FormatInt(uid.GetValue(), 10)
}
if username != "" {
userstr = username
}
if gid != nil {
groupstr = strconv.FormatInt(gid.GetValue(), 10)
}
if userstr == "" {
if groupstr != "" {
return "", errors.Errorf("user group %q is specified without user", groupstr)
}
return "", nil
}
if groupstr != "" {
userstr = userstr + ":" + groupstr
}
return userstr, nil
}

View File

@@ -0,0 +1,385 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"strconv"
"strings"
"github.com/containerd/containerd/contrib/apparmor"
"github.com/containerd/containerd/contrib/seccomp"
"github.com/containerd/containerd/oci"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
"github.com/containerd/cri/pkg/annotations"
"github.com/containerd/cri/pkg/config"
customopts "github.com/containerd/cri/pkg/containerd/opts"
)
const (
// profileNamePrefix is the prefix for loading profiles on a localhost. Eg. AppArmor localhost/profileName.
profileNamePrefix = "localhost/" // TODO (mikebrow): get localhost/ & runtime/default from CRI kubernetes/kubernetes#51747
// runtimeDefault indicates that we should use or create a runtime default profile.
runtimeDefault = "runtime/default"
// dockerDefault indicates that we should use or create a docker default profile.
dockerDefault = "docker/default"
// appArmorDefaultProfileName is name to use when creating a default apparmor profile.
appArmorDefaultProfileName = "cri-containerd.apparmor.d"
// unconfinedProfile is a string indicating one should run a pod/containerd without a security profile
unconfinedProfile = "unconfined"
// seccompDefaultProfile is the default seccomp profile.
seccompDefaultProfile = dockerDefault
)
// containerMounts sets up necessary container system file mounts
// including /dev/shm, /etc/hosts and /etc/resolv.conf.
func (c *criService) containerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount {
var mounts []*runtime.Mount
securityContext := config.GetLinux().GetSecurityContext()
if !isInCRIMounts(etcHostname, config.GetMounts()) {
// /etc/hostname is added since 1.1.6, 1.2.4 and 1.3.
// For in-place upgrade, the old sandbox doesn't have the hostname file,
// do not mount this in that case.
// TODO(random-liu): Remove the check and always mount this when
// containerd 1.1 and 1.2 are deprecated.
hostpath := c.getSandboxHostname(sandboxID)
if _, err := c.os.Stat(hostpath); err == nil {
mounts = append(mounts, &runtime.Mount{
ContainerPath: etcHostname,
HostPath: hostpath,
Readonly: securityContext.GetReadonlyRootfs(),
})
}
}
if !isInCRIMounts(etcHosts, config.GetMounts()) {
mounts = append(mounts, &runtime.Mount{
ContainerPath: etcHosts,
HostPath: c.getSandboxHosts(sandboxID),
Readonly: securityContext.GetReadonlyRootfs(),
})
}
// Mount sandbox resolv.config.
// TODO: Need to figure out whether we should always mount it as read-only
if !isInCRIMounts(resolvConfPath, config.GetMounts()) {
mounts = append(mounts, &runtime.Mount{
ContainerPath: resolvConfPath,
HostPath: c.getResolvPath(sandboxID),
Readonly: securityContext.GetReadonlyRootfs(),
})
}
if !isInCRIMounts(devShm, config.GetMounts()) {
sandboxDevShm := c.getSandboxDevShm(sandboxID)
if securityContext.GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE {
sandboxDevShm = devShm
}
mounts = append(mounts, &runtime.Mount{
ContainerPath: devShm,
HostPath: sandboxDevShm,
Readonly: false,
})
}
return mounts
}
func (c *criService) containerSpec(id string, sandboxID string, sandboxPid uint32, netNSPath string,
config *runtime.ContainerConfig, sandboxConfig *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig,
extraMounts []*runtime.Mount, ociRuntime config.Runtime) (*runtimespec.Spec, error) {
specOpts := []oci.SpecOpts{
customopts.WithoutRunMount,
customopts.WithoutDefaultSecuritySettings,
customopts.WithRelativeRoot(relativeRootfsPath),
customopts.WithProcessArgs(config, imageConfig),
oci.WithDefaultPathEnv,
// this will be set based on the security context below
oci.WithNewPrivileges,
}
if config.GetWorkingDir() != "" {
specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir()))
} else if imageConfig.WorkingDir != "" {
specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
}
if config.GetTty() {
specOpts = append(specOpts, oci.WithTTY)
}
// Add HOSTNAME env.
var (
err error
hostname = sandboxConfig.GetHostname()
)
if hostname == "" {
if hostname, err = c.os.Hostname(); err != nil {
return nil, err
}
}
specOpts = append(specOpts, oci.WithEnv([]string{hostnameEnv + "=" + hostname}))
// Apply envs from image config first, so that envs from container config
// can override them.
env := imageConfig.Env
for _, e := range config.GetEnvs() {
env = append(env, e.GetKey()+"="+e.GetValue())
}
specOpts = append(specOpts, oci.WithEnv(env))
securityContext := config.GetLinux().GetSecurityContext()
selinuxOpt := securityContext.GetSelinuxOptions()
processLabel, mountLabel, err := initSelinuxOpts(selinuxOpt)
if err != nil {
return nil, errors.Wrapf(err, "failed to init selinux options %+v", securityContext.GetSelinuxOptions())
}
specOpts = append(specOpts, customopts.WithMounts(c.os, config, extraMounts, mountLabel))
if !c.config.DisableProcMount {
// Apply masked paths if specified.
// If the container is privileged, this will be cleared later on.
specOpts = append(specOpts, oci.WithMaskedPaths(securityContext.GetMaskedPaths()))
// Apply readonly paths if specified.
// If the container is privileged, this will be cleared later on.
specOpts = append(specOpts, oci.WithReadonlyPaths(securityContext.GetReadonlyPaths()))
}
if securityContext.GetPrivileged() {
if !sandboxConfig.GetLinux().GetSecurityContext().GetPrivileged() {
return nil, errors.New("no privileged container allowed in sandbox")
}
specOpts = append(specOpts, oci.WithPrivileged)
if !ociRuntime.PrivilegedWithoutHostDevices {
specOpts = append(specOpts, customopts.WithPrivilegedDevices)
}
} else { // not privileged
specOpts = append(specOpts, customopts.WithDevices(c.os, config), customopts.WithCapabilities(securityContext))
}
// Clear all ambient capabilities. The implication of non-root + caps
// is not clearly defined in Kubernetes.
// See https://github.com/kubernetes/kubernetes/issues/56374
// Keep docker's behavior for now.
specOpts = append(specOpts,
customopts.WithoutAmbientCaps,
customopts.WithSelinuxLabels(processLabel, mountLabel),
)
// TODO: Figure out whether we should set no new privilege for sandbox container by default
if securityContext.GetNoNewPrivs() {
specOpts = append(specOpts, oci.WithNoNewPrivileges)
}
// TODO(random-liu): [P1] Set selinux options (privileged or not).
if securityContext.GetReadonlyRootfs() {
specOpts = append(specOpts, oci.WithRootFSReadonly())
}
if c.config.DisableCgroup {
specOpts = append(specOpts, customopts.WithDisabledCgroups)
} else {
specOpts = append(specOpts, customopts.WithResources(config.GetLinux().GetResources()))
if sandboxConfig.GetLinux().GetCgroupParent() != "" {
cgroupsPath := getCgroupsPath(sandboxConfig.GetLinux().GetCgroupParent(), id)
specOpts = append(specOpts, oci.WithCgroup(cgroupsPath))
}
}
supplementalGroups := securityContext.GetSupplementalGroups()
for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations,
ociRuntime.PodAnnotations) {
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
}
for pKey, pValue := range getPassthroughAnnotations(config.Annotations,
ociRuntime.ContainerAnnotations) {
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
}
specOpts = append(specOpts,
customopts.WithOOMScoreAdj(config, c.config.RestrictOOMScoreAdj),
customopts.WithPodNamespaces(securityContext, sandboxPid),
customopts.WithSupplementalGroups(supplementalGroups),
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
customopts.WithAnnotation(annotations.SandboxID, sandboxID),
)
return runtimeSpec(id, specOpts...)
}
func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
var specOpts []oci.SpecOpts
securityContext := config.GetLinux().GetSecurityContext()
// Set container username. This could only be done by containerd, because it needs
// access to the container rootfs. Pass user name to containerd, and let it overwrite
// the spec for us.
userstr, err := generateUserString(
securityContext.GetRunAsUsername(),
securityContext.GetRunAsUser(),
securityContext.GetRunAsGroup())
if err != nil {
return nil, errors.Wrap(err, "failed to generate user string")
}
if userstr == "" {
// Lastly, since no user override was passed via CRI try to set via OCI
// Image
userstr = imageConfig.User
}
if userstr != "" {
specOpts = append(specOpts, oci.WithUser(userstr))
}
if securityContext.GetRunAsUsername() != "" {
userstr = securityContext.GetRunAsUsername()
} else {
// Even if RunAsUser is not set, we still call `GetValue` to get uid 0.
// Because it is still useful to get additional gids for uid 0.
userstr = strconv.FormatInt(securityContext.GetRunAsUser().GetValue(), 10)
}
specOpts = append(specOpts, customopts.WithAdditionalGIDs(userstr))
apparmorSpecOpts, err := generateApparmorSpecOpts(
securityContext.GetApparmorProfile(),
securityContext.GetPrivileged(),
c.apparmorEnabled())
if err != nil {
return nil, errors.Wrap(err, "failed to generate apparmor spec opts")
}
if apparmorSpecOpts != nil {
specOpts = append(specOpts, apparmorSpecOpts)
}
seccompSpecOpts, err := generateSeccompSpecOpts(
securityContext.GetSeccompProfilePath(),
securityContext.GetPrivileged(),
c.seccompEnabled())
if err != nil {
return nil, errors.Wrap(err, "failed to generate seccomp spec opts")
}
if seccompSpecOpts != nil {
specOpts = append(specOpts, seccompSpecOpts)
}
return specOpts, nil
}
// generateSeccompSpecOpts generates containerd SpecOpts for seccomp.
func generateSeccompSpecOpts(seccompProf string, privileged, seccompEnabled bool) (oci.SpecOpts, error) {
if privileged {
// Do not set seccomp profile when container is privileged
return nil, nil
}
// Set seccomp profile
if seccompProf == runtimeDefault || seccompProf == dockerDefault {
// use correct default profile (Eg. if not configured otherwise, the default is docker/default)
seccompProf = seccompDefaultProfile
}
if !seccompEnabled {
if seccompProf != "" && seccompProf != unconfinedProfile {
return nil, errors.New("seccomp is not supported")
}
return nil, nil
}
switch seccompProf {
case "", unconfinedProfile:
// Do not set seccomp profile.
return nil, nil
case dockerDefault:
// Note: WithDefaultProfile specOpts must be added after capabilities
return seccomp.WithDefaultProfile(), nil
default:
// Require and Trim default profile name prefix
if !strings.HasPrefix(seccompProf, profileNamePrefix) {
return nil, errors.Errorf("invalid seccomp profile %q", seccompProf)
}
return seccomp.WithProfile(strings.TrimPrefix(seccompProf, profileNamePrefix)), nil
}
}
// generateApparmorSpecOpts generates containerd SpecOpts for apparmor.
func generateApparmorSpecOpts(apparmorProf string, privileged, apparmorEnabled bool) (oci.SpecOpts, error) {
if !apparmorEnabled {
// Should fail loudly if user try to specify apparmor profile
// but we don't support it.
if apparmorProf != "" && apparmorProf != unconfinedProfile {
return nil, errors.New("apparmor is not supported")
}
return nil, nil
}
switch apparmorProf {
// Based on kubernetes#51746, default apparmor profile should be applied
// for when apparmor is not specified.
case runtimeDefault, "":
if privileged {
// Do not set apparmor profile when container is privileged
return nil, nil
}
// TODO (mikebrow): delete created apparmor default profile
return apparmor.WithDefaultProfile(appArmorDefaultProfileName), nil
case unconfinedProfile:
return nil, nil
default:
// Require and Trim default profile name prefix
if !strings.HasPrefix(apparmorProf, profileNamePrefix) {
return nil, errors.Errorf("invalid apparmor profile %q", apparmorProf)
}
return apparmor.WithProfile(strings.TrimPrefix(apparmorProf, profileNamePrefix)), nil
}
}
// generateUserString generates valid user string based on OCI Image Spec
// v1.0.0.
//
// CRI defines that the following combinations are valid:
//
// (none) -> ""
// username -> username
// username, uid -> username
// username, uid, gid -> username:gid
// username, gid -> username:gid
// uid -> uid
// uid, gid -> uid:gid
// gid -> error
//
// TODO(random-liu): Add group name support in CRI.
func generateUserString(username string, uid, gid *runtime.Int64Value) (string, error) {
var userstr, groupstr string
if uid != nil {
userstr = strconv.FormatInt(uid.GetValue(), 10)
}
if username != "" {
userstr = username
}
if gid != nil {
groupstr = strconv.FormatInt(gid.GetValue(), 10)
}
if userstr == "" {
if groupstr != "" {
return "", errors.Errorf("user group %q is specified without user", groupstr)
}
return "", nil
}
if groupstr != "" {
userstr = userstr + ":" + groupstr
}
return userstr, nil
}

View File

@@ -0,0 +1,100 @@
// +build windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"github.com/containerd/containerd/oci"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
"github.com/containerd/cri/pkg/annotations"
"github.com/containerd/cri/pkg/config"
customopts "github.com/containerd/cri/pkg/containerd/opts"
)
// No container mounts for windows.
func (c *criService) containerMounts(sandboxID string, config *runtime.ContainerConfig) []*runtime.Mount {
return nil
}
func (c *criService) containerSpec(id string, sandboxID string, sandboxPid uint32, netNSPath string,
config *runtime.ContainerConfig, sandboxConfig *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig,
extraMounts []*runtime.Mount, ociRuntime config.Runtime) (*runtimespec.Spec, error) {
specOpts := []oci.SpecOpts{
customopts.WithProcessArgs(config, imageConfig),
}
if config.GetWorkingDir() != "" {
specOpts = append(specOpts, oci.WithProcessCwd(config.GetWorkingDir()))
} else if imageConfig.WorkingDir != "" {
specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
}
if config.GetTty() {
specOpts = append(specOpts, oci.WithTTY)
}
// Apply envs from image config first, so that envs from container config
// can override them.
env := imageConfig.Env
for _, e := range config.GetEnvs() {
env = append(env, e.GetKey()+"="+e.GetValue())
}
specOpts = append(specOpts, oci.WithEnv(env))
specOpts = append(specOpts,
// Clear the root location since hcsshim expects it.
// NOTE: readonly rootfs doesn't work on windows.
customopts.WithoutRoot,
customopts.WithWindowsNetworkNamespace(netNSPath),
oci.WithHostname(sandboxConfig.GetHostname()),
)
specOpts = append(specOpts, customopts.WithWindowsMounts(c.os, config, extraMounts))
specOpts = append(specOpts, customopts.WithWindowsResources(config.GetWindows().GetResources()))
username := config.GetWindows().GetSecurityContext().GetRunAsUsername()
if username != "" {
specOpts = append(specOpts, oci.WithUser(username))
}
// TODO(windows): Add CredentialSpec support.
for pKey, pValue := range getPassthroughAnnotations(sandboxConfig.Annotations,
ociRuntime.PodAnnotations) {
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
}
for pKey, pValue := range getPassthroughAnnotations(config.Annotations,
ociRuntime.ContainerAnnotations) {
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
}
specOpts = append(specOpts,
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeContainer),
customopts.WithAnnotation(annotations.SandboxID, sandboxID),
)
return runtimeSpec(id, specOpts...)
}
// No extra spec options needed for windows.
func (c *criService) containerSpecOpts(config *runtime.ContainerConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
return nil, nil
}

View File

@@ -70,28 +70,13 @@ type execOptions struct {
timeout time.Duration
}
// execInContainer executes a command inside the container synchronously, and
// redirects stdio stream properly.
func (c *criService) execInContainer(ctx context.Context, id string, opts execOptions) (*uint32, error) {
func (c *criService) execInternal(ctx context.Context, container containerd.Container, id string, opts execOptions) (*uint32, error) {
// Cancel the context before returning to ensure goroutines are stopped.
// This is important, because if `Start` returns error, `Wait` will hang
// forever unless we cancel the context.
ctx, cancel := context.WithCancel(ctx)
defer cancel()
// Get container from our container store.
cntr, err := c.containerStore.Get(id)
if err != nil {
return nil, errors.Wrapf(err, "failed to find container %q in store", id)
}
id = cntr.ID
state := cntr.Status.Get().State()
if state != runtime.ContainerState_CONTAINER_RUNNING {
return nil, errors.Errorf("container is in %s state", criContainerStateToString(state))
}
container := cntr.Container
spec, err := container.Spec(ctx)
if err != nil {
return nil, errors.Wrap(err, "failed to get container spec")
@@ -195,3 +180,32 @@ func (c *criService) execInContainer(ctx context.Context, id string, opts execOp
return &code, nil
}
}
// execInContainer executes a command inside the container synchronously, and
// redirects stdio stream properly.
// This function only returns when the exec process exits, this means that:
// 1) As long as the exec process is running, the goroutine in the cri plugin
// will be running and wait for the exit code;
// 2) `kubectl exec -it` will hang until the exec process exits, even after io
// is detached. This is different from dockershim, which leaves the exec process
// running in background after io is detached.
// https://github.com/kubernetes/kubernetes/blob/v1.15.0/pkg/kubelet/dockershim/exec.go#L127
// For example, if the `kubectl exec -it` process is killed, IO will be closed. In
// this case, the CRI plugin will still have a goroutine waiting for the exec process
// to exit and log the exit code, but dockershim won't.
func (c *criService) execInContainer(ctx context.Context, id string, opts execOptions) (*uint32, error) {
// Get container from our container store.
cntr, err := c.containerStore.Get(id)
if err != nil {
return nil, errors.Wrapf(err, "failed to find container %q in store", id)
}
id = cntr.ID
state := cntr.Status.Get().State()
if state != runtime.ContainerState_CONTAINER_RUNNING {
return nil, errors.Errorf("container is in %s state", criContainerStateToString(state))
}
return c.execInternal(ctx, cntr.Container, id, opts)
}

View File

@@ -18,14 +18,12 @@ package server
import (
"io"
"os"
"time"
"github.com/containerd/containerd"
containerdio "github.com/containerd/containerd/cio"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/plugin"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/net/context"
@@ -99,11 +97,7 @@ func (c *criService) StartContainer(ctx context.Context, r *runtime.StartContain
return nil, errors.Wrap(err, "failed to get container info")
}
var taskOpts []containerd.NewTaskOpts
// TODO(random-liu): Remove this after shim v1 is deprecated.
if c.config.NoPivot && ctrInfo.Runtime.Name == plugin.RuntimeLinuxV1 {
taskOpts = append(taskOpts, containerd.WithNoPivotRoot)
}
taskOpts := c.taskOpts(ctrInfo.Runtime.Name)
task, err := container.NewTask(ctx, ioCreation, taskOpts...)
if err != nil {
return nil, errors.Wrap(err, "failed to create containerd task")
@@ -180,7 +174,7 @@ func resetContainerStarting(container containerstore.Container) error {
func (c *criService) createContainerLoggers(logPath string, tty bool) (stdout io.WriteCloser, stderr io.WriteCloser, err error) {
if logPath != "" {
// Only generate container log when log path is specified.
f, err := os.OpenFile(logPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0640)
f, err := openLogFile(logPath)
if err != nil {
return nil, nil, errors.Wrap(err, "failed to create and open log file")
}

View File

@@ -39,7 +39,7 @@ func (c *criService) ContainerStats(ctx context.Context, in *runtime.ContainerSt
return nil, errors.Errorf("unexpected metrics response: %+v", resp.Metrics)
}
cs, err := c.getContainerMetrics(cntr.Metadata, resp.Metrics[0])
cs, err := c.containerMetrics(cntr.Metadata, resp.Metrics[0])
if err != nil {
return nil, errors.Wrap(err, "failed to decode container metrics")
}

View File

@@ -17,10 +17,8 @@ limitations under the License.
package server
import (
"github.com/containerd/cgroups"
tasks "github.com/containerd/containerd/api/services/tasks/v1"
"github.com/containerd/containerd/api/types"
"github.com/containerd/typeurl"
"github.com/pkg/errors"
"golang.org/x/net/context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
@@ -58,7 +56,7 @@ func (c *criService) toCRIContainerStats(
}
containerStats := new(runtime.ListContainerStatsResponse)
for _, cntr := range containers {
cs, err := c.getContainerMetrics(cntr.Metadata, statsMap[cntr.ID])
cs, err := c.containerMetrics(cntr.Metadata, statsMap[cntr.ID])
if err != nil {
return nil, errors.Wrapf(err, "failed to decode container metrics for %q", cntr.ID)
}
@@ -67,59 +65,6 @@ func (c *criService) toCRIContainerStats(
return containerStats, nil
}
func (c *criService) getContainerMetrics(
meta containerstore.Metadata,
stats *types.Metric,
) (*runtime.ContainerStats, error) {
var cs runtime.ContainerStats
var usedBytes, inodesUsed uint64
sn, err := c.snapshotStore.Get(meta.ID)
// If snapshotstore doesn't have cached snapshot information
// set WritableLayer usage to zero
if err == nil {
usedBytes = sn.Size
inodesUsed = sn.Inodes
}
cs.WritableLayer = &runtime.FilesystemUsage{
Timestamp: sn.Timestamp,
FsId: &runtime.FilesystemIdentifier{
Mountpoint: c.imageFSPath,
},
UsedBytes: &runtime.UInt64Value{Value: usedBytes},
InodesUsed: &runtime.UInt64Value{Value: inodesUsed},
}
cs.Attributes = &runtime.ContainerAttributes{
Id: meta.ID,
Metadata: meta.Config.GetMetadata(),
Labels: meta.Config.GetLabels(),
Annotations: meta.Config.GetAnnotations(),
}
if stats != nil {
s, err := typeurl.UnmarshalAny(stats.Data)
if err != nil {
return nil, errors.Wrap(err, "failed to extract container metrics")
}
metrics := s.(*cgroups.Metrics)
if metrics.CPU != nil && metrics.CPU.Usage != nil {
cs.Cpu = &runtime.CpuUsage{
Timestamp: stats.Timestamp.UnixNano(),
UsageCoreNanoSeconds: &runtime.UInt64Value{Value: metrics.CPU.Usage.Total},
}
}
if metrics.Memory != nil && metrics.Memory.Usage != nil {
cs.Memory = &runtime.MemoryUsage{
Timestamp: stats.Timestamp.UnixNano(),
WorkingSetBytes: &runtime.UInt64Value{
Value: getWorkingSet(metrics.Memory),
},
}
}
}
return &cs, nil
}
func (c *criService) normalizeContainerStatsFilter(filter *runtime.ContainerStatsFilter) {
if cntr, err := c.containerStore.Get(filter.GetId()); err == nil {
filter.Id = cntr.ID
@@ -169,17 +114,3 @@ func matchLabelSelector(selector, labels map[string]string) bool {
}
return true
}
// getWorkingSet calculates workingset memory from cgroup memory stats.
// The caller should make sure memory is not nil.
// workingset = usage - total_inactive_file
func getWorkingSet(memory *cgroups.MemoryStat) uint64 {
if memory.Usage == nil {
return 0
}
var workingSet uint64
if memory.TotalInactiveFile < memory.Usage.Usage {
workingSet = memory.Usage.Usage - memory.TotalInactiveFile
}
return workingSet
}

View File

@@ -0,0 +1,96 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"github.com/containerd/containerd/api/types"
v1 "github.com/containerd/containerd/metrics/types/v1"
"github.com/containerd/typeurl"
"github.com/pkg/errors"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
containerstore "github.com/containerd/cri/pkg/store/container"
)
func (c *criService) containerMetrics(
meta containerstore.Metadata,
stats *types.Metric,
) (*runtime.ContainerStats, error) {
var cs runtime.ContainerStats
var usedBytes, inodesUsed uint64
sn, err := c.snapshotStore.Get(meta.ID)
// If snapshotstore doesn't have cached snapshot information
// set WritableLayer usage to zero
if err == nil {
usedBytes = sn.Size
inodesUsed = sn.Inodes
}
cs.WritableLayer = &runtime.FilesystemUsage{
Timestamp: sn.Timestamp,
FsId: &runtime.FilesystemIdentifier{
Mountpoint: c.imageFSPath,
},
UsedBytes: &runtime.UInt64Value{Value: usedBytes},
InodesUsed: &runtime.UInt64Value{Value: inodesUsed},
}
cs.Attributes = &runtime.ContainerAttributes{
Id: meta.ID,
Metadata: meta.Config.GetMetadata(),
Labels: meta.Config.GetLabels(),
Annotations: meta.Config.GetAnnotations(),
}
if stats != nil {
s, err := typeurl.UnmarshalAny(stats.Data)
if err != nil {
return nil, errors.Wrap(err, "failed to extract container metrics")
}
metrics := s.(*v1.Metrics)
if metrics.CPU != nil && metrics.CPU.Usage != nil {
cs.Cpu = &runtime.CpuUsage{
Timestamp: stats.Timestamp.UnixNano(),
UsageCoreNanoSeconds: &runtime.UInt64Value{Value: metrics.CPU.Usage.Total},
}
}
if metrics.Memory != nil && metrics.Memory.Usage != nil {
cs.Memory = &runtime.MemoryUsage{
Timestamp: stats.Timestamp.UnixNano(),
WorkingSetBytes: &runtime.UInt64Value{
Value: getWorkingSet(metrics.Memory),
},
}
}
}
return &cs, nil
}
// getWorkingSet calculates workingset memory from cgroup memory stats.
// The caller should make sure memory is not nil.
// workingset = usage - total_inactive_file
func getWorkingSet(memory *v1.MemoryStat) uint64 {
if memory.Usage == nil {
return 0
}
var workingSet uint64
if memory.TotalInactiveFile < memory.Usage.Usage {
workingSet = memory.Usage.Usage - memory.TotalInactiveFile
}
return workingSet
}

View File

@@ -0,0 +1,60 @@
// +build windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"github.com/containerd/containerd/api/types"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
containerstore "github.com/containerd/cri/pkg/store/container"
)
func (c *criService) containerMetrics(
meta containerstore.Metadata,
stats *types.Metric,
) (*runtime.ContainerStats, error) {
var cs runtime.ContainerStats
var usedBytes, inodesUsed uint64
sn, err := c.snapshotStore.Get(meta.ID)
// If snapshotstore doesn't have cached snapshot information
// set WritableLayer usage to zero
if err == nil {
usedBytes = sn.Size
inodesUsed = sn.Inodes
}
cs.WritableLayer = &runtime.FilesystemUsage{
Timestamp: sn.Timestamp,
FsId: &runtime.FilesystemIdentifier{
Mountpoint: c.imageFSPath,
},
UsedBytes: &runtime.UInt64Value{Value: usedBytes},
InodesUsed: &runtime.UInt64Value{Value: inodesUsed},
}
cs.Attributes = &runtime.ContainerAttributes{
Id: meta.ID,
Metadata: meta.Config.GetMetadata(),
Labels: meta.Config.GetLabels(),
Annotations: meta.Config.GetAnnotations(),
}
// TODO(windows): hcsshim Stats is not implemented, add windows support after
// that is implemented.
return &cs, nil
}

View File

@@ -1,3 +1,5 @@
// +build !windows
/*
Copyright 2017 The Kubernetes Authors.

View File

@@ -0,0 +1,31 @@
// +build windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"github.com/containerd/containerd/errdefs"
"golang.org/x/net/context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)
// UpdateContainerResources updates ContainerConfig of the container.
// TODO(windows): Figure out whether windows support this.
func (c *criService) UpdateContainerResources(ctx context.Context, r *runtime.UpdateContainerResourcesRequest) (*runtime.UpdateContainerResourcesResponse, error) {
return nil, errdefs.ErrNotImplemented
}

View File

@@ -20,11 +20,11 @@ import (
"fmt"
"path"
"path/filepath"
"regexp"
"strconv"
"strings"
"github.com/BurntSushi/toml"
runhcsoptions "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/options"
"github.com/containerd/containerd"
"github.com/containerd/containerd/containers"
"github.com/containerd/containerd/plugin"
@@ -33,7 +33,6 @@ import (
"github.com/containerd/typeurl"
"github.com/docker/distribution/reference"
imagedigest "github.com/opencontainers/go-digest"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
"golang.org/x/net/context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
@@ -51,6 +50,7 @@ const (
errorStartReason = "StartError"
// errorStartExitCode is the exit code when fails to start container.
// 128 is the same with Docker's behavior.
// TODO(windows): Figure out what should be used for windows.
errorStartExitCode = 128
// completeExitReason is the exit reason when container exits with code 0.
completeExitReason = "Completed"
@@ -58,39 +58,16 @@ const (
errorExitReason = "Error"
// oomExitReason is the exit reason when process in container is oom killed.
oomExitReason = "OOMKilled"
)
const (
// defaultSandboxOOMAdj is default omm adj for sandbox container. (kubernetes#47938).
defaultSandboxOOMAdj = -998
// defaultShmSize is the default size of the sandbox shm.
defaultShmSize = int64(1024 * 1024 * 64)
// relativeRootfsPath is the rootfs path relative to bundle path.
relativeRootfsPath = "rootfs"
// sandboxesDir contains all sandbox root. A sandbox root is the running
// directory of the sandbox, all files created for the sandbox will be
// placed under this directory.
sandboxesDir = "sandboxes"
// containersDir contains all container root.
containersDir = "containers"
// According to http://man7.org/linux/man-pages/man5/resolv.conf.5.html:
// "The search list is currently limited to six domains with a total of 256 characters."
maxDNSSearches = 6
// Delimiter used to construct container/sandbox names.
nameDelimiter = "_"
// devShm is the default path of /dev/shm.
devShm = "/dev/shm"
// etcHosts is the default path of /etc/hosts file.
etcHosts = "/etc/hosts"
// etcHostname is the default path of /etc/hostname file.
etcHostname = "/etc/hostname"
// resolvConfPath is the abs path of resolv.conf on host or container.
resolvConfPath = "/etc/resolv.conf"
// hostnameEnv is the key for HOSTNAME env.
hostnameEnv = "HOSTNAME"
)
const (
// criContainerdPrefix is common prefix for cri-containerd
criContainerdPrefix = "io.cri-containerd"
// containerKindLabel is a label key indicating container is sandbox container or application container
@@ -107,14 +84,12 @@ const (
sandboxMetadataExtension = criContainerdPrefix + ".sandbox.metadata"
// containerMetadataExtension is an extension name that identify metadata of container in CreateContainerRequest
containerMetadataExtension = criContainerdPrefix + ".container.metadata"
)
const (
// defaultIfName is the default network interface for the pods
defaultIfName = "eth0"
// networkAttachCount is the minimum number of networks the PodSandbox
// attaches to
networkAttachCount = 2
// runtimeRunhcsV1 is the runtime type for runhcs.
runtimeRunhcsV1 = "io.containerd.runhcs.v1"
)
// makeSandboxName generates sandbox name from sandbox metadata. The name
@@ -141,17 +116,6 @@ func makeContainerName(c *runtime.ContainerMetadata, s *runtime.PodSandboxMetada
}, nameDelimiter)
}
// getCgroupsPath generates container cgroups path.
func getCgroupsPath(cgroupsParent, id string) string {
base := path.Base(cgroupsParent)
if strings.HasSuffix(base, ".slice") {
// For a.slice/b.slice/c.slice, base is c.slice.
// runc systemd cgroup path format is "slice:prefix:name".
return strings.Join([]string{base, "cri-containerd", id}, ":")
}
return filepath.Join(cgroupsParent, id)
}
// getSandboxRootDir returns the root directory for managing sandbox files,
// e.g. hosts files.
func (c *criService) getSandboxRootDir(id string) string {
@@ -176,26 +140,6 @@ func (c *criService) getVolatileContainerRootDir(id string) string {
return filepath.Join(c.config.StateDir, containersDir, id)
}
// getSandboxHostname returns the hostname file path inside the sandbox root directory.
func (c *criService) getSandboxHostname(id string) string {
return filepath.Join(c.getSandboxRootDir(id), "hostname")
}
// getSandboxHosts returns the hosts file path inside the sandbox root directory.
func (c *criService) getSandboxHosts(id string) string {
return filepath.Join(c.getSandboxRootDir(id), "hosts")
}
// getResolvPath returns resolv.conf filepath for specified sandbox.
func (c *criService) getResolvPath(id string) string {
return filepath.Join(c.getSandboxRootDir(id), "resolv.conf")
}
// getSandboxDevShm returns the shm file path inside the sandbox root directory.
func (c *criService) getSandboxDevShm(id string) string {
return filepath.Join(c.getVolatileSandboxRootDir(id), "shm")
}
// criContainerStateToString formats CRI container state to string.
func criContainerStateToString(state runtime.ContainerState) string {
return runtime.ContainerState_name[int32(state)]
@@ -298,49 +242,6 @@ func (c *criService) ensureImageExists(ctx context.Context, ref string, config *
return &newImage, nil
}
func initSelinuxOpts(selinuxOpt *runtime.SELinuxOption) (string, string, error) {
if selinuxOpt == nil {
return "", "", nil
}
// Should ignored selinuxOpts if they are incomplete.
if selinuxOpt.GetUser() == "" ||
selinuxOpt.GetRole() == "" ||
selinuxOpt.GetType() == "" {
return "", "", nil
}
// make sure the format of "level" is correct.
ok, err := checkSelinuxLevel(selinuxOpt.GetLevel())
if err != nil || !ok {
return "", "", err
}
labelOpts := fmt.Sprintf("%s:%s:%s:%s",
selinuxOpt.GetUser(),
selinuxOpt.GetRole(),
selinuxOpt.GetType(),
selinuxOpt.GetLevel())
options, err := label.DupSecOpt(labelOpts)
if err != nil {
return "", "", err
}
return label.InitLabels(options)
}
func checkSelinuxLevel(level string) (bool, error) {
if len(level) == 0 {
return true, nil
}
matched, err := regexp.MatchString(`^s\d(-s\d)??(:c\d{1,4}((.c\d{1,4})?,c\d{1,4})*(.c\d{1,4})?(,c\d{1,4}(.c\d{1,4})?)*)?$`, level)
if err != nil || !matched {
return false, errors.Wrapf(err, "the format of 'level' %q is not correct", level)
}
return true, nil
}
// isInCRIMounts checks whether a destination is in CRI mount list.
func isInCRIMounts(dst string, mounts []*runtime.Mount) bool {
for _, m := range mounts {
@@ -367,15 +268,6 @@ func buildLabels(configLabels map[string]string, containerType string) map[strin
return labels
}
func getPodCNILabels(id string, config *runtime.PodSandboxConfig) map[string]string {
return map[string]string{
"K8S_POD_NAMESPACE": config.GetMetadata().GetNamespace(),
"K8S_POD_NAME": config.GetMetadata().GetName(),
"K8S_POD_INFRA_CONTAINER_ID": id,
"IgnoreUnknown": "1",
}
}
// toRuntimeAuthConfig converts cri plugin auth config to runtime auth config.
func toRuntimeAuthConfig(a criconfig.AuthConfig) *runtime.AuthConfig {
return &runtime.AuthConfig{
@@ -433,6 +325,8 @@ func getRuntimeOptionsType(t string) interface{} {
return &runcoptions.Options{}
case plugin.RuntimeLinuxV1:
return &runctypes.RuncOptions{}
case runtimeRunhcsV1:
return &runhcsoptions.Options{}
default:
return &runtimeoptions.Options{}
}

View File

@@ -0,0 +1,143 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"fmt"
"os"
"path"
"path/filepath"
"regexp"
"strings"
runcapparmor "github.com/opencontainers/runc/libcontainer/apparmor"
runcseccomp "github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/selinux/go-selinux/label"
"github.com/pkg/errors"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)
const (
// defaultSandboxOOMAdj is default omm adj for sandbox container. (kubernetes#47938).
defaultSandboxOOMAdj = -998
// defaultShmSize is the default size of the sandbox shm.
defaultShmSize = int64(1024 * 1024 * 64)
// relativeRootfsPath is the rootfs path relative to bundle path.
relativeRootfsPath = "rootfs"
// According to http://man7.org/linux/man-pages/man5/resolv.conf.5.html:
// "The search list is currently limited to six domains with a total of 256 characters."
maxDNSSearches = 6
// devShm is the default path of /dev/shm.
devShm = "/dev/shm"
// etcHosts is the default path of /etc/hosts file.
etcHosts = "/etc/hosts"
// etcHostname is the default path of /etc/hostname file.
etcHostname = "/etc/hostname"
// resolvConfPath is the abs path of resolv.conf on host or container.
resolvConfPath = "/etc/resolv.conf"
// hostnameEnv is the key for HOSTNAME env.
hostnameEnv = "HOSTNAME"
)
// getCgroupsPath generates container cgroups path.
func getCgroupsPath(cgroupsParent, id string) string {
base := path.Base(cgroupsParent)
if strings.HasSuffix(base, ".slice") {
// For a.slice/b.slice/c.slice, base is c.slice.
// runc systemd cgroup path format is "slice:prefix:name".
return strings.Join([]string{base, "cri-containerd", id}, ":")
}
return filepath.Join(cgroupsParent, id)
}
// getSandboxHostname returns the hostname file path inside the sandbox root directory.
func (c *criService) getSandboxHostname(id string) string {
return filepath.Join(c.getSandboxRootDir(id), "hostname")
}
// getSandboxHosts returns the hosts file path inside the sandbox root directory.
func (c *criService) getSandboxHosts(id string) string {
return filepath.Join(c.getSandboxRootDir(id), "hosts")
}
// getResolvPath returns resolv.conf filepath for specified sandbox.
func (c *criService) getResolvPath(id string) string {
return filepath.Join(c.getSandboxRootDir(id), "resolv.conf")
}
// getSandboxDevShm returns the shm file path inside the sandbox root directory.
func (c *criService) getSandboxDevShm(id string) string {
return filepath.Join(c.getVolatileSandboxRootDir(id), "shm")
}
func initSelinuxOpts(selinuxOpt *runtime.SELinuxOption) (string, string, error) {
if selinuxOpt == nil {
return "", "", nil
}
// Should ignored selinuxOpts if they are incomplete.
if selinuxOpt.GetUser() == "" ||
selinuxOpt.GetRole() == "" ||
selinuxOpt.GetType() == "" {
return "", "", nil
}
// make sure the format of "level" is correct.
ok, err := checkSelinuxLevel(selinuxOpt.GetLevel())
if err != nil || !ok {
return "", "", err
}
labelOpts := fmt.Sprintf("%s:%s:%s:%s",
selinuxOpt.GetUser(),
selinuxOpt.GetRole(),
selinuxOpt.GetType(),
selinuxOpt.GetLevel())
options, err := label.DupSecOpt(labelOpts)
if err != nil {
return "", "", err
}
return label.InitLabels(options)
}
func checkSelinuxLevel(level string) (bool, error) {
if len(level) == 0 {
return true, nil
}
matched, err := regexp.MatchString(`^s\d(-s\d)??(:c\d{1,4}((.c\d{1,4})?,c\d{1,4})*(.c\d{1,4})?(,c\d{1,4}(.c\d{1,4})?)*)?$`, level)
if err != nil || !matched {
return false, errors.Wrapf(err, "the format of 'level' %q is not correct", level)
}
return true, nil
}
func (c *criService) apparmorEnabled() bool {
return runcapparmor.IsEnabled() && !c.config.DisableApparmor
}
func (c *criService) seccompEnabled() bool {
return runcseccomp.IsEnabled()
}
// openLogFile opens/creates a container log file.
func openLogFile(path string) (*os.File, error) {
return os.OpenFile(path, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0640)
}

View File

@@ -0,0 +1,158 @@
// +build windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"os"
"path/filepath"
"syscall"
)
// openLogFile opens/creates a container log file.
// It specifies `FILE_SHARE_DELETE` option to make sure
// log files can be rotated by kubelet.
// TODO(windows): Use golang support after 1.14. (https://github.com/golang/go/issues/32088)
func openLogFile(path string) (*os.File, error) {
path = fixLongPath(path)
if len(path) == 0 {
return nil, syscall.ERROR_FILE_NOT_FOUND
}
pathp, err := syscall.UTF16PtrFromString(path)
if err != nil {
return nil, err
}
createmode := uint32(syscall.OPEN_ALWAYS)
access := uint32(syscall.FILE_APPEND_DATA)
sharemode := uint32(syscall.FILE_SHARE_READ | syscall.FILE_SHARE_WRITE | syscall.FILE_SHARE_DELETE)
h, err := syscall.CreateFile(pathp, access, sharemode, nil, createmode, syscall.FILE_ATTRIBUTE_NORMAL, 0)
if err != nil {
return nil, err
}
return os.NewFile(uintptr(h), path), nil
}
// Copyright (c) 2009 The Go Authors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// fixLongPath returns the extended-length (\\?\-prefixed) form of
// path when needed, in order to avoid the default 260 character file
// path limit imposed by Windows. If path is not easily converted to
// the extended-length form (for example, if path is a relative path
// or contains .. elements), or is short enough, fixLongPath returns
// path unmodified.
//
// See https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247(v=vs.85).aspx#maxpath
//
// This is copied from https://golang.org/src/path/filepath/path_windows.go.
func fixLongPath(path string) string {
// Do nothing (and don't allocate) if the path is "short".
// Empirically (at least on the Windows Server 2013 builder),
// the kernel is arbitrarily okay with < 248 bytes. That
// matches what the docs above say:
// "When using an API to create a directory, the specified
// path cannot be so long that you cannot append an 8.3 file
// name (that is, the directory name cannot exceed MAX_PATH
// minus 12)." Since MAX_PATH is 260, 260 - 12 = 248.
//
// The MSDN docs appear to say that a normal path that is 248 bytes long
// will work; empirically the path must be less then 248 bytes long.
if len(path) < 248 {
// Don't fix. (This is how Go 1.7 and earlier worked,
// not automatically generating the \\?\ form)
return path
}
// The extended form begins with \\?\, as in
// \\?\c:\windows\foo.txt or \\?\UNC\server\share\foo.txt.
// The extended form disables evaluation of . and .. path
// elements and disables the interpretation of / as equivalent
// to \. The conversion here rewrites / to \ and elides
// . elements as well as trailing or duplicate separators. For
// simplicity it avoids the conversion entirely for relative
// paths or paths containing .. elements. For now,
// \\server\share paths are not converted to
// \\?\UNC\server\share paths because the rules for doing so
// are less well-specified.
if len(path) >= 2 && path[:2] == `\\` {
// Don't canonicalize UNC paths.
return path
}
if !filepath.IsAbs(path) {
// Relative path
return path
}
const prefix = `\\?`
pathbuf := make([]byte, len(prefix)+len(path)+len(`\`))
copy(pathbuf, prefix)
n := len(path)
r, w := 0, len(prefix)
for r < n {
switch {
case os.IsPathSeparator(path[r]):
// empty block
r++
case path[r] == '.' && (r+1 == n || os.IsPathSeparator(path[r+1])):
// /./
r++
case r+1 < n && path[r] == '.' && path[r+1] == '.' && (r+2 == n || os.IsPathSeparator(path[r+2])):
// /../ is currently unhandled
return path
default:
pathbuf[w] = '\\'
w++
for ; r < n && !os.IsPathSeparator(path[r]); r++ {
pathbuf[w] = path[r]
w++
}
}
}
// A drive's root directory needs a trailing \
if w == len(`\\?\c:`) {
pathbuf[w] = '\\'
w++
}
return string(pathbuf[:w])
}

View File

@@ -25,6 +25,7 @@ import (
)
// ImageFsInfo returns information of the filesystem that is used to store images.
// TODO(windows): Usage for windows is always 0 right now. Support this for windows.
func (c *criService) ImageFsInfo(ctx context.Context, r *runtime.ImageFsInfoRequest) (*runtime.ImageFsInfoResponse, error) {
snapshots := c.snapshotStore.List()
timestamp := time.Now().UnixNano()

View File

@@ -24,7 +24,6 @@ import (
"syscall"
"github.com/containerd/containerd/cio"
"github.com/containerd/fifo"
"golang.org/x/net/context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)
@@ -113,7 +112,7 @@ func newStdioPipes(fifos *cio.FIFOSet) (_ *stdioPipes, _ *wgCloser, err error) {
}()
if fifos.Stdin != "" {
if f, err = fifo.OpenFifo(ctx, fifos.Stdin, syscall.O_WRONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
if f, err = openPipe(ctx, fifos.Stdin, syscall.O_WRONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
return nil, nil, err
}
p.stdin = f
@@ -121,7 +120,7 @@ func newStdioPipes(fifos *cio.FIFOSet) (_ *stdioPipes, _ *wgCloser, err error) {
}
if fifos.Stdout != "" {
if f, err = fifo.OpenFifo(ctx, fifos.Stdout, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
if f, err = openPipe(ctx, fifos.Stdout, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
return nil, nil, err
}
p.stdout = f
@@ -129,7 +128,7 @@ func newStdioPipes(fifos *cio.FIFOSet) (_ *stdioPipes, _ *wgCloser, err error) {
}
if fifos.Stderr != "" {
if f, err = fifo.OpenFifo(ctx, fifos.Stderr, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
if f, err = openPipe(ctx, fifos.Stderr, syscall.O_RDONLY|syscall.O_CREAT|syscall.O_NONBLOCK, 0700); err != nil {
return nil, nil, err
}
p.stderr = f

View File

@@ -0,0 +1,31 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package io
import (
"io"
"os"
"github.com/containerd/fifo"
"golang.org/x/net/context"
)
func openPipe(ctx context.Context, fn string, flag int, perm os.FileMode) (io.ReadWriteCloser, error) {
return fifo.OpenFifo(ctx, fn, flag, perm)
}

View File

@@ -0,0 +1,81 @@
// +build windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package io
import (
"io"
"net"
"os"
"sync"
winio "github.com/Microsoft/go-winio"
"github.com/pkg/errors"
"golang.org/x/net/context"
)
type pipe struct {
l net.Listener
con net.Conn
conErr error
conWg sync.WaitGroup
}
func openPipe(ctx context.Context, fn string, flag int, perm os.FileMode) (io.ReadWriteCloser, error) {
l, err := winio.ListenPipe(fn, nil)
if err != nil {
return nil, err
}
p := &pipe{l: l}
p.conWg.Add(1)
go func() {
defer p.conWg.Done()
c, err := l.Accept()
if err != nil {
p.conErr = err
return
}
p.con = c
}()
return p, nil
}
func (p *pipe) Write(b []byte) (int, error) {
p.conWg.Wait()
if p.conErr != nil {
return 0, errors.Wrap(p.conErr, "connection error")
}
return p.con.Write(b)
}
func (p *pipe) Read(b []byte) (int, error) {
p.conWg.Wait()
if p.conErr != nil {
return 0, errors.Wrap(p.conErr, "connection error")
}
return p.con.Read(b)
}
func (p *pipe) Close() error {
p.l.Close()
p.conWg.Wait()
if p.con != nil {
return p.con.Close()
}
return p.conErr
}

View File

@@ -20,6 +20,7 @@ import (
"io/ioutil"
"os"
"path/filepath"
goruntime "runtime"
"time"
"github.com/containerd/containerd"
@@ -407,7 +408,8 @@ func (c *criService) loadSandbox(ctx context.Context, cntr containerd.Container)
sandbox.Container = cntr
// Load network namespace.
if meta.Config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
if goruntime.GOOS != "windows" &&
meta.Config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
// Don't need to load netns for host network sandbox.
return sandbox, nil
}

View File

@@ -1,5 +1,5 @@
/*
Copyright 2017 The Kubernetes Authors.
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -17,16 +17,7 @@ limitations under the License.
package server
import (
"bytes"
"fmt"
"io"
"os/exec"
"strings"
"github.com/containerd/containerd/log"
"github.com/containernetworking/plugins/pkg/ns"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/net/context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
@@ -45,82 +36,3 @@ func (c *criService) PortForward(ctx context.Context, r *runtime.PortForwardRequ
// TODO(random-liu): Verify that ports are exposed.
return c.streamServer.GetPortForward(r)
}
// portForward requires `socat` on the node. It uses netns to enter the sandbox namespace,
// and run `socat` insidethe namespace to forward stream for a specific port. The `socat`
// command keeps running until it exits or client disconnect.
func (c *criService) portForward(ctx context.Context, id string, port int32, stream io.ReadWriter) error {
s, err := c.sandboxStore.Get(id)
if err != nil {
return errors.Wrapf(err, "failed to find sandbox %q in store", id)
}
var netNSDo func(func(ns.NetNS) error) error
// netNSPath is the network namespace path for logging.
var netNSPath string
securityContext := s.Config.GetLinux().GetSecurityContext()
hostNet := securityContext.GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE
if !hostNet {
if closed, err := s.NetNS.Closed(); err != nil {
return errors.Wrapf(err, "failed to check netwok namespace closed for sandbox %q", id)
} else if closed {
return errors.Errorf("network namespace for sandbox %q is closed", id)
}
netNSDo = s.NetNS.Do
netNSPath = s.NetNS.GetPath()
} else {
// Run the function directly for host network.
netNSDo = func(do func(_ ns.NetNS) error) error {
return do(nil)
}
netNSPath = "host"
}
socat, err := exec.LookPath("socat")
if err != nil {
return errors.Wrap(err, "failed to find socat")
}
// Check https://linux.die.net/man/1/socat for meaning of the options.
args := []string{socat, "-", fmt.Sprintf("TCP4:localhost:%d", port)}
log.G(ctx).Infof("Executing port forwarding command %q in network namespace %q", strings.Join(args, " "), netNSPath)
err = netNSDo(func(_ ns.NetNS) error {
cmd := exec.Command(args[0], args[1:]...)
cmd.Stdout = stream
stderr := new(bytes.Buffer)
cmd.Stderr = stderr
// If we use Stdin, command.Run() won't return until the goroutine that's copying
// from stream finishes. Unfortunately, if you have a client like telnet connected
// via port forwarding, as long as the user's telnet client is connected to the user's
// local listener that port forwarding sets up, the telnet session never exits. This
// means that even if socat has finished running, command.Run() won't ever return
// (because the client still has the connection and stream open).
//
// The work around is to use StdinPipe(), as Wait() (called by Run()) closes the pipe
// when the command (socat) exits.
in, err := cmd.StdinPipe()
if err != nil {
return errors.Wrap(err, "failed to create stdin pipe")
}
go func() {
if _, err := io.Copy(in, stream); err != nil {
logrus.WithError(err).Errorf("Failed to copy port forward input for %q port %d", id, port)
}
in.Close()
logrus.Debugf("Finish copying port forward input for %q port %d", id, port)
}()
if err := cmd.Run(); err != nil {
return errors.Errorf("socat command returns error: %v, stderr: %q", err, stderr.String())
}
return nil
})
if err != nil {
return errors.Wrapf(err, "failed to execute portforward in network namespace %q", netNSPath)
}
log.G(ctx).Infof("Finish port forwarding for %q port %d", id, port)
return nil
}

View File

@@ -0,0 +1,113 @@
// +build !windows
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"bytes"
"fmt"
"io"
"os/exec"
"strings"
"github.com/containerd/containerd/log"
"github.com/containernetworking/plugins/pkg/ns"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/net/context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)
// portForward requires `socat` on the node. It uses netns to enter the sandbox namespace,
// and run `socat` inside the namespace to forward stream for a specific port. The `socat`
// command keeps running until it exits or client disconnect.
func (c *criService) portForward(ctx context.Context, id string, port int32, stream io.ReadWriter) error {
s, err := c.sandboxStore.Get(id)
if err != nil {
return errors.Wrapf(err, "failed to find sandbox %q in store", id)
}
var netNSDo func(func(ns.NetNS) error) error
// netNSPath is the network namespace path for logging.
var netNSPath string
securityContext := s.Config.GetLinux().GetSecurityContext()
hostNet := securityContext.GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE
if !hostNet {
if closed, err := s.NetNS.Closed(); err != nil {
return errors.Wrapf(err, "failed to check netwok namespace closed for sandbox %q", id)
} else if closed {
return errors.Errorf("network namespace for sandbox %q is closed", id)
}
netNSDo = s.NetNS.Do
netNSPath = s.NetNS.GetPath()
} else {
// Run the function directly for host network.
netNSDo = func(do func(_ ns.NetNS) error) error {
return do(nil)
}
netNSPath = "host"
}
socat, err := exec.LookPath("socat")
if err != nil {
return errors.Wrap(err, "failed to find socat")
}
// Check https://linux.die.net/man/1/socat for meaning of the options.
args := []string{socat, "-", fmt.Sprintf("TCP4:localhost:%d", port)}
log.G(ctx).Infof("Executing port forwarding command %q in network namespace %q", strings.Join(args, " "), netNSPath)
err = netNSDo(func(_ ns.NetNS) error {
cmd := exec.Command(args[0], args[1:]...)
cmd.Stdout = stream
stderr := new(bytes.Buffer)
cmd.Stderr = stderr
// If we use Stdin, command.Run() won't return until the goroutine that's copying
// from stream finishes. Unfortunately, if you have a client like telnet connected
// via port forwarding, as long as the user's telnet client is connected to the user's
// local listener that port forwarding sets up, the telnet session never exits. This
// means that even if socat has finished running, command.Run() won't ever return
// (because the client still has the connection and stream open).
//
// The work around is to use StdinPipe(), as Wait() (called by Run()) closes the pipe
// when the command (socat) exits.
in, err := cmd.StdinPipe()
if err != nil {
return errors.Wrap(err, "failed to create stdin pipe")
}
go func() {
if _, err := io.Copy(in, stream); err != nil {
logrus.WithError(err).Errorf("Failed to copy port forward input for %q port %d", id, port)
}
in.Close()
logrus.Debugf("Finish copying port forward input for %q port %d", id, port)
}()
if err := cmd.Run(); err != nil {
return errors.Errorf("socat command returns error: %v, stderr: %q", err, stderr.String())
}
return nil
})
if err != nil {
return errors.Wrapf(err, "failed to execute portforward in network namespace %q", netNSPath)
}
log.G(ctx).Infof("Finish port forwarding for %q port %d", id, port)
return nil
}

View File

@@ -0,0 +1,80 @@
// +build windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"bytes"
"fmt"
"io"
"github.com/pkg/errors"
"golang.org/x/net/context"
"k8s.io/utils/exec"
"github.com/containerd/cri/pkg/ioutil"
sandboxstore "github.com/containerd/cri/pkg/store/sandbox"
)
func (c *criService) portForward(ctx context.Context, id string, port int32, stream io.ReadWriter) error {
stdout := ioutil.NewNopWriteCloser(stream)
stderrBuffer := new(bytes.Buffer)
stderr := ioutil.NewNopWriteCloser(stderrBuffer)
// localhost is resolved to 127.0.0.1 in ipv4, and ::1 in ipv6.
// Explicitly using ipv4 IP address in here to avoid flakiness.
cmd := []string{"wincat.exe", "127.0.0.1", fmt.Sprint(port)}
err := c.execInSandbox(ctx, id, cmd, stream, stdout, stderr)
if err != nil {
return errors.Wrapf(err, "failed to execute port forward in sandbox: %s", stderrBuffer.String())
}
return nil
}
func (c *criService) execInSandbox(ctx context.Context, sandboxID string, cmd []string, stdin io.Reader, stdout, stderr io.WriteCloser) error {
// Get sandbox from our sandbox store.
sb, err := c.sandboxStore.Get(sandboxID)
if err != nil {
return errors.Wrapf(err, "failed to find sandbox %q in store", sandboxID)
}
// Check the sandbox state
state := sb.Status.Get().State
if state != sandboxstore.StateReady {
return errors.Errorf("sandbox is in %s state", fmt.Sprint(state))
}
opts := execOptions{
cmd: cmd,
stdin: stdin,
stdout: stdout,
stderr: stderr,
tty: false,
resize: nil,
}
exitCode, err := c.execInternal(ctx, sb.Container, sandboxID, opts)
if err != nil {
return errors.Wrap(err, "failed to exec in sandbox")
}
if *exitCode == 0 {
return nil
}
return &exec.CodeExitError{
Err: errors.Errorf("error executing command %v, exit code %d", cmd, *exitCode),
Code: int(*exitCode),
}
}

View File

@@ -18,26 +18,20 @@ package server
import (
"encoding/json"
"fmt"
"math"
"os"
goruntime "runtime"
"strings"
"github.com/containerd/containerd"
containerdio "github.com/containerd/containerd/cio"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/log"
"github.com/containerd/containerd/oci"
"github.com/containerd/containerd/plugin"
cni "github.com/containerd/go-cni"
"github.com/containerd/typeurl"
"github.com/davecgh/go-spew/spew"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/net/context"
"golang.org/x/sys/unix"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
"k8s.io/kubernetes/pkg/util/bandwidth"
@@ -110,10 +104,14 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
}
log.G(ctx).Debugf("Use OCI %+v for sandbox %q", ociRuntime, id)
securityContext := config.GetLinux().GetSecurityContext()
//Create Network Namespace if it is not in host network
hostNet := securityContext.GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE
if !hostNet {
podNetwork := true
// Pod network is always needed on windows.
if goruntime.GOOS != "windows" &&
config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
// Pod network is not needed on linux with host network.
podNetwork = false
}
if podNetwork {
// If it is not in host network namespace then create a namespace and set the sandbox
// handle. NetNSPath in sandbox metadata and NetNS is non empty only for non host network
// namespaces. If the pod is in host network namespace then both are empty and should not
@@ -153,39 +151,19 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
}
// Create sandbox container.
spec, err := c.generateSandboxContainerSpec(id, config, &image.ImageSpec.Config, sandbox.NetNSPath, ociRuntime.PodAnnotations)
// NOTE: sandboxContainerSpec SHOULD NOT have side
// effect, e.g. accessing/creating files, so that we can test
// it safely.
spec, err := c.sandboxContainerSpec(id, config, &image.ImageSpec.Config, sandbox.NetNSPath, ociRuntime.PodAnnotations)
if err != nil {
return nil, errors.Wrap(err, "failed to generate sandbox container spec")
}
log.G(ctx).Debugf("Sandbox container %q spec: %#+v", id, spew.NewFormatter(spec))
var specOpts []oci.SpecOpts
userstr, err := generateUserString(
"",
securityContext.GetRunAsUser(),
securityContext.GetRunAsGroup(),
)
// Generate spec options that will be applied to the spec later.
specOpts, err := c.sandboxContainerSpecOpts(config, &image.ImageSpec.Config)
if err != nil {
return nil, errors.Wrap(err, "failed to generate user string")
}
if userstr == "" {
// Lastly, since no user override was passed via CRI try to set via OCI
// Image
userstr = image.ImageSpec.Config.User
}
if userstr != "" {
specOpts = append(specOpts, oci.WithUser(userstr))
}
seccompSpecOpts, err := generateSeccompSpecOpts(
securityContext.GetSeccompProfilePath(),
securityContext.GetPrivileged(),
c.seccompEnabled)
if err != nil {
return nil, errors.Wrap(err, "failed to generate seccomp spec opts")
}
if seccompSpecOpts != nil {
specOpts = append(specOpts, seccompSpecOpts)
return nil, errors.Wrap(err, "failed to generate sanbdox container spec options")
}
sandboxLabels := buildLabels(config.Labels, containerKindSandbox)
@@ -246,14 +224,14 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
}
}()
// Setup sandbox /dev/shm, /etc/hosts, /etc/resolv.conf and /etc/hostname.
// Setup files required for the sandbox.
if err = c.setupSandboxFiles(id, config); err != nil {
return nil, errors.Wrapf(err, "failed to setup sandbox files")
}
defer func() {
if retErr != nil {
if err = c.unmountSandboxFiles(id, config); err != nil {
log.G(ctx).WithError(err).Errorf("Failed to unmount sandbox files in %q",
if err = c.cleanupSandboxFiles(id, config); err != nil {
log.G(ctx).WithError(err).Errorf("Failed to cleanup sandbox files in %q",
sandboxRootDir)
}
}
@@ -269,11 +247,7 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
log.G(ctx).Tracef("Create sandbox container (id=%q, name=%q).",
id, name)
var taskOpts []containerd.NewTaskOpts
// TODO(random-liu): Remove this after shim v1 is deprecated.
if c.config.NoPivot && ociRuntime.Type == plugin.RuntimeRuncV1 {
taskOpts = append(taskOpts, containerd.WithNoPivotRoot)
}
taskOpts := c.taskOpts(ociRuntime.Type)
// We don't need stdio for sandbox container.
task, err := container.NewTask(ctx, containerdio.NullIO, taskOpts...)
if err != nil {
@@ -327,222 +301,6 @@ func (c *criService) RunPodSandbox(ctx context.Context, r *runtime.RunPodSandbox
return &runtime.RunPodSandboxResponse{PodSandboxId: id}, nil
}
func (c *criService) generateSandboxContainerSpec(id string, config *runtime.PodSandboxConfig,
imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (*runtimespec.Spec, error) {
// Creates a spec Generator with the default spec.
// TODO(random-liu): [P1] Compare the default settings with docker and containerd default.
specOpts := []oci.SpecOpts{
customopts.WithoutRunMount,
customopts.WithoutDefaultSecuritySettings,
customopts.WithRelativeRoot(relativeRootfsPath),
oci.WithEnv(imageConfig.Env),
oci.WithRootFSReadonly(),
oci.WithHostname(config.GetHostname()),
}
if imageConfig.WorkingDir != "" {
specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
}
if len(imageConfig.Entrypoint) == 0 && len(imageConfig.Cmd) == 0 {
// Pause image must have entrypoint or cmd.
return nil, errors.Errorf("invalid empty entrypoint and cmd in image config %+v", imageConfig)
}
specOpts = append(specOpts, oci.WithProcessArgs(append(imageConfig.Entrypoint, imageConfig.Cmd...)...))
// TODO(random-liu): [P2] Consider whether to add labels and annotations to the container.
// Set cgroups parent.
if c.config.DisableCgroup {
specOpts = append(specOpts, customopts.WithDisabledCgroups)
} else {
if config.GetLinux().GetCgroupParent() != "" {
cgroupsPath := getCgroupsPath(config.GetLinux().GetCgroupParent(), id)
specOpts = append(specOpts, oci.WithCgroup(cgroupsPath))
}
}
// When cgroup parent is not set, containerd-shim will create container in a child cgroup
// of the cgroup itself is in.
// TODO(random-liu): [P2] Set default cgroup path if cgroup parent is not specified.
// Set namespace options.
var (
securityContext = config.GetLinux().GetSecurityContext()
nsOptions = securityContext.GetNamespaceOptions()
)
if nsOptions.GetNetwork() == runtime.NamespaceMode_NODE {
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.NetworkNamespace))
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UTSNamespace))
} else {
//TODO(Abhi): May be move this to containerd spec opts (WithLinuxSpaceOption)
specOpts = append(specOpts, oci.WithLinuxNamespace(
runtimespec.LinuxNamespace{
Type: runtimespec.NetworkNamespace,
Path: nsPath,
}))
}
if nsOptions.GetPid() == runtime.NamespaceMode_NODE {
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.PIDNamespace))
}
if nsOptions.GetIpc() == runtime.NamespaceMode_NODE {
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.IPCNamespace))
}
// It's fine to generate the spec before the sandbox /dev/shm
// is actually created.
sandboxDevShm := c.getSandboxDevShm(id)
if nsOptions.GetIpc() == runtime.NamespaceMode_NODE {
sandboxDevShm = devShm
}
specOpts = append(specOpts, oci.WithMounts([]runtimespec.Mount{
{
Source: sandboxDevShm,
Destination: devShm,
Type: "bind",
Options: []string{"rbind", "ro"},
},
}))
selinuxOpt := securityContext.GetSelinuxOptions()
processLabel, mountLabel, err := initSelinuxOpts(selinuxOpt)
if err != nil {
return nil, errors.Wrapf(err, "failed to init selinux options %+v", securityContext.GetSelinuxOptions())
}
supplementalGroups := securityContext.GetSupplementalGroups()
specOpts = append(specOpts,
customopts.WithSelinuxLabels(processLabel, mountLabel),
customopts.WithSupplementalGroups(supplementalGroups),
)
// Add sysctls
sysctls := config.GetLinux().GetSysctls()
specOpts = append(specOpts, customopts.WithSysctls(sysctls))
// Note: LinuxSandboxSecurityContext does not currently provide an apparmor profile
if !c.config.DisableCgroup {
specOpts = append(specOpts, customopts.WithDefaultSandboxShares)
}
specOpts = append(specOpts, customopts.WithPodOOMScoreAdj(int(defaultSandboxOOMAdj), c.config.RestrictOOMScoreAdj))
for pKey, pValue := range getPassthroughAnnotations(config.Annotations,
runtimePodAnnotations) {
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
}
specOpts = append(specOpts,
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeSandbox),
customopts.WithAnnotation(annotations.SandboxID, id),
customopts.WithAnnotation(annotations.SandboxLogDir, config.GetLogDirectory()),
)
return runtimeSpec(id, specOpts...)
}
// setupSandboxFiles sets up necessary sandbox files including /dev/shm, /etc/hosts,
// /etc/resolv.conf and /etc/hostname.
func (c *criService) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
sandboxEtcHostname := c.getSandboxHostname(id)
hostname := config.GetHostname()
if hostname == "" {
var err error
hostname, err = c.os.Hostname()
if err != nil {
return errors.Wrap(err, "failed to get hostname")
}
}
if err := c.os.WriteFile(sandboxEtcHostname, []byte(hostname+"\n"), 0644); err != nil {
return errors.Wrapf(err, "failed to write hostname to %q", sandboxEtcHostname)
}
// TODO(random-liu): Consider whether we should maintain /etc/hosts and /etc/resolv.conf in kubelet.
sandboxEtcHosts := c.getSandboxHosts(id)
if err := c.os.CopyFile(etcHosts, sandboxEtcHosts, 0644); err != nil {
return errors.Wrapf(err, "failed to generate sandbox hosts file %q", sandboxEtcHosts)
}
// Set DNS options. Maintain a resolv.conf for the sandbox.
var err error
resolvContent := ""
if dnsConfig := config.GetDnsConfig(); dnsConfig != nil {
resolvContent, err = parseDNSOptions(dnsConfig.Servers, dnsConfig.Searches, dnsConfig.Options)
if err != nil {
return errors.Wrapf(err, "failed to parse sandbox DNSConfig %+v", dnsConfig)
}
}
resolvPath := c.getResolvPath(id)
if resolvContent == "" {
// copy host's resolv.conf to resolvPath
err = c.os.CopyFile(resolvConfPath, resolvPath, 0644)
if err != nil {
return errors.Wrapf(err, "failed to copy host's resolv.conf to %q", resolvPath)
}
} else {
err = c.os.WriteFile(resolvPath, []byte(resolvContent), 0644)
if err != nil {
return errors.Wrapf(err, "failed to write resolv content to %q", resolvPath)
}
}
// Setup sandbox /dev/shm.
if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE {
if _, err := c.os.Stat(devShm); err != nil {
return errors.Wrapf(err, "host %q is not available for host ipc", devShm)
}
} else {
sandboxDevShm := c.getSandboxDevShm(id)
if err := c.os.MkdirAll(sandboxDevShm, 0700); err != nil {
return errors.Wrap(err, "failed to create sandbox shm")
}
shmproperty := fmt.Sprintf("mode=1777,size=%d", defaultShmSize)
if err := c.os.Mount("shm", sandboxDevShm, "tmpfs", uintptr(unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV), shmproperty); err != nil {
return errors.Wrap(err, "failed to mount sandbox shm")
}
}
return nil
}
// parseDNSOptions parse DNS options into resolv.conf format content,
// if none option is specified, will return empty with no error.
func parseDNSOptions(servers, searches, options []string) (string, error) {
resolvContent := ""
if len(searches) > maxDNSSearches {
return "", errors.Errorf("DNSOption.Searches has more than %d domains", maxDNSSearches)
}
if len(searches) > 0 {
resolvContent += fmt.Sprintf("search %s\n", strings.Join(searches, " "))
}
if len(servers) > 0 {
resolvContent += fmt.Sprintf("nameserver %s\n", strings.Join(servers, "\nnameserver "))
}
if len(options) > 0 {
resolvContent += fmt.Sprintf("options %s\n", strings.Join(options, " "))
}
return resolvContent, nil
}
// unmountSandboxFiles unmount some sandbox files, we rely on the removal of sandbox root directory to
// remove these files. Unmount should *NOT* return error if the mount point is already unmounted.
func (c *criService) unmountSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetIpc() != runtime.NamespaceMode_NODE {
path, err := c.os.FollowSymlinkInScope(c.getSandboxDevShm(id), "/")
if err != nil {
return errors.Wrap(err, "failed to follow symlink")
}
if err := c.os.Unmount(path); err != nil && !os.IsNotExist(err) {
return errors.Wrapf(err, "failed to unmount %q", path)
}
}
return nil
}
// setupPodNetwork setups up the network for a pod
func (c *criService) setupPodNetwork(ctx context.Context, sandbox *sandboxstore.Sandbox) error {
var (
@@ -554,22 +312,12 @@ func (c *criService) setupPodNetwork(ctx context.Context, sandbox *sandboxstore.
return errors.New("cni config not initialized")
}
labels := getPodCNILabels(id, config)
// Will return an error if the bandwidth limitation has the wrong unit
// or an unreasonable valure see validateBandwidthIsReasonable()
bandWidth, err := toCNIBandWidth(config.Annotations)
opts, err := cniNamespaceOpts(id, config)
if err != nil {
return errors.Wrap(err, "failed to get bandwidth info from annotations")
return errors.Wrap(err, "get cni namespace options")
}
result, err := c.netPlugin.Setup(ctx, id,
path,
cni.WithLabels(labels),
cni.WithCapabilityPortMap(toCNIPortMappings(config.GetPortMappings())),
cni.WithCapabilityBandWidth(*bandWidth),
)
result, err := c.netPlugin.Setup(ctx, id, path, opts...)
if err != nil {
return err
}
@@ -587,11 +335,54 @@ func (c *criService) setupPodNetwork(ctx context.Context, sandbox *sandboxstore.
return errors.Errorf("failed to find network info for sandbox %q", id)
}
// cniNamespaceOpts get CNI namespace options from sandbox config.
func cniNamespaceOpts(id string, config *runtime.PodSandboxConfig) ([]cni.NamespaceOpts, error) {
opts := []cni.NamespaceOpts{
cni.WithLabels(toCNILabels(id, config)),
}
portMappings := toCNIPortMappings(config.GetPortMappings())
if len(portMappings) > 0 {
opts = append(opts, cni.WithCapabilityPortMap(portMappings))
}
// Will return an error if the bandwidth limitation has the wrong unit
// or an unreasonable value see validateBandwidthIsReasonable()
bandWidth, err := toCNIBandWidth(config.Annotations)
if err != nil {
return nil, err
}
if bandWidth != nil {
opts = append(opts, cni.WithCapabilityBandWidth(*bandWidth))
}
dns := toCNIDNS(config.GetDnsConfig())
if dns != nil {
opts = append(opts, cni.WithCapabilityDNS(*dns))
}
return opts, nil
}
// toCNILabels adds pod metadata into CNI labels.
func toCNILabels(id string, config *runtime.PodSandboxConfig) map[string]string {
return map[string]string{
"K8S_POD_NAMESPACE": config.GetMetadata().GetNamespace(),
"K8S_POD_NAME": config.GetMetadata().GetName(),
"K8S_POD_INFRA_CONTAINER_ID": id,
"IgnoreUnknown": "1",
}
}
// toCNIBandWidth converts CRI annotations to CNI bandwidth.
func toCNIBandWidth(annotations map[string]string) (*cni.BandWidth, error) {
ingress, egress, err := bandwidth.ExtractPodBandwidthResources(annotations)
if err != nil {
return nil, errors.Errorf("reading pod bandwidth annotations: %v", err)
return nil, errors.Wrap(err, "reading pod bandwidth annotations")
}
if ingress == nil && egress == nil {
return nil, nil
}
bandWidth := &cni.BandWidth{}
@@ -629,6 +420,18 @@ func toCNIPortMappings(criPortMappings []*runtime.PortMapping) []cni.PortMapping
return portMappings
}
// toCNIDNS converts CRI DNSConfig to CNI.
func toCNIDNS(dns *runtime.DNSConfig) *cni.DNS {
if dns == nil {
return nil
}
return &cni.DNS{
Servers: dns.GetServers(),
Searches: dns.GetSearches(),
Options: dns.GetOptions(),
}
}
// selectPodIPs select an ip from the ip list. It prefers ipv4 more than ipv6
// and returns the additional ips
// TODO(random-liu): Revisit the ip order in the ipv6 beta stage. (cri#1278)
@@ -688,6 +491,7 @@ func (c *criService) getSandboxRuntime(config *runtime.PodSandboxConfig, runtime
// Note: If the workload is marked untrusted but requests privileged, this can be granted, as the
// runtime may support this. For example, in a virtual-machine isolated runtime, privileged
// is a supported option, granting the workload to access the entire guest VM instead of host.
// TODO(windows): Deprecate this so that we don't need to handle it for windows.
if hostAccessingSandbox(config) {
return criconfig.Runtime{}, errors.New("untrusted workload with host access is not allowed")
}

View File

@@ -0,0 +1,298 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"fmt"
"os"
"strings"
"github.com/containerd/containerd"
"github.com/containerd/containerd/oci"
"github.com/containerd/containerd/plugin"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
"golang.org/x/sys/unix"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
"github.com/containerd/cri/pkg/annotations"
customopts "github.com/containerd/cri/pkg/containerd/opts"
osinterface "github.com/containerd/cri/pkg/os"
)
func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxConfig,
imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (*runtimespec.Spec, error) {
// Creates a spec Generator with the default spec.
// TODO(random-liu): [P1] Compare the default settings with docker and containerd default.
specOpts := []oci.SpecOpts{
customopts.WithoutRunMount,
customopts.WithoutDefaultSecuritySettings,
customopts.WithRelativeRoot(relativeRootfsPath),
oci.WithEnv(imageConfig.Env),
oci.WithRootFSReadonly(),
oci.WithHostname(config.GetHostname()),
}
if imageConfig.WorkingDir != "" {
specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
}
if len(imageConfig.Entrypoint) == 0 && len(imageConfig.Cmd) == 0 {
// Pause image must have entrypoint or cmd.
return nil, errors.Errorf("invalid empty entrypoint and cmd in image config %+v", imageConfig)
}
specOpts = append(specOpts, oci.WithProcessArgs(append(imageConfig.Entrypoint, imageConfig.Cmd...)...))
// Set cgroups parent.
if c.config.DisableCgroup {
specOpts = append(specOpts, customopts.WithDisabledCgroups)
} else {
if config.GetLinux().GetCgroupParent() != "" {
cgroupsPath := getCgroupsPath(config.GetLinux().GetCgroupParent(), id)
specOpts = append(specOpts, oci.WithCgroup(cgroupsPath))
}
}
// When cgroup parent is not set, containerd-shim will create container in a child cgroup
// of the cgroup itself is in.
// TODO(random-liu): [P2] Set default cgroup path if cgroup parent is not specified.
// Set namespace options.
var (
securityContext = config.GetLinux().GetSecurityContext()
nsOptions = securityContext.GetNamespaceOptions()
)
if nsOptions.GetNetwork() == runtime.NamespaceMode_NODE {
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.NetworkNamespace))
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.UTSNamespace))
} else {
specOpts = append(specOpts, oci.WithLinuxNamespace(
runtimespec.LinuxNamespace{
Type: runtimespec.NetworkNamespace,
Path: nsPath,
}))
}
if nsOptions.GetPid() == runtime.NamespaceMode_NODE {
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.PIDNamespace))
}
if nsOptions.GetIpc() == runtime.NamespaceMode_NODE {
specOpts = append(specOpts, customopts.WithoutNamespace(runtimespec.IPCNamespace))
}
// It's fine to generate the spec before the sandbox /dev/shm
// is actually created.
sandboxDevShm := c.getSandboxDevShm(id)
if nsOptions.GetIpc() == runtime.NamespaceMode_NODE {
sandboxDevShm = devShm
}
specOpts = append(specOpts, oci.WithMounts([]runtimespec.Mount{
{
Source: sandboxDevShm,
Destination: devShm,
Type: "bind",
Options: []string{"rbind", "ro"},
},
}))
selinuxOpt := securityContext.GetSelinuxOptions()
processLabel, mountLabel, err := initSelinuxOpts(selinuxOpt)
if err != nil {
return nil, errors.Wrapf(err, "failed to init selinux options %+v", securityContext.GetSelinuxOptions())
}
supplementalGroups := securityContext.GetSupplementalGroups()
specOpts = append(specOpts,
customopts.WithSelinuxLabels(processLabel, mountLabel),
customopts.WithSupplementalGroups(supplementalGroups),
)
// Add sysctls
sysctls := config.GetLinux().GetSysctls()
specOpts = append(specOpts, customopts.WithSysctls(sysctls))
// Note: LinuxSandboxSecurityContext does not currently provide an apparmor profile
if !c.config.DisableCgroup {
specOpts = append(specOpts, customopts.WithDefaultSandboxShares)
}
specOpts = append(specOpts, customopts.WithPodOOMScoreAdj(int(defaultSandboxOOMAdj), c.config.RestrictOOMScoreAdj))
for pKey, pValue := range getPassthroughAnnotations(config.Annotations,
runtimePodAnnotations) {
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
}
specOpts = append(specOpts,
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeSandbox),
customopts.WithAnnotation(annotations.SandboxID, id),
customopts.WithAnnotation(annotations.SandboxLogDir, config.GetLogDirectory()),
)
return runtimeSpec(id, specOpts...)
}
// sandboxContainerSpecOpts generates OCI spec options for
// the sandbox container.
func (c *criService) sandboxContainerSpecOpts(config *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
var (
securityContext = config.GetLinux().GetSecurityContext()
specOpts []oci.SpecOpts
)
seccompSpecOpts, err := generateSeccompSpecOpts(
securityContext.GetSeccompProfilePath(),
securityContext.GetPrivileged(),
c.seccompEnabled())
if err != nil {
return nil, errors.Wrap(err, "failed to generate seccomp spec opts")
}
if seccompSpecOpts != nil {
specOpts = append(specOpts, seccompSpecOpts)
}
userstr, err := generateUserString(
"",
securityContext.GetRunAsUser(),
securityContext.GetRunAsGroup(),
)
if err != nil {
return nil, errors.Wrap(err, "failed to generate user string")
}
if userstr == "" {
// Lastly, since no user override was passed via CRI try to set via OCI
// Image
userstr = imageConfig.User
}
if userstr != "" {
specOpts = append(specOpts, oci.WithUser(userstr))
}
return specOpts, nil
}
// setupSandboxFiles sets up necessary sandbox files including /dev/shm, /etc/hosts,
// /etc/resolv.conf and /etc/hostname.
func (c *criService) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
sandboxEtcHostname := c.getSandboxHostname(id)
hostname := config.GetHostname()
if hostname == "" {
var err error
hostname, err = c.os.Hostname()
if err != nil {
return errors.Wrap(err, "failed to get hostname")
}
}
if err := c.os.WriteFile(sandboxEtcHostname, []byte(hostname+"\n"), 0644); err != nil {
return errors.Wrapf(err, "failed to write hostname to %q", sandboxEtcHostname)
}
// TODO(random-liu): Consider whether we should maintain /etc/hosts and /etc/resolv.conf in kubelet.
sandboxEtcHosts := c.getSandboxHosts(id)
if err := c.os.CopyFile(etcHosts, sandboxEtcHosts, 0644); err != nil {
return errors.Wrapf(err, "failed to generate sandbox hosts file %q", sandboxEtcHosts)
}
// Set DNS options. Maintain a resolv.conf for the sandbox.
var err error
resolvContent := ""
if dnsConfig := config.GetDnsConfig(); dnsConfig != nil {
resolvContent, err = parseDNSOptions(dnsConfig.Servers, dnsConfig.Searches, dnsConfig.Options)
if err != nil {
return errors.Wrapf(err, "failed to parse sandbox DNSConfig %+v", dnsConfig)
}
}
resolvPath := c.getResolvPath(id)
if resolvContent == "" {
// copy host's resolv.conf to resolvPath
err = c.os.CopyFile(resolvConfPath, resolvPath, 0644)
if err != nil {
return errors.Wrapf(err, "failed to copy host's resolv.conf to %q", resolvPath)
}
} else {
err = c.os.WriteFile(resolvPath, []byte(resolvContent), 0644)
if err != nil {
return errors.Wrapf(err, "failed to write resolv content to %q", resolvPath)
}
}
// Setup sandbox /dev/shm.
if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetIpc() == runtime.NamespaceMode_NODE {
if _, err := c.os.Stat(devShm); err != nil {
return errors.Wrapf(err, "host %q is not available for host ipc", devShm)
}
} else {
sandboxDevShm := c.getSandboxDevShm(id)
if err := c.os.MkdirAll(sandboxDevShm, 0700); err != nil {
return errors.Wrap(err, "failed to create sandbox shm")
}
shmproperty := fmt.Sprintf("mode=1777,size=%d", defaultShmSize)
if err := c.os.(osinterface.UNIX).Mount("shm", sandboxDevShm, "tmpfs", uintptr(unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV), shmproperty); err != nil {
return errors.Wrap(err, "failed to mount sandbox shm")
}
}
return nil
}
// parseDNSOptions parse DNS options into resolv.conf format content,
// if none option is specified, will return empty with no error.
func parseDNSOptions(servers, searches, options []string) (string, error) {
resolvContent := ""
if len(searches) > maxDNSSearches {
return "", errors.Errorf("DNSOption.Searches has more than %d domains", maxDNSSearches)
}
if len(searches) > 0 {
resolvContent += fmt.Sprintf("search %s\n", strings.Join(searches, " "))
}
if len(servers) > 0 {
resolvContent += fmt.Sprintf("nameserver %s\n", strings.Join(servers, "\nnameserver "))
}
if len(options) > 0 {
resolvContent += fmt.Sprintf("options %s\n", strings.Join(options, " "))
}
return resolvContent, nil
}
// cleanupSandboxFiles unmount some sandbox files, we rely on the removal of sandbox root directory to
// remove these files. Unmount should *NOT* return error if the mount point is already unmounted.
func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetIpc() != runtime.NamespaceMode_NODE {
path, err := c.os.FollowSymlinkInScope(c.getSandboxDevShm(id), "/")
if err != nil {
return errors.Wrap(err, "failed to follow symlink")
}
if err := c.os.(osinterface.UNIX).Unmount(path); err != nil && !os.IsNotExist(err) {
return errors.Wrapf(err, "failed to unmount %q", path)
}
}
return nil
}
// taskOpts generates task options for a (sandbox) container.
func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
// TODO(random-liu): Remove this after shim v1 is deprecated.
var taskOpts []containerd.NewTaskOpts
if c.config.NoPivot && runtimeType == plugin.RuntimeRuncV1 {
taskOpts = append(taskOpts, containerd.WithNoPivotRoot)
}
return taskOpts
}

View File

@@ -0,0 +1,91 @@
// +build windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
"github.com/containerd/containerd"
"github.com/containerd/containerd/oci"
imagespec "github.com/opencontainers/image-spec/specs-go/v1"
runtimespec "github.com/opencontainers/runtime-spec/specs-go"
"github.com/pkg/errors"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
"github.com/containerd/cri/pkg/annotations"
customopts "github.com/containerd/cri/pkg/containerd/opts"
)
func (c *criService) sandboxContainerSpec(id string, config *runtime.PodSandboxConfig,
imageConfig *imagespec.ImageConfig, nsPath string, runtimePodAnnotations []string) (*runtimespec.Spec, error) {
// Creates a spec Generator with the default spec.
specOpts := []oci.SpecOpts{
oci.WithEnv(imageConfig.Env),
oci.WithHostname(config.GetHostname()),
}
if imageConfig.WorkingDir != "" {
specOpts = append(specOpts, oci.WithProcessCwd(imageConfig.WorkingDir))
}
if len(imageConfig.Entrypoint) == 0 && len(imageConfig.Cmd) == 0 {
// Pause image must have entrypoint or cmd.
return nil, errors.Errorf("invalid empty entrypoint and cmd in image config %+v", imageConfig)
}
specOpts = append(specOpts, oci.WithProcessArgs(append(imageConfig.Entrypoint, imageConfig.Cmd...)...))
specOpts = append(specOpts,
// Clear the root location since hcsshim expects it.
// NOTE: readonly rootfs doesn't work on windows.
customopts.WithoutRoot,
customopts.WithWindowsNetworkNamespace(nsPath),
)
specOpts = append(specOpts, customopts.WithWindowsDefaultSandboxShares)
for pKey, pValue := range getPassthroughAnnotations(config.Annotations,
runtimePodAnnotations) {
specOpts = append(specOpts, customopts.WithAnnotation(pKey, pValue))
}
specOpts = append(specOpts,
customopts.WithAnnotation(annotations.ContainerType, annotations.ContainerTypeSandbox),
customopts.WithAnnotation(annotations.SandboxID, id),
customopts.WithAnnotation(annotations.SandboxLogDir, config.GetLogDirectory()),
)
return runtimeSpec(id, specOpts...)
}
// No sandbox container spec options for windows yet.
func (c *criService) sandboxContainerSpecOpts(config *runtime.PodSandboxConfig, imageConfig *imagespec.ImageConfig) ([]oci.SpecOpts, error) {
return nil, nil
}
// No sandbox files needed for windows.
func (c *criService) setupSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
return nil
}
// No sandbox files needed for windows.
func (c *criService) cleanupSandboxFiles(id string, config *runtime.PodSandboxConfig) error {
return nil
}
// No task options needed for windows.
func (c *criService) taskOpts(runtimeType string) []containerd.NewTaskOpts {
return nil
}

View File

@@ -18,6 +18,7 @@ package server
import (
"encoding/json"
goruntime "runtime"
"github.com/containerd/containerd"
"github.com/containerd/containerd/errdefs"
@@ -69,7 +70,8 @@ func (c *criService) PodSandboxStatus(ctx context.Context, r *runtime.PodSandbox
func (c *criService) getIPs(sandbox sandboxstore.Sandbox) (string, []string, error) {
config := sandbox.Config
if config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
if goruntime.GOOS != "windows" &&
config.GetLinux().GetSecurityContext().GetNamespaceOptions().GetNetwork() == runtime.NamespaceMode_NODE {
// For sandboxes using the node network we are not
// responsible for reporting the IP.
return "", nil, nil

View File

@@ -23,7 +23,6 @@ import (
eventtypes "github.com/containerd/containerd/api/events"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/log"
cni "github.com/containerd/go-cni"
"github.com/pkg/errors"
"golang.org/x/net/context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
@@ -58,8 +57,8 @@ func (c *criService) StopPodSandbox(ctx context.Context, r *runtime.StopPodSandb
}
}
if err := c.unmountSandboxFiles(id, sandbox.Config); err != nil {
return nil, errors.Wrap(err, "failed to unmount sandbox files")
if err := c.cleanupSandboxFiles(id, sandbox.Config); err != nil {
return nil, errors.Wrap(err, "failed to cleanup sandbox files")
}
// Only stop sandbox container when it's running or unknown.
@@ -166,11 +165,12 @@ func (c *criService) teardownPodNetwork(ctx context.Context, sandbox sandboxstor
path = sandbox.NetNSPath
config = sandbox.Config
)
labels := getPodCNILabels(id, config)
return c.netPlugin.Remove(ctx, id,
path,
cni.WithLabels(labels),
cni.WithCapabilityPortMap(toCNIPortMappings(config.GetPortMappings())))
opts, err := cniNamespaceOpts(id, config)
if err != nil {
return errors.Wrap(err, "get cni namespace options")
}
return c.netPlugin.Remove(ctx, id, path, opts...)
}
// cleanupUnknownSandbox cleanup stopped sandbox in unknown state.

View File

@@ -26,10 +26,6 @@ import (
"github.com/containerd/containerd"
"github.com/containerd/containerd/plugin"
cni "github.com/containerd/go-cni"
runcapparmor "github.com/opencontainers/runc/libcontainer/apparmor"
runcseccomp "github.com/opencontainers/runc/libcontainer/seccomp"
runcsystem "github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/selinux/go-selinux"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"google.golang.org/grpc"
@@ -68,10 +64,6 @@ type criService struct {
config criconfig.Config
// imageFSPath is the path to image filesystem.
imageFSPath string
// apparmorEnabled indicates whether apparmor is enabled.
apparmorEnabled bool
// seccompEnabled indicates whether seccomp is enabled.
seccompEnabled bool
// os is an interface for all required os operations.
os osinterface.OS
// sandboxStore stores all resources associated with sandboxes.
@@ -107,8 +99,6 @@ func NewCRIService(config criconfig.Config, client *containerd.Client) (CRIServi
c := &criService{
config: config,
client: client,
apparmorEnabled: runcapparmor.IsEnabled() && !config.DisableApparmor,
seccompEnabled: runcseccomp.IsEnabled(),
os: osinterface.RealOS{},
sandboxStore: sandboxstore.NewStore(),
containerStore: containerstore.NewStore(),
@@ -119,20 +109,6 @@ func NewCRIService(config criconfig.Config, client *containerd.Client) (CRIServi
initialized: atomic.NewBool(false),
}
if runcsystem.RunningInUserNS() {
if !(config.DisableCgroup && !c.apparmorEnabled && config.RestrictOOMScoreAdj) {
logrus.Warn("Running containerd in a user namespace typically requires disable_cgroup, disable_apparmor, restrict_oom_score_adj set to be true")
}
}
if c.config.EnableSelinux {
if !selinux.GetEnabled() {
logrus.Warn("Selinux is not supported")
}
} else {
selinux.SetDisabled()
}
if client.SnapshotService(c.config.ContainerdConfig.Snapshotter) == nil {
return nil, errors.Errorf("failed to find snapshotter %q", c.config.ContainerdConfig.Snapshotter)
}
@@ -140,23 +116,10 @@ func NewCRIService(config criconfig.Config, client *containerd.Client) (CRIServi
c.imageFSPath = imageFSPath(config.ContainerdRootDir, config.ContainerdConfig.Snapshotter)
logrus.Infof("Get image filesystem path %q", c.imageFSPath)
// Pod needs to attach to atleast loopback network and a non host network,
// hence networkAttachCount is 2. If there are more network configs the
// pod will be attached to all the networks but we will only use the ip
// of the default network interface as the pod IP.
c.netPlugin, err = cni.New(cni.WithMinNetworkCount(networkAttachCount),
cni.WithPluginConfDir(config.NetworkPluginConfDir),
cni.WithPluginMaxConfNum(config.NetworkPluginMaxConfNum),
cni.WithPluginDir([]string{config.NetworkPluginBinDir}))
if err != nil {
return nil, errors.Wrap(err, "failed to initialize cni")
if err := c.initPlatform(); err != nil {
return nil, errors.Wrap(err, "initialize platform")
}
// Try to load the config if it exists. Just log the error if load fails
// This is not disruptive for containerd to panic
if err := c.netPlugin.Load(cni.WithLoNetwork, cni.WithDefaultConf); err != nil {
logrus.WithError(err).Error("Failed to load cni during init, please check CRI plugin status before setting up network for pods")
}
// prepare streaming server
c.streamServer, err = newStreamServer(c, config.StreamServerAddress, config.StreamServerPort, config.StreamIdleTimeout)
if err != nil {

View File

@@ -0,0 +1,74 @@
// +build !windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
cni "github.com/containerd/go-cni"
runcsystem "github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/selinux/go-selinux"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
// networkAttachCount is the minimum number of networks the PodSandbox
// attaches to
const networkAttachCount = 2
// initPlatform handles linux specific initialization for the CRI service.
func (c *criService) initPlatform() error {
var err error
if runcsystem.RunningInUserNS() {
if !(c.config.DisableCgroup && !c.apparmorEnabled() && c.config.RestrictOOMScoreAdj) {
logrus.Warn("Running containerd in a user namespace typically requires disable_cgroup, disable_apparmor, restrict_oom_score_adj set to be true")
}
}
if c.config.EnableSelinux {
if !selinux.GetEnabled() {
logrus.Warn("Selinux is not supported")
}
} else {
selinux.SetDisabled()
}
// Pod needs to attach to at least loopback network and a non host network,
// hence networkAttachCount is 2. If there are more network configs the
// pod will be attached to all the networks but we will only use the ip
// of the default network interface as the pod IP.
c.netPlugin, err = cni.New(cni.WithMinNetworkCount(networkAttachCount),
cni.WithPluginConfDir(c.config.NetworkPluginConfDir),
cni.WithPluginMaxConfNum(c.config.NetworkPluginMaxConfNum),
cni.WithPluginDir([]string{c.config.NetworkPluginBinDir}))
if err != nil {
return errors.Wrap(err, "failed to initialize cni")
}
// Try to load the config if it exists. Just log the error if load fails
// This is not disruptive for containerd to panic
if err := c.netPlugin.Load(c.cniLoadOptions()...); err != nil {
logrus.WithError(err).Error("Failed to load cni during init, please check CRI plugin status before setting up network for pods")
}
return nil
}
// cniLoadOptions returns cni load options for the linux.
func (c *criService) cniLoadOptions() []cni.CNIOpt {
return []cni.CNIOpt{cni.WithLoNetwork, cni.WithDefaultConf}
}

View File

@@ -0,0 +1,58 @@
// +build windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package server
import (
cni "github.com/containerd/go-cni"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
// windowsNetworkAttachCount is the minimum number of networks the PodSandbox
// attaches to
const windowsNetworkAttachCount = 1
// initPlatform handles linux specific initialization for the CRI service.
func (c *criService) initPlatform() error {
var err error
// For windows, the loopback network is added as default.
// There is no need to explicitly add one hence networkAttachCount is 1.
// If there are more network configs the pod will be attached to all the
// networks but we will only use the ip of the default network interface
// as the pod IP.
c.netPlugin, err = cni.New(cni.WithMinNetworkCount(windowsNetworkAttachCount),
cni.WithPluginConfDir(c.config.NetworkPluginConfDir),
cni.WithPluginMaxConfNum(c.config.NetworkPluginMaxConfNum),
cni.WithPluginDir([]string{c.config.NetworkPluginBinDir}))
if err != nil {
return errors.Wrap(err, "failed to initialize cni")
}
// Try to load the config if it exists. Just log the error if load fails
// This is not disruptive for containerd to panic
if err := c.netPlugin.Load(c.cniLoadOptions()...); err != nil {
logrus.WithError(err).Error("Failed to load cni during init, please check CRI plugin status before setting up network for pods")
}
return nil
}
// cniLoadOptions returns cni load options for the windows.
func (c *criService) cniLoadOptions() []cni.CNIOpt {
return []cni.CNIOpt{cni.WithDefaultConf}
}

View File

@@ -22,7 +22,6 @@ import (
goruntime "runtime"
"github.com/containerd/containerd/log"
cni "github.com/containerd/go-cni"
"golang.org/x/net/context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
)
@@ -42,9 +41,8 @@ func (c *criService) Status(ctx context.Context, r *runtime.StatusRequest) (*run
Type: runtime.NetworkReady,
Status: true,
}
// Load the latest cni configuration to be in sync with the latest network configuration
if err := c.netPlugin.Load(cni.WithLoNetwork, cni.WithDefaultConf); err != nil {
if err := c.netPlugin.Load(c.cniLoadOptions()...); err != nil {
log.G(ctx).WithError(err).Errorf("Failed to load cni configuration")
}
// Check the status of the cni initialization

View File

@@ -25,7 +25,6 @@ import (
"time"
"github.com/pkg/errors"
"golang.org/x/net/context"
k8snet "k8s.io/apimachinery/pkg/util/net"
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/client-go/tools/remotecommand"
@@ -156,7 +155,8 @@ func (s *streamRuntime) PortForward(podSandboxID string, port int32, stream io.R
if port <= 0 || port > math.MaxUint16 {
return errors.Errorf("invalid port %d", port)
}
return s.c.portForward(context.Background(), podSandboxID, port, stream)
ctx := ctrdutil.NamespacedContext()
return s.c.portForward(ctx, podSandboxID, port, stream)
}
// handleResizing spawns a goroutine that processes the resize channel, calling resizeFunc for each

View File

@@ -24,7 +24,6 @@ import (
"text/template"
"github.com/containerd/containerd/log"
cni "github.com/containerd/go-cni"
"github.com/pkg/errors"
"golang.org/x/net/context"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
@@ -73,7 +72,7 @@ func (c *criService) UpdateRuntimeConfig(ctx context.Context, r *runtime.UpdateR
if err := c.netPlugin.Status(); err == nil {
log.G(ctx).Infof("Network plugin is ready, skip generating cni config from template %q", confTemplate)
return &runtime.UpdateRuntimeConfigResponse{}, nil
} else if err := c.netPlugin.Load(cni.WithLoNetwork, cni.WithDefaultConf); err == nil {
} else if err := c.netPlugin.Load(c.cniLoadOptions()...); err == nil {
log.G(ctx).Infof("CNI config is successfully loaded, skip generating cni config from template %q", confTemplate)
return &runtime.UpdateRuntimeConfigResponse{}, nil
}

View File

@@ -5,11 +5,12 @@ github.com/docker/docker 86f080cff0914e9694068ed78d503701667c4c00
github.com/docker/distribution 0d3efadf0154c2b8a4e7b6621fff9809655cc580
# containerd dependencies
go.etcd.io/bbolt 2eb7227adea1d5cf85f0bc2a82b7059b13c2fa68
google.golang.org/grpc 25c4f928eaa6d96443009bd842389fb4fa48664e # v1.20.1
go.opencensus.io v0.22.0
go.etcd.io/bbolt v1.3.3
google.golang.org/grpc 6eaf6f47437a6b4e2153a190160ef39a92c7eceb # v1.23.0
google.golang.org/genproto d80a6e20e776b0b17a324d0ba1ab50a39c8e8944
golang.org/x/text 19e51611da83d6be54ddafce4a4af510cb3e9ea4
golang.org/x/sys 4c4f7f33c9ed00de01c4c741d2177abfcfe19307 https://github.com/golang/sys
golang.org/x/sys fb81701db80f1745f51259b1f286de3fe2ec80c8 https://github.com/golang/sys # TODO(windows): update this in containerd/containerd
golang.org/x/sync 42b317875d0fa942474b76e1b46a6060d720ae6e
golang.org/x/net f3200d17e092c607f615320ecaad13d87ad9a2b3
github.com/urfave/cli 7bc6a0acffa589f415f88aca16cc1de5ffd66f9c
@@ -25,6 +26,9 @@ github.com/opencontainers/runc f4982d86f7fde0b6f953cc62ccc4022c519a10a9 # v1.0.0
github.com/opencontainers/image-spec v1.0.1
github.com/opencontainers/go-digest c9281466c8b2f606084ac71339773efd177436e7
github.com/matttproud/golang_protobuf_extensions v1.0.1
github.com/konsorten/go-windows-terminal-sequences v1.0.1
github.com/imdario/mergo v0.3.7
github.com/hashicorp/golang-lru v0.5.3
github.com/grpc-ecosystem/go-grpc-prometheus v1.1
github.com/google/uuid v1.1.1
github.com/golang/protobuf v1.2.0
@@ -37,17 +41,16 @@ github.com/docker/go-events 9461782956ad83b30282bf90e31fa6a70c255ba9
github.com/coreos/go-systemd v14
github.com/containerd/typeurl a93fcdb778cd272c6e9b3028b2f42d813e785d40
github.com/containerd/ttrpc 92c8520ef9f86600c650dd540266a007bf03670f
github.com/containerd/go-runc e029b79d8cda8374981c64eba71f28ec38e5526f
github.com/containerd/go-runc 9007c2405372fe28918845901a3276c0915689a1
github.com/containerd/fifo bda0ff6ed73c67bfb5e62bc9c697f146b7fd7f13
github.com/containerd/continuity f2a389ac0a02ce21c09edd7344677a601970f41c
github.com/containerd/containerd d4802a64f9737f02db3426751f380d97fc878dec
github.com/containerd/containerd ed16170c4c399c57f25d6aa1e97b345ed6ab96cb
github.com/containerd/console 0650fd9eeb50bab4fc99dceb9f2e14cf58f36e7f
github.com/containerd/cgroups c4b9ac5c7601384c965b9646fc515884e091ebb9
github.com/beorn7/perks 4c0e84591b9aa9e6dcfdf3e020114cd81f89d5f9
github.com/Microsoft/hcsshim 9e921883ac929bbe515b39793ece99ce3a9d7706
github.com/Microsoft/hcsshim c088f411aaf3585d8dffc9deb4289ffa32854497 # TODO(windows): update this in containerd/containerd
github.com/Microsoft/go-winio v0.4.14
github.com/BurntSushi/toml v0.3.1
github.com/imdario/mergo v0.3.7
# kubernetes dependencies
sigs.k8s.io/yaml v1.1.0
@@ -78,4 +81,4 @@ github.com/davecgh/go-spew v1.1.1
# cni dependencies
github.com/containernetworking/plugins v0.7.6
github.com/containernetworking/cni v0.7.1
github.com/containerd/go-cni 49fbd9b210f3c8ee3b7fd3cd797aabaf364627c1
github.com/containerd/go-cni 0d360c50b10b350b6bb23863fd4dfb1c232b01c9

View File

@@ -42,6 +42,14 @@ func WithCapabilityBandWidth(bandWidth BandWidth) NamespaceOpts {
}
}
// WithCapabilityDNS adds support for dns
func WithCapabilityDNS(dns DNS) NamespaceOpts {
return func(c *Namespace) error {
c.capabilityArgs["dns"] = dns
return nil
}
}
func WithCapability(name string, capability interface{}) NamespaceOpts {
return func(c *Namespace) error {
c.capabilityArgs[name] = capability

View File

@@ -53,3 +53,13 @@ type BandWidth struct {
EgressRate uint64
EgressBurst uint64
}
// DNS defines the dns config
type DNS struct {
// List of DNS servers of the cluster.
Servers []string
// List of DNS search domains of the cluster.
Searches []string
// List of DNS options.
Options []string
}