diff --git a/pkg/cri/sbserver/sandbox_stats.go b/pkg/cri/sbserver/sandbox_stats.go index bf59846d1..464435168 100644 --- a/pkg/cri/sbserver/sandbox_stats.go +++ b/pkg/cri/sbserver/sandbox_stats.go @@ -33,12 +33,7 @@ func (c *criService) PodSandboxStats( return nil, fmt.Errorf("an error occurred when trying to find sandbox %s: %w", r.GetPodSandboxId(), err) } - metrics, err := metricsForSandbox(sandbox) - if err != nil { - return nil, fmt.Errorf("failed getting metrics for sandbox %s: %w", r.GetPodSandboxId(), err) - } - - podSandboxStats, err := c.podSandboxStats(ctx, sandbox, metrics) + podSandboxStats, err := c.podSandboxStats(ctx, sandbox) if err != nil { return nil, fmt.Errorf("failed to decode pod sandbox metrics %s: %w", r.GetPodSandboxId(), err) } diff --git a/pkg/cri/sbserver/sandbox_stats_linux.go b/pkg/cri/sbserver/sandbox_stats_linux.go index aa7a5cbce..ca07c6290 100644 --- a/pkg/cri/sbserver/sandbox_stats_linux.go +++ b/pkg/cri/sbserver/sandbox_stats_linux.go @@ -21,39 +21,39 @@ import ( "fmt" "time" - "github.com/containernetworking/plugins/pkg/ns" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - "github.com/containerd/cgroups/v3" "github.com/containerd/cgroups/v3/cgroup1" cgroupsv2 "github.com/containerd/cgroups/v3/cgroup2" - - "github.com/vishvananda/netlink" - "github.com/containerd/containerd/log" sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + "github.com/containernetworking/plugins/pkg/ns" + "github.com/vishvananda/netlink" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" ) func (c *criService) podSandboxStats( ctx context.Context, - sandbox sandboxstore.Sandbox, - stats interface{}, -) (*runtime.PodSandboxStats, error) { + sandbox sandboxstore.Sandbox) (*runtime.PodSandboxStats, error) { meta := sandbox.Metadata if sandbox.Status.Get().State != sandboxstore.StateReady { return nil, fmt.Errorf("failed to get pod sandbox stats since sandbox container %q is not in ready state", meta.ID) } - var podSandboxStats runtime.PodSandboxStats - podSandboxStats.Attributes = &runtime.PodSandboxAttributes{ - Id: meta.ID, - Metadata: meta.Config.GetMetadata(), - Labels: meta.Config.GetLabels(), - Annotations: meta.Config.GetAnnotations(), + stats, err := metricsForSandbox(sandbox) + if err != nil { + return nil, fmt.Errorf("failed getting metrics for sandbox %s: %w", sandbox.ID, err) } - podSandboxStats.Linux = &runtime.LinuxPodSandboxStats{} + podSandboxStats := &runtime.PodSandboxStats{ + Linux: &runtime.LinuxPodSandboxStats{}, + Attributes: &runtime.PodSandboxAttributes{ + Id: meta.ID, + Metadata: meta.Config.GetMetadata(), + Labels: meta.Config.GetLabels(), + Annotations: meta.Config.GetAnnotations(), + }, + } if stats != nil { timestamp := time.Now() @@ -119,7 +119,7 @@ func (c *criService) podSandboxStats( podSandboxStats.Linux.Containers = resp.GetStats() } - return &podSandboxStats, nil + return podSandboxStats, nil } // https://github.com/cri-o/cri-o/blob/74a5cf8dffd305b311eb1c7f43a4781738c388c1/internal/oci/stats.go#L32 diff --git a/pkg/cri/sbserver/sandbox_stats_list.go b/pkg/cri/sbserver/sandbox_stats_list.go index 3cff21b65..ef53fb6d0 100644 --- a/pkg/cri/sbserver/sandbox_stats_list.go +++ b/pkg/cri/sbserver/sandbox_stats_list.go @@ -33,12 +33,7 @@ func (c *criService) ListPodSandboxStats( podSandboxStats := new(runtime.ListPodSandboxStatsResponse) for _, sandbox := range sandboxes { - metrics, err := metricsForSandbox(sandbox) - if err != nil { - return nil, fmt.Errorf("failed to obtain metrics for sandbox %q: %w", sandbox.ID, err) - } - - sandboxStats, err := c.podSandboxStats(ctx, sandbox, metrics) + sandboxStats, err := c.podSandboxStats(ctx, sandbox) if err != nil { return nil, fmt.Errorf("failed to decode sandbox container metrics for sandbox %q: %w", sandbox.ID, err) } diff --git a/pkg/cri/sbserver/sandbox_stats_other.go b/pkg/cri/sbserver/sandbox_stats_other.go index 2da4f28d8..8a249a485 100644 --- a/pkg/cri/sbserver/sandbox_stats_other.go +++ b/pkg/cri/sbserver/sandbox_stats_other.go @@ -22,16 +22,13 @@ import ( "context" "fmt" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - "github.com/containerd/containerd/errdefs" sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" ) -func (c *criService) podSandboxStats(ctx context.Context, sandbox sandboxstore.Sandbox, stats interface{}) (*runtime.PodSandboxStats, error) { +func (c *criService) podSandboxStats( + ctx context.Context, + sandbox sandboxstore.Sandbox) (*runtime.PodSandboxStats, error) { return nil, fmt.Errorf("pod sandbox stats not implemented: %w", errdefs.ErrNotImplemented) } - -func metricsForSandbox(sandbox sandboxstore.Sandbox) (interface{}, error) { - return nil, fmt.Errorf("metrics for sandbox not implemented: %w", errdefs.ErrNotImplemented) -} diff --git a/pkg/cri/sbserver/sandbox_stats_windows.go b/pkg/cri/sbserver/sandbox_stats_windows.go index 54b346e6c..dc4ea5307 100644 --- a/pkg/cri/sbserver/sandbox_stats_windows.go +++ b/pkg/cri/sbserver/sandbox_stats_windows.go @@ -19,17 +19,400 @@ package sbserver import ( "context" "fmt" + "time" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd/errdefs" + "github.com/Microsoft/hcsshim" + wstats "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" + "github.com/Microsoft/hcsshim/hcn" + "github.com/containerd/containerd/api/services/tasks/v1" + "github.com/containerd/containerd/api/types" + "github.com/containerd/containerd/log" + containerstore "github.com/containerd/containerd/pkg/cri/store/container" sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + "github.com/containerd/containerd/pkg/cri/store/stats" + "github.com/containerd/typeurl/v2" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" ) -func (c *criService) podSandboxStats(ctx context.Context, sandbox sandboxstore.Sandbox, stats interface{}) (*runtime.PodSandboxStats, error) { - return nil, fmt.Errorf("pod sandbox stats not implemented on windows: %w", errdefs.ErrNotImplemented) +func (c *criService) podSandboxStats( + ctx context.Context, + sandbox sandboxstore.Sandbox) (*runtime.PodSandboxStats, error) { + meta := sandbox.Metadata + + if sandbox.Status.Get().State != sandboxstore.StateReady { + return nil, fmt.Errorf("failed to get pod sandbox stats since sandbox container %q is not in ready state", meta.ID) + } + + timestamp := time.Now() + podSandboxStats := &runtime.PodSandboxStats{ + Windows: &runtime.WindowsPodSandboxStats{}, + Attributes: &runtime.PodSandboxAttributes{ + Id: meta.ID, + Metadata: meta.Config.GetMetadata(), + Labels: meta.Config.GetLabels(), + Annotations: meta.Config.GetAnnotations(), + }, + } + + metrics, containers, err := c.listWindowsMetricsForSandbox(ctx, sandbox) + if err != nil { + return nil, fmt.Errorf("failed to obtain container stats during podSandboxStats call: %w", err) + } + + statsMap, err := convertMetricsToWindowsStats(metrics, sandbox) + if err != nil { + return nil, err + } + + podCPU, containerStats, err := c.toPodSandboxStats(sandbox, statsMap, containers, timestamp) + if err != nil { + return nil, fmt.Errorf("failed to convert container stats during podSandboxStats call: %w", err) + } + podSandboxStats.Windows.Cpu = podCPU.Cpu + podSandboxStats.Windows.Memory = podCPU.Memory + podSandboxStats.Windows.Containers = containerStats + + podSandboxStats.Windows.Network = windowsNetworkUsage(ctx, sandbox, timestamp) + + pidCount, err := c.getSandboxPidCount(ctx, sandbox) + if err != nil { + return nil, err + } + + podSandboxStats.Windows.Process = &runtime.WindowsProcessUsage{ + Timestamp: timestamp.UnixNano(), + ProcessCount: &runtime.UInt64Value{Value: pidCount}, + } + + c.saveSandBoxMetrics(podSandboxStats.Attributes.Id, podSandboxStats) + + return podSandboxStats, nil } -func metricsForSandbox(sandbox sandboxstore.Sandbox) (interface{}, error) { - return nil, fmt.Errorf("metrics for sandbox not implemented on windows: %w", errdefs.ErrNotImplemented) +func convertMetricsToWindowsStats(metrics []*types.Metric, sandbox sandboxstore.Sandbox) (map[string]*wstats.Statistics, error) { + isHostProcess := sandbox.Config.GetWindows().GetSecurityContext().GetHostProcess() + + statsMap := make(map[string]*wstats.Statistics) + for _, stat := range metrics { + containerStatsData, err := typeurl.UnmarshalAny(stat.Data) + if err != nil { + return nil, fmt.Errorf("failed to extract metrics for container with id %s: %w", stat.ID, err) + } + + // extract the metrics if available for this container + // containerStatsData can be nil for pods that don't have an actual podsandbox container such as HPC + // In the case of HostProcess sandbox container we will use the nil value for the statsmap which is used later + // otherwise return an error since we should have gotten stats + containerStats, ok := containerStatsData.(*wstats.Statistics) + if !ok && !(isHostProcess && sandbox.ID == stat.ID) { + return nil, fmt.Errorf("failed to extract metrics for container with id %s: %w", stat.ID, err) + } + + statsMap[stat.ID] = containerStats + } + return statsMap, nil +} + +func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap map[string]*wstats.Statistics, containers []containerstore.Container, timestamp time.Time) (*runtime.WindowsContainerStats, []*runtime.WindowsContainerStats, error) { + podMetric, ok := statsMap[sandbox.ID] + if !ok { + return nil, nil, fmt.Errorf("failed to find container metric for pod with id %s", sandbox.ID) + } + + podRuntimeStats, err := c.convertToCRIStats(podMetric) + if err != nil { + return nil, nil, fmt.Errorf("failed to covert container metrics for sandbox with id %s: %w", sandbox.ID, err) + } + + windowsContainerStats := make([]*runtime.WindowsContainerStats, 0, len(statsMap)) + for _, cntr := range containers { + containerMetric := statsMap[cntr.ID] + + if containerMetric == nil { + return nil, nil, fmt.Errorf("failed to find metrics for container with id %s: %w", cntr.ID, err) + } + + containerStats, err := c.convertToCRIStats(containerMetric) + if err != nil { + return nil, nil, fmt.Errorf("failed to convert metrics for container with id %s: %w", cntr.ID, err) + } + + // Calculate NanoCores for container + if containerStats.Cpu.UsageCoreNanoSeconds != nil { + nanoCoreUsage := getUsageNanoCores(containerStats.Cpu.UsageCoreNanoSeconds.Value, cntr.Stats, containerStats.Cpu.Timestamp) + containerStats.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoCoreUsage} + } + + // On Windows we need to add up all the podStatsData to get the Total for the Pod as there isn't something + // like a parent cgroup that queried for all the pod podStatsData + appendCPUPodStats(podRuntimeStats, containerStats, timestamp) + appendMemoryPodStats(podRuntimeStats, containerStats, timestamp) + + // If snapshotstore doesn't have cached snapshot information + // set WritableLayer usage to zero + var usedBytes uint64 + sn, err := c.snapshotStore.Get(cntr.ID) + if err == nil { + usedBytes = sn.Size + } + containerStats.WritableLayer = &runtime.WindowsFilesystemUsage{ + Timestamp: sn.Timestamp, + FsId: &runtime.FilesystemIdentifier{ + Mountpoint: c.imageFSPath, + }, + UsedBytes: &runtime.UInt64Value{Value: usedBytes}, + } + + containerStats.Attributes = &runtime.ContainerAttributes{ + Id: cntr.ID, + Metadata: cntr.Config.GetMetadata(), + Labels: cntr.Config.GetLabels(), + Annotations: cntr.Config.GetAnnotations(), + } + + windowsContainerStats = append(windowsContainerStats, containerStats) + } + + // Calculate NanoCores for pod after adding containers cpu including the pods cpu + if podRuntimeStats.Cpu.UsageCoreNanoSeconds != nil { + nanoCoreUsage := getUsageNanoCores(podRuntimeStats.Cpu.UsageCoreNanoSeconds.Value, sandbox.Stats, podRuntimeStats.Cpu.Timestamp) + podRuntimeStats.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoCoreUsage} + } + + return podRuntimeStats, windowsContainerStats, nil +} + +func appendCPUPodStats(podRuntimeStats *runtime.WindowsContainerStats, containerRunTimeStats *runtime.WindowsContainerStats, timestamp time.Time) { + // protect against missing stats in case container hasn't started yet + if containerRunTimeStats.Cpu == nil || containerRunTimeStats.Cpu.UsageCoreNanoSeconds == nil { + return + } + + // It is possible the pod sandbox might not be populated with values if it doesn't exist + // HostProcess pods are an example where there is no actual pod sandbox running and therefor no stats + if podRuntimeStats.Cpu == nil { + podRuntimeStats.Cpu = &runtime.WindowsCpuUsage{ + Timestamp: timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: 0}, + } + } + + if podRuntimeStats.Cpu.UsageCoreNanoSeconds == nil { + podRuntimeStats.Cpu.UsageCoreNanoSeconds = &runtime.UInt64Value{Value: 0} + } + + podRuntimeStats.Cpu.UsageCoreNanoSeconds.Value += containerRunTimeStats.Cpu.UsageCoreNanoSeconds.Value +} + +func appendMemoryPodStats(podRuntimeStats *runtime.WindowsContainerStats, containerRunTimeStats *runtime.WindowsContainerStats, timestamp time.Time) { + // protect against missing stats in case container hasn't started yet + if containerRunTimeStats.Memory == nil { + return + } + + // It is possible the pod sandbox might not be populated with values if it doesn't exist + // HostProcess pods are an example where there is no actual pod sandbox running and therefor no stats + if podRuntimeStats.Memory == nil { + podRuntimeStats.Memory = &runtime.WindowsMemoryUsage{ + Timestamp: timestamp.UnixNano(), + WorkingSetBytes: &runtime.UInt64Value{Value: 0}, + AvailableBytes: &runtime.UInt64Value{Value: 0}, + PageFaults: &runtime.UInt64Value{Value: 0}, + } + } + + if containerRunTimeStats.Memory.WorkingSetBytes != nil { + if podRuntimeStats.Memory.WorkingSetBytes == nil { + podRuntimeStats.Memory.WorkingSetBytes = &runtime.UInt64Value{Value: 0} + } + podRuntimeStats.Memory.WorkingSetBytes.Value += containerRunTimeStats.Memory.WorkingSetBytes.Value + } + + if containerRunTimeStats.Memory.AvailableBytes != nil { + if podRuntimeStats.Memory.AvailableBytes == nil { + podRuntimeStats.Memory.AvailableBytes = &runtime.UInt64Value{Value: 0} + } + podRuntimeStats.Memory.AvailableBytes.Value += containerRunTimeStats.Memory.AvailableBytes.Value + } + + if containerRunTimeStats.Memory.PageFaults != nil { + if podRuntimeStats.Memory.PageFaults == nil { + podRuntimeStats.Memory.PageFaults = &runtime.UInt64Value{Value: 0} + } + podRuntimeStats.Memory.PageFaults.Value += containerRunTimeStats.Memory.PageFaults.Value + } +} + +func (c *criService) listWindowsMetricsForSandbox(ctx context.Context, sandbox sandboxstore.Sandbox) ([]*types.Metric, []containerstore.Container, error) { + req := &tasks.MetricsRequest{} + var containers []containerstore.Container + for _, cntr := range c.containerStore.List() { + if cntr.SandboxID != sandbox.ID { + continue + } + containers = append(containers, cntr) + req.Filters = append(req.Filters, "id=="+cntr.ID) + } + + //add sandbox container as well + req.Filters = append(req.Filters, "id=="+sandbox.ID) + + resp, err := c.client.TaskService().Metrics(ctx, req) + if err != nil { + return nil, nil, fmt.Errorf("failed to fetch metrics for tasks: %w", err) + } + return resp.Metrics, containers, nil +} + +func (c *criService) convertToCRIStats(stats *wstats.Statistics) (*runtime.WindowsContainerStats, error) { + var cs runtime.WindowsContainerStats + if stats != nil { + wstats := stats.GetWindows() + if wstats == nil { + return nil, fmt.Errorf("windows stats is empty") + } + if wstats.Processor != nil { + cs.Cpu = &runtime.WindowsCpuUsage{ + Timestamp: wstats.Timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: wstats.Processor.TotalRuntimeNS}, + } + } + + if wstats.Memory != nil { + cs.Memory = &runtime.WindowsMemoryUsage{ + Timestamp: wstats.Timestamp.UnixNano(), + WorkingSetBytes: &runtime.UInt64Value{ + Value: wstats.Memory.MemoryUsagePrivateWorkingSetBytes, + }, + } + } + + } + return &cs, nil +} + +func getUsageNanoCores(usageCoreNanoSeconds uint64, oldStats *stats.ContainerStats, newtimestamp int64) uint64 { + if oldStats == nil { + return 0 + } + + nanoSeconds := newtimestamp - oldStats.Timestamp.UnixNano() + + // zero or negative interval + if nanoSeconds <= 0 { + return 0 + } + + return uint64(float64(usageCoreNanoSeconds-oldStats.UsageCoreNanoSeconds) / + float64(nanoSeconds) * float64(time.Second/time.Nanosecond)) +} + +func windowsNetworkUsage(ctx context.Context, sandbox sandboxstore.Sandbox, timestamp time.Time) *runtime.WindowsNetworkUsage { + eps, err := hcn.GetNamespaceEndpointIds(sandbox.NetNSPath) + if err != nil { + log.G(ctx).WithField("podsandboxid", sandbox.ID).WithError(err).Errorf("unable to retrieve windows endpoint metrics for netNsPath: %v", sandbox.NetNSPath) + return nil + } + networkUsage := &runtime.WindowsNetworkUsage{ + Timestamp: timestamp.UnixNano(), + } + for _, ep := range eps { + endpointStats, err := hcsshim.GetHNSEndpointStats(ep) + if err != nil { + log.G(ctx).WithError(err).Errorf("unable to gather stats for endpoint: %s", ep) + continue + } + rtStats := runtime.WindowsNetworkInterfaceUsage{ + Name: endpointStats.EndpointID, + RxBytes: &runtime.UInt64Value{Value: endpointStats.BytesReceived}, + RxPacketsDropped: &runtime.UInt64Value{Value: endpointStats.DroppedPacketsIncoming}, + TxBytes: &runtime.UInt64Value{Value: endpointStats.BytesSent}, + TxPacketsDropped: &runtime.UInt64Value{Value: endpointStats.DroppedPacketsOutgoing}, + } + networkUsage.Interfaces = append(networkUsage.Interfaces, &rtStats) + + // if the default interface isn't set add it. + // We don't have a way to determine the default interface in windows + if networkUsage.DefaultInterface == nil { + networkUsage.DefaultInterface = &rtStats + } + } + + return networkUsage +} + +func (c *criService) saveSandBoxMetrics(sandboxID string, sandboxStats *runtime.PodSandboxStats) error { + // we may not have stats since container hasn't started yet so skip saving to cache + if sandboxStats == nil || sandboxStats.Windows == nil || sandboxStats.Windows.Cpu == nil || + sandboxStats.Windows.Cpu.UsageCoreNanoSeconds == nil { + return nil + } + + newStats := &stats.ContainerStats{ + UsageCoreNanoSeconds: sandboxStats.Windows.Cpu.UsageCoreNanoSeconds.Value, + Timestamp: time.Unix(0, sandboxStats.Windows.Cpu.Timestamp), + } + err := c.sandboxStore.UpdateContainerStats(sandboxID, newStats) + if err != nil { + return err + } + + // We queried the stats when getting sandbox stats. We need to save the query to cache + for _, cntr := range sandboxStats.Windows.Containers { + // we may not have stats since container hasn't started yet so skip saving to cache + if cntr == nil || cntr.Cpu == nil || cntr.Cpu.UsageCoreNanoSeconds == nil { + return nil + } + + newStats := &stats.ContainerStats{ + UsageCoreNanoSeconds: cntr.Cpu.UsageCoreNanoSeconds.Value, + Timestamp: time.Unix(0, cntr.Cpu.Timestamp), + } + err = c.containerStore.UpdateContainerStats(cntr.Attributes.Id, newStats) + if err != nil { + return err + } + } + + return nil +} + +func (c *criService) getSandboxPidCount(ctx context.Context, sandbox sandboxstore.Sandbox) (uint64, error) { + var pidCount uint64 + + // get process count inside PodSandbox for Windows + task, err := sandbox.Container.Task(ctx, nil) + if err != nil { + return 0, err + } + processes, err := task.Pids(ctx) + if err != nil { + return 0, err + } + pidCount += uint64(len(processes)) + + for _, cntr := range c.containerStore.List() { + if cntr.SandboxID != sandbox.ID { + continue + } + + state := cntr.Status.Get().State() + if state != runtime.ContainerState_CONTAINER_RUNNING { + continue + } + + task, err := cntr.Container.Task(ctx, nil) + if err != nil { + return 0, err + } + + processes, err := task.Pids(ctx) + if err != nil { + return 0, err + } + pidCount += uint64(len(processes)) + + } + + return pidCount, nil } diff --git a/pkg/cri/sbserver/sandbox_stats_windows_test.go b/pkg/cri/sbserver/sandbox_stats_windows_test.go new file mode 100644 index 000000000..ac4580e79 --- /dev/null +++ b/pkg/cri/sbserver/sandbox_stats_windows_test.go @@ -0,0 +1,394 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package sbserver + +import ( + "testing" + "time" + + wstats "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" + containerstore "github.com/containerd/containerd/pkg/cri/store/container" + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + "github.com/containerd/containerd/pkg/cri/store/stats" + "github.com/stretchr/testify/assert" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +func TestGetUsageNanoCores(t *testing.T) { + timestamp := time.Now() + secondAfterTimeStamp := timestamp.Add(time.Second) + ID := "ID" + + for desc, test := range map[string]struct { + firstCPUValue uint64 + secondCPUValue uint64 + expectedNanoCoreUsageFirst uint64 + expectedNanoCoreUsageSecond uint64 + }{ + "metrics": { + firstCPUValue: 50, + secondCPUValue: 500, + expectedNanoCoreUsageFirst: 0, + expectedNanoCoreUsageSecond: 450, + }, + } { + t.Run(desc, func(t *testing.T) { + container, err := containerstore.NewContainer( + containerstore.Metadata{ID: ID}, + ) + assert.NoError(t, err) + + // calculate for first iteration + // first run so container stats will be nil + assert.Nil(t, container.Stats) + cpuUsage := getUsageNanoCores(test.firstCPUValue, container.Stats, timestamp.UnixNano()) + assert.NoError(t, err) + assert.Equal(t, test.expectedNanoCoreUsageFirst, cpuUsage) + + // fill in the stats as if they now exist + container.Stats = &stats.ContainerStats{} + container.Stats.UsageCoreNanoSeconds = test.firstCPUValue + container.Stats.Timestamp = timestamp + assert.NotNil(t, container.Stats) + + // calculate for second iteration + cpuUsage = getUsageNanoCores(test.secondCPUValue, container.Stats, secondAfterTimeStamp.UnixNano()) + assert.NoError(t, err) + assert.Equal(t, test.expectedNanoCoreUsageSecond, cpuUsage) + }) + } + +} + +func Test_criService_podSandboxStats(t *testing.T) { + initialStatsTimestamp := time.Now() + currentStatsTimestamp := initialStatsTimestamp.Add(time.Second) + + c := newTestCRIService() + + type expectedStats struct { + UsageCoreNanoSeconds uint64 + UsageNanoCores uint64 + WorkingSetBytes uint64 + } + for desc, test := range map[string]struct { + metrics map[string]*wstats.Statistics + sandbox sandboxstore.Sandbox + containers []containerstore.Container + expectedPodStats expectedStats + expectedContainerStats []expectedStats + expectError bool + }{ + "no metrics found should return error": { + metrics: map[string]*wstats.Statistics{}, + sandbox: sandboxstore.Sandbox{}, + containers: []containerstore.Container{}, + expectedPodStats: expectedStats{}, + expectedContainerStats: []expectedStats{}, + expectError: true, + }, + "pod stats will include the container stats": { + metrics: map[string]*wstats.Statistics{ + "c1": { + Container: windowsStat(currentStatsTimestamp, 200, 20), + }, + "s1": { + Container: windowsStat(currentStatsTimestamp, 200, 20), + }, + }, + sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}}, + containers: []containerstore.Container{ + {Metadata: containerstore.Metadata{ID: "c1"}}, + }, + expectedPodStats: expectedStats{ + UsageCoreNanoSeconds: 400, + UsageNanoCores: 0, + WorkingSetBytes: 40, + }, + expectedContainerStats: []expectedStats{ + { + UsageCoreNanoSeconds: 200, + UsageNanoCores: 0, + WorkingSetBytes: 20, + }, + }, + expectError: false, + }, + "pod with existing stats will have usagenanocores totalled across pods and containers": { + metrics: map[string]*wstats.Statistics{ + "c1": { + Container: windowsStat(currentStatsTimestamp, 400, 20), + }, + "s1": { + Container: windowsStat(currentStatsTimestamp, 400, 20), + }, + }, + sandbox: sandboxPod("s1", initialStatsTimestamp, 400), + containers: []containerstore.Container{ + { + Metadata: containerstore.Metadata{ID: "c1"}, + Stats: &stats.ContainerStats{ + Timestamp: initialStatsTimestamp, + UsageCoreNanoSeconds: 200, + }, + }, + }, + expectedPodStats: expectedStats{ + UsageCoreNanoSeconds: 800, + UsageNanoCores: 400, + WorkingSetBytes: 40, + }, + expectedContainerStats: []expectedStats{ + { + UsageCoreNanoSeconds: 400, + UsageNanoCores: 200, + WorkingSetBytes: 20, + }, + }, + expectError: false, + }, + "pod sandbox with nil stats still works (hostprocess container scenario)": { + metrics: map[string]*wstats.Statistics{ + "c1": { + Container: windowsStat(currentStatsTimestamp, 400, 20), + }, + "s1": nil, + }, + sandbox: sandboxPod("s1", initialStatsTimestamp, 200), + containers: []containerstore.Container{ + { + Metadata: containerstore.Metadata{ID: "c1"}, + Stats: &stats.ContainerStats{ + Timestamp: initialStatsTimestamp, + UsageCoreNanoSeconds: 200, + }, + }, + }, + expectedPodStats: expectedStats{ + UsageCoreNanoSeconds: 400, + UsageNanoCores: 200, + WorkingSetBytes: 20, + }, + expectedContainerStats: []expectedStats{ + { + UsageCoreNanoSeconds: 400, + UsageNanoCores: 200, + WorkingSetBytes: 20, + }, + }, + expectError: false, + }, + } { + t.Run(desc, func(t *testing.T) { + actualPodStats, actualContainerStats, err := c.toPodSandboxStats(test.sandbox, test.metrics, test.containers, currentStatsTimestamp) + if test.expectError { + assert.NotNil(t, err) + return + } + + assert.Equal(t, test.expectedPodStats.UsageCoreNanoSeconds, actualPodStats.Cpu.UsageCoreNanoSeconds.Value) + assert.Equal(t, test.expectedPodStats.UsageNanoCores, actualPodStats.Cpu.UsageNanoCores.Value) + + for i, expectedStat := range test.expectedContainerStats { + actutalStat := actualContainerStats[i] + + assert.Equal(t, expectedStat.UsageCoreNanoSeconds, actutalStat.Cpu.UsageCoreNanoSeconds.Value) + assert.Equal(t, expectedStat.UsageNanoCores, actutalStat.Cpu.UsageNanoCores.Value) + } + }) + } +} + +func sandboxPod(id string, timestamp time.Time, cachedCPU uint64) sandboxstore.Sandbox { + return sandboxstore.Sandbox{ + Metadata: sandboxstore.Metadata{ID: id}, Stats: &stats.ContainerStats{ + Timestamp: timestamp, + UsageCoreNanoSeconds: cachedCPU, + }} +} + +func windowsStat(timestamp time.Time, cpu uint64, memory uint64) *wstats.Statistics_Windows { + return &wstats.Statistics_Windows{ + Windows: &wstats.WindowsContainerStatistics{ + Timestamp: timestamp, + Processor: &wstats.WindowsContainerProcessorStatistics{ + TotalRuntimeNS: cpu, + }, + Memory: &wstats.WindowsContainerMemoryStatistics{ + MemoryUsagePrivateWorkingSetBytes: memory, + }, + }, + } +} + +func Test_criService_saveSandBoxMetrics(t *testing.T) { + + timestamp := time.Now() + containerID := "c1" + sandboxID := "s1" + for desc, test := range map[string]struct { + sandboxStats *runtime.PodSandboxStats + expectError bool + expectedSandboxvalue *stats.ContainerStats + expectedContainervalue *stats.ContainerStats + }{ + "if sandboxstats is nil then skip ": { + sandboxStats: nil, + expectError: false, + expectedSandboxvalue: nil, + }, + "if sandboxstats.windows is nil then skip": { + sandboxStats: &runtime.PodSandboxStats{ + Windows: nil, + }, + expectError: false, + expectedSandboxvalue: nil, + }, + "if sandboxstats.windows.cpu is nil then skip": { + sandboxStats: &runtime.PodSandboxStats{ + Windows: &runtime.WindowsPodSandboxStats{ + Cpu: nil, + }, + }, + expectError: false, + expectedSandboxvalue: nil, + }, + "if sandboxstats.windows.cpu.UsageCoreNanoSeconds is nil then skip": { + sandboxStats: &runtime.PodSandboxStats{ + Windows: &runtime.WindowsPodSandboxStats{ + Cpu: &runtime.WindowsCpuUsage{ + UsageCoreNanoSeconds: nil, + }, + }, + }, + expectError: false, + expectedSandboxvalue: nil, + }, + "Stats for containers that have cpu nil are skipped": { + sandboxStats: &runtime.PodSandboxStats{ + Windows: &runtime.WindowsPodSandboxStats{ + Cpu: &runtime.WindowsCpuUsage{ + Timestamp: timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: 100}, + }, + Containers: []*runtime.WindowsContainerStats{ + { + Attributes: &runtime.ContainerAttributes{Id: containerID}, + Cpu: nil, + }, + }, + }, + }, + expectError: false, + expectedSandboxvalue: &stats.ContainerStats{ + Timestamp: timestamp, + UsageCoreNanoSeconds: 100, + }, + expectedContainervalue: nil, + }, + "Stats for containers that have UsageCoreNanoSeconds nil are skipped": { + sandboxStats: &runtime.PodSandboxStats{ + Windows: &runtime.WindowsPodSandboxStats{ + Cpu: &runtime.WindowsCpuUsage{ + Timestamp: timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: 100}, + }, + Containers: []*runtime.WindowsContainerStats{ + { + Attributes: &runtime.ContainerAttributes{Id: containerID}, + Cpu: &runtime.WindowsCpuUsage{ + Timestamp: timestamp.UnixNano(), + UsageCoreNanoSeconds: nil}, + }, + }, + }, + }, + expectError: false, + expectedSandboxvalue: &stats.ContainerStats{ + Timestamp: timestamp, + UsageCoreNanoSeconds: 100, + }, + expectedContainervalue: nil, + }, + "Stats are updated for sandbox and containers": { + sandboxStats: &runtime.PodSandboxStats{ + Windows: &runtime.WindowsPodSandboxStats{ + Cpu: &runtime.WindowsCpuUsage{ + Timestamp: timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: 100}, + }, + Containers: []*runtime.WindowsContainerStats{ + { + Attributes: &runtime.ContainerAttributes{Id: containerID}, + Cpu: &runtime.WindowsCpuUsage{ + Timestamp: timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: 50}, + }, + }, + }, + }, + }, + expectError: false, + expectedSandboxvalue: &stats.ContainerStats{ + Timestamp: timestamp, + UsageCoreNanoSeconds: 100, + }, + expectedContainervalue: &stats.ContainerStats{ + Timestamp: timestamp, + UsageCoreNanoSeconds: 50, + }, + }, + } { + t.Run(desc, func(t *testing.T) { + c := newTestCRIService() + c.sandboxStore.Add(sandboxstore.Sandbox{ + Metadata: sandboxstore.Metadata{ID: sandboxID}, + }) + + c.containerStore.Add(containerstore.Container{ + Metadata: containerstore.Metadata{ID: containerID}, + }) + + err := c.saveSandBoxMetrics(sandboxID, test.sandboxStats) + + if test.expectError { + assert.NotNil(t, err) + } else { + assert.Nil(t, err) + } + + sandbox, err := c.sandboxStore.Get(sandboxID) + assert.Nil(t, err) + + if test.expectedSandboxvalue != nil { + assert.Equal(t, test.expectedSandboxvalue.Timestamp.UnixNano(), sandbox.Stats.Timestamp.UnixNano()) + assert.Equal(t, test.expectedSandboxvalue.UsageCoreNanoSeconds, sandbox.Stats.UsageCoreNanoSeconds) + } else { + assert.Nil(t, sandbox.Stats) + } + + container, err := c.containerStore.Get(containerID) + assert.Nil(t, err) + if test.expectedContainervalue != nil { + assert.Equal(t, test.expectedContainervalue.Timestamp.UnixNano(), container.Stats.Timestamp.UnixNano()) + assert.Equal(t, test.expectedContainervalue.UsageCoreNanoSeconds, container.Stats.UsageCoreNanoSeconds) + } else { + assert.Nil(t, container.Stats) + } + }) + } +} diff --git a/pkg/cri/server/sandbox_stats.go b/pkg/cri/server/sandbox_stats.go index feb19120c..905e1e58f 100644 --- a/pkg/cri/server/sandbox_stats.go +++ b/pkg/cri/server/sandbox_stats.go @@ -33,12 +33,7 @@ func (c *criService) PodSandboxStats( return nil, fmt.Errorf("an error occurred when trying to find sandbox %s: %w", r.GetPodSandboxId(), err) } - metrics, err := metricsForSandbox(sandbox) - if err != nil { - return nil, fmt.Errorf("failed getting metrics for sandbox %s: %w", r.GetPodSandboxId(), err) - } - - podSandboxStats, err := c.podSandboxStats(ctx, sandbox, metrics) + podSandboxStats, err := c.podSandboxStats(ctx, sandbox) if err != nil { return nil, fmt.Errorf("failed to decode pod sandbox metrics %s: %w", r.GetPodSandboxId(), err) } diff --git a/pkg/cri/server/sandbox_stats_linux.go b/pkg/cri/server/sandbox_stats_linux.go index c6a1cb0f5..9bd9f9b1f 100644 --- a/pkg/cri/server/sandbox_stats_linux.go +++ b/pkg/cri/server/sandbox_stats_linux.go @@ -21,39 +21,39 @@ import ( "fmt" "time" - "github.com/containernetworking/plugins/pkg/ns" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - "github.com/containerd/cgroups/v3" "github.com/containerd/cgroups/v3/cgroup1" cgroupsv2 "github.com/containerd/cgroups/v3/cgroup2" - - "github.com/vishvananda/netlink" - "github.com/containerd/containerd/log" sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + "github.com/containernetworking/plugins/pkg/ns" + "github.com/vishvananda/netlink" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" ) func (c *criService) podSandboxStats( ctx context.Context, - sandbox sandboxstore.Sandbox, - stats interface{}, -) (*runtime.PodSandboxStats, error) { + sandbox sandboxstore.Sandbox) (*runtime.PodSandboxStats, error) { meta := sandbox.Metadata if sandbox.Status.Get().State != sandboxstore.StateReady { return nil, fmt.Errorf("failed to get pod sandbox stats since sandbox container %q is not in ready state", meta.ID) } - var podSandboxStats runtime.PodSandboxStats - podSandboxStats.Attributes = &runtime.PodSandboxAttributes{ - Id: meta.ID, - Metadata: meta.Config.GetMetadata(), - Labels: meta.Config.GetLabels(), - Annotations: meta.Config.GetAnnotations(), + stats, err := metricsForSandbox(sandbox) + if err != nil { + return nil, fmt.Errorf("failed getting metrics for sandbox %s: %w", sandbox.ID, err) } - podSandboxStats.Linux = &runtime.LinuxPodSandboxStats{} + podSandboxStats := &runtime.PodSandboxStats{ + Linux: &runtime.LinuxPodSandboxStats{}, + Attributes: &runtime.PodSandboxAttributes{ + Id: meta.ID, + Metadata: meta.Config.GetMetadata(), + Labels: meta.Config.GetLabels(), + Annotations: meta.Config.GetAnnotations(), + }, + } if stats != nil { timestamp := time.Now() @@ -119,7 +119,7 @@ func (c *criService) podSandboxStats( podSandboxStats.Linux.Containers = resp.GetStats() } - return &podSandboxStats, nil + return podSandboxStats, nil } // https://github.com/cri-o/cri-o/blob/74a5cf8dffd305b311eb1c7f43a4781738c388c1/internal/oci/stats.go#L32 diff --git a/pkg/cri/server/sandbox_stats_list.go b/pkg/cri/server/sandbox_stats_list.go index c989bbfa9..c11a998c1 100644 --- a/pkg/cri/server/sandbox_stats_list.go +++ b/pkg/cri/server/sandbox_stats_list.go @@ -33,12 +33,7 @@ func (c *criService) ListPodSandboxStats( podSandboxStats := new(runtime.ListPodSandboxStatsResponse) for _, sandbox := range sandboxes { - metrics, err := metricsForSandbox(sandbox) - if err != nil { - return nil, fmt.Errorf("failed to obtain metrics for sandbox %q: %w", sandbox.ID, err) - } - - sandboxStats, err := c.podSandboxStats(ctx, sandbox, metrics) + sandboxStats, err := c.podSandboxStats(ctx, sandbox) if err != nil { return nil, fmt.Errorf("failed to decode sandbox container metrics for sandbox %q: %w", sandbox.ID, err) } diff --git a/pkg/cri/server/sandbox_stats_other.go b/pkg/cri/server/sandbox_stats_other.go index f28753a0e..3ecdaa4bf 100644 --- a/pkg/cri/server/sandbox_stats_other.go +++ b/pkg/cri/server/sandbox_stats_other.go @@ -22,16 +22,13 @@ import ( "context" "fmt" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - "github.com/containerd/containerd/errdefs" sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" ) -func (c *criService) podSandboxStats(ctx context.Context, sandbox sandboxstore.Sandbox, stats interface{}) (*runtime.PodSandboxStats, error) { - return nil, fmt.Errorf("pod sandbox stats not implemented: %w", errdefs.ErrNotImplemented) -} - -func metricsForSandbox(sandbox sandboxstore.Sandbox) (interface{}, error) { +func (c *criService) podSandboxStats( + ctx context.Context, + sandbox sandboxstore.Sandbox) (*runtime.PodSandboxStats, error) { return nil, fmt.Errorf("metrics for sandbox not implemented: %w", errdefs.ErrNotImplemented) } diff --git a/pkg/cri/server/sandbox_stats_windows.go b/pkg/cri/server/sandbox_stats_windows.go index ac8ab9e60..5d6b11193 100644 --- a/pkg/cri/server/sandbox_stats_windows.go +++ b/pkg/cri/server/sandbox_stats_windows.go @@ -19,17 +19,400 @@ package server import ( "context" "fmt" + "time" - runtime "k8s.io/cri-api/pkg/apis/runtime/v1" - - "github.com/containerd/containerd/errdefs" + "github.com/Microsoft/hcsshim" + wstats "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" + "github.com/Microsoft/hcsshim/hcn" + "github.com/containerd/containerd/api/services/tasks/v1" + "github.com/containerd/containerd/api/types" + "github.com/containerd/containerd/log" + containerstore "github.com/containerd/containerd/pkg/cri/store/container" sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + "github.com/containerd/containerd/pkg/cri/store/stats" + "github.com/containerd/typeurl/v2" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" ) -func (c *criService) podSandboxStats(ctx context.Context, sandbox sandboxstore.Sandbox, stats interface{}) (*runtime.PodSandboxStats, error) { - return nil, fmt.Errorf("pod sandbox stats not implemented on windows: %w", errdefs.ErrNotImplemented) +func (c *criService) podSandboxStats( + ctx context.Context, + sandbox sandboxstore.Sandbox) (*runtime.PodSandboxStats, error) { + meta := sandbox.Metadata + + if sandbox.Status.Get().State != sandboxstore.StateReady { + return nil, fmt.Errorf("failed to get pod sandbox stats since sandbox container %q is not in ready state", meta.ID) + } + + timestamp := time.Now() + podSandboxStats := &runtime.PodSandboxStats{ + Windows: &runtime.WindowsPodSandboxStats{}, + Attributes: &runtime.PodSandboxAttributes{ + Id: meta.ID, + Metadata: meta.Config.GetMetadata(), + Labels: meta.Config.GetLabels(), + Annotations: meta.Config.GetAnnotations(), + }, + } + + metrics, containers, err := c.listWindowsMetricsForSandbox(ctx, sandbox) + if err != nil { + return nil, fmt.Errorf("failed to obtain container stats during podSandboxStats call: %w", err) + } + + statsMap, err := convertMetricsToWindowsStats(metrics, sandbox) + if err != nil { + return nil, err + } + + podCPU, containerStats, err := c.toPodSandboxStats(sandbox, statsMap, containers, timestamp) + if err != nil { + return nil, fmt.Errorf("failed to convert container stats during podSandboxStats call: %w", err) + } + podSandboxStats.Windows.Cpu = podCPU.Cpu + podSandboxStats.Windows.Memory = podCPU.Memory + podSandboxStats.Windows.Containers = containerStats + + podSandboxStats.Windows.Network = windowsNetworkUsage(ctx, sandbox, timestamp) + + pidCount, err := c.getSandboxPidCount(ctx, sandbox) + if err != nil { + return nil, err + } + + podSandboxStats.Windows.Process = &runtime.WindowsProcessUsage{ + Timestamp: timestamp.UnixNano(), + ProcessCount: &runtime.UInt64Value{Value: pidCount}, + } + + c.saveSandBoxMetrics(podSandboxStats.Attributes.Id, podSandboxStats) + + return podSandboxStats, nil } -func metricsForSandbox(sandbox sandboxstore.Sandbox) (interface{}, error) { - return nil, fmt.Errorf("metrics for sandbox not implemented on windows: %w", errdefs.ErrNotImplemented) +func convertMetricsToWindowsStats(metrics []*types.Metric, sandbox sandboxstore.Sandbox) (map[string]*wstats.Statistics, error) { + isHostProcess := sandbox.Config.GetWindows().GetSecurityContext().GetHostProcess() + + statsMap := make(map[string]*wstats.Statistics) + for _, stat := range metrics { + containerStatsData, err := typeurl.UnmarshalAny(stat.Data) + if err != nil { + return nil, fmt.Errorf("failed to extract metrics for container with id %s: %w", stat.ID, err) + } + + // extract the metrics if available for this container + // containerStatsData can be nil for pods that don't have an actual podsandbox container such as HPC + // In the case of HostProcess sandbox container we will use the nil value for the statsmap which is used later + // otherwise return an error since we should have gotten stats + containerStats, ok := containerStatsData.(*wstats.Statistics) + if !ok && !(isHostProcess && sandbox.ID == stat.ID) { + return nil, fmt.Errorf("failed to extract metrics for container with id %s: %w", stat.ID, err) + } + + statsMap[stat.ID] = containerStats + } + return statsMap, nil +} + +func (c *criService) toPodSandboxStats(sandbox sandboxstore.Sandbox, statsMap map[string]*wstats.Statistics, containers []containerstore.Container, timestamp time.Time) (*runtime.WindowsContainerStats, []*runtime.WindowsContainerStats, error) { + podMetric, ok := statsMap[sandbox.ID] + if !ok { + return nil, nil, fmt.Errorf("failed to find container metric for pod with id %s", sandbox.ID) + } + + podRuntimeStats, err := c.convertToCRIStats(podMetric) + if err != nil { + return nil, nil, fmt.Errorf("failed to covert container metrics for sandbox with id %s: %w", sandbox.ID, err) + } + + windowsContainerStats := make([]*runtime.WindowsContainerStats, 0, len(statsMap)) + for _, cntr := range containers { + containerMetric := statsMap[cntr.ID] + + if containerMetric == nil { + return nil, nil, fmt.Errorf("failed to find metrics for container with id %s: %w", cntr.ID, err) + } + + containerStats, err := c.convertToCRIStats(containerMetric) + if err != nil { + return nil, nil, fmt.Errorf("failed to convert metrics for container with id %s: %w", cntr.ID, err) + } + + // Calculate NanoCores for container + if containerStats.Cpu.UsageCoreNanoSeconds != nil { + nanoCoreUsage := getUsageNanoCores(containerStats.Cpu.UsageCoreNanoSeconds.Value, cntr.Stats, containerStats.Cpu.Timestamp) + containerStats.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoCoreUsage} + } + + // On Windows we need to add up all the podStatsData to get the Total for the Pod as there isn't something + // like a parent cgroup that queried for all the pod podStatsData + appendCPUPodStats(podRuntimeStats, containerStats, timestamp) + appendMemoryPodStats(podRuntimeStats, containerStats, timestamp) + + // If snapshotstore doesn't have cached snapshot information + // set WritableLayer usage to zero + var usedBytes uint64 + sn, err := c.snapshotStore.Get(cntr.ID) + if err == nil { + usedBytes = sn.Size + } + containerStats.WritableLayer = &runtime.WindowsFilesystemUsage{ + Timestamp: sn.Timestamp, + FsId: &runtime.FilesystemIdentifier{ + Mountpoint: c.imageFSPath, + }, + UsedBytes: &runtime.UInt64Value{Value: usedBytes}, + } + + containerStats.Attributes = &runtime.ContainerAttributes{ + Id: cntr.ID, + Metadata: cntr.Config.GetMetadata(), + Labels: cntr.Config.GetLabels(), + Annotations: cntr.Config.GetAnnotations(), + } + + windowsContainerStats = append(windowsContainerStats, containerStats) + } + + // Calculate NanoCores for pod after adding containers cpu including the pods cpu + if podRuntimeStats.Cpu.UsageCoreNanoSeconds != nil { + nanoCoreUsage := getUsageNanoCores(podRuntimeStats.Cpu.UsageCoreNanoSeconds.Value, sandbox.Stats, podRuntimeStats.Cpu.Timestamp) + podRuntimeStats.Cpu.UsageNanoCores = &runtime.UInt64Value{Value: nanoCoreUsage} + } + + return podRuntimeStats, windowsContainerStats, nil +} + +func appendCPUPodStats(podRuntimeStats *runtime.WindowsContainerStats, containerRunTimeStats *runtime.WindowsContainerStats, timestamp time.Time) { + // protect against missing stats in case container hasn't started yet + if containerRunTimeStats.Cpu == nil || containerRunTimeStats.Cpu.UsageCoreNanoSeconds == nil { + return + } + + // It is possible the pod sandbox might not be populated with values if it doesn't exist + // HostProcess pods are an example where there is no actual pod sandbox running and therefor no stats + if podRuntimeStats.Cpu == nil { + podRuntimeStats.Cpu = &runtime.WindowsCpuUsage{ + Timestamp: timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: 0}, + } + } + + if podRuntimeStats.Cpu.UsageCoreNanoSeconds == nil { + podRuntimeStats.Cpu.UsageCoreNanoSeconds = &runtime.UInt64Value{Value: 0} + } + + podRuntimeStats.Cpu.UsageCoreNanoSeconds.Value += containerRunTimeStats.Cpu.UsageCoreNanoSeconds.Value +} + +func appendMemoryPodStats(podRuntimeStats *runtime.WindowsContainerStats, containerRunTimeStats *runtime.WindowsContainerStats, timestamp time.Time) { + // protect against missing stats in case container hasn't started yet + if containerRunTimeStats.Memory == nil { + return + } + + // It is possible the pod sandbox might not be populated with values if it doesn't exist + // HostProcess pods are an example where there is no actual pod sandbox running and therefor no stats + if podRuntimeStats.Memory == nil { + podRuntimeStats.Memory = &runtime.WindowsMemoryUsage{ + Timestamp: timestamp.UnixNano(), + WorkingSetBytes: &runtime.UInt64Value{Value: 0}, + AvailableBytes: &runtime.UInt64Value{Value: 0}, + PageFaults: &runtime.UInt64Value{Value: 0}, + } + } + + if containerRunTimeStats.Memory.WorkingSetBytes != nil { + if podRuntimeStats.Memory.WorkingSetBytes == nil { + podRuntimeStats.Memory.WorkingSetBytes = &runtime.UInt64Value{Value: 0} + } + podRuntimeStats.Memory.WorkingSetBytes.Value += containerRunTimeStats.Memory.WorkingSetBytes.Value + } + + if containerRunTimeStats.Memory.AvailableBytes != nil { + if podRuntimeStats.Memory.AvailableBytes == nil { + podRuntimeStats.Memory.AvailableBytes = &runtime.UInt64Value{Value: 0} + } + podRuntimeStats.Memory.AvailableBytes.Value += containerRunTimeStats.Memory.AvailableBytes.Value + } + + if containerRunTimeStats.Memory.PageFaults != nil { + if podRuntimeStats.Memory.PageFaults == nil { + podRuntimeStats.Memory.PageFaults = &runtime.UInt64Value{Value: 0} + } + podRuntimeStats.Memory.PageFaults.Value += containerRunTimeStats.Memory.PageFaults.Value + } +} + +func (c *criService) listWindowsMetricsForSandbox(ctx context.Context, sandbox sandboxstore.Sandbox) ([]*types.Metric, []containerstore.Container, error) { + req := &tasks.MetricsRequest{} + var containers []containerstore.Container + for _, cntr := range c.containerStore.List() { + if cntr.SandboxID != sandbox.ID { + continue + } + containers = append(containers, cntr) + req.Filters = append(req.Filters, "id=="+cntr.ID) + } + + //add sandbox container as well + req.Filters = append(req.Filters, "id=="+sandbox.ID) + + resp, err := c.client.TaskService().Metrics(ctx, req) + if err != nil { + return nil, nil, fmt.Errorf("failed to fetch metrics for tasks: %w", err) + } + return resp.Metrics, containers, nil +} + +func (c *criService) convertToCRIStats(stats *wstats.Statistics) (*runtime.WindowsContainerStats, error) { + var cs runtime.WindowsContainerStats + if stats != nil { + wstats := stats.GetWindows() + if wstats == nil { + return nil, fmt.Errorf("windows stats is empty") + } + if wstats.Processor != nil { + cs.Cpu = &runtime.WindowsCpuUsage{ + Timestamp: wstats.Timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: wstats.Processor.TotalRuntimeNS}, + } + } + + if wstats.Memory != nil { + cs.Memory = &runtime.WindowsMemoryUsage{ + Timestamp: wstats.Timestamp.UnixNano(), + WorkingSetBytes: &runtime.UInt64Value{ + Value: wstats.Memory.MemoryUsagePrivateWorkingSetBytes, + }, + } + } + + } + return &cs, nil +} + +func getUsageNanoCores(usageCoreNanoSeconds uint64, oldStats *stats.ContainerStats, newtimestamp int64) uint64 { + if oldStats == nil { + return 0 + } + + nanoSeconds := newtimestamp - oldStats.Timestamp.UnixNano() + + // zero or negative interval + if nanoSeconds <= 0 { + return 0 + } + + return uint64(float64(usageCoreNanoSeconds-oldStats.UsageCoreNanoSeconds) / + float64(nanoSeconds) * float64(time.Second/time.Nanosecond)) +} + +func windowsNetworkUsage(ctx context.Context, sandbox sandboxstore.Sandbox, timestamp time.Time) *runtime.WindowsNetworkUsage { + eps, err := hcn.GetNamespaceEndpointIds(sandbox.NetNSPath) + if err != nil { + log.G(ctx).WithField("podsandboxid", sandbox.ID).WithError(err).Errorf("unable to retrieve windows endpoint metrics for netNsPath: %v", sandbox.NetNSPath) + return nil + } + networkUsage := &runtime.WindowsNetworkUsage{ + Timestamp: timestamp.UnixNano(), + } + for _, ep := range eps { + endpointStats, err := hcsshim.GetHNSEndpointStats(ep) + if err != nil { + log.G(ctx).WithError(err).Errorf("unable to gather stats for endpoint: %s", ep) + continue + } + rtStats := runtime.WindowsNetworkInterfaceUsage{ + Name: endpointStats.EndpointID, + RxBytes: &runtime.UInt64Value{Value: endpointStats.BytesReceived}, + RxPacketsDropped: &runtime.UInt64Value{Value: endpointStats.DroppedPacketsIncoming}, + TxBytes: &runtime.UInt64Value{Value: endpointStats.BytesSent}, + TxPacketsDropped: &runtime.UInt64Value{Value: endpointStats.DroppedPacketsOutgoing}, + } + networkUsage.Interfaces = append(networkUsage.Interfaces, &rtStats) + + // if the default interface isn't set add it. + // We don't have a way to determine the default interface in windows + if networkUsage.DefaultInterface == nil { + networkUsage.DefaultInterface = &rtStats + } + } + + return networkUsage +} + +func (c *criService) saveSandBoxMetrics(sandboxID string, sandboxStats *runtime.PodSandboxStats) error { + // we may not have stats since container hasn't started yet so skip saving to cache + if sandboxStats == nil || sandboxStats.Windows == nil || sandboxStats.Windows.Cpu == nil || + sandboxStats.Windows.Cpu.UsageCoreNanoSeconds == nil { + return nil + } + + newStats := &stats.ContainerStats{ + UsageCoreNanoSeconds: sandboxStats.Windows.Cpu.UsageCoreNanoSeconds.Value, + Timestamp: time.Unix(0, sandboxStats.Windows.Cpu.Timestamp), + } + err := c.sandboxStore.UpdateContainerStats(sandboxID, newStats) + if err != nil { + return err + } + + // We queried the stats when getting sandbox stats. We need to save the query to cache + for _, cntr := range sandboxStats.Windows.Containers { + // we may not have stats since container hasn't started yet so skip saving to cache + if cntr == nil || cntr.Cpu == nil || cntr.Cpu.UsageCoreNanoSeconds == nil { + return nil + } + + newStats := &stats.ContainerStats{ + UsageCoreNanoSeconds: cntr.Cpu.UsageCoreNanoSeconds.Value, + Timestamp: time.Unix(0, cntr.Cpu.Timestamp), + } + err = c.containerStore.UpdateContainerStats(cntr.Attributes.Id, newStats) + if err != nil { + return err + } + } + + return nil +} + +func (c *criService) getSandboxPidCount(ctx context.Context, sandbox sandboxstore.Sandbox) (uint64, error) { + var pidCount uint64 + + // get process count inside PodSandbox for Windows + task, err := sandbox.Container.Task(ctx, nil) + if err != nil { + return 0, err + } + processes, err := task.Pids(ctx) + if err != nil { + return 0, err + } + pidCount += uint64(len(processes)) + + for _, cntr := range c.containerStore.List() { + if cntr.SandboxID != sandbox.ID { + continue + } + + state := cntr.Status.Get().State() + if state != runtime.ContainerState_CONTAINER_RUNNING { + continue + } + + task, err := cntr.Container.Task(ctx, nil) + if err != nil { + return 0, err + } + + processes, err := task.Pids(ctx) + if err != nil { + return 0, err + } + pidCount += uint64(len(processes)) + + } + + return pidCount, nil } diff --git a/pkg/cri/server/sandbox_stats_windows_test.go b/pkg/cri/server/sandbox_stats_windows_test.go new file mode 100644 index 000000000..8e6436e2b --- /dev/null +++ b/pkg/cri/server/sandbox_stats_windows_test.go @@ -0,0 +1,394 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package server + +import ( + "testing" + "time" + + wstats "github.com/Microsoft/hcsshim/cmd/containerd-shim-runhcs-v1/stats" + containerstore "github.com/containerd/containerd/pkg/cri/store/container" + sandboxstore "github.com/containerd/containerd/pkg/cri/store/sandbox" + "github.com/containerd/containerd/pkg/cri/store/stats" + "github.com/stretchr/testify/assert" + runtime "k8s.io/cri-api/pkg/apis/runtime/v1" +) + +func TestGetUsageNanoCores(t *testing.T) { + timestamp := time.Now() + secondAfterTimeStamp := timestamp.Add(time.Second) + ID := "ID" + + for desc, test := range map[string]struct { + firstCPUValue uint64 + secondCPUValue uint64 + expectedNanoCoreUsageFirst uint64 + expectedNanoCoreUsageSecond uint64 + }{ + "metrics": { + firstCPUValue: 50, + secondCPUValue: 500, + expectedNanoCoreUsageFirst: 0, + expectedNanoCoreUsageSecond: 450, + }, + } { + t.Run(desc, func(t *testing.T) { + container, err := containerstore.NewContainer( + containerstore.Metadata{ID: ID}, + ) + assert.NoError(t, err) + + // calculate for first iteration + // first run so container stats will be nil + assert.Nil(t, container.Stats) + cpuUsage := getUsageNanoCores(test.firstCPUValue, container.Stats, timestamp.UnixNano()) + assert.NoError(t, err) + assert.Equal(t, test.expectedNanoCoreUsageFirst, cpuUsage) + + // fill in the stats as if they now exist + container.Stats = &stats.ContainerStats{} + container.Stats.UsageCoreNanoSeconds = test.firstCPUValue + container.Stats.Timestamp = timestamp + assert.NotNil(t, container.Stats) + + // calculate for second iteration + cpuUsage = getUsageNanoCores(test.secondCPUValue, container.Stats, secondAfterTimeStamp.UnixNano()) + assert.NoError(t, err) + assert.Equal(t, test.expectedNanoCoreUsageSecond, cpuUsage) + }) + } + +} + +func Test_criService_podSandboxStats(t *testing.T) { + initialStatsTimestamp := time.Now() + currentStatsTimestamp := initialStatsTimestamp.Add(time.Second) + + c := newTestCRIService() + + type expectedStats struct { + UsageCoreNanoSeconds uint64 + UsageNanoCores uint64 + WorkingSetBytes uint64 + } + for desc, test := range map[string]struct { + metrics map[string]*wstats.Statistics + sandbox sandboxstore.Sandbox + containers []containerstore.Container + expectedPodStats expectedStats + expectedContainerStats []expectedStats + expectError bool + }{ + "no metrics found should return error": { + metrics: map[string]*wstats.Statistics{}, + sandbox: sandboxstore.Sandbox{}, + containers: []containerstore.Container{}, + expectedPodStats: expectedStats{}, + expectedContainerStats: []expectedStats{}, + expectError: true, + }, + "pod stats will include the container stats": { + metrics: map[string]*wstats.Statistics{ + "c1": { + Container: windowsStat(currentStatsTimestamp, 200, 20), + }, + "s1": { + Container: windowsStat(currentStatsTimestamp, 200, 20), + }, + }, + sandbox: sandboxstore.Sandbox{Metadata: sandboxstore.Metadata{ID: "s1"}}, + containers: []containerstore.Container{ + {Metadata: containerstore.Metadata{ID: "c1"}}, + }, + expectedPodStats: expectedStats{ + UsageCoreNanoSeconds: 400, + UsageNanoCores: 0, + WorkingSetBytes: 40, + }, + expectedContainerStats: []expectedStats{ + { + UsageCoreNanoSeconds: 200, + UsageNanoCores: 0, + WorkingSetBytes: 20, + }, + }, + expectError: false, + }, + "pod with existing stats will have usagenanocores totalled across pods and containers": { + metrics: map[string]*wstats.Statistics{ + "c1": { + Container: windowsStat(currentStatsTimestamp, 400, 20), + }, + "s1": { + Container: windowsStat(currentStatsTimestamp, 400, 20), + }, + }, + sandbox: sandboxPod("s1", initialStatsTimestamp, 400), + containers: []containerstore.Container{ + { + Metadata: containerstore.Metadata{ID: "c1"}, + Stats: &stats.ContainerStats{ + Timestamp: initialStatsTimestamp, + UsageCoreNanoSeconds: 200, + }, + }, + }, + expectedPodStats: expectedStats{ + UsageCoreNanoSeconds: 800, + UsageNanoCores: 400, + WorkingSetBytes: 40, + }, + expectedContainerStats: []expectedStats{ + { + UsageCoreNanoSeconds: 400, + UsageNanoCores: 200, + WorkingSetBytes: 20, + }, + }, + expectError: false, + }, + "pod sandbox with nil stats still works (hostprocess container scenario)": { + metrics: map[string]*wstats.Statistics{ + "c1": { + Container: windowsStat(currentStatsTimestamp, 400, 20), + }, + "s1": nil, + }, + sandbox: sandboxPod("s1", initialStatsTimestamp, 200), + containers: []containerstore.Container{ + { + Metadata: containerstore.Metadata{ID: "c1"}, + Stats: &stats.ContainerStats{ + Timestamp: initialStatsTimestamp, + UsageCoreNanoSeconds: 200, + }, + }, + }, + expectedPodStats: expectedStats{ + UsageCoreNanoSeconds: 400, + UsageNanoCores: 200, + WorkingSetBytes: 20, + }, + expectedContainerStats: []expectedStats{ + { + UsageCoreNanoSeconds: 400, + UsageNanoCores: 200, + WorkingSetBytes: 20, + }, + }, + expectError: false, + }, + } { + t.Run(desc, func(t *testing.T) { + actualPodStats, actualContainerStats, err := c.toPodSandboxStats(test.sandbox, test.metrics, test.containers, currentStatsTimestamp) + if test.expectError { + assert.NotNil(t, err) + return + } + + assert.Equal(t, test.expectedPodStats.UsageCoreNanoSeconds, actualPodStats.Cpu.UsageCoreNanoSeconds.Value) + assert.Equal(t, test.expectedPodStats.UsageNanoCores, actualPodStats.Cpu.UsageNanoCores.Value) + + for i, expectedStat := range test.expectedContainerStats { + actutalStat := actualContainerStats[i] + + assert.Equal(t, expectedStat.UsageCoreNanoSeconds, actutalStat.Cpu.UsageCoreNanoSeconds.Value) + assert.Equal(t, expectedStat.UsageNanoCores, actutalStat.Cpu.UsageNanoCores.Value) + } + }) + } +} + +func sandboxPod(id string, timestamp time.Time, cachedCPU uint64) sandboxstore.Sandbox { + return sandboxstore.Sandbox{ + Metadata: sandboxstore.Metadata{ID: id}, Stats: &stats.ContainerStats{ + Timestamp: timestamp, + UsageCoreNanoSeconds: cachedCPU, + }} +} + +func windowsStat(timestamp time.Time, cpu uint64, memory uint64) *wstats.Statistics_Windows { + return &wstats.Statistics_Windows{ + Windows: &wstats.WindowsContainerStatistics{ + Timestamp: timestamp, + Processor: &wstats.WindowsContainerProcessorStatistics{ + TotalRuntimeNS: cpu, + }, + Memory: &wstats.WindowsContainerMemoryStatistics{ + MemoryUsagePrivateWorkingSetBytes: memory, + }, + }, + } +} + +func Test_criService_saveSandBoxMetrics(t *testing.T) { + + timestamp := time.Now() + containerID := "c1" + sandboxID := "s1" + for desc, test := range map[string]struct { + sandboxStats *runtime.PodSandboxStats + expectError bool + expectedSandboxvalue *stats.ContainerStats + expectedContainervalue *stats.ContainerStats + }{ + "if sandboxstats is nil then skip ": { + sandboxStats: nil, + expectError: false, + expectedSandboxvalue: nil, + }, + "if sandboxstats.windows is nil then skip": { + sandboxStats: &runtime.PodSandboxStats{ + Windows: nil, + }, + expectError: false, + expectedSandboxvalue: nil, + }, + "if sandboxstats.windows.cpu is nil then skip": { + sandboxStats: &runtime.PodSandboxStats{ + Windows: &runtime.WindowsPodSandboxStats{ + Cpu: nil, + }, + }, + expectError: false, + expectedSandboxvalue: nil, + }, + "if sandboxstats.windows.cpu.UsageCoreNanoSeconds is nil then skip": { + sandboxStats: &runtime.PodSandboxStats{ + Windows: &runtime.WindowsPodSandboxStats{ + Cpu: &runtime.WindowsCpuUsage{ + UsageCoreNanoSeconds: nil, + }, + }, + }, + expectError: false, + expectedSandboxvalue: nil, + }, + "Stats for containers that have cpu nil are skipped": { + sandboxStats: &runtime.PodSandboxStats{ + Windows: &runtime.WindowsPodSandboxStats{ + Cpu: &runtime.WindowsCpuUsage{ + Timestamp: timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: 100}, + }, + Containers: []*runtime.WindowsContainerStats{ + { + Attributes: &runtime.ContainerAttributes{Id: containerID}, + Cpu: nil, + }, + }, + }, + }, + expectError: false, + expectedSandboxvalue: &stats.ContainerStats{ + Timestamp: timestamp, + UsageCoreNanoSeconds: 100, + }, + expectedContainervalue: nil, + }, + "Stats for containers that have UsageCoreNanoSeconds nil are skipped": { + sandboxStats: &runtime.PodSandboxStats{ + Windows: &runtime.WindowsPodSandboxStats{ + Cpu: &runtime.WindowsCpuUsage{ + Timestamp: timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: 100}, + }, + Containers: []*runtime.WindowsContainerStats{ + { + Attributes: &runtime.ContainerAttributes{Id: containerID}, + Cpu: &runtime.WindowsCpuUsage{ + Timestamp: timestamp.UnixNano(), + UsageCoreNanoSeconds: nil}, + }, + }, + }, + }, + expectError: false, + expectedSandboxvalue: &stats.ContainerStats{ + Timestamp: timestamp, + UsageCoreNanoSeconds: 100, + }, + expectedContainervalue: nil, + }, + "Stats are updated for sandbox and containers": { + sandboxStats: &runtime.PodSandboxStats{ + Windows: &runtime.WindowsPodSandboxStats{ + Cpu: &runtime.WindowsCpuUsage{ + Timestamp: timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: 100}, + }, + Containers: []*runtime.WindowsContainerStats{ + { + Attributes: &runtime.ContainerAttributes{Id: containerID}, + Cpu: &runtime.WindowsCpuUsage{ + Timestamp: timestamp.UnixNano(), + UsageCoreNanoSeconds: &runtime.UInt64Value{Value: 50}, + }, + }, + }, + }, + }, + expectError: false, + expectedSandboxvalue: &stats.ContainerStats{ + Timestamp: timestamp, + UsageCoreNanoSeconds: 100, + }, + expectedContainervalue: &stats.ContainerStats{ + Timestamp: timestamp, + UsageCoreNanoSeconds: 50, + }, + }, + } { + t.Run(desc, func(t *testing.T) { + c := newTestCRIService() + c.sandboxStore.Add(sandboxstore.Sandbox{ + Metadata: sandboxstore.Metadata{ID: sandboxID}, + }) + + c.containerStore.Add(containerstore.Container{ + Metadata: containerstore.Metadata{ID: containerID}, + }) + + err := c.saveSandBoxMetrics(sandboxID, test.sandboxStats) + + if test.expectError { + assert.NotNil(t, err) + } else { + assert.Nil(t, err) + } + + sandbox, err := c.sandboxStore.Get(sandboxID) + assert.Nil(t, err) + + if test.expectedSandboxvalue != nil { + assert.Equal(t, test.expectedSandboxvalue.Timestamp.UnixNano(), sandbox.Stats.Timestamp.UnixNano()) + assert.Equal(t, test.expectedSandboxvalue.UsageCoreNanoSeconds, sandbox.Stats.UsageCoreNanoSeconds) + } else { + assert.Nil(t, sandbox.Stats) + } + + container, err := c.containerStore.Get(containerID) + assert.Nil(t, err) + if test.expectedContainervalue != nil { + assert.Equal(t, test.expectedContainervalue.Timestamp.UnixNano(), container.Stats.Timestamp.UnixNano()) + assert.Equal(t, test.expectedContainervalue.UsageCoreNanoSeconds, container.Stats.UsageCoreNanoSeconds) + } else { + assert.Nil(t, container.Stats) + } + }) + } +}