Merge pull request #102884 from vinaykul/restart-free-pod-vertical-scaling

In-place Pod Vertical Scaling feature
Kubernetes Prow Robot 2023-02-27 22:53:15 -08:00 committed by GitHub
commit b9fd1802ba
161 changed files with 10282 additions and 1271 deletions

View File

@ -4753,6 +4753,14 @@
"$ref": "#/definitions/io.k8s.api.core.v1.Probe",
"description": "Periodic probe of container service readiness. Container will be removed from service endpoints if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes"
},
"resizePolicy": {
"description": "Resources resize policy for the container.",
"items": {
"$ref": "#/definitions/io.k8s.api.core.v1.ContainerResizePolicy"
},
"type": "array",
"x-kubernetes-list-type": "atomic"
},
"resources": {
"$ref": "#/definitions/io.k8s.api.core.v1.ResourceRequirements",
"description": "Compute Resources required by this container. Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/"
@ -4862,6 +4870,24 @@
],
"type": "object"
},
"io.k8s.api.core.v1.ContainerResizePolicy": {
"description": "ContainerResizePolicy represents resource resize policy for a single container.",
"properties": {
"policy": {
"description": "Resource resize policy applicable to the specified resource name. If not specified, it defaults to RestartNotRequired.",
"type": "string"
},
"resourceName": {
"description": "Name of the resource type to which this resource resize policy applies. Supported values: cpu, memory.",
"type": "string"
}
},
"required": [
"resourceName",
"policy"
],
"type": "object"
},
"io.k8s.api.core.v1.ContainerState": {
"description": "ContainerState holds a possible state of container. Only one of its members may be specified. If none of them is specified, the default one is ContainerStateWaiting.",
"properties": {
@ -4970,6 +4996,17 @@
"description": "Specifies whether the container has passed its readiness probe.",
"type": "boolean"
},
"resources": {
"$ref": "#/definitions/io.k8s.api.core.v1.ResourceRequirements",
"description": "Resources represents the compute resource requests and limits that have been successfully enacted on the running container after it has been started or has been successfully resized."
},
"resourcesAllocated": {
"additionalProperties": {
"$ref": "#/definitions/io.k8s.apimachinery.pkg.api.resource.Quantity"
},
"description": "ResourcesAllocated represents the compute resources allocated for this container by the node. Kubelet sets this value to Container.Resources.Requests upon successful pod admission and after successfully admitting desired pod resize.",
"type": "object"
},
"restartCount": {
"description": "The number of times the container has been restarted.",
"format": "int32",
@ -5357,6 +5394,14 @@
"$ref": "#/definitions/io.k8s.api.core.v1.Probe",
"description": "Probes are not allowed for ephemeral containers."
},
"resizePolicy": {
"description": "Resources resize policy for the container.",
"items": {
"$ref": "#/definitions/io.k8s.api.core.v1.ContainerResizePolicy"
},
"type": "array",
"x-kubernetes-list-type": "atomic"
},
"resources": {
"$ref": "#/definitions/io.k8s.api.core.v1.ResourceRequirements",
"description": "Resources are not allowed for ephemeral containers. Ephemeral containers use spare resources already allocated to the pod."
@ -7982,6 +8027,10 @@
"description": "A brief CamelCase message indicating details about why the pod is in this state. e.g. 'Evicted'",
"type": "string"
},
"resize": {
"description": "Status of resources resize desired for pod's containers. It is empty if no resources resize is pending. Any changes to container resources will automatically set this to \"Proposed\"",
"type": "string"
},
"startTime": {
"$ref": "#/definitions/io.k8s.apimachinery.pkg.apis.meta.v1.Time",
"description": "RFC 3339 date and time at which the object was acknowledged by the Kubelet. This is before the Kubelet pulled the container image(s) for the pod."

View File

@ -1157,6 +1157,19 @@
],
"description": "Periodic probe of container service readiness. Container will be removed from service endpoints if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes"
},
"resizePolicy": {
"description": "Resources resize policy for the container.",
"items": {
"allOf": [
{
"$ref": "#/components/schemas/io.k8s.api.core.v1.ContainerResizePolicy"
}
],
"default": {}
},
"type": "array",
"x-kubernetes-list-type": "atomic"
},
"resources": {
"allOf": [
{
@ -1292,6 +1305,26 @@
],
"type": "object"
},
"io.k8s.api.core.v1.ContainerResizePolicy": {
"description": "ContainerResizePolicy represents resource resize policy for a single container.",
"properties": {
"policy": {
"default": "",
"description": "Resource resize policy applicable to the specified resource name. If not specified, it defaults to RestartNotRequired.",
"type": "string"
},
"resourceName": {
"default": "",
"description": "Name of the resource type to which this resource resize policy applies. Supported values: cpu, memory.",
"type": "string"
}
},
"required": [
"resourceName",
"policy"
],
"type": "object"
},
"io.k8s.api.core.v1.ContainerState": {
"description": "ContainerState holds a possible state of container. Only one of its members may be specified. If none of them is specified, the default one is ContainerStateWaiting.",
"properties": {
@ -1437,6 +1470,26 @@
"description": "Specifies whether the container has passed its readiness probe.",
"type": "boolean"
},
"resources": {
"allOf": [
{
"$ref": "#/components/schemas/io.k8s.api.core.v1.ResourceRequirements"
}
],
"description": "Resources represents the compute resource requests and limits that have been successfully enacted on the running container after it has been started or has been successfully resized."
},
"resourcesAllocated": {
"additionalProperties": {
"allOf": [
{
"$ref": "#/components/schemas/io.k8s.apimachinery.pkg.api.resource.Quantity"
}
],
"default": {}
},
"description": "ResourcesAllocated represents the compute resources allocated for this container by the node. Kubelet sets this value to Container.Resources.Requests upon successful pod admission and after successfully admitting desired pod resize.",
"type": "object"
},
"restartCount": {
"default": 0,
"description": "The number of times the container has been restarted.",
@ -1954,6 +2007,19 @@
],
"description": "Probes are not allowed for ephemeral containers."
},
"resizePolicy": {
"description": "Resources resize policy for the container.",
"items": {
"allOf": [
{
"$ref": "#/components/schemas/io.k8s.api.core.v1.ContainerResizePolicy"
}
],
"default": {}
},
"type": "array",
"x-kubernetes-list-type": "atomic"
},
"resources": {
"allOf": [
{
@ -5405,6 +5471,10 @@
"description": "A brief CamelCase message indicating details about why the pod is in this state. e.g. 'Evicted'",
"type": "string"
},
"resize": {
"description": "Status of resources resize desired for pod's containers. It is empty if no resources resize is pending. Any changes to container resources will automatically set this to \"Proposed\"",
"type": "string"
},
"startTime": {
"allOf": [
{

View File

@ -1780,6 +1780,19 @@
],
"description": "Periodic probe of container service readiness. Container will be removed from service endpoints if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes"
},
"resizePolicy": {
"description": "Resources resize policy for the container.",
"items": {
"allOf": [
{
"$ref": "#/components/schemas/io.k8s.api.core.v1.ContainerResizePolicy"
}
],
"default": {}
},
"type": "array",
"x-kubernetes-list-type": "atomic"
},
"resources": {
"allOf": [
{
@ -1896,6 +1909,26 @@
],
"type": "object"
},
"io.k8s.api.core.v1.ContainerResizePolicy": {
"description": "ContainerResizePolicy represents resource resize policy for a single container.",
"properties": {
"policy": {
"default": "",
"description": "Resource resize policy applicable to the specified resource name. If not specified, it defaults to RestartNotRequired.",
"type": "string"
},
"resourceName": {
"default": "",
"description": "Name of the resource type to which this resource resize policy applies. Supported values: cpu, memory.",
"type": "string"
}
},
"required": [
"resourceName",
"policy"
],
"type": "object"
},
"io.k8s.api.core.v1.DownwardAPIProjection": {
"description": "Represents downward API info for projecting into a projected volume. Note that this is identical to a downwardAPI volume source without the default mode.",
"properties": {
@ -2181,6 +2214,19 @@
],
"description": "Probes are not allowed for ephemeral containers."
},
"resizePolicy": {
"description": "Resources resize policy for the container.",
"items": {
"allOf": [
{
"$ref": "#/components/schemas/io.k8s.api.core.v1.ContainerResizePolicy"
}
],
"default": {}
},
"type": "array",
"x-kubernetes-list-type": "atomic"
},
"resources": {
"allOf": [
{

View File

@ -1071,6 +1071,19 @@
],
"description": "Periodic probe of container service readiness. Container will be removed from service endpoints if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes"
},
"resizePolicy": {
"description": "Resources resize policy for the container.",
"items": {
"allOf": [
{
"$ref": "#/components/schemas/io.k8s.api.core.v1.ContainerResizePolicy"
}
],
"default": {}
},
"type": "array",
"x-kubernetes-list-type": "atomic"
},
"resources": {
"allOf": [
{
@ -1187,6 +1200,26 @@
],
"type": "object"
},
"io.k8s.api.core.v1.ContainerResizePolicy": {
"description": "ContainerResizePolicy represents resource resize policy for a single container.",
"properties": {
"policy": {
"default": "",
"description": "Resource resize policy applicable to the specified resource name. If not specified, it defaults to RestartNotRequired.",
"type": "string"
},
"resourceName": {
"default": "",
"description": "Name of the resource type to which this resource resize policy applies. Supported values: cpu, memory.",
"type": "string"
}
},
"required": [
"resourceName",
"policy"
],
"type": "object"
},
"io.k8s.api.core.v1.DownwardAPIProjection": {
"description": "Represents downward API info for projecting into a projected volume. Note that this is identical to a downwardAPI volume source without the default mode.",
"properties": {
@ -1472,6 +1505,19 @@
],
"description": "Probes are not allowed for ephemeral containers."
},
"resizePolicy": {
"description": "Resources resize policy for the container.",
"items": {
"allOf": [
{
"$ref": "#/components/schemas/io.k8s.api.core.v1.ContainerResizePolicy"
}
],
"default": {}
},
"type": "array",
"x-kubernetes-list-type": "atomic"
},
"resources": {
"allOf": [
{

View File

@ -22,6 +22,7 @@ import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
metavalidation "k8s.io/apimachinery/pkg/apis/meta/v1/validation"
"k8s.io/apimachinery/pkg/util/diff"
utilfeature "k8s.io/apiserver/pkg/util/feature"
api "k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/apis/core/helper"
@ -455,19 +456,24 @@ func DropDisabledTemplateFields(podTemplate, oldPodTemplate *api.PodTemplateSpec
func DropDisabledPodFields(pod, oldPod *api.Pod) {
var (
podSpec *api.PodSpec
podStatus *api.PodStatus
podAnnotations map[string]string
oldPodSpec *api.PodSpec
oldPodStatus *api.PodStatus
oldPodAnnotations map[string]string
)
if pod != nil {
podSpec = &pod.Spec
podStatus = &pod.Status
podAnnotations = pod.Annotations
}
if oldPod != nil {
oldPodSpec = &oldPod.Spec
oldPodStatus = &oldPod.Status
oldPodAnnotations = oldPod.Annotations
}
dropDisabledFields(podSpec, podAnnotations, oldPodSpec, oldPodAnnotations)
dropDisabledPodStatusFields(podStatus, oldPodStatus, podSpec, oldPodSpec)
}
// dropDisabledFields removes disabled fields from the pod metadata and spec.
@ -522,6 +528,42 @@ func dropDisabledFields(
dropDisabledNodeInclusionPolicyFields(podSpec, oldPodSpec)
dropDisabledMatchLabelKeysField(podSpec, oldPodSpec)
dropDisabledDynamicResourceAllocationFields(podSpec, oldPodSpec)
if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) && !inPlacePodVerticalScalingInUse(oldPodSpec) {
// Drop ResizePolicy fields. Don't drop updates to Resources field as template.spec.resources
// field is mutable for certain controllers. Let ValidatePodUpdate handle it.
for i := range podSpec.Containers {
podSpec.Containers[i].ResizePolicy = nil
}
for i := range podSpec.InitContainers {
podSpec.InitContainers[i].ResizePolicy = nil
}
for i := range podSpec.EphemeralContainers {
podSpec.EphemeralContainers[i].ResizePolicy = nil
}
}
}
// dropDisabledPodStatusFields removes disabled fields from the pod status
func dropDisabledPodStatusFields(podStatus, oldPodStatus *api.PodStatus, podSpec, oldPodSpec *api.PodSpec) {
// the new status is always non-nil
if podStatus == nil {
podStatus = &api.PodStatus{}
}
if !utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) && !inPlacePodVerticalScalingInUse(oldPodSpec) {
// Drop Resize, ResourcesAllocated, and Resources fields
dropResourcesFields := func(csl []api.ContainerStatus) {
for i := range csl {
csl[i].ResourcesAllocated = nil
csl[i].Resources = nil
}
}
dropResourcesFields(podStatus.ContainerStatuses)
dropResourcesFields(podStatus.InitContainerStatuses)
dropResourcesFields(podStatus.EphemeralContainerStatuses)
podStatus.Resize = ""
}
}
// dropDisabledDynamicResourceAllocationFields removes pod claim references from
@ -692,6 +734,22 @@ func hostUsersInUse(podSpec *api.PodSpec) bool {
return false
}
// inPlacePodVerticalScalingInUse returns true if the pod spec is non-nil and any container has ResizePolicy set
func inPlacePodVerticalScalingInUse(podSpec *api.PodSpec) bool {
if podSpec == nil {
return false
}
var inUse bool
VisitContainers(podSpec, Containers, func(c *api.Container, containerType ContainerType) bool {
if len(c.ResizePolicy) > 0 {
inUse = true
return false
}
return true
})
return inUse
}
// procMountInUse returns true if the pod spec is non-nil and has a SecurityContext's ProcMount field set to a non-default value
func procMountInUse(podSpec *api.PodSpec) bool {
if podSpec == nil {
@ -785,3 +843,28 @@ func hasInvalidLabelValueInAffinitySelector(spec *api.PodSpec) bool {
}
return false
}
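// MarkPodProposedForResize sets PodStatus.Resize to Proposed when any container's desired
// resource requests have changed and differ from the resources currently allocated to it by the node.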
func MarkPodProposedForResize(oldPod, newPod *api.Pod) {
for i, c := range newPod.Spec.Containers {
if c.Resources.Requests == nil {
continue
}
if diff.ObjectDiff(oldPod.Spec.Containers[i].Resources, c.Resources) == "" {
continue
}
findContainerStatus := func(css []api.ContainerStatus, cName string) (api.ContainerStatus, bool) {
for i := range css {
if css[i].Name == cName {
return css[i], true
}
}
return api.ContainerStatus{}, false
}
if cs, ok := findContainerStatus(newPod.Status.ContainerStatuses, c.Name); ok {
if diff.ObjectDiff(c.Resources.Requests, cs.ResourcesAllocated) != "" {
newPod.Status.Resize = api.PodResizeStatusProposed
break
}
}
}
}
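
For orientation, the following is an illustrative sketch (not part of this diff) of what MarkPodProposedForResize does when a pod's CPU request is raised. The import path for the package shown above is assumed to be k8s.io/kubernetes/pkg/api/pod; the types are the internal core API types used throughout this file. The test file that follows exercises the same behavior in more detail.

package main

import (
    "fmt"

    "k8s.io/apimachinery/pkg/api/resource"
    podutil "k8s.io/kubernetes/pkg/api/pod" // assumed import path for the package shown above
    api "k8s.io/kubernetes/pkg/apis/core"
)

func main() {
    mkPod := func(cpuRequest string) *api.Pod {
        return &api.Pod{
            Spec: api.PodSpec{Containers: []api.Container{{
                Name: "c1",
                Resources: api.ResourceRequirements{
                    Requests: api.ResourceList{api.ResourceCPU: resource.MustParse(cpuRequest)},
                },
            }}},
            Status: api.PodStatus{ContainerStatuses: []api.ContainerStatus{{
                Name:               "c1",
                ResourcesAllocated: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")},
            }}},
        }
    }
    // The user raises the CPU request from 100m to 200m; the node still has 100m allocated.
    oldPod, newPod := mkPod("100m"), mkPod("200m")
    podutil.MarkPodProposedForResize(oldPod, newPod)
    fmt.Println(newPod.Status.Resize) // Proposed; the kubelet takes over from here
}
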

View File

@ -25,7 +25,9 @@ import (
"github.com/google/go-cmp/cmp"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/diff"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/validation/field"
utilfeature "k8s.io/apiserver/pkg/util/feature"
@ -2274,3 +2276,394 @@ func TestDropVolumesClaimField(t *testing.T) {
}
}
}
func TestDropInPlacePodVerticalScaling(t *testing.T) {
podWithInPlaceVerticalScaling := func() *api.Pod {
return &api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "c1",
Image: "image",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")},
Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")},
},
ResizePolicy: []api.ContainerResizePolicy{
{ResourceName: api.ResourceCPU, Policy: api.RestartNotRequired},
{ResourceName: api.ResourceMemory, Policy: api.RestartRequired},
},
},
},
},
Status: api.PodStatus{
Resize: api.PodResizeStatusInProgress,
ContainerStatuses: []api.ContainerStatus{
{
Name: "c1",
Image: "image",
ResourcesAllocated: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")},
Resources: &api.ResourceRequirements{
Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")},
Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("300m")},
},
},
},
},
}
}
podWithoutInPlaceVerticalScaling := func() *api.Pod {
return &api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "c1",
Image: "image",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")},
Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")},
},
},
},
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
{
Name: "c1",
Image: "image",
},
},
},
}
}
podInfo := []struct {
description string
hasInPlaceVerticalScaling bool
pod func() *api.Pod
}{
{
description: "has in-place vertical scaling enabled with resources",
hasInPlaceVerticalScaling: true,
pod: podWithInPlaceVerticalScaling,
},
{
description: "has in-place vertical scaling disabled",
hasInPlaceVerticalScaling: false,
pod: podWithoutInPlaceVerticalScaling,
},
{
description: "is nil",
hasInPlaceVerticalScaling: false,
pod: func() *api.Pod { return nil },
},
}
for _, enabled := range []bool{true, false} {
for _, oldPodInfo := range podInfo {
for _, newPodInfo := range podInfo {
oldPodHasInPlaceVerticalScaling, oldPod := oldPodInfo.hasInPlaceVerticalScaling, oldPodInfo.pod()
newPodHasInPlaceVerticalScaling, newPod := newPodInfo.hasInPlaceVerticalScaling, newPodInfo.pod()
if newPod == nil {
continue
}
t.Run(fmt.Sprintf("feature enabled=%v, old pod %v, new pod %v", enabled, oldPodInfo.description, newPodInfo.description), func(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, enabled)()
var oldPodSpec *api.PodSpec
var oldPodStatus *api.PodStatus
if oldPod != nil {
oldPodSpec = &oldPod.Spec
oldPodStatus = &oldPod.Status
}
dropDisabledFields(&newPod.Spec, nil, oldPodSpec, nil)
dropDisabledPodStatusFields(&newPod.Status, oldPodStatus, &newPod.Spec, oldPodSpec)
// old pod should never be changed
if !reflect.DeepEqual(oldPod, oldPodInfo.pod()) {
t.Errorf("old pod changed: %v", diff.ObjectReflectDiff(oldPod, oldPodInfo.pod()))
}
switch {
case enabled || oldPodHasInPlaceVerticalScaling:
// new pod shouldn't change if feature enabled or if old pod has ResizePolicy set
if !reflect.DeepEqual(newPod, newPodInfo.pod()) {
t.Errorf("new pod changed: %v", diff.ObjectReflectDiff(newPod, newPodInfo.pod()))
}
case newPodHasInPlaceVerticalScaling:
// new pod should be changed
if reflect.DeepEqual(newPod, newPodInfo.pod()) {
t.Errorf("new pod was not changed")
}
// new pod should not have ResizePolicy
if !reflect.DeepEqual(newPod, podWithoutInPlaceVerticalScaling()) {
t.Errorf("new pod has ResizePolicy: %v", diff.ObjectReflectDiff(newPod, podWithoutInPlaceVerticalScaling()))
}
default:
// new pod should not need to be changed
if !reflect.DeepEqual(newPod, newPodInfo.pod()) {
t.Errorf("new pod changed: %v", diff.ObjectReflectDiff(newPod, newPodInfo.pod()))
}
}
})
}
}
}
}
func TestMarkPodProposedForResize(t *testing.T) {
testCases := []struct {
desc string
newPod *api.Pod
oldPod *api.Pod
expectedPod *api.Pod
}{
{
desc: "nil requests",
newPod: &api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "c1",
Image: "image",
},
},
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
{
Name: "c1",
Image: "image",
},
},
},
},
oldPod: &api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "c1",
Image: "image",
},
},
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
{
Name: "c1",
Image: "image",
},
},
},
},
expectedPod: &api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "c1",
Image: "image",
},
},
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
{
Name: "c1",
Image: "image",
},
},
},
},
},
{
desc: "resources unchanged",
newPod: &api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "c1",
Image: "image",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")},
Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")},
},
},
},
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
{
Name: "c1",
Image: "image",
},
},
},
},
oldPod: &api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "c1",
Image: "image",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")},
Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")},
},
},
},
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
{
Name: "c1",
Image: "image",
},
},
},
},
expectedPod: &api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "c1",
Image: "image",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")},
Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")},
},
},
},
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
{
Name: "c1",
Image: "image",
},
},
},
},
},
{
desc: "resize desired",
newPod: &api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "c1",
Image: "image",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")},
Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")},
},
},
{
Name: "c2",
Image: "image",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("300m")},
Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("400m")},
},
},
},
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
{
Name: "c1",
Image: "image",
ResourcesAllocated: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")},
},
{
Name: "c2",
Image: "image",
ResourcesAllocated: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")},
},
},
},
},
oldPod: &api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "c1",
Image: "image",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")},
Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")},
},
},
{
Name: "c2",
Image: "image",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")},
Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("300m")},
},
},
},
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
{
Name: "c1",
Image: "image",
ResourcesAllocated: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")},
},
{
Name: "c2",
Image: "image",
ResourcesAllocated: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")},
},
},
},
},
expectedPod: &api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Name: "c1",
Image: "image",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")},
Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")},
},
},
{
Name: "c2",
Image: "image",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{api.ResourceCPU: resource.MustParse("300m")},
Limits: api.ResourceList{api.ResourceCPU: resource.MustParse("400m")},
},
},
},
},
Status: api.PodStatus{
Resize: api.PodResizeStatusProposed,
ContainerStatuses: []api.ContainerStatus{
{
Name: "c1",
Image: "image",
ResourcesAllocated: api.ResourceList{api.ResourceCPU: resource.MustParse("100m")},
},
{
Name: "c2",
Image: "image",
ResourcesAllocated: api.ResourceList{api.ResourceCPU: resource.MustParse("200m")},
},
},
},
},
},
}
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
MarkPodProposedForResize(tc.oldPod, tc.newPod)
if diff := cmp.Diff(tc.expectedPod, tc.newPod); diff != "" {
t.Errorf("unexpected pod spec (-want, +got):\n%s", diff)
}
})
}
}

View File

@ -257,7 +257,7 @@ func visitContainerConfigmapNames(container *v1.Container, visitor Visitor) bool
}
// GetContainerStatus extracts the status of container "name" from "statuses".
// It also returns if "name" exists.
// It returns true if "name" exists, else returns false.
func GetContainerStatus(statuses []v1.ContainerStatus, name string) (v1.ContainerStatus, bool) {
for i := range statuses {
if statuses[i].Name == name {
@ -274,6 +274,17 @@ func GetExistingContainerStatus(statuses []v1.ContainerStatus, name string) v1.C
return status
}
// GetIndexOfContainerStatus gets the index of the status of container "name" in "statuses".
// It returns (index, true) if "name" exists, else returns (0, false).
func GetIndexOfContainerStatus(statuses []v1.ContainerStatus, name string) (int, bool) {
for i := range statuses {
if statuses[i].Name == name {
return i, true
}
}
return 0, false
}
// IsPodAvailable returns true if a pod is available; false otherwise.
// Precondition for an available pod is that it must be ready. On top
// of that, there are two cases when a pod can be considered available:

View File

@ -809,6 +809,53 @@ func TestGetContainerStatus(t *testing.T) {
}
}
func TestGetIndexOfContainerStatus(t *testing.T) {
testStatus := []v1.ContainerStatus{
{
Name: "c1",
Ready: false,
Image: "image1",
},
{
Name: "c2",
Ready: true,
Image: "image1",
},
}
tests := []struct {
desc string
containerName string
expectedExists bool
expectedIndex int
}{
{
desc: "first container",
containerName: "c1",
expectedExists: true,
expectedIndex: 0,
},
{
desc: "second container",
containerName: "c2",
expectedExists: true,
expectedIndex: 1,
},
{
desc: "non-existent container",
containerName: "c3",
expectedExists: false,
expectedIndex: 0,
},
}
for _, test := range tests {
idx, exists := GetIndexOfContainerStatus(testStatus, test.containerName)
assert.Equal(t, test.expectedExists, exists, "GetIndexOfContainerStatus: "+test.desc)
assert.Equal(t, test.expectedIndex, idx, "GetIndexOfContainerStatus: "+test.desc)
}
}
func TestUpdatePodCondition(t *testing.T) {
time := metav1.Now()

View File

@ -2138,6 +2138,33 @@ const (
PullIfNotPresent PullPolicy = "IfNotPresent"
)
// ResourceResizePolicy specifies how Kubernetes should handle resource resize.
type ResourceResizePolicy string
// These are the valid resource resize policy values:
const (
// RestartNotRequired tells Kubernetes to resize the container in-place
// without restarting it, if possible. Kubernetes may however choose to
// restart the container if it is unable to actuate the resize without a
// restart, for example when the runtime doesn't support restart-free resizing.
RestartNotRequired ResourceResizePolicy = "RestartNotRequired"
// 'RestartRequired' tells Kubernetes to resize the container in-place
// by stopping and starting the container when new resources are applied.
// This is needed for legacy applications, for example Java apps that use the
// -XmxN flag and cannot use resized memory without restarting.
RestartRequired ResourceResizePolicy = "RestartRequired"
)
// ContainerResizePolicy represents resource resize policy for a single container.
type ContainerResizePolicy struct {
// Name of the resource type to which this resource resize policy applies.
// Supported values: cpu, memory.
ResourceName ResourceName
// Resource resize policy applicable to the specified resource name.
// If not specified, it defaults to RestartNotRequired.
Policy ResourceResizePolicy
}
// PreemptionPolicy describes a policy for if/when to preempt a pod.
type PreemptionPolicy string
@ -2246,6 +2273,10 @@ type Container struct {
// Compute resource requirements.
// +optional
Resources ResourceRequirements
// Resources resize policy for the container.
// +featureGate=InPlacePodVerticalScaling
// +optional
ResizePolicy []ContainerResizePolicy
// +optional
VolumeMounts []VolumeMount
// volumeDevices is the list of block devices to be used by the container.
@ -2430,6 +2461,17 @@ type ContainerStatus struct {
// +optional
ContainerID string
Started *bool
// ResourcesAllocated represents the compute resources allocated for this container by the
// node. Kubelet sets this value to Container.Resources.Requests upon successful pod admission
// and after successfully admitting desired pod resize.
// +featureGate=InPlacePodVerticalScaling
// +optional
ResourcesAllocated ResourceList
// Resources represents the compute resource requests and limits that have been successfully
// enacted on the running container after it has been started or has been successfully resized.
// +featureGate=InPlacePodVerticalScaling
// +optional
Resources *ResourceRequirements
}
// PodPhase is a label for the condition of a pod at the current time.
@ -2495,6 +2537,20 @@ type PodCondition struct {
Message string
}
// PodResizeStatus shows status of desired resize of a pod's containers.
type PodResizeStatus string
const (
// Pod resources resize has been requested and will be evaluated by node.
PodResizeStatusProposed PodResizeStatus = "Proposed"
// Pod resources resize has been accepted by node and is being actuated.
PodResizeStatusInProgress PodResizeStatus = "InProgress"
// Node cannot resize the pod at this time and will keep retrying.
PodResizeStatusDeferred PodResizeStatus = "Deferred"
// Requested pod resize is not feasible and will not be re-evaluated.
PodResizeStatusInfeasible PodResizeStatus = "Infeasible"
)
// RestartPolicy describes how the container should be restarted.
// Only one of the following restart policies may be specified.
// If none of the following policies is specified, the default one
@ -3412,6 +3468,10 @@ type EphemeralContainerCommon struct {
// already allocated to the pod.
// +optional
Resources ResourceRequirements
// Resources resize policy for the container.
// +featureGate=InPlacePodVerticalScaling
// +optional
ResizePolicy []ContainerResizePolicy
// Pod volumes to mount into the container's filesystem. Subpath mounts are not allowed for ephemeral containers.
// +optional
VolumeMounts []VolumeMount
@ -3528,6 +3588,13 @@ type PodStatus struct {
// Status for any ephemeral containers that have run in this pod.
// +optional
EphemeralContainerStatuses []ContainerStatus
// Status of resources resize desired for pod's containers.
// It is empty if no resources resize is pending.
// Any changes to container resources will automatically set this to "Proposed"
// +featureGate=InPlacePodVerticalScaling
// +optional
Resize PodResizeStatus
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
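
As a reading aid (illustrative only, not part of this diff), the new PodResizeStatus values follow the lifecycle described in the constant comments above: a spec change marks the pod Proposed, and the node then moves it to InProgress, Deferred, or Infeasible. A hypothetical helper that restates them:

package main

import (
    "fmt"

    api "k8s.io/kubernetes/pkg/apis/core"
)

// describeResize is a hypothetical helper, not part of this PR; it restates the
// meaning of each PodResizeStatus value from the comments above.
func describeResize(s api.PodResizeStatus) string {
    switch s {
    case api.PodResizeStatusProposed:
        return "resize requested; waiting for the node to evaluate it"
    case api.PodResizeStatusInProgress:
        return "resize accepted by the node and being actuated"
    case api.PodResizeStatusDeferred:
        return "node cannot resize right now and will keep retrying"
    case api.PodResizeStatusInfeasible:
        return "requested resize is not feasible on this node"
    default: // empty: no resize is pending
        return "no resize pending"
    }
}

func main() {
    fmt.Println(describeResize(api.PodResizeStatusProposed))
}
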

View File

@ -342,6 +342,16 @@ func RegisterConversions(s *runtime.Scheme) error {
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*v1.ContainerResizePolicy)(nil), (*core.ContainerResizePolicy)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1_ContainerResizePolicy_To_core_ContainerResizePolicy(a.(*v1.ContainerResizePolicy), b.(*core.ContainerResizePolicy), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*core.ContainerResizePolicy)(nil), (*v1.ContainerResizePolicy)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_core_ContainerResizePolicy_To_v1_ContainerResizePolicy(a.(*core.ContainerResizePolicy), b.(*v1.ContainerResizePolicy), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*v1.ContainerState)(nil), (*core.ContainerState)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1_ContainerState_To_core_ContainerState(a.(*v1.ContainerState), b.(*core.ContainerState), scope)
}); err != nil {
@ -2975,6 +2985,7 @@ func autoConvert_v1_Container_To_core_Container(in *v1.Container, out *core.Cont
if err := Convert_v1_ResourceRequirements_To_core_ResourceRequirements(&in.Resources, &out.Resources, s); err != nil {
return err
}
out.ResizePolicy = *(*[]core.ContainerResizePolicy)(unsafe.Pointer(&in.ResizePolicy))
out.VolumeMounts = *(*[]core.VolumeMount)(unsafe.Pointer(&in.VolumeMounts))
out.VolumeDevices = *(*[]core.VolumeDevice)(unsafe.Pointer(&in.VolumeDevices))
out.LivenessProbe = (*core.Probe)(unsafe.Pointer(in.LivenessProbe))
@ -3008,6 +3019,7 @@ func autoConvert_core_Container_To_v1_Container(in *core.Container, out *v1.Cont
if err := Convert_core_ResourceRequirements_To_v1_ResourceRequirements(&in.Resources, &out.Resources, s); err != nil {
return err
}
out.ResizePolicy = *(*[]v1.ContainerResizePolicy)(unsafe.Pointer(&in.ResizePolicy))
out.VolumeMounts = *(*[]v1.VolumeMount)(unsafe.Pointer(&in.VolumeMounts))
out.VolumeDevices = *(*[]v1.VolumeDevice)(unsafe.Pointer(&in.VolumeDevices))
out.LivenessProbe = (*v1.Probe)(unsafe.Pointer(in.LivenessProbe))
@ -3079,6 +3091,28 @@ func Convert_core_ContainerPort_To_v1_ContainerPort(in *core.ContainerPort, out
return autoConvert_core_ContainerPort_To_v1_ContainerPort(in, out, s)
}
func autoConvert_v1_ContainerResizePolicy_To_core_ContainerResizePolicy(in *v1.ContainerResizePolicy, out *core.ContainerResizePolicy, s conversion.Scope) error {
out.ResourceName = core.ResourceName(in.ResourceName)
out.Policy = core.ResourceResizePolicy(in.Policy)
return nil
}
// Convert_v1_ContainerResizePolicy_To_core_ContainerResizePolicy is an autogenerated conversion function.
func Convert_v1_ContainerResizePolicy_To_core_ContainerResizePolicy(in *v1.ContainerResizePolicy, out *core.ContainerResizePolicy, s conversion.Scope) error {
return autoConvert_v1_ContainerResizePolicy_To_core_ContainerResizePolicy(in, out, s)
}
func autoConvert_core_ContainerResizePolicy_To_v1_ContainerResizePolicy(in *core.ContainerResizePolicy, out *v1.ContainerResizePolicy, s conversion.Scope) error {
out.ResourceName = v1.ResourceName(in.ResourceName)
out.Policy = v1.ResourceResizePolicy(in.Policy)
return nil
}
// Convert_core_ContainerResizePolicy_To_v1_ContainerResizePolicy is an autogenerated conversion function.
func Convert_core_ContainerResizePolicy_To_v1_ContainerResizePolicy(in *core.ContainerResizePolicy, out *v1.ContainerResizePolicy, s conversion.Scope) error {
return autoConvert_core_ContainerResizePolicy_To_v1_ContainerResizePolicy(in, out, s)
}
func autoConvert_v1_ContainerState_To_core_ContainerState(in *v1.ContainerState, out *core.ContainerState, s conversion.Scope) error {
out.Waiting = (*core.ContainerStateWaiting)(unsafe.Pointer(in.Waiting))
out.Running = (*core.ContainerStateRunning)(unsafe.Pointer(in.Running))
@ -3191,6 +3225,8 @@ func autoConvert_v1_ContainerStatus_To_core_ContainerStatus(in *v1.ContainerStat
out.ImageID = in.ImageID
out.ContainerID = in.ContainerID
out.Started = (*bool)(unsafe.Pointer(in.Started))
out.ResourcesAllocated = *(*core.ResourceList)(unsafe.Pointer(&in.ResourcesAllocated))
out.Resources = (*core.ResourceRequirements)(unsafe.Pointer(in.Resources))
return nil
}
@ -3213,6 +3249,8 @@ func autoConvert_core_ContainerStatus_To_v1_ContainerStatus(in *core.ContainerSt
out.ImageID = in.ImageID
out.ContainerID = in.ContainerID
out.Started = (*bool)(unsafe.Pointer(in.Started))
out.ResourcesAllocated = *(*v1.ResourceList)(unsafe.Pointer(&in.ResourcesAllocated))
out.Resources = (*v1.ResourceRequirements)(unsafe.Pointer(in.Resources))
return nil
}
@ -3563,6 +3601,7 @@ func autoConvert_v1_EphemeralContainerCommon_To_core_EphemeralContainerCommon(in
if err := Convert_v1_ResourceRequirements_To_core_ResourceRequirements(&in.Resources, &out.Resources, s); err != nil {
return err
}
out.ResizePolicy = *(*[]core.ContainerResizePolicy)(unsafe.Pointer(&in.ResizePolicy))
out.VolumeMounts = *(*[]core.VolumeMount)(unsafe.Pointer(&in.VolumeMounts))
out.VolumeDevices = *(*[]core.VolumeDevice)(unsafe.Pointer(&in.VolumeDevices))
out.LivenessProbe = (*core.Probe)(unsafe.Pointer(in.LivenessProbe))
@ -3596,6 +3635,7 @@ func autoConvert_core_EphemeralContainerCommon_To_v1_EphemeralContainerCommon(in
if err := Convert_core_ResourceRequirements_To_v1_ResourceRequirements(&in.Resources, &out.Resources, s); err != nil {
return err
}
out.ResizePolicy = *(*[]v1.ContainerResizePolicy)(unsafe.Pointer(&in.ResizePolicy))
out.VolumeMounts = *(*[]v1.VolumeMount)(unsafe.Pointer(&in.VolumeMounts))
out.VolumeDevices = *(*[]v1.VolumeDevice)(unsafe.Pointer(&in.VolumeDevices))
out.LivenessProbe = (*v1.Probe)(unsafe.Pointer(in.LivenessProbe))
@ -6380,6 +6420,7 @@ func autoConvert_v1_PodStatus_To_core_PodStatus(in *v1.PodStatus, out *core.PodS
out.ContainerStatuses = *(*[]core.ContainerStatus)(unsafe.Pointer(&in.ContainerStatuses))
out.QOSClass = core.PodQOSClass(in.QOSClass)
out.EphemeralContainerStatuses = *(*[]core.ContainerStatus)(unsafe.Pointer(&in.EphemeralContainerStatuses))
out.Resize = core.PodResizeStatus(in.Resize)
return nil
}
@ -6396,6 +6437,7 @@ func autoConvert_core_PodStatus_To_v1_PodStatus(in *core.PodStatus, out *v1.PodS
out.InitContainerStatuses = *(*[]v1.ContainerStatus)(unsafe.Pointer(&in.InitContainerStatuses))
out.ContainerStatuses = *(*[]v1.ContainerStatus)(unsafe.Pointer(&in.ContainerStatuses))
out.EphemeralContainerStatuses = *(*[]v1.ContainerStatus)(unsafe.Pointer(&in.EphemeralContainerStatuses))
out.Resize = v1.PodResizeStatus(in.Resize)
return nil
}

View File

@ -48,6 +48,7 @@ func RegisterDefaults(scheme *runtime.Scheme) error {
scheme.AddTypeDefaultingFunc(&v1.PersistentVolumeList{}, func(obj interface{}) { SetObjectDefaults_PersistentVolumeList(obj.(*v1.PersistentVolumeList)) })
scheme.AddTypeDefaultingFunc(&v1.Pod{}, func(obj interface{}) { SetObjectDefaults_Pod(obj.(*v1.Pod)) })
scheme.AddTypeDefaultingFunc(&v1.PodList{}, func(obj interface{}) { SetObjectDefaults_PodList(obj.(*v1.PodList)) })
scheme.AddTypeDefaultingFunc(&v1.PodStatusResult{}, func(obj interface{}) { SetObjectDefaults_PodStatusResult(obj.(*v1.PodStatusResult)) })
scheme.AddTypeDefaultingFunc(&v1.PodTemplate{}, func(obj interface{}) { SetObjectDefaults_PodTemplate(obj.(*v1.PodTemplate)) })
scheme.AddTypeDefaultingFunc(&v1.PodTemplateList{}, func(obj interface{}) { SetObjectDefaults_PodTemplateList(obj.(*v1.PodTemplateList)) })
scheme.AddTypeDefaultingFunc(&v1.ReplicationController{}, func(obj interface{}) { SetObjectDefaults_ReplicationController(obj.(*v1.ReplicationController)) })
@ -438,6 +439,30 @@ func SetObjectDefaults_Pod(in *v1.Pod) {
}
}
SetDefaults_ResourceList(&in.Spec.Overhead)
for i := range in.Status.InitContainerStatuses {
a := &in.Status.InitContainerStatuses[i]
SetDefaults_ResourceList(&a.ResourcesAllocated)
if a.Resources != nil {
SetDefaults_ResourceList(&a.Resources.Limits)
SetDefaults_ResourceList(&a.Resources.Requests)
}
}
for i := range in.Status.ContainerStatuses {
a := &in.Status.ContainerStatuses[i]
SetDefaults_ResourceList(&a.ResourcesAllocated)
if a.Resources != nil {
SetDefaults_ResourceList(&a.Resources.Limits)
SetDefaults_ResourceList(&a.Resources.Requests)
}
}
for i := range in.Status.EphemeralContainerStatuses {
a := &in.Status.EphemeralContainerStatuses[i]
SetDefaults_ResourceList(&a.ResourcesAllocated)
if a.Resources != nil {
SetDefaults_ResourceList(&a.Resources.Limits)
SetDefaults_ResourceList(&a.Resources.Requests)
}
}
}
func SetObjectDefaults_PodList(in *v1.PodList) {
@ -447,6 +472,33 @@ func SetObjectDefaults_PodList(in *v1.PodList) {
}
}
func SetObjectDefaults_PodStatusResult(in *v1.PodStatusResult) {
for i := range in.Status.InitContainerStatuses {
a := &in.Status.InitContainerStatuses[i]
SetDefaults_ResourceList(&a.ResourcesAllocated)
if a.Resources != nil {
SetDefaults_ResourceList(&a.Resources.Limits)
SetDefaults_ResourceList(&a.Resources.Requests)
}
}
for i := range in.Status.ContainerStatuses {
a := &in.Status.ContainerStatuses[i]
SetDefaults_ResourceList(&a.ResourcesAllocated)
if a.Resources != nil {
SetDefaults_ResourceList(&a.Resources.Limits)
SetDefaults_ResourceList(&a.Resources.Requests)
}
}
for i := range in.Status.EphemeralContainerStatuses {
a := &in.Status.EphemeralContainerStatuses[i]
SetDefaults_ResourceList(&a.ResourcesAllocated)
if a.Resources != nil {
SetDefaults_ResourceList(&a.Resources.Limits)
SetDefaults_ResourceList(&a.Resources.Requests)
}
}
}
func SetObjectDefaults_PodTemplate(in *v1.PodTemplate) {
SetDefaults_PodSpec(&in.Template.Spec)
for i := range in.Template.Spec.Volumes {

View File

@ -46,6 +46,7 @@ import (
apiservice "k8s.io/kubernetes/pkg/api/service"
"k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/apis/core/helper"
"k8s.io/kubernetes/pkg/apis/core/helper/qos"
podshelper "k8s.io/kubernetes/pkg/apis/core/pods"
corev1 "k8s.io/kubernetes/pkg/apis/core/v1"
"k8s.io/kubernetes/pkg/capabilities"
@ -3011,6 +3012,37 @@ func validatePullPolicy(policy core.PullPolicy, fldPath *field.Path) field.Error
return allErrors
}
var supportedResizeResources = sets.NewString(string(core.ResourceCPU), string(core.ResourceMemory))
var supportedResizePolicies = sets.NewString(string(core.RestartNotRequired), string(core.RestartRequired))
func validateResizePolicy(policyList []core.ContainerResizePolicy, fldPath *field.Path) field.ErrorList {
allErrors := field.ErrorList{}
// validate that resource names are not repeated, and that only supported resource names and policy values are specified
resources := make(map[core.ResourceName]bool)
for i, p := range policyList {
if _, found := resources[p.ResourceName]; found {
allErrors = append(allErrors, field.Duplicate(fldPath.Index(i), p.ResourceName))
}
resources[p.ResourceName] = true
switch p.ResourceName {
case core.ResourceCPU, core.ResourceMemory:
case "":
allErrors = append(allErrors, field.Required(fldPath, ""))
default:
allErrors = append(allErrors, field.NotSupported(fldPath, p.ResourceName, supportedResizeResources.List()))
}
switch p.Policy {
case core.RestartNotRequired, core.RestartRequired:
case "":
allErrors = append(allErrors, field.Required(fldPath, ""))
default:
allErrors = append(allErrors, field.NotSupported(fldPath, p.Policy, supportedResizePolicies.List()))
}
}
return allErrors
}
// validateEphemeralContainers is called by pod spec and template validation to validate the list of ephemeral containers.
// Note that this is called for pod template even though ephemeral containers aren't allowed in pod templates.
func validateEphemeralContainers(ephemeralContainers []core.EphemeralContainer, containers, initContainers []core.Container, volumes map[string]core.VolumeSource, podClaimNames sets.String, fldPath *field.Path, opts PodValidationOptions) field.ErrorList {
@ -3133,6 +3165,9 @@ func validateInitContainers(containers []core.Container, regularContainers []cor
if ctr.StartupProbe != nil {
allErrs = append(allErrs, field.Forbidden(idxPath.Child("startupProbe"), "may not be set for init containers"))
}
if len(ctr.ResizePolicy) > 0 {
allErrs = append(allErrs, field.Invalid(idxPath.Child("resizePolicy"), ctr.ResizePolicy, "must not be set for init containers"))
}
}
return allErrs
@ -4479,6 +4514,24 @@ func validateSeccompAnnotationsAndFieldsMatch(annotationValue string, seccompFie
return nil
}
var updatablePodSpecFields = []string{
"`spec.containers[*].image`",
"`spec.initContainers[*].image`",
"`spec.activeDeadlineSeconds`",
"`spec.tolerations` (only additions to existing tolerations)",
"`spec.terminationGracePeriodSeconds` (allow it to be set to 1 if it was previously negative)",
"`spec.containers[*].resources` (for CPU/memory only)",
}
// TODO(vinaykul,InPlacePodVerticalScaling): Drop this var once InPlacePodVerticalScaling goes GA and featuregate is gone.
var updatablePodSpecFieldsNoResources = []string{
"`spec.containers[*].image`",
"`spec.initContainers[*].image`",
"`spec.activeDeadlineSeconds`",
"`spec.tolerations` (only additions to existing tolerations)",
"`spec.terminationGracePeriodSeconds` (allow it to be set to 1 if it was previously negative)",
}
// ValidatePodUpdate tests to see if the update is legal for an end user to make. newPod is updated with fields
// that cannot be changed.
func ValidatePodUpdate(newPod, oldPod *core.Pod, opts PodValidationOptions) field.ErrorList {
@ -4538,12 +4591,56 @@ func ValidatePodUpdate(newPod, oldPod *core.Pod, opts PodValidationOptions) fiel
return allErrs
}
//TODO(vinaykul,InPlacePodVerticalScaling): With KEP 2527, we can rely on persistence of PodStatus.QOSClass
// We can use PodStatus.QOSClass instead of GetPodQOS here, in kubelet, and elsewhere, as PodStatus.QOSClass
// does not change once it is bootstrapped in podCreate. This needs to be addressed before beta as a
// separate PR covering all uses of GetPodQOS. With that change, we can drop the below block.
// Ref: https://github.com/kubernetes/kubernetes/pull/102884#discussion_r1093790446
// Ref: https://github.com/kubernetes/kubernetes/pull/102884/#discussion_r663280487
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
// reject attempts to change pod qos
oldQoS := qos.GetPodQOS(oldPod)
newQoS := qos.GetPodQOS(newPod)
if newQoS != oldQoS {
allErrs = append(allErrs, field.Invalid(fldPath, newQoS, "Pod QoS is immutable"))
}
}
// handle updateable fields by munging those fields prior to deep equal comparison.
mungedPodSpec := *newPod.Spec.DeepCopy()
// munge spec.containers[*].image
var newContainers []core.Container
for ix, container := range mungedPodSpec.Containers {
container.Image = oldPod.Spec.Containers[ix].Image // +k8s:verify-mutation:reason=clone
// When the feature-gate is turned off, any new requests attempting to update CPU or memory
// resource values will result in validation failure.
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
// Resources are mutable for CPU & memory only
// - user can now modify Resources to express new desired Resources
mungeCpuMemResources := func(resourceList, oldResourceList core.ResourceList) core.ResourceList {
if oldResourceList == nil {
return nil
}
var mungedResourceList core.ResourceList
if resourceList == nil {
mungedResourceList = make(core.ResourceList)
} else {
mungedResourceList = resourceList.DeepCopy()
}
delete(mungedResourceList, core.ResourceCPU)
delete(mungedResourceList, core.ResourceMemory)
if cpu, found := oldResourceList[core.ResourceCPU]; found {
mungedResourceList[core.ResourceCPU] = cpu
}
if mem, found := oldResourceList[core.ResourceMemory]; found {
mungedResourceList[core.ResourceMemory] = mem
}
return mungedResourceList
}
lim := mungeCpuMemResources(container.Resources.Limits, oldPod.Spec.Containers[ix].Resources.Limits)
req := mungeCpuMemResources(container.Resources.Requests, oldPod.Spec.Containers[ix].Resources.Requests)
container.Resources = core.ResourceRequirements{Limits: lim, Requests: req}
}
newContainers = append(newContainers, container)
}
mungedPodSpec.Containers = newContainers
@ -4575,7 +4672,11 @@ func ValidatePodUpdate(newPod, oldPod *core.Pod, opts PodValidationOptions) fiel
// This diff isn't perfect, but it's a helluva lot better than "I'm not going to tell you what the difference is".
// TODO: Pinpoint the specific field that causes the invalid error after we have strategic merge diff
specDiff := cmp.Diff(oldPod.Spec, mungedPodSpec)
allErrs = append(allErrs, field.Forbidden(specPath, fmt.Sprintf("pod updates may not change fields other than `spec.containers[*].image`, `spec.initContainers[*].image`, `spec.activeDeadlineSeconds`, `spec.tolerations` (only additions to existing tolerations) or `spec.terminationGracePeriodSeconds` (allow it to be set to 1 if it was previously negative)\n%v", specDiff)))
errs := field.Forbidden(specPath, fmt.Sprintf("pod updates may not change fields other than %s\n%v", strings.Join(updatablePodSpecFieldsNoResources, ","), specDiff))
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
errs = field.Forbidden(specPath, fmt.Sprintf("pod updates may not change fields other than %s\n%v", strings.Join(updatablePodSpecFields, ","), specDiff))
}
allErrs = append(allErrs, errs)
}
return allErrs
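
To make the munging step concrete (illustrative only, not part of this diff), the sketch below restates the mungeCpuMemResources closure as a standalone function over the internal core types and applies it to limits where both CPU and ephemeral storage change. Only the storage difference survives the comparison, which is why CPU and memory changes pass ValidatePodUpdate with the feature gate enabled while other resource changes are still rejected, as the test cases in the next file show.

package main

import (
    "fmt"

    "k8s.io/apimachinery/pkg/api/resource"
    api "k8s.io/kubernetes/pkg/apis/core"
)

// mungeCPUMem mirrors the mungeCpuMemResources closure above: it copies the old
// CPU and memory values over the new list, so a deep comparison against the old
// spec only flags changes to other resources.
func mungeCPUMem(newList, oldList api.ResourceList) api.ResourceList {
    if oldList == nil {
        return nil
    }
    out := api.ResourceList{}
    if newList != nil {
        out = newList.DeepCopy()
    }
    delete(out, api.ResourceCPU)
    delete(out, api.ResourceMemory)
    if cpu, ok := oldList[api.ResourceCPU]; ok {
        out[api.ResourceCPU] = cpu
    }
    if mem, ok := oldList[api.ResourceMemory]; ok {
        out[api.ResourceMemory] = mem
    }
    return out
}

func main() {
    oldLimits := api.ResourceList{
        api.ResourceCPU:              resource.MustParse("100m"),
        api.ResourceEphemeralStorage: resource.MustParse("1Gi"),
    }
    newLimits := api.ResourceList{
        api.ResourceCPU:              resource.MustParse("200m"), // CPU change: allowed
        api.ResourceEphemeralStorage: resource.MustParse("2Gi"),  // storage change: still forbidden
    }
    munged := mungeCPUMem(newLimits, oldLimits)
    cpu, storage := munged[api.ResourceCPU], munged[api.ResourceEphemeralStorage]
    // CPU reverts to the old 100m for the comparison, but 2Gi remains, so only
    // the storage change shows up in the spec diff and is rejected.
    fmt.Println(cpu.String(), storage.String())
}
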

View File

@ -6707,6 +6707,100 @@ func TestValidatePullPolicy(t *testing.T) {
}
}
func TestValidateResizePolicy(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
tSupportedResizeResources := sets.NewString(string(core.ResourceCPU), string(core.ResourceMemory))
tSupportedResizePolicies := sets.NewString(string(core.RestartNotRequired), string(core.RestartRequired))
type T struct {
PolicyList []core.ContainerResizePolicy
ExpectError bool
Errors field.ErrorList
}
testCases := map[string]T{
"ValidCPUandMemoryPolicies": {
[]core.ContainerResizePolicy{
{ResourceName: "cpu", Policy: "RestartNotRequired"},
{ResourceName: "memory", Policy: "RestartRequired"},
},
false,
nil,
},
"ValidCPUPolicy": {
[]core.ContainerResizePolicy{
{ResourceName: "cpu", Policy: "RestartRequired"},
},
false,
nil,
},
"ValidMemoryPolicy": {
[]core.ContainerResizePolicy{
{ResourceName: "memory", Policy: "RestartNotRequired"},
},
false,
nil,
},
"NoPolicy": {
[]core.ContainerResizePolicy{},
false,
nil,
},
"ValidCPUandInvalidMemoryPolicy": {
[]core.ContainerResizePolicy{
{ResourceName: "cpu", Policy: "RestartNotRequired"},
{ResourceName: "memory", Policy: "Restarrrt"},
},
true,
field.ErrorList{field.NotSupported(field.NewPath("field"), core.ResourceResizePolicy("Restarrrt"), tSupportedResizePolicies.List())},
},
"ValidMemoryandInvalidCPUPolicy": {
[]core.ContainerResizePolicy{
{ResourceName: "cpu", Policy: "RestartNotRequirrred"},
{ResourceName: "memory", Policy: "RestartRequired"},
},
true,
field.ErrorList{field.NotSupported(field.NewPath("field"), core.ResourceResizePolicy("RestartNotRequirrred"), tSupportedResizePolicies.List())},
},
"InvalidResourceNameValidPolicy": {
[]core.ContainerResizePolicy{
{ResourceName: "cpuuu", Policy: "RestartNotRequired"},
},
true,
field.ErrorList{field.NotSupported(field.NewPath("field"), core.ResourceName("cpuuu"), tSupportedResizeResources.List())},
},
"ValidResourceNameMissingPolicy": {
[]core.ContainerResizePolicy{
{ResourceName: "memory", Policy: ""},
},
true,
field.ErrorList{field.Required(field.NewPath("field"), "")},
},
"RepeatedPolicies": {
[]core.ContainerResizePolicy{
{ResourceName: "cpu", Policy: "RestartNotRequired"},
{ResourceName: "memory", Policy: "RestartRequired"},
{ResourceName: "cpu", Policy: "RestartRequired"},
},
true,
field.ErrorList{field.Duplicate(field.NewPath("field").Index(2), core.ResourceCPU)},
},
}
for k, v := range testCases {
errs := validateResizePolicy(v.PolicyList, field.NewPath("field"))
if !v.ExpectError && len(errs) > 0 {
t.Errorf("Testcase %s - expected success, got error: %+v", k, errs)
}
if v.ExpectError {
if len(errs) == 0 {
t.Errorf("Testcase %s - expected error, got success", k)
}
delta := cmp.Diff(errs, v.Errors)
if delta != "" {
t.Errorf("Testcase %s - expected errors '%v', got '%v', diff: '%v'", k, v.Errors, errs, delta)
}
}
}
}
func getResourceLimits(cpu, memory string) core.ResourceList {
res := core.ResourceList{}
res[core.ResourceCPU] = resource.MustParse(cpu)
@ -6714,6 +6808,20 @@ func getResourceLimits(cpu, memory string) core.ResourceList {
return res
}
func getResources(cpu, memory, storage string) core.ResourceList {
res := core.ResourceList{}
if cpu != "" {
res[core.ResourceCPU] = resource.MustParse(cpu)
}
if memory != "" {
res[core.ResourceMemory] = resource.MustParse(memory)
}
if storage != "" {
res[core.ResourceEphemeralStorage] = resource.MustParse(storage)
}
return res
}
func TestValidateEphemeralContainers(t *testing.T) {
containers := []core.Container{{Name: "ctr", Image: "image", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File"}}
initContainers := []core.Container{{Name: "ictr", Image: "iimage", ImagePullPolicy: "IfNotPresent", TerminationMessagePolicy: "File"}}
@ -7057,6 +7165,24 @@ func TestValidateEphemeralContainers(t *testing.T) {
},
field.ErrorList{{Type: field.ErrorTypeForbidden, Field: "ephemeralContainers[0].lifecycle"}},
},
{
"Container uses disallowed field: ResizePolicy",
line(),
[]core.EphemeralContainer{
{
EphemeralContainerCommon: core.EphemeralContainerCommon{
Name: "resources-resize-policy",
Image: "image",
ImagePullPolicy: "IfNotPresent",
TerminationMessagePolicy: "File",
ResizePolicy: []core.ContainerResizePolicy{
{ResourceName: "cpu", Policy: "RestartNotRequired"},
},
},
},
},
field.ErrorList{{Type: field.ErrorTypeForbidden, Field: "ephemeralContainers[0].resizePolicy"}},
},
}
for _, tc := range tcs {
@ -7273,6 +7399,16 @@ func TestValidateContainers(t *testing.T) {
ImagePullPolicy: "IfNotPresent",
TerminationMessagePolicy: "File",
},
{
Name: "resources-resize-policy",
Image: "image",
ResizePolicy: []core.ContainerResizePolicy{
{ResourceName: "cpu", Policy: "RestartNotRequired"},
{ResourceName: "memory", Policy: "RestartRequired"},
},
ImagePullPolicy: "IfNotPresent",
TerminationMessagePolicy: "File",
},
{
Name: "same-host-port-different-protocol",
Image: "image",
@ -9036,6 +9172,32 @@ func TestValidatePodSpec(t *testing.T) {
RestartPolicy: core.RestartPolicyAlways,
DNSPolicy: core.DNSClusterFirst,
},
"disallowed resources resize policy for init containers": {
InitContainers: []core.Container{
{
Name: "initctr",
Image: "initimage",
ResizePolicy: []core.ContainerResizePolicy{
{ResourceName: "cpu", Policy: "RestartNotRequired"},
},
ImagePullPolicy: "IfNotPresent",
TerminationMessagePolicy: "File",
},
},
Containers: []core.Container{
{
Name: "ctr",
Image: "image",
ResizePolicy: []core.ContainerResizePolicy{
{ResourceName: "cpu", Policy: "RestartNotRequired"},
},
ImagePullPolicy: "IfNotPresent",
TerminationMessagePolicy: "File",
},
},
RestartPolicy: core.RestartPolicyAlways,
DNSPolicy: core.DNSClusterFirst,
},
}
for k, v := range failureCases {
if errs := ValidatePodSpec(&v, nil, field.NewPath("field"), PodValidationOptions{}); len(errs) == 0 {
@ -10818,6 +10980,7 @@ func TestValidatePodCreateWithSchedulingGates(t *testing.T) {
}
func TestValidatePodUpdate(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
var (
activeDeadlineSecondsZero = int64(0)
activeDeadlineSecondsNegative = int64(-30)
@ -11272,33 +11435,586 @@ func TestValidatePodUpdate(t *testing.T) {
},
{
new: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "foo"},
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Image: "foo:V1",
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Limits: getResourceLimits("100m", "0"),
Limits: getResources("200m", "0", "1Gi"),
},
},
},
},
},
old: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "foo"},
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Image: "foo:V2",
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Limits: getResourceLimits("1000m", "0"),
Limits: getResources("100m", "0", "1Gi"),
},
},
},
},
},
err: "spec: Forbidden: pod updates may not change fields",
test: "cpu change",
err: "",
test: "cpu limit change",
},
{
new: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V1",
Resources: core.ResourceRequirements{
Limits: getResourceLimits("100m", "100Mi"),
},
},
},
},
},
old: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Limits: getResourceLimits("100m", "200Mi"),
},
},
},
},
},
err: "",
test: "memory limit change",
},
{
new: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V1",
Resources: core.ResourceRequirements{
Limits: getResources("100m", "100Mi", "1Gi"),
},
},
},
},
},
old: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Limits: getResources("100m", "100Mi", "2Gi"),
},
},
},
},
},
err: "Forbidden: pod updates may not change fields other than",
test: "storage limit change",
},
{
new: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V1",
Resources: core.ResourceRequirements{
Requests: getResourceLimits("100m", "0"),
},
},
},
},
},
old: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Requests: getResourceLimits("200m", "0"),
},
},
},
},
},
err: "",
test: "cpu request change",
},
{
new: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V1",
Resources: core.ResourceRequirements{
Requests: getResourceLimits("0", "200Mi"),
},
},
},
},
},
old: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Requests: getResourceLimits("0", "100Mi"),
},
},
},
},
},
err: "",
test: "memory request change",
},
{
new: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V1",
Resources: core.ResourceRequirements{
Requests: getResources("100m", "0", "2Gi"),
},
},
},
},
},
old: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Requests: getResources("100m", "0", "1Gi"),
},
},
},
},
},
err: "Forbidden: pod updates may not change fields other than",
test: "storage request change",
},
{
new: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V1",
Resources: core.ResourceRequirements{
Limits: getResources("200m", "400Mi", "1Gi"),
Requests: getResources("200m", "400Mi", "1Gi"),
},
},
},
},
},
old: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V1",
Resources: core.ResourceRequirements{
Limits: getResources("100m", "100Mi", "1Gi"),
Requests: getResources("100m", "100Mi", "1Gi"),
},
},
},
},
},
err: "",
test: "Pod QoS unchanged, guaranteed -> guaranteed",
},
{
new: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V1",
Resources: core.ResourceRequirements{
Limits: getResources("200m", "200Mi", "2Gi"),
Requests: getResources("100m", "100Mi", "1Gi"),
},
},
},
},
},
old: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V1",
Resources: core.ResourceRequirements{
Limits: getResources("400m", "400Mi", "2Gi"),
Requests: getResources("200m", "200Mi", "1Gi"),
},
},
},
},
},
err: "",
test: "Pod QoS unchanged, burstable -> burstable",
},
{
new: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Limits: getResourceLimits("200m", "200Mi"),
Requests: getResourceLimits("100m", "100Mi"),
},
},
},
},
},
old: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Requests: getResourceLimits("100m", "100Mi"),
},
},
},
},
},
err: "",
test: "Pod QoS unchanged, burstable -> burstable, add limits",
},
{
new: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Requests: getResourceLimits("100m", "100Mi"),
},
},
},
},
},
old: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Limits: getResourceLimits("200m", "200Mi"),
Requests: getResourceLimits("100m", "100Mi"),
},
},
},
},
},
err: "",
test: "Pod QoS unchanged, burstable -> burstable, remove limits",
},
{
new: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Limits: getResources("400m", "", "1Gi"),
Requests: getResources("300m", "", "1Gi"),
},
},
},
},
},
old: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Limits: getResources("200m", "500Mi", "1Gi"),
},
},
},
},
},
err: "",
test: "Pod QoS unchanged, burstable -> burstable, add requests",
},
{
new: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Limits: getResources("400m", "500Mi", "2Gi"),
},
},
},
},
},
old: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Limits: getResources("200m", "300Mi", "2Gi"),
Requests: getResourceLimits("100m", "200Mi"),
},
},
},
},
},
err: "",
test: "Pod QoS unchanged, burstable -> burstable, remove requests",
},
{
new: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Limits: getResourceLimits("200m", "200Mi"),
Requests: getResourceLimits("100m", "100Mi"),
},
},
},
},
},
old: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Limits: getResourceLimits("100m", "100Mi"),
Requests: getResourceLimits("100m", "100Mi"),
},
},
},
},
},
err: "Pod QoS is immutable",
test: "Pod QoS change, guaranteed -> burstable",
},
{
new: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Limits: getResourceLimits("100m", "100Mi"),
Requests: getResourceLimits("100m", "100Mi"),
},
},
},
},
},
old: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Requests: getResourceLimits("100m", "100Mi"),
},
},
},
},
},
err: "Pod QoS is immutable",
test: "Pod QoS change, burstable -> guaranteed",
},
{
new: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Limits: getResourceLimits("200m", "200Mi"),
Requests: getResourceLimits("100m", "100Mi"),
},
},
},
},
},
old: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
},
},
},
},
err: "Pod QoS is immutable",
test: "Pod QoS change, besteffort -> burstable",
},
{
new: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
},
},
},
},
old: core.Pod{
ObjectMeta: metav1.ObjectMeta{Name: "pod"},
Spec: core.PodSpec{
Containers: []core.Container{
{
Name: "container",
TerminationMessagePolicy: "File",
ImagePullPolicy: "Always",
Image: "foo:V2",
Resources: core.ResourceRequirements{
Limits: getResourceLimits("200m", "200Mi"),
Requests: getResourceLimits("100m", "100Mi"),
},
},
},
},
},
err: "Pod QoS is immutable",
test: "Pod QoS change, burstable -> besteffort",
},
{
new: core.Pod{
@ -18511,6 +19227,8 @@ func TestValidateOSFields(t *testing.T) {
"Containers[*].Ports",
"Containers[*].ReadinessProbe",
"Containers[*].Resources",
"Containers[*].ResizePolicy[*].Policy",
"Containers[*].ResizePolicy[*].ResourceName",
"Containers[*].SecurityContext.RunAsNonRoot",
"Containers[*].Stdin",
"Containers[*].StdinOnce",
@ -18535,6 +19253,8 @@ func TestValidateOSFields(t *testing.T) {
"EphemeralContainers[*].EphemeralContainerCommon.Ports",
"EphemeralContainers[*].EphemeralContainerCommon.ReadinessProbe",
"EphemeralContainers[*].EphemeralContainerCommon.Resources",
"EphemeralContainers[*].EphemeralContainerCommon.ResizePolicy[*].Policy",
"EphemeralContainers[*].EphemeralContainerCommon.ResizePolicy[*].ResourceName",
"EphemeralContainers[*].EphemeralContainerCommon.Stdin",
"EphemeralContainers[*].EphemeralContainerCommon.StdinOnce",
"EphemeralContainers[*].EphemeralContainerCommon.TTY",
@ -18561,6 +19281,8 @@ func TestValidateOSFields(t *testing.T) {
"InitContainers[*].Ports",
"InitContainers[*].ReadinessProbe",
"InitContainers[*].Resources",
"InitContainers[*].ResizePolicy[*].Policy",
"InitContainers[*].ResizePolicy[*].ResourceName",
"InitContainers[*].Stdin",
"InitContainers[*].StdinOnce",
"InitContainers[*].TTY",

View File

@ -788,6 +788,11 @@ func (in *Container) DeepCopyInto(out *Container) {
}
}
in.Resources.DeepCopyInto(&out.Resources)
if in.ResizePolicy != nil {
in, out := &in.ResizePolicy, &out.ResizePolicy
*out = make([]ContainerResizePolicy, len(*in))
copy(*out, *in)
}
if in.VolumeMounts != nil {
in, out := &in.VolumeMounts, &out.VolumeMounts
*out = make([]VolumeMount, len(*in))
@ -875,6 +880,22 @@ func (in *ContainerPort) DeepCopy() *ContainerPort {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ContainerResizePolicy) DeepCopyInto(out *ContainerResizePolicy) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ContainerResizePolicy.
func (in *ContainerResizePolicy) DeepCopy() *ContainerResizePolicy {
if in == nil {
return nil
}
out := new(ContainerResizePolicy)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ContainerState) DeepCopyInto(out *ContainerState) {
*out = *in
@ -967,6 +988,18 @@ func (in *ContainerStatus) DeepCopyInto(out *ContainerStatus) {
*out = new(bool)
**out = **in
}
if in.ResourcesAllocated != nil {
in, out := &in.ResourcesAllocated, &out.ResourcesAllocated
*out = make(ResourceList, len(*in))
for key, val := range *in {
(*out)[key] = val.DeepCopy()
}
}
if in.Resources != nil {
in, out := &in.Resources, &out.Resources
*out = new(ResourceRequirements)
(*in).DeepCopyInto(*out)
}
return
}
@ -1382,6 +1415,11 @@ func (in *EphemeralContainerCommon) DeepCopyInto(out *EphemeralContainerCommon)
}
}
in.Resources.DeepCopyInto(&out.Resources)
if in.ResizePolicy != nil {
in, out := &in.ResizePolicy, &out.ResizePolicy
*out = make([]ContainerResizePolicy, len(*in))
copy(*out, *in)
}
if in.VolumeMounts != nil {
in, out := &in.VolumeMounts, &out.VolumeMounts
*out = make([]VolumeMount, len(*in))

View File

@ -845,6 +845,13 @@ const (
// instead of changing each file on the volumes recursively.
// Initial implementation focused on ReadWriteOncePod volumes.
SELinuxMountReadWriteOncePod featuregate.Feature = "SELinuxMountReadWriteOncePod"
// owner: @vinaykul
// kep: http://kep.k8s.io/1287
// alpha: v1.27
//
// Enables In-Place Pod Vertical Scaling
InPlacePodVerticalScaling featuregate.Feature = "InPlacePodVerticalScaling"
)
func init() {
@ -1074,6 +1081,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
SELinuxMountReadWriteOncePod: {Default: false, PreRelease: featuregate.Alpha},
InPlacePodVerticalScaling: {Default: false, PreRelease: featuregate.Alpha},
// inherited features from generic apiserver, relisted here to get a conflict if it is changed
// unintentionally on either side:

View File

@ -355,6 +355,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA
"k8s.io/api/core/v1.Container": schema_k8sio_api_core_v1_Container(ref),
"k8s.io/api/core/v1.ContainerImage": schema_k8sio_api_core_v1_ContainerImage(ref),
"k8s.io/api/core/v1.ContainerPort": schema_k8sio_api_core_v1_ContainerPort(ref),
"k8s.io/api/core/v1.ContainerResizePolicy": schema_k8sio_api_core_v1_ContainerResizePolicy(ref),
"k8s.io/api/core/v1.ContainerState": schema_k8sio_api_core_v1_ContainerState(ref),
"k8s.io/api/core/v1.ContainerStateRunning": schema_k8sio_api_core_v1_ContainerStateRunning(ref),
"k8s.io/api/core/v1.ContainerStateTerminated": schema_k8sio_api_core_v1_ContainerStateTerminated(ref),
@ -17074,6 +17075,25 @@ func schema_k8sio_api_core_v1_Container(ref common.ReferenceCallback) common.Ope
Ref: ref("k8s.io/api/core/v1.ResourceRequirements"),
},
},
"resizePolicy": {
VendorExtensible: spec.VendorExtensible{
Extensions: spec.Extensions{
"x-kubernetes-list-type": "atomic",
},
},
SchemaProps: spec.SchemaProps{
Description: "Resources resize policy for the container.",
Type: []string{"array"},
Items: &spec.SchemaOrArray{
Schema: &spec.Schema{
SchemaProps: spec.SchemaProps{
Default: map[string]interface{}{},
Ref: ref("k8s.io/api/core/v1.ContainerResizePolicy"),
},
},
},
},
},
"volumeMounts": {
VendorExtensible: spec.VendorExtensible{
Extensions: spec.Extensions{
@ -17193,7 +17213,7 @@ func schema_k8sio_api_core_v1_Container(ref common.ReferenceCallback) common.Ope
},
},
Dependencies: []string{
"k8s.io/api/core/v1.ContainerPort", "k8s.io/api/core/v1.EnvFromSource", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.Lifecycle", "k8s.io/api/core/v1.Probe", "k8s.io/api/core/v1.ResourceRequirements", "k8s.io/api/core/v1.SecurityContext", "k8s.io/api/core/v1.VolumeDevice", "k8s.io/api/core/v1.VolumeMount"},
"k8s.io/api/core/v1.ContainerPort", "k8s.io/api/core/v1.ContainerResizePolicy", "k8s.io/api/core/v1.EnvFromSource", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.Lifecycle", "k8s.io/api/core/v1.Probe", "k8s.io/api/core/v1.ResourceRequirements", "k8s.io/api/core/v1.SecurityContext", "k8s.io/api/core/v1.VolumeDevice", "k8s.io/api/core/v1.VolumeMount"},
}
}
@ -17284,6 +17304,36 @@ func schema_k8sio_api_core_v1_ContainerPort(ref common.ReferenceCallback) common
}
}
func schema_k8sio_api_core_v1_ContainerResizePolicy(ref common.ReferenceCallback) common.OpenAPIDefinition {
return common.OpenAPIDefinition{
Schema: spec.Schema{
SchemaProps: spec.SchemaProps{
Description: "ContainerResizePolicy represents resource resize policy for a single container.",
Type: []string{"object"},
Properties: map[string]spec.Schema{
"resourceName": {
SchemaProps: spec.SchemaProps{
Description: "Name of the resource type to which this resource resize policy applies. Supported values: cpu, memory.",
Default: "",
Type: []string{"string"},
Format: "",
},
},
"policy": {
SchemaProps: spec.SchemaProps{
Description: "Resource resize policy applicable to the specified resource name. If not specified, it defaults to RestartNotRequired.",
Default: "",
Type: []string{"string"},
Format: "",
},
},
},
Required: []string{"resourceName", "policy"},
},
},
}
}
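
Put together with the Container.resizePolicy field shown earlier, a minimal client-side example (not part of the change; it assumes a v1 client library that already includes these types and constants, which the tests in this diff also use) declares CPU as resizable in place while memory resizes require a container restart:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

func main() {
	c := v1.Container{
		Name:  "app",
		Image: "registry.k8s.io/pause:3.9",
		ResizePolicy: []v1.ContainerResizePolicy{
			{ResourceName: v1.ResourceCPU, Policy: v1.RestartNotRequired},
			{ResourceName: v1.ResourceMemory, Policy: v1.RestartRequired},
		},
	}
	fmt.Printf("%+v\n", c.ResizePolicy)
}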
func schema_k8sio_api_core_v1_ContainerState(ref common.ReferenceCallback) common.OpenAPIDefinition {
return common.OpenAPIDefinition{
Schema: spec.Schema{
@ -17507,12 +17557,33 @@ func schema_k8sio_api_core_v1_ContainerStatus(ref common.ReferenceCallback) comm
Format: "",
},
},
"resourcesAllocated": {
SchemaProps: spec.SchemaProps{
Description: "ResourcesAllocated represents the compute resources allocated for this container by the node. Kubelet sets this value to Container.Resources.Requests upon successful pod admission and after successfully admitting desired pod resize.",
Type: []string{"object"},
AdditionalProperties: &spec.SchemaOrBool{
Allows: true,
Schema: &spec.Schema{
SchemaProps: spec.SchemaProps{
Default: map[string]interface{}{},
Ref: ref("k8s.io/apimachinery/pkg/api/resource.Quantity"),
},
},
},
},
},
"resources": {
SchemaProps: spec.SchemaProps{
Description: "Resources represents the compute resource requests and limits that have been successfully enacted on the running container after it has been started or has been successfully resized.",
Ref: ref("k8s.io/api/core/v1.ResourceRequirements"),
},
},
},
Required: []string{"name", "ready", "restartCount", "image", "imageID"},
},
},
Dependencies: []string{
"k8s.io/api/core/v1.ContainerState"},
"k8s.io/api/core/v1.ContainerState", "k8s.io/api/core/v1.ResourceRequirements", "k8s.io/apimachinery/pkg/api/resource.Quantity"},
}
}
@ -18165,6 +18236,25 @@ func schema_k8sio_api_core_v1_EphemeralContainer(ref common.ReferenceCallback) c
Ref: ref("k8s.io/api/core/v1.ResourceRequirements"),
},
},
"resizePolicy": {
VendorExtensible: spec.VendorExtensible{
Extensions: spec.Extensions{
"x-kubernetes-list-type": "atomic",
},
},
SchemaProps: spec.SchemaProps{
Description: "Resources resize policy for the container.",
Type: []string{"array"},
Items: &spec.SchemaOrArray{
Schema: &spec.Schema{
SchemaProps: spec.SchemaProps{
Default: map[string]interface{}{},
Ref: ref("k8s.io/api/core/v1.ContainerResizePolicy"),
},
},
},
},
},
"volumeMounts": {
VendorExtensible: spec.VendorExtensible{
Extensions: spec.Extensions{
@ -18291,7 +18381,7 @@ func schema_k8sio_api_core_v1_EphemeralContainer(ref common.ReferenceCallback) c
},
},
Dependencies: []string{
"k8s.io/api/core/v1.ContainerPort", "k8s.io/api/core/v1.EnvFromSource", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.Lifecycle", "k8s.io/api/core/v1.Probe", "k8s.io/api/core/v1.ResourceRequirements", "k8s.io/api/core/v1.SecurityContext", "k8s.io/api/core/v1.VolumeDevice", "k8s.io/api/core/v1.VolumeMount"},
"k8s.io/api/core/v1.ContainerPort", "k8s.io/api/core/v1.ContainerResizePolicy", "k8s.io/api/core/v1.EnvFromSource", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.Lifecycle", "k8s.io/api/core/v1.Probe", "k8s.io/api/core/v1.ResourceRequirements", "k8s.io/api/core/v1.SecurityContext", "k8s.io/api/core/v1.VolumeDevice", "k8s.io/api/core/v1.VolumeMount"},
}
}
@ -18420,6 +18510,25 @@ func schema_k8sio_api_core_v1_EphemeralContainerCommon(ref common.ReferenceCallb
Ref: ref("k8s.io/api/core/v1.ResourceRequirements"),
},
},
"resizePolicy": {
VendorExtensible: spec.VendorExtensible{
Extensions: spec.Extensions{
"x-kubernetes-list-type": "atomic",
},
},
SchemaProps: spec.SchemaProps{
Description: "Resources resize policy for the container.",
Type: []string{"array"},
Items: &spec.SchemaOrArray{
Schema: &spec.Schema{
SchemaProps: spec.SchemaProps{
Default: map[string]interface{}{},
Ref: ref("k8s.io/api/core/v1.ContainerResizePolicy"),
},
},
},
},
},
"volumeMounts": {
VendorExtensible: spec.VendorExtensible{
Extensions: spec.Extensions{
@ -18539,7 +18648,7 @@ func schema_k8sio_api_core_v1_EphemeralContainerCommon(ref common.ReferenceCallb
},
},
Dependencies: []string{
"k8s.io/api/core/v1.ContainerPort", "k8s.io/api/core/v1.EnvFromSource", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.Lifecycle", "k8s.io/api/core/v1.Probe", "k8s.io/api/core/v1.ResourceRequirements", "k8s.io/api/core/v1.SecurityContext", "k8s.io/api/core/v1.VolumeDevice", "k8s.io/api/core/v1.VolumeMount"},
"k8s.io/api/core/v1.ContainerPort", "k8s.io/api/core/v1.ContainerResizePolicy", "k8s.io/api/core/v1.EnvFromSource", "k8s.io/api/core/v1.EnvVar", "k8s.io/api/core/v1.Lifecycle", "k8s.io/api/core/v1.Probe", "k8s.io/api/core/v1.ResourceRequirements", "k8s.io/api/core/v1.SecurityContext", "k8s.io/api/core/v1.VolumeDevice", "k8s.io/api/core/v1.VolumeMount"},
}
}
@ -23753,6 +23862,13 @@ func schema_k8sio_api_core_v1_PodStatus(ref common.ReferenceCallback) common.Ope
},
},
},
"resize": {
SchemaProps: spec.SchemaProps{
Description: "Status of resources resize desired for pod's containers. It is empty if no resources resize is pending. Any changes to container resources will automatically set this to \"Proposed\"",
Type: []string{"string"},
Format: "",
},
},
},
},
},

View File

@ -22,6 +22,7 @@ import (
"os"
"path"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
@ -31,6 +32,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups/manager"
cgroupsystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs"
v1 "k8s.io/api/core/v1"
"k8s.io/klog/v2"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
@ -46,7 +48,8 @@ const (
// MemoryMin is memory.min for cgroup v2
MemoryMin string = "memory.min"
// MemoryHigh is memory.high for cgroup v2
MemoryHigh string = "memory.high"
Cgroup2MaxCpuLimit string = "max"
)
var RootCgroupName = CgroupName([]string{})
@ -557,3 +560,189 @@ func (m *cgroupManagerImpl) MemoryUsage(name CgroupName) (int64, error) {
val, err := fscommon.GetCgroupParamUint(path, file)
return int64(val), err
}
// Convert cgroup v1 cpu.shares value to cgroup v2 cpu.weight
// https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2254-cgroup-v2#phase-1-convert-from-cgroups-v1-settings-to-v2
func CpuSharesToCpuWeight(cpuShares uint64) uint64 {
return uint64((((cpuShares - 2) * 9999) / 262142) + 1)
}
// Convert cgroup v2 cpu.weight value to cgroup v1 cpu.shares
// https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/2254-cgroup-v2#phase-1-convert-from-cgroups-v1-settings-to-v2
func CpuWeightToCpuShares(cpuWeight uint64) uint64 {
return uint64((((cpuWeight - 1) * 262142) / 9999) + 2)
}
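
To make the mapping concrete, the following standalone sketch (illustrative only; it duplicates the formulas above rather than importing this package) shows that the conversion is lossy under integer division: 1024 cgroup v1 shares map to weight 39, which maps back to 998 shares.

package main

import "fmt"

// Same arithmetic as CpuSharesToCpuWeight / CpuWeightToCpuShares above,
// repeated here only for illustration.
func sharesToWeight(shares uint64) uint64 { return ((shares-2)*9999)/262142 + 1 }
func weightToShares(weight uint64) uint64 { return ((weight-1)*262142)/9999 + 2 }

func main() {
	fmt.Println(sharesToWeight(1024)) // 39
	fmt.Println(weightToShares(39))   // 998 (round-trip is not exact)
}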
func getCgroupv1CpuConfig(cgroupPath string) (*ResourceConfig, error) {
cpuQuotaStr, errQ := fscommon.GetCgroupParamString(cgroupPath, "cpu.cfs_quota_us")
if errQ != nil {
return nil, fmt.Errorf("failed to read CPU quota for cgroup %v: %v", cgroupPath, errQ)
}
cpuQuota, errInt := strconv.ParseInt(cpuQuotaStr, 10, 64)
if errInt != nil {
return nil, fmt.Errorf("failed to convert CPU quota as integer for cgroup %v: %v", cgroupPath, errInt)
}
cpuPeriod, errP := fscommon.GetCgroupParamUint(cgroupPath, "cpu.cfs_period_us")
if errP != nil {
return nil, fmt.Errorf("failed to read CPU period for cgroup %v: %v", cgroupPath, errP)
}
cpuShares, errS := fscommon.GetCgroupParamUint(cgroupPath, "cpu.shares")
if errS != nil {
return nil, fmt.Errorf("failed to read CPU shares for cgroup %v: %v", cgroupPath, errS)
}
return &ResourceConfig{CPUShares: &cpuShares, CPUQuota: &cpuQuota, CPUPeriod: &cpuPeriod}, nil
}
func getCgroupv2CpuConfig(cgroupPath string) (*ResourceConfig, error) {
var cpuLimitStr, cpuPeriodStr string
cpuLimitAndPeriod, err := fscommon.GetCgroupParamString(cgroupPath, "cpu.max")
if err != nil {
return nil, fmt.Errorf("failed to read cpu.max file for cgroup %v: %v", cgroupPath, err)
}
numItems, errScan := fmt.Sscanf(cpuLimitAndPeriod, "%s %s", &cpuLimitStr, &cpuPeriodStr)
if errScan != nil || numItems != 2 {
return nil, fmt.Errorf("failed to correctly parse content of cpu.max file ('%s') for cgroup %v: %v",
cpuLimitAndPeriod, cgroupPath, errScan)
}
cpuLimit := int64(-1)
if cpuLimitStr != Cgroup2MaxCpuLimit {
cpuLimit, err = strconv.ParseInt(cpuLimitStr, 10, 64)
if err != nil {
return nil, fmt.Errorf("failed to convert CPU limit as integer for cgroup %v: %v", cgroupPath, err)
}
}
cpuPeriod, errPeriod := strconv.ParseUint(cpuPeriodStr, 10, 64)
if errPeriod != nil {
return nil, fmt.Errorf("failed to convert CPU period as integer for cgroup %v: %v", cgroupPath, errPeriod)
}
cpuWeight, errWeight := fscommon.GetCgroupParamUint(cgroupPath, "cpu.weight")
if errWeight != nil {
return nil, fmt.Errorf("failed to read CPU weight for cgroup %v: %v", cgroupPath, errWeight)
}
cpuShares := CpuWeightToCpuShares(cpuWeight)
return &ResourceConfig{CPUShares: &cpuShares, CPUQuota: &cpuLimit, CPUPeriod: &cpuPeriod}, nil
}
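
For reference, cgroup v2 publishes quota and period together in a single cpu.max file, which the Sscanf above splits. A small standalone sketch of the format (illustrative only):

package main

import "fmt"

func main() {
	// "max 100000"    -> no quota (unlimited) over a 100ms period
	// "200000 100000" -> 200ms of CPU time per 100ms period, i.e. 2 CPUs
	for _, line := range []string{"max 100000", "200000 100000"} {
		var limit, period string
		fmt.Sscanf(line, "%s %s", &limit, &period)
		fmt.Printf("limit=%s period=%s\n", limit, period)
	}
}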
func getCgroupCpuConfig(cgroupPath string) (*ResourceConfig, error) {
if libcontainercgroups.IsCgroup2UnifiedMode() {
return getCgroupv2CpuConfig(cgroupPath)
} else {
return getCgroupv1CpuConfig(cgroupPath)
}
}
func getCgroupMemoryConfig(cgroupPath string) (*ResourceConfig, error) {
memLimitFile := "memory.limit_in_bytes"
if libcontainercgroups.IsCgroup2UnifiedMode() {
memLimitFile = "memory.max"
}
memLimit, err := fscommon.GetCgroupParamUint(cgroupPath, memLimitFile)
if err != nil {
return nil, fmt.Errorf("failed to read %s for cgroup %v: %v", memLimitFile, cgroupPath, err)
}
mLim := int64(memLimit)
//TODO(vinaykul,InPlacePodVerticalScaling): Add memory request support
return &ResourceConfig{Memory: &mLim}, nil
}
// Get the resource config values applied to the cgroup for specified resource type
func (m *cgroupManagerImpl) GetCgroupConfig(name CgroupName, resource v1.ResourceName) (*ResourceConfig, error) {
cgroupPaths := m.buildCgroupPaths(name)
cgroupResourcePath, found := cgroupPaths[string(resource)]
if !found {
return nil, fmt.Errorf("failed to build %v cgroup fs path for cgroup %v", resource, name)
}
switch resource {
case v1.ResourceCPU:
return getCgroupCpuConfig(cgroupResourcePath)
case v1.ResourceMemory:
return getCgroupMemoryConfig(cgroupResourcePath)
}
return nil, fmt.Errorf("unsupported resource %v for cgroup %v", resource, name)
}
func setCgroupv1CpuConfig(cgroupPath string, resourceConfig *ResourceConfig) error {
var cpuQuotaStr, cpuPeriodStr, cpuSharesStr string
if resourceConfig.CPUQuota != nil {
cpuQuotaStr = strconv.FormatInt(*resourceConfig.CPUQuota, 10)
if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.cfs_quota_us"), []byte(cpuQuotaStr), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", cpuQuotaStr, cgroupPath, err)
}
}
if resourceConfig.CPUPeriod != nil {
cpuPeriodStr = strconv.FormatUint(*resourceConfig.CPUPeriod, 10)
if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.cfs_period_us"), []byte(cpuPeriodStr), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", cpuPeriodStr, cgroupPath, err)
}
}
if resourceConfig.CPUShares != nil {
cpuSharesStr = strconv.FormatUint(*resourceConfig.CPUShares, 10)
if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.shares"), []byte(cpuSharesStr), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", cpuSharesStr, cgroupPath, err)
}
}
return nil
}
func setCgroupv2CpuConfig(cgroupPath string, resourceConfig *ResourceConfig) error {
if resourceConfig.CPUQuota != nil {
if resourceConfig.CPUPeriod == nil {
return fmt.Errorf("CpuPeriod must be specified in order to set CpuLimit")
}
cpuLimitStr := Cgroup2MaxCpuLimit
if *resourceConfig.CPUQuota > -1 {
cpuLimitStr = strconv.FormatInt(*resourceConfig.CPUQuota, 10)
}
cpuPeriodStr := strconv.FormatUint(*resourceConfig.CPUPeriod, 10)
cpuMaxStr := fmt.Sprintf("%s %s", cpuLimitStr, cpuPeriodStr)
if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.max"), []byte(cpuMaxStr), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", cpuMaxStr, cgroupPath, err)
}
}
if resourceConfig.CPUShares != nil {
cpuWeight := CpuSharesToCpuWeight(*resourceConfig.CPUShares)
cpuWeightStr := strconv.FormatUint(cpuWeight, 10)
if err := os.WriteFile(filepath.Join(cgroupPath, "cpu.weight"), []byte(cpuWeightStr), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v: %v", cpuWeightStr, cgroupPath, err)
}
}
return nil
}
func setCgroupCpuConfig(cgroupPath string, resourceConfig *ResourceConfig) error {
if libcontainercgroups.IsCgroup2UnifiedMode() {
return setCgroupv2CpuConfig(cgroupPath, resourceConfig)
} else {
return setCgroupv1CpuConfig(cgroupPath, resourceConfig)
}
}
func setCgroupMemoryConfig(cgroupPath string, resourceConfig *ResourceConfig) error {
memLimitFile := "memory.limit_in_bytes"
if libcontainercgroups.IsCgroup2UnifiedMode() {
memLimitFile = "memory.max"
}
memLimit := strconv.FormatInt(*resourceConfig.Memory, 10)
if err := os.WriteFile(filepath.Join(cgroupPath, memLimitFile), []byte(memLimit), 0700); err != nil {
return fmt.Errorf("failed to write %v to %v/%v: %v", memLimit, cgroupPath, memLimitFile, err)
}
//TODO(vinaykul,InPlacePodVerticalScaling): Add memory request support
return nil
}
// Set resource config for the specified resource type on the cgroup
func (m *cgroupManagerImpl) SetCgroupConfig(name CgroupName, resource v1.ResourceName, resourceConfig *ResourceConfig) error {
cgroupPaths := m.buildCgroupPaths(name)
cgroupResourcePath, found := cgroupPaths[string(resource)]
if !found {
return fmt.Errorf("failed to build %v cgroup fs path for cgroup %v", resource, name)
}
switch resource {
case v1.ResourceCPU:
return setCgroupCpuConfig(cgroupResourcePath, resourceConfig)
case v1.ResourceMemory:
return setCgroupMemoryConfig(cgroupResourcePath, resourceConfig)
}
return nil
}
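
The two accessors are intentionally symmetric. A hypothetical helper (assumed to live in package cm and to compile against the existing CgroupManager, CgroupName, and ResourceConfig types; it is not part of this change) shows how a caller might read, modify, and write back a single resource's config during a resize:

// bumpCPULimit raises a cgroup's CPU quota via the new per-resource accessors.
func bumpCPULimit(mgr CgroupManager, name CgroupName) error {
	cfg, err := mgr.GetCgroupConfig(name, v1.ResourceCPU)
	if err != nil {
		return err
	}
	quota := int64(200000)   // 200ms of CPU time per period (2 CPUs at the default period)
	period := uint64(100000) // 100ms CFS period
	cfg.CPUQuota = &quota
	cfg.CPUPeriod = &period
	return mgr.SetCgroupConfig(name, v1.ResourceCPU, cfg)
}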

View File

@ -169,3 +169,85 @@ func TestParseSystemdToCgroupName(t *testing.T) {
}
}
}
func TestCpuSharesToCpuWeight(t *testing.T) {
testCases := []struct {
cpuShares uint64
expectedCpuWeight uint64
}{
{
cpuShares: 2,
expectedCpuWeight: 1,
},
{
cpuShares: 3,
expectedCpuWeight: 1,
},
{
cpuShares: 4,
expectedCpuWeight: 1,
},
{
cpuShares: 28,
expectedCpuWeight: 1,
},
{
cpuShares: 29,
expectedCpuWeight: 2,
},
{
cpuShares: 245,
expectedCpuWeight: 10,
},
{
cpuShares: 262144,
expectedCpuWeight: 10000,
},
}
for _, testCase := range testCases {
if actual := CpuSharesToCpuWeight(testCase.cpuShares); actual != testCase.expectedCpuWeight {
t.Errorf("cpuShares: %v, expectedCpuWeight: %v, actualCpuWeight: %v",
testCase.cpuShares, testCase.expectedCpuWeight, actual)
}
}
}
func TestCpuWeightToCpuShares(t *testing.T) {
testCases := []struct {
cpuWeight uint64
expectedCpuShares uint64
}{
{
cpuWeight: 1,
expectedCpuShares: 2,
},
{
cpuWeight: 2,
expectedCpuShares: 28,
},
{
cpuWeight: 3,
expectedCpuShares: 54,
},
{
cpuWeight: 4,
expectedCpuShares: 80,
},
{
cpuWeight: 245,
expectedCpuShares: 6398,
},
{
cpuWeight: 10000,
expectedCpuShares: 262144,
},
}
for _, testCase := range testCases {
if actual := CpuWeightToCpuShares(testCase.cpuWeight); actual != testCase.expectedCpuShares {
t.Errorf("cpuWeight: %v, expectedCpuShares: %v, actualCpuShares: %v",
testCase.cpuWeight, testCase.expectedCpuShares, actual)
}
}
}

View File

@ -19,7 +19,11 @@ limitations under the License.
package cm
import "errors"
import (
"errors"
v1 "k8s.io/api/core/v1"
)
type unsupportedCgroupManager struct{}
@ -77,6 +81,14 @@ func (m *unsupportedCgroupManager) ReduceCPULimits(cgroupName CgroupName) error
return nil
}
func (m *unsupportedCgroupManager) GetCgroupConfig(name CgroupName, resource v1.ResourceName) (*ResourceConfig, error) {
return nil, errNotSupported
}
func (m *unsupportedCgroupManager) SetCgroupConfig(name CgroupName, resource v1.ResourceName, resourceConfig *ResourceConfig) error {
return errNotSupported
}
var RootCgroupName = CgroupName([]string{})
func NewCgroupName(base CgroupName, components ...string) CgroupName {

View File

@ -95,6 +95,14 @@ func (cm *containerManagerStub) GetDevicePluginResourceCapacity() (v1.ResourceLi
return cm.extendedPluginResources, cm.extendedPluginResources, []string{}
}
func (m *podContainerManagerStub) GetPodCgroupConfig(_ *v1.Pod, _ v1.ResourceName) (*ResourceConfig, error) {
return nil, nil
}
func (m *podContainerManagerStub) SetPodCgroupConfig(_ *v1.Pod, _ v1.ResourceName, _ *ResourceConfig) error {
return nil
}
func (cm *containerManagerStub) NewPodContainerManager() PodContainerManager {
return &podContainerManagerStub{}
}

View File

@ -20,9 +20,11 @@ import (
"fmt"
v1 "k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/state"
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager/topology"
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
@ -380,6 +382,15 @@ func (p *staticPolicy) guaranteedCPUs(pod *v1.Pod, container *v1.Container) int
return 0
}
cpuQuantity := container.Resources.Requests[v1.ResourceCPU]
// In-place pod resize feature makes Container.Resources field mutable for CPU & memory.
// ResourcesAllocated holds the value of Container.Resources.Requests when the pod was admitted.
// We should return this value because this is what kubelet agreed to allocate for the container
// and the value configured with the runtime.
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok {
cpuQuantity = cs.ResourcesAllocated[v1.ResourceCPU]
}
}
if cpuQuantity.Value()*1000 != cpuQuantity.MilliValue() {
return 0
}
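
The same lookup pattern used in guaranteedCPUs above recurs in the huge-page accounting, memory manager, and eviction changes below: when the feature gate is on, the node-allocated requests recorded in ContainerStatus.ResourcesAllocated take precedence over the now-mutable spec requests. A hypothetical helper distilling it (imports for v1, resource, utilfeature, features, and podutil assumed):

func allocatedCPURequest(pod *v1.Pod, container *v1.Container) resource.Quantity {
	req := container.Resources.Requests[v1.ResourceCPU]
	if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
		// Prefer what the kubelet actually admitted and configured with the runtime.
		if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok {
			req = cs.ResourcesAllocated[v1.ResourceCPU]
		}
	}
	return req
}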

View File

@ -104,3 +104,24 @@ func (m *FakePodContainerManager) IsPodCgroup(cgroupfs string) (bool, types.UID)
m.CalledFunctions = append(m.CalledFunctions, "IsPodCgroup")
return false, types.UID("")
}
func (cm *FakePodContainerManager) GetPodCgroupMemoryUsage(_ *v1.Pod) (uint64, error) {
cm.Lock()
defer cm.Unlock()
cm.CalledFunctions = append(cm.CalledFunctions, "GetPodCgroupMemoryUsage")
return 0, nil
}
func (cm *FakePodContainerManager) GetPodCgroupConfig(_ *v1.Pod, _ v1.ResourceName) (*ResourceConfig, error) {
cm.Lock()
defer cm.Unlock()
cm.CalledFunctions = append(cm.CalledFunctions, "GetPodCgroupConfig")
return nil, nil
}
func (cm *FakePodContainerManager) SetPodCgroupConfig(_ *v1.Pod, _ v1.ResourceName, _ *ResourceConfig) error {
cm.Lock()
defer cm.Unlock()
cm.CalledFunctions = append(cm.CalledFunctions, "SetPodCgroupConfig")
return nil
}

View File

@ -28,6 +28,7 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/api/v1/resource"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
@ -151,6 +152,11 @@ func ResourceConfigForPod(pod *v1.Pod, enforceCPULimits bool, cpuPeriod uint64,
memoryLimitsDeclared = false
}
containerHugePageLimits := HugePageLimits(container.Resources.Requests)
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.InPlacePodVerticalScaling) {
if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok {
containerHugePageLimits = HugePageLimits(cs.ResourcesAllocated)
}
}
for k, v := range containerHugePageLimits {
if value, exists := hugePageLimits[k]; exists {
hugePageLimits[k] = value + v

View File

@ -25,10 +25,13 @@ import (
)
const (
MinShares = 0
MaxShares = 0
SharesPerCPU = 0
MilliCPUToCPU = 0
QuotaPeriod = 0
MinQuotaPeriod = 0
)

View File

@ -25,9 +25,12 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
corehelper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager/state"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
@ -107,7 +110,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Contai
hint := p.affinity.GetAffinity(podUID, container.Name)
klog.InfoS("Got topology affinity", "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name, "hint", hint)
requestedResources, err := getRequestedResources(container)
requestedResources, err := getRequestedResources(pod, container)
if err != nil {
return err
}
@ -319,7 +322,7 @@ func getPodRequestedResources(pod *v1.Pod) (map[v1.ResourceName]uint64, error) {
reqRsrcsByAppCtrs := make(map[v1.ResourceName]uint64)
for _, ctr := range pod.Spec.InitContainers {
reqRsrcs, err := getRequestedResources(&ctr)
reqRsrcs, err := getRequestedResources(pod, &ctr)
if err != nil {
return nil, err
@ -336,7 +339,7 @@ func getPodRequestedResources(pod *v1.Pod) (map[v1.ResourceName]uint64, error) {
}
for _, ctr := range pod.Spec.Containers {
reqRsrcs, err := getRequestedResources(&ctr)
reqRsrcs, err := getRequestedResources(pod, &ctr)
if err != nil {
return nil, err
@ -391,7 +394,7 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
return nil
}
requestedResources, err := getRequestedResources(container)
requestedResources, err := getRequestedResources(pod, container)
if err != nil {
klog.ErrorS(err, "Failed to get container requested resources", "pod", klog.KObj(pod), "podUID", pod.UID, "containerName", container.Name)
return nil
@ -408,9 +411,19 @@ func (p *staticPolicy) GetTopologyHints(s state.State, pod *v1.Pod, container *v
return p.calculateHints(s.GetMachineState(), pod, requestedResources)
}
func getRequestedResources(container *v1.Container) (map[v1.ResourceName]uint64, error) {
func getRequestedResources(pod *v1.Pod, container *v1.Container) (map[v1.ResourceName]uint64, error) {
requestedResources := map[v1.ResourceName]uint64{}
for resourceName, quantity := range container.Resources.Requests {
resources := container.Resources.Requests
// In-place pod resize feature makes Container.Resources field mutable for CPU & memory.
// ResourcesAllocated holds the value of Container.Resources.Requests when the pod was admitted.
// We should return this value because this is what kubelet agreed to allocate for the container
// and the value configured with the runtime.
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok {
resources = cs.ResourcesAllocated
}
}
for resourceName, quantity := range resources {
if resourceName != v1.ResourceMemory && !corehelper.IsHugePageResourceName(resourceName) {
continue
}

View File

@ -120,6 +120,25 @@ func (m *podContainerManagerImpl) GetPodContainerName(pod *v1.Pod) (CgroupName,
return cgroupName, cgroupfsName
}
func (m *podContainerManagerImpl) GetPodCgroupMemoryUsage(pod *v1.Pod) (uint64, error) {
podCgroupName, _ := m.GetPodContainerName(pod)
memUsage, err := m.cgroupManager.MemoryUsage(podCgroupName)
if err != nil {
return 0, err
}
return uint64(memUsage), nil
}
func (m *podContainerManagerImpl) GetPodCgroupConfig(pod *v1.Pod, resource v1.ResourceName) (*ResourceConfig, error) {
podCgroupName, _ := m.GetPodContainerName(pod)
return m.cgroupManager.GetCgroupConfig(podCgroupName, resource)
}
func (m *podContainerManagerImpl) SetPodCgroupConfig(pod *v1.Pod, resource v1.ResourceName, resourceConfig *ResourceConfig) error {
podCgroupName, _ := m.GetPodContainerName(pod)
return m.cgroupManager.SetCgroupConfig(podCgroupName, resource, resourceConfig)
}
// Kill one process ID
func (m *podContainerManagerImpl) killOnePid(pid int) error {
// os.FindProcess never returns an error on POSIX
@ -322,3 +341,15 @@ func (m *podContainerManagerNoop) GetAllPodsFromCgroups() (map[types.UID]CgroupN
func (m *podContainerManagerNoop) IsPodCgroup(cgroupfs string) (bool, types.UID) {
return false, types.UID("")
}
func (m *podContainerManagerNoop) GetPodCgroupMemoryUsage(_ *v1.Pod) (uint64, error) {
return 0, nil
}
func (m *podContainerManagerNoop) GetPodCgroupConfig(_ *v1.Pod, _ v1.ResourceName) (*ResourceConfig, error) {
return nil, nil
}
func (m *podContainerManagerNoop) SetPodCgroupConfig(_ *v1.Pod, _ v1.ResourceName, _ *ResourceConfig) error {
return nil
}

View File

@ -53,3 +53,23 @@ func (m *podContainerManagerStub) GetAllPodsFromCgroups() (map[types.UID]CgroupN
func (m *podContainerManagerStub) IsPodCgroup(cgroupfs string) (bool, types.UID) {
return false, types.UID("")
}
func (m *podContainerManagerStub) GetPodCgroupMemoryUsage(_ *v1.Pod) (uint64, error) {
return 0, nil
}
func (m *podContainerManagerStub) GetPodCgroupMemoryLimit(_ *v1.Pod) (uint64, error) {
return 0, nil
}
func (m *podContainerManagerStub) GetPodCgroupCpuLimit(_ *v1.Pod) (int64, uint64, uint64, error) {
return 0, 0, 0, nil
}
func (m *podContainerManagerStub) SetPodCgroupMemoryLimit(_ *v1.Pod, _ int64) error {
return nil
}
func (m *podContainerManagerStub) SetPodCgroupCpuLimit(_ *v1.Pod, _ *int64, _, _ *uint64) error {
return nil
}

View File

@ -84,6 +84,10 @@ type CgroupManager interface {
ReduceCPULimits(cgroupName CgroupName) error
// MemoryUsage returns current memory usage of the specified cgroup, as read from the cgroupfs.
MemoryUsage(name CgroupName) (int64, error)
// Get the resource config values applied to the cgroup for specified resource type
GetCgroupConfig(name CgroupName, resource v1.ResourceName) (*ResourceConfig, error)
// Set resource config for the specified resource type on the cgroup
SetCgroupConfig(name CgroupName, resource v1.ResourceName, resourceConfig *ResourceConfig) error
}
// QOSContainersInfo stores the names of containers per qos
@ -119,4 +123,13 @@ type PodContainerManager interface {
// IsPodCgroup returns true if the literal cgroupfs name corresponds to a pod
IsPodCgroup(cgroupfs string) (bool, types.UID)
// Get value of memory usage for the pod Cgroup
GetPodCgroupMemoryUsage(pod *v1.Pod) (uint64, error)
// Get the resource config values applied to the pod cgroup for specified resource type
GetPodCgroupConfig(pod *v1.Pod, resource v1.ResourceName) (*ResourceConfig, error)
// Set resource config values for the specified resource type on the pod cgroup
SetPodCgroupConfig(pod *v1.Pod, resource v1.ResourceName, resourceConfig *ResourceConfig) error
}

View File

@ -117,6 +117,23 @@ func HashContainer(container *v1.Container) uint64 {
return uint64(hash.Sum32())
}
// HashContainerWithoutResources returns the hash of the container with Resources field zero'd out.
func HashContainerWithoutResources(container *v1.Container) uint64 {
// InPlacePodVerticalScaling enables mutable Resources field.
// Changes to this field may not require container restart depending on policy.
// Compute hash over fields besides the Resources field
// NOTE: This is needed during alpha and beta so that containers using Resources but
// not subject to In-place resize are not unexpectedly restarted when
// InPlacePodVerticalScaling feature-gate is toggled.
//TODO(vinaykul,InPlacePodVerticalScaling): Remove this in GA+1 and make HashContainerWithoutResources become Hash.
hashWithoutResources := fnv.New32a()
containerCopy := container.DeepCopy()
containerCopy.Resources = v1.ResourceRequirements{}
containerJSON, _ := json.Marshal(containerCopy)
hashutil.DeepHashObject(hashWithoutResources, containerJSON)
return uint64(hashWithoutResources.Sum32())
}
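
A short sketch of why this matters (it assumes the exported helpers in k8s.io/kubernetes/pkg/kubelet/container): two containers that differ only in Resources get different plain hashes but the same resources-stripped hash, so a resize by itself does not look like a container-definition change that forces a restart.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)

func main() {
	a := v1.Container{
		Name:  "app",
		Image: "nginx",
		Resources: v1.ResourceRequirements{
			Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("100m")},
		},
	}
	b := *a.DeepCopy()
	b.Resources.Requests[v1.ResourceCPU] = resource.MustParse("200m")

	fmt.Println(kubecontainer.HashContainer(&a) == kubecontainer.HashContainer(&b))                                 // false
	fmt.Println(kubecontainer.HashContainerWithoutResources(&a) == kubecontainer.HashContainerWithoutResources(&b)) // true
}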
// envVarsToMap constructs a map of environment name to value from a slice
// of env vars.
func envVarsToMap(envs []EnvVar) map[string]string {
@ -252,12 +269,13 @@ func ConvertPodStatusToRunningPod(runtimeName string, podStatus *PodStatus) Pod
continue
}
container := &Container{
ID: containerStatus.ID,
Name: containerStatus.Name,
Image: containerStatus.Image,
ImageID: containerStatus.ImageID,
Hash: containerStatus.Hash,
State: containerStatus.State,
ID: containerStatus.ID,
Name: containerStatus.Name,
Image: containerStatus.Image,
ImageID: containerStatus.ImageID,
Hash: containerStatus.Hash,
HashWithoutResources: containerStatus.HashWithoutResources,
State: containerStatus.State,
}
runningPod.Containers = append(runningPod.Containers, container)
}

View File

@ -25,6 +25,7 @@ import (
"github.com/stretchr/testify/assert"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
@ -908,3 +909,83 @@ func TestHasWindowsHostProcessContainer(t *testing.T) {
})
}
}
func TestHashContainerWithoutResources(t *testing.T) {
cpu100m := resource.MustParse("100m")
cpu200m := resource.MustParse("200m")
mem100M := resource.MustParse("100Mi")
mem200M := resource.MustParse("200Mi")
cpuPolicyRestartNotRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceCPU, Policy: v1.RestartNotRequired}
memPolicyRestartNotRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceMemory, Policy: v1.RestartNotRequired}
cpuPolicyRestartRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceCPU, Policy: v1.RestartRequired}
memPolicyRestartRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceMemory, Policy: v1.RestartRequired}
type testCase struct {
name string
container *v1.Container
expectedHash uint64
}
tests := []testCase{
{
"Burstable pod with CPU policy restart required",
&v1.Container{
Name: "foo",
Image: "bar",
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M},
Requests: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
},
ResizePolicy: []v1.ContainerResizePolicy{cpuPolicyRestartRequired, memPolicyRestartNotRequired},
},
0x86a4393c,
},
{
"Burstable pod with memory policy restart required",
&v1.Container{
Name: "foo",
Image: "bar",
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M},
Requests: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
},
ResizePolicy: []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartRequired},
},
0x73a18cce,
},
{
"Guaranteed pod with CPU policy restart required",
&v1.Container{
Name: "foo",
Image: "bar",
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
Requests: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
},
ResizePolicy: []v1.ContainerResizePolicy{cpuPolicyRestartRequired, memPolicyRestartNotRequired},
},
0x86a4393c,
},
{
"Guaranteed pod with memory policy restart required",
&v1.Container{
Name: "foo",
Image: "bar",
Resources: v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
Requests: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
},
ResizePolicy: []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartRequired},
},
0x73a18cce,
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
containerCopy := tc.container.DeepCopy()
hash := HashContainerWithoutResources(tc.container)
assert.Equal(t, tc.expectedHash, hash, "[%s]", tc.name)
assert.Equal(t, containerCopy, tc.container, "[%s]", tc.name)
})
}
}

View File

@ -27,6 +27,7 @@ import (
"time"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/tools/remotecommand"
"k8s.io/client-go/util/flowcontrol"
@ -295,6 +296,11 @@ type Container struct {
// Hash of the container, used for comparison. Optional for containers
// not managed by kubelet.
Hash uint64
// Hash of the container over fields with Resources field zero'd out.
// NOTE: This is needed during alpha and beta so that containers using Resources are
// not unexpectedly restarted when InPlacePodVerticalScaling feature-gate is toggled.
//TODO(vinaykul,InPlacePodVerticalScaling): Remove this in GA+1 and make HashWithoutResources become Hash.
HashWithoutResources uint64
// State is the state of the container.
State State
}
@ -319,6 +325,18 @@ type PodStatus struct {
TimeStamp time.Time
}
// ContainerResources represents the Resources allocated to the running container.
type ContainerResources struct {
// CPU capacity reserved for the container
CPURequest *resource.Quantity
// CPU limit enforced on the container
CPULimit *resource.Quantity
// Memory capacity reserved for the container
MemoryRequest *resource.Quantity
// Memory limit enforced on the container
MemoryLimit *resource.Quantity
}
// Status represents the status of a container.
type Status struct {
// ID of the container.
@ -342,6 +360,8 @@ type Status struct {
ImageID string
// Hash of the container, used for comparison.
Hash uint64
// Hash of the container over fields with Resources field zero'd out.
HashWithoutResources uint64
// Number of times that the container has been restarted.
RestartCount int
// A string explains why container is in such a status.
@ -349,6 +369,8 @@ type Status struct {
// Message written by the container before exiting (stored in
// TerminationMessagePath).
Message string
// CPU and memory resources for this container
Resources *ContainerResources
}
// FindContainerStatusByName returns container status in the pod status with the given name.

View File

@ -25,10 +25,13 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
utilfeature "k8s.io/apiserver/pkg/util/feature"
corev1helpers "k8s.io/component-helpers/scheduling/corev1"
"k8s.io/klog/v2"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
v1resource "k8s.io/kubernetes/pkg/api/v1/resource"
"k8s.io/kubernetes/pkg/features"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
volumeutils "k8s.io/kubernetes/pkg/volume/util"
@ -1018,6 +1021,12 @@ func evictionMessage(resourceToReclaim v1.ResourceName, pod *v1.Pod, stats stats
for _, container := range pod.Spec.Containers {
if container.Name == containerStats.Name {
requests := container.Resources.Requests[resourceToReclaim]
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) &&
(resourceToReclaim == v1.ResourceMemory || resourceToReclaim == v1.ResourceCPU) {
if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok {
requests = cs.ResourcesAllocated[resourceToReclaim]
}
}
var usage *resource.Quantity
switch resourceToReclaim {
case v1.ResourceEphemeralStorage:

View File

@ -21,6 +21,7 @@ import (
"fmt"
"reflect"
"sort"
"strings"
"testing"
"time"
@ -30,8 +31,11 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
"k8s.io/kubernetes/pkg/features"
evictionapi "k8s.io/kubernetes/pkg/kubelet/eviction/api"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)
@ -2121,3 +2125,53 @@ func (s1 thresholdList) Equal(s2 thresholdList) bool {
}
return true
}
func TestEvictonMessageWithResourceResize(t *testing.T) {
testpod := newPod("testpod", 1, []v1.Container{
newContainer("testcontainer", newResourceList("", "200Mi", ""), newResourceList("", "", "")),
}, nil)
testpod.Status = v1.PodStatus{
ContainerStatuses: []v1.ContainerStatus{
{
Name: "testcontainer",
ResourcesAllocated: newResourceList("", "100Mi", ""),
},
},
}
testpodMemory := resource.MustParse("150Mi")
testpodStats := newPodMemoryStats(testpod, testpodMemory)
testpodMemoryBytes := uint64(testpodMemory.Value())
testpodStats.Containers = []statsapi.ContainerStats{
{
Name: "testcontainer",
Memory: &statsapi.MemoryStats{
WorkingSetBytes: &testpodMemoryBytes,
},
},
}
stats := map[*v1.Pod]statsapi.PodStats{
testpod: testpodStats,
}
statsFn := func(pod *v1.Pod) (statsapi.PodStats, bool) {
result, found := stats[pod]
return result, found
}
threshold := []evictionapi.Threshold{}
observations := signalObservations{}
for _, enabled := range []bool{true, false} {
t.Run(fmt.Sprintf("InPlacePodVerticalScaling enabled=%v", enabled), func(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, enabled)()
msg, _ := evictionMessage(v1.ResourceMemory, testpod, statsFn, threshold, observations)
if enabled {
if !strings.Contains(msg, "testcontainer was using 150Mi, request is 100Mi") {
t.Errorf("Expected 'exceeds memory' eviction message was not found.")
}
} else {
if strings.Contains(msg, "which exceeds its request") {
t.Errorf("Found 'exceeds memory' eviction message which was not expected.")
}
}
})
}
}

View File

@ -48,6 +48,7 @@ import (
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/diff"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
@ -66,6 +67,8 @@ import (
"k8s.io/klog/v2"
pluginwatcherapi "k8s.io/kubelet/pkg/apis/pluginregistration/v1"
statsapi "k8s.io/kubelet/pkg/apis/stats/v1alpha1"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/api/v1/resource"
"k8s.io/kubernetes/pkg/features"
kubeletconfiginternal "k8s.io/kubernetes/pkg/kubelet/apis/config"
"k8s.io/kubernetes/pkg/kubelet/apis/podresources"
@ -608,7 +611,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
mirrorPodClient := kubepod.NewBasicMirrorClient(klet.kubeClient, string(nodeName), nodeLister)
klet.podManager = kubepod.NewBasicPodManager(mirrorPodClient)
klet.statusManager = status.NewManager(klet.kubeClient, klet.podManager, klet, kubeDeps.PodStartupLatencyTracker)
klet.statusManager = status.NewManager(klet.kubeClient, klet.podManager, klet, kubeDeps.PodStartupLatencyTracker, klet.getRootDir())
klet.resourceAnalyzer = serverstats.NewResourceAnalyzer(klet, kubeCfg.VolumeStatsAggPeriod.Duration, kubeDeps.Recorder)
@ -665,7 +668,7 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
kubeCfg.CPUCFSQuotaPeriod,
kubeDeps.RemoteRuntimeService,
kubeDeps.RemoteImageService,
kubeDeps.ContainerManager.InternalContainerLifecycle(),
kubeDeps.ContainerManager,
klet.containerLogManager,
klet.runtimeClassManager,
seccompDefault,
@ -1247,6 +1250,9 @@ type Kubelet struct {
// Manage user namespaces
usernsManager *usernsManager
// Mutex to serialize new pod admission and existing pod resizing
podResizeMutex sync.Mutex
}
// ListPodStats is delegated to StatsProvider, which implements stats.Provider interface
@ -1826,6 +1832,16 @@ func (kl *Kubelet) syncPod(_ context.Context, updateType kubetypes.SyncPodType,
// Ensure the pod is being probed
kl.probeManager.AddPod(pod)
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
// Handle pod resize here instead of doing it in HandlePodUpdates because
// this conveniently retries any Deferred resize requests
// TODO(vinaykul,InPlacePodVerticalScaling): Investigate doing this in HandlePodUpdates + periodic SyncLoop scan
// See: https://github.com/kubernetes/kubernetes/pull/102884#discussion_r663160060
if kl.podWorkers.CouldHaveRunningContainers(pod.UID) && !kubetypes.IsStaticPod(pod) {
kl.handlePodResourcesResize(pod)
}
}
// Call the container runtime's SyncPod callback
result := kl.containerRuntime.SyncPod(ctx, pod, podStatus, pullSecrets, kl.backOff)
kl.reasonCache.Update(pod.UID, result)
@ -1842,6 +1858,15 @@ func (kl *Kubelet) syncPod(_ context.Context, updateType kubetypes.SyncPodType,
return false, nil
}
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) && isPodResizeInProgress(pod, &apiPodStatus) {
// While resize is in progress, periodically call PLEG to update pod cache
runningPod := kubecontainer.ConvertPodStatusToRunningPod(kl.getRuntime().Type(), podStatus)
if err, _ := kl.pleg.UpdateCache(&runningPod, pod.UID); err != nil {
klog.ErrorS(err, "Failed to update pod cache", "pod", klog.KObj(pod))
return false, err
}
}
return false, nil
}
@ -2078,6 +2103,23 @@ func (kl *Kubelet) canAdmitPod(pods []*v1.Pod, pod *v1.Pod) (bool, string, strin
// TODO: move out of disk check into a pod admitter
// TODO: out of resource eviction should have a pod admitter call-out
attrs := &lifecycle.PodAdmitAttributes{Pod: pod, OtherPods: pods}
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
// Use allocated resources values from checkpoint store (source of truth) to determine fit
otherPods := make([]*v1.Pod, 0, len(pods))
checkpointState := kl.statusManager.State()
for _, p := range pods {
op := p.DeepCopy()
for _, c := range op.Spec.Containers {
resourcesAllocated, found := checkpointState.GetContainerResourceAllocation(string(p.UID), c.Name)
if c.Resources.Requests != nil && found {
c.Resources.Requests[v1.ResourceCPU] = resourcesAllocated[v1.ResourceCPU]
c.Resources.Requests[v1.ResourceMemory] = resourcesAllocated[v1.ResourceMemory]
}
}
otherPods = append(otherPods, op)
}
attrs.OtherPods = otherPods
}
for _, podAdmitHandler := range kl.admitHandlers {
if result := podAdmitHandler.Admit(attrs); !result.Admit {
return false, result.Reason, result.Message
@ -2332,6 +2374,10 @@ func (kl *Kubelet) handleMirrorPod(mirrorPod *v1.Pod, start time.Time) {
func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) {
start := kl.clock.Now()
sort.Sort(sliceutils.PodsByCreationTime(pods))
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
kl.podResizeMutex.Lock()
defer kl.podResizeMutex.Unlock()
}
for _, pod := range pods {
existingPods := kl.podManager.GetPods()
// Always add the pod to the pod manager. Kubelet relies on the pod
@ -2356,10 +2402,36 @@ func (kl *Kubelet) HandlePodAdditions(pods []*v1.Pod) {
// pods that are alive.
activePods := kl.filterOutInactivePods(existingPods)
// Check if we can admit the pod; if not, reject it.
if ok, reason, message := kl.canAdmitPod(activePods, pod); !ok {
kl.rejectPod(pod, reason, message)
continue
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
// To handle kubelet restarts, test pod admissibility using ResourcesAllocated values
// (for cpu & memory) from checkpoint store. If found, that is the source of truth.
checkpointState := kl.statusManager.State()
podCopy := pod.DeepCopy()
for _, c := range podCopy.Spec.Containers {
resourcesAllocated, found := checkpointState.GetContainerResourceAllocation(string(pod.UID), c.Name)
if c.Resources.Requests != nil && found {
c.Resources.Requests[v1.ResourceCPU] = resourcesAllocated[v1.ResourceCPU]
c.Resources.Requests[v1.ResourceMemory] = resourcesAllocated[v1.ResourceMemory]
}
}
// Check if we can admit the pod; if not, reject it.
if ok, reason, message := kl.canAdmitPod(activePods, podCopy); !ok {
kl.rejectPod(pod, reason, message)
continue
}
// For new pod, checkpoint the resource values at which the Pod has been admitted
if err := kl.statusManager.SetPodAllocation(podCopy); err != nil {
//TODO(vinaykul,InPlacePodVerticalScaling): Can we recover from this in some way? Investigate
klog.ErrorS(err, "SetPodAllocation failed", "pod", klog.KObj(pod))
}
} else {
// Check if we can admit the pod; if not, reject it.
if ok, reason, message := kl.canAdmitPod(activePods, pod); !ok {
kl.rejectPod(pod, reason, message)
continue
}
}
}
mirrorPod, _ := kl.podManager.GetMirrorPodByPod(pod)
@ -2434,6 +2506,116 @@ func (kl *Kubelet) HandlePodSyncs(pods []*v1.Pod) {
}
}
func isPodResizeInProgress(pod *v1.Pod, podStatus *v1.PodStatus) bool {
for _, c := range pod.Spec.Containers {
if cs, ok := podutil.GetContainerStatus(podStatus.ContainerStatuses, c.Name); ok {
if cs.Resources == nil {
continue
}
if diff.ObjectDiff(c.Resources.Limits, cs.Resources.Limits) != "" ||
diff.ObjectDiff(cs.ResourcesAllocated, cs.Resources.Requests) != "" {
return true
}
}
}
return false
}
func (kl *Kubelet) canResizePod(pod *v1.Pod) (bool, *v1.Pod, v1.PodResizeStatus) {
var otherActivePods []*v1.Pod
node, err := kl.getNodeAnyWay()
if err != nil {
klog.ErrorS(err, "getNodeAnyway function failed")
return false, nil, ""
}
cpuAvailable := node.Status.Allocatable.Cpu().MilliValue()
memAvailable := node.Status.Allocatable.Memory().Value()
cpuRequests := resource.GetResourceRequest(pod, v1.ResourceCPU)
memRequests := resource.GetResourceRequest(pod, v1.ResourceMemory)
if cpuRequests > cpuAvailable || memRequests > memAvailable {
klog.V(3).InfoS("Resize is not feasible as request exceeds allocatable node resources", "pod", pod.Name)
return false, nil, v1.PodResizeStatusInfeasible
}
// Treat the existing pod needing resize as a new pod with desired resources seeking admission.
// If desired resources don't fit, pod continues to run with currently allocated resources.
activePods := kl.GetActivePods()
for _, p := range activePods {
if p.UID != pod.UID {
otherActivePods = append(otherActivePods, p)
}
}
if ok, failReason, failMessage := kl.canAdmitPod(otherActivePods, pod); !ok {
// Log reason and return. Let the next sync iteration retry the resize
klog.V(3).InfoS("Resize cannot be accommodated", "pod", pod.Name, "reason", failReason, "message", failMessage)
return false, nil, v1.PodResizeStatusDeferred
}
podCopy := pod.DeepCopy()
for _, container := range podCopy.Spec.Containers {
idx, found := podutil.GetIndexOfContainerStatus(podCopy.Status.ContainerStatuses, container.Name)
if found {
for rName, rQuantity := range container.Resources.Requests {
podCopy.Status.ContainerStatuses[idx].ResourcesAllocated[rName] = rQuantity
}
}
}
return true, podCopy, v1.PodResizeStatusInProgress
}
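canResizePod maps the feasibility checks above to one of three v1.PodResizeStatus values: Infeasible when the desired requests can never fit within the node's allocatable, Deferred when they do not fit alongside the other active pods right now, and InProgress once the resize is admitted. A minimal standalone sketch of that decision order (illustrative only, not kubelet code; decide and its millicore parameters are hypothetical names):
package main

import "fmt"

type resizeStatus string

const (
	infeasible resizeStatus = "Infeasible"
	deferred   resizeStatus = "Deferred"
	inProgress resizeStatus = "InProgress"
)

// decide mirrors the order of checks above: node allocatable first, then fit among other pods.
func decide(podCPUMilli, nodeAllocatableMilli, otherPodsCPUMilli int64) resizeStatus {
	if podCPUMilli > nodeAllocatableMilli {
		return infeasible // the request can never fit on this node
	}
	if podCPUMilli+otherPodsCPUMilli > nodeAllocatableMilli {
		return deferred // does not fit right now; retried on a later sync
	}
	return inProgress // admitted; the allocation checkpoint is updated
}

func main() {
	fmt.Println(decide(5000, 4000, 0))    // Infeasible
	fmt.Println(decide(2500, 4000, 2000)) // Deferred
	fmt.Println(decide(1500, 4000, 2000)) // InProgress
}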
func (kl *Kubelet) handlePodResourcesResize(pod *v1.Pod) {
if pod.Status.Phase != v1.PodRunning {
return
}
podResized := false
for _, container := range pod.Spec.Containers {
if len(container.Resources.Requests) == 0 {
continue
}
containerStatus, found := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name)
if !found {
klog.V(5).InfoS("ContainerStatus not found", "pod", pod.Name, "container", container.Name)
break
}
if len(containerStatus.ResourcesAllocated) != len(container.Resources.Requests) {
klog.V(5).InfoS("ContainerStatus.ResourcesAllocated length mismatch", "pod", pod.Name, "container", container.Name)
break
}
if len(diff.ObjectDiff(container.Resources.Requests, containerStatus.ResourcesAllocated)) > 0 {
podResized = true
break
}
}
if !podResized {
return
}
kl.podResizeMutex.Lock()
defer kl.podResizeMutex.Unlock()
fit, updatedPod, resizeStatus := kl.canResizePod(pod)
if fit {
// Update pod resource allocation checkpoint
if err := kl.statusManager.SetPodAllocation(updatedPod); err != nil {
//TODO(vinaykul,InPlacePodVerticalScaling): Can we recover from this in some way? Investigate
klog.ErrorS(err, "SetPodAllocation failed", "pod", klog.KObj(pod))
}
*pod = *updatedPod
}
if resizeStatus != "" {
// Save resize decision to checkpoint
if err := kl.statusManager.SetPodResizeStatus(pod.UID, resizeStatus); err != nil {
//TODO(vinaykul,InPlacePodVerticalScaling): Can we recover from this in some way? Investigate
klog.ErrorS(err, "SetPodResizeStatus failed", "pod", klog.KObj(pod))
}
pod.Status.Resize = resizeStatus
}
kl.podManager.UpdatePod(pod)
kl.statusManager.SetPodStatus(pod, pod.Status)
return
}
// LatestLoopEntryTime returns the last time in the sync loop monitor.
func (kl *Kubelet) LatestLoopEntryTime() time.Time {
val := kl.syncLoopMonitor.Load()


@ -34,6 +34,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/diff"
"k8s.io/apimachinery/pkg/util/sets"
utilvalidation "k8s.io/apimachinery/pkg/util/validation"
utilfeature "k8s.io/apiserver/pkg/util/feature"
@ -1454,6 +1455,31 @@ func getPhase(spec *v1.PodSpec, info []v1.ContainerStatus) v1.PodPhase {
}
}
func (kl *Kubelet) determinePodResizeStatus(pod *v1.Pod, podStatus *v1.PodStatus) v1.PodResizeStatus {
var podResizeStatus v1.PodResizeStatus
specStatusDiffer := false
for _, c := range pod.Spec.Containers {
if cs, ok := podutil.GetContainerStatus(podStatus.ContainerStatuses, c.Name); ok {
if cs.Resources != nil && diff.ObjectDiff(c.Resources, *cs.Resources) != "" {
specStatusDiffer = true
break
}
}
}
if !specStatusDiffer {
// Clear last resize state from checkpoint
if err := kl.statusManager.SetPodResizeStatus(pod.UID, ""); err != nil {
klog.ErrorS(err, "SetPodResizeStatus failed", "pod", pod.Name)
}
} else {
checkpointState := kl.statusManager.State()
if resizeStatus, found := checkpointState.GetPodResizeStatus(string(pod.UID)); found {
podResizeStatus = resizeStatus
}
}
return podResizeStatus
}
// generateAPIPodStatus creates the final API pod status for a pod, given the
// internal pod status. This method should only be called from within sync*Pod methods.
func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.PodStatus) v1.PodStatus {
@ -1464,6 +1490,9 @@ func (kl *Kubelet) generateAPIPodStatus(pod *v1.Pod, podStatus *kubecontainer.Po
oldPodStatus = pod.Status
}
s := kl.convertStatusToAPIStatus(pod, podStatus, oldPodStatus)
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
s.Resize = kl.determinePodResizeStatus(pod, s)
}
// calculate the next phase and preserve reason
allStatus := append(append([]v1.ContainerStatus{}, s.ContainerStatuses...), s.InitContainerStatuses...)
s.Phase = getPhase(&pod.Spec, allStatus)
@ -1715,6 +1744,84 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
return status
}
convertContainerStatusResources := func(cName string, status *v1.ContainerStatus, cStatus *kubecontainer.Status, oldStatuses map[string]v1.ContainerStatus) *v1.ResourceRequirements {
var requests, limits v1.ResourceList
// oldStatus should always exist if container is running
oldStatus, oldStatusFound := oldStatuses[cName]
// Initialize limits/requests from container's spec upon transition to Running state
// For cpu & memory, values queried from runtime via CRI always supercedes spec values
// For ephemeral-storage, a running container's status.limit/request equals spec.limit/request
determineResource := func(rName v1.ResourceName, v1ContainerResource, oldStatusResource, resource v1.ResourceList) {
if oldStatusFound {
if oldStatus.State.Running == nil || status.ContainerID != oldStatus.ContainerID {
if r, exists := v1ContainerResource[rName]; exists {
resource[rName] = r.DeepCopy()
}
} else {
if oldStatusResource != nil {
if r, exists := oldStatusResource[rName]; exists {
resource[rName] = r.DeepCopy()
}
}
}
}
}
container := kubecontainer.GetContainerSpec(pod, cName)
// ResourcesAllocated values come from the checkpoint; it is the source of truth.
found := false
checkpointState := kl.statusManager.State()
status.ResourcesAllocated, found = checkpointState.GetContainerResourceAllocation(string(pod.UID), cName)
if !(container.Resources.Requests == nil && container.Resources.Limits == nil) && !found {
// Log the error and fall back to ResourcesAllocated in oldStatus if it exists
klog.ErrorS(nil, "resource allocation not found in checkpoint store", "pod", pod.Name, "container", cName)
if oldStatusFound {
status.ResourcesAllocated = oldStatus.ResourcesAllocated
}
}
if oldStatus.Resources == nil {
oldStatus.Resources = &v1.ResourceRequirements{}
}
// Convert Limits
if container.Resources.Limits != nil {
limits = make(v1.ResourceList)
if cStatus.Resources != nil && cStatus.Resources.CPULimit != nil {
limits[v1.ResourceCPU] = cStatus.Resources.CPULimit.DeepCopy()
} else {
determineResource(v1.ResourceCPU, container.Resources.Limits, oldStatus.Resources.Limits, limits)
}
if cStatus.Resources != nil && cStatus.Resources.MemoryLimit != nil {
limits[v1.ResourceMemory] = cStatus.Resources.MemoryLimit.DeepCopy()
} else {
determineResource(v1.ResourceMemory, container.Resources.Limits, oldStatus.Resources.Limits, limits)
}
if ephemeralStorage, found := container.Resources.Limits[v1.ResourceEphemeralStorage]; found {
limits[v1.ResourceEphemeralStorage] = ephemeralStorage.DeepCopy()
}
}
// Convert Requests
if status.ResourcesAllocated != nil {
requests = make(v1.ResourceList)
if cStatus.Resources != nil && cStatus.Resources.CPURequest != nil {
requests[v1.ResourceCPU] = cStatus.Resources.CPURequest.DeepCopy()
} else {
determineResource(v1.ResourceCPU, status.ResourcesAllocated, oldStatus.Resources.Requests, requests)
}
if memory, found := status.ResourcesAllocated[v1.ResourceMemory]; found {
requests[v1.ResourceMemory] = memory.DeepCopy()
}
if ephemeralStorage, found := status.ResourcesAllocated[v1.ResourceEphemeralStorage]; found {
requests[v1.ResourceEphemeralStorage] = ephemeralStorage.DeepCopy()
}
}
//TODO(vinaykul,derekwaynecarr,InPlacePodVerticalScaling): Update this to include extended resources in
// addition to CPU, memory, ephemeral storage. Add test case for extended resources.
resources := &v1.ResourceRequirements{
Limits: limits,
Requests: requests,
}
return resources
}
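The precedence implemented by convertContainerStatusResources and its determineResource helper is: a value reported by the runtime via CRI wins; otherwise, while the same container keeps running, the previous API status is carried forward; otherwise (a freshly started or restarted container) the value is re-initialized from the spec. A simplified standalone sketch of that precedence for a single CPU limit (pickCPULimit is a hypothetical name; the real helper leaves the value unset rather than falling back to the spec when no old status exists):
package main

import "fmt"

// pickCPULimit is an illustrative simplification of the precedence above, for one resource.
func pickCPULimit(criLimit, oldStatusLimit, specLimit *int64, sameRunningContainer bool) int64 {
	if criLimit != nil {
		return *criLimit // runtime-reported value always wins
	}
	if sameRunningContainer && oldStatusLimit != nil {
		return *oldStatusLimit // keep the last known status for a still-running container
	}
	return *specLimit // freshly (re)started container: initialize from the spec
}

func main() {
	cri, old, spec := int64(250), int64(500), int64(1000)
	fmt.Println(pickCPULimit(&cri, &old, &spec, true)) // 250: CRI value
	fmt.Println(pickCPULimit(nil, &old, &spec, true))  // 500: carried forward from old status
	fmt.Println(pickCPULimit(nil, &old, &spec, false)) // 1000: re-initialized from spec
}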
// Fetch old containers statuses from old pod status.
oldStatuses := make(map[string]v1.ContainerStatus, len(containers))
for _, status := range previousStatus {
@ -1835,6 +1942,11 @@ func (kl *Kubelet) convertToAPIContainerStatuses(pod *v1.Pod, podStatus *kubecon
oldStatusPtr = &oldStatus
}
status := convertContainerStatus(cStatus, oldStatusPtr)
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
if status.State.Running != nil {
status.Resources = convertContainerStatusResources(cName, status, cStatus, oldStatuses)
}
}
if containerSeen[cName] == 0 {
statuses[cName] = status
} else {


@ -33,6 +33,7 @@ import (
v1 "k8s.io/api/core/v1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
@ -56,6 +57,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/cri/streaming/portforward"
"k8s.io/kubernetes/pkg/kubelet/cri/streaming/remotecommand"
"k8s.io/kubernetes/pkg/kubelet/prober/results"
"k8s.io/kubernetes/pkg/kubelet/status"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
)
@ -3861,3 +3863,219 @@ func TestConvertToAPIContainerStatusesDataRace(t *testing.T) {
}()
}
}
func TestConvertToAPIContainerStatusesForResources(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
nowTime := time.Now()
testContainerName := "ctr0"
testContainerID := kubecontainer.ContainerID{Type: "test", ID: testContainerName}
testContainer := v1.Container{
Name: testContainerName,
Image: "img",
}
testContainerStatus := v1.ContainerStatus{
Name: testContainerName,
}
testPod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "123456",
Name: "foo",
Namespace: "bar",
},
Spec: v1.PodSpec{
Containers: []v1.Container{testContainer},
},
Status: v1.PodStatus{
ContainerStatuses: []v1.ContainerStatus{testContainerStatus},
},
}
testKubeContainerStatus := kubecontainer.Status{
Name: testContainerName,
ID: testContainerID,
Image: "img",
ImageID: "img1234",
State: kubecontainer.ContainerStateRunning,
StartedAt: nowTime,
}
testPodStatus := &kubecontainer.PodStatus{
ID: testPod.UID,
Name: testPod.Name,
Namespace: testPod.Namespace,
ContainerStatuses: []*kubecontainer.Status{&testKubeContainerStatus},
}
CPU1AndMem1G := v1.ResourceList{v1.ResourceCPU: resource.MustParse("1"), v1.ResourceMemory: resource.MustParse("1Gi")}
CPU2AndMem2G := v1.ResourceList{v1.ResourceCPU: resource.MustParse("2"), v1.ResourceMemory: resource.MustParse("2Gi")}
CPU1AndMem1GAndStorage2G := CPU1AndMem1G.DeepCopy()
CPU1AndMem1GAndStorage2G[v1.ResourceEphemeralStorage] = resource.MustParse("2Gi")
CPU2AndMem2GAndStorage2G := CPU2AndMem2G.DeepCopy()
CPU2AndMem2GAndStorage2G[v1.ResourceEphemeralStorage] = resource.MustParse("2Gi")
testKubelet := newTestKubelet(t, false)
defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet
kubelet.statusManager = status.NewFakeManager()
idx := 0
for tdesc, tc := range map[string]struct {
Resources []v1.ResourceRequirements
OldStatus []v1.ContainerStatus
Expected []v1.ContainerStatus
}{
"GuaranteedQoSPod with CPU and memory CRI status": {
Resources: []v1.ResourceRequirements{{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G}},
OldStatus: []v1.ContainerStatus{
{
Name: testContainerName,
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
Resources: &v1.ResourceRequirements{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G},
},
},
Expected: []v1.ContainerStatus{
{
Name: testContainerName,
ContainerID: testContainerID.String(),
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}},
ResourcesAllocated: CPU1AndMem1G,
Resources: &v1.ResourceRequirements{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G},
},
},
},
"BurstableQoSPod with CPU and memory CRI status": {
Resources: []v1.ResourceRequirements{{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G}},
OldStatus: []v1.ContainerStatus{
{
Name: testContainerName,
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
Resources: &v1.ResourceRequirements{Limits: CPU2AndMem2G, Requests: CPU1AndMem1G},
},
},
Expected: []v1.ContainerStatus{
{
Name: testContainerName,
ContainerID: testContainerID.String(),
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}},
ResourcesAllocated: CPU1AndMem1G,
Resources: &v1.ResourceRequirements{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G},
},
},
},
"GuaranteedQoSPod with CPU and memory CRI status, with ephemeral storage": {
Resources: []v1.ResourceRequirements{{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G}},
OldStatus: []v1.ContainerStatus{
{
Name: testContainerName,
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
Resources: &v1.ResourceRequirements{Limits: CPU1AndMem1G, Requests: CPU1AndMem1G},
},
},
Expected: []v1.ContainerStatus{
{
Name: testContainerName,
ContainerID: testContainerID.String(),
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}},
ResourcesAllocated: CPU1AndMem1GAndStorage2G,
Resources: &v1.ResourceRequirements{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G},
},
},
},
"BurstableQoSPod with CPU and memory CRI status, with ephemeral storage": {
Resources: []v1.ResourceRequirements{{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G}},
OldStatus: []v1.ContainerStatus{
{
Name: testContainerName,
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
Resources: &v1.ResourceRequirements{Limits: CPU2AndMem2GAndStorage2G, Requests: CPU2AndMem2GAndStorage2G},
},
},
Expected: []v1.ContainerStatus{
{
Name: testContainerName,
ContainerID: testContainerID.String(),
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}},
ResourcesAllocated: CPU1AndMem1GAndStorage2G,
Resources: &v1.ResourceRequirements{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G},
},
},
},
"BurstableQoSPod with CPU and memory CRI status, with ephemeral storage, nil resources in OldStatus": {
Resources: []v1.ResourceRequirements{{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G}},
OldStatus: []v1.ContainerStatus{
{
Name: testContainerName,
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
},
},
Expected: []v1.ContainerStatus{
{
Name: testContainerName,
ContainerID: testContainerID.String(),
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}},
ResourcesAllocated: CPU1AndMem1GAndStorage2G,
Resources: &v1.ResourceRequirements{Limits: CPU1AndMem1GAndStorage2G, Requests: CPU1AndMem1GAndStorage2G},
},
},
},
"BestEffortQoSPod": {
OldStatus: []v1.ContainerStatus{
{
Name: testContainerName,
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
Resources: &v1.ResourceRequirements{},
},
},
Expected: []v1.ContainerStatus{
{
Name: testContainerName,
ContainerID: testContainerID.String(),
Image: "img",
ImageID: "img1234",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{StartedAt: metav1.NewTime(nowTime)}},
Resources: &v1.ResourceRequirements{},
},
},
},
} {
tPod := testPod.DeepCopy()
tPod.Name = fmt.Sprintf("%s-%d", testPod.Name, idx)
for i := range tPod.Spec.Containers {
if tc.Resources != nil {
tPod.Spec.Containers[i].Resources = tc.Resources[i]
}
kubelet.statusManager.SetPodAllocation(tPod)
if tc.Resources != nil {
tPod.Status.ContainerStatuses[i].ResourcesAllocated = tc.Resources[i].Requests
testPodStatus.ContainerStatuses[i].Resources = &kubecontainer.ContainerResources{
MemoryLimit: tc.Resources[i].Limits.Memory(),
CPULimit: tc.Resources[i].Limits.Cpu(),
CPURequest: tc.Resources[i].Requests.Cpu(),
}
}
}
t.Logf("TestCase: %q", tdesc)
cStatuses := kubelet.convertToAPIContainerStatuses(tPod, testPodStatus, tc.OldStatus, tPod.Spec.Containers, false, false)
assert.Equal(t, tc.Expected, cStatuses)
}
}


@ -48,12 +48,15 @@ import (
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/kubernetes/fake"
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/flowcontrol"
featuregatetesting "k8s.io/component-base/featuregate/testing"
internalapi "k8s.io/cri-api/pkg/apis"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/klog/v2/ktesting"
"k8s.io/kubernetes/pkg/features"
kubeletconfiginternal "k8s.io/kubernetes/pkg/kubelet/apis/config"
cadvisortest "k8s.io/kubernetes/pkg/kubelet/cadvisor/testing"
"k8s.io/kubernetes/pkg/kubelet/cm"
@ -260,7 +263,7 @@ func newTestKubeletWithImageList(
kubelet.configMapManager = configMapManager
kubelet.podManager = kubepod.NewBasicPodManager(fakeMirrorClient)
podStartupLatencyTracker := kubeletutil.NewPodStartupLatencyTracker()
kubelet.statusManager = status.NewManager(fakeKubeClient, kubelet.podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker)
kubelet.statusManager = status.NewManager(fakeKubeClient, kubelet.podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, kubelet.getRootDir())
kubelet.containerRuntime = fakeRuntime
kubelet.runtimeCache = containertest.NewFakeRuntimeCache(kubelet.containerRuntime)
@ -2436,6 +2439,162 @@ func TestHandlePodAdditionsInvokesPodAdmitHandlers(t *testing.T) {
checkPodStatus(t, kl, podToAdmit, v1.PodPending)
}
func TestHandlePodResourcesResize(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
testKubelet := newTestKubelet(t, false)
defer testKubelet.Cleanup()
kubelet := testKubelet.kubelet
kubelet.statusManager = status.NewFakeManager()
cpu500m := resource.MustParse("500m")
cpu1000m := resource.MustParse("1")
cpu1500m := resource.MustParse("1500m")
cpu2500m := resource.MustParse("2500m")
cpu5000m := resource.MustParse("5000m")
mem500M := resource.MustParse("500Mi")
mem1000M := resource.MustParse("1Gi")
mem1500M := resource.MustParse("1500Mi")
mem2500M := resource.MustParse("2500Mi")
mem4500M := resource.MustParse("4500Mi")
nodes := []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{Name: testKubeletHostname},
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("8"),
v1.ResourceMemory: resource.MustParse("8Gi"),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: resource.MustParse("4"),
v1.ResourceMemory: resource.MustParse("4Gi"),
v1.ResourcePods: *resource.NewQuantity(40, resource.DecimalSI),
},
},
},
}
kubelet.nodeLister = testNodeLister{nodes: nodes}
testPod1 := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "1111",
Name: "pod1",
Namespace: "ns1",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "c1",
Image: "i1",
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
},
},
},
},
Status: v1.PodStatus{
Phase: v1.PodRunning,
ContainerStatuses: []v1.ContainerStatus{
{
Name: "c1",
ResourcesAllocated: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
Resources: &v1.ResourceRequirements{},
},
},
},
}
testPod2 := testPod1.DeepCopy()
testPod2.UID = "2222"
testPod2.Name = "pod2"
testPod2.Namespace = "ns2"
testPod3 := testPod1.DeepCopy()
testPod3.UID = "3333"
testPod3.Name = "pod3"
testPod3.Namespace = "ns2"
testKubelet.fakeKubeClient = fake.NewSimpleClientset(testPod1, testPod2, testPod3)
kubelet.kubeClient = testKubelet.fakeKubeClient
defer testKubelet.fakeKubeClient.ClearActions()
kubelet.podManager.AddPod(testPod1)
kubelet.podManager.AddPod(testPod2)
kubelet.podManager.AddPod(testPod3)
kubelet.podWorkers.(*fakePodWorkers).running = map[types.UID]bool{
testPod1.UID: true,
testPod2.UID: true,
testPod3.UID: true,
}
defer kubelet.podManager.DeletePod(testPod3)
defer kubelet.podManager.DeletePod(testPod2)
defer kubelet.podManager.DeletePod(testPod1)
tests := []struct {
name string
pod *v1.Pod
newRequests v1.ResourceList
expectedAllocations v1.ResourceList
expectedResize v1.PodResizeStatus
}{
{
name: "Request CPU and memory decrease - expect InProgress",
pod: testPod2,
newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
expectedResize: v1.PodResizeStatusInProgress,
},
{
name: "Request CPU increase, memory decrease - expect InProgress",
pod: testPod2,
newRequests: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1500m, v1.ResourceMemory: mem500M},
expectedResize: v1.PodResizeStatusInProgress,
},
{
name: "Request CPU decrease, memory increase - expect InProgress",
pod: testPod2,
newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem1500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem1500M},
expectedResize: v1.PodResizeStatusInProgress,
},
{
name: "Request CPU and memory increase beyond current capacity - expect Deferred",
pod: testPod2,
newRequests: v1.ResourceList{v1.ResourceCPU: cpu2500m, v1.ResourceMemory: mem2500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
expectedResize: v1.PodResizeStatusDeferred,
},
{
name: "Request CPU decrease and memory increase beyond current capacity - expect Deferred",
pod: testPod2,
newRequests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem2500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
expectedResize: v1.PodResizeStatusDeferred,
},
{
name: "Request memory increase beyond node capacity - expect Infeasible",
pod: testPod2,
newRequests: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem4500M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
expectedResize: v1.PodResizeStatusInfeasible,
},
{
name: "Request CPU increase beyond node capacity - expect Infeasible",
pod: testPod2,
newRequests: v1.ResourceList{v1.ResourceCPU: cpu5000m, v1.ResourceMemory: mem1000M},
expectedAllocations: v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M},
expectedResize: v1.PodResizeStatusInfeasible,
},
}
for _, tt := range tests {
tt.pod.Spec.Containers[0].Resources.Requests = tt.newRequests
tt.pod.Status.ContainerStatuses[0].ResourcesAllocated = v1.ResourceList{v1.ResourceCPU: cpu1000m, v1.ResourceMemory: mem1000M}
kubelet.handlePodResourcesResize(tt.pod)
assert.Equal(t, tt.expectedAllocations, tt.pod.Status.ContainerStatuses[0].ResourcesAllocated, tt.name)
assert.Equal(t, tt.expectedResize, tt.pod.Status.Resize, tt.name)
testKubelet.fakeKubeClient.ClearActions()
}
}
// testPodSyncLoopHandler is a lifecycle.PodSyncLoopHandler that is used for testing.
type testPodSyncLoopHandler struct {
// list of pods to sync


@ -94,12 +94,13 @@ func (m *kubeGenericRuntimeManager) toKubeContainer(c *runtimeapi.Container) (*k
annotatedInfo := getContainerInfoFromAnnotations(c.Annotations)
return &kubecontainer.Container{
ID: kubecontainer.ContainerID{Type: m.runtimeName, ID: c.Id},
Name: c.GetMetadata().GetName(),
ImageID: c.ImageRef,
Image: c.Image.Image,
Hash: annotatedInfo.Hash,
State: toKubeContainerState(c.State),
ID: kubecontainer.ContainerID{Type: m.runtimeName, ID: c.Id},
Name: c.GetMetadata().GetName(),
ImageID: c.ImageRef,
Image: c.Image.Image,
Hash: annotatedInfo.Hash,
HashWithoutResources: annotatedInfo.HashWithoutResources,
State: toKubeContainerState(c.State),
}, nil
}


@ -19,6 +19,11 @@ limitations under the License.
package kuberuntime
import (
"k8s.io/kubernetes/pkg/kubelet/cm"
"math"
)
const (
milliCPUToCPU = 1000
@ -53,3 +58,22 @@ func milliCPUToQuota(milliCPU int64, period int64) (quota int64) {
return
}
// sharesToMilliCPU converts CpuShares (cpu.shares) to milli-CPU value
// TODO(vinaykul,InPlacePodVerticalScaling): Address issue that sets min req/limit to 2m/10m before beta
// See: https://github.com/kubernetes/kubernetes/pull/102884#discussion_r662552642
func sharesToMilliCPU(shares int64) int64 {
milliCPU := int64(0)
if shares >= int64(cm.MinShares) {
milliCPU = int64(math.Ceil(float64(shares*milliCPUToCPU) / float64(cm.SharesPerCPU)))
}
return milliCPU
}
// quotaToMilliCPU converts cpu.cfs_quota_us and cpu.cfs_period_us to milli-CPU value
func quotaToMilliCPU(quota int64, period int64) int64 {
if quota == -1 {
return int64(0)
}
return (quota * milliCPUToCPU) / period
}
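A worked example of the two conversions above (a standalone sketch, not part of the diff), assuming the cgroup defaults used by the kubelet's cm package: MinShares=2, SharesPerCPU=1024, and a 100ms CFS period:
package main

import (
	"fmt"
	"math"
)

const (
	milliCPUToCPU = 1000
	minShares     = 2    // assumed cm.MinShares
	sharesPerCPU  = 1024 // assumed cm.SharesPerCPU
)

func sharesToMilliCPU(shares int64) int64 {
	if shares < minShares {
		return 0
	}
	return int64(math.Ceil(float64(shares*milliCPUToCPU) / float64(sharesPerCPU)))
}

func quotaToMilliCPU(quota, period int64) int64 {
	if quota == -1 {
		return 0
	}
	return (quota * milliCPUToCPU) / period
}

func main() {
	fmt.Println(sharesToMilliCPU(512))          // 500: 512 shares is roughly half a CPU
	fmt.Println(sharesToMilliCPU(2))            // 2:   the MinShares floor
	fmt.Println(quotaToMilliCPU(25000, 100000)) // 250: 25ms of quota per 100ms period
	fmt.Println(quotaToMilliCPU(-1, 100000))    // 0:   no CPU limit set
}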


@ -28,6 +28,7 @@ import (
featuregatetesting "k8s.io/component-base/featuregate/testing"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/cm"
utilpointer "k8s.io/utils/pointer"
)
@ -670,3 +671,81 @@ func TestGetSeccompProfileDefaultSeccomp(t *testing.T) {
func getLocal(v string) *string {
return &v
}
func TestSharesToMilliCPU(t *testing.T) {
knownMilliCPUToShares := map[int64]int64{
0: 2,
1: 2,
2: 2,
3: 3,
4: 4,
32: 32,
64: 65,
100: 102,
250: 256,
500: 512,
1000: 1024,
1500: 1536,
2000: 2048,
}
t.Run("sharesToMilliCPUTest", func(t *testing.T) {
var testMilliCPU int64
for testMilliCPU = 0; testMilliCPU <= 2000; testMilliCPU++ {
shares := int64(cm.MilliCPUToShares(testMilliCPU))
if expectedShares, found := knownMilliCPUToShares[testMilliCPU]; found {
if shares != expectedShares {
t.Errorf("Test milliCPIToShares: Input milliCPU %v, expected shares %v, but got %v", testMilliCPU, expectedShares, shares)
}
}
expectedMilliCPU := testMilliCPU
if testMilliCPU < 2 {
expectedMilliCPU = 2
}
milliCPU := sharesToMilliCPU(shares)
if milliCPU != expectedMilliCPU {
t.Errorf("Test sharesToMilliCPU: Input shares %v, expected milliCPU %v, but got %v", shares, expectedMilliCPU, milliCPU)
}
}
})
}
func TestQuotaToMilliCPU(t *testing.T) {
for _, tc := range []struct {
name string
quota int64
period int64
expected int64
}{
{
name: "50m",
quota: int64(5000),
period: int64(100000),
expected: int64(50),
},
{
name: "750m",
quota: int64(75000),
period: int64(100000),
expected: int64(750),
},
{
name: "1000m",
quota: int64(100000),
period: int64(100000),
expected: int64(1000),
},
{
name: "1500m",
quota: int64(150000),
period: int64(100000),
expected: int64(1500),
}} {
t.Run(tc.name, func(t *testing.T) {
milliCPU := quotaToMilliCPU(tc.quota, tc.period)
if milliCPU != tc.expected {
t.Errorf("Test %s: Input quota %v and period %v, expected milliCPU %v, but got %v", tc.name, tc.quota, tc.period, tc.expected, milliCPU)
}
})
}
}


@ -46,7 +46,9 @@ import (
kubetypes "k8s.io/apimachinery/pkg/types"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/kubernetes/pkg/features"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/cri/remote"
"k8s.io/kubernetes/pkg/kubelet/events"
@ -359,6 +361,19 @@ func (m *kubeGenericRuntimeManager) generateContainerConfig(ctx context.Context,
return config, cleanupAction, nil
}
func (m *kubeGenericRuntimeManager) updateContainerResources(pod *v1.Pod, container *v1.Container, containerID kubecontainer.ContainerID) error {
containerResources := m.generateContainerResources(pod, container)
if containerResources == nil {
return fmt.Errorf("container %q updateContainerResources failed: cannot generate resources config", containerID.String())
}
ctx := context.Background()
err := m.runtimeService.UpdateContainerResources(ctx, containerID.ID, containerResources)
if err != nil {
klog.ErrorS(err, "UpdateContainerResources failed", "container", containerID.String())
}
return err
}
// makeDevices generates container devices for kubelet runtime v1.
func makeDevices(opts *kubecontainer.RunContainerOptions) []*runtimeapi.Device {
devices := make([]*runtimeapi.Device, len(opts.Devices))
@ -557,18 +572,25 @@ func (m *kubeGenericRuntimeManager) getPodContainerStatuses(ctx context.Context,
func toKubeContainerStatus(status *runtimeapi.ContainerStatus, runtimeName string) *kubecontainer.Status {
annotatedInfo := getContainerInfoFromAnnotations(status.Annotations)
labeledInfo := getContainerInfoFromLabels(status.Labels)
var cStatusResources *kubecontainer.ContainerResources
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
// If runtime reports cpu & memory resources info, add it to container status
cStatusResources = toKubeContainerResources(status.Resources)
}
cStatus := &kubecontainer.Status{
ID: kubecontainer.ContainerID{
Type: runtimeName,
ID: status.Id,
},
Name: labeledInfo.ContainerName,
Image: status.Image.Image,
ImageID: status.ImageRef,
Hash: annotatedInfo.Hash,
RestartCount: annotatedInfo.RestartCount,
State: toKubeContainerState(status.State),
CreatedAt: time.Unix(0, status.CreatedAt),
Name: labeledInfo.ContainerName,
Image: status.Image.Image,
ImageID: status.ImageRef,
Hash: annotatedInfo.Hash,
HashWithoutResources: annotatedInfo.HashWithoutResources,
RestartCount: annotatedInfo.RestartCount,
State: toKubeContainerState(status.State),
CreatedAt: time.Unix(0, status.CreatedAt),
Resources: cStatusResources,
}
if status.State != runtimeapi.ContainerState_CONTAINER_CREATED {


@ -60,7 +60,7 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.C
return nil, err
}
lc := &runtimeapi.LinuxContainerConfig{
Resources: &runtimeapi.LinuxContainerResources{},
Resources: m.generateLinuxContainerResources(pod, container, enforceMemoryQoS),
SecurityContext: sc,
}
@ -69,17 +69,22 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.C
lc.SecurityContext.NamespaceOptions.TargetId = nsTarget.ID
}
return lc, nil
}
// generateLinuxContainerResources generates linux container resources config for runtime
func (m *kubeGenericRuntimeManager) generateLinuxContainerResources(pod *v1.Pod, container *v1.Container, enforceMemoryQoS bool) *runtimeapi.LinuxContainerResources {
// set linux container resources
var cpuRequest *resource.Quantity
if _, cpuRequestExists := container.Resources.Requests[v1.ResourceCPU]; cpuRequestExists {
cpuRequest = container.Resources.Requests.Cpu()
}
lc.Resources = m.calculateLinuxResources(cpuRequest, container.Resources.Limits.Cpu(), container.Resources.Limits.Memory())
lcr := m.calculateLinuxResources(cpuRequest, container.Resources.Limits.Cpu(), container.Resources.Limits.Memory())
lc.Resources.OomScoreAdj = int64(qos.GetContainerOOMScoreAdjust(pod, container,
lcr.OomScoreAdj = int64(qos.GetContainerOOMScoreAdjust(pod, container,
int64(m.machineInfo.MemoryCapacity)))
lc.Resources.HugepageLimits = GetHugepageLimitsFromResources(container.Resources)
lcr.HugepageLimits = GetHugepageLimitsFromResources(container.Resources)
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.NodeSwap) {
// NOTE(ehashman): Behaviour is defined in the opencontainers runtime spec:
@ -87,14 +92,14 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.C
switch m.memorySwapBehavior {
case kubelettypes.UnlimitedSwap:
// -1 = unlimited swap
lc.Resources.MemorySwapLimitInBytes = -1
lcr.MemorySwapLimitInBytes = -1
case kubelettypes.LimitedSwap:
fallthrough
default:
// memorySwapLimit = total permitted memory+swap; if equal to memory limit, => 0 swap above memory limit
// Some swapping is still possible.
// Note that if memory limit is 0, memory swap limit is ignored.
lc.Resources.MemorySwapLimitInBytes = lc.Resources.MemoryLimitInBytes
lcr.MemorySwapLimitInBytes = lcr.MemoryLimitInBytes
}
}
@ -125,18 +130,31 @@ func (m *kubeGenericRuntimeManager) generateLinuxContainerConfig(container *v1.C
unified[cm.MemoryHigh] = strconv.FormatInt(memoryHigh, 10)
}
if len(unified) > 0 {
if lc.Resources.Unified == nil {
lc.Resources.Unified = unified
if lcr.Unified == nil {
lcr.Unified = unified
} else {
for k, v := range unified {
lc.Resources.Unified[k] = v
lcr.Unified[k] = v
}
}
klog.V(4).InfoS("MemoryQoS config for container", "pod", klog.KObj(pod), "containerName", container.Name, "unified", unified)
}
}
return lc, nil
return lcr
}
// generateContainerResources generates platform specific (linux) container resources config for runtime
func (m *kubeGenericRuntimeManager) generateContainerResources(pod *v1.Pod, container *v1.Container) *runtimeapi.ContainerResources {
enforceMemoryQoS := false
// Set memory.min and memory.high if MemoryQoS enabled with cgroups v2
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryQoS) &&
libcontainercgroups.IsCgroup2UnifiedMode() {
enforceMemoryQoS = true
}
return &runtimeapi.ContainerResources{
Linux: m.generateLinuxContainerResources(pod, container, enforceMemoryQoS),
}
}
// calculateLinuxResources will create the linuxContainerResources type based on the provided CPU and memory resource requests and limits
@ -218,3 +236,34 @@ func GetHugepageLimitsFromResources(resources v1.ResourceRequirements) []*runtim
return hugepageLimits
}
func toKubeContainerResources(statusResources *runtimeapi.ContainerResources) *kubecontainer.ContainerResources {
var cStatusResources *kubecontainer.ContainerResources
runtimeStatusResources := statusResources.GetLinux()
if runtimeStatusResources != nil {
var cpuLimit, memLimit, cpuRequest *resource.Quantity
if runtimeStatusResources.CpuPeriod > 0 {
milliCPU := quotaToMilliCPU(runtimeStatusResources.CpuQuota, runtimeStatusResources.CpuPeriod)
if milliCPU > 0 {
cpuLimit = resource.NewMilliQuantity(milliCPU, resource.DecimalSI)
}
}
if runtimeStatusResources.CpuShares > 0 {
milliCPU := sharesToMilliCPU(runtimeStatusResources.CpuShares)
if milliCPU > 0 {
cpuRequest = resource.NewMilliQuantity(milliCPU, resource.DecimalSI)
}
}
if runtimeStatusResources.MemoryLimitInBytes > 0 {
memLimit = resource.NewQuantity(runtimeStatusResources.MemoryLimitInBytes, resource.BinarySI)
}
if cpuLimit != nil || memLimit != nil || cpuRequest != nil {
cStatusResources = &kubecontainer.ContainerResources{
CPULimit: cpuLimit,
CPURequest: cpuRequest,
MemoryLimit: memLimit,
}
}
}
return cStatusResources
}


@ -31,6 +31,7 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/diff"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
@ -712,3 +713,168 @@ func TestGenerateLinuxContainerConfigSwap(t *testing.T) {
})
}
}
func TestGenerateLinuxContainerResources(t *testing.T) {
_, _, m, err := createTestRuntimeManager()
assert.NoError(t, err)
m.machineInfo.MemoryCapacity = 17179860387 // 16GB
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "foo",
Namespace: "bar",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "c1",
Image: "busybox",
},
},
},
Status: v1.PodStatus{},
}
for _, tc := range []struct {
name string
scalingFg bool
limits v1.ResourceList
requests v1.ResourceList
cStatus []v1.ContainerStatus
expected *runtimeapi.LinuxContainerResources
}{
{
"requests & limits, cpu & memory, guaranteed qos - no container status",
true,
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
[]v1.ContainerStatus{},
&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 524288000, OomScoreAdj: -997},
},
{
"requests & limits, cpu & memory, burstable qos - no container status",
true,
v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m"), v1.ResourceMemory: resource.MustParse("750Mi")},
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
[]v1.ContainerStatus{},
&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 786432000, OomScoreAdj: 970},
},
{
"best-effort qos - no container status",
true,
nil,
nil,
[]v1.ContainerStatus{},
&runtimeapi.LinuxContainerResources{CpuShares: 2, OomScoreAdj: 1000},
},
{
"requests & limits, cpu & memory, guaranteed qos - empty resources container status",
true,
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
[]v1.ContainerStatus{{Name: "c1"}},
&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 524288000, OomScoreAdj: -997},
},
{
"requests & limits, cpu & memory, burstable qos - empty resources container status",
true,
v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m"), v1.ResourceMemory: resource.MustParse("750Mi")},
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
[]v1.ContainerStatus{{Name: "c1"}},
&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 786432000, OomScoreAdj: 999},
},
{
"best-effort qos - empty resources container status",
true,
nil,
nil,
[]v1.ContainerStatus{{Name: "c1"}},
&runtimeapi.LinuxContainerResources{CpuShares: 2, OomScoreAdj: 1000},
},
{
"requests & limits, cpu & memory, guaranteed qos - container status with resourcesAllocated",
true,
v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("500Mi")},
v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("500Mi")},
[]v1.ContainerStatus{
{
Name: "c1",
ResourcesAllocated: v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("500Mi")},
},
},
&runtimeapi.LinuxContainerResources{CpuShares: 204, MemoryLimitInBytes: 524288000, OomScoreAdj: -997},
},
{
"requests & limits, cpu & memory, burstable qos - container status with resourcesAllocated",
true,
v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m"), v1.ResourceMemory: resource.MustParse("750Mi")},
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
[]v1.ContainerStatus{
{
Name: "c1",
ResourcesAllocated: v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
},
},
&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 786432000, OomScoreAdj: 970},
},
{
"requests & limits, cpu & memory, guaranteed qos - no container status",
false,
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
[]v1.ContainerStatus{},
&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 524288000, OomScoreAdj: -997},
},
{
"requests & limits, cpu & memory, burstable qos - container status with resourcesAllocated",
false,
v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m"), v1.ResourceMemory: resource.MustParse("750Mi")},
v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
[]v1.ContainerStatus{
{
Name: "c1",
ResourcesAllocated: v1.ResourceList{v1.ResourceCPU: resource.MustParse("250m"), v1.ResourceMemory: resource.MustParse("500Mi")},
},
},
&runtimeapi.LinuxContainerResources{CpuShares: 256, MemoryLimitInBytes: 786432000, OomScoreAdj: 970},
},
{
"requests & limits, cpu & memory, guaranteed qos - container status with resourcesAllocated",
false,
v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("500Mi")},
v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("500Mi")},
[]v1.ContainerStatus{
{
Name: "c1",
ResourcesAllocated: v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m"), v1.ResourceMemory: resource.MustParse("500Mi")},
},
},
&runtimeapi.LinuxContainerResources{CpuShares: 204, MemoryLimitInBytes: 524288000, OomScoreAdj: -997},
},
{
"best-effort qos - no container status",
false,
nil,
nil,
[]v1.ContainerStatus{},
&runtimeapi.LinuxContainerResources{CpuShares: 2, OomScoreAdj: 1000},
},
} {
t.Run(tc.name, func(t *testing.T) {
if tc.scalingFg {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
}
pod.Spec.Containers[0].Resources = v1.ResourceRequirements{Limits: tc.limits, Requests: tc.requests}
if len(tc.cStatus) > 0 {
pod.Status.ContainerStatuses = tc.cStatus
}
resources := m.generateLinuxContainerResources(pod, &pod.Spec.Containers[0], false)
tc.expected.HugepageLimits = resources.HugepageLimits
if diff.ObjectDiff(resources, tc.expected) != "" {
t.Errorf("Test %s: expected resources %+v, but got %+v", tc.name, tc.expected, resources)
}
})
}
//TODO(vinaykul,InPlacePodVerticalScaling): Add unit tests for cgroup v1 & v2
}


@ -28,6 +28,7 @@ import (
"github.com/google/go-cmp/cmp"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
@ -230,6 +231,111 @@ func TestToKubeContainerStatus(t *testing.T) {
}
}
// TestToKubeContainerStatusWithResources tests converting the CRI container status to
// the internal type (i.e., toKubeContainerStatus()) for containers that report Resources.
func TestToKubeContainerStatusWithResources(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
cid := &kubecontainer.ContainerID{Type: "testRuntime", ID: "dummyid"}
meta := &runtimeapi.ContainerMetadata{Name: "cname", Attempt: 3}
imageSpec := &runtimeapi.ImageSpec{Image: "fimage"}
var (
createdAt int64 = 327
startedAt int64 = 999
)
for desc, test := range map[string]struct {
input *runtimeapi.ContainerStatus
expected *kubecontainer.Status
}{
"container reporting cpu and memory": {
input: &runtimeapi.ContainerStatus{
Id: cid.ID,
Metadata: meta,
Image: imageSpec,
State: runtimeapi.ContainerState_CONTAINER_RUNNING,
CreatedAt: createdAt,
StartedAt: startedAt,
Resources: &runtimeapi.ContainerResources{
Linux: &runtimeapi.LinuxContainerResources{
CpuQuota: 25000,
CpuPeriod: 100000,
MemoryLimitInBytes: 524288000,
OomScoreAdj: -998,
},
},
},
expected: &kubecontainer.Status{
ID: *cid,
Image: imageSpec.Image,
State: kubecontainer.ContainerStateRunning,
CreatedAt: time.Unix(0, createdAt),
StartedAt: time.Unix(0, startedAt),
Resources: &kubecontainer.ContainerResources{
CPULimit: resource.NewMilliQuantity(250, resource.DecimalSI),
MemoryLimit: resource.NewQuantity(524288000, resource.BinarySI),
},
},
},
"container reporting cpu only": {
input: &runtimeapi.ContainerStatus{
Id: cid.ID,
Metadata: meta,
Image: imageSpec,
State: runtimeapi.ContainerState_CONTAINER_RUNNING,
CreatedAt: createdAt,
StartedAt: startedAt,
Resources: &runtimeapi.ContainerResources{
Linux: &runtimeapi.LinuxContainerResources{
CpuQuota: 50000,
CpuPeriod: 100000,
},
},
},
expected: &kubecontainer.Status{
ID: *cid,
Image: imageSpec.Image,
State: kubecontainer.ContainerStateRunning,
CreatedAt: time.Unix(0, createdAt),
StartedAt: time.Unix(0, startedAt),
Resources: &kubecontainer.ContainerResources{
CPULimit: resource.NewMilliQuantity(500, resource.DecimalSI),
},
},
},
"container reporting memory only": {
input: &runtimeapi.ContainerStatus{
Id: cid.ID,
Metadata: meta,
Image: imageSpec,
State: runtimeapi.ContainerState_CONTAINER_RUNNING,
CreatedAt: createdAt,
StartedAt: startedAt,
Resources: &runtimeapi.ContainerResources{
Linux: &runtimeapi.LinuxContainerResources{
MemoryLimitInBytes: 524288000,
OomScoreAdj: -998,
},
},
},
expected: &kubecontainer.Status{
ID: *cid,
Image: imageSpec.Image,
State: kubecontainer.ContainerStateRunning,
CreatedAt: time.Unix(0, createdAt),
StartedAt: time.Unix(0, startedAt),
Resources: &kubecontainer.ContainerResources{
MemoryLimit: resource.NewQuantity(524288000, resource.BinarySI),
},
},
},
} {
t.Run(desc, func(t *testing.T) {
actual := toKubeContainerStatus(test.input, cid.Type)
assert.Equal(t, test.expected, actual, desc)
})
}
}
func TestLifeCycleHook(t *testing.T) {
// Setup
@ -696,3 +802,40 @@ func TestKillContainerGracePeriod(t *testing.T) {
})
}
}
// TestUpdateContainerResources tests updating a running container's resources in a Pod.
func TestUpdateContainerResources(t *testing.T) {
fakeRuntime, _, m, errCreate := createTestRuntimeManager()
require.NoError(t, errCreate)
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
UID: "12345678",
Name: "bar",
Namespace: "new",
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "foo",
Image: "busybox",
ImagePullPolicy: v1.PullIfNotPresent,
},
},
},
}
// Create fake sandbox and container
_, fakeContainers := makeAndSetFakePod(t, m, fakeRuntime, pod)
assert.Equal(t, len(fakeContainers), 1)
ctx := context.Background()
cStatus, err := m.getPodContainerStatuses(ctx, pod.UID, pod.Name, pod.Namespace)
assert.NoError(t, err)
containerID := cStatus[0].ID
err = m.updateContainerResources(pod, &pod.Spec.Containers[0], containerID)
assert.NoError(t, err)
// Verify container is updated
assert.Contains(t, fakeRuntime.Called, "UpdateContainerResources")
}


@ -29,3 +29,12 @@ import (
func (m *kubeGenericRuntimeManager) applyPlatformSpecificContainerConfig(config *runtimeapi.ContainerConfig, container *v1.Container, pod *v1.Pod, uid *int64, username string, nsTarget *kubecontainer.ContainerID) error {
return nil
}
// generateContainerResources generates platform specific container resources config for runtime
func (m *kubeGenericRuntimeManager) generateContainerResources(pod *v1.Pod, container *v1.Container) *runtimeapi.ContainerResources {
return nil
}
func toKubeContainerResources(statusResources *runtimeapi.ContainerResources) *kubecontainer.ContainerResources {
return nil
}


@ -40,6 +40,12 @@ func (m *kubeGenericRuntimeManager) applyPlatformSpecificContainerConfig(config
return nil
}
// generateContainerResources generates platform specific (windows) container resources config for runtime
func (m *kubeGenericRuntimeManager) generateContainerResources(pod *v1.Pod, container *v1.Container) *runtimeapi.ContainerResources {
//TODO: Add windows support
return nil
}
// generateWindowsContainerConfig generates windows container config for kubelet runtime v1.
// Refer https://git.k8s.io/design-proposals-archive/node/cri-windows.md.
func (m *kubeGenericRuntimeManager) generateWindowsContainerConfig(container *v1.Container, pod *v1.Pod, uid *int64, username string) (*runtimeapi.WindowsContainerConfig, error) {
@ -126,3 +132,8 @@ func calculateCPUMaximum(cpuLimit *resource.Quantity, cpuCount int64) int64 {
}
return cpuMaximum
}
func toKubeContainerResources(statusResources *runtimeapi.ContainerResources) *kubecontainer.ContainerResources {
//TODO: Add windows support
return nil
}


@ -30,8 +30,10 @@ import (
"k8s.io/klog/v2"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
kubetypes "k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/diff"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
utilversion "k8s.io/apimachinery/pkg/util/version"
utilfeature "k8s.io/apiserver/pkg/util/feature"
@ -42,6 +44,7 @@ import (
internalapi "k8s.io/cri-api/pkg/apis"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
"k8s.io/kubernetes/pkg/api/legacyscheme"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/credentialprovider"
"k8s.io/kubernetes/pkg/credentialprovider/plugin"
"k8s.io/kubernetes/pkg/features"
@ -133,6 +136,9 @@ type kubeGenericRuntimeManager struct {
// The directory path for seccomp profiles.
seccompProfileRoot string
// Container management interface for pod container.
containerManager cm.ContainerManager
// Internal lifecycle event handlers for container resource management.
internalLifecycle cm.InternalContainerLifecycle
@ -190,7 +196,7 @@ func NewKubeGenericRuntimeManager(
cpuCFSQuotaPeriod metav1.Duration,
runtimeService internalapi.RuntimeService,
imageService internalapi.ImageManagerService,
internalLifecycle cm.InternalContainerLifecycle,
containerManager cm.ContainerManager,
logManager logs.ContainerLogManager,
runtimeClassManager *runtimeclass.Manager,
seccompDefault bool,
@ -215,7 +221,8 @@ func NewKubeGenericRuntimeManager(
runtimeHelper: runtimeHelper,
runtimeService: runtimeService,
imageService: imageService,
internalLifecycle: internalLifecycle,
containerManager: containerManager,
internalLifecycle: containerManager.InternalContainerLifecycle(),
logManager: logManager,
runtimeClassManager: runtimeClassManager,
logReduction: logreduction.NewLogReduction(identicalErrorDelay),
@ -446,6 +453,26 @@ type containerToKillInfo struct {
reason containerKillReason
}
// containerResources holds the set of resources applicable to the running container
type containerResources struct {
memoryLimit int64
memoryRequest int64
cpuLimit int64
cpuRequest int64
}
// containerToUpdateInfo contains necessary information to update a container's resources.
type containerToUpdateInfo struct {
// Index of the container in pod.Spec.Containers that needs resource update
apiContainerIdx int
// ID of the runtime container that needs resource update
kubeContainerID kubecontainer.ContainerID
// Desired resources for the running container
desiredContainerResources containerResources
// Most recently configured resources on the running container
currentContainerResources *containerResources
}
// podActions keeps information what to do for a pod.
type podActions struct {
// Stop all running (regular, init and ephemeral) containers and the sandbox for the pod.
@ -471,6 +498,11 @@ type podActions struct {
// EphemeralContainersToStart is a list of indexes for the ephemeral containers to start,
// where the index is the index of the specific container in pod.Spec.EphemeralContainers.
EphemeralContainersToStart []int
// ContainersToUpdate keeps a list of containers needing resource update.
// Container resource update is applicable only for CPU and memory.
ContainersToUpdate map[v1.ResourceName][]containerToUpdateInfo
// UpdatePodResources is true if container(s) need resource update with restart
UpdatePodResources bool
}
func containerChanged(container *v1.Container, containerStatus *kubecontainer.Status) (uint64, uint64, bool) {
@ -490,8 +522,267 @@ func containerSucceeded(c *v1.Container, podStatus *kubecontainer.PodStatus) boo
return cStatus.ExitCode == 0
}
func (m *kubeGenericRuntimeManager) computePodResizeAction(pod *v1.Pod, containerIdx int, kubeContainerStatus *kubecontainer.Status, changes *podActions) bool {
container := pod.Spec.Containers[containerIdx]
if container.Resources.Limits == nil || len(pod.Status.ContainerStatuses) == 0 {
return true
}
// Determine if the *running* container needs a resource update by comparing v1.Spec.Resources (desired)
// with v1.Status.Resources / runtime.Status.Resources (last known actual).
// Proceed only when the kubelet has accepted the resize, i.e. v1.Spec.Resources.Requests == v1.Status.ResourcesAllocated.
// Skip if the runtime containerID doesn't match the pod.Status containerID (container is restarting)
apiContainerStatus, exists := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name)
if !exists || apiContainerStatus.State.Running == nil || apiContainerStatus.Resources == nil ||
kubeContainerStatus.State != kubecontainer.ContainerStateRunning ||
kubeContainerStatus.ID.String() != apiContainerStatus.ContainerID ||
len(diff.ObjectDiff(container.Resources.Requests, apiContainerStatus.ResourcesAllocated)) != 0 {
return true
}
desiredMemoryLimit := container.Resources.Limits.Memory().Value()
desiredCPULimit := container.Resources.Limits.Cpu().MilliValue()
desiredCPURequest := container.Resources.Requests.Cpu().MilliValue()
currentMemoryLimit := apiContainerStatus.Resources.Limits.Memory().Value()
currentCPULimit := apiContainerStatus.Resources.Limits.Cpu().MilliValue()
currentCPURequest := apiContainerStatus.Resources.Requests.Cpu().MilliValue()
// Runtime container status resources (from CRI), if set, supersede v1 (API) container status resources.
if kubeContainerStatus.Resources != nil {
if kubeContainerStatus.Resources.MemoryLimit != nil {
currentMemoryLimit = kubeContainerStatus.Resources.MemoryLimit.Value()
}
if kubeContainerStatus.Resources.CPULimit != nil {
currentCPULimit = kubeContainerStatus.Resources.CPULimit.MilliValue()
}
if kubeContainerStatus.Resources.CPURequest != nil {
currentCPURequest = kubeContainerStatus.Resources.CPURequest.MilliValue()
}
}
// Note: cgroups don't support a memory request today, so we don't compare that. If canAdmitPod, called during
// handlePodResourcesResize, finds 'fit', then desiredMemoryRequest == currentMemoryRequest.
if desiredMemoryLimit == currentMemoryLimit && desiredCPULimit == currentCPULimit && desiredCPURequest == currentCPURequest {
return true
}
desiredResources := containerResources{
memoryLimit: desiredMemoryLimit,
memoryRequest: apiContainerStatus.ResourcesAllocated.Memory().Value(),
cpuLimit: desiredCPULimit,
cpuRequest: desiredCPURequest,
}
currentResources := containerResources{
memoryLimit: currentMemoryLimit,
memoryRequest: apiContainerStatus.Resources.Requests.Memory().Value(),
cpuLimit: currentCPULimit,
cpuRequest: currentCPURequest,
}
resizePolicy := make(map[v1.ResourceName]v1.ResourceResizePolicy)
for _, pol := range container.ResizePolicy {
resizePolicy[pol.ResourceName] = pol.Policy
}
determineContainerResize := func(rName v1.ResourceName, specValue, statusValue int64) (resize, restart bool) {
if specValue == statusValue {
return false, false
}
if resizePolicy[rName] == v1.RestartRequired {
return true, true
}
return true, false
}
markContainerForUpdate := func(rName v1.ResourceName, specValue, statusValue int64) {
cUpdateInfo := containerToUpdateInfo{
apiContainerIdx: containerIdx,
kubeContainerID: kubeContainerStatus.ID,
desiredContainerResources: desiredResources,
currentContainerResources: &currentResources,
}
// Order the container updates such that resource decreases are applied before increases
switch {
case specValue > statusValue: // append
changes.ContainersToUpdate[rName] = append(changes.ContainersToUpdate[rName], cUpdateInfo)
case specValue < statusValue: // prepend
changes.ContainersToUpdate[rName] = append(changes.ContainersToUpdate[rName], containerToUpdateInfo{})
copy(changes.ContainersToUpdate[rName][1:], changes.ContainersToUpdate[rName])
changes.ContainersToUpdate[rName][0] = cUpdateInfo
}
}
resizeMemLim, restartMemLim := determineContainerResize(v1.ResourceMemory, desiredMemoryLimit, currentMemoryLimit)
resizeCPULim, restartCPULim := determineContainerResize(v1.ResourceCPU, desiredCPULimit, currentCPULimit)
resizeCPUReq, restartCPUReq := determineContainerResize(v1.ResourceCPU, desiredCPURequest, currentCPURequest)
if restartCPULim || restartCPUReq || restartMemLim {
// resize policy requires this container to restart
changes.ContainersToKill[kubeContainerStatus.ID] = containerToKillInfo{
name: kubeContainerStatus.Name,
container: &pod.Spec.Containers[containerIdx],
message: fmt.Sprintf("Container %s resize requires restart", container.Name),
}
changes.ContainersToStart = append(changes.ContainersToStart, containerIdx)
changes.UpdatePodResources = true
return false
} else {
if resizeMemLim {
markContainerForUpdate(v1.ResourceMemory, desiredMemoryLimit, currentMemoryLimit)
}
if resizeCPULim {
markContainerForUpdate(v1.ResourceCPU, desiredCPULimit, currentCPULimit)
} else if resizeCPUReq {
markContainerForUpdate(v1.ResourceCPU, desiredCPURequest, currentCPURequest)
}
}
return true
}
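// Illustrative sketch, not part of this change: the ordering rule that markContainerForUpdate
// (above) applies to the per-resource update list, pulled out as a hypothetical standalone
// helper. Decreases are prepended so they are actuated before increases.
func orderResizeUpdates(updates []containerToUpdateInfo, info containerToUpdateInfo, desired, current int64) []containerToUpdateInfo {
	switch {
	case desired > current:
		// resource increase: apply after any pending decreases
		return append(updates, info)
	case desired < current:
		// resource decrease: apply before everything already queued
		return append([]containerToUpdateInfo{info}, updates...)
	}
	// no change for this resource: leave the list as-is
	return updates
}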
func (m *kubeGenericRuntimeManager) doPodResizeAction(pod *v1.Pod, podStatus *kubecontainer.PodStatus, podContainerChanges podActions, result kubecontainer.PodSyncResult) {
pcm := m.containerManager.NewPodContainerManager()
//TODO(vinaykul,InPlacePodVerticalScaling): Figure out best way to get enforceMemoryQoS value (parameter #4 below) in platform-agnostic way
podResources := cm.ResourceConfigForPod(pod, m.cpuCFSQuota, uint64((m.cpuCFSQuotaPeriod.Duration)/time.Microsecond), false)
if podResources == nil {
klog.ErrorS(nil, "Unable to get resource configuration", "pod", pod.Name)
result.Fail(fmt.Errorf("unable to get resource configuration while processing resize for pod %s", pod.Name))
return
}
setPodCgroupConfig := func(rName v1.ResourceName, setLimitValue bool) error {
var err error
switch rName {
case v1.ResourceCPU:
podCpuResources := &cm.ResourceConfig{CPUPeriod: podResources.CPUPeriod}
if setLimitValue {
podCpuResources.CPUQuota = podResources.CPUQuota
} else {
podCpuResources.CPUShares = podResources.CPUShares
}
err = pcm.SetPodCgroupConfig(pod, rName, podCpuResources)
case v1.ResourceMemory:
err = pcm.SetPodCgroupConfig(pod, rName, podResources)
}
if err != nil {
klog.ErrorS(err, "Failed to set cgroup config", "resource", rName, "pod", pod.Name)
}
return err
}
// Memory and CPU are updated separately because memory resizes may be ordered differently than CPU resizes.
// If resize results in net pod resource increase, set pod cgroup config before resizing containers.
// If resize results in net pod resource decrease, set pod cgroup config after resizing containers.
// If an error occurs at any point, abort; future SyncPod iterations will retry whatever remains unfinished.
resizeContainers := func(rName v1.ResourceName, currPodCgLimValue, newPodCgLimValue, currPodCgReqValue, newPodCgReqValue int64) error {
var err error
if newPodCgLimValue > currPodCgLimValue {
if err = setPodCgroupConfig(rName, true); err != nil {
return err
}
}
if newPodCgReqValue > currPodCgReqValue {
if err = setPodCgroupConfig(rName, false); err != nil {
return err
}
}
if len(podContainerChanges.ContainersToUpdate[rName]) > 0 {
if err = m.updatePodContainerResources(pod, rName, podContainerChanges.ContainersToUpdate[rName]); err != nil {
klog.ErrorS(err, "updatePodContainerResources failed", "pod", format.Pod(pod), "resource", rName)
return err
}
}
if newPodCgLimValue < currPodCgLimValue {
err = setPodCgroupConfig(rName, true)
}
if newPodCgReqValue < currPodCgReqValue {
if err = setPodCgroupConfig(rName, false); err != nil {
return err
}
}
return err
}
if len(podContainerChanges.ContainersToUpdate[v1.ResourceMemory]) > 0 || podContainerChanges.UpdatePodResources {
currentPodMemoryConfig, err := pcm.GetPodCgroupConfig(pod, v1.ResourceMemory)
if err != nil {
klog.ErrorS(err, "GetPodCgroupConfig for memory failed", "pod", pod.Name)
result.Fail(err)
return
}
currentPodMemoryUsage, err := pcm.GetPodCgroupMemoryUsage(pod)
if err != nil {
klog.ErrorS(err, "GetPodCgroupMemoryUsage failed", "pod", pod.Name)
result.Fail(err)
return
}
if currentPodMemoryUsage >= uint64(*podResources.Memory) {
klog.ErrorS(nil, "Aborting attempt to set pod memory limit less than current memory usage", "pod", pod.Name)
result.Fail(fmt.Errorf("Aborting attempt to set pod memory limit less than current memory usage for pod %s", pod.Name))
return
}
if errResize := resizeContainers(v1.ResourceMemory, int64(*currentPodMemoryConfig.Memory), *podResources.Memory, 0, 0); errResize != nil {
result.Fail(errResize)
return
}
}
if len(podContainerChanges.ContainersToUpdate[v1.ResourceCPU]) > 0 || podContainerChanges.UpdatePodResources {
currentPodCpuConfig, err := pcm.GetPodCgroupConfig(pod, v1.ResourceCPU)
if err != nil {
klog.ErrorS(err, "GetPodCgroupConfig for CPU failed", "pod", pod.Name)
result.Fail(err)
return
}
if errResize := resizeContainers(v1.ResourceCPU, *currentPodCpuConfig.CPUQuota, *podResources.CPUQuota,
int64(*currentPodCpuConfig.CPUShares), int64(*podResources.CPUShares)); errResize != nil {
result.Fail(errResize)
return
}
}
}
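// Illustrative sketch, not part of this change: the cgroup ordering invariant that resizeContainers
// (above) enforces, reduced to a single limit value. A growing pod-level limit is raised before the
// container limits change; a shrinking one is lowered only afterwards. Both function parameters are
// hypothetical stand-ins for setPodCgroupConfig and updatePodContainerResources.
func orderedLimitResize(currPodLimit, newPodLimit int64, setPodLimit func() error, updateContainerLimits func() error) error {
	if newPodLimit > currPodLimit {
		if err := setPodLimit(); err != nil {
			return err
		}
	}
	if err := updateContainerLimits(); err != nil {
		return err
	}
	if newPodLimit < currPodLimit {
		return setPodLimit()
	}
	return nil
}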
func (m *kubeGenericRuntimeManager) updatePodContainerResources(pod *v1.Pod, resourceName v1.ResourceName, containersToUpdate []containerToUpdateInfo) error {
klog.V(5).InfoS("Updating container resources", "pod", klog.KObj(pod))
for _, cInfo := range containersToUpdate {
container := pod.Spec.Containers[cInfo.apiContainerIdx].DeepCopy()
// If updating memory limit, use most recently configured CPU request and limit values.
// If updating CPU request and limit, use most recently configured memory request and limit values.
switch resourceName {
case v1.ResourceMemory:
container.Resources.Limits = v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(cInfo.currentContainerResources.cpuLimit, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(cInfo.desiredContainerResources.memoryLimit, resource.BinarySI),
}
container.Resources.Requests = v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(cInfo.currentContainerResources.cpuRequest, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(cInfo.desiredContainerResources.memoryRequest, resource.BinarySI),
}
case v1.ResourceCPU:
container.Resources.Limits = v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(cInfo.desiredContainerResources.cpuLimit, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(cInfo.currentContainerResources.memoryLimit, resource.BinarySI),
}
container.Resources.Requests = v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(cInfo.desiredContainerResources.cpuRequest, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(cInfo.currentContainerResources.memoryRequest, resource.BinarySI),
}
}
if err := m.updateContainerResources(pod, container, cInfo.kubeContainerID); err != nil {
// Log error and abort as container updates need to succeed in the order determined by computePodResizeAction.
// The recovery path is for SyncPod to keep retrying at later times until it succeeds.
klog.ErrorS(err, "updateContainerResources failed", "container", container.Name, "cID", cInfo.kubeContainerID,
"pod", format.Pod(pod), "resourceName", resourceName)
return err
}
// If UpdateContainerResources is error-free, the desired values for 'resourceName' were accepted by the runtime.
// So we update currentContainerResources for 'resourceName', which is our view of the most recently configured resources.
// Note: we can't rely on GetPodStatus, as the runtime may lag in actuating the resource values it just accepted.
switch resourceName {
case v1.ResourceMemory:
cInfo.currentContainerResources.memoryLimit = cInfo.desiredContainerResources.memoryLimit
cInfo.currentContainerResources.memoryRequest = cInfo.desiredContainerResources.memoryRequest
case v1.ResourceCPU:
cInfo.currentContainerResources.cpuLimit = cInfo.desiredContainerResources.cpuLimit
cInfo.currentContainerResources.cpuRequest = cInfo.desiredContainerResources.cpuRequest
}
}
return nil
}
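// Illustrative sketch, not part of this change: currentContainerResources is a pointer shared by
// the CPU and memory update entries of the same container, so a successful memory update is
// visible to the later CPU update (and vice versa). The values below are hypothetical.
func exampleSharedCurrentResources() int64 {
	current := &containerResources{cpuLimit: 200, memoryLimit: 200 * 1024 * 1024}
	memEntry := containerToUpdateInfo{currentContainerResources: current}
	cpuEntry := containerToUpdateInfo{currentContainerResources: current}
	// simulate a successful memory-only update having been recorded
	memEntry.currentContainerResources.memoryLimit = 300 * 1024 * 1024
	// the CPU entry now sees the updated memory limit when it builds its ResourceRequirements
	return cpuEntry.currentContainerResources.memoryLimit // 314572800
}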
// computePodActions checks whether the pod spec has changed and returns the changes if true.
func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *kubecontainer.PodStatus) podActions {
func (m *kubeGenericRuntimeManager) computePodActions(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus) podActions {
klog.V(5).InfoS("Syncing Pod", "pod", klog.KObj(pod))
createPodSandbox, attempt, sandboxID := runtimeutil.PodSandboxChanged(pod, podStatus)
@ -582,6 +873,14 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
return changes
}
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
changes.ContainersToUpdate = make(map[v1.ResourceName][]containerToUpdateInfo)
latestPodStatus, err := m.GetPodStatus(ctx, podStatus.ID, pod.Name, pod.Namespace)
if err == nil {
podStatus = latestPodStatus
}
}
// Number of running containers to keep.
keepCount := 0
// check the status of containers.
@ -623,7 +922,10 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
var message string
var reason containerKillReason
restart := shouldRestartOnFailure(pod)
if _, _, changed := containerChanged(&container, containerStatus); changed {
// Do not restart if only the Resources field has changed with InPlacePodVerticalScaling enabled
if _, _, changed := containerChanged(&container, containerStatus); changed &&
(!utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) ||
kubecontainer.HashContainerWithoutResources(&container) != containerStatus.HashWithoutResources) {
message = fmt.Sprintf("Container %s definition changed", container.Name)
// Restart regardless of the restart policy because the container
// spec changed.
@ -636,6 +938,10 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
// If the container failed the startup probe, we should kill it.
message = fmt.Sprintf("Container %s failed startup probe", container.Name)
reason = reasonStartupProbe
} else if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) &&
!m.computePodResizeAction(pod, idx, containerStatus, &changes) {
// computePodResizeAction updates 'changes' if resize policy requires restarting this container
continue
} else {
// Keep the container.
keepCount++
@ -674,10 +980,11 @@ func (m *kubeGenericRuntimeManager) computePodActions(pod *v1.Pod, podStatus *ku
// 4. Create sandbox if necessary.
// 5. Create ephemeral containers.
// 6. Create init containers.
// 7. Create normal containers.
// 7. Resize running containers (if InPlacePodVerticalScaling==true)
// 8. Create normal containers.
func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) {
// Step 1: Compute sandbox and container changes.
podContainerChanges := m.computePodActions(pod, podStatus)
podContainerChanges := m.computePodActions(ctx, pod, podStatus)
klog.V(3).InfoS("computePodActions got for pod", "podActions", podContainerChanges, "pod", klog.KObj(pod))
if podContainerChanges.CreateSandbox {
ref, err := ref.GetReference(legacyscheme.Scheme, pod)
@ -903,7 +1210,14 @@ func (m *kubeGenericRuntimeManager) SyncPod(ctx context.Context, pod *v1.Pod, po
klog.V(4).InfoS("Completed init container for pod", "containerName", container.Name, "pod", klog.KObj(pod))
}
// Step 7: start containers in podContainerChanges.ContainersToStart.
// Step 7: For containers in podContainerChanges.ContainersToUpdate[CPU,Memory] list, invoke UpdateContainerResources
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
if len(podContainerChanges.ContainersToUpdate) > 0 || podContainerChanges.UpdatePodResources {
m.doPodResizeAction(pod, podStatus, podContainerChanges, result)
}
}
// Step 8: start containers in podContainerChanges.ContainersToStart.
for _, idx := range podContainerChanges.ContainersToStart {
start(ctx, "container", metrics.Container, containerStartSpec(&pod.Spec.Containers[idx]))
}
@ -1096,7 +1410,6 @@ func (m *kubeGenericRuntimeManager) GetPodStatus(ctx context.Context, uid kubety
}
m.logReduction.ClearID(podFullName)
return &kubecontainer.PodStatus{
ID: uid,
Name: name,

View File

@ -37,11 +37,14 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/util/flowcontrol"
featuregatetesting "k8s.io/component-base/featuregate/testing"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
apitest "k8s.io/cri-api/pkg/apis/testing"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/credentialprovider"
"k8s.io/kubernetes/pkg/features"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
containertest "k8s.io/kubernetes/pkg/kubelet/container/testing"
proberesults "k8s.io/kubernetes/pkg/kubelet/prober/results"
@ -861,6 +864,28 @@ func makeBasePodAndStatus() (*v1.Pod, *kubecontainer.PodStatus) {
},
},
},
Status: v1.PodStatus{
ContainerStatuses: []v1.ContainerStatus{
{
ContainerID: "://id1",
Name: "foo1",
Image: "busybox",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
},
{
ContainerID: "://id2",
Name: "foo2",
Image: "busybox",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
},
{
ContainerID: "://id3",
Name: "foo3",
Image: "busybox",
State: v1.ContainerState{Running: &v1.ContainerStateRunning{}},
},
},
},
}
status := &kubecontainer.PodStatus{
ID: pod.UID,
@ -1149,7 +1174,8 @@ func TestComputePodActions(t *testing.T) {
if test.mutateStatusFn != nil {
test.mutateStatusFn(status)
}
actions := m.computePodActions(pod, status)
ctx := context.Background()
actions := m.computePodActions(ctx, pod, status)
verifyActions(t, &test.actions, &actions, desc)
if test.resetStatusFn != nil {
test.resetStatusFn(status)
@ -1364,7 +1390,8 @@ func TestComputePodActionsWithInitContainers(t *testing.T) {
if test.mutateStatusFn != nil {
test.mutateStatusFn(status)
}
actions := m.computePodActions(pod, status)
ctx := context.Background()
actions := m.computePodActions(ctx, pod, status)
verifyActions(t, &test.actions, &actions, desc)
}
}
@ -1546,7 +1573,8 @@ func TestComputePodActionsWithInitAndEphemeralContainers(t *testing.T) {
if test.mutateStatusFn != nil {
test.mutateStatusFn(status)
}
actions := m.computePodActions(pod, status)
ctx := context.Background()
actions := m.computePodActions(ctx, pod, status)
verifyActions(t, &test.actions, &actions, desc)
}
}
@ -1615,3 +1643,467 @@ func makeBasePodAndStatusWithInitAndEphemeralContainers() (*v1.Pod, *kubecontain
})
return pod, status
}
func TestComputePodActionsForPodResize(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
fakeRuntime, _, m, err := createTestRuntimeManager()
m.machineInfo.MemoryCapacity = 17179860387 // 16GB
assert.NoError(t, err)
cpu100m := resource.MustParse("100m")
cpu200m := resource.MustParse("200m")
mem100M := resource.MustParse("100Mi")
mem200M := resource.MustParse("200Mi")
cpuPolicyRestartNotRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceCPU, Policy: v1.RestartNotRequired}
memPolicyRestartNotRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceMemory, Policy: v1.RestartNotRequired}
cpuPolicyRestartRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceCPU, Policy: v1.RestartRequired}
memPolicyRestartRequired := v1.ContainerResizePolicy{ResourceName: v1.ResourceMemory, Policy: v1.RestartRequired}
for desc, test := range map[string]struct {
podResizePolicyFn func(*v1.Pod)
mutatePodFn func(*v1.Pod)
getExpectedPodActionsFn func(*v1.Pod, *kubecontainer.PodStatus) *podActions
}{
"Update container CPU and memory resources": {
mutatePodFn: func(pod *v1.Pod) {
pod.Spec.Containers[1].Resources = v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
}
if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[1].Name); found {
pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M},
}
}
},
getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions {
kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[1].Name)
pa := podActions{
SandboxID: podStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{},
ContainersToKill: getKillMap(pod, podStatus, []int{}),
ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{
v1.ResourceMemory: {
{
apiContainerIdx: 1,
kubeContainerID: kcs.ID,
desiredContainerResources: containerResources{
memoryLimit: mem100M.Value(),
cpuLimit: cpu100m.MilliValue(),
},
currentContainerResources: &containerResources{
memoryLimit: mem200M.Value(),
cpuLimit: cpu200m.MilliValue(),
},
},
},
v1.ResourceCPU: {
{
apiContainerIdx: 1,
kubeContainerID: kcs.ID,
desiredContainerResources: containerResources{
memoryLimit: mem100M.Value(),
cpuLimit: cpu100m.MilliValue(),
},
currentContainerResources: &containerResources{
memoryLimit: mem200M.Value(),
cpuLimit: cpu200m.MilliValue(),
},
},
},
},
}
return &pa
},
},
"Update container CPU resources": {
mutatePodFn: func(pod *v1.Pod) {
pod.Spec.Containers[1].Resources = v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
}
if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[1].Name); found {
pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem100M},
}
}
},
getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions {
kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[1].Name)
pa := podActions{
SandboxID: podStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{},
ContainersToKill: getKillMap(pod, podStatus, []int{}),
ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{
v1.ResourceCPU: {
{
apiContainerIdx: 1,
kubeContainerID: kcs.ID,
desiredContainerResources: containerResources{
memoryLimit: mem100M.Value(),
cpuLimit: cpu100m.MilliValue(),
},
currentContainerResources: &containerResources{
memoryLimit: mem100M.Value(),
cpuLimit: cpu200m.MilliValue(),
},
},
},
},
}
return &pa
},
},
"Update container memory resources": {
mutatePodFn: func(pod *v1.Pod) {
pod.Spec.Containers[2].Resources = v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M},
}
if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[2].Name); found {
pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem100M},
}
}
},
getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions {
kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[2].Name)
pa := podActions{
SandboxID: podStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{},
ContainersToKill: getKillMap(pod, podStatus, []int{}),
ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{
v1.ResourceMemory: {
{
apiContainerIdx: 2,
kubeContainerID: kcs.ID,
desiredContainerResources: containerResources{
memoryLimit: mem200M.Value(),
cpuLimit: cpu200m.MilliValue(),
},
currentContainerResources: &containerResources{
memoryLimit: mem100M.Value(),
cpuLimit: cpu200m.MilliValue(),
},
},
},
},
}
return &pa
},
},
"Nothing when spec.Resources and status.Resources are equal": {
mutatePodFn: func(pod *v1.Pod) {
pod.Spec.Containers[1].Resources = v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m},
}
pod.Status.ContainerStatuses[1].Resources = &v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m},
}
},
getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions {
pa := podActions{
SandboxID: podStatus.SandboxStatuses[0].Id,
ContainersToKill: getKillMap(pod, podStatus, []int{}),
ContainersToStart: []int{},
ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{},
}
return &pa
},
},
"Update container CPU and memory resources with Restart policy for CPU": {
podResizePolicyFn: func(pod *v1.Pod) {
pod.Spec.Containers[0].ResizePolicy = []v1.ContainerResizePolicy{cpuPolicyRestartRequired, memPolicyRestartNotRequired}
},
mutatePodFn: func(pod *v1.Pod) {
pod.Spec.Containers[0].Resources = v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M},
}
if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[0].Name); found {
pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
}
}
},
getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions {
kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[0].Name)
killMap := make(map[kubecontainer.ContainerID]containerToKillInfo)
killMap[kcs.ID] = containerToKillInfo{
container: &pod.Spec.Containers[0],
name: pod.Spec.Containers[0].Name,
}
pa := podActions{
SandboxID: podStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{0},
ContainersToKill: killMap,
ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{},
UpdatePodResources: true,
}
return &pa
},
},
"Update container CPU and memory resources with Restart policy for memory": {
podResizePolicyFn: func(pod *v1.Pod) {
pod.Spec.Containers[2].ResizePolicy = []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartRequired}
},
mutatePodFn: func(pod *v1.Pod) {
pod.Spec.Containers[2].Resources = v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M},
}
if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[2].Name); found {
pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
}
}
},
getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions {
kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[2].Name)
killMap := make(map[kubecontainer.ContainerID]containerToKillInfo)
killMap[kcs.ID] = containerToKillInfo{
container: &pod.Spec.Containers[2],
name: pod.Spec.Containers[2].Name,
}
pa := podActions{
SandboxID: podStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{2},
ContainersToKill: killMap,
ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{},
UpdatePodResources: true,
}
return &pa
},
},
"Update container memory resources with Restart policy for CPU": {
podResizePolicyFn: func(pod *v1.Pod) {
pod.Spec.Containers[1].ResizePolicy = []v1.ContainerResizePolicy{cpuPolicyRestartRequired, memPolicyRestartNotRequired}
},
mutatePodFn: func(pod *v1.Pod) {
pod.Spec.Containers[1].Resources = v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem200M},
}
if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[1].Name); found {
pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
}
}
},
getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions {
kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[1].Name)
pa := podActions{
SandboxID: podStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{},
ContainersToKill: getKillMap(pod, podStatus, []int{}),
ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{
v1.ResourceMemory: {
{
apiContainerIdx: 1,
kubeContainerID: kcs.ID,
desiredContainerResources: containerResources{
memoryLimit: mem200M.Value(),
cpuLimit: cpu100m.MilliValue(),
},
currentContainerResources: &containerResources{
memoryLimit: mem100M.Value(),
cpuLimit: cpu100m.MilliValue(),
},
},
},
},
}
return &pa
},
},
"Update container CPU resources with Restart policy for memory": {
podResizePolicyFn: func(pod *v1.Pod) {
pod.Spec.Containers[2].ResizePolicy = []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartRequired}
},
mutatePodFn: func(pod *v1.Pod) {
pod.Spec.Containers[2].Resources = v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem100M},
}
if idx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[2].Name); found {
pod.Status.ContainerStatuses[idx].Resources = &v1.ResourceRequirements{
Limits: v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M},
}
}
},
getExpectedPodActionsFn: func(pod *v1.Pod, podStatus *kubecontainer.PodStatus) *podActions {
kcs := podStatus.FindContainerStatusByName(pod.Spec.Containers[2].Name)
pa := podActions{
SandboxID: podStatus.SandboxStatuses[0].Id,
ContainersToStart: []int{},
ContainersToKill: getKillMap(pod, podStatus, []int{}),
ContainersToUpdate: map[v1.ResourceName][]containerToUpdateInfo{
v1.ResourceCPU: {
{
apiContainerIdx: 2,
kubeContainerID: kcs.ID,
desiredContainerResources: containerResources{
memoryLimit: mem100M.Value(),
cpuLimit: cpu200m.MilliValue(),
},
currentContainerResources: &containerResources{
memoryLimit: mem100M.Value(),
cpuLimit: cpu100m.MilliValue(),
},
},
},
},
}
return &pa
},
},
} {
pod, kps := makeBasePodAndStatus()
for idx := range pod.Spec.Containers {
// default resize policy when pod resize feature is enabled
pod.Spec.Containers[idx].ResizePolicy = []v1.ContainerResizePolicy{cpuPolicyRestartNotRequired, memPolicyRestartNotRequired}
}
if test.podResizePolicyFn != nil {
test.podResizePolicyFn(pod)
}
for idx := range pod.Spec.Containers {
// compute hash
if kcs := kps.FindContainerStatusByName(pod.Spec.Containers[idx].Name); kcs != nil {
kcs.Hash = kubecontainer.HashContainer(&pod.Spec.Containers[idx])
kcs.HashWithoutResources = kubecontainer.HashContainerWithoutResources(&pod.Spec.Containers[idx])
}
}
makeAndSetFakePod(t, m, fakeRuntime, pod)
ctx := context.Background()
status, _ := m.GetPodStatus(ctx, kps.ID, pod.Name, pod.Namespace)
for idx := range pod.Spec.Containers {
if rcs := status.FindContainerStatusByName(pod.Spec.Containers[idx].Name); rcs != nil {
if csIdx, found := podutil.GetIndexOfContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[idx].Name); found {
pod.Status.ContainerStatuses[csIdx].ContainerID = rcs.ID.String()
}
}
}
for idx := range pod.Spec.Containers {
if kcs := kps.FindContainerStatusByName(pod.Spec.Containers[idx].Name); kcs != nil {
kcs.Hash = kubecontainer.HashContainer(&pod.Spec.Containers[idx])
kcs.HashWithoutResources = kubecontainer.HashContainerWithoutResources(&pod.Spec.Containers[idx])
}
}
if test.mutatePodFn != nil {
test.mutatePodFn(pod)
}
expectedActions := test.getExpectedPodActionsFn(pod, status)
actions := m.computePodActions(ctx, pod, status)
verifyActions(t, expectedActions, &actions, desc)
}
}
func TestUpdatePodContainerResources(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
fakeRuntime, _, m, err := createTestRuntimeManager()
m.machineInfo.MemoryCapacity = 17179860387 // 16GB
assert.NoError(t, err)
cpu100m := resource.MustParse("100m")
cpu150m := resource.MustParse("150m")
cpu200m := resource.MustParse("200m")
cpu250m := resource.MustParse("250m")
cpu300m := resource.MustParse("300m")
cpu350m := resource.MustParse("350m")
mem100M := resource.MustParse("100Mi")
mem150M := resource.MustParse("150Mi")
mem200M := resource.MustParse("200Mi")
mem250M := resource.MustParse("250Mi")
mem300M := resource.MustParse("300Mi")
mem350M := resource.MustParse("350Mi")
res100m100Mi := v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem100M}
res150m100Mi := v1.ResourceList{v1.ResourceCPU: cpu150m, v1.ResourceMemory: mem100M}
res100m150Mi := v1.ResourceList{v1.ResourceCPU: cpu100m, v1.ResourceMemory: mem150M}
res150m150Mi := v1.ResourceList{v1.ResourceCPU: cpu150m, v1.ResourceMemory: mem150M}
res200m200Mi := v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem200M}
res250m200Mi := v1.ResourceList{v1.ResourceCPU: cpu250m, v1.ResourceMemory: mem200M}
res200m250Mi := v1.ResourceList{v1.ResourceCPU: cpu200m, v1.ResourceMemory: mem250M}
res250m250Mi := v1.ResourceList{v1.ResourceCPU: cpu250m, v1.ResourceMemory: mem250M}
res300m300Mi := v1.ResourceList{v1.ResourceCPU: cpu300m, v1.ResourceMemory: mem300M}
res350m300Mi := v1.ResourceList{v1.ResourceCPU: cpu350m, v1.ResourceMemory: mem300M}
res300m350Mi := v1.ResourceList{v1.ResourceCPU: cpu300m, v1.ResourceMemory: mem350M}
res350m350Mi := v1.ResourceList{v1.ResourceCPU: cpu350m, v1.ResourceMemory: mem350M}
pod, _ := makeBasePodAndStatus()
makeAndSetFakePod(t, m, fakeRuntime, pod)
for dsc, tc := range map[string]struct {
resourceName v1.ResourceName
apiSpecResources []v1.ResourceRequirements
apiStatusResources []v1.ResourceRequirements
requiresRestart []bool
invokeUpdateResources bool
expectedCurrentLimits []v1.ResourceList
expectedCurrentRequests []v1.ResourceList
}{
"Guaranteed QoS Pod - CPU & memory resize requested, update CPU": {
resourceName: v1.ResourceCPU,
apiSpecResources: []v1.ResourceRequirements{
{Limits: res150m150Mi, Requests: res150m150Mi},
{Limits: res250m250Mi, Requests: res250m250Mi},
{Limits: res350m350Mi, Requests: res350m350Mi},
},
apiStatusResources: []v1.ResourceRequirements{
{Limits: res100m100Mi, Requests: res100m100Mi},
{Limits: res200m200Mi, Requests: res200m200Mi},
{Limits: res300m300Mi, Requests: res300m300Mi},
},
requiresRestart: []bool{false, false, false},
invokeUpdateResources: true,
expectedCurrentLimits: []v1.ResourceList{res150m100Mi, res250m200Mi, res350m300Mi},
expectedCurrentRequests: []v1.ResourceList{res150m100Mi, res250m200Mi, res350m300Mi},
},
"Guaranteed QoS Pod - CPU & memory resize requested, update memory": {
resourceName: v1.ResourceMemory,
apiSpecResources: []v1.ResourceRequirements{
{Limits: res150m150Mi, Requests: res150m150Mi},
{Limits: res250m250Mi, Requests: res250m250Mi},
{Limits: res350m350Mi, Requests: res350m350Mi},
},
apiStatusResources: []v1.ResourceRequirements{
{Limits: res100m100Mi, Requests: res100m100Mi},
{Limits: res200m200Mi, Requests: res200m200Mi},
{Limits: res300m300Mi, Requests: res300m300Mi},
},
requiresRestart: []bool{false, false, false},
invokeUpdateResources: true,
expectedCurrentLimits: []v1.ResourceList{res100m150Mi, res200m250Mi, res300m350Mi},
expectedCurrentRequests: []v1.ResourceList{res100m150Mi, res200m250Mi, res300m350Mi},
},
} {
var containersToUpdate []containerToUpdateInfo
for idx := range pod.Spec.Containers {
// seed the container spec and status resources for this test case
pod.Spec.Containers[idx].Resources = tc.apiSpecResources[idx]
pod.Status.ContainerStatuses[idx].Resources = &tc.apiStatusResources[idx]
cInfo := containerToUpdateInfo{
apiContainerIdx: idx,
kubeContainerID: kubecontainer.ContainerID{},
desiredContainerResources: containerResources{
memoryLimit: tc.apiSpecResources[idx].Limits.Memory().Value(),
memoryRequest: tc.apiSpecResources[idx].Requests.Memory().Value(),
cpuLimit: tc.apiSpecResources[idx].Limits.Cpu().MilliValue(),
cpuRequest: tc.apiSpecResources[idx].Requests.Cpu().MilliValue(),
},
currentContainerResources: &containerResources{
memoryLimit: tc.apiStatusResources[idx].Limits.Memory().Value(),
memoryRequest: tc.apiStatusResources[idx].Requests.Memory().Value(),
cpuLimit: tc.apiStatusResources[idx].Limits.Cpu().MilliValue(),
cpuRequest: tc.apiStatusResources[idx].Requests.Cpu().MilliValue(),
},
}
containersToUpdate = append(containersToUpdate, cInfo)
}
fakeRuntime.Called = []string{}
err := m.updatePodContainerResources(pod, tc.resourceName, containersToUpdate)
assert.NoError(t, err, dsc)
if tc.invokeUpdateResources {
assert.Contains(t, fakeRuntime.Called, "UpdateContainerResources", dsc)
}
for idx := range pod.Spec.Containers {
assert.Equal(t, tc.expectedCurrentLimits[idx].Memory().Value(), containersToUpdate[idx].currentContainerResources.memoryLimit, dsc)
assert.Equal(t, tc.expectedCurrentRequests[idx].Memory().Value(), containersToUpdate[idx].currentContainerResources.memoryRequest, dsc)
assert.Equal(t, tc.expectedCurrentLimits[idx].Cpu().MilliValue(), containersToUpdate[idx].currentContainerResources.cpuLimit, dsc)
assert.Equal(t, tc.expectedCurrentRequests[idx].Cpu().MilliValue(), containersToUpdate[idx].currentContainerResources.cpuRequest, dsc)
}
}
}

View File

@ -22,7 +22,9 @@ import (
v1 "k8s.io/api/core/v1"
kubetypes "k8s.io/apimachinery/pkg/types"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/features"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/types"
)
@ -33,6 +35,7 @@ const (
podTerminationGracePeriodLabel = "io.kubernetes.pod.terminationGracePeriod"
containerHashLabel = "io.kubernetes.container.hash"
containerHashWithoutResourcesLabel = "io.kubernetes.container.hashWithoutResources"
containerRestartCountLabel = "io.kubernetes.container.restartCount"
containerTerminationMessagePathLabel = "io.kubernetes.container.terminationMessagePath"
containerTerminationMessagePolicyLabel = "io.kubernetes.container.terminationMessagePolicy"
@ -62,6 +65,7 @@ type labeledContainerInfo struct {
type annotatedContainerInfo struct {
Hash uint64
HashWithoutResources uint64
RestartCount int
PodDeletionGracePeriod *int64
PodTerminationGracePeriod *int64
@ -113,6 +117,9 @@ func newContainerAnnotations(container *v1.Container, pod *v1.Pod, restartCount
}
annotations[containerHashLabel] = strconv.FormatUint(kubecontainer.HashContainer(container), 16)
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
annotations[containerHashWithoutResourcesLabel] = strconv.FormatUint(kubecontainer.HashContainerWithoutResources(container), 16)
}
annotations[containerRestartCountLabel] = strconv.Itoa(restartCount)
annotations[containerTerminationMessagePathLabel] = container.TerminationMessagePath
annotations[containerTerminationMessagePolicyLabel] = string(container.TerminationMessagePolicy)
@ -193,6 +200,11 @@ func getContainerInfoFromAnnotations(annotations map[string]string) *annotatedCo
if containerInfo.Hash, err = getUint64ValueFromLabel(annotations, containerHashLabel); err != nil {
klog.ErrorS(err, "Unable to get label value from annotations", "label", containerHashLabel, "annotations", annotations)
}
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
if containerInfo.HashWithoutResources, err = getUint64ValueFromLabel(annotations, containerHashWithoutResourcesLabel); err != nil {
klog.ErrorS(err, "Unable to get label value from annotations", "label", containerHashWithoutResourcesLabel, "annotations", annotations)
}
}
if containerInfo.RestartCount, err = getIntValueFromLabel(annotations, containerRestartCountLabel); err != nil {
klog.ErrorS(err, "Unable to get label value from annotations", "label", containerRestartCountLabel, "annotations", annotations)
}

View File

@ -23,6 +23,9 @@ import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/kubernetes/pkg/features"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
@ -152,11 +155,13 @@ func TestContainerAnnotations(t *testing.T) {
PodDeletionGracePeriod: pod.DeletionGracePeriodSeconds,
PodTerminationGracePeriod: pod.Spec.TerminationGracePeriodSeconds,
Hash: kubecontainer.HashContainer(container),
HashWithoutResources: kubecontainer.HashContainerWithoutResources(container),
RestartCount: restartCount,
TerminationMessagePath: container.TerminationMessagePath,
PreStopHandler: container.Lifecycle.PreStop,
}
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
// Test whether we can get right information from label
annotations := newContainerAnnotations(container, pod, restartCount, opts)
containerInfo := getContainerInfoFromAnnotations(annotations)
@ -177,6 +182,7 @@ func TestContainerAnnotations(t *testing.T) {
expected.PreStopHandler = nil
// Because container is changed, the Hash should be updated
expected.Hash = kubecontainer.HashContainer(container)
expected.HashWithoutResources = kubecontainer.HashContainerWithoutResources(container)
annotations = newContainerAnnotations(container, pod, restartCount, opts)
containerInfo = getContainerInfoFromAnnotations(annotations)
if !reflect.DeepEqual(containerInfo, expected) {

View File

@ -409,3 +409,7 @@ func (e *EventedPLEG) updateRunningContainerMetric(podStatus *kubecontainer.PodS
}
}
}
func (e *EventedPLEG) UpdateCache(pod *kubecontainer.Pod, pid types.UID) (error, bool) {
return fmt.Errorf("not implemented"), false
}

View File

@ -76,6 +76,8 @@ type GenericPLEG struct {
runningMu sync.Mutex
// Indicates relisting related parameters
relistDuration *RelistDuration
// Mutex to serialize updateCache called by relist vs UpdateCache interface
podCacheMutex sync.Mutex
}
// plegContainerState has a one-to-one mapping to the
@ -436,6 +438,8 @@ func (g *GenericPLEG) updateCache(ctx context.Context, pod *kubecontainer.Pod, p
return nil, true
}
g.podCacheMutex.Lock()
defer g.podCacheMutex.Unlock()
timestamp := g.clock.Now()
status, err := g.runtime.GetPodStatus(ctx, pod.ID, pod.Name, pod.Namespace)
@ -478,6 +482,17 @@ func (g *GenericPLEG) updateCache(ctx context.Context, pod *kubecontainer.Pod, p
return err, g.cache.Set(pod.ID, status, err, timestamp)
}
func (g *GenericPLEG) UpdateCache(pod *kubecontainer.Pod, pid types.UID) (error, bool) {
ctx := context.Background()
if !g.cacheEnabled() {
return fmt.Errorf("pod cache disabled"), false
}
if pod == nil {
return fmt.Errorf("pod cannot be nil"), false
}
return g.updateCache(ctx, pod, pid)
}
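// Illustrative sketch, not part of this change: a caller (for example, the kubelet after actuating
// an in-place resize) can refresh the PLEG cache for a single pod via UpdateCache instead of
// waiting for the next relist. The pod value here is hypothetical; the bool mirrors cache.Set.
func examplePLEGCacheRefresh(gen PodLifecycleEventGenerator, pod *kubecontainer.Pod) {
	if err, updated := gen.UpdateCache(pod, pod.ID); err != nil {
		klog.ErrorS(err, "Failed to update PLEG cache for pod", "podUID", pod.ID)
	} else if updated {
		klog.V(4).InfoS("PLEG cache entry refreshed", "podUID", pod.ID)
	}
}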
func updateEvents(eventsByPodID map[types.UID][]*PodLifecycleEvent, e *PodLifecycleEvent) {
if e == nil {
return

View File

@ -20,6 +20,7 @@ import (
"time"
"k8s.io/apimachinery/pkg/types"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
)
// PodLifeCycleEventType define the event type of pod life cycle events.
@ -68,4 +69,5 @@ type PodLifecycleEventGenerator interface {
Watch() chan *PodLifecycleEvent
Healthy() (bool, error)
Relist()
UpdateCache(*kubecontainer.Pod, types.UID) (error, bool)
}

View File

@ -17,6 +17,7 @@ limitations under the License.
package prober
import (
"io/ioutil"
"reflect"
"sync"
@ -109,8 +110,14 @@ func newTestManager() *manager {
podStartupLatencyTracker := kubeletutil.NewPodStartupLatencyTracker()
// Add test pod to pod manager, so that status manager can get the pod from pod manager if needed.
podManager.AddPod(getTestPod())
testRootDir := ""
if tempDir, err := ioutil.TempDir("", "kubelet_test."); err != nil {
return nil
} else {
testRootDir = tempDir
}
m := NewManager(
status.NewManager(&fake.Clientset{}, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker),
status.NewManager(&fake.Clientset{}, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, testRootDir),
results.NewManager(),
results.NewManager(),
results.NewManager(),

View File

@ -19,6 +19,7 @@ package prober
import (
"context"
"fmt"
"io/ioutil"
"net"
"net/http"
"sync"
@ -80,10 +81,16 @@ func TestTCPPortExhaustion(t *testing.T) {
}
for _, tt := range tests {
t.Run(fmt.Sprintf(tt.name), func(t *testing.T) {
testRootDir := ""
if tempDir, err := ioutil.TempDir("", "kubelet_test."); err != nil {
t.Fatalf("can't make a temp rootdir: %v", err)
} else {
testRootDir = tempDir
}
podManager := kubepod.NewBasicPodManager(nil)
podStartupLatencyTracker := kubeletutil.NewPodStartupLatencyTracker()
m := NewManager(
status.NewManager(&fake.Clientset{}, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker),
status.NewManager(&fake.Clientset{}, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, testRootDir),
results.NewManager(),
results.NewManager(),
results.NewManager(),

View File

@ -19,6 +19,7 @@ package prober
import (
"context"
"fmt"
"io/ioutil"
"testing"
"time"
@ -153,7 +154,13 @@ func TestDoProbe(t *testing.T) {
}
// Clean up.
m.statusManager = status.NewManager(&fake.Clientset{}, kubepod.NewBasicPodManager(nil), &statustest.FakePodDeletionSafetyProvider{}, kubeletutil.NewPodStartupLatencyTracker())
testRootDir := ""
if tempDir, err := ioutil.TempDir("", "kubelet_test."); err != nil {
t.Fatalf("can't make a temp rootdir: %v", err)
} else {
testRootDir = tempDir
}
m.statusManager = status.NewManager(&fake.Clientset{}, kubepod.NewBasicPodManager(nil), &statustest.FakePodDeletionSafetyProvider{}, kubeletutil.NewPodStartupLatencyTracker(), testRootDir)
resultsManager(m, probeType).Remove(testContainerID)
}
}

View File

@ -18,7 +18,10 @@ package qos
import (
v1 "k8s.io/api/core/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/types"
)
@ -60,6 +63,11 @@ func GetContainerOOMScoreAdjust(pod *v1.Pod, container *v1.Container, memoryCapa
// targets for OOM kills.
// Note that this is a heuristic, it won't work if a container has many small processes.
memoryRequest := container.Resources.Requests.Memory().Value()
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
if cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, container.Name); ok {
memoryRequest = cs.ResourcesAllocated.Memory().Value()
}
}
oomScoreAdjust := 1000 - (1000*memoryRequest)/memoryCapacity
// A guaranteed pod using 100% of memory can have an OOM score of 10. Ensure
// that burstable pods have a higher OOM score adjustment.
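// Illustrative worked example, not part of this change (hypothetical numbers): with the feature
// gate enabled and ResourcesAllocated reporting 1Gi on a node with 16Gi of memory capacity,
//   oomScoreAdjust = 1000 - (1000*1073741824)/17179869184 = 1000 - 62 = 938
// i.e. a container holding a larger share of node memory gets a lower (less killable) adjustment.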

View File

@ -85,7 +85,7 @@ func TestRunOnce(t *testing.T) {
recorder: &record.FakeRecorder{},
cadvisor: cadvisor,
nodeLister: testNodeLister{},
statusManager: status.NewManager(nil, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker),
statusManager: status.NewManager(nil, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, basePath),
podManager: podManager,
podWorkers: &fakePodWorkers{},
os: &containertest.FakeOS{},

View File

@ -0,0 +1,93 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package status
import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/klog/v2"
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/status/state"
)
type fakeManager struct {
state state.State
}
func (m *fakeManager) Start() {
klog.InfoS("Start()")
return
}
func (m *fakeManager) GetPodStatus(uid types.UID) (v1.PodStatus, bool) {
klog.InfoS("GetPodStatus()")
return v1.PodStatus{}, false
}
func (m *fakeManager) SetPodStatus(pod *v1.Pod, status v1.PodStatus) {
klog.InfoS("SetPodStatus()")
return
}
func (m *fakeManager) SetContainerReadiness(podUID types.UID, containerID kubecontainer.ContainerID, ready bool) {
klog.InfoS("SetContainerReadiness()")
return
}
func (m *fakeManager) SetContainerStartup(podUID types.UID, containerID kubecontainer.ContainerID, started bool) {
klog.InfoS("SetContainerStartup()")
return
}
func (m *fakeManager) TerminatePod(pod *v1.Pod) {
klog.InfoS("TerminatePod()")
return
}
func (m *fakeManager) RemoveOrphanedStatuses(podUIDs map[types.UID]bool) {
klog.InfoS("RemoveOrphanedStatuses()")
return
}
func (m *fakeManager) State() state.Reader {
klog.InfoS("State()")
return m.state
}
func (m *fakeManager) SetPodAllocation(pod *v1.Pod) error {
klog.InfoS("SetPodAllocation()")
for _, container := range pod.Spec.Containers {
var alloc v1.ResourceList
if container.Resources.Requests != nil {
alloc = container.Resources.Requests.DeepCopy()
}
m.state.SetContainerResourceAllocation(string(pod.UID), container.Name, alloc)
}
return nil
}
func (m *fakeManager) SetPodResizeStatus(podUID types.UID, resizeStatus v1.PodResizeStatus) error {
klog.InfoS("SetPodResizeStatus()")
return nil
}
// NewFakeManager creates an empty/fake status manager
func NewFakeManager() Manager {
return &fakeManager{
state: state.NewStateMemory(),
}
}
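// Illustrative sketch, not part of this change: how a kubelet unit test might exercise the fake
// manager above. This assumes SetPodAllocation and State are part of the status Manager interface
// extended by this change (the fake implements both); the pod argument is hypothetical.
func exampleFakeManagerUsage(pod *v1.Pod) v1.ResourceList {
	m := NewFakeManager()
	_ = m.SetPodAllocation(pod) // records each container's spec requests in the in-memory state
	alloc, _ := m.State().GetContainerResourceAllocation(string(pod.UID), pod.Spec.Containers[0].Name)
	return alloc
}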

View File

@ -0,0 +1,65 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package state
import (
"encoding/json"
"k8s.io/api/core/v1"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/checksum"
)
var _ checkpointmanager.Checkpoint = &PodResourceAllocationCheckpoint{}
// PodResourceAllocationCheckpoint is used to store resources allocated to a pod in checkpoint
type PodResourceAllocationCheckpoint struct {
AllocationEntries map[string]map[string]v1.ResourceList `json:"allocationEntries,omitempty"`
ResizeStatusEntries map[string]v1.PodResizeStatus `json:"resizeStatusEntries,omitempty"`
Checksum checksum.Checksum `json:"checksum"`
}
// NewPodResourceAllocationCheckpoint returns an instance of Checkpoint
func NewPodResourceAllocationCheckpoint() *PodResourceAllocationCheckpoint {
//lint:ignore unexported-type-in-api user-facing error message
return &PodResourceAllocationCheckpoint{
AllocationEntries: make(map[string]map[string]v1.ResourceList),
ResizeStatusEntries: make(map[string]v1.PodResizeStatus),
}
}
// MarshalCheckpoint returns marshalled checkpoint
func (prc *PodResourceAllocationCheckpoint) MarshalCheckpoint() ([]byte, error) {
// zero out the checksum first so a previously stored value doesn't affect the computed checksum
prc.Checksum = 0
prc.Checksum = checksum.New(prc)
return json.Marshal(*prc)
}
// UnmarshalCheckpoint tries to unmarshal passed bytes to checkpoint
func (prc *PodResourceAllocationCheckpoint) UnmarshalCheckpoint(blob []byte) error {
return json.Unmarshal(blob, prc)
}
// VerifyChecksum verifies that current checksum of checkpoint is valid
func (prc *PodResourceAllocationCheckpoint) VerifyChecksum() error {
ck := prc.Checksum
prc.Checksum = 0
err := ck.Verify(prc)
prc.Checksum = ck
return err
}
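// Illustrative sketch, not part of this change: a marshal/unmarshal/verify round trip for the
// checkpoint type above. The pod UID, container name and resize status are hypothetical, and the
// empty ResourceList stands in for a real allocation.
func exampleCheckpointRoundTrip() error {
	cp := NewPodResourceAllocationCheckpoint()
	cp.AllocationEntries["pod-uid-1"] = map[string]v1.ResourceList{"app": {}}
	cp.ResizeStatusEntries["pod-uid-1"] = v1.PodResizeStatus("InProgress")
	blob, err := cp.MarshalCheckpoint() // computes and embeds the checksum
	if err != nil {
		return err
	}
	restored := NewPodResourceAllocationCheckpoint()
	if err := restored.UnmarshalCheckpoint(blob); err != nil {
		return err
	}
	return restored.VerifyChecksum() // fails if the stored blob was modified
}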

View File

@ -0,0 +1,62 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package state
import (
"k8s.io/api/core/v1"
)
// PodResourceAllocation type is used in tracking resources allocated to pod's containers
type PodResourceAllocation map[string]map[string]v1.ResourceList
// PodResizeStatus type is used in tracking the last resize decision for pod
type PodResizeStatus map[string]v1.PodResizeStatus
// Clone returns a copy of PodResourceAllocation
func (pr PodResourceAllocation) Clone() PodResourceAllocation {
prCopy := make(PodResourceAllocation)
for pod := range pr {
prCopy[pod] = make(map[string]v1.ResourceList)
for container, alloc := range pr[pod] {
prCopy[pod][container] = alloc.DeepCopy()
}
}
return prCopy
}
// Reader interface used to read current pod resource allocation state
type Reader interface {
GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceList, bool)
GetPodResourceAllocation() PodResourceAllocation
GetPodResizeStatus(podUID string) (v1.PodResizeStatus, bool)
GetResizeStatus() PodResizeStatus
}
type writer interface {
SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceList) error
SetPodResourceAllocation(PodResourceAllocation) error
SetPodResizeStatus(podUID string, resizeStatus v1.PodResizeStatus) error
SetResizeStatus(PodResizeStatus) error
Delete(podUID string, containerName string) error
ClearState() error
}
// State interface provides methods for tracking and setting pod resource allocation
type State interface {
Reader
writer
}
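// Illustrative sketch, not part of this change: typical use of the State interface through the
// in-memory implementation in this package (NewStateMemory, defined in a sibling file). The pod
// UID, container name and resize status value are hypothetical.
func exampleStateUsage() (v1.ResourceList, bool) {
	var st State = NewStateMemory()
	_ = st.SetContainerResourceAllocation("pod-uid-1", "app", v1.ResourceList{})
	_ = st.SetPodResizeStatus("pod-uid-1", v1.PodResizeStatus("InProgress"))
	// reads go through the embedded Reader interface; the in-memory store returns deep copies
	return st.GetContainerResourceAllocation("pod-uid-1", "app")
}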

View File

@ -0,0 +1,179 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package state
import (
"fmt"
"path"
"sync"
"k8s.io/api/core/v1"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager"
"k8s.io/kubernetes/pkg/kubelet/checkpointmanager/errors"
)
var _ State = &stateCheckpoint{}
type stateCheckpoint struct {
mux sync.RWMutex
cache State
checkpointManager checkpointmanager.CheckpointManager
checkpointName string
}
// NewStateCheckpoint creates new State for keeping track of pod resource allocations with checkpoint backend
func NewStateCheckpoint(stateDir, checkpointName string) (State, error) {
checkpointManager, err := checkpointmanager.NewCheckpointManager(stateDir)
if err != nil {
return nil, fmt.Errorf("failed to initialize checkpoint manager for pod allocation tracking: %v", err)
}
stateCheckpoint := &stateCheckpoint{
cache: NewStateMemory(),
checkpointManager: checkpointManager,
checkpointName: checkpointName,
}
if err := stateCheckpoint.restoreState(); err != nil {
//lint:ignore ST1005 user-facing error message
return nil, fmt.Errorf("could not restore state from checkpoint: %v, please drain this node and delete pod allocation checkpoint file %q before restarting Kubelet", err, path.Join(stateDir, checkpointName))
}
return stateCheckpoint, nil
}
// restoreState restores state from the checkpoint and creates it if it doesn't exist
func (sc *stateCheckpoint) restoreState() error {
sc.mux.Lock()
defer sc.mux.Unlock()
var err error
checkpoint := NewPodResourceAllocationCheckpoint()
if err = sc.checkpointManager.GetCheckpoint(sc.checkpointName, checkpoint); err != nil {
if err == errors.ErrCheckpointNotFound {
return sc.storeState()
}
return err
}
sc.cache.SetPodResourceAllocation(checkpoint.AllocationEntries)
sc.cache.SetResizeStatus(checkpoint.ResizeStatusEntries)
klog.V(2).InfoS("State checkpoint: restored pod resource allocation state from checkpoint")
return nil
}
// storeState saves state to a checkpoint; the caller is responsible for locking
func (sc *stateCheckpoint) storeState() error {
checkpoint := NewPodResourceAllocationCheckpoint()
podAllocation := sc.cache.GetPodResourceAllocation()
for pod := range podAllocation {
checkpoint.AllocationEntries[pod] = make(map[string]v1.ResourceList)
for container, alloc := range podAllocation[pod] {
checkpoint.AllocationEntries[pod][container] = alloc
}
}
podResizeStatus := sc.cache.GetResizeStatus()
checkpoint.ResizeStatusEntries = make(map[string]v1.PodResizeStatus)
for pUID, rStatus := range podResizeStatus {
checkpoint.ResizeStatusEntries[pUID] = rStatus
}
err := sc.checkpointManager.CreateCheckpoint(sc.checkpointName, checkpoint)
if err != nil {
klog.ErrorS(err, "Failed to save pod allocation checkpoint")
return err
}
return nil
}
// GetContainerResourceAllocation returns current resources allocated to a pod's container
func (sc *stateCheckpoint) GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceList, bool) {
sc.mux.RLock()
defer sc.mux.RUnlock()
return sc.cache.GetContainerResourceAllocation(podUID, containerName)
}
// GetPodResourceAllocation returns current pod resource allocation
func (sc *stateCheckpoint) GetPodResourceAllocation() PodResourceAllocation {
sc.mux.RLock()
defer sc.mux.RUnlock()
return sc.cache.GetPodResourceAllocation()
}
// GetPodResizeStatus returns the last resize decision for a pod
func (sc *stateCheckpoint) GetPodResizeStatus(podUID string) (v1.PodResizeStatus, bool) {
sc.mux.RLock()
defer sc.mux.RUnlock()
return sc.cache.GetPodResizeStatus(podUID)
}
// GetResizeStatus returns the set of resize decisions made
func (sc *stateCheckpoint) GetResizeStatus() PodResizeStatus {
sc.mux.RLock()
defer sc.mux.RUnlock()
return sc.cache.GetResizeStatus()
}
// SetContainerResourceAllocation sets resources allocated to a pod's container
func (sc *stateCheckpoint) SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceList) error {
sc.mux.Lock()
defer sc.mux.Unlock()
sc.cache.SetContainerResourceAllocation(podUID, containerName, alloc)
return sc.storeState()
}
// SetPodResourceAllocation sets pod resource allocation
func (sc *stateCheckpoint) SetPodResourceAllocation(a PodResourceAllocation) error {
sc.mux.Lock()
defer sc.mux.Unlock()
sc.cache.SetPodResourceAllocation(a)
return sc.storeState()
}
// SetPodResizeStatus sets the last resize decision for a pod
func (sc *stateCheckpoint) SetPodResizeStatus(podUID string, resizeStatus v1.PodResizeStatus) error {
sc.mux.Lock()
defer sc.mux.Unlock()
sc.cache.SetPodResizeStatus(podUID, resizeStatus)
return sc.storeState()
}
// SetResizeStatus sets the resize decisions
func (sc *stateCheckpoint) SetResizeStatus(rs PodResizeStatus) error {
sc.mux.Lock()
defer sc.mux.Unlock()
sc.cache.SetResizeStatus(rs)
return sc.storeState()
}
// Delete deletes allocations for specified pod
func (sc *stateCheckpoint) Delete(podUID string, containerName string) error {
sc.mux.Lock()
defer sc.mux.Unlock()
sc.cache.Delete(podUID, containerName)
return sc.storeState()
}
// ClearState clears the state and saves it in a checkpoint
func (sc *stateCheckpoint) ClearState() error {
sc.mux.Lock()
defer sc.mux.Unlock()
sc.cache.ClearState()
return sc.storeState()
}
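
For illustration only (not part of this change): a minimal sketch of how the checkpoint-backed store above tolerates a kubelet restart, assuming state.NewStateCheckpoint(stateDir, checkpointName) returns (State, error) and restores any existing checkpoint at construction, as the call site later in this diff and the restore logic above suggest; the pod UID and container name are hypothetical.

package main

import (
	"fmt"
	"os"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"

	"k8s.io/kubernetes/pkg/kubelet/status/state"
)

func main() {
	dir, err := os.MkdirTemp("", "pod-alloc-checkpoint")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)

	// First "kubelet lifetime": record an allocation; storeState() persists it to disk.
	s1, err := state.NewStateCheckpoint(dir, "pod_status_manager_state")
	if err != nil {
		panic(err)
	}
	alloc := v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m")}
	if err := s1.SetContainerResourceAllocation("pod-uid-1", "c1", alloc); err != nil {
		panic(err)
	}

	// Second "kubelet lifetime": a fresh store restores the allocation from the checkpoint.
	s2, err := state.NewStateCheckpoint(dir, "pod_status_manager_state")
	if err != nil {
		panic(err)
	}
	restored, ok := s2.GetContainerResourceAllocation("pod-uid-1", "c1")
	fmt.Println(ok, restored.Cpu().String()) // expected: true 500m
}
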

View File

@ -0,0 +1,152 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package state
import (
"sync"
"k8s.io/api/core/v1"
"k8s.io/klog/v2"
)
type stateMemory struct {
sync.RWMutex
podAllocation PodResourceAllocation
podResizeStatus PodResizeStatus
}
var _ State = &stateMemory{}
// NewStateMemory creates new State to track resources allocated to pods
func NewStateMemory() State {
klog.V(2).InfoS("Initialized new in-memory state store for pod resource allocation tracking")
return &stateMemory{
podAllocation: PodResourceAllocation{},
podResizeStatus: PodResizeStatus{},
}
}
func (s *stateMemory) GetContainerResourceAllocation(podUID string, containerName string) (v1.ResourceList, bool) {
s.RLock()
defer s.RUnlock()
alloc, ok := s.podAllocation[podUID][containerName]
return alloc.DeepCopy(), ok
}
func (s *stateMemory) GetPodResourceAllocation() PodResourceAllocation {
s.RLock()
defer s.RUnlock()
return s.podAllocation.Clone()
}
func (s *stateMemory) GetPodResizeStatus(podUID string) (v1.PodResizeStatus, bool) {
s.RLock()
defer s.RUnlock()
resizeStatus, ok := s.podResizeStatus[podUID]
return resizeStatus, ok
}
func (s *stateMemory) GetResizeStatus() PodResizeStatus {
s.RLock()
defer s.RUnlock()
prs := make(map[string]v1.PodResizeStatus)
for k, v := range s.podResizeStatus {
prs[k] = v
}
return prs
}
func (s *stateMemory) SetContainerResourceAllocation(podUID string, containerName string, alloc v1.ResourceList) error {
s.Lock()
defer s.Unlock()
if _, ok := s.podAllocation[podUID]; !ok {
s.podAllocation[podUID] = make(map[string]v1.ResourceList)
}
s.podAllocation[podUID][containerName] = alloc
klog.V(3).InfoS("Updated container resource allocation", "podUID", podUID, "containerName", containerName, "alloc", alloc)
return nil
}
func (s *stateMemory) SetPodResourceAllocation(a PodResourceAllocation) error {
s.Lock()
defer s.Unlock()
s.podAllocation = a.Clone()
klog.V(3).InfoS("Updated pod resource allocation", "allocation", a)
return nil
}
func (s *stateMemory) SetPodResizeStatus(podUID string, resizeStatus v1.PodResizeStatus) error {
s.Lock()
defer s.Unlock()
if resizeStatus != "" {
s.podResizeStatus[podUID] = resizeStatus
} else {
delete(s.podResizeStatus, podUID)
}
klog.V(3).InfoS("Updated pod resize state", "podUID", podUID, "resizeStatus", resizeStatus)
return nil
}
func (s *stateMemory) SetResizeStatus(rs PodResizeStatus) error {
s.Lock()
defer s.Unlock()
prs := make(map[string]v1.PodResizeStatus)
for k, v := range rs {
prs[k] = v
}
s.podResizeStatus = prs
klog.V(3).InfoS("Updated pod resize state", "resizes", rs)
return nil
}
func (s *stateMemory) deleteContainer(podUID string, containerName string) {
delete(s.podAllocation[podUID], containerName)
if len(s.podAllocation[podUID]) == 0 {
delete(s.podAllocation, podUID)
delete(s.podResizeStatus, podUID)
}
klog.V(3).InfoS("Deleted pod resource allocation", "podUID", podUID, "containerName", containerName)
}
func (s *stateMemory) Delete(podUID string, containerName string) error {
s.Lock()
defer s.Unlock()
if len(containerName) == 0 {
delete(s.podAllocation, podUID)
delete(s.podResizeStatus, podUID)
klog.V(3).InfoS("Deleted pod resource allocation and resize state", "podUID", podUID)
return nil
}
s.deleteContainer(podUID, containerName)
return nil
}
func (s *stateMemory) ClearState() error {
s.Lock()
defer s.Unlock()
s.podAllocation = make(PodResourceAllocation)
s.podResizeStatus = make(PodResizeStatus)
klog.V(3).InfoS("Cleared state")
return nil
}
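
For illustration only: a short sketch of the State interface semantics using the in-memory implementation above (NewStateMemory); the pod UID and container name are hypothetical.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"

	"k8s.io/kubernetes/pkg/kubelet/status/state"
)

func main() {
	s := state.NewStateMemory()

	// Record what the node actually allocated for container "c1" of pod "pod-uid-1".
	_ = s.SetContainerResourceAllocation("pod-uid-1", "c1", v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("500m"),
		v1.ResourceMemory: resource.MustParse("256Mi"),
	})

	// Track the node's latest decision about a requested resize.
	_ = s.SetPodResizeStatus("pod-uid-1", v1.PodResizeStatusInProgress)

	alloc, ok := s.GetContainerResourceAllocation("pod-uid-1", "c1")
	resize, _ := s.GetPodResizeStatus("pod-uid-1")
	fmt.Println(ok, alloc.Cpu().String(), resize) // true 500m InProgress

	// Deleting with an empty container name drops all state for the pod.
	_ = s.Delete("pod-uid-1", "")
}
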

View File

@ -41,10 +41,14 @@ import (
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/metrics"
kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
"k8s.io/kubernetes/pkg/kubelet/status/state"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
statusutil "k8s.io/kubernetes/pkg/util/pod"
)
// podStatusManagerStateFile is the file name where status manager stores its state
const podStatusManagerStateFile = "pod_status_manager_state"
// A wrapper around v1.PodStatus that includes a version to enforce that stale pod statuses are
// not sent to the API server.
type versionedPodStatus struct {
@ -79,6 +83,10 @@ type manager struct {
podDeletionSafety PodDeletionSafetyProvider
podStartupLatencyHelper PodStartupLatencyStateHelper
// state allows to save/restore pod resource allocation and tolerate kubelet restarts.
state state.State
// stateFileDirectory holds the directory where the state file for checkpoints is held.
stateFileDirectory string
}
// PodStatusProvider knows how to provide status for a pod. It's intended to be used by other components
@ -128,12 +136,21 @@ type Manager interface {
// RemoveOrphanedStatuses scans the status cache and removes any entries for pods not included in
// the provided podUIDs.
RemoveOrphanedStatuses(podUIDs map[types.UID]bool)
// State returns a read-only interface to the internal status manager state.
State() state.Reader
// SetPodAllocation checkpoints the resources allocated to a pod's containers.
SetPodAllocation(pod *v1.Pod) error
// SetPodResizeStatus checkpoints the last resizing decision for the pod.
SetPodResizeStatus(podUID types.UID, resize v1.PodResizeStatus) error
}
const syncPeriod = 10 * time.Second
// NewManager returns a functional Manager.
func NewManager(kubeClient clientset.Interface, podManager kubepod.Manager, podDeletionSafety PodDeletionSafetyProvider, podStartupLatencyHelper PodStartupLatencyStateHelper) Manager {
func NewManager(kubeClient clientset.Interface, podManager kubepod.Manager, podDeletionSafety PodDeletionSafetyProvider, podStartupLatencyHelper PodStartupLatencyStateHelper, stateFileDirectory string) Manager {
return &manager{
kubeClient: kubeClient,
podManager: podManager,
@ -142,6 +159,7 @@ func NewManager(kubeClient clientset.Interface, podManager kubepod.Manager, podD
apiStatusVersions: make(map[kubetypes.MirrorPodUID]uint64),
podDeletionSafety: podDeletionSafety,
podStartupLatencyHelper: podStartupLatencyHelper,
stateFileDirectory: stateFileDirectory,
}
}
@ -173,6 +191,15 @@ func (m *manager) Start() {
return
}
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
stateImpl, err := state.NewStateCheckpoint(m.stateFileDirectory, podStatusManagerStateFile)
if err != nil {
klog.ErrorS(err, "Could not initialize pod allocation checkpoint manager, please drain node and remove policy state file")
return
}
m.state = stateImpl
}
klog.InfoS("Starting to sync pod status with apiserver")
//nolint:staticcheck // SA1015 Ticker can leak since this is only called once and doesn't handle termination.
@ -200,6 +227,34 @@ func (m *manager) Start() {
}, 0)
}
// State returns the pod resources checkpoint state of the pod status manager
func (m *manager) State() state.Reader {
return m.state
}
// SetPodAllocation checkpoints the resources allocated to a pod's containers
func (m *manager) SetPodAllocation(pod *v1.Pod) error {
m.podStatusesLock.RLock()
defer m.podStatusesLock.RUnlock()
for _, container := range pod.Spec.Containers {
var alloc v1.ResourceList
if container.Resources.Requests != nil {
alloc = container.Resources.Requests.DeepCopy()
}
if err := m.state.SetContainerResourceAllocation(string(pod.UID), container.Name, alloc); err != nil {
return err
}
}
return nil
}
// SetPodResizeStatus checkpoints the last resizing decision for the pod.
func (m *manager) SetPodResizeStatus(podUID types.UID, resizeStatus v1.PodResizeStatus) error {
m.podStatusesLock.RLock()
defer m.podStatusesLock.RUnlock()
return m.state.SetPodResizeStatus(string(podUID), resizeStatus)
}
func (m *manager) GetPodStatus(uid types.UID) (v1.PodStatus, bool) {
m.podStatusesLock.RLock()
defer m.podStatusesLock.RUnlock()
@ -616,6 +671,9 @@ func (m *manager) deletePodStatus(uid types.UID) {
defer m.podStatusesLock.Unlock()
delete(m.podStatuses, uid)
m.podStartupLatencyHelper.DeletePodStartupState(uid)
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
m.state.Delete(string(uid), "")
}
}
// TODO(filipg): It'd be cleaner if we can do this without signal from user.
@ -626,6 +684,9 @@ func (m *manager) RemoveOrphanedStatuses(podUIDs map[types.UID]bool) {
if _, ok := podUIDs[key]; !ok {
klog.V(5).InfoS("Removing pod from status map.", "podUID", key)
delete(m.podStatuses, key)
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
m.state.Delete(string(key), "")
}
}
}
}
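
For illustration only: a hypothetical helper showing the intended call pattern for the Manager methods added above, assuming a status.Manager constructed elsewhere with NewManager(...) and assuming state.Reader exposes the Get* accessors shown earlier in this diff.

package podresize

import (
	v1 "k8s.io/api/core/v1"
	"k8s.io/klog/v2"

	"k8s.io/kubernetes/pkg/kubelet/status"
)

// recordPodResize checkpoints the admitted requests, records the resize decision,
// then reads the allocation back through the read-only State() view.
func recordPodResize(m status.Manager, pod *v1.Pod) error {
	if err := m.SetPodAllocation(pod); err != nil {
		return err
	}
	if err := m.SetPodResizeStatus(pod.UID, v1.PodResizeStatusInProgress); err != nil {
		return err
	}
	for _, c := range pod.Spec.Containers {
		if alloc, ok := m.State().GetContainerResourceAllocation(string(pod.UID), c.Name); ok {
			klog.InfoS("Checkpointed container allocation", "pod", klog.KObj(pod), "container", c.Name, "allocation", alloc)
		}
	}
	return nil
}
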

View File

@ -18,6 +18,7 @@ package status
import (
"fmt"
"io/ioutil"
"math/rand"
"reflect"
"strconv"
@ -87,7 +88,13 @@ func newTestManager(kubeClient clientset.Interface) *manager {
podManager := kubepod.NewBasicPodManager(podtest.NewFakeMirrorClient())
podManager.AddPod(getTestPod())
podStartupLatencyTracker := util.NewPodStartupLatencyTracker()
return NewManager(kubeClient, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker).(*manager)
testRootDir := ""
if tempDir, err := ioutil.TempDir("", "kubelet_test."); err != nil {
return nil
} else {
testRootDir = tempDir
}
return NewManager(kubeClient, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, testRootDir).(*manager)
}
func generateRandomMessage() string {
@ -962,7 +969,7 @@ func TestTerminatePod_DefaultUnknownStatus(t *testing.T) {
t.Run(tc.name, func(t *testing.T) {
podManager := kubepod.NewBasicPodManager(podtest.NewFakeMirrorClient())
podStartupLatencyTracker := util.NewPodStartupLatencyTracker()
syncer := NewManager(&fake.Clientset{}, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker).(*manager)
syncer := NewManager(&fake.Clientset{}, podManager, &statustest.FakePodDeletionSafetyProvider{}, podStartupLatencyTracker, "").(*manager)
original := tc.pod.DeepCopy()
syncer.SetPodStatus(original, original.Status)

View File

@ -27,6 +27,7 @@ import (
v1 "k8s.io/api/core/v1"
types "k8s.io/apimachinery/pkg/types"
container "k8s.io/kubernetes/pkg/kubelet/container"
state "k8s.io/kubernetes/pkg/kubelet/status/state"
)
// MockPodStatusProvider is a mock of PodStatusProvider interface.
@ -239,6 +240,34 @@ func (mr *MockManagerMockRecorder) SetContainerStartup(podUID, containerID, star
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetContainerStartup", reflect.TypeOf((*MockManager)(nil).SetContainerStartup), podUID, containerID, started)
}
// SetPodAllocation mocks base method.
func (m *MockManager) SetPodAllocation(pod *v1.Pod) error {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "SetPodAllocation", pod)
ret0, _ := ret[0].(error)
return ret0
}
// SetPodAllocation indicates an expected call of SetPodAllocation.
func (mr *MockManagerMockRecorder) SetPodAllocation(pod interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetPodAllocation", reflect.TypeOf((*MockManager)(nil).SetPodAllocation), pod)
}
// SetPodResizeStatus mocks base method.
func (m *MockManager) SetPodResizeStatus(podUID types.UID, resize v1.PodResizeStatus) error {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "SetPodResizeStatus", podUID, resize)
ret0, _ := ret[0].(error)
return ret0
}
// SetPodResizeStatus indicates an expected call of SetPodResizeStatus.
func (mr *MockManagerMockRecorder) SetPodResizeStatus(podUID, resize interface{}) *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "SetPodResizeStatus", reflect.TypeOf((*MockManager)(nil).SetPodResizeStatus), podUID, resize)
}
// SetPodStatus mocks base method.
func (m *MockManager) SetPodStatus(pod *v1.Pod, status v1.PodStatus) {
m.ctrl.T.Helper()
@ -263,6 +292,20 @@ func (mr *MockManagerMockRecorder) Start() *gomock.Call {
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Start", reflect.TypeOf((*MockManager)(nil).Start))
}
// State mocks base method.
func (m *MockManager) State() state.Reader {
m.ctrl.T.Helper()
ret := m.ctrl.Call(m, "State")
ret0, _ := ret[0].(state.Reader)
return ret0
}
// State indicates an expected call of State.
func (mr *MockManagerMockRecorder) State() *gomock.Call {
mr.mock.ctrl.T.Helper()
return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "State", reflect.TypeOf((*MockManager)(nil).State))
}
// TerminatePod mocks base method.
func (m *MockManager) TerminatePod(pod *v1.Pod) {
m.ctrl.T.Helper()

View File

@ -30,10 +30,13 @@ import (
"k8s.io/apiserver/pkg/admission"
quota "k8s.io/apiserver/pkg/quota/v1"
"k8s.io/apiserver/pkg/quota/v1/generic"
"k8s.io/apiserver/pkg/util/feature"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
api "k8s.io/kubernetes/pkg/apis/core"
k8s_api_v1 "k8s.io/kubernetes/pkg/apis/core/v1"
"k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
"k8s.io/kubernetes/pkg/features"
"k8s.io/utils/clock"
)
@ -155,6 +158,9 @@ func (p *podEvaluator) Handles(a admission.Attributes) bool {
if op == admission.Create {
return true
}
if feature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) && op == admission.Update {
return true
}
return false
}
@ -356,7 +362,14 @@ func PodUsageFunc(obj runtime.Object, clock clock.Clock) (corev1.ResourceList, e
limits := corev1.ResourceList{}
// TODO: ideally, we have pod level requests and limits in the future.
for i := range pod.Spec.Containers {
requests = quota.Add(requests, pod.Spec.Containers[i].Resources.Requests)
containerRequests := pod.Spec.Containers[i].Resources.Requests
if feature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
cs, ok := podutil.GetContainerStatus(pod.Status.ContainerStatuses, pod.Spec.Containers[i].Name)
if ok && cs.ResourcesAllocated != nil {
containerRequests = quota.Max(containerRequests, cs.ResourcesAllocated)
}
}
requests = quota.Add(requests, containerRequests)
limits = quota.Add(limits, pod.Spec.Containers[i].Resources.Limits)
}
// InitContainers are run sequentially before other containers start, so the highest

View File

@ -27,7 +27,10 @@ import (
"k8s.io/apimachinery/pkg/runtime/schema"
quota "k8s.io/apiserver/pkg/quota/v1"
"k8s.io/apiserver/pkg/quota/v1/generic"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
api "k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/util/node"
"k8s.io/utils/clock"
testingclock "k8s.io/utils/clock/testing"
@ -750,3 +753,211 @@ func TestPodEvaluatorMatchingScopes(t *testing.T) {
})
}
}
func TestPodEvaluatorUsageResourceResize(t *testing.T) {
fakeClock := testingclock.NewFakeClock(time.Now())
evaluator := NewPodEvaluator(nil, fakeClock)
testCases := map[string]struct {
pod *api.Pod
usageFgEnabled corev1.ResourceList
usageFgDisabled corev1.ResourceList
}{
"verify Max(Container.Spec.Requests, ContainerStatus.ResourcesAllocated) for memory resource": {
pod: &api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceMemory: resource.MustParse("200Mi"),
},
Limits: api.ResourceList{
api.ResourceMemory: resource.MustParse("400Mi"),
},
},
},
},
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
{
ResourcesAllocated: api.ResourceList{
api.ResourceMemory: resource.MustParse("150Mi"),
},
},
},
},
},
usageFgEnabled: corev1.ResourceList{
corev1.ResourceRequestsMemory: resource.MustParse("200Mi"),
corev1.ResourceLimitsMemory: resource.MustParse("400Mi"),
corev1.ResourcePods: resource.MustParse("1"),
corev1.ResourceMemory: resource.MustParse("200Mi"),
generic.ObjectCountQuotaResourceNameFor(schema.GroupResource{Resource: "pods"}): resource.MustParse("1"),
},
usageFgDisabled: corev1.ResourceList{
corev1.ResourceRequestsMemory: resource.MustParse("200Mi"),
corev1.ResourceLimitsMemory: resource.MustParse("400Mi"),
corev1.ResourcePods: resource.MustParse("1"),
corev1.ResourceMemory: resource.MustParse("200Mi"),
generic.ObjectCountQuotaResourceNameFor(schema.GroupResource{Resource: "pods"}): resource.MustParse("1"),
},
},
"verify Max(Container.Spec.Requests, ContainerStatus.ResourcesAllocated) for CPU resource": {
pod: &api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceCPU: resource.MustParse("100m"),
},
Limits: api.ResourceList{
api.ResourceCPU: resource.MustParse("200m"),
},
},
},
},
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
{
ResourcesAllocated: api.ResourceList{
api.ResourceCPU: resource.MustParse("150m"),
},
},
},
},
},
usageFgEnabled: corev1.ResourceList{
corev1.ResourceRequestsCPU: resource.MustParse("150m"),
corev1.ResourceLimitsCPU: resource.MustParse("200m"),
corev1.ResourcePods: resource.MustParse("1"),
corev1.ResourceCPU: resource.MustParse("150m"),
generic.ObjectCountQuotaResourceNameFor(schema.GroupResource{Resource: "pods"}): resource.MustParse("1"),
},
usageFgDisabled: corev1.ResourceList{
corev1.ResourceRequestsCPU: resource.MustParse("100m"),
corev1.ResourceLimitsCPU: resource.MustParse("200m"),
corev1.ResourcePods: resource.MustParse("1"),
corev1.ResourceCPU: resource.MustParse("100m"),
generic.ObjectCountQuotaResourceNameFor(schema.GroupResource{Resource: "pods"}): resource.MustParse("1"),
},
},
"verify Max(Container.Spec.Requests, ContainerStatus.ResourcesAllocated) for CPU and memory resource": {
pod: &api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceCPU: resource.MustParse("100m"),
api.ResourceMemory: resource.MustParse("200Mi"),
},
Limits: api.ResourceList{
api.ResourceCPU: resource.MustParse("200m"),
api.ResourceMemory: resource.MustParse("400Mi"),
},
},
},
},
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
{
ResourcesAllocated: api.ResourceList{
api.ResourceCPU: resource.MustParse("150m"),
api.ResourceMemory: resource.MustParse("250Mi"),
},
},
},
},
},
usageFgEnabled: corev1.ResourceList{
corev1.ResourceRequestsCPU: resource.MustParse("150m"),
corev1.ResourceLimitsCPU: resource.MustParse("200m"),
corev1.ResourceRequestsMemory: resource.MustParse("250Mi"),
corev1.ResourceLimitsMemory: resource.MustParse("400Mi"),
corev1.ResourcePods: resource.MustParse("1"),
corev1.ResourceCPU: resource.MustParse("150m"),
corev1.ResourceMemory: resource.MustParse("250Mi"),
generic.ObjectCountQuotaResourceNameFor(schema.GroupResource{Resource: "pods"}): resource.MustParse("1"),
},
usageFgDisabled: corev1.ResourceList{
corev1.ResourceRequestsCPU: resource.MustParse("100m"),
corev1.ResourceLimitsCPU: resource.MustParse("200m"),
corev1.ResourceRequestsMemory: resource.MustParse("200Mi"),
corev1.ResourceLimitsMemory: resource.MustParse("400Mi"),
corev1.ResourcePods: resource.MustParse("1"),
corev1.ResourceCPU: resource.MustParse("100m"),
corev1.ResourceMemory: resource.MustParse("200Mi"),
generic.ObjectCountQuotaResourceNameFor(schema.GroupResource{Resource: "pods"}): resource.MustParse("1"),
},
},
"verify Max(Container.Spec.Requests, ContainerStatus.ResourcesAllocated==nil) for CPU and memory resource": {
pod: &api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{
{
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceCPU: resource.MustParse("100m"),
api.ResourceMemory: resource.MustParse("200Mi"),
},
Limits: api.ResourceList{
api.ResourceCPU: resource.MustParse("200m"),
api.ResourceMemory: resource.MustParse("400Mi"),
},
},
},
},
},
Status: api.PodStatus{
ContainerStatuses: []api.ContainerStatus{
{},
},
},
},
usageFgEnabled: corev1.ResourceList{
corev1.ResourceRequestsCPU: resource.MustParse("100m"),
corev1.ResourceLimitsCPU: resource.MustParse("200m"),
corev1.ResourceRequestsMemory: resource.MustParse("200Mi"),
corev1.ResourceLimitsMemory: resource.MustParse("400Mi"),
corev1.ResourcePods: resource.MustParse("1"),
corev1.ResourceCPU: resource.MustParse("100m"),
corev1.ResourceMemory: resource.MustParse("200Mi"),
generic.ObjectCountQuotaResourceNameFor(schema.GroupResource{Resource: "pods"}): resource.MustParse("1"),
},
usageFgDisabled: corev1.ResourceList{
corev1.ResourceRequestsCPU: resource.MustParse("100m"),
corev1.ResourceLimitsCPU: resource.MustParse("200m"),
corev1.ResourceRequestsMemory: resource.MustParse("200Mi"),
corev1.ResourceLimitsMemory: resource.MustParse("400Mi"),
corev1.ResourcePods: resource.MustParse("1"),
corev1.ResourceCPU: resource.MustParse("100m"),
corev1.ResourceMemory: resource.MustParse("200Mi"),
generic.ObjectCountQuotaResourceNameFor(schema.GroupResource{Resource: "pods"}): resource.MustParse("1"),
},
},
}
t.Parallel()
for _, enabled := range []bool{true, false} {
for testName, testCase := range testCases {
t.Run(testName, func(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, enabled)()
actual, err := evaluator.Usage(testCase.pod)
if err != nil {
t.Error(err)
}
usage := testCase.usageFgEnabled
if !enabled {
usage = testCase.usageFgDisabled
}
if !quota.Equals(usage, actual) {
t.Errorf("FG enabled: %v, expected: %v, actual: %v", enabled, usage, actual)
}
})
}
}
}
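
For illustration only: the quota accounting exercised by the test above charges max(spec requests, status resourcesAllocated) per resource while a resize is pending. The standalone sketch below shows that element-wise max using the same quota helper the evaluator calls; the values are made up.

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	quota "k8s.io/apiserver/pkg/quota/v1"
)

func main() {
	// Desired (spec) requests after a proposed resize, vs. what the node still has allocated.
	specRequests := corev1.ResourceList{
		corev1.ResourceCPU:    resource.MustParse("100m"),
		corev1.ResourceMemory: resource.MustParse("200Mi"),
	}
	allocated := corev1.ResourceList{
		corev1.ResourceCPU:    resource.MustParse("150m"),
		corev1.ResourceMemory: resource.MustParse("150Mi"),
	}

	// Element-wise max: quota keeps charging the larger value until the resize completes.
	charged := quota.Max(specRequests, allocated)
	fmt.Println(charged.Cpu().String(), charged.Memory().String()) // 150m 200Mi
}
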

View File

@ -97,6 +97,14 @@ func (podStrategy) PrepareForUpdate(ctx context.Context, obj, old runtime.Object
oldPod := old.(*api.Pod)
newPod.Status = oldPod.Status
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
// With support for in-place pod resizing, container resources are now mutable.
// If container resources are updated with new resource request values, a pod resize is
// desired. The status of this request is reflected by setting the Resize field to "Proposed"
// as a signal to the caller that the request is being considered.
podutil.MarkPodProposedForResize(oldPod, newPod)
}
podutil.DropDisabledPodFields(newPod, oldPod)
}
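
For illustration only: the actual podutil.MarkPodProposedForResize implementation is not part of this hunk. The sketch below is a simplified, hypothetical stand-in (using the external v1 types, with containers matched by index purely for brevity) for the kind of comparison such a helper implies.

package podresize

import (
	v1 "k8s.io/api/core/v1"
	apiequality "k8s.io/apimachinery/pkg/api/equality"
)

// proposeResizeIfNeeded marks the update as a proposed resize when any container's
// resources differ between the old and new spec. This is a simplified illustration,
// not the real helper.
func proposeResizeIfNeeded(oldPod, newPod *v1.Pod) {
	for i := range newPod.Spec.Containers {
		if i >= len(oldPod.Spec.Containers) {
			return
		}
		if !apiequality.Semantic.DeepEqual(
			oldPod.Spec.Containers[i].Resources,
			newPod.Spec.Containers[i].Resources,
		) {
			newPod.Status.Resize = v1.PodResizeStatusProposed
			return
		}
	}
}
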

View File

@ -28,4 +28,5 @@ type Features struct {
EnableMatchLabelKeysInPodTopologySpread bool
EnablePodSchedulingReadiness bool
EnablePodDisruptionConditions bool
EnableInPlacePodVerticalScaling bool
}

View File

@ -76,9 +76,10 @@ var nodeResourceStrategyTypeMap = map[config.ScoringStrategyType]scorer{
// Fit is a plugin that checks if a node has sufficient resources.
type Fit struct {
ignoredResources sets.String
ignoredResourceGroups sets.String
handle framework.Handle
ignoredResources sets.String
ignoredResourceGroups sets.String
enableInPlacePodVerticalScaling bool
handle framework.Handle
resourceAllocationScorer
}
@ -123,10 +124,11 @@ func NewFit(plArgs runtime.Object, h framework.Handle, fts feature.Features) (fr
}
return &Fit{
ignoredResources: sets.NewString(args.IgnoredResources...),
ignoredResourceGroups: sets.NewString(args.IgnoredResourceGroups...),
handle: h,
resourceAllocationScorer: *scorePlugin(args),
ignoredResources: sets.NewString(args.IgnoredResources...),
ignoredResourceGroups: sets.NewString(args.IgnoredResourceGroups...),
enableInPlacePodVerticalScaling: fts.EnableInPlacePodVerticalScaling,
handle: h,
resourceAllocationScorer: *scorePlugin(args),
}, nil
}
@ -202,12 +204,15 @@ func getPreFilterState(cycleState *framework.CycleState) (*preFilterState, error
// EventsToRegister returns the possible events that may make a Pod
// failed by this plugin schedulable.
// NOTE: if in-place-update (KEP 1287) gets implemented, then PodUpdate event
// should be registered for this plugin since a Pod update may free up resources
// that make other Pods schedulable.
func (f *Fit) EventsToRegister() []framework.ClusterEvent {
podActionType := framework.Delete
if f.enableInPlacePodVerticalScaling {
// If InPlacePodVerticalScaling (KEP 1287) is enabled, then PodUpdate event should be registered
// for this plugin since a Pod update may free up resources that make other Pods schedulable.
podActionType |= framework.Update
}
return []framework.ClusterEvent{
{Resource: framework.Pod, ActionType: framework.Delete},
{Resource: framework.Pod, ActionType: podActionType},
{Resource: framework.Node, ActionType: framework.Add | framework.Update},
}
}

View File

@ -22,6 +22,7 @@ import (
"reflect"
"testing"
"github.com/google/go-cmp/cmp"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/kubernetes/pkg/scheduler/apis/config"
@ -893,3 +894,38 @@ func BenchmarkTestFitScore(b *testing.B) {
})
}
}
func TestEventsToRegister(t *testing.T) {
tests := []struct {
name string
inPlacePodVerticalScalingEnabled bool
expectedClusterEvents []framework.ClusterEvent
}{
{
"Register events with InPlacePodVerticalScaling feature enabled",
true,
[]framework.ClusterEvent{
{Resource: "Pod", ActionType: framework.Update | framework.Delete},
{Resource: "Node", ActionType: framework.Add | framework.Update},
},
},
{
"Register events with InPlacePodVerticalScaling feature disabled",
false,
[]framework.ClusterEvent{
{Resource: "Pod", ActionType: framework.Delete},
{Resource: "Node", ActionType: framework.Add | framework.Update},
},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
fp := &Fit{enableInPlacePodVerticalScaling: test.inPlacePodVerticalScalingEnabled}
actualClusterEvents := fp.EventsToRegister()
if diff := cmp.Diff(test.expectedClusterEvents, actualClusterEvents); diff != "" {
t.Error("Cluster Events doesn't match extected events (-expected +actual):\n", diff)
}
})
}
}

View File

@ -55,6 +55,7 @@ func NewInTreeRegistry() runtime.Registry {
EnableMatchLabelKeysInPodTopologySpread: feature.DefaultFeatureGate.Enabled(features.MatchLabelKeysInPodTopologySpread),
EnablePodSchedulingReadiness: feature.DefaultFeatureGate.Enabled(features.PodSchedulingReadiness),
EnablePodDisruptionConditions: feature.DefaultFeatureGate.Enabled(features.PodDisruptionConditions),
EnableInPlacePodVerticalScaling: feature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling),
}
registry := runtime.Registry{

View File

@ -29,7 +29,11 @@ import (
"k8s.io/apimachinery/pkg/labels"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
quota "k8s.io/apiserver/pkg/quota/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/features"
schedutil "k8s.io/kubernetes/pkg/scheduler/util"
)
@ -724,15 +728,28 @@ func max(a, b int64) int64 {
// resourceRequest = max(sum(podSpec.Containers), podSpec.InitContainers) + overHead
func calculateResource(pod *v1.Pod) (res Resource, non0CPU int64, non0Mem int64) {
inPlacePodVerticalScalingEnabled := utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling)
resPtr := &res
for _, c := range pod.Spec.Containers {
resPtr.Add(c.Resources.Requests)
non0CPUReq, non0MemReq := schedutil.GetNonzeroRequests(&c.Resources.Requests)
req := c.Resources.Requests
if inPlacePodVerticalScalingEnabled {
cs, found := podutil.GetContainerStatus(pod.Status.ContainerStatuses, c.Name)
if found {
if pod.Status.Resize == v1.PodResizeStatusInfeasible {
req = cs.ResourcesAllocated
} else {
req = quota.Max(c.Resources.Requests, cs.ResourcesAllocated)
}
}
}
resPtr.Add(req)
non0CPUReq, non0MemReq := schedutil.GetNonzeroRequests(&req)
non0CPU += non0CPUReq
non0Mem += non0MemReq
// No non-zero resources for GPUs or opaque resources.
}
// Note: In-place resize is not allowed for InitContainers, so no need to check for ResizeStatus value
for _, ic := range pod.Spec.InitContainers {
resPtr.SetMaxResource(ic.Resources.Requests)
non0CPUReq, non0MemReq := schedutil.GetNonzeroRequests(&ic.Resources.Requests)

View File

@ -28,6 +28,9 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/kubernetes/pkg/features"
)
func TestNewResource(t *testing.T) {
@ -1458,3 +1461,101 @@ func TestFitError_Error(t *testing.T) {
})
}
}
func TestCalculatePodResourcesWithResize(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, true)()
cpu500m := resource.MustParse("500m")
mem500M := resource.MustParse("500Mi")
cpu700m := resource.MustParse("700m")
mem800M := resource.MustParse("800Mi")
testpod := v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "pod_resize_test",
Name: "testpod",
UID: types.UID("testpod"),
},
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "c1",
Resources: v1.ResourceRequirements{Requests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M}},
},
},
},
Status: v1.PodStatus{
Phase: v1.PodRunning,
Resize: "",
ContainerStatuses: []v1.ContainerStatus{
{
Name: "c1",
ResourcesAllocated: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
},
},
},
}
tests := []struct {
name string
requests v1.ResourceList
resourcesAllocated v1.ResourceList
resizeStatus v1.PodResizeStatus
expectedResource Resource
expectedNon0CPU int64
expectedNon0Mem int64
}{
{
name: "Pod with no pending resize",
requests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
resourcesAllocated: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
resizeStatus: "",
expectedResource: Resource{MilliCPU: cpu500m.MilliValue(), Memory: mem500M.Value()},
expectedNon0CPU: cpu500m.MilliValue(),
expectedNon0Mem: mem500M.Value(),
},
{
name: "Pod with resize in progress",
requests: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
resourcesAllocated: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
resizeStatus: v1.PodResizeStatusInProgress,
expectedResource: Resource{MilliCPU: cpu500m.MilliValue(), Memory: mem500M.Value()},
expectedNon0CPU: cpu500m.MilliValue(),
expectedNon0Mem: mem500M.Value(),
},
{
name: "Pod with deferred resize",
requests: v1.ResourceList{v1.ResourceCPU: cpu700m, v1.ResourceMemory: mem800M},
resourcesAllocated: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
resizeStatus: v1.PodResizeStatusDeferred,
expectedResource: Resource{MilliCPU: cpu700m.MilliValue(), Memory: mem800M.Value()},
expectedNon0CPU: cpu700m.MilliValue(),
expectedNon0Mem: mem800M.Value(),
},
{
name: "Pod with infeasible resize",
requests: v1.ResourceList{v1.ResourceCPU: cpu700m, v1.ResourceMemory: mem800M},
resourcesAllocated: v1.ResourceList{v1.ResourceCPU: cpu500m, v1.ResourceMemory: mem500M},
resizeStatus: v1.PodResizeStatusInfeasible,
expectedResource: Resource{MilliCPU: cpu500m.MilliValue(), Memory: mem500M.Value()},
expectedNon0CPU: cpu500m.MilliValue(),
expectedNon0Mem: mem500M.Value(),
},
}
for _, tt := range tests {
pod := testpod.DeepCopy()
pod.Spec.Containers[0].Resources.Requests = tt.requests
pod.Status.ContainerStatuses[0].ResourcesAllocated = tt.resourcesAllocated
pod.Status.Resize = tt.resizeStatus
res, non0CPU, non0Mem := calculateResource(pod)
if !reflect.DeepEqual(tt.expectedResource, res) {
t.Errorf("Test: %s expected resource: %+v, got: %+v", tt.name, tt.expectedResource, res)
}
if non0CPU != tt.expectedNon0CPU {
t.Errorf("Test: %s expected non0CPU: %d, got: %d", tt.name, tt.expectedNon0CPU, non0CPU)
}
if non0Mem != tt.expectedNon0Mem {
t.Errorf("Test: %s expected non0Mem: %d, got: %d", tt.name, tt.expectedNon0Mem, non0Mem)
}
}
}

View File

@ -38,10 +38,12 @@ import (
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/informers"
listersv1 "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/framework"
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/interpodaffinity"
"k8s.io/kubernetes/pkg/scheduler/internal/heap"
@ -683,14 +685,47 @@ func (p *PriorityQueue) AssignedPodAdded(pod *v1.Pod) {
p.lock.Unlock()
}
// isPodResourcesResizedDown returns true if a pod CPU and/or memory resize request has been
// admitted by kubelet, is 'InProgress', and results in a net sizing down of updated resources.
// It returns false if either CPU or memory resource is net resized up, or if no resize is in progress.
func isPodResourcesResizedDown(pod *v1.Pod) bool {
if utilfeature.DefaultFeatureGate.Enabled(features.InPlacePodVerticalScaling) {
// TODO(vinaykul,wangchen615,InPlacePodVerticalScaling): Fix this to determine when a
// pod is truly resized down (might need oldPod if we cannot determine from Status alone)
if pod.Status.Resize == v1.PodResizeStatusInProgress {
return true
}
}
return false
}
// AssignedPodUpdated is called when a bound pod is updated. Change of labels
// may make pending pods with matching affinity terms schedulable.
func (p *PriorityQueue) AssignedPodUpdated(pod *v1.Pod) {
p.lock.Lock()
p.movePodsToActiveOrBackoffQueue(p.getUnschedulablePodsWithMatchingAffinityTerm(pod), AssignedPodUpdate)
if isPodResourcesResizedDown(pod) {
p.moveAllToActiveOrBackoffQueue(AssignedPodUpdate, nil)
} else {
p.movePodsToActiveOrBackoffQueue(p.getUnschedulablePodsWithMatchingAffinityTerm(pod), AssignedPodUpdate)
}
p.lock.Unlock()
}
// NOTE: this function assumes a lock has been acquired in the caller.
// moveAllToActiveOrBackoffQueue moves all pods from unschedulablePods to activeQ or backoffQ.
// This function adds all pods and then signals the condition variable to ensure that
// if Pop() is waiting for an item, it receives the signal after all the pods are in the
// queue and the head is the highest priority pod.
func (p *PriorityQueue) moveAllToActiveOrBackoffQueue(event framework.ClusterEvent, preCheck PreEnqueueCheck) {
unschedulablePods := make([]*framework.QueuedPodInfo, 0, len(p.unschedulablePods.podInfoMap))
for _, pInfo := range p.unschedulablePods.podInfoMap {
if preCheck == nil || preCheck(pInfo.Pod) {
unschedulablePods = append(unschedulablePods, pInfo)
}
}
p.movePodsToActiveOrBackoffQueue(unschedulablePods, event)
}
// MoveAllToActiveOrBackoffQueue moves all pods from unschedulablePods to activeQ or backoffQ.
// This function adds all pods and then signals the condition variable to ensure that
// if Pop() is waiting for an item, it receives the signal after all the pods are in the
@ -698,13 +733,7 @@ func (p *PriorityQueue) AssignedPodUpdated(pod *v1.Pod) {
func (p *PriorityQueue) MoveAllToActiveOrBackoffQueue(event framework.ClusterEvent, preCheck PreEnqueueCheck) {
p.lock.Lock()
defer p.lock.Unlock()
unschedulablePods := make([]*framework.QueuedPodInfo, 0, len(p.unschedulablePods.podInfoMap))
for _, pInfo := range p.unschedulablePods.podInfoMap {
if preCheck == nil || preCheck(pInfo.Pod) {
unschedulablePods = append(unschedulablePods, pInfo)
}
}
p.movePodsToActiveOrBackoffQueue(unschedulablePods, event)
p.moveAllToActiveOrBackoffQueue(event, preCheck)
}
// NOTE: this function assumes lock has been acquired in caller

View File

@ -31,12 +31,15 @@ import (
genericadmissioninitializer "k8s.io/apiserver/pkg/admission/initializer"
"k8s.io/apiserver/pkg/admission/plugin/resourcequota"
resourcequotaapi "k8s.io/apiserver/pkg/admission/plugin/resourcequota/apis/resourcequota"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/fake"
testcore "k8s.io/client-go/testing"
"k8s.io/client-go/tools/cache"
featuregatetesting "k8s.io/component-base/featuregate/testing"
api "k8s.io/kubernetes/pkg/apis/core"
"k8s.io/kubernetes/pkg/features"
kubeapiserveradmission "k8s.io/kubernetes/pkg/kubeapiserver/admission"
"k8s.io/kubernetes/pkg/quota/v1/install"
)
@ -2105,3 +2108,112 @@ func TestAdmitAllowDecreaseUsageWithoutCoveringQuota(t *testing.T) {
t.Errorf("Expected no error for decreasing a limited resource without quota, got %v", err)
}
}
func TestPodResourcesResizeWithResourceQuota(t *testing.T) {
stopCh := make(chan struct{})
defer close(stopCh)
resourceQuota := &corev1.ResourceQuota{
ObjectMeta: metav1.ObjectMeta{Name: "quota", Namespace: "test", ResourceVersion: "124"},
Status: corev1.ResourceQuotaStatus{
Hard: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("1000m"),
corev1.ResourceMemory: resource.MustParse("1000Mi"),
corev1.ResourcePods: resource.MustParse("5"),
},
Used: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("500m"),
corev1.ResourceMemory: resource.MustParse("500Mi"),
corev1.ResourcePods: resource.MustParse("1"),
},
},
}
currentPod := validPod("testpod", 1, getResourceRequirements(getResourceList("500m", "500Mi"), getResourceList("500m", "500Mi")))
currentPod.ResourceVersion = "1"
type testCase struct {
newPod *api.Pod
fgEnabled bool
expectError string
expectActions sets.String
}
testCases := map[string]testCase{
"pod resize featuregate enabled, increase CPU within quota": {
newPod: validPod("testpod", 1, getResourceRequirements(getResourceList("990m", "500Mi"), getResourceList("990m", "500Mi"))),
fgEnabled: true,
expectError: "",
expectActions: sets.NewString(strings.Join([]string{"update", "resourcequotas", "status"}, "-")),
},
"pod resize featuregate enabled, increase memory beyond quota": {
newPod: validPod("testpod", 1, getResourceRequirements(getResourceList("500m", "1100Mi"), getResourceList("500m", "1100Mi"))),
fgEnabled: true,
expectError: "forbidden: exceeded quota: quota, requested: memory=600Mi, used: memory=500Mi, limited: memory=1000Mi",
expectActions: sets.NewString(strings.Join([]string{"update", "resourcequotas", "status"}, "-")),
},
"pod resize featuregate enabled, decrease CPU within quota": {
newPod: validPod("testpod", 1, getResourceRequirements(getResourceList("300m", "500Mi"), getResourceList("300m", "500Mi"))),
fgEnabled: true,
expectError: "",
expectActions: sets.NewString(strings.Join([]string{"update", "resourcequotas", "status"}, "-")),
},
"pod resize featuregate disabled, decrease memory within quota": {
newPod: validPod("testpod", 1, getResourceRequirements(getResourceList("500m", "400Mi"), getResourceList("500m", "400Mi"))),
fgEnabled: false,
expectError: "",
expectActions: nil,
},
"pod resize featuregate disabled, increase CPU beyond quota": {
newPod: validPod("testpod", 1, getResourceRequirements(getResourceList("1010m", "500Mi"), getResourceList("1010m", "500Mi"))),
fgEnabled: false,
expectError: "",
expectActions: nil,
},
}
for desc, tc := range testCases {
t.Run(desc, func(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.InPlacePodVerticalScaling, tc.fgEnabled)()
kubeClient := fake.NewSimpleClientset(resourceQuota)
informerFactory := informers.NewSharedInformerFactory(kubeClient, 0)
handler, err := createHandler(kubeClient, informerFactory, stopCh)
if err != nil {
t.Errorf("Error occurred while creating admission plugin: %v", err)
}
informerFactory.Core().V1().ResourceQuotas().Informer().GetIndexer().Add(resourceQuota)
tc.newPod.ResourceVersion = "2"
err = handler.Validate(context.TODO(), admission.NewAttributesRecord(tc.newPod, currentPod,
api.Kind("Pod").WithVersion("version"), tc.newPod.Namespace, tc.newPod.Name,
corev1.Resource("pods").WithVersion("version"), "", admission.Update, &metav1.UpdateOptions{},
false, nil), nil)
if tc.expectError == "" {
if err != nil {
t.Errorf("Unexpected error: %v", err)
}
if tc.expectActions != nil {
if len(kubeClient.Actions()) == 0 {
t.Errorf("Expected a client action")
}
} else {
if len(kubeClient.Actions()) > 0 {
t.Errorf("Got client action(s) when not expected")
}
}
actionSet := sets.NewString()
for _, action := range kubeClient.Actions() {
actionSet.Insert(strings.Join([]string{action.GetVerb(), action.GetResource().Resource,
action.GetSubresource()}, "-"))
}
if !actionSet.HasAll(tc.expectActions.List()...) {
t.Errorf("Expected actions:\n%v\n but got:\n%v\nDifference:\n%v", tc.expectActions,
actionSet, tc.expectActions.Difference(actionSet))
}
} else {
if err == nil || !strings.Contains(err.Error(), tc.expectError) {
t.Errorf("Expected error containing '%s' got err: '%v'", tc.expectError, err)
}
}
})
}
}

File diff suppressed because it is too large

View File

@ -722,6 +722,12 @@ message Container {
// +optional
optional ResourceRequirements resources = 8;
// Resources resize policy for the container.
// +featureGate=InPlacePodVerticalScaling
// +optional
// +listType=atomic
repeated ContainerResizePolicy resizePolicy = 23;
// Pod volumes to mount into the container's filesystem.
// Cannot be updated.
// +optional
@ -862,6 +868,17 @@ message ContainerPort {
optional string hostIP = 5;
}
// ContainerResizePolicy represents resource resize policy for a single container.
message ContainerResizePolicy {
// Name of the resource type to which this resource resize policy applies.
// Supported values: cpu, memory.
optional string resourceName = 1;
// Resource resize policy applicable to the specified resource name.
// If not specified, it defaults to RestartNotRequired.
optional string policy = 2;
}
// ContainerState holds a possible state of container.
// Only one of its members may be specified.
// If none of them is specified, the default one is ContainerStateWaiting.
@ -964,6 +981,19 @@ message ContainerStatus {
// Is always true when no startupProbe is defined.
// +optional
optional bool started = 9;
// ResourcesAllocated represents the compute resources allocated for this container by the
// node. Kubelet sets this value to Container.Resources.Requests upon successful pod admission
// and after successfully admitting desired pod resize.
// +featureGate=InPlacePodVerticalScaling
// +optional
map<string, k8s.io.apimachinery.pkg.api.resource.Quantity> resourcesAllocated = 10;
// Resources represents the compute resource requests and limits that have been successfully
// enacted on the running container after it has been started or has been successfully resized.
// +featureGate=InPlacePodVerticalScaling
// +optional
optional ResourceRequirements resources = 11;
}
// DaemonEndpoint contains information about a single Daemon endpoint.
@ -1320,6 +1350,12 @@ message EphemeralContainerCommon {
// +optional
optional ResourceRequirements resources = 8;
// Resources resize policy for the container.
// +featureGate=InPlacePodVerticalScaling
// +optional
// +listType=atomic
repeated ContainerResizePolicy resizePolicy = 23;
// Pod volumes to mount into the container's filesystem. Subpath mounts are not allowed for ephemeral containers.
// Cannot be updated.
// +optional
@ -3935,6 +3971,13 @@ message PodStatus {
// Status for any ephemeral containers that have run in this pod.
// +optional
repeated ContainerStatus ephemeralContainerStatuses = 13;
// Status of resources resize desired for pod's containers.
// It is empty if no resources resize is pending.
// Any changes to container resources will automatically set this to "Proposed"
// +featureGate=InPlacePodVerticalScaling
// +optional
optional string resize = 14;
}
// PodStatusResult is a wrapper for PodStatus returned by kubelet that can be encode/decoded

View File

@ -2263,6 +2263,33 @@ const (
PullIfNotPresent PullPolicy = "IfNotPresent"
)
// ResourceResizePolicy specifies how Kubernetes should handle resource resize.
type ResourceResizePolicy string
// These are the valid resource resize policy values:
const (
// RestartNotRequired tells Kubernetes to resize the container in-place
// without restarting it, if possible. Kubernetes may however choose to
// restart the container if it is unable to actuate resize without a
// restart, for example if the runtime does not support restart-free resizing.
RestartNotRequired ResourceResizePolicy = "RestartNotRequired"
// 'RestartRequired' tells Kubernetes to resize the container in-place
// by stopping and starting the container when new resources are applied.
// This is needed for legacy applications, for example Java apps using the
// -XmxN flag, which are unable to use resized memory without restarting.
RestartRequired ResourceResizePolicy = "RestartRequired"
)
// ContainerResizePolicy represents resource resize policy for a single container.
type ContainerResizePolicy struct {
// Name of the resource type to which this resource resize policy applies.
// Supported values: cpu, memory.
ResourceName ResourceName `json:"resourceName" protobuf:"bytes,1,opt,name=resourceName,casttype=ResourceName"`
// Resource resize policy applicable to the specified resource name.
// If not specified, it defaults to RestartNotRequired.
Policy ResourceResizePolicy `json:"policy" protobuf:"bytes,2,opt,name=policy,casttype=ResourceResizePolicy"`
}
// PreemptionPolicy describes a policy for if/when to preempt a pod.
// +enum
type PreemptionPolicy string
@ -2412,6 +2439,11 @@ type Container struct {
// More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
// +optional
Resources ResourceRequirements `json:"resources,omitempty" protobuf:"bytes,8,opt,name=resources"`
// Resources resize policy for the container.
// +featureGate=InPlacePodVerticalScaling
// +optional
// +listType=atomic
ResizePolicy []ContainerResizePolicy `json:"resizePolicy,omitempty" protobuf:"bytes,23,rep,name=resizePolicy"`
// Pod volumes to mount into the container's filesystem.
// Cannot be updated.
// +optional
@ -2658,6 +2690,17 @@ type ContainerStatus struct {
// Is always true when no startupProbe is defined.
// +optional
Started *bool `json:"started,omitempty" protobuf:"varint,9,opt,name=started"`
// ResourcesAllocated represents the compute resources allocated for this container by the
// node. Kubelet sets this value to Container.Resources.Requests upon successful pod admission
// and after successfully admitting desired pod resize.
// +featureGate=InPlacePodVerticalScaling
// +optional
ResourcesAllocated ResourceList `json:"resourcesAllocated,omitempty" protobuf:"bytes,10,rep,name=resourcesAllocated,casttype=ResourceList,castkey=ResourceName"`
// Resources represents the compute resource requests and limits that have been successfully
// enacted on the running container after it has been started or has been successfully resized.
// +featureGate=InPlacePodVerticalScaling
// +optional
Resources *ResourceRequirements `json:"resources,omitempty" protobuf:"bytes,11,opt,name=resources"`
}
// PodPhase is a label for the condition of a pod at the current time.
@ -2750,6 +2793,20 @@ type PodCondition struct {
Message string `json:"message,omitempty" protobuf:"bytes,6,opt,name=message"`
}
// PodResizeStatus shows status of desired resize of a pod's containers.
type PodResizeStatus string
const (
// Pod resources resize has been requested and will be evaluated by node.
PodResizeStatusProposed PodResizeStatus = "Proposed"
// Pod resources resize has been accepted by node and is being actuated.
PodResizeStatusInProgress PodResizeStatus = "InProgress"
// Node cannot resize the pod at this time and will keep retrying.
PodResizeStatusDeferred PodResizeStatus = "Deferred"
// Requested pod resize is not feasible and will not be re-evaluated.
PodResizeStatusInfeasible PodResizeStatus = "Infeasible"
)
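
For illustration only: a minimal sketch of how the new fields and constants above fit together in a pod spec, using the external v1 types; the container name and image are hypothetical.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	// Opt the container into restart-free CPU resizes, but require a restart for memory changes.
	c := v1.Container{
		Name:  "app",
		Image: "registry.k8s.io/pause:3.9",
		Resources: v1.ResourceRequirements{
			Requests: v1.ResourceList{
				v1.ResourceCPU:    resource.MustParse("500m"),
				v1.ResourceMemory: resource.MustParse("256Mi"),
			},
		},
		ResizePolicy: []v1.ContainerResizePolicy{
			{ResourceName: v1.ResourceCPU, Policy: v1.RestartNotRequired},
			{ResourceName: v1.ResourceMemory, Policy: v1.RestartRequired},
		},
	}

	// After the apiserver accepts a resources update, Status.Resize moves through
	// Proposed and then InProgress, Deferred, or Infeasible as the node acts on it.
	pod := v1.Pod{Spec: v1.PodSpec{Containers: []v1.Container{c}}}
	pod.Status.Resize = v1.PodResizeStatusProposed
	fmt.Println(pod.Status.Resize)
}
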
// RestartPolicy describes how the container should be restarted.
// Only one of the following restart policies may be specified.
// If none of the following policies is specified, the default one
@ -3888,6 +3945,11 @@ type EphemeralContainerCommon struct {
// already allocated to the pod.
// +optional
Resources ResourceRequirements `json:"resources,omitempty" protobuf:"bytes,8,opt,name=resources"`
// Resources resize policy for the container.
// +featureGate=InPlacePodVerticalScaling
// +optional
// +listType=atomic
ResizePolicy []ContainerResizePolicy `json:"resizePolicy,omitempty" protobuf:"bytes,23,rep,name=resizePolicy"`
// Pod volumes to mount into the container's filesystem. Subpath mounts are not allowed for ephemeral containers.
// Cannot be updated.
// +optional
@ -4079,6 +4141,13 @@ type PodStatus struct {
// Status for any ephemeral containers that have run in this pod.
// +optional
EphemeralContainerStatuses []ContainerStatus `json:"ephemeralContainerStatuses,omitempty" protobuf:"bytes,13,rep,name=ephemeralContainerStatuses"`
// Status of resources resize desired for pod's containers.
// It is empty if no resources resize is pending.
// Any changes to container resources will automatically set this to "Proposed"
// +featureGate=InPlacePodVerticalScaling
// +optional
Resize PodResizeStatus `json:"resize,omitempty" protobuf:"bytes,14,opt,name=resize,casttype=PodResizeStatus"`
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

View File

@ -346,6 +346,7 @@ var map_Container = map[string]string{
"envFrom": "List of sources to populate environment variables in the container. The keys defined within a source must be a C_IDENTIFIER. All invalid keys will be reported as an event when the container is starting. When a key exists in multiple sources, the value associated with the last source will take precedence. Values defined by an Env with a duplicate key will take precedence. Cannot be updated.",
"env": "List of environment variables to set in the container. Cannot be updated.",
"resources": "Compute Resources required by this container. Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/",
"resizePolicy": "Resources resize policy for the container.",
"volumeMounts": "Pod volumes to mount into the container's filesystem. Cannot be updated.",
"volumeDevices": "volumeDevices is the list of block devices to be used by the container.",
"livenessProbe": "Periodic probe of container liveness. Container will be restarted if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes",
@ -388,6 +389,16 @@ func (ContainerPort) SwaggerDoc() map[string]string {
return map_ContainerPort
}
var map_ContainerResizePolicy = map[string]string{
"": "ContainerResizePolicy represents resource resize policy for a single container.",
"resourceName": "Name of the resource type to which this resource resize policy applies. Supported values: cpu, memory.",
"policy": "Resource resize policy applicable to the specified resource name. If not specified, it defaults to RestartNotRequired.",
}
func (ContainerResizePolicy) SwaggerDoc() map[string]string {
return map_ContainerResizePolicy
}
var map_ContainerState = map[string]string{
"": "ContainerState holds a possible state of container. Only one of its members may be specified. If none of them is specified, the default one is ContainerStateWaiting.",
"waiting": "Details about a waiting container",
@ -434,16 +445,18 @@ func (ContainerStateWaiting) SwaggerDoc() map[string]string {
}
var map_ContainerStatus = map[string]string{
"": "ContainerStatus contains details for the current status of this container.",
"name": "This must be a DNS_LABEL. Each container in a pod must have a unique name. Cannot be updated.",
"state": "Details about the container's current condition.",
"lastState": "Details about the container's last termination condition.",
"ready": "Specifies whether the container has passed its readiness probe.",
"restartCount": "The number of times the container has been restarted.",
"image": "The image the container is running. More info: https://kubernetes.io/docs/concepts/containers/images.",
"imageID": "ImageID of the container's image.",
"containerID": "Container's ID in the format '<type>://<container_id>'.",
"started": "Specifies whether the container has passed its startup probe. Initialized as false, becomes true after startupProbe is considered successful. Resets to false when the container is restarted, or if kubelet loses state temporarily. Is always true when no startupProbe is defined.",
"": "ContainerStatus contains details for the current status of this container.",
"name": "This must be a DNS_LABEL. Each container in a pod must have a unique name. Cannot be updated.",
"state": "Details about the container's current condition.",
"lastState": "Details about the container's last termination condition.",
"ready": "Specifies whether the container has passed its readiness probe.",
"restartCount": "The number of times the container has been restarted.",
"image": "The image the container is running. More info: https://kubernetes.io/docs/concepts/containers/images.",
"imageID": "ImageID of the container's image.",
"containerID": "Container's ID in the format '<type>://<container_id>'.",
"started": "Specifies whether the container has passed its startup probe. Initialized as false, becomes true after startupProbe is considered successful. Resets to false when the container is restarted, or if kubelet loses state temporarily. Is always true when no startupProbe is defined.",
"resourcesAllocated": "ResourcesAllocated represents the compute resources allocated for this container by the node. Kubelet sets this value to Container.Resources.Requests upon successful pod admission and after successfully admitting desired pod resize.",
"resources": "Resources represents the compute resource requests and limits that have been successfully enacted on the running container after it has been started or has been successfully resized.",
}
func (ContainerStatus) SwaggerDoc() map[string]string {
@ -609,6 +622,7 @@ var map_EphemeralContainerCommon = map[string]string{
"envFrom": "List of sources to populate environment variables in the container. The keys defined within a source must be a C_IDENTIFIER. All invalid keys will be reported as an event when the container is starting. When a key exists in multiple sources, the value associated with the last source will take precedence. Values defined by an Env with a duplicate key will take precedence. Cannot be updated.",
"env": "List of environment variables to set in the container. Cannot be updated.",
"resources": "Resources are not allowed for ephemeral containers. Ephemeral containers use spare resources already allocated to the pod.",
"resizePolicy": "Resources resize policy for the container.",
"volumeMounts": "Pod volumes to mount into the container's filesystem. Subpath mounts are not allowed for ephemeral containers. Cannot be updated.",
"volumeDevices": "volumeDevices is the list of block devices to be used by the container.",
"livenessProbe": "Probes are not allowed for ephemeral containers.",
@ -1724,6 +1738,7 @@ var map_PodStatus = map[string]string{
"containerStatuses": "The list has one entry per container in the manifest. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#pod-and-container-status",
"qosClass": "The Quality of Service (QOS) classification assigned to the pod based on resource requirements See PodQOSClass type for available QOS classes More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-qos/#quality-of-service-classes",
"ephemeralContainerStatuses": "Status for any ephemeral containers that have run in this pod.",
"resize": "Status of resources resize desired for pod's containers. It is empty if no resources resize is pending. Any changes to container resources will automatically set this to \"Proposed\"",
}
func (PodStatus) SwaggerDoc() map[string]string {

View File

@ -788,6 +788,11 @@ func (in *Container) DeepCopyInto(out *Container) {
}
}
in.Resources.DeepCopyInto(&out.Resources)
if in.ResizePolicy != nil {
in, out := &in.ResizePolicy, &out.ResizePolicy
*out = make([]ContainerResizePolicy, len(*in))
copy(*out, *in)
}
if in.VolumeMounts != nil {
in, out := &in.VolumeMounts, &out.VolumeMounts
*out = make([]VolumeMount, len(*in))
@ -875,6 +880,22 @@ func (in *ContainerPort) DeepCopy() *ContainerPort {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ContainerResizePolicy) DeepCopyInto(out *ContainerResizePolicy) {
*out = *in
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ContainerResizePolicy.
func (in *ContainerResizePolicy) DeepCopy() *ContainerResizePolicy {
if in == nil {
return nil
}
out := new(ContainerResizePolicy)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *ContainerState) DeepCopyInto(out *ContainerState) {
*out = *in
@ -967,6 +988,18 @@ func (in *ContainerStatus) DeepCopyInto(out *ContainerStatus) {
*out = new(bool)
**out = **in
}
if in.ResourcesAllocated != nil {
in, out := &in.ResourcesAllocated, &out.ResourcesAllocated
*out = make(ResourceList, len(*in))
for key, val := range *in {
(*out)[key] = val.DeepCopy()
}
}
if in.Resources != nil {
in, out := &in.Resources, &out.Resources
*out = new(ResourceRequirements)
(*in).DeepCopyInto(*out)
}
return
}
@@ -1382,6 +1415,11 @@ func (in *EphemeralContainerCommon) DeepCopyInto(out *EphemeralContainerCommon)
}
}
in.Resources.DeepCopyInto(&out.Resources)
if in.ResizePolicy != nil {
in, out := &in.ResizePolicy, &out.ResizePolicy
*out = make([]ContainerResizePolicy, len(*in))
copy(*out, *in)
}
if in.VolumeMounts != nil {
in, out := &in.VolumeMounts, &out.VolumeMounts
*out = make([]VolumeMount, len(*in))

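The generated deepcopy hunks above allocate a fresh ResizePolicy slice (and fresh ResourcesAllocated/Resources values) instead of copying only the struct header. The short sketch below shows why, using stripped-down local types rather than the generated k8s types; the type names echo the diff and the values are made up:

package main

import "fmt"

type ContainerResizePolicy struct {
	ResourceName string
	Policy       string
}

type Container struct {
	Name         string
	ResizePolicy []ContainerResizePolicy
}

// deepCopy mimics the generated DeepCopyInto: allocate a new slice and copy
// the elements so the copy shares no memory with the original.
func deepCopy(in *Container) *Container {
	out := *in // copies Name and the slice header, which still aliases in's backing array
	if in.ResizePolicy != nil {
		out.ResizePolicy = make([]ContainerResizePolicy, len(in.ResizePolicy))
		copy(out.ResizePolicy, in.ResizePolicy)
	}
	return &out
}

func main() {
	orig := &Container{
		Name:         "app",
		ResizePolicy: []ContainerResizePolicy{{ResourceName: "cpu", Policy: "RestartNotRequired"}},
	}

	deep := deepCopy(orig)
	deep.ResizePolicy[0].Policy = "Changed-in-copy"
	fmt.Println("after deep copy mutation:   ", orig.ResizePolicy[0].Policy) // still RestartNotRequired

	shallow := *orig // plain assignment: slice header copied, backing array shared
	shallow.ResizePolicy[0].Policy = "Changed-in-copy"
	fmt.Println("after shallow copy mutation:", orig.ResizePolicy[0].Policy) // now Changed-in-copy
}
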
View File

@@ -551,6 +551,12 @@
}
]
},
"resizePolicy": [
{
"resourceName": "resourceNameValue",
"policy": "policyValue"
}
],
"volumeMounts": [
{
"name": "nameValue",
@@ -828,6 +834,12 @@
}
]
},
"resizePolicy": [
{
"resourceName": "resourceNameValue",
"policy": "policyValue"
}
],
"volumeMounts": [
{
"name": "nameValue",
@@ -1105,6 +1117,12 @@
}
]
},
"resizePolicy": [
{
"resourceName": "resourceNameValue",
"policy": "policyValue"
}
],
"volumeMounts": [
{
"name": "nameValue",

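The fixture hunks above add the same resizePolicy stanza, with placeholder values (resourceNameValue, policyValue), to each serialized pod in the testdata. A small sketch of decoding and re-encoding that stanza with encoding/json; the local containerFragment type and the inline fixture string are illustrative stand-ins, not the actual harness these files belong to:

package main

import (
	"encoding/json"
	"fmt"
	"log"
)

// containerFragment models only the field under discussion; real fixtures
// serialize a full container.
type containerFragment struct {
	ResizePolicy []struct {
		ResourceName string `json:"resourceName"`
		Policy       string `json:"policy"`
	} `json:"resizePolicy"`
}

func main() {
	// Same shape as the fixture hunks above, placeholder values included.
	const fixture = `{
  "resizePolicy": [
    {"resourceName": "resourceNameValue", "policy": "policyValue"}
  ]
}`

	var c containerFragment
	if err := json.Unmarshal([]byte(fixture), &c); err != nil {
		log.Fatal(err)
	}

	// Re-encode to show the stanza survives a decode/encode round trip.
	out, err := json.MarshalIndent(&c, "", "  ")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(string(out))
}
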
View File

@@ -312,6 +312,9 @@ spec:
port: portValue
terminationGracePeriodSeconds: 7
timeoutSeconds: 3
resizePolicy:
- policy: policyValue
resourceName: resourceNameValue
resources:
claims:
- name: nameValue
@@ -515,6 +518,9 @@ spec:
port: portValue
terminationGracePeriodSeconds: 7
timeoutSeconds: 3
resizePolicy:
- policy: policyValue
resourceName: resourceNameValue
resources:
claims:
- name: nameValue
@@ -720,6 +726,9 @@ spec:
port: portValue
terminationGracePeriodSeconds: 7
timeoutSeconds: 3
resizePolicy:
- policy: policyValue
resourceName: resourceNameValue
resources:
claims:
- name: nameValue

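The YAML fixtures repeat the same stanza for pod templates. End to end, the behavior these files exercise is driven by mutating a running pod's container resources and then observing status.resize, which the documentation above says moves to "Proposed" while a resize is pending. A hedged client-go sketch under the following assumptions: the cluster has the InPlacePodVerticalScaling feature gate enabled, the client libraries include this PR's API changes, and a pod and container both named app already exist in the default namespace; all names and quantities are illustrative:

package main

import (
	"context"
	"fmt"
	"log"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	// Build a client from the local kubeconfig (illustrative setup).
	cfg, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		log.Fatal(err)
	}
	client := kubernetes.NewForConfigOrDie(cfg)

	// Propose new CPU requests/limits for the running container. With the
	// feature gate enabled, the resources change is accepted in place
	// rather than rejected as an immutable-field update.
	patch := []byte(`{"spec":{"containers":[{"name":"app","resources":{"requests":{"cpu":"750m"},"limits":{"cpu":"1"}}}]}}`)
	if _, err := client.CoreV1().Pods("default").Patch(
		context.TODO(), "app", types.StrategicMergePatchType, patch, metav1.PatchOptions{},
	); err != nil {
		log.Fatal(err)
	}

	// Read the pod back; status.resize should report the pending resize
	// (e.g. "Proposed") until the kubelet actuates or defers it.
	pod, err := client.CoreV1().Pods("default").Get(context.TODO(), "app", metav1.GetOptions{})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("resize status:", pod.Status.Resize)
}
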
View File

@@ -552,6 +552,12 @@
}
]
},
"resizePolicy": [
{
"resourceName": "resourceNameValue",
"policy": "policyValue"
}
],
"volumeMounts": [
{
"name": "nameValue",
@@ -829,6 +835,12 @@
}
]
},
"resizePolicy": [
{
"resourceName": "resourceNameValue",
"policy": "policyValue"
}
],
"volumeMounts": [
{
"name": "nameValue",
@@ -1106,6 +1118,12 @@
}
]
},
"resizePolicy": [
{
"resourceName": "resourceNameValue",
"policy": "policyValue"
}
],
"volumeMounts": [
{
"name": "nameValue",

View File

@@ -320,6 +320,9 @@ spec:
port: portValue
terminationGracePeriodSeconds: 7
timeoutSeconds: 3
resizePolicy:
- policy: policyValue
resourceName: resourceNameValue
resources:
claims:
- name: nameValue
@@ -523,6 +526,9 @@ spec:
port: portValue
terminationGracePeriodSeconds: 7
timeoutSeconds: 3
resizePolicy:
- policy: policyValue
resourceName: resourceNameValue
resources:
claims:
- name: nameValue
@@ -728,6 +734,9 @@ spec:
port: portValue
terminationGracePeriodSeconds: 7
timeoutSeconds: 3
resizePolicy:
- policy: policyValue
resourceName: resourceNameValue
resources:
claims:
- name: nameValue

View File

@@ -553,6 +553,12 @@
}
]
},
"resizePolicy": [
{
"resourceName": "resourceNameValue",
"policy": "policyValue"
}
],
"volumeMounts": [
{
"name": "nameValue",
@@ -830,6 +836,12 @@
}
]
},
"resizePolicy": [
{
"resourceName": "resourceNameValue",
"policy": "policyValue"
}
],
"volumeMounts": [
{
"name": "nameValue",
@@ -1107,6 +1119,12 @@
}
]
},
"resizePolicy": [
{
"resourceName": "resourceNameValue",
"policy": "policyValue"
}
],
"volumeMounts": [
{
"name": "nameValue",

View File

@@ -312,6 +312,9 @@ spec:
port: portValue
terminationGracePeriodSeconds: 7
timeoutSeconds: 3
resizePolicy:
- policy: policyValue
resourceName: resourceNameValue
resources:
claims:
- name: nameValue
@@ -515,6 +518,9 @@ spec:
port: portValue
terminationGracePeriodSeconds: 7
timeoutSeconds: 3
resizePolicy:
- policy: policyValue
resourceName: resourceNameValue
resources:
claims:
- name: nameValue
@@ -720,6 +726,9 @@ spec:
port: portValue
terminationGracePeriodSeconds: 7
timeoutSeconds: 3
resizePolicy:
- policy: policyValue
resourceName: resourceNameValue
resources:
claims:
- name: nameValue

View File

@@ -552,6 +552,12 @@
}
]
},
"resizePolicy": [
{
"resourceName": "resourceNameValue",
"policy": "policyValue"
}
],
"volumeMounts": [
{
"name": "nameValue",
@@ -829,6 +835,12 @@
}
]
},
"resizePolicy": [
{
"resourceName": "resourceNameValue",
"policy": "policyValue"
}
],
"volumeMounts": [
{
"name": "nameValue",
@@ -1106,6 +1118,12 @@
}
]
},
"resizePolicy": [
{
"resourceName": "resourceNameValue",
"policy": "policyValue"
}
],
"volumeMounts": [
{
"name": "nameValue",

View File

@@ -320,6 +320,9 @@ spec:
port: portValue
terminationGracePeriodSeconds: 7
timeoutSeconds: 3
resizePolicy:
- policy: policyValue
resourceName: resourceNameValue
resources:
claims:
- name: nameValue
@@ -523,6 +526,9 @@ spec:
port: portValue
terminationGracePeriodSeconds: 7
timeoutSeconds: 3
resizePolicy:
- policy: policyValue
resourceName: resourceNameValue
resources:
claims:
- name: nameValue
@@ -728,6 +734,9 @@ spec:
port: portValue
terminationGracePeriodSeconds: 7
timeoutSeconds: 3
resizePolicy:
- policy: policyValue
resourceName: resourceNameValue
resources:
claims:
- name: nameValue

View File

@@ -552,6 +552,12 @@
}
]
},
"resizePolicy": [
{
"resourceName": "resourceNameValue",
"policy": "policyValue"
}
],
"volumeMounts": [
{
"name": "nameValue",
@@ -829,6 +835,12 @@
}
]
},
"resizePolicy": [
{
"resourceName": "resourceNameValue",
"policy": "policyValue"
}
],
"volumeMounts": [
{
"name": "nameValue",
@@ -1106,6 +1118,12 @@
}
]
},
"resizePolicy": [
{
"resourceName": "resourceNameValue",
"policy": "policyValue"
}
],
"volumeMounts": [
{
"name": "nameValue",

View File

@@ -322,6 +322,9 @@ spec:
port: portValue
terminationGracePeriodSeconds: 7
timeoutSeconds: 3
resizePolicy:
- policy: policyValue
resourceName: resourceNameValue
resources:
claims:
- name: nameValue
@@ -525,6 +528,9 @@ spec:
port: portValue
terminationGracePeriodSeconds: 7
timeoutSeconds: 3
resizePolicy:
- policy: policyValue
resourceName: resourceNameValue
resources:
claims:
- name: nameValue
@@ -730,6 +736,9 @@ spec:
port: portValue
terminationGracePeriodSeconds: 7
timeoutSeconds: 3
resizePolicy:
- policy: policyValue
resourceName: resourceNameValue
resources:
claims:
- name: nameValue

View File

@@ -552,6 +552,12 @@
}
]
},
"resizePolicy": [
{
"resourceName": "resourceNameValue",
"policy": "policyValue"
}
],
"volumeMounts": [
{
"name": "nameValue",
@@ -829,6 +835,12 @@
}
]
},
"resizePolicy": [
{
"resourceName": "resourceNameValue",
"policy": "policyValue"
}
],
"volumeMounts": [
{
"name": "nameValue",
@@ -1106,6 +1118,12 @@
}
]
},
"resizePolicy": [
{
"resourceName": "resourceNameValue",
"policy": "policyValue"
}
],
"volumeMounts": [
{
"name": "nameValue",

View File

@@ -320,6 +320,9 @@ spec:
port: portValue
terminationGracePeriodSeconds: 7
timeoutSeconds: 3
resizePolicy:
- policy: policyValue
resourceName: resourceNameValue
resources:
claims:
- name: nameValue
@@ -523,6 +526,9 @@ spec:
port: portValue
terminationGracePeriodSeconds: 7
timeoutSeconds: 3
resizePolicy:
- policy: policyValue
resourceName: resourceNameValue
resources:
claims:
- name: nameValue
@@ -728,6 +734,9 @@ spec:
port: portValue
terminationGracePeriodSeconds: 7
timeoutSeconds: 3
resizePolicy:
- policy: policyValue
resourceName: resourceNameValue
resources:
claims:
- name: nameValue

Some files were not shown because too many files have changed in this diff.