Merge pull request #116897 from Richabanker/kubelete-resource-metrics-ga

Graduate kubelet resource metrics to GA
This commit is contained in:
Kubernetes Prow Robot
2023-08-18 16:03:37 -07:00
committed by GitHub
5 changed files with 192 additions and 25 deletions

View File

@@ -181,21 +181,34 @@ func (g *Grabber) GrabFromKubelet(ctx context.Context, nodeName string) (Kubelet
return KubeletMetrics{}, fmt.Errorf("Error listing nodes with name %v, got %v", nodeName, nodes.Items)
}
kubeletPort := nodes.Items[0].Status.DaemonEndpoints.KubeletEndpoint.Port
return g.grabFromKubeletInternal(ctx, nodeName, int(kubeletPort))
return g.grabFromKubeletInternal(ctx, nodeName, int(kubeletPort), "metrics")
}
func (g *Grabber) grabFromKubeletInternal(ctx context.Context, nodeName string, kubeletPort int) (KubeletMetrics, error) {
// GrabResourceMetricsFromKubelet returns the metrics served under the
// "metrics/resource" path of the kubelet running on the node named nodeName.
//
// The node is looked up with a metadata.name field selector in order to
// discover its kubelet endpoint port. An error is returned when the list call
// fails or when the lookup does not yield exactly one node.
func (g *Grabber) GrabResourceMetricsFromKubelet(ctx context.Context, nodeName string) (KubeletMetrics, error) {
	selector := fields.Set{"metadata.name": nodeName}.AsSelector().String()
	nodeList, err := g.client.CoreV1().Nodes().List(ctx, metav1.ListOptions{FieldSelector: selector})
	if err != nil {
		return KubeletMetrics{}, err
	}
	if len(nodeList.Items) != 1 {
		return KubeletMetrics{}, fmt.Errorf("Error listing nodes with name %v, got %v", nodeName, nodeList.Items)
	}
	port := int(nodeList.Items[0].Status.DaemonEndpoints.KubeletEndpoint.Port)
	return g.grabFromKubeletInternal(ctx, nodeName, port, "metrics/resource")
}
// grabFromKubeletInternal validates kubeletPort, fetches the metrics output
// from the given node at pathSuffix via getMetricsFromNode, and parses it with
// parseKubeletMetrics.
//
// It returns an empty KubeletMetrics and an error when kubeletPort is outside
// the 1-65535 range or when fetching the metrics fails.
func (g *Grabber) grabFromKubeletInternal(ctx context.Context, nodeName string, kubeletPort int, pathSuffix string) (KubeletMetrics, error) {
// Reject ports that cannot be valid before attempting any request.
if kubeletPort <= 0 || kubeletPort > 65535 {
return KubeletMetrics{}, fmt.Errorf("Invalid Kubelet port %v. Skipping Kubelet's metrics gathering", kubeletPort)
}
// NOTE(review): the next two lines look like the before/after forms of the
// same call as rendered by this change view; only the form that passes
// pathSuffix matches the pathSuffix parameter of this function — confirm.
output, err := g.getMetricsFromNode(ctx, nodeName, int(kubeletPort))
output, err := g.getMetricsFromNode(ctx, nodeName, int(kubeletPort), pathSuffix)
if err != nil {
return KubeletMetrics{}, err
}
return parseKubeletMetrics(output)
}
func (g *Grabber) getMetricsFromNode(ctx context.Context, nodeName string, kubeletPort int) (string, error) {
func (g *Grabber) getMetricsFromNode(ctx context.Context, nodeName string, kubeletPort int, pathSuffix string) (string, error) {
// There's a problem with timing out during proxy. Wrapping this in a goroutine to prevent deadlock.
finished := make(chan struct{}, 1)
var err error
@@ -205,7 +218,7 @@ func (g *Grabber) getMetricsFromNode(ctx context.Context, nodeName string, kubel
Resource("nodes").
SubResource("proxy").
Name(fmt.Sprintf("%v:%v", nodeName, kubeletPort)).
Suffix("metrics").
Suffix(pathSuffix).
Do(ctx).Raw()
finished <- struct{}{}
}()
@@ -432,7 +445,7 @@ func (g *Grabber) Grab(ctx context.Context) (Collection, error) {
} else {
for _, node := range nodes.Items {
kubeletPort := node.Status.DaemonEndpoints.KubeletEndpoint.Port
metrics, err := g.grabFromKubeletInternal(ctx, node.Name, int(kubeletPort))
metrics, err := g.grabFromKubeletInternal(ctx, node.Name, int(kubeletPort), "metrics")
if err != nil {
errs = append(errs, err)
}

View File

@@ -0,0 +1,70 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package instrumentation
import (
"context"
"errors"
"time"
"github.com/onsi/gomega"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
e2emetrics "k8s.io/kubernetes/test/e2e/framework/metrics"
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
"k8s.io/kubernetes/test/e2e/instrumentation/common"
admissionapi "k8s.io/pod-security-admission/api"
"github.com/onsi/ginkgo/v2"
)
// Metrics e2e suite: checks that resource metrics can be grabbed from the
// kubelet's /metrics/resource endpoint of a ready, schedulable node.
var _ = common.SIGDescribe("Metrics", func() {
	f := framework.NewDefaultFramework("metrics")
	f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged
	var c, ec clientset.Interface
	var grabber *e2emetrics.Grabber

	ginkgo.BeforeEach(func(ctx context.Context) {
		c = f.ClientSet
		ec = f.KubemarkExternalClusterClientSet
		// Hand the error back to Eventually rather than asserting inside the
		// polled function: a failed assertion there aborts the test on the
		// first attempt, so the 5-minute retry window would never be used.
		gomega.Eventually(ctx, func() error {
			var err error
			grabber, err = e2emetrics.NewMetricsGrabber(ctx, c, ec, f.ClientConfig(), true, true, true, true, true, true)
			return err
		}, 5*time.Minute, 10*time.Second).Should(gomega.Succeed(), "failed to create metrics grabber")
	})

	/*
	   Release: v1.29
	   Testname: Kubelet resource metrics
	   Description: Should attempt to grab all resource metrics from kubelet metrics/resource endpoint.
	*/
	ginkgo.It("should grab all metrics from kubelet /metrics/resource endpoint", func(ctx context.Context) {
		ginkgo.By("Connecting to kubelet's /metrics/resource endpoint")
		node, err := e2enode.GetRandomReadySchedulableNode(ctx, f.ClientSet)
		// NOTE(review): this inspects the node-lookup error for
		// MetricsGrabbingDisabledError; confirm whether the skip was meant to
		// apply to the error returned by the grabber call below instead.
		if errors.Is(err, e2emetrics.MetricsGrabbingDisabledError) {
			e2eskipper.Skipf("%v", err)
		}
		framework.ExpectNoError(err)

		response, err := grabber.GrabResourceMetricsFromKubelet(ctx, node.Name)
		framework.ExpectNoError(err)
		gomega.Expect(response).NotTo(gomega.BeEmpty())
	})
})

View File

@@ -74,6 +74,56 @@
stabilityLevel: STABLE
labels:
- zone
- name: container_cpu_usage_seconds_total
help: Cumulative cpu time consumed by the container in core-seconds
type: Custom
stabilityLevel: STABLE
labels:
- container
- pod
- namespace
- name: container_memory_working_set_bytes
help: Current working set of the container in bytes
type: Custom
stabilityLevel: STABLE
labels:
- container
- pod
- namespace
- name: container_start_time_seconds
help: Start time of the container since unix epoch in seconds
type: Custom
stabilityLevel: STABLE
labels:
- container
- pod
- namespace
- name: node_cpu_usage_seconds_total
help: Cumulative cpu time consumed by the node in core-seconds
type: Custom
stabilityLevel: STABLE
- name: node_memory_working_set_bytes
help: Current working set of the node in bytes
type: Custom
stabilityLevel: STABLE
- name: pod_cpu_usage_seconds_total
help: Cumulative cpu time consumed by the pod in core-seconds
type: Custom
stabilityLevel: STABLE
labels:
- pod
- namespace
- name: pod_memory_working_set_bytes
help: Current working set of the pod in bytes
type: Custom
stabilityLevel: STABLE
labels:
- pod
- namespace
- name: resource_scrape_error
help: 1 if there was an error while getting container metrics, 0 otherwise
type: Custom
stabilityLevel: STABLE
- name: pod_scheduling_sli_duration_seconds
subsystem: scheduler
help: E2e latency for a pod being scheduled, from the time the pod enters the scheduling