kubelet tracing

Signed-off-by: Sally O'Malley <somalley@redhat.com>
Co-authored-by: David Ashpole <dashpole@google.com>
This commit is contained in:
Sally O'Malley
2021-10-10 09:17:27 -04:00
parent bddb4ec08e
commit 47e7d8034f
39 changed files with 683 additions and 473 deletions

View File

@@ -498,6 +498,13 @@ const (
// Enable POD resources API to return allocatable resources
KubeletPodResourcesGetAllocatable featuregate.Feature = "KubeletPodResourcesGetAllocatable"
// owner: @sallyom
// kep: http://kep.k8s.io/2832
// alpha: v1.25
//
// Add support for distributed tracing in the kubelet
KubeletTracing featuregate.Feature = "KubeletTracing"
// owner: @zshihang
// kep: http://kep.k8s.io/2800
// beta: v1.24
@@ -961,6 +968,8 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
KubeletPodResourcesGetAllocatable: {Default: true, PreRelease: featuregate.Beta},
KubeletTracing: {Default: false, PreRelease: featuregate.Alpha},
LegacyServiceAccountTokenNoAutoGeneration: {Default: true, PreRelease: featuregate.Beta},
LocalStorageCapacityIsolation: {Default: true, PreRelease: featuregate.Beta},

View File

@@ -47,7 +47,7 @@ type Config struct {
}
// New sets up the plugins and admission start hooks needed for admission
func (c *Config) New(proxyTransport *http.Transport, egressSelector *egressselector.EgressSelector, serviceResolver webhook.ServiceResolver, tp *trace.TracerProvider) ([]admission.PluginInitializer, genericapiserver.PostStartHookFunc, error) {
func (c *Config) New(proxyTransport *http.Transport, egressSelector *egressselector.EgressSelector, serviceResolver webhook.ServiceResolver, tp trace.TracerProvider) ([]admission.PluginInitializer, genericapiserver.PostStartHookFunc, error) {
webhookAuthResolverWrapper := webhook.NewDefaultAuthenticationInfoResolverWrapper(proxyTransport, egressSelector, c.LoopbackClientConfig, tp)
webhookPluginInitializer := webhookinit.NewPluginInitializer(webhookAuthResolverWrapper, serviceResolver)

View File

@@ -280,5 +280,7 @@ var (
"ShutdownGracePeriod.Duration",
"ShutdownGracePeriodCriticalPods.Duration",
"MemoryThrottlingFactor",
"Tracing.Endpoint",
"Tracing.SamplingRatePerMillion",
)
)

View File

@@ -24,6 +24,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
componentconfigtesting "k8s.io/component-base/config/testing"
logsapi "k8s.io/component-base/logs/api/v1"
tracingapi "k8s.io/component-base/tracing/api/v1"
)
func TestComponentConfigSetup(t *testing.T) {
@@ -33,11 +34,12 @@ func TestComponentConfigSetup(t *testing.T) {
SchemeGroupVersion: SchemeGroupVersion,
AddToScheme: AddToScheme,
AllowedTags: map[reflect.Type]bool{
reflect.TypeOf(logsapi.LoggingConfiguration{}): true,
reflect.TypeOf(metav1.Duration{}): true,
reflect.TypeOf(metav1.TypeMeta{}): true,
reflect.TypeOf(v1.NodeConfigSource{}): true,
reflect.TypeOf(v1.Taint{}): true,
reflect.TypeOf(logsapi.LoggingConfiguration{}): true,
reflect.TypeOf(tracingapi.TracingConfiguration{}): true,
reflect.TypeOf(metav1.Duration{}): true,
reflect.TypeOf(metav1.TypeMeta{}): true,
reflect.TypeOf(v1.NodeConfigSource{}): true,
reflect.TypeOf(v1.Taint{}): true,
},
}

View File

@@ -20,6 +20,7 @@ import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
logsapi "k8s.io/component-base/logs/api/v1"
tracingapi "k8s.io/component-base/tracing/api/v1"
)
// HairpinMode denotes how the kubelet should configure networking to handle
@@ -441,10 +442,14 @@ type KubeletConfiguration struct {
// is true and upon the initial registration of the node.
// +optional
RegisterWithTaints []v1.Taint
// registerNode enables automatic registration with the apiserver.
// +optional
RegisterNode bool
// Tracing specifies the versioned configuration for OpenTelemetry tracing clients.
// See http://kep.k8s.io/2832 for more details.
// +featureGate=KubeletTracing
// +optional
Tracing *tracingapi.TracingConfiguration
}
// KubeletAuthorizationMode denotes the authorization mode for the kubelet

View File

@@ -27,6 +27,7 @@ import (
"k8s.io/component-base/featuregate"
logsapi "k8s.io/component-base/logs/api/v1"
"k8s.io/component-base/metrics"
tracingapi "k8s.io/component-base/tracing/api/v1"
"k8s.io/kubernetes/pkg/features"
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
@@ -241,6 +242,14 @@ func ValidateKubeletConfiguration(kc *kubeletconfig.KubeletConfiguration, featur
allErrors = append(allErrors, errs.ToAggregate().Errors()...)
}
if localFeatureGate.Enabled(features.KubeletTracing) {
if errs := tracingapi.ValidateTracingConfiguration(kc.Tracing, localFeatureGate, field.NewPath("tracing")); len(errs) > 0 {
allErrors = append(allErrors, errs.ToAggregate().Errors()...)
}
} else if kc.Tracing != nil {
allErrors = append(allErrors, fmt.Errorf("invalid configuration: tracing should not be configured if KubeletTracing feature flag is disabled."))
}
if localFeatureGate.Enabled(features.MemoryQoS) && kc.MemoryThrottlingFactor == nil {
allErrors = append(allErrors, fmt.Errorf("invalid configuration: memoryThrottlingFactor is required when MemoryQoS feature flag is enabled"))
}

View File

@@ -25,6 +25,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
logsapi "k8s.io/component-base/logs/api/v1"
tracingapi "k8s.io/component-base/tracing/api/v1"
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
"k8s.io/kubernetes/pkg/kubelet/apis/config/validation"
kubetypes "k8s.io/kubernetes/pkg/kubelet/types"
@@ -497,6 +498,36 @@ func TestValidateKubeletConfiguration(t *testing.T) {
},
errMsg: "invalid configuration: taint.TimeAdded is not nil",
},
{
name: "specify tracing with KubeletTracing disabled",
configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration {
samplingRate := int32(99999)
conf.FeatureGates = map[string]bool{"KubeletTracing": false}
conf.Tracing = &tracingapi.TracingConfiguration{SamplingRatePerMillion: &samplingRate}
return conf
},
errMsg: "invalid configuration: tracing should not be configured if KubeletTracing feature flag is disabled.",
},
{
name: "specify tracing invalid sampling rate",
configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration {
samplingRate := int32(-1)
conf.FeatureGates = map[string]bool{"KubeletTracing": true}
conf.Tracing = &tracingapi.TracingConfiguration{SamplingRatePerMillion: &samplingRate}
return conf
},
errMsg: "tracing.samplingRatePerMillion: Invalid value: -1: sampling rate must be positive",
},
{
name: "specify tracing invalid endpoint",
configure: func(conf *kubeletconfig.KubeletConfiguration) *kubeletconfig.KubeletConfiguration {
ep := "dn%2s://localhost:4317"
conf.FeatureGates = map[string]bool{"KubeletTracing": true}
conf.Tracing = &tracingapi.TracingConfiguration{Endpoint: &ep}
return conf
},
errMsg: "tracing.endpoint: Invalid value: \"dn%2s://localhost:4317\": parse \"dn%2s://localhost:4317\": first path segment in URL cannot contain colon",
},
}
for _, tc := range cases {

View File

@@ -23,16 +23,20 @@ import (
"strings"
"time"
"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
"go.opentelemetry.io/otel/trace"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/credentials/insecure"
"google.golang.org/grpc/status"
"k8s.io/klog/v2"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/component-base/logs/logreduction"
tracing "k8s.io/component-base/tracing"
internalapi "k8s.io/cri-api/pkg/apis"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
runtimeapiV1alpha2 "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/kubelet/util"
"k8s.io/kubernetes/pkg/probe/exec"
utilexec "k8s.io/utils/exec"
@@ -68,7 +72,7 @@ const (
)
// NewRemoteRuntimeService creates a new internalapi.RuntimeService.
func NewRemoteRuntimeService(endpoint string, connectionTimeout time.Duration) (internalapi.RuntimeService, error) {
func NewRemoteRuntimeService(endpoint string, connectionTimeout time.Duration, tp trace.TracerProvider) (internalapi.RuntimeService, error) {
klog.V(3).InfoS("Connecting to runtime service", "endpoint", endpoint)
addr, dialer, err := util.GetAddressAndDialer(endpoint)
if err != nil {
@@ -77,10 +81,23 @@ func NewRemoteRuntimeService(endpoint string, connectionTimeout time.Duration) (
ctx, cancel := context.WithTimeout(context.Background(), connectionTimeout)
defer cancel()
conn, err := grpc.DialContext(ctx, addr,
dialOpts := []grpc.DialOption{}
dialOpts = append(dialOpts,
grpc.WithTransportCredentials(insecure.NewCredentials()),
grpc.WithContextDialer(dialer),
grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(maxMsgSize)))
if utilfeature.DefaultFeatureGate.Enabled(features.KubeletTracing) {
tracingOpts := []otelgrpc.Option{
otelgrpc.WithPropagators(tracing.Propagators()),
otelgrpc.WithTracerProvider(tp),
}
// Even if there is no TracerProvider, the otelgrpc still handles context propagation.
// See https://github.com/open-telemetry/opentelemetry-go/tree/main/example/passthrough
dialOpts = append(dialOpts,
grpc.WithUnaryInterceptor(otelgrpc.UnaryClientInterceptor(tracingOpts...)),
grpc.WithStreamInterceptor(otelgrpc.StreamClientInterceptor(tracingOpts...)))
}
conn, err := grpc.DialContext(ctx, addr, dialOpts...)
if err != nil {
klog.ErrorS(err, "Connect remote runtime failed", "address", addr)
return nil, err

View File

@@ -21,6 +21,8 @@ import (
"testing"
"time"
oteltrace "go.opentelemetry.io/otel/trace"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
internalapi "k8s.io/cri-api/pkg/apis"
@@ -47,7 +49,7 @@ func createAndStartFakeRemoteRuntime(t *testing.T) (*fakeremote.RemoteRuntime, s
}
func createRemoteRuntimeService(endpoint string, t *testing.T) internalapi.RuntimeService {
runtimeService, err := NewRemoteRuntimeService(endpoint, defaultConnectionTimeout)
runtimeService, err := NewRemoteRuntimeService(endpoint, defaultConnectionTimeout, oteltrace.NewNoopTracerProvider())
require.NoError(t, err)
return runtimeService

View File

@@ -38,6 +38,7 @@ import (
cadvisorapi "github.com/google/cadvisor/info/v1"
libcontaineruserns "github.com/opencontainers/runc/libcontainer/userns"
"go.opentelemetry.io/otel/trace"
"k8s.io/mount-utils"
"k8s.io/utils/integer"
netutils "k8s.io/utils/net"
@@ -206,7 +207,7 @@ type Bootstrap interface {
GetConfiguration() kubeletconfiginternal.KubeletConfiguration
BirthCry()
StartGarbageCollection()
ListenAndServe(kubeCfg *kubeletconfiginternal.KubeletConfiguration, tlsOptions *server.TLSOptions, auth server.AuthInterface)
ListenAndServe(kubeCfg *kubeletconfiginternal.KubeletConfiguration, tlsOptions *server.TLSOptions, auth server.AuthInterface, tp trace.TracerProvider)
ListenAndServeReadOnly(address net.IP, port uint)
ListenAndServePodResources()
Run(<-chan kubetypes.PodUpdate)
@@ -236,6 +237,7 @@ type Dependencies struct {
ProbeManager prober.Manager
Recorder record.EventRecorder
Subpather subpath.Interface
TracerProvider trace.TracerProvider
VolumePlugins []volume.VolumePlugin
DynamicPluginProber volume.DynamicPluginProber
TLSOptions *server.TLSOptions
@@ -293,7 +295,7 @@ func PreInitRuntimeService(kubeCfg *kubeletconfiginternal.KubeletConfiguration,
}
var err error
if kubeDeps.RemoteRuntimeService, err = remote.NewRemoteRuntimeService(remoteRuntimeEndpoint, kubeCfg.RuntimeRequestTimeout.Duration); err != nil {
if kubeDeps.RemoteRuntimeService, err = remote.NewRemoteRuntimeService(remoteRuntimeEndpoint, kubeCfg.RuntimeRequestTimeout.Duration, kubeDeps.TracerProvider); err != nil {
return err
}
if kubeDeps.RemoteImageService, err = remote.NewRemoteImageService(remoteImageEndpoint, kubeCfg.RuntimeRequestTimeout.Duration); err != nil {
@@ -2395,8 +2397,8 @@ func (kl *Kubelet) ResyncInterval() time.Duration {
// ListenAndServe runs the kubelet HTTP server.
func (kl *Kubelet) ListenAndServe(kubeCfg *kubeletconfiginternal.KubeletConfiguration, tlsOptions *server.TLSOptions,
auth server.AuthInterface) {
server.ListenAndServeKubeletServer(kl, kl.resourceAnalyzer, kubeCfg, tlsOptions, auth)
auth server.AuthInterface, tp trace.TracerProvider) {
server.ListenAndServeKubeletServer(kl, kl.resourceAnalyzer, kubeCfg, tlsOptions, auth, tp)
}
// ListenAndServeReadOnly runs the kubelet HTTP server in read-only mode.

View File

@@ -37,6 +37,7 @@ import (
cadvisorapi "github.com/google/cadvisor/info/v1"
cadvisorv2 "github.com/google/cadvisor/info/v2"
"github.com/google/cadvisor/metrics"
oteltrace "go.opentelemetry.io/otel/trace"
"google.golang.org/grpc"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/kubelet/metrics/collectors"
@@ -63,6 +64,7 @@ import (
compbasemetrics "k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
tracing "k8s.io/component-base/tracing"
podresourcesapi "k8s.io/kubelet/pkg/apis/podresources/v1"
podresourcesapiv1alpha1 "k8s.io/kubelet/pkg/apis/podresources/v1alpha1"
"k8s.io/kubernetes/pkg/api/legacyscheme"
@@ -127,10 +129,13 @@ type containerInterface interface {
// so we can ensure restful.FilterFunctions are used for all handlers
type filteringContainer struct {
*restful.Container
oteltrace.TracerProvider
registeredHandlePaths []string
}
func (a *filteringContainer) Handle(path string, handler http.Handler) {
handler = tracing.WithTracing(handler, a.TracerProvider, "kubelet")
a.HandleWithFilter(path, handler)
a.registeredHandlePaths = append(a.registeredHandlePaths, path)
}
@@ -144,12 +149,13 @@ func ListenAndServeKubeletServer(
resourceAnalyzer stats.ResourceAnalyzer,
kubeCfg *kubeletconfiginternal.KubeletConfiguration,
tlsOptions *TLSOptions,
auth AuthInterface) {
auth AuthInterface,
tp oteltrace.TracerProvider) {
address := netutils.ParseIPSloppy(kubeCfg.Address)
port := uint(kubeCfg.Port)
klog.InfoS("Starting to listen", "address", address, "port", port)
handler := NewServer(host, resourceAnalyzer, auth, kubeCfg)
handler := NewServer(host, resourceAnalyzer, auth, tp, kubeCfg)
s := &http.Server{
Addr: net.JoinHostPort(address.String(), strconv.FormatUint(uint64(port), 10)),
Handler: &handler,
@@ -158,6 +164,7 @@ func ListenAndServeKubeletServer(
WriteTimeout: 4 * 60 * time.Minute,
MaxHeaderBytes: 1 << 20,
}
if tlsOptions != nil {
s.TLSConfig = tlsOptions.Config
// Passing empty strings as the cert and key files means no
@@ -174,9 +181,14 @@ func ListenAndServeKubeletServer(
}
// ListenAndServeKubeletReadOnlyServer initializes a server to respond to HTTP network requests on the Kubelet.
func ListenAndServeKubeletReadOnlyServer(host HostInterface, resourceAnalyzer stats.ResourceAnalyzer, address net.IP, port uint) {
func ListenAndServeKubeletReadOnlyServer(
host HostInterface,
resourceAnalyzer stats.ResourceAnalyzer,
address net.IP,
port uint) {
klog.InfoS("Starting to listen read-only", "address", address, "port", port)
s := NewServer(host, resourceAnalyzer, nil, nil)
// TODO: https://github.com/kubernetes/kubernetes/issues/109829 tracer should use WithPublicEndpoint
s := NewServer(host, resourceAnalyzer, nil, oteltrace.NewNoopTracerProvider(), nil)
server := &http.Server{
Addr: net.JoinHostPort(address.String(), strconv.FormatUint(uint64(port), 10)),
@@ -241,12 +253,15 @@ func NewServer(
host HostInterface,
resourceAnalyzer stats.ResourceAnalyzer,
auth AuthInterface,
tp oteltrace.TracerProvider,
kubeCfg *kubeletconfiginternal.KubeletConfiguration) Server {
rc := &filteringContainer{Container: restful.NewContainer(), TracerProvider: tp}
server := Server{
host: host,
resourceAnalyzer: resourceAnalyzer,
auth: auth,
restfulCont: &filteringContainer{Container: restful.NewContainer()},
restfulCont: rc,
metricsBuckets: sets.NewString(),
metricsMethodBuckets: sets.NewString("OPTIONS", "GET", "HEAD", "POST", "PUT", "DELETE", "TRACE", "CONNECT"),
}

View File

@@ -37,6 +37,7 @@ import (
cadvisorapiv2 "github.com/google/cadvisor/info/v2"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
oteltrace "go.opentelemetry.io/otel/trace"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
@@ -358,6 +359,7 @@ func newServerTestWithDebuggingHandlers(kubeCfg *kubeletconfiginternal.KubeletCo
fw.fakeKubelet,
stats.NewResourceAnalyzer(fw.fakeKubelet, time.Minute, &record.FakeRecorder{}),
fw.fakeAuth,
oteltrace.NewNoopTracerProvider(),
kubeCfg,
)
fw.serverUnderTest = &server