Merge pull request #118148 from linxiulei/sched_readyz
Expose /readyz & /livez in kube-scheduler
This commit is contained in:
		| @@ -167,10 +167,12 @@ func Run(ctx context.Context, cc *schedulerserverconfig.CompletedConfig, sched * | ||||
| 	defer cc.EventBroadcaster.Shutdown() | ||||
|  | ||||
| 	// Setup healthz checks. | ||||
| 	var checks []healthz.HealthChecker | ||||
| 	var checks, readyzChecks []healthz.HealthChecker | ||||
| 	if cc.ComponentConfig.LeaderElection.LeaderElect { | ||||
| 		checks = append(checks, cc.LeaderElection.WatchDog) | ||||
| 		readyzChecks = append(readyzChecks, cc.LeaderElection.WatchDog) | ||||
| 	} | ||||
| 	readyzChecks = append(readyzChecks, healthz.NewShutdownHealthz(ctx.Done())) | ||||
|  | ||||
| 	waitingForLeader := make(chan struct{}) | ||||
| 	isLeader := func() bool { | ||||
| @@ -184,9 +186,20 @@ func Run(ctx context.Context, cc *schedulerserverconfig.CompletedConfig, sched * | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	handlerSyncReadyCh := make(chan struct{}) | ||||
| 	handlerSyncCheck := healthz.NamedCheck("sched-handler-sync", func(_ *http.Request) error { | ||||
| 		select { | ||||
| 		case <-handlerSyncReadyCh: | ||||
| 			return nil | ||||
| 		default: | ||||
| 		} | ||||
| 		return fmt.Errorf("waiting for handlers to sync") | ||||
| 	}) | ||||
| 	readyzChecks = append(readyzChecks, handlerSyncCheck) | ||||
|  | ||||
| 	// Start up the healthz server. | ||||
| 	if cc.SecureServing != nil { | ||||
| 		handler := buildHandlerChain(newHealthzAndMetricsHandler(&cc.ComponentConfig, cc.InformerFactory, isLeader, checks...), cc.Authentication.Authenticator, cc.Authorization.Authorizer) | ||||
| 		handler := buildHandlerChain(newHealthEndpointsAndMetricsHandler(&cc.ComponentConfig, cc.InformerFactory, isLeader, checks, readyzChecks), cc.Authentication.Authenticator, cc.Authorization.Authorizer) | ||||
| 		// TODO: handle stoppedCh and listenerStoppedCh returned by c.SecureServing.Serve | ||||
| 		if _, _, err := cc.SecureServing.Serve(handler, 0, ctx.Done()); err != nil { | ||||
| 			// fail early for secure handlers, removing the old error loop from above | ||||
| @@ -214,6 +227,7 @@ func Run(ctx context.Context, cc *schedulerserverconfig.CompletedConfig, sched * | ||||
| 			logger.Error(err, "waiting for handlers to sync") | ||||
| 		} | ||||
|  | ||||
| 		close(handlerSyncReadyCh) | ||||
| 		logger.V(3).Info("Handlers synced") | ||||
| 	} | ||||
| 	if !cc.ComponentConfig.DelayCacheUntilActive || cc.LeaderElection == nil { | ||||
| @@ -288,11 +302,14 @@ func installMetricHandler(pathRecorderMux *mux.PathRecorderMux, informers inform | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| // newHealthzAndMetricsHandler creates a healthz server from the config, and will also | ||||
| // newHealthEndpointsAndMetricsHandler creates an API health server from the config, and will also | ||||
| // embed the metrics handler. | ||||
| func newHealthzAndMetricsHandler(config *kubeschedulerconfig.KubeSchedulerConfiguration, informers informers.SharedInformerFactory, isLeader func() bool, checks ...healthz.HealthChecker) http.Handler { | ||||
| // TODO: healthz check is deprecated, please use livez and readyz instead. Will be removed in the future. | ||||
| func newHealthEndpointsAndMetricsHandler(config *kubeschedulerconfig.KubeSchedulerConfiguration, informers informers.SharedInformerFactory, isLeader func() bool, healthzChecks, readyzChecks []healthz.HealthChecker) http.Handler { | ||||
| 	pathRecorderMux := mux.NewPathRecorderMux("kube-scheduler") | ||||
| 	healthz.InstallHandler(pathRecorderMux, checks...) | ||||
| 	healthz.InstallHandler(pathRecorderMux, healthzChecks...) | ||||
| 	healthz.InstallLivezHandler(pathRecorderMux) | ||||
| 	healthz.InstallReadyzHandler(pathRecorderMux, readyzChecks...) | ||||
| 	installMetricHandler(pathRecorderMux, informers, isLeader) | ||||
| 	slis.SLIMetricsWithReset{}.Install(pathRecorderMux) | ||||
|  | ||||
|   | ||||
| @@ -118,7 +118,7 @@ func (s *GenericAPIServer) AddLivezChecks(delay time.Duration, checks ...healthz | ||||
| // that we can register that the api-server is no longer ready while we attempt to gracefully | ||||
| // shutdown. | ||||
| func (s *GenericAPIServer) addReadyzShutdownCheck(stopCh <-chan struct{}) error { | ||||
| 	return s.AddReadyzChecks(shutdownCheck{stopCh}) | ||||
| 	return s.AddReadyzChecks(healthz.NewShutdownHealthz(stopCh)) | ||||
| } | ||||
|  | ||||
| // installHealthz creates the healthz endpoint for this server | ||||
| @@ -139,25 +139,6 @@ func (s *GenericAPIServer) installLivez() { | ||||
| 	s.livezRegistry.installHandler(s.Handler.NonGoRestfulMux) | ||||
| } | ||||
|  | ||||
| // shutdownCheck fails if the embedded channel is closed. This is intended to allow for graceful shutdown sequences | ||||
| // for the apiserver. | ||||
| type shutdownCheck struct { | ||||
| 	StopCh <-chan struct{} | ||||
| } | ||||
|  | ||||
| func (shutdownCheck) Name() string { | ||||
| 	return "shutdown" | ||||
| } | ||||
|  | ||||
| func (c shutdownCheck) Check(req *http.Request) error { | ||||
| 	select { | ||||
| 	case <-c.StopCh: | ||||
| 		return fmt.Errorf("process is shutting down") | ||||
| 	default: | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| // delayedHealthCheck wraps a health check which will not fail until the explicitly defined delay has elapsed. This | ||||
| // is intended for use primarily for livez health checks. | ||||
| func delayedHealthCheck(check healthz.HealthChecker, clock clock.Clock, delay time.Duration) healthz.HealthChecker { | ||||
|   | ||||
| @@ -105,6 +105,29 @@ func (i *informerSync) Name() string { | ||||
| 	return "informer-sync" | ||||
| } | ||||
|  | ||||
| type shutdown struct { | ||||
| 	stopCh <-chan struct{} | ||||
| } | ||||
|  | ||||
| // NewShutdownHealthz returns a new HealthChecker that will fail if the embedded channel is closed. | ||||
| // This is intended to allow for graceful shutdown sequences. | ||||
| func NewShutdownHealthz(stopCh <-chan struct{}) HealthChecker { | ||||
| 	return &shutdown{stopCh} | ||||
| } | ||||
|  | ||||
| func (s *shutdown) Name() string { | ||||
| 	return "shutdown" | ||||
| } | ||||
|  | ||||
| func (s *shutdown) Check(req *http.Request) error { | ||||
| 	select { | ||||
| 	case <-s.stopCh: | ||||
| 		return fmt.Errorf("process is shutting down") | ||||
| 	default: | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (i *informerSync) Check(_ *http.Request) error { | ||||
| 	stopCh := make(chan struct{}) | ||||
| 	// Close stopCh to force checking if informers are synced now. | ||||
|   | ||||
							
								
								
									
										240
									
								
								test/integration/scheduler/serving/healthcheck_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										240
									
								
								test/integration/scheduler/serving/healthcheck_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,240 @@ | ||||
| /* | ||||
| Copyright 2024 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package serving | ||||
|  | ||||
| import ( | ||||
| 	"crypto/tls" | ||||
| 	"crypto/x509" | ||||
| 	"fmt" | ||||
| 	"io" | ||||
| 	"net/http" | ||||
| 	"os" | ||||
| 	"path" | ||||
| 	"strings" | ||||
| 	"testing" | ||||
|  | ||||
| 	"k8s.io/klog/v2/ktesting" | ||||
| 	kubeapiservertesting "k8s.io/kubernetes/cmd/kube-apiserver/app/testing" | ||||
| 	kubeschedulertesting "k8s.io/kubernetes/cmd/kube-scheduler/app/testing" | ||||
| 	"k8s.io/kubernetes/test/integration/framework" | ||||
| ) | ||||
|  | ||||
| func TestHealthEndpoints(t *testing.T) { | ||||
| 	server, configStr, _, err := startTestAPIServer(t) | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("Failed to start kube-apiserver server: %v", err) | ||||
| 	} | ||||
| 	defer server.TearDownFn() | ||||
|  | ||||
| 	apiserverConfig, err := os.CreateTemp("", "kubeconfig") | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("Failed to create config file: %v", err) | ||||
| 	} | ||||
| 	defer func() { | ||||
| 		_ = os.Remove(apiserverConfig.Name()) | ||||
| 	}() | ||||
| 	if _, err = apiserverConfig.WriteString(configStr); err != nil { | ||||
| 		t.Fatalf("Failed to write config file: %v", err) | ||||
| 	} | ||||
|  | ||||
| 	brokenConfigStr := strings.ReplaceAll(configStr, "127.0.0.1", "127.0.0.2") | ||||
| 	brokenConfig, err := os.CreateTemp("", "kubeconfig") | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("Failed to create config file: %v", err) | ||||
| 	} | ||||
| 	if _, err := brokenConfig.WriteString(brokenConfigStr); err != nil { | ||||
| 		t.Fatalf("Failed to write config file: %v", err) | ||||
| 	} | ||||
| 	defer func() { | ||||
| 		_ = os.Remove(brokenConfig.Name()) | ||||
| 	}() | ||||
|  | ||||
| 	tests := []struct { | ||||
| 		name             string | ||||
| 		path             string | ||||
| 		useBrokenConfig  bool | ||||
| 		wantResponseCode int | ||||
| 	}{ | ||||
| 		{ | ||||
| 			"/healthz", | ||||
| 			"/healthz", | ||||
| 			false, | ||||
| 			http.StatusOK, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"/livez", | ||||
| 			"/livez", | ||||
| 			false, | ||||
| 			http.StatusOK, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"/livez with ping check", | ||||
| 			"/livez/ping", | ||||
| 			false, | ||||
| 			http.StatusOK, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"/readyz", | ||||
| 			"/readyz", | ||||
| 			false, | ||||
| 			http.StatusOK, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"/readyz with sched-handler-sync", | ||||
| 			"/readyz/sched-handler-sync", | ||||
| 			false, | ||||
| 			http.StatusOK, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"/readyz with shutdown", | ||||
| 			"/readyz/shutdown", | ||||
| 			false, | ||||
| 			http.StatusOK, | ||||
| 		}, | ||||
| 		{ | ||||
| 			"/readyz with broken apiserver", | ||||
| 			"/readyz", | ||||
| 			true, | ||||
| 			http.StatusInternalServerError, | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| 	for _, tt := range tests { | ||||
| 		t.Run(tt.name, func(t *testing.T) { | ||||
| 			tt := tt | ||||
| 			_, ctx := ktesting.NewTestContext(t) | ||||
|  | ||||
| 			configFile := apiserverConfig.Name() | ||||
| 			if tt.useBrokenConfig { | ||||
| 				configFile = brokenConfig.Name() | ||||
| 			} | ||||
| 			result, err := kubeschedulertesting.StartTestServer( | ||||
| 				ctx, | ||||
| 				[]string{"--kubeconfig", configFile, "--leader-elect=false", "--authorization-always-allow-paths", tt.path}) | ||||
|  | ||||
| 			if err != nil { | ||||
| 				t.Fatalf("Failed to start kube-scheduler server: %v", err) | ||||
| 			} | ||||
| 			if result.TearDownFn != nil { | ||||
| 				defer result.TearDownFn() | ||||
| 			} | ||||
|  | ||||
| 			client, base, err := clientAndURLFromTestServer(result) | ||||
| 			if err != nil { | ||||
| 				t.Fatalf("Failed to get client from test server: %v", err) | ||||
| 			} | ||||
| 			req, err := http.NewRequest("GET", base+tt.path, nil) | ||||
| 			if err != nil { | ||||
| 				t.Fatalf("failed to request: %v", err) | ||||
| 			} | ||||
| 			r, err := client.Do(req) | ||||
| 			if err != nil { | ||||
| 				t.Fatalf("failed to GET %s from component: %v", tt.path, err) | ||||
| 			} | ||||
|  | ||||
| 			body, err := io.ReadAll(r.Body) | ||||
| 			if err != nil { | ||||
| 				t.Fatalf("failed to read response body: %v", err) | ||||
| 			} | ||||
| 			if err = r.Body.Close(); err != nil { | ||||
| 				t.Fatalf("failed to close response body: %v", err) | ||||
| 			} | ||||
| 			if got, expected := r.StatusCode, tt.wantResponseCode; got != expected { | ||||
| 				t.Fatalf("expected http %d at %s of component, got: %d %q", expected, tt.path, got, string(body)) | ||||
| 			} | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| // TODO: Make this a util function once there is a unified way to start a testing apiserver so that we can reuse it. | ||||
| func startTestAPIServer(t *testing.T) (server *kubeapiservertesting.TestServer, apiserverConfig, token string, err error) { | ||||
| 	// Insulate this test from picking up in-cluster config when run inside a pod | ||||
| 	// We can't assume we have permissions to write to /var/run/secrets/... from a unit test to mock in-cluster config for testing | ||||
| 	originalHost := os.Getenv("KUBERNETES_SERVICE_HOST") | ||||
| 	if len(originalHost) > 0 { | ||||
| 		if err = os.Setenv("KUBERNETES_SERVICE_HOST", ""); err != nil { | ||||
| 			return | ||||
| 		} | ||||
| 		defer func() { | ||||
| 			err = os.Setenv("KUBERNETES_SERVICE_HOST", originalHost) | ||||
| 		}() | ||||
| 	} | ||||
|  | ||||
| 	// authenticate to apiserver via bearer token | ||||
| 	token = "flwqkenfjasasdfmwerasd" // Fake token for testing. | ||||
| 	var tokenFile *os.File | ||||
| 	tokenFile, err = os.CreateTemp("", "kubeconfig") | ||||
| 	if err != nil { | ||||
| 		return | ||||
| 	} | ||||
| 	if _, err = tokenFile.WriteString(fmt.Sprintf(`%s,system:kube-scheduler,system:kube-scheduler,""`, token)); err != nil { | ||||
| 		return | ||||
| 	} | ||||
| 	if err = tokenFile.Close(); err != nil { | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	// start apiserver | ||||
| 	server = kubeapiservertesting.StartTestServerOrDie(t, nil, []string{ | ||||
| 		"--token-auth-file", tokenFile.Name(), | ||||
| 		"--authorization-mode", "AlwaysAllow", | ||||
| 	}, framework.SharedEtcd()) | ||||
|  | ||||
| 	apiserverConfig = fmt.Sprintf(` | ||||
| apiVersion: v1 | ||||
| kind: Config | ||||
| clusters: | ||||
| - cluster: | ||||
|     server: %s | ||||
|     certificate-authority: %s | ||||
|   name: integration | ||||
| contexts: | ||||
| - context: | ||||
|     cluster: integration | ||||
|     user: kube-scheduler | ||||
|   name: default-context | ||||
| current-context: default-context | ||||
| users: | ||||
| - name: kube-scheduler | ||||
|   user: | ||||
|     token: %s | ||||
| `, server.ClientConfig.Host, server.ServerOpts.SecureServing.ServerCert.CertKey.CertFile, token) | ||||
| 	return server, apiserverConfig, token, nil | ||||
| } | ||||
|  | ||||
| func clientAndURLFromTestServer(s kubeschedulertesting.TestServer) (*http.Client, string, error) { | ||||
| 	secureInfo := s.Config.SecureServing | ||||
| 	secureOptions := s.Options.SecureServing | ||||
| 	url := fmt.Sprintf("https://%s", secureInfo.Listener.Addr().String()) | ||||
| 	url = strings.ReplaceAll(url, "[::]", "127.0.0.1") // switch to IPv4 because the self-signed cert does not support [::] | ||||
|  | ||||
| 	// read self-signed server cert disk | ||||
| 	pool := x509.NewCertPool() | ||||
| 	serverCertPath := path.Join(secureOptions.ServerCert.CertDirectory, secureOptions.ServerCert.PairName+".crt") | ||||
| 	serverCert, err := os.ReadFile(serverCertPath) | ||||
| 	if err != nil { | ||||
| 		return nil, "", fmt.Errorf("Failed to read component server cert %q: %w", serverCertPath, err) | ||||
| 	} | ||||
| 	pool.AppendCertsFromPEM(serverCert) | ||||
| 	tr := &http.Transport{ | ||||
| 		TLSClientConfig: &tls.Config{ | ||||
| 			RootCAs: pool, | ||||
| 		}, | ||||
| 	} | ||||
| 	client := &http.Client{Transport: tr} | ||||
| 	return client, url, nil | ||||
| } | ||||
							
								
								
									
										27
									
								
								test/integration/scheduler/serving/main_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								test/integration/scheduler/serving/main_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,27 @@ | ||||
| /* | ||||
| Copyright 2024 The Kubernetes Authors. | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
| You may obtain a copy of the License at | ||||
|  | ||||
|     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  | ||||
| Unless required by applicable law or agreed to in writing, software | ||||
| distributed under the License is distributed on an "AS IS" BASIS, | ||||
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| See the License for the specific language governing permissions and | ||||
| limitations under the License. | ||||
| */ | ||||
|  | ||||
| package serving | ||||
|  | ||||
| import ( | ||||
| 	"testing" | ||||
|  | ||||
| 	"k8s.io/kubernetes/test/integration/framework" | ||||
| ) | ||||
|  | ||||
| func TestMain(m *testing.M) { | ||||
| 	framework.EtcdMain(m.Run) | ||||
| } | ||||
		Reference in New Issue
	
	Block a user
	 Kubernetes Prow Robot
					Kubernetes Prow Robot