
The new metrics are: cloudprovider_gce_api_request_duration_seconds{request, region, zone} and cloudprovider_gce_api_request_errors{request, region, zone}. `request` is the specific function that is used. `region` is the target region (will be "<n/a>" if not applicable). `zone` is the target zone (will be "<n/a>" if not applicable). Note: this fixes some issues with the previous implementation of metrics for disks: - The time duration tracked was that of the initial API call, not the entire operation. - The metrics label tuple would have resulted in many independent histograms being stored, one for each disk (it did not aggregate well).
109 lines
3.2 KiB
Go
109 lines
3.2 KiB
Go
/*
|
|
Copyright 2017 The Kubernetes Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package gce
|
|
|
|
import (
|
|
"fmt"
|
|
"time"
|
|
|
|
"k8s.io/apimachinery/pkg/util/wait"
|
|
|
|
"github.com/golang/glog"
|
|
compute "google.golang.org/api/compute/v1"
|
|
"google.golang.org/api/googleapi"
|
|
)
|
|
|
|
func (gce *GCECloud) waitForOp(op *compute.Operation, getOperation func(operationName string) (*compute.Operation, error), mc *metricContext) error {
|
|
if op == nil {
|
|
return mc.Observe(fmt.Errorf("operation must not be nil"))
|
|
}
|
|
|
|
if opIsDone(op) {
|
|
return getErrorFromOp(op)
|
|
}
|
|
|
|
opStart := time.Now()
|
|
opName := op.Name
|
|
|
|
return wait.Poll(operationPollInterval, operationPollTimeoutDuration, func() (bool, error) {
|
|
start := time.Now()
|
|
gce.operationPollRateLimiter.Accept()
|
|
duration := time.Now().Sub(start)
|
|
if duration > 5*time.Second {
|
|
glog.V(2).Infof("pollOperation: throttled %v for %v", duration, opName)
|
|
}
|
|
pollOp, err := getOperation(opName)
|
|
if err != nil {
|
|
glog.Warningf("GCE poll operation %s failed: pollOp: [%v] err: [%v] getErrorFromOp: [%v]",
|
|
opName, pollOp, err, getErrorFromOp(pollOp))
|
|
}
|
|
|
|
done := opIsDone(pollOp)
|
|
if done {
|
|
duration := time.Now().Sub(opStart)
|
|
if duration > 1*time.Minute {
|
|
// Log the JSON. It's cleaner than the %v structure.
|
|
enc, err := pollOp.MarshalJSON()
|
|
if err != nil {
|
|
glog.Warningf("waitForOperation: long operation (%v): %v (failed to encode to JSON: %v)",
|
|
duration, pollOp, err)
|
|
} else {
|
|
glog.V(2).Infof("waitForOperation: long operation (%v): %v",
|
|
duration, string(enc))
|
|
}
|
|
}
|
|
}
|
|
|
|
return done, mc.Observe(getErrorFromOp(pollOp))
|
|
})
|
|
}
|
|
|
|
func opIsDone(op *compute.Operation) bool {
|
|
return op != nil && op.Status == "DONE"
|
|
}
|
|
|
|
func getErrorFromOp(op *compute.Operation) error {
|
|
if op != nil && op.Error != nil && len(op.Error.Errors) > 0 {
|
|
err := &googleapi.Error{
|
|
Code: int(op.HttpErrorStatusCode),
|
|
Message: op.Error.Errors[0].Message,
|
|
}
|
|
glog.Errorf("GCE operation failed: %v", err)
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (gce *GCECloud) waitForGlobalOp(op *compute.Operation, mc *metricContext) error {
|
|
return gce.waitForOp(op, func(operationName string) (*compute.Operation, error) {
|
|
return gce.service.GlobalOperations.Get(gce.projectID, operationName).Do()
|
|
}, mc)
|
|
}
|
|
|
|
func (gce *GCECloud) waitForRegionOp(op *compute.Operation, region string, mc *metricContext) error {
|
|
return gce.waitForOp(op, func(operationName string) (*compute.Operation, error) {
|
|
return gce.service.RegionOperations.Get(gce.projectID, region, operationName).Do()
|
|
}, mc)
|
|
}
|
|
|
|
func (gce *GCECloud) waitForZoneOp(op *compute.Operation, zone string, mc *metricContext) error {
|
|
return gce.waitForOp(op, func(operationName string) (*compute.Operation, error) {
|
|
return gce.service.ZoneOperations.Get(gce.projectID, zone, operationName).Do()
|
|
}, mc)
|
|
}
|