Merge pull request #16053 from saad-ali/attachDetachMutextFix
Fix GCE Cloud/Attach/Detach stability issues
@@ -55,6 +55,9 @@ const (
 	gceAffinityTypeClientIP = "CLIENT_IP"
 	// AffinityTypeClientIPProto - affinity based on Client IP and port.
 	gceAffinityTypeClientIPProto = "CLIENT_IP_PROTO"
+
+	operationPollInterval        = 3 * time.Second
+	operationPollTimeoutDuration = 30 * time.Minute
 )
 
 // GCECloud is an implementation of Interface, TCPLoadBalancer and Instances for Google Compute Engine.
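
The two new constants set the polling cadence used by the rewritten waitForOp below. As a minimal, self-contained sketch of the wait.Poll contract they feed into (the import path is an assumption matching the tree at the time of this commit; newer trees use k8s.io/apimachinery/pkg/util/wait):

package main

import (
	"fmt"
	"time"

	// Assumed 2015-era import path for the wait utilities.
	"k8s.io/kubernetes/pkg/util/wait"
)

func main() {
	start := time.Now()
	// wait.Poll invokes the condition once per interval until it returns
	// (true, nil), returns a non-nil error, or the timeout elapses.
	err := wait.Poll(3*time.Second, 30*time.Minute, func() (bool, error) {
		return time.Since(start) > 9*time.Second, nil
	})
	fmt.Println(err) // <nil> once the condition reports done
}
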
@@ -259,48 +262,57 @@ func (gce *GCECloud) targetPoolURL(name, region string) string {
|
||||
return fmt.Sprintf("https://www.googleapis.com/compute/v1/projects/%s/regions/%s/targetPools/%s", gce.projectID, region, name)
|
||||
}
|
||||
|
||||
func waitForOp(op *compute.Operation, getOperation func() (*compute.Operation, error)) error {
|
||||
pollOp := op
|
||||
consecPollFails := 0
|
||||
for pollOp.Status != "DONE" {
|
||||
var err error
|
||||
time.Sleep(3 * time.Second)
|
||||
pollOp, err = getOperation()
|
||||
if err != nil {
|
||||
if consecPollFails == 2 {
|
||||
// Only bail if we've seen 3 consecutive polling errors.
|
||||
return err
|
||||
}
|
||||
consecPollFails++
|
||||
} else {
|
||||
consecPollFails = 0
|
||||
}
|
||||
func waitForOp(op *compute.Operation, getOperation func(operationName string) (*compute.Operation, error)) error {
|
||||
if op == nil {
|
||||
return fmt.Errorf("operation must not be nil")
|
||||
}
|
||||
if pollOp.Error != nil && len(pollOp.Error.Errors) > 0 {
|
||||
return &googleapi.Error{
|
||||
Code: int(pollOp.HttpErrorStatusCode),
|
||||
Message: pollOp.Error.Errors[0].Message,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
|
||||
if opIsDone(op) {
|
||||
return getErrorFromOp(op)
|
||||
}
|
||||
|
||||
opName := op.Name
|
||||
return wait.Poll(operationPollInterval, operationPollTimeoutDuration, func() (bool, error) {
|
||||
pollOp, err := getOperation(opName)
|
||||
if err != nil {
|
||||
glog.Warningf("GCE poll operation failed: %v", err)
|
||||
}
|
||||
return opIsDone(pollOp), getErrorFromOp(pollOp)
|
||||
})
|
||||
}
|
||||
|
||||
func opIsDone(op *compute.Operation) bool {
|
||||
return op != nil && op.Status == "DONE"
|
||||
}
|
||||
|
||||
func getErrorFromOp(op *compute.Operation) error {
|
||||
if op != nil && op.Error != nil && len(op.Error.Errors) > 0 {
|
||||
err := &googleapi.Error{
|
||||
Code: int(op.HttpErrorStatusCode),
|
||||
Message: op.Error.Errors[0].Message,
|
||||
}
|
||||
glog.Errorf("GCE operation failed: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (gce *GCECloud) waitForGlobalOp(op *compute.Operation) error {
|
||||
return waitForOp(op, func() (*compute.Operation, error) {
|
||||
return gce.service.GlobalOperations.Get(gce.projectID, op.Name).Do()
|
||||
return waitForOp(op, func(operationName string) (*compute.Operation, error) {
|
||||
return gce.service.GlobalOperations.Get(gce.projectID, operationName).Do()
|
||||
})
|
||||
}
|
||||
|
||||
func (gce *GCECloud) waitForRegionOp(op *compute.Operation, region string) error {
|
||||
return waitForOp(op, func() (*compute.Operation, error) {
|
||||
return gce.service.RegionOperations.Get(gce.projectID, region, op.Name).Do()
|
||||
return waitForOp(op, func(operationName string) (*compute.Operation, error) {
|
||||
return gce.service.RegionOperations.Get(gce.projectID, region, operationName).Do()
|
||||
})
|
||||
}
|
||||
|
||||
func (gce *GCECloud) waitForZoneOp(op *compute.Operation) error {
|
||||
return waitForOp(op, func() (*compute.Operation, error) {
|
||||
return gce.service.ZoneOperations.Get(gce.projectID, gce.zone, op.Name).Do()
|
||||
return waitForOp(op, func(operationName string) (*compute.Operation, error) {
|
||||
return gce.service.ZoneOperations.Get(gce.projectID, gce.zone, operationName).Do()
|
||||
})
|
||||
}
|
||||
|
||||
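
A hypothetical test-style sketch of the new contract (fakeGetOperation and the literal operation values are illustrative, not part of the commit): each poll re-fetches the operation by name, so a stale *compute.Operation can never wedge the loop, and a transient Get error is only logged rather than aborting the wait:

// fakeGetOperation stands in for a real lookup such as
// gce.service.ZoneOperations.Get(project, zone, operationName).Do().
fakeGetOperation := func(operationName string) (*compute.Operation, error) {
	// Report the operation as finished on the first poll.
	return &compute.Operation{Name: operationName, Status: "DONE"}, nil
}

// An operation that is still pending when we start waiting.
op := &compute.Operation{Name: "operation-abc123", Status: "PENDING"}
if err := waitForOp(op, fakeGetOperation); err != nil {
	glog.Errorf("waiting for operation failed: %v", err)
}
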
@@ -1457,18 +1469,7 @@ func (gce *GCECloud) AttachDisk(diskName string, readOnly bool) error {
 
 	attachOp, err := gce.service.Instances.AttachDisk(gce.projectID, gce.zone, gce.instanceID, attachedDisk).Do()
 	if err != nil {
-		// Check if the disk is already attached to this instance. We do this only
-		// in the error case, since it is expected to be exceptional.
-		instance, err := gce.service.Instances.Get(gce.projectID, gce.zone, gce.instanceID).Do()
-		if err != nil {
-			return err
-		}
-		for _, disk := range instance.Disks {
-			if disk.Source == attachedDisk.Source {
-				// Disk is already attached, we're good to go.
-				return nil
-			}
-		}
 		return err
 	}
 
 	return gce.waitForZoneOp(attachOp)
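
A hedged caller-side sketch (the disk name and error handling are illustrative, not from the commit): with the in-error "already attached" re-check removed, AttachDisk surfaces the raw AttachDisk error and otherwise blocks on the zone operation via waitForZoneOp:

// Hypothetical call site; "my-data-disk" is an illustrative disk name.
if err := gce.AttachDisk("my-data-disk", false /* readOnly */); err != nil {
	glog.Errorf("failed to attach disk %q: %v", "my-data-disk", err)
	return err
}
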