Merge pull request #16053 from saad-ali/attachDetachMutextFix

Fix GCE Cloud/Attach/Detach stability issues
This commit is contained in:
Daniel Smith
2015-10-26 13:05:27 -07:00
6 changed files with 267 additions and 355 deletions

View File

@@ -55,6 +55,9 @@ const (
gceAffinityTypeClientIP = "CLIENT_IP"
// AffinityTypeClientIPProto - affinity based on Client IP and port.
gceAffinityTypeClientIPProto = "CLIENT_IP_PROTO"
operationPollInterval = 3 * time.Second
operationPollTimeoutDuration = 30 * time.Minute
)
// GCECloud is an implementation of Interface, TCPLoadBalancer and Instances for Google Compute Engine.
@@ -259,48 +262,57 @@ func (gce *GCECloud) targetPoolURL(name, region string) string {
return fmt.Sprintf("https://www.googleapis.com/compute/v1/projects/%s/regions/%s/targetPools/%s", gce.projectID, region, name)
}
// waitForOp blocks until the given compute operation reports Status "DONE"
// and returns the operation's first recorded error, if any.
//
// NOTE(review): this span looks like a rendered diff with the +/- markers
// stripped, so two revisions of waitForOp appear interleaved below — confirm
// against the commit before treating any line as current code.
//
// Revision with a parameterless getOperation callback (presumably the one
// being removed): a hand-rolled loop that dereferences op without a nil check.
func waitForOp(op *compute.Operation, getOperation func() (*compute.Operation, error)) error {
pollOp := op
consecPollFails := 0
for pollOp.Status != "DONE" {
var err error
// Fixed 3 second pause before every poll; this loop has no overall timeout.
time.Sleep(3 * time.Second)
pollOp, err = getOperation()
if err != nil {
if consecPollFails == 2 {
// Only bail if we've seen 3 consecutive polling errors.
return err
}
consecPollFails++
} else {
// A successful poll resets the consecutive-failure counter.
consecPollFails = 0
}
// Revision whose getOperation callback receives the operation name
// (presumably the one being added). It rejects a nil op up front.
func waitForOp(op *compute.Operation, getOperation func(operationName string) (*compute.Operation, error)) error {
if op == nil {
return fmt.Errorf("operation must not be nil")
}
// Tail of the loop-based revision: once DONE, the first entry of
// pollOp.Error.Errors is returned as a *googleapi.Error carrying the
// operation's HttpErrorStatusCode.
if pollOp.Error != nil && len(pollOp.Error.Errors) > 0 {
return &googleapi.Error{
Code: int(pollOp.HttpErrorStatusCode),
Message: pollOp.Error.Errors[0].Message,
}
}
return nil
// Callback-with-name revision continues here: an operation that is already
// DONE is resolved immediately, without calling getOperation.
if opIsDone(op) {
return getErrorFromOp(op)
}
// The name is copied into a local so the closure below does not read op.
opName := op.Name
// NOTE(review): wait.Poll presumably re-runs the closure every
// operationPollInterval until it returns true or a non-nil error, or until
// operationPollTimeoutDuration has elapsed — confirm against the wait package.
return wait.Poll(operationPollInterval, operationPollTimeoutDuration, func() (bool, error) {
pollOp, err := getOperation(opName)
if err != nil {
// A failed poll is only logged here; err itself is not returned. The
// result is still handed to opIsDone/getErrorFromOp, which both accept nil.
glog.Warningf("GCE poll operation failed: %v", err)
}
return opIsDone(pollOp), getErrorFromOp(pollOp)
})
}
// opIsDone reports whether op is non-nil and its Status equals "DONE".
// A nil operation is never considered done.
func opIsDone(op *compute.Operation) bool {
	if op == nil {
		return false
	}
	return op.Status == "DONE"
}
// getErrorFromOp turns the first error recorded on op into a
// *googleapi.Error whose Code is the operation's HttpErrorStatusCode, logs it,
// and returns it. It returns nil when op is nil or carries no errors.
func getErrorFromOp(op *compute.Operation) error {
	// Guard clauses: nothing to report for a missing operation or an empty
	// error list.
	if op == nil || op.Error == nil || len(op.Error.Errors) == 0 {
		return nil
	}

	opErr := &googleapi.Error{
		Code:    int(op.HttpErrorStatusCode),
		Message: op.Error.Errors[0].Message,
	}
	glog.Errorf("GCE operation failed: %v", opErr)
	return opErr
}
// waitForGlobalOp waits for op via waitForOp, fetching the operation's state
// from gce.service.GlobalOperations within gce.projectID.
//
// NOTE(review): two variants of the callback appear back to back below; this
// looks like a rendered diff with +/- markers stripped — confirm which lines
// are current.
func (gce *GCECloud) waitForGlobalOp(op *compute.Operation) error {
// Variant whose callback takes no argument and reads op.Name from the enclosing scope.
return waitForOp(op, func() (*compute.Operation, error) {
return gce.service.GlobalOperations.Get(gce.projectID, op.Name).Do()
// Variant whose callback is handed the operation name by waitForOp.
return waitForOp(op, func(operationName string) (*compute.Operation, error) {
return gce.service.GlobalOperations.Get(gce.projectID, operationName).Do()
})
}
// waitForRegionOp waits for op via waitForOp, fetching the operation's state
// from gce.service.RegionOperations for the given region within gce.projectID.
//
// NOTE(review): two variants of the callback appear back to back below; this
// looks like a rendered diff with +/- markers stripped — confirm which lines
// are current.
func (gce *GCECloud) waitForRegionOp(op *compute.Operation, region string) error {
// Variant whose callback takes no argument and reads op.Name from the enclosing scope.
return waitForOp(op, func() (*compute.Operation, error) {
return gce.service.RegionOperations.Get(gce.projectID, region, op.Name).Do()
// Variant whose callback is handed the operation name by waitForOp.
return waitForOp(op, func(operationName string) (*compute.Operation, error) {
return gce.service.RegionOperations.Get(gce.projectID, region, operationName).Do()
})
}
// waitForZoneOp waits for op via waitForOp, fetching the operation's state
// from gce.service.ZoneOperations for gce.zone within gce.projectID.
//
// NOTE(review): two variants of the callback appear back to back below; this
// looks like a rendered diff with +/- markers stripped — confirm which lines
// are current.
func (gce *GCECloud) waitForZoneOp(op *compute.Operation) error {
// Variant whose callback takes no argument and reads op.Name from the enclosing scope.
return waitForOp(op, func() (*compute.Operation, error) {
return gce.service.ZoneOperations.Get(gce.projectID, gce.zone, op.Name).Do()
// Variant whose callback is handed the operation name by waitForOp.
return waitForOp(op, func(operationName string) (*compute.Operation, error) {
return gce.service.ZoneOperations.Get(gce.projectID, gce.zone, operationName).Do()
})
}
@@ -1457,18 +1469,7 @@ func (gce *GCECloud) AttachDisk(diskName string, readOnly bool) error {
attachOp, err := gce.service.Instances.AttachDisk(gce.projectID, gce.zone, gce.instanceID, attachedDisk).Do()
if err != nil {
// Check if the disk is already attached to this instance. We do this only
// in the error case, since it is expected to be exceptional.
instance, err := gce.service.Instances.Get(gce.projectID, gce.zone, gce.instanceID).Do()
if err != nil {
return err
}
for _, disk := range instance.Disks {
if disk.Source == attachedDisk.Source {
// Disk is already attached, we're good to go.
return nil
}
}
return err
}
return gce.waitForZoneOp(attachOp)