Merge pull request #95748 from nilo19/bug/fix-lb-update-failed-pip
Update the PIP when it is not in the Succeeded provisioning state during the LB update.
This commit is contained in:
		| @@ -20,6 +20,7 @@ package azure | ||||
|  | ||||
| import ( | ||||
| 	"net/http" | ||||
| 	"regexp" | ||||
| 	"strings" | ||||
|  | ||||
| 	"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-12-01/compute" | ||||
| @@ -44,6 +45,12 @@ const ( | ||||
| 	operationCanceledErrorMessage = "canceledandsupersededduetoanotheroperation" | ||||
|  | ||||
| 	cannotDeletePublicIPErrorMessageCode = "PublicIPAddressCannotBeDeleted" | ||||
|  | ||||
| 	referencedResourceNotProvisionedMessageCode = "ReferencedResourceNotProvisioned" | ||||
| ) | ||||
|  | ||||
| var ( | ||||
| 	pipErrorMessageRE = regexp.MustCompile(`(?:.*)/subscriptions/(?:.*)/resourceGroups/(.*)/providers/Microsoft.Network/publicIPAddresses/([^\s]+)(?:.*)`) | ||||
| ) | ||||
|  | ||||
| // RequestBackoff if backoff is disabled in cloud provider it | ||||
| @@ -182,7 +189,7 @@ func (az *Cloud) CreateOrUpdateLB(service *v1.Service, lb network.LoadBalancer) | ||||
| 	defer cancel() | ||||
|  | ||||
| 	rgName := az.getLoadBalancerResourceGroup() | ||||
| 	rerr := az.LoadBalancerClient.CreateOrUpdate(ctx, rgName, *lb.Name, lb, to.String(lb.Etag)) | ||||
| 	rerr := az.LoadBalancerClient.CreateOrUpdate(ctx, rgName, to.String(lb.Name), lb, to.String(lb.Etag)) | ||||
| 	klog.V(10).Infof("LoadBalancerClient.CreateOrUpdate(%s): end", *lb.Name) | ||||
| 	if rerr == nil { | ||||
| 		// Invalidate the cache right after updating | ||||
| @@ -192,12 +199,39 @@ func (az *Cloud) CreateOrUpdateLB(service *v1.Service, lb network.LoadBalancer) | ||||
|  | ||||
| 	// Invalidate the cache because ETAG precondition mismatch. | ||||
| 	if rerr.HTTPStatusCode == http.StatusPreconditionFailed { | ||||
| 		klog.V(3).Infof("LoadBalancer cache for %s is cleanup because of http.StatusPreconditionFailed", *lb.Name) | ||||
| 		klog.V(3).Infof("LoadBalancer cache for %s is cleanup because of http.StatusPreconditionFailed", to.String(lb.Name)) | ||||
| 		az.lbCache.Delete(*lb.Name) | ||||
| 	} | ||||
|  | ||||
| 	retryErrorMessage := rerr.Error().Error() | ||||
| 	// Invalidate the cache because another new operation has canceled the current request. | ||||
| 	if strings.Contains(strings.ToLower(rerr.Error().Error()), operationCanceledErrorMessage) { | ||||
| 		klog.V(3).Infof("LoadBalancer cache for %s is cleanup because CreateOrUpdate is canceled by another operation", *lb.Name) | ||||
| 	if strings.Contains(strings.ToLower(retryErrorMessage), operationCanceledErrorMessage) { | ||||
| 		klog.V(3).Infof("LoadBalancer cache for %s is cleanup because CreateOrUpdate is canceled by another operation", to.String(lb.Name)) | ||||
| 		az.lbCache.Delete(*lb.Name) | ||||
| 	} | ||||
|  | ||||
| 	// The LB update may fail because the referenced PIP is not in the Succeeded provisioning state | ||||
| 	if strings.Contains(strings.ToLower(retryErrorMessage), strings.ToLower(referencedResourceNotProvisionedMessageCode)) { | ||||
| 		matches := pipErrorMessageRE.FindStringSubmatch(retryErrorMessage) | ||||
| 		if len(matches) != 3 { | ||||
| 			klog.Warningf("Failed to parse the retry error message %s", retryErrorMessage) | ||||
| 			return rerr.Error() | ||||
| 		} | ||||
| 		pipRG, pipName := matches[1], matches[2] | ||||
| 		klog.V(3).Infof("The public IP %s referenced by load balancer %s is not in Succeeded provisioning state, will try to update it", pipName, to.String(lb.Name)) | ||||
| 		pip, _, err := az.getPublicIPAddress(pipRG, pipName) | ||||
| 		if err != nil { | ||||
| 			klog.Warningf("Failed to get the public IP %s in resource group %s: %v", pipName, pipRG, err) | ||||
| 			return rerr.Error() | ||||
| 		} | ||||
| 		// Perform a dummy update to fix the provisioning state | ||||
| 		err = az.CreateOrUpdatePIP(service, pipRG, pip) | ||||
| 		if err != nil { | ||||
| 			klog.Warningf("Failed to update the public IP %s in resource group %s: %v", pipName, pipRG, err) | ||||
| 			return rerr.Error() | ||||
| 		} | ||||
| 		// Invalidate the LB cache, return the error, and the controller manager | ||||
| 		// would retry the LB update in the next reconcile loop | ||||
| 		az.lbCache.Delete(*lb.Name) | ||||
| 	} | ||||
|  | ||||
| @@ -241,10 +275,10 @@ func (az *Cloud) CreateOrUpdatePIP(service *v1.Service, pipResourceGroup string, | ||||
| 	ctx, cancel := getContextWithCancel() | ||||
| 	defer cancel() | ||||
|  | ||||
| 	rerr := az.PublicIPAddressesClient.CreateOrUpdate(ctx, pipResourceGroup, *pip.Name, pip) | ||||
| 	klog.V(10).Infof("PublicIPAddressesClient.CreateOrUpdate(%s, %s): end", pipResourceGroup, *pip.Name) | ||||
| 	rerr := az.PublicIPAddressesClient.CreateOrUpdate(ctx, pipResourceGroup, to.String(pip.Name), pip) | ||||
| 	klog.V(10).Infof("PublicIPAddressesClient.CreateOrUpdate(%s, %s): end", pipResourceGroup, to.String(pip.Name)) | ||||
| 	if rerr != nil { | ||||
| 		klog.Errorf("PublicIPAddressesClient.CreateOrUpdate(%s, %s) failed: %s", pipResourceGroup, *pip.Name, rerr.Error().Error()) | ||||
| 		klog.Errorf("PublicIPAddressesClient.CreateOrUpdate(%s, %s) failed: %s", pipResourceGroup, to.String(pip.Name), rerr.Error().Error()) | ||||
| 		az.Event(service, v1.EventTypeWarning, "CreateOrUpdatePublicIPAddress", rerr.Error().Error()) | ||||
| 		return rerr.Error() | ||||
| 	} | ||||
|   | ||||
| @@ -240,6 +240,8 @@ func TestCreateOrUpdateLB(t *testing.T) { | ||||
| 	ctrl := gomock.NewController(t) | ||||
| 	defer ctrl.Finish() | ||||
|  | ||||
| 	referencedResourceNotProvisionedRawErrorString := `Code="ReferencedResourceNotProvisioned" Message="Cannot proceed with operation because resource /subscriptions/sub/resourceGroups/rg/providers/Microsoft.Network/publicIPAddresses/pip used by resource /subscriptions/sub/resourceGroups/rg/providers/Microsoft.Network/loadBalancers/lb is not in Succeeded state. Resource is in Failed state and the last operation that updated/is updating the resource is PutPublicIpAddressOperation."` | ||||
|  | ||||
| 	tests := []struct { | ||||
| 		clientErr   *retry.Error | ||||
| 		expectedErr error | ||||
| @@ -252,6 +254,10 @@ func TestCreateOrUpdateLB(t *testing.T) { | ||||
| 			clientErr:   &retry.Error{RawError: fmt.Errorf(operationCanceledErrorMessage)}, | ||||
| 			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: canceledandsupersededduetoanotheroperation"), | ||||
| 		}, | ||||
| 		{ | ||||
| 			clientErr:   &retry.Error{RawError: fmt.Errorf(referencedResourceNotProvisionedRawErrorString)}, | ||||
| 			expectedErr: fmt.Errorf("Retriable: false, RetryAfter: 0s, HTTPStatusCode: 0, RawError: %s", referencedResourceNotProvisionedRawErrorString), | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| 	for _, test := range tests { | ||||
| @@ -262,6 +268,15 @@ func TestCreateOrUpdateLB(t *testing.T) { | ||||
| 		mockLBClient.EXPECT().CreateOrUpdate(gomock.Any(), az.ResourceGroup, gomock.Any(), gomock.Any(), gomock.Any()).Return(test.clientErr) | ||||
| 		mockLBClient.EXPECT().Get(gomock.Any(), az.ResourceGroup, "lb", gomock.Any()).Return(network.LoadBalancer{}, nil) | ||||
|  | ||||
| 		mockPIPClient := az.PublicIPAddressesClient.(*mockpublicipclient.MockInterface) | ||||
| 		mockPIPClient.EXPECT().CreateOrUpdate(gomock.Any(), az.ResourceGroup, "pip", gomock.Any()).Return(nil).AnyTimes() | ||||
| 		mockPIPClient.EXPECT().Get(gomock.Any(), az.ResourceGroup, "pip", gomock.Any()).Return(network.PublicIPAddress{ | ||||
| 			Name: to.StringPtr("pip"), | ||||
| 			PublicIPAddressPropertiesFormat: &network.PublicIPAddressPropertiesFormat{ | ||||
| 				ProvisioningState: to.StringPtr("Succeeded"), | ||||
| 			}, | ||||
| 		}, nil).AnyTimes() | ||||
|  | ||||
| 		err := az.CreateOrUpdateLB(&v1.Service{}, network.LoadBalancer{ | ||||
| 			Name: to.StringPtr("lb"), | ||||
| 			Etag: to.StringPtr("etag"), | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Kubernetes Prow Robot
					Kubernetes Prow Robot