diff --git a/pkg/cloudprovider/providers/aws/BUILD b/pkg/cloudprovider/providers/aws/BUILD index eadeaa1f3ce..eb41175c317 100644 --- a/pkg/cloudprovider/providers/aws/BUILD +++ b/pkg/cloudprovider/providers/aws/BUILD @@ -31,6 +31,7 @@ go_library( "//pkg/credentialprovider/aws:go_default_library", "//pkg/kubelet/apis:go_default_library", "//pkg/volume:go_default_library", + "//pkg/volume/util:go_default_library", "//vendor/github.com/aws/aws-sdk-go/aws:go_default_library", "//vendor/github.com/aws/aws-sdk-go/aws/awserr:go_default_library", "//vendor/github.com/aws/aws-sdk-go/aws/credentials:go_default_library", diff --git a/pkg/cloudprovider/providers/aws/aws.go b/pkg/cloudprovider/providers/aws/aws.go index 77e2689d4d6..c41c15f0d01 100644 --- a/pkg/cloudprovider/providers/aws/aws.go +++ b/pkg/cloudprovider/providers/aws/aws.go @@ -42,6 +42,8 @@ import ( "github.com/golang/glog" "github.com/prometheus/client_golang/prometheus" + "path" + "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" @@ -51,7 +53,7 @@ import ( "k8s.io/kubernetes/pkg/controller" kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis" "k8s.io/kubernetes/pkg/volume" - "path" + volumeutil "k8s.io/kubernetes/pkg/volume/util" ) // ProviderName is the name of this cloud provider. @@ -1806,7 +1808,7 @@ func (c *Cloud) CreateDisk(volumeOptions *VolumeOptions) (KubernetesVolumeID, er createAZ = volume.ChooseZoneForVolume(allZones, volumeOptions.PVCName) } if !volumeOptions.ZonePresent && volumeOptions.ZonesPresent { - if adminSetOfZones, err := volume.ZonesToSet(volumeOptions.AvailabilityZones); err != nil { + if adminSetOfZones, err := volumeutil.ZonesToSet(volumeOptions.AvailabilityZones); err != nil { return "", err } else { createAZ = volume.ChooseZoneForVolume(adminSetOfZones, volumeOptions.PVCName) diff --git a/pkg/cloudprovider/providers/gce/BUILD b/pkg/cloudprovider/providers/gce/BUILD index 65f7a5e7d62..81e169f6423 100644 --- a/pkg/cloudprovider/providers/gce/BUILD +++ b/pkg/cloudprovider/providers/gce/BUILD @@ -52,6 +52,7 @@ go_library( "//pkg/util/net/sets:go_default_library", "//pkg/util/version:go_default_library", "//pkg/volume:go_default_library", + "//pkg/volume/util:go_default_library", "//vendor/cloud.google.com/go/compute/metadata:go_default_library", "//vendor/github.com/golang/glog:go_default_library", "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library", @@ -105,6 +106,7 @@ go_test( "//vendor/google.golang.org/api/googleapi:go_default_library", "//vendor/k8s.io/api/core/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library", ], ) diff --git a/pkg/cloudprovider/providers/gce/gce.go b/pkg/cloudprovider/providers/gce/gce.go index b28f61c6bcc..2445d02170c 100644 --- a/pkg/cloudprovider/providers/gce/gce.go +++ b/pkg/cloudprovider/providers/gce/gce.go @@ -80,7 +80,8 @@ const ( // Defaults to 5 * 2 = 10 seconds before the LB will steer traffic away gceHcUnhealthyThreshold = int64(5) - gceComputeAPIEndpoint = "https://www.googleapis.com/compute/v1/" + gceComputeAPIEndpoint = "https://www.googleapis.com/compute/v1/" + gceComputeAPIEndpointAlpha = "https://www.googleapis.com/compute/alpha/" ) // gceObject is an abstraction of all GCE API object in go client @@ -135,16 +136,28 @@ type ServiceManager interface { name string, sizeGb int64, tagsStr string, - diskTypeURI string, + diskType string, zone string) (gceObject, error) - // Attach a persistent disk on GCE 
with the given disk spec to the specified instance. - AttachDisk(diskName string, - diskKind string, - diskZone string, - readWrite string, - source string, + // Creates a new regional persistent disk on GCE with the given disk spec. + CreateRegionalDisk( + name string, + sizeGb int64, + tagsStr string, diskType string, + zones sets.String) (gceObject, error) + + // Deletes the persistent disk from GCE with the given diskName. + DeleteDisk(zone string, disk string) (gceObject, error) + + // Deletes the regional persistent disk from GCE with the given diskName. + DeleteRegionalDisk(diskName string) (gceObject, error) + + // Attach a persistent disk on GCE with the given disk spec to the specified instance. + AttachDisk( + disk *GCEDisk, + readWrite string, + instanceZone string, instanceName string) (gceObject, error) // Detach a persistent disk on GCE with the given disk spec from the specified instance. @@ -154,13 +167,16 @@ type ServiceManager interface { devicePath string) (gceObject, error) // Gets the persistent disk from GCE with the given diskName. - GetDisk(project string, zone string, diskName string) (*GCEDisk, error) + GetDisk(zone string, diskName string) (*GCEDisk, error) - // Deletes the persistent disk from GCE with the given diskName. - DeleteDisk(project string, zone string, disk string) (gceObject, error) + // Gets the regional persistent disk from GCE with the given diskName. + GetRegionalDisk(diskName string) (*GCEDisk, error) // Waits until GCE reports the given operation in the given zone as done. WaitForZoneOp(op gceObject, zone string, mc *metricContext) error + + // Waits until GCE reports the given operation in the given region is done. + WaitForRegionalOp(op gceObject, mc *metricContext) error } type GCEServiceManager struct { @@ -740,8 +756,14 @@ func (manager *GCEServiceManager) CreateDisk( name string, sizeGb int64, tagsStr string, - diskTypeURI string, + diskType string, zone string) (gceObject, error) { + diskTypeURI, err := manager.getDiskTypeURI( + manager.gce.region /* diskRegion */, singleZone{zone}, diskType) + if err != nil { + return nil, err + } + if manager.gce.AlphaFeatureGate.Enabled(GCEDiskAlphaFeatureGate) { diskToCreateAlpha := &computealpha.Disk{ Name: name, @@ -749,7 +771,9 @@ func (manager *GCEServiceManager) CreateDisk( Description: tagsStr, Type: diskTypeURI, } - return manager.gce.serviceAlpha.Disks.Insert(manager.gce.projectID, zone, diskToCreateAlpha).Do() + + return manager.gce.serviceAlpha.Disks.Insert( + manager.gce.projectID, zone, diskToCreateAlpha).Do() } diskToCreateV1 := &compute.Disk{ @@ -758,38 +782,72 @@ func (manager *GCEServiceManager) CreateDisk( Description: tagsStr, Type: diskTypeURI, } - return manager.gce.service.Disks.Insert(manager.gce.projectID, zone, diskToCreateV1).Do() + return manager.gce.service.Disks.Insert( + manager.gce.projectID, zone, diskToCreateV1).Do() +} + +func (manager *GCEServiceManager) CreateRegionalDisk( + name string, + sizeGb int64, + tagsStr string, + diskType string, + replicaZones sets.String) (gceObject, error) { + diskTypeURI, err := manager.getDiskTypeURI( + manager.gce.region /* diskRegion */, multiZone{replicaZones}, diskType) + if err != nil { + return nil, err + } + fullyQualifiedReplicaZones := []string{} + for _, replicaZone := range replicaZones.UnsortedList() { + fullyQualifiedReplicaZones = append( + fullyQualifiedReplicaZones, manager.getReplicaZoneURI(replicaZone)) + } + if manager.gce.AlphaFeatureGate.Enabled(GCEDiskAlphaFeatureGate) { + diskToCreateAlpha := 
&computealpha.Disk{
+			Name:         name,
+			SizeGb:       sizeGb,
+			Description:  tagsStr,
+			Type:         diskTypeURI,
+			ReplicaZones: fullyQualifiedReplicaZones,
+		}
+		return manager.gce.serviceAlpha.RegionDisks.Insert(
+			manager.gce.projectID, manager.gce.region, diskToCreateAlpha).Do()
+	}
+
+	return nil, fmt.Errorf("The regional PD feature is only available via the GCE Alpha API. Enable \"DiskAlphaAPI\" in the list of \"alpha-features\" in \"gce.conf\" to use the feature.")
 }
 
 func (manager *GCEServiceManager) AttachDisk(
-	diskName string,
-	diskKind string,
-	diskZone string,
+	disk *GCEDisk,
 	readWrite string,
-	source string,
-	diskType string,
+	instanceZone string,
 	instanceName string) (gceObject, error) {
+	source, err := manager.getDiskSourceURI(disk)
+	if err != nil {
+		return nil, err
+	}
+
 	if manager.gce.AlphaFeatureGate.Enabled(GCEDiskAlphaFeatureGate) {
 		attachedDiskAlpha := &computealpha.AttachedDisk{
-			DeviceName: diskName,
-			Kind:       diskKind,
+			DeviceName: disk.Name,
+			Kind:       disk.Kind,
 			Mode:       readWrite,
 			Source:     source,
-			Type:       diskType,
+			Type:       diskTypePersistent,
 		}
 		return manager.gce.serviceAlpha.Instances.AttachDisk(
-			manager.gce.projectID, diskZone, instanceName, attachedDiskAlpha).Do()
+			manager.gce.projectID, instanceZone, instanceName, attachedDiskAlpha).Do()
 	}
 
 	attachedDiskV1 := &compute.AttachedDisk{
-		DeviceName: diskName,
-		Kind:       diskKind,
+		DeviceName: disk.Name,
+		Kind:       disk.Kind,
 		Mode:       readWrite,
 		Source:     source,
-		Type:       diskType,
+		Type:       diskTypePersistent,
 	}
 	return manager.gce.service.Instances.AttachDisk(
-		manager.gce.projectID, diskZone, instanceName, attachedDiskV1).Do()
+		manager.gce.projectID, instanceZone, instanceName, attachedDiskV1).Do()
 }
 
 func (manager *GCEServiceManager) DetachDisk(
@@ -806,49 +864,270 @@
 }
 
 func (manager *GCEServiceManager) GetDisk(
-	project string, zone string, diskName string) (*GCEDisk, error) {
+	zone string,
+	diskName string) (*GCEDisk, error) {
+	if zone == "" {
+		return nil, fmt.Errorf("Cannot fetch disk %q. Zone is empty.", diskName)
+	}
+
+	if diskName == "" {
+		return nil, fmt.Errorf("Cannot fetch disk. Zone is specified (%q), but disk name is empty.", zone)
+	}
 
 	if manager.gce.AlphaFeatureGate.Enabled(GCEDiskAlphaFeatureGate) {
-		diskAlpha, err := manager.gce.serviceAlpha.Disks.Get(project, zone, diskName).Do()
+		diskAlpha, err := manager.gce.serviceAlpha.Disks.Get(
+			manager.gce.projectID, zone, diskName).Do()
 		if err != nil {
 			return nil, err
 		}
 
+		var zoneInfo zoneType
+		if len(diskAlpha.ReplicaZones) > 1 {
+			zones := sets.NewString()
+			for _, zoneURI := range diskAlpha.ReplicaZones {
+				zones.Insert(lastComponent(zoneURI))
+			}
+			zoneInfo = multiZone{zones}
+		} else {
+			zoneInfo = singleZone{lastComponent(diskAlpha.Zone)}
+			if diskAlpha.Zone == "" {
+				zoneInfo = singleZone{lastComponent(zone)}
+			}
+		}
+
+		region := strings.TrimSpace(lastComponent(diskAlpha.Region))
+		if region == "" {
+			region, err = manager.getRegionFromZone(zoneInfo)
+			if err != nil {
+				return nil, fmt.Errorf("failed to extract region from zone for %q/%q err=%v", zone, diskName, err)
+			}
+		}
+
 		return &GCEDisk{
-			Zone: lastComponent(diskAlpha.Zone),
-			Name: diskAlpha.Name,
-			Kind: diskAlpha.Kind,
-			Type: diskAlpha.Type,
+			ZoneInfo: zoneInfo,
+			Region:   region,
+			Name:     diskAlpha.Name,
+			Kind:     diskAlpha.Kind,
+			Type:     diskAlpha.Type,
 		}, nil
 	}
 
-	diskStable, err := manager.gce.service.Disks.Get(project, zone, diskName).Do()
+	diskStable, err := manager.gce.service.Disks.Get(
+		manager.gce.projectID, zone, diskName).Do()
 	if err != nil {
 		return nil, err
 	}
 
+	zoneInfo := singleZone{strings.TrimSpace(lastComponent(diskStable.Zone))}
+	if zoneInfo.zone == "" {
+		zoneInfo = singleZone{zone}
+	}
+
+	region, err := manager.getRegionFromZone(zoneInfo)
+	if err != nil {
+		return nil, fmt.Errorf("failed to extract region from zone for %q/%q err=%v", zone, diskName, err)
+	}
+
 	return &GCEDisk{
-		Zone: lastComponent(diskStable.Zone),
-		Name: diskStable.Name,
-		Kind: diskStable.Kind,
-		Type: diskStable.Type,
+		ZoneInfo: zoneInfo,
+		Region:   region,
+		Name:     diskStable.Name,
+		Kind:     diskStable.Kind,
+		Type:     diskStable.Type,
 	}, nil
 }
 
+func (manager *GCEServiceManager) GetRegionalDisk(
+	diskName string) (*GCEDisk, error) {
+
+	if manager.gce.AlphaFeatureGate.Enabled(GCEDiskAlphaFeatureGate) {
+		diskAlpha, err := manager.gce.serviceAlpha.RegionDisks.Get(
+			manager.gce.projectID, manager.gce.region, diskName).Do()
+		if err != nil {
+			return nil, err
+		}
+
+		zones := sets.NewString()
+		for _, zoneURI := range diskAlpha.ReplicaZones {
+			zones.Insert(lastComponent(zoneURI))
+		}
+
+		return &GCEDisk{
+			ZoneInfo: multiZone{zones},
+			Region:   lastComponent(diskAlpha.Region),
+			Name:     diskAlpha.Name,
+			Kind:     diskAlpha.Kind,
+			Type:     diskAlpha.Type,
+		}, nil
+	}
+
+	return nil, fmt.Errorf("The regional PD feature is only available via the GCE Alpha API. Enable \"DiskAlphaAPI\" in the list of \"alpha-features\" in \"gce.conf\" to use the feature.")
+}
+
 func (manager *GCEServiceManager) DeleteDisk(
-	project string, zone string, diskName string) (gceObject, error) {
+	zone string,
+	diskName string) (gceObject, error) {
 	if manager.gce.AlphaFeatureGate.Enabled(GCEDiskAlphaFeatureGate) {
-		return manager.gce.serviceAlpha.Disks.Delete(project, zone, diskName).Do()
+		return manager.gce.serviceAlpha.Disks.Delete(
+			manager.gce.projectID, zone, diskName).Do()
 	}
 
-	return manager.gce.service.Disks.Delete(project, zone, diskName).Do()
+	return manager.gce.service.Disks.Delete(
+		manager.gce.projectID, zone, diskName).Do()
 }
 
-func (manager *GCEServiceManager) WaitForZoneOp(op gceObject, zone string, mc *metricContext) error {
+func (manager *GCEServiceManager) DeleteRegionalDisk(
+	diskName string) (gceObject, error) {
+	if manager.gce.AlphaFeatureGate.Enabled(GCEDiskAlphaFeatureGate) {
+		return manager.gce.serviceAlpha.RegionDisks.Delete(
+			manager.gce.projectID, manager.gce.region, diskName).Do()
+	}
+
+	return nil, fmt.Errorf("DeleteRegionalDisk is a regional PD feature and is only available via the GCE Alpha API. Enable \"DiskAlphaAPI\" in the list of \"alpha-features\" in \"gce.conf\" to use the feature.")
+}
+
+func (manager *GCEServiceManager) WaitForZoneOp(
+	op gceObject, zone string, mc *metricContext) error {
 	return manager.gce.waitForZoneOp(op, zone, mc)
 }
+
+func (manager *GCEServiceManager) WaitForRegionalOp(
+	op gceObject, mc *metricContext) error {
+	return manager.gce.waitForRegionOp(op, manager.gce.region, mc)
+}
+
+func (manager *GCEServiceManager) getDiskSourceURI(disk *GCEDisk) (string, error) {
+	getProjectsAPIEndpoint := manager.getProjectsAPIEndpoint()
+	if manager.gce.AlphaFeatureGate.Enabled(GCEDiskAlphaFeatureGate) {
+		getProjectsAPIEndpoint = manager.getProjectsAPIEndpointAlpha()
+	}
+
+	switch zoneInfo := disk.ZoneInfo.(type) {
+	case singleZone:
+		if zoneInfo.zone == "" || disk.Region == "" {
+			// Unexpected, but sanity-check
+			return "", fmt.Errorf("PD does not have zone/region information: %#v", disk)
+		}
+
+		return getProjectsAPIEndpoint + fmt.Sprintf(
+			diskSourceURITemplateSingleZone,
+			manager.gce.projectID,
+			zoneInfo.zone,
+			disk.Name), nil
+	case multiZone:
+		if zoneInfo.replicaZones == nil || zoneInfo.replicaZones.Len() <= 0 {
+			// Unexpected, but sanity-check
+			return "", fmt.Errorf("PD is regional but does not have any replicaZones specified: %v", disk)
+		}
+		return getProjectsAPIEndpoint + fmt.Sprintf(
+			diskSourceURITemplateRegional,
+			manager.gce.projectID,
+			disk.Region,
+			disk.Name), nil
+	case nil:
+		// Unexpected, but sanity-check
+		return "", fmt.Errorf("PD did not have ZoneInfo: %v", disk)
+	default:
+		// Unexpected, but sanity-check
+		return "", fmt.Errorf("disk.ZoneInfo has unexpected type %T", zoneInfo)
+	}
+}
+
+func (manager *GCEServiceManager) getDiskTypeURI(
+	diskRegion string, diskZoneInfo zoneType, diskType string) (string, error) {
+	getProjectsAPIEndpoint := manager.getProjectsAPIEndpoint()
+	if manager.gce.AlphaFeatureGate.Enabled(GCEDiskAlphaFeatureGate) {
+		getProjectsAPIEndpoint = manager.getProjectsAPIEndpointAlpha()
+	}
+
+	switch zoneInfo := diskZoneInfo.(type) {
+	case singleZone:
+		if zoneInfo.zone == "" {
+			return "", fmt.Errorf("zone is empty: %v", zoneInfo)
+		}
+
+		return getProjectsAPIEndpoint + fmt.Sprintf(
+			diskTypeURITemplateSingleZone,
+			manager.gce.projectID,
+			zoneInfo.zone,
+			diskType), nil
+	case multiZone:
+		if zoneInfo.replicaZones == nil || zoneInfo.replicaZones.Len() <= 0 {
+			return "", fmt.Errorf("zoneInfo is regional but does not have any replicaZones specified: %v", zoneInfo)
+		}
+		return getProjectsAPIEndpoint + fmt.Sprintf(
+			diskTypeURITemplateRegional,
+			manager.gce.projectID,
+			diskRegion,
+			diskType), nil
+	case nil:
+		return "", fmt.Errorf("zoneInfo nil")
+	default:
+		return "", fmt.Errorf("zoneInfo has unexpected type %T", zoneInfo)
+	}
+}
+
+func (manager *GCEServiceManager) getReplicaZoneURI(zone string) string {
+	getProjectsAPIEndpoint := manager.getProjectsAPIEndpoint()
+	if manager.gce.AlphaFeatureGate.Enabled(GCEDiskAlphaFeatureGate) {
+		getProjectsAPIEndpoint = manager.getProjectsAPIEndpointAlpha()
+	}
+
+	return getProjectsAPIEndpoint + fmt.Sprintf(
+		replicaZoneURITemplateSingleZone,
+		manager.gce.projectID,
+		zone)
+}
+
+func (manager *GCEServiceManager) getProjectsAPIEndpoint() string {
+	projectsApiEndpoint := gceComputeAPIEndpoint + "projects/"
+	if manager.gce.service != nil {
+		projectsApiEndpoint = manager.gce.service.BasePath
+	}
+
+	return projectsApiEndpoint
+}
+
+func (manager *GCEServiceManager) getProjectsAPIEndpointAlpha() string {
+	projectsApiEndpoint := gceComputeAPIEndpointAlpha + "projects/"
+	if manager.gce.serviceAlpha != nil {
+		projectsApiEndpoint = manager.gce.serviceAlpha.BasePath
+	}
+
+	return projectsApiEndpoint
+}
+
+func (manager *GCEServiceManager) getRegionFromZone(zoneInfo zoneType) (string, error) {
+	var zone string
+	switch zoneInfo := zoneInfo.(type) {
+	case singleZone:
+		if zoneInfo.zone == "" {
+			// Unexpected, but sanity-check
+			return "", fmt.Errorf("PD is single zone, but zone is not specified: %#v", zoneInfo)
+		}
+
+		zone = zoneInfo.zone
+	case multiZone:
+		if zoneInfo.replicaZones == nil || zoneInfo.replicaZones.Len() <= 0 {
+			// Unexpected, but sanity-check
+			return "", fmt.Errorf("PD is regional but does not have any replicaZones specified: %v", zoneInfo)
+		}
+
+		zone = zoneInfo.replicaZones.UnsortedList()[0]
+	case nil:
+		// Unexpected, but sanity-check
+		return "", fmt.Errorf("zoneInfo is nil")
+	default:
+		// Unexpected, but sanity-check
+		return "", fmt.Errorf("zoneInfo has unexpected type %T", zoneInfo)
+	}
+
+	region, err := GetGCERegion(zone)
+	if err != nil {
+		glog.Warningf("failed to parse GCE region from zone %q: %v", zone, err)
+		region = manager.gce.region
	}
+
+	return region, nil
+}
diff --git a/pkg/cloudprovider/providers/gce/gce_alpha.go b/pkg/cloudprovider/providers/gce/gce_alpha.go
index 7bc4b45c72d..292504f7d6e 100644
--- a/pkg/cloudprovider/providers/gce/gce_alpha.go
+++ b/pkg/cloudprovider/providers/gce/gce_alpha.go
@@ -29,7 +29,7 @@ const (
 	// tier to use. Currently supports "Standard" and "Premium" (default).
AlphaFeatureNetworkTiers = "NetworkTiers" - GCEDiskAlphaFeatureGate = "GCEDiskAlphaAPI" + GCEDiskAlphaFeatureGate = "DiskAlphaAPI" ) // All known alpha features diff --git a/pkg/cloudprovider/providers/gce/gce_disks.go b/pkg/cloudprovider/providers/gce/gce_disks.go index bdc126c3048..997b65d624c 100644 --- a/pkg/cloudprovider/providers/gce/gce_disks.go +++ b/pkg/cloudprovider/providers/gce/gce_disks.go @@ -23,9 +23,11 @@ import ( "strings" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/kubernetes/pkg/cloudprovider" kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis" "k8s.io/kubernetes/pkg/volume" + volumeutil "k8s.io/kubernetes/pkg/volume/util" "github.com/golang/glog" "google.golang.org/api/googleapi" @@ -37,9 +39,15 @@ const ( DiskTypeSSD = "pd-ssd" DiskTypeStandard = "pd-standard" - diskTypeDefault = DiskTypeStandard - diskTypeUriTemplate = "%s/zones/%s/diskTypes/%s" - diskTypePersistent = "PERSISTENT" + diskTypeDefault = DiskTypeStandard + diskTypeURITemplateSingleZone = "%s/zones/%s/diskTypes/%s" // {gce.projectID}/zones/{disk.Zone}/diskTypes/{disk.Type}" + diskTypeURITemplateRegional = "%s/regions/%s/diskTypes/%s" // {gce.projectID}/regions/{disk.Region}/diskTypes/{disk.Type}" + diskTypePersistent = "PERSISTENT" + + diskSourceURITemplateSingleZone = "%s/zones/%s/disks/%s" // {gce.projectID}/zones/{disk.Zone}/disks/{disk.Name}" + diskSourceURITemplateRegional = "%s/regions/%s/disks/%s" //{gce.projectID}/regions/{disk.Region}/disks/repd" + + replicaZoneURITemplateSingleZone = "%s/zones/%s" // {gce.projectID}/zones/{disk.Zone} ) // Disks is interface for manipulation with GCE PDs. @@ -63,6 +71,11 @@ type Disks interface { // as JSON into Description field. CreateDisk(name string, diskType string, zone string, sizeGb int64, tags map[string]string) error + // CreateRegionalDisk creates a new Regional Persistent Disk, with the + // specified properties, replicated to the specified zones. Tags are + // serialized as JSON into Description field. + CreateRegionalDisk(name string, diskType string, replicaZones sets.String, sizeGb int64, tags map[string]string) error + // DeleteDisk deletes PD. 
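
// Illustrative sketch, not part of this patch: how the URI template
// constants above expand once a projects endpoint is prepended, as
// getDiskTypeURI and getDiskSourceURI do. The project, zone, region,
// and disk names below are hypothetical.

package main

import "fmt"

const (
	gceComputeAPIEndpoint         = "https://www.googleapis.com/compute/v1/"
	diskTypeURITemplateSingleZone = "%s/zones/%s/diskTypes/%s"
	diskSourceURITemplateRegional = "%s/regions/%s/disks/%s"
)

func main() {
	base := gceComputeAPIEndpoint + "projects/"

	// https://www.googleapis.com/compute/v1/projects/my-project/zones/us-central1-b/diskTypes/pd-ssd
	fmt.Println(base + fmt.Sprintf(diskTypeURITemplateSingleZone, "my-project", "us-central1-b", "pd-ssd"))

	// https://www.googleapis.com/compute/v1/projects/my-project/regions/us-central1/disks/my-disk
	fmt.Println(base + fmt.Sprintf(diskSourceURITemplateRegional, "my-project", "us-central1", "my-disk"))
}
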
DeleteDisk(diskToDelete string) error @@ -77,14 +90,34 @@ type Disks interface { var _ Disks = (*GCECloud)(nil) type GCEDisk struct { - Zone string - Name string - Kind string - Type string + ZoneInfo zoneType + Region string + Name string + Kind string + Type string } -func newDiskMetricContext(request, zone string) *metricContext { - return newGenericMetricContext("disk", request, unusedMetricLabel, zone, computeV1Version) +type zoneType interface { + isZoneType() +} + +type multiZone struct { + replicaZones sets.String +} + +type singleZone struct { + zone string +} + +func (m multiZone) isZoneType() {} +func (s singleZone) isZoneType() {} + +func newDiskMetricContextZonal(request, region, zone string) *metricContext { + return newGenericMetricContext("disk", request, region, zone, computeV1Version) +} + +func newDiskMetricContextRegional(request, region string) *metricContext { + return newGenericMetricContext("disk", request, region, unusedMetricLabel, computeV1Version) } func (gce *GCECloud) AttachDisk(diskName string, nodeName types.NodeName, readOnly bool) error { @@ -93,26 +126,41 @@ func (gce *GCECloud) AttachDisk(diskName string, nodeName types.NodeName, readOn if err != nil { return fmt.Errorf("error getting instance %q", instanceName) } - disk, err := gce.getDiskByName(diskName, instance.Zone) - if err != nil { - return err + + // Try fetching as regional PD + var disk *GCEDisk + var mc *metricContext + if gce.AlphaFeatureGate.Enabled(GCEDiskAlphaFeatureGate) { + disk, err = gce.getRegionalDiskByName(diskName) + if err != nil { + glog.V(5).Infof("Could not find regional PD named %q to Attach. Will look for a zonal PD", diskName) + err = nil + } else { + mc = newDiskMetricContextRegional("attach", gce.region) + } } + + if disk == nil { + disk, err = gce.getDiskByName(diskName, instance.Zone) + if err != nil { + return err + } + mc = newDiskMetricContextZonal("attach", gce.region, instance.Zone) + } + readWrite := "READ_WRITE" if readOnly { readWrite = "READ_ONLY" } - mc := newDiskMetricContext("attach", instance.Zone) - source := gce.service.BasePath + strings.Join([]string{ - gce.projectID, "zones", disk.Zone, "disks", disk.Name}, "/") attachOp, err := gce.manager.AttachDisk( - disk.Name, disk.Kind, disk.Zone, readWrite, source, diskTypePersistent, instance.Name) + disk, readWrite, instance.Zone, instance.Name) if err != nil { return mc.Observe(err) } - return gce.manager.WaitForZoneOp(attachOp, disk.Zone, mc) + return gce.manager.WaitForZoneOp(attachOp, instance.Zone, mc) } func (gce *GCECloud) DetachDisk(devicePath string, nodeName types.NodeName) error { @@ -131,7 +179,7 @@ func (gce *GCECloud) DetachDisk(devicePath string, nodeName types.NodeName) erro return fmt.Errorf("error getting instance %q", instanceName) } - mc := newDiskMetricContext("detach", inst.Zone) + mc := newDiskMetricContextZonal("detach", gce.region, inst.Zone) detachOp, err := gce.manager.DetachDisk(inst.Zone, inst.Name, devicePath) if err != nil { return mc.Observe(err) @@ -206,14 +254,7 @@ func (gce *GCECloud) CreateDisk( // Do not allow creation of PDs in zones that are not managed. Such PDs // then cannot be deleted by DeleteDisk. 
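
// Illustrative sketch, not part of this patch: zoneType above acts as a
// closed "tagged union". Only singleZone and multiZone implement the
// unexported isZoneType method, so a type switch over a zoneType value
// has a fixed set of cases (plus nil and default for safety), which is
// the dispatch pattern this patch uses throughout. A minimal standalone
// version of the same pattern, with hypothetical names:

package main

import "fmt"

type zoneInfo interface{ isZoneInfo() }

type oneZone struct{ zone string }
type manyZones struct{ zones []string }

func (oneZone) isZoneInfo()   {}
func (manyZones) isZoneInfo() {}

func describe(z zoneInfo) string {
	switch z := z.(type) {
	case oneZone:
		return "zonal disk in " + z.zone
	case manyZones:
		return fmt.Sprintf("regional disk replicated across %v", z.zones)
	case nil:
		return "disk with no zone information"
	default:
		return fmt.Sprintf("unexpected zoneInfo type %T", z)
	}
}

func main() {
	fmt.Println(describe(oneZone{"us-central1-b"}))
	fmt.Println(describe(manyZones{[]string{"us-central1-b", "us-central1-f"}}))
}
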
- isManaged := false - for _, managedZone := range gce.managedZones { - if zone == managedZone { - isManaged = true - break - } - } - if !isManaged { + if isManaged := gce.verifyZoneIsManaged(zone); !isManaged { return fmt.Errorf("kubernetes does not manage zone %q", zone) } @@ -222,25 +263,15 @@ func (gce *GCECloud) CreateDisk( return err } - switch diskType { - case DiskTypeSSD, DiskTypeStandard: - // noop - case "": - diskType = diskTypeDefault - default: - return fmt.Errorf("invalid GCE disk type %q", diskType) + diskType, err = getDiskType(diskType) + if err != nil { + return err } - projectsApiEndpoint := gceComputeAPIEndpoint + "projects/" - if gce.service != nil { - projectsApiEndpoint = gce.service.BasePath - } - diskTypeUri := projectsApiEndpoint + fmt.Sprintf(diskTypeUriTemplate, gce.projectID, zone, diskType) - - mc := newDiskMetricContext("create", zone) + mc := newDiskMetricContextZonal("create", gce.region, zone) createOp, err := gce.manager.CreateDisk( - name, sizeGb, tagsStr, diskTypeUri, zone) + name, sizeGb, tagsStr, diskType, zone) if isGCEError(err, "alreadyExists") { glog.Warningf("GCE PD %q already exists, reusing", name) @@ -257,6 +288,76 @@ func (gce *GCECloud) CreateDisk( return err } +// CreateRegionalDisk creates a new Regional Persistent Disk, with the specified +// name & size, replicated to the specified zones. It stores specified tags +// encoded in JSON in Description field. +func (gce *GCECloud) CreateRegionalDisk( + name string, diskType string, replicaZones sets.String, sizeGb int64, tags map[string]string) error { + + // Do not allow creation of PDs in zones that are not managed. Such PDs + // then cannot be deleted by DeleteDisk. + unmanagedZones := []string{} + for _, zone := range replicaZones.UnsortedList() { + if isManaged := gce.verifyZoneIsManaged(zone); !isManaged { + unmanagedZones = append(unmanagedZones, zone) + } + } + + if len(unmanagedZones) > 0 { + return fmt.Errorf("kubernetes does not manage specified zones: %q. 
Managed Zones: %q", unmanagedZones, gce.managedZones) + } + + tagsStr, err := gce.encodeDiskTags(tags) + if err != nil { + return err + } + + diskType, err = getDiskType(diskType) + if err != nil { + return err + } + + mc := newDiskMetricContextRegional("create", gce.region) + + createOp, err := gce.manager.CreateRegionalDisk( + name, sizeGb, tagsStr, diskType, replicaZones) + + if isGCEError(err, "alreadyExists") { + glog.Warningf("GCE PD %q already exists, reusing", name) + return nil + } else if err != nil { + return mc.Observe(err) + } + + err = gce.manager.WaitForRegionalOp(createOp, mc) + if isGCEError(err, "alreadyExists") { + glog.Warningf("GCE PD %q already exists, reusing", name) + return nil + } + return err +} + +func (gce *GCECloud) verifyZoneIsManaged(zone string) bool { + for _, managedZone := range gce.managedZones { + if zone == managedZone { + return true + } + } + + return false +} + +func getDiskType(diskType string) (string, error) { + switch diskType { + case DiskTypeSSD, DiskTypeStandard: + return diskType, nil + case "": + return diskTypeDefault, nil + default: + return "", fmt.Errorf("invalid GCE disk type %q", diskType) + } +} + func (gce *GCECloud) DeleteDisk(diskToDelete string) error { err := gce.doDeleteDisk(diskToDelete) if isGCEError(err, "resourceInUseByAnotherResource") { @@ -278,40 +379,66 @@ func (gce *GCECloud) GetAutoLabelsForPD(name string, zone string) (map[string]st var disk *GCEDisk var err error if zone == "" { - // We would like as far as possible to avoid this case, - // because GCE doesn't guarantee that volumes are uniquely named per region, - // just per zone. However, creation of GCE PDs was originally done only - // by name, so we have to continue to support that. - // However, wherever possible the zone should be passed (and it is passed - // for most cases that we can control, e.g. dynamic volume provisioning) + // For regional PDs this is fine, but for zonal PDs we would like as far + // as possible to avoid this case, because GCE doesn't guarantee that + // volumes are uniquely named per region, just per zone. However, + // creation of GCE PDs was originally done only by name, so we have to + // continue to support that. + // However, wherever possible the zone should be passed (and it is + // passed for most cases that we can control, e.g. dynamic volume + // provisioning). disk, err = gce.GetDiskByNameUnknownZone(name) if err != nil { return nil, err } - zone = disk.Zone } else { // We could assume the disks exists; we have all the information we need // However it is more consistent to ensure the disk exists, // and in future we may gather addition information (e.g. disk type, IOPS etc) - disk, err = gce.getDiskByName(name, zone) + zoneSet, err := volumeutil.LabelZonesToSet(zone) if err != nil { - return nil, err + glog.Warningf("Failed to parse zone field: %q. 
Will use raw field.", zone) + } + + if len(zoneSet) > 1 { + // Regional PD + disk, err = gce.getRegionalDiskByName(name) + if err != nil { + return nil, err + } + } else { + // Zonal PD + disk, err = gce.getDiskByName(name, zone) + if err != nil { + return nil, err + } } } - region, err := GetGCERegion(zone) - if err != nil { - return nil, err - } - - if zone == "" || region == "" { - // Unexpected, but sanity-check - return nil, fmt.Errorf("PD did not have zone/region information: %q", disk.Name) - } - labels := make(map[string]string) - labels[kubeletapis.LabelZoneFailureDomain] = zone - labels[kubeletapis.LabelZoneRegion] = region + switch zoneInfo := disk.ZoneInfo.(type) { + case singleZone: + if zoneInfo.zone == "" || disk.Region == "" { + // Unexpected, but sanity-check + return nil, fmt.Errorf("PD did not have zone/region information: %v", disk) + } + labels[kubeletapis.LabelZoneFailureDomain] = zoneInfo.zone + labels[kubeletapis.LabelZoneRegion] = disk.Region + case multiZone: + if zoneInfo.replicaZones == nil || zoneInfo.replicaZones.Len() <= 0 { + // Unexpected, but sanity-check + return nil, fmt.Errorf("PD is regional but does not have any replicaZones specified: %v", disk) + } + labels[kubeletapis.LabelZoneFailureDomain] = + volumeutil.ZonesSetToLabelValue(zoneInfo.replicaZones) + labels[kubeletapis.LabelZoneRegion] = disk.Region + case nil: + // Unexpected, but sanity-check + return nil, fmt.Errorf("PD did not have ZoneInfo: %v", disk) + default: + // Unexpected, but sanity-check + return nil, fmt.Errorf("disk.ZoneInfo has unexpected type %T", zoneInfo) + } return labels, nil } @@ -319,8 +446,8 @@ func (gce *GCECloud) GetAutoLabelsForPD(name string, zone string) (map[string]st // Returns a GCEDisk for the disk, if it is found in the specified zone. // If not found, returns (nil, nil) func (gce *GCECloud) findDiskByName(diskName string, zone string) (*GCEDisk, error) { - mc := newDiskMetricContext("get", zone) - disk, err := gce.manager.GetDisk(gce.projectID, zone, diskName) + mc := newDiskMetricContextZonal("get", gce.region, zone) + disk, err := gce.manager.GetDisk(zone, diskName) if err == nil { return disk, mc.Observe(nil) } @@ -339,10 +466,40 @@ func (gce *GCECloud) getDiskByName(diskName string, zone string) (*GCEDisk, erro return disk, err } +// Returns a GCEDisk for the regional disk, if it is found. 
+// If not found, returns (nil, nil) +func (gce *GCECloud) findRegionalDiskByName(diskName string) (*GCEDisk, error) { + mc := newDiskMetricContextRegional("get", gce.region) + disk, err := gce.manager.GetRegionalDisk(diskName) + if err == nil { + return disk, mc.Observe(nil) + } + if !isHTTPErrorCode(err, http.StatusNotFound) { + return nil, mc.Observe(err) + } + return nil, mc.Observe(nil) +} + +// Like findRegionalDiskByName, but returns an error if the disk is not found +func (gce *GCECloud) getRegionalDiskByName(diskName string) (*GCEDisk, error) { + disk, err := gce.findRegionalDiskByName(diskName) + if disk == nil && err == nil { + return nil, fmt.Errorf("GCE regional persistent disk not found: diskName=%q", diskName) + } + return disk, err +} + // Scans all managed zones to return the GCE PD // Prefer getDiskByName, if the zone can be established // Return cloudprovider.DiskNotFound if the given disk cannot be found in any zone func (gce *GCECloud) GetDiskByNameUnknownZone(diskName string) (*GCEDisk, error) { + if gce.AlphaFeatureGate.Enabled(GCEDiskAlphaFeatureGate) { + regionalDisk, err := gce.getRegionalDiskByName(diskName) + if err == nil { + return regionalDisk, err + } + } + // Note: this is the gotcha right now with GCE PD support: // disk names are not unique per-region. // (I can create two volumes with name "myvol" in e.g. us-central1-b & us-central1-f) @@ -365,7 +522,17 @@ func (gce *GCECloud) GetDiskByNameUnknownZone(diskName string) (*GCEDisk, error) continue } if found != nil { - return nil, fmt.Errorf("GCE persistent disk name was found in multiple zones: %q", diskName) + switch zoneInfo := disk.ZoneInfo.(type) { + case multiZone: + if zoneInfo.replicaZones.Has(zone) { + glog.Warningf("GCE PD name (%q) was found in multiple zones (%q), but ok because it is a RegionalDisk.", + diskName, zoneInfo.replicaZones) + continue + } + return nil, fmt.Errorf("GCE PD name was found in multiple zones: %q", diskName) + default: + return nil, fmt.Errorf("GCE PD name was found in multiple zones: %q", diskName) + } } found = disk } @@ -399,14 +566,28 @@ func (gce *GCECloud) doDeleteDisk(diskToDelete string) error { return err } - mc := newDiskMetricContext("delete", disk.Zone) + var mc *metricContext - deleteOp, err := gce.manager.DeleteDisk(gce.projectID, disk.Zone, disk.Name) - if err != nil { - return mc.Observe(err) + switch zoneInfo := disk.ZoneInfo.(type) { + case singleZone: + mc = newDiskMetricContextZonal("delete", disk.Region, zoneInfo.zone) + deleteOp, err := gce.manager.DeleteDisk(zoneInfo.zone, disk.Name) + if err != nil { + return mc.Observe(err) + } + return gce.manager.WaitForZoneOp(deleteOp, zoneInfo.zone, mc) + case multiZone: + mc = newDiskMetricContextRegional("delete", disk.Region) + deleteOp, err := gce.manager.DeleteRegionalDisk(disk.Name) + if err != nil { + return mc.Observe(err) + } + return gce.manager.WaitForRegionalOp(deleteOp, mc) + case nil: + return fmt.Errorf("PD has nil ZoneInfo: %v", disk) + default: + return fmt.Errorf("disk.ZoneInfo has unexpected type %T", zoneInfo) } - - return gce.manager.WaitForZoneOp(deleteOp, disk.Zone, mc) } // isGCEError returns true if given error is a googleapi.Error with given diff --git a/pkg/cloudprovider/providers/gce/gce_disks_test.go b/pkg/cloudprovider/providers/gce/gce_disks_test.go index 8eb960a1f72..358df9f3898 100644 --- a/pkg/cloudprovider/providers/gce/gce_disks_test.go +++ b/pkg/cloudprovider/providers/gce/gce_disks_test.go @@ -25,14 +25,19 @@ import ( computebeta "google.golang.org/api/compute/v0.beta" 
compute "google.golang.org/api/compute/v1" "google.golang.org/api/googleapi" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/kubernetes/pkg/cloudprovider" kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis" ) +// TODO TODO write a test for GetDiskByNameUnknownZone and make sure casting logic works +// TODO TODO verify that RegionDisks.Get does not return non-replica disks + func TestCreateDisk_Basic(t *testing.T) { /* Arrange */ - fakeManager := newFakeManager() - projectId := "test-project" + gceProjectId := "test-project" + gceRegion := "fake-region" + fakeManager := newFakeManager(gceProjectId, gceRegion) alphaFeatureGate, featureGateErr := NewAlphaFeatureGate([]string{}) if featureGateErr != nil { t.Error(featureGateErr) @@ -40,7 +45,7 @@ func TestCreateDisk_Basic(t *testing.T) { gce := GCECloud{ manager: fakeManager, managedZones: []string{"zone1"}, - projectID: projectId, + projectID: gceProjectId, AlphaFeatureGate: alphaFeatureGate, } @@ -51,7 +56,8 @@ func TestCreateDisk_Basic(t *testing.T) { tags := make(map[string]string) tags["test-tag"] = "test-value" - diskTypeUri := gceComputeAPIEndpoint + "projects/" + fmt.Sprintf(diskTypeUriTemplate, projectId, zone, diskType) + expectedDiskTypeURI := gceComputeAPIEndpoint + "projects/" + fmt.Sprintf( + diskTypeURITemplateSingleZone, gceProjectId, zone, diskType) expectedDescription := "{\"test-tag\":\"test-value\"}" /* Act */ @@ -74,8 +80,66 @@ func TestCreateDisk_Basic(t *testing.T) { t.Errorf("Expected disk name: %s; Actual: %s", diskName, diskToCreate.Name) } - if diskToCreate.Type != diskTypeUri { - t.Errorf("Expected disk type: %s; Actual: %s", diskTypeUri, diskToCreate.Type) + if diskToCreate.Type != expectedDiskTypeURI { + t.Errorf("Expected disk type: %s; Actual: %s", expectedDiskTypeURI, diskToCreate.Type) + } + if diskToCreate.SizeGb != sizeGb { + t.Errorf("Expected disk size: %d; Actual: %d", sizeGb, diskToCreate.SizeGb) + } + if diskToCreate.Description != expectedDescription { + t.Errorf("Expected tag string: %s; Actual: %s", expectedDescription, diskToCreate.Description) + } +} + +func TestCreateRegionalDisk_Basic(t *testing.T) { + /* Arrange */ + gceProjectId := "test-project" + gceRegion := "fake-region" + fakeManager := newFakeManager(gceProjectId, gceRegion) + alphaFeatureGate, featureGateErr := NewAlphaFeatureGate([]string{GCEDiskAlphaFeatureGate}) + if featureGateErr != nil { + t.Error(featureGateErr) + } + gce := GCECloud{ + manager: fakeManager, + managedZones: []string{"zone1", "zone3", "zone2"}, + projectID: gceProjectId, + AlphaFeatureGate: alphaFeatureGate, + } + + diskName := "disk" + diskType := DiskTypeSSD + replicaZones := sets.NewString("zone1", "zone2") + const sizeGb int64 = 128 + tags := make(map[string]string) + tags["test-tag"] = "test-value" + + expectedDiskTypeURI := gceComputeAPIEndpointAlpha + "projects/" + fmt.Sprintf( + diskTypeURITemplateRegional, gceProjectId, gceRegion, diskType) + expectedDescription := "{\"test-tag\":\"test-value\"}" + + /* Act */ + err := gce.CreateRegionalDisk(diskName, diskType, replicaZones, sizeGb, tags) + + /* Assert */ + if err != nil { + t.Error(err) + } + if !fakeManager.createDiskCalled { + t.Error("Never called GCE disk create.") + } + if !fakeManager.doesOpMatch { + t.Error("Ops used in WaitForZoneOp does not match what's returned by CreateDisk.") + } + + // Partial check of equality between disk description sent to GCE and parameters of method. 
+ diskToCreate := fakeManager.diskToCreateStable + if diskToCreate.Name != diskName { + t.Errorf("Expected disk name: %s; Actual: %s", diskName, diskToCreate.Name) + } + + if diskToCreate.Type != expectedDiskTypeURI { + t.Errorf("Expected disk type: %s; Actual: %s", expectedDiskTypeURI, diskToCreate.Type) } if diskToCreate.SizeGb != sizeGb { t.Errorf("Expected disk size: %d; Actual: %d", sizeGb, diskToCreate.SizeGb) @@ -87,7 +151,9 @@ func TestCreateDisk_Basic(t *testing.T) { func TestCreateDisk_DiskAlreadyExists(t *testing.T) { /* Arrange */ - fakeManager := newFakeManager() + gceProjectId := "test-project" + gceRegion := "fake-region" + fakeManager := newFakeManager(gceProjectId, gceRegion) alphaFeatureGate, featureGateErr := NewAlphaFeatureGate([]string{}) if featureGateErr != nil { t.Error(featureGateErr) @@ -100,7 +166,7 @@ func TestCreateDisk_DiskAlreadyExists(t *testing.T) { // Inject disk AlreadyExists error. alreadyExistsError := googleapi.ErrorItem{Reason: "alreadyExists"} - fakeManager.waitForZoneOpError = &googleapi.Error{ + fakeManager.waitForOpError = &googleapi.Error{ Errors: []googleapi.ErrorItem{alreadyExistsError}, } @@ -116,7 +182,9 @@ func TestCreateDisk_DiskAlreadyExists(t *testing.T) { func TestCreateDisk_WrongZone(t *testing.T) { /* Arrange */ - fakeManager := newFakeManager() + gceProjectId := "test-project" + gceRegion := "fake-region" + fakeManager := newFakeManager(gceProjectId, gceRegion) gce := GCECloud{manager: fakeManager, managedZones: []string{"zone1"}} diskName := "disk" @@ -134,7 +202,9 @@ func TestCreateDisk_WrongZone(t *testing.T) { func TestCreateDisk_NoManagedZone(t *testing.T) { /* Arrange */ - fakeManager := newFakeManager() + gceProjectId := "test-project" + gceRegion := "fake-region" + fakeManager := newFakeManager(gceProjectId, gceRegion) gce := GCECloud{manager: fakeManager, managedZones: []string{}} diskName := "disk" @@ -152,7 +222,9 @@ func TestCreateDisk_NoManagedZone(t *testing.T) { func TestCreateDisk_BadDiskType(t *testing.T) { /* Arrange */ - fakeManager := newFakeManager() + gceProjectId := "test-project" + gceRegion := "fake-region" + fakeManager := newFakeManager(gceProjectId, gceRegion) gce := GCECloud{manager: fakeManager, managedZones: []string{"zone1"}} diskName := "disk" @@ -171,7 +243,9 @@ func TestCreateDisk_BadDiskType(t *testing.T) { func TestCreateDisk_MultiZone(t *testing.T) { /* Arrange */ - fakeManager := newFakeManager() + gceProjectId := "test-project" + gceRegion := "fake-region" + fakeManager := newFakeManager(gceProjectId, gceRegion) alphaFeatureGate, featureGateErr := NewAlphaFeatureGate([]string{}) if featureGateErr != nil { t.Error(featureGateErr) @@ -198,7 +272,9 @@ func TestCreateDisk_MultiZone(t *testing.T) { func TestDeleteDisk_Basic(t *testing.T) { /* Arrange */ - fakeManager := newFakeManager() + gceProjectId := "test-project" + gceRegion := "fake-region" + fakeManager := newFakeManager(gceProjectId, gceRegion) alphaFeatureGate, featureGateErr := NewAlphaFeatureGate([]string{}) if featureGateErr != nil { t.Error(featureGateErr) @@ -233,8 +309,18 @@ func TestDeleteDisk_Basic(t *testing.T) { func TestDeleteDisk_NotFound(t *testing.T) { /* Arrange */ - fakeManager := newFakeManager() - gce := GCECloud{manager: fakeManager, managedZones: []string{"zone1"}} + gceProjectId := "test-project" + gceRegion := "fake-region" + fakeManager := newFakeManager(gceProjectId, gceRegion) + alphaFeatureGate, featureGateErr := NewAlphaFeatureGate([]string{}) + if featureGateErr != nil { + t.Error(featureGateErr) + } + gce := 
GCECloud{ + manager: fakeManager, + managedZones: []string{"zone1"}, + AlphaFeatureGate: alphaFeatureGate, + } diskName := "disk" /* Act */ @@ -248,7 +334,9 @@ func TestDeleteDisk_NotFound(t *testing.T) { func TestDeleteDisk_ResourceBeingUsed(t *testing.T) { /* Arrange */ - fakeManager := newFakeManager() + gceProjectId := "test-project" + gceRegion := "fake-region" + fakeManager := newFakeManager(gceProjectId, gceRegion) alphaFeatureGate, featureGateErr := NewAlphaFeatureGate([]string{}) if featureGateErr != nil { t.Error(featureGateErr) @@ -277,7 +365,9 @@ func TestDeleteDisk_ResourceBeingUsed(t *testing.T) { func TestDeleteDisk_SameDiskMultiZone(t *testing.T) { /* Assert */ - fakeManager := newFakeManager() + gceProjectId := "test-project" + gceRegion := "fake-region" + fakeManager := newFakeManager(gceProjectId, gceRegion) alphaFeatureGate, featureGateErr := NewAlphaFeatureGate([]string{}) if featureGateErr != nil { t.Error(featureGateErr) @@ -309,7 +399,9 @@ func TestDeleteDisk_SameDiskMultiZone(t *testing.T) { func TestDeleteDisk_DiffDiskMultiZone(t *testing.T) { /* Arrange */ - fakeManager := newFakeManager() + gceProjectId := "test-project" + gceRegion := "fake-region" + fakeManager := newFakeManager(gceProjectId, gceRegion) alphaFeatureGate, featureGateErr := NewAlphaFeatureGate([]string{}) if featureGateErr != nil { t.Error(featureGateErr) @@ -341,7 +433,9 @@ func TestDeleteDisk_DiffDiskMultiZone(t *testing.T) { func TestGetAutoLabelsForPD_Basic(t *testing.T) { /* Arrange */ - fakeManager := newFakeManager() + gceProjectId := "test-project" + gceRegion := "us-central1" + fakeManager := newFakeManager(gceProjectId, gceRegion) diskName := "disk" diskType := DiskTypeSSD zone := "us-central1-c" @@ -369,14 +463,16 @@ func TestGetAutoLabelsForPD_Basic(t *testing.T) { t.Errorf("Failure domain is '%v', but zone is '%v'", labels[kubeletapis.LabelZoneFailureDomain], zone) } - if labels[kubeletapis.LabelZoneRegion] != "us-central1" { - t.Errorf("Region is '%v', but zone is 'us-central1'", labels[kubeletapis.LabelZoneRegion]) + if labels[kubeletapis.LabelZoneRegion] != gceRegion { + t.Errorf("Region is '%v', but region is 'us-central1'", labels[kubeletapis.LabelZoneRegion]) } } func TestGetAutoLabelsForPD_NoZone(t *testing.T) { /* Arrange */ - fakeManager := newFakeManager() + gceProjectId := "test-project" + gceRegion := "europe-west1" + fakeManager := newFakeManager(gceProjectId, gceRegion) diskName := "disk" diskType := DiskTypeStandard zone := "europe-west1-d" @@ -403,14 +499,16 @@ func TestGetAutoLabelsForPD_NoZone(t *testing.T) { t.Errorf("Failure domain is '%v', but zone is '%v'", labels[kubeletapis.LabelZoneFailureDomain], zone) } - if labels[kubeletapis.LabelZoneRegion] != "europe-west1" { - t.Errorf("Region is '%v', but zone is 'europe-west1'", labels[kubeletapis.LabelZoneRegion]) + if labels[kubeletapis.LabelZoneRegion] != gceRegion { + t.Errorf("Region is '%v', but region is 'europe-west1'", labels[kubeletapis.LabelZoneRegion]) } } func TestGetAutoLabelsForPD_DiskNotFound(t *testing.T) { /* Arrange */ - fakeManager := newFakeManager() + gceProjectId := "test-project" + gceRegion := "fake-region" + fakeManager := newFakeManager(gceProjectId, gceRegion) diskName := "disk" zone := "asia-northeast1-a" gce := GCECloud{manager: fakeManager, managedZones: []string{zone}} @@ -426,9 +524,19 @@ func TestGetAutoLabelsForPD_DiskNotFound(t *testing.T) { func TestGetAutoLabelsForPD_DiskNotFoundAndNoZone(t *testing.T) { /* Arrange */ - fakeManager := newFakeManager() + gceProjectId := 
"test-project" + gceRegion := "fake-region" + fakeManager := newFakeManager(gceProjectId, gceRegion) diskName := "disk" - gce := GCECloud{manager: fakeManager, managedZones: []string{}} + alphaFeatureGate, featureGateErr := NewAlphaFeatureGate([]string{}) + if featureGateErr != nil { + t.Error(featureGateErr) + } + gce := GCECloud{ + manager: fakeManager, + managedZones: []string{}, + AlphaFeatureGate: alphaFeatureGate, + } /* Act */ _, err := gce.GetAutoLabelsForPD(diskName, "") @@ -441,7 +549,9 @@ func TestGetAutoLabelsForPD_DiskNotFoundAndNoZone(t *testing.T) { func TestGetAutoLabelsForPD_DupDisk(t *testing.T) { /* Arrange */ - fakeManager := newFakeManager() + gceProjectId := "test-project" + gceRegion := "us-west1" + fakeManager := newFakeManager(gceProjectId, gceRegion) diskName := "disk" diskType := DiskTypeStandard zone := "us-west1-b" @@ -471,14 +581,16 @@ func TestGetAutoLabelsForPD_DupDisk(t *testing.T) { t.Errorf("Failure domain is '%v', but zone is '%v'", labels[kubeletapis.LabelZoneFailureDomain], zone) } - if labels[kubeletapis.LabelZoneRegion] != "us-west1" { - t.Errorf("Region is '%v', but zone is 'us-west1'", labels[kubeletapis.LabelZoneRegion]) + if labels[kubeletapis.LabelZoneRegion] != gceRegion { + t.Errorf("Region is '%v', but region is 'us-west1'", labels[kubeletapis.LabelZoneRegion]) } } func TestGetAutoLabelsForPD_DupDiskNoZone(t *testing.T) { /* Arrange */ - fakeManager := newFakeManager() + gceProjectId := "test-project" + gceRegion := "fake-region" + fakeManager := newFakeManager(gceProjectId, gceRegion) diskName := "disk" diskType := DiskTypeStandard const sizeGb int64 = 128 @@ -515,13 +627,16 @@ const ( type FakeServiceManager struct { // Common fields shared among tests - targetAPI targetClientAPI - opAlpha *computealpha.Operation // Mocks an operation returned by GCE API calls - opBeta *computebeta.Operation // Mocks an operation returned by GCE API calls - opStable *compute.Operation // Mocks an operation returned by GCE API calls - doesOpMatch bool - disks map[string]string // zone: diskName - waitForZoneOpError error // Error to be returned by WaitForZoneOp + targetAPI targetClientAPI + gceProjectID string + gceRegion string + opAlpha *computealpha.Operation // Mocks an operation returned by GCE API calls + opBeta *computebeta.Operation // Mocks an operation returned by GCE API calls + opStable *compute.Operation // Mocks an operation returned by GCE API calls + doesOpMatch bool + zonalDisks map[string]string // zone: diskName + regionalDisks map[string]sets.String // diskName: zones + waitForOpError error // Error to be returned by WaitForZoneOp or WaitForRegionalOp // Fields for TestCreateDisk createDiskCalled bool @@ -534,8 +649,13 @@ type FakeServiceManager struct { resourceInUse bool // Marks the disk as in-use } -func newFakeManager() *FakeServiceManager { - return &FakeServiceManager{disks: make(map[string]string)} +func newFakeManager(gceProjectID string, gceRegion string) *FakeServiceManager { + return &FakeServiceManager{ + zonalDisks: make(map[string]string), + regionalDisks: make(map[string]sets.String), + gceProjectID: gceProjectID, + gceRegion: gceRegion, + } } /** @@ -546,10 +666,65 @@ func (manager *FakeServiceManager) CreateDisk( name string, sizeGb int64, tagsStr string, - diskTypeURI string, + diskType string, zone string) (gceObject, error) { manager.createDiskCalled = true + switch t := manager.targetAPI; t { + case targetStable: + manager.opStable = &compute.Operation{} + diskTypeURI := gceComputeAPIEndpoint + "projects/" + 
fmt.Sprintf(diskTypeURITemplateSingleZone, manager.gceProjectID, zone, diskType) + diskToCreateV1 := &compute.Disk{ + Name: name, + SizeGb: sizeGb, + Description: tagsStr, + Type: diskTypeURI, + } + manager.diskToCreateStable = diskToCreateV1 + manager.zonalDisks[zone] = diskToCreateV1.Name + return manager.opStable, nil + case targetBeta: + manager.opBeta = &computebeta.Operation{} + diskTypeURI := gceComputeAPIEndpoint + "projects/" + fmt.Sprintf(diskTypeURITemplateSingleZone, manager.gceProjectID, zone, diskType) + diskToCreateBeta := &computebeta.Disk{ + Name: name, + SizeGb: sizeGb, + Description: tagsStr, + Type: diskTypeURI, + } + manager.diskToCreateBeta = diskToCreateBeta + manager.zonalDisks[zone] = diskToCreateBeta.Name + return manager.opBeta, nil + case targetAlpha: + manager.opAlpha = &computealpha.Operation{} + diskTypeURI := gceComputeAPIEndpointAlpha + "projects/" + fmt.Sprintf(diskTypeURITemplateSingleZone, manager.gceProjectID, zone, diskType) + diskToCreateAlpha := &computealpha.Disk{ + Name: name, + SizeGb: sizeGb, + Description: tagsStr, + Type: diskTypeURI, + } + manager.diskToCreateAlpha = diskToCreateAlpha + manager.zonalDisks[zone] = diskToCreateAlpha.Name + return manager.opAlpha, nil + default: + return nil, fmt.Errorf("unexpected type: %T", t) + } +} + +/** + * Upon disk creation, disk info is stored in FakeServiceManager + * to be used by other tested methods. + */ +func (manager *FakeServiceManager) CreateRegionalDisk( + name string, + sizeGb int64, + tagsStr string, + diskType string, + zones sets.String) (gceObject, error) { + manager.createDiskCalled = true + diskTypeURI := gceComputeAPIEndpointAlpha + "projects/" + fmt.Sprintf(diskTypeURITemplateRegional, manager.gceProjectID, manager.gceRegion, diskType) + switch t := manager.targetAPI; t { case targetStable: manager.opStable = &compute.Operation{} @@ -560,42 +735,21 @@ func (manager *FakeServiceManager) CreateDisk( Type: diskTypeURI, } manager.diskToCreateStable = diskToCreateV1 - manager.disks[zone] = diskToCreateV1.Name + manager.regionalDisks[diskToCreateV1.Name] = zones return manager.opStable, nil case targetBeta: - manager.opBeta = &computebeta.Operation{} - diskToCreateBeta := &computebeta.Disk{ - Name: name, - SizeGb: sizeGb, - Description: tagsStr, - Type: diskTypeURI, - } - manager.diskToCreateBeta = diskToCreateBeta - manager.disks[zone] = diskToCreateBeta.Name - return manager.opBeta, nil + return nil, fmt.Errorf("RegionalDisk CreateDisk op not supported in beta.") case targetAlpha: - manager.opAlpha = &computealpha.Operation{} - diskToCreateAlpha := &computealpha.Disk{ - Name: name, - SizeGb: sizeGb, - Description: tagsStr, - Type: diskTypeURI, - } - manager.diskToCreateAlpha = diskToCreateAlpha - manager.disks[zone] = diskToCreateAlpha.Name - return manager.opAlpha, nil + return nil, fmt.Errorf("RegionalDisk CreateDisk op not supported in alpha.") default: return nil, fmt.Errorf("unexpected type: %T", t) } } func (manager *FakeServiceManager) AttachDisk( - diskName string, - diskKind string, - diskZone string, + disk *GCEDisk, readWrite string, - source string, - diskType string, + instanceZone string, instanceName string) (gceObject, error) { switch t := manager.targetAPI; t { @@ -636,11 +790,9 @@ func (manager *FakeServiceManager) DetachDisk( * Gets disk info stored in the FakeServiceManager. 
*/ func (manager *FakeServiceManager) GetDisk( - project string, - zone string, - diskName string) (*GCEDisk, error) { + zone string, diskName string) (*GCEDisk, error) { - if manager.disks[zone] == "" { + if manager.zonalDisks[zone] == "" { return nil, cloudprovider.DiskNotFound } @@ -651,10 +803,36 @@ func (manager *FakeServiceManager) GetDisk( } return &GCEDisk{ - Zone: lastComponent(zone), - Name: diskName, - Kind: "compute#disk", - Type: "type", + Region: manager.gceRegion, + ZoneInfo: singleZone{lastComponent(zone)}, + Name: diskName, + Kind: "compute#disk", + Type: "type", + }, nil +} + +/** + * Gets disk info stored in the FakeServiceManager. + */ +func (manager *FakeServiceManager) GetRegionalDisk( + diskName string) (*GCEDisk, error) { + + if _, ok := manager.regionalDisks[diskName]; !ok { + return nil, cloudprovider.DiskNotFound + } + + if manager.resourceInUse { + errorItem := googleapi.ErrorItem{Reason: "resourceInUseByAnotherResource"} + err := &googleapi.Error{Errors: []googleapi.ErrorItem{errorItem}} + return nil, err + } + + return &GCEDisk{ + Region: manager.gceRegion, + ZoneInfo: multiZone{manager.regionalDisks[diskName]}, + Name: diskName, + Kind: "compute#disk", + Type: "type", }, nil } @@ -662,12 +840,32 @@ func (manager *FakeServiceManager) GetDisk( * Disk info is removed from the FakeServiceManager. */ func (manager *FakeServiceManager) DeleteDisk( - project string, zone string, disk string) (gceObject, error) { manager.deleteDiskCalled = true - manager.disks[zone] = "" + delete(manager.zonalDisks, zone) + + switch t := manager.targetAPI; t { + case targetStable: + manager.opStable = &compute.Operation{} + return manager.opStable, nil + case targetBeta: + manager.opBeta = &computebeta.Operation{} + return manager.opBeta, nil + case targetAlpha: + manager.opAlpha = &computealpha.Operation{} + return manager.opAlpha, nil + default: + return nil, fmt.Errorf("unexpected type: %T", t) + } +} + +func (manager *FakeServiceManager) DeleteRegionalDisk( + disk string) (gceObject, error) { + + manager.deleteDiskCalled = true + delete(manager.regionalDisks, disk) switch t := manager.targetAPI; t { case targetStable: @@ -704,5 +902,26 @@ func (manager *FakeServiceManager) WaitForZoneOp( default: return fmt.Errorf("unexpected type: %T", v) } - return manager.waitForZoneOpError + return manager.waitForOpError +} + +func (manager *FakeServiceManager) WaitForRegionalOp( + op gceObject, mc *metricContext) error { + switch v := op.(type) { + case *computealpha.Operation: + if op.(*computealpha.Operation) == manager.opAlpha { + manager.doesOpMatch = true + } + case *computebeta.Operation: + if op.(*computebeta.Operation) == manager.opBeta { + manager.doesOpMatch = true + } + case *compute.Operation: + if op.(*compute.Operation) == manager.opStable { + manager.doesOpMatch = true + } + default: + return fmt.Errorf("unexpected type: %T", v) + } + return manager.waitForOpError } diff --git a/pkg/kubelet/apis/well_known_labels.go b/pkg/kubelet/apis/well_known_labels.go index b3d0be7275c..5a0db552c82 100644 --- a/pkg/kubelet/apis/well_known_labels.go +++ b/pkg/kubelet/apis/well_known_labels.go @@ -17,9 +17,10 @@ limitations under the License. 
package apis const ( - LabelHostname = "kubernetes.io/hostname" - LabelZoneFailureDomain = "failure-domain.beta.kubernetes.io/zone" - LabelZoneRegion = "failure-domain.beta.kubernetes.io/region" + LabelHostname = "kubernetes.io/hostname" + LabelZoneFailureDomain = "failure-domain.beta.kubernetes.io/zone" + LabelMultiZoneDelimiter = "__" + LabelZoneRegion = "failure-domain.beta.kubernetes.io/region" LabelInstanceType = "beta.kubernetes.io/instance-type" diff --git a/pkg/volume/gce_pd/BUILD b/pkg/volume/gce_pd/BUILD index a3d28e7a1f4..eb65ce35401 100644 --- a/pkg/volume/gce_pd/BUILD +++ b/pkg/volume/gce_pd/BUILD @@ -47,6 +47,7 @@ go_test( "//vendor/k8s.io/api/core/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/types:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library", "//vendor/k8s.io/client-go/kubernetes/fake:go_default_library", "//vendor/k8s.io/client-go/util/testing:go_default_library", ], diff --git a/pkg/volume/gce_pd/attacher_test.go b/pkg/volume/gce_pd/attacher_test.go index cccb876376b..3d8fe615a21 100644 --- a/pkg/volume/gce_pd/attacher_test.go +++ b/pkg/volume/gce_pd/attacher_test.go @@ -22,6 +22,7 @@ import ( "testing" "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/kubernetes/pkg/volume" volumetest "k8s.io/kubernetes/pkg/volume/testing" @@ -361,6 +362,10 @@ func (testcase *testcase) CreateDisk(name string, diskType string, zone string, return errors.New("Not implemented") } +func (testcase *testcase) CreateRegionalDisk(name string, diskType string, replicaZones sets.String, sizeGb int64, tags map[string]string) error { + return errors.New("Not implemented") +} + func (testcase *testcase) DeleteDisk(diskToDelete string) error { return errors.New("Not implemented") } diff --git a/pkg/volume/gce_pd/gce_util.go b/pkg/volume/gce_pd/gce_util.go index d78126c66cc..026324e6305 100644 --- a/pkg/volume/gce_pd/gce_util.go +++ b/pkg/volume/gce_pd/gce_util.go @@ -40,9 +40,11 @@ const ( diskPartitionSuffix = "-part" diskSDPath = "/dev/sd" diskSDPattern = "/dev/sd*" + regionalPDZonesAuto = "auto" // "replica-zones: auto" means Kubernetes will select zones for RePD maxChecks = 60 maxRetries = 10 checkSleepDuration = time.Second + maxRegionalPDZones = 2 ) // These variables are modified only in unit tests and should be constant @@ -70,7 +72,7 @@ func (util *GCEDiskUtil) DeleteVolume(d *gcePersistentDiskDeleter) error { } // CreateVolume creates a GCE PD. 
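
// Illustrative sketch, not part of this patch: LabelMultiZoneDelimiter
// ("__") added above is how a regional PD's replica zones are packed
// into the single failure-domain label value, e.g.
// "us-central1-b__us-central1-f"; volumeutil.ZonesSetToLabelValue and
// volumeutil.LabelZonesToSet, used elsewhere in this patch, do the real
// work. A plausible standalone equivalent:

package main

import (
	"fmt"
	"sort"
	"strings"
)

const labelMultiZoneDelimiter = "__"

func zonesToLabelValue(zones []string) string {
	sort.Strings(zones) // deterministic label value
	return strings.Join(zones, labelMultiZoneDelimiter)
}

func labelValueToZones(value string) []string {
	return strings.Split(value, labelMultiZoneDelimiter)
}

func main() {
	v := zonesToLabelValue([]string{"us-central1-f", "us-central1-b"})
	fmt.Println(v)                    // us-central1-b__us-central1-f
	fmt.Println(labelValueToZones(v)) // [us-central1-b us-central1-f]
}
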
-// Returns: volumeID, volumeSizeGB, labels, error
+// Returns: gcePDName, volumeSizeGB, labels, fsType, error
 func (gceutil *GCEDiskUtil) CreateVolume(c *gcePersistentDiskProvisioner) (string, int, map[string]string, string, error) {
 	cloud, err := getCloudProvider(c.gcePersistentDisk.plugin.host.GetCloudProvider())
 	if err != nil {
@@ -88,8 +90,10 @@ func (gceutil *GCEDiskUtil) CreateVolume(c *gcePersistentDiskProvisioner) (strin
 	diskType := ""
 	configuredZone := ""
 	configuredZones := ""
+	configuredReplicaZones := ""
 	zonePresent := false
 	zonesPresent := false
+	replicaZonesPresent := false
 	fstype := ""
 	for k, v := range c.options.Parameters {
 		switch strings.ToLower(k) {
@@ -101,6 +105,9 @@ func (gceutil *GCEDiskUtil) CreateVolume(c *gcePersistentDiskProvisioner) (strin
 		case "zones":
 			zonesPresent = true
 			configuredZones = v
+		case "replica-zones":
+			replicaZonesPresent = true
+			configuredReplicaZones = v
 		case volume.VolumeParameterFSType:
 			fstype = v
 		default:
@@ -108,8 +115,10 @@ func (gceutil *GCEDiskUtil) CreateVolume(c *gcePersistentDiskProvisioner) (strin
 		}
 	}
 
-	if zonePresent && zonesPresent {
-		return "", 0, nil, "", fmt.Errorf("both zone and zones StorageClass parameters must not be used at the same time")
+	if ((zonePresent || zonesPresent) && replicaZonesPresent) ||
+		(zonePresent && zonesPresent) {
+		// 011, 101, 111, 110
+		return "", 0, nil, "", fmt.Errorf("a combination of zone, zones, and replica-zones StorageClass parameters must not be used at the same time")
 	}
 
 	// TODO: implement PVC.Selector parsing
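Editor's note: the guard above collapses the eight (zone, zones, replica-zones) presence combinations into the four rejected ones called out in the 011/101/111/110 comment. A standalone sketch of that rule, for readers tracing the truth table (illustrative only; validParams is an invented name, not part of this patch):

package main

import "fmt"

// validParams mirrors the guard in CreateVolume: "replica-zones" may not be
// combined with "zone" or "zones", and "zone" may not be combined with "zones".
func validParams(zone, zones, replicaZones bool) bool {
	if (zone || zones) && replicaZones {
		return false // cases 011, 101, 111
	}
	return !(zone && zones) // case 110
}

func main() {
	fmt.Println(validParams(false, false, true)) // true: replica-zones alone is allowed
	fmt.Println(validParams(true, false, true))  // false: zone + replica-zones conflict
}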
@@ -117,36 +126,69 @@ func (gceutil *GCEDiskUtil) CreateVolume(c *gcePersistentDiskProvisioner) (strin
 		return "", 0, nil, "", fmt.Errorf("claim.Spec.Selector is not supported for dynamic provisioning on GCE")
 	}
 
-	var zones sets.String
-	if !zonePresent && !zonesPresent {
-		zones, err = cloud.GetAllZones()
+	if !zonePresent && !zonesPresent && replicaZonesPresent {
+		// 001 - "replica-zones" specified
+		replicaZones, err := volumeutil.ZonesToSet(configuredReplicaZones)
 		if err != nil {
-			glog.V(2).Infof("error getting zone information from GCE: %v", err)
 			return "", 0, nil, "", err
 		}
-	}
-	if !zonePresent && zonesPresent {
-		if zones, err = volume.ZonesToSet(configuredZones); err != nil {
-			return "", 0, nil, "", err
-		}
-	}
-	if zonePresent && !zonesPresent {
-		if err := volume.ValidateZone(configuredZone); err != nil {
-			return "", 0, nil, "", err
-		}
-		zones = make(sets.String)
-		zones.Insert(configuredZone)
-	}
-	zone := volume.ChooseZoneForVolume(zones, c.options.PVC.Name)
-	err = cloud.CreateDisk(name, diskType, zone, int64(requestGB), *c.options.CloudTags)
-	if err != nil {
-		glog.V(2).Infof("Error creating GCE PD volume: %v", err)
-		return "", 0, nil, "", err
-	}
-	glog.V(2).Infof("Successfully created GCE PD volume %s", name)
+		err = createRegionalPD(
+			name,
+			c.options.PVC.Name,
+			diskType,
+			replicaZones,
+			requestGB,
+			c.options.CloudTags,
+			cloud)
+		if err != nil {
+			glog.V(2).Infof("Error creating regional GCE PD volume: %v", err)
+			return "", 0, nil, "", err
+		}
 
-	labels, err := cloud.GetAutoLabelsForPD(name, zone)
+		glog.V(2).Infof("Successfully created Regional GCE PD volume %s", name)
+	} else {
+		var zones sets.String
+		if !zonePresent && !zonesPresent {
+			// 000 - neither "zone", "zones", nor "replica-zones" specified
+			// Pick a zone randomly selected from all active zones where
+			// the Kubernetes cluster has a node.
+			zones, err = cloud.GetAllZones()
+			if err != nil {
+				glog.V(2).Infof("error getting zone information from GCE: %v", err)
+				return "", 0, nil, "", err
+			}
+		} else if !zonePresent && zonesPresent {
+			// 010 - "zones" specified
+			// Pick a zone randomly selected from the specified set.
+			if zones, err = volumeutil.ZonesToSet(configuredZones); err != nil {
+				return "", 0, nil, "", err
+			}
+		} else if zonePresent && !zonesPresent {
+			// 100 - "zone" specified
+			// Use the specified zone.
+			if err := volume.ValidateZone(configuredZone); err != nil {
+				return "", 0, nil, "", err
+			}
+			zones = make(sets.String)
+			zones.Insert(configuredZone)
+		}
+		zone := volume.ChooseZoneForVolume(zones, c.options.PVC.Name)
+
+		if err := cloud.CreateDisk(
+			name,
+			diskType,
+			zone,
+			int64(requestGB),
+			*c.options.CloudTags); err != nil {
+			glog.V(2).Infof("Error creating single-zone GCE PD volume: %v", err)
+			return "", 0, nil, "", err
+		}
+
+		glog.V(2).Infof("Successfully created single-zone GCE PD volume %s", name)
+	}
+
+	labels, err := cloud.GetAutoLabelsForPD(name, "" /* zone */)
 	if err != nil {
 		// We don't really want to leak the volume here...
 		glog.Errorf("error getting labels for volume %q: %v", name, err)
@@ -155,6 +197,48 @@ func (gceutil *GCEDiskUtil) CreateVolume(c *gcePersistentDiskProvisioner) (strin
 	return name, int(requestGB), labels, fstype, nil
 }
 
+// createRegionalPD creates a Regional PD
+func createRegionalPD(
+	diskName string,
+	pvcName string,
+	diskType string,
+	replicaZones sets.String,
+	requestGB int64,
+	cloudTags *map[string]string,
+	cloud *gcecloud.GCECloud) error {
+
+	autoZoneSelection := false
+	if replicaZones.Len() != maxRegionalPDZones {
+		replicaZonesList := replicaZones.UnsortedList()
+		if replicaZones.Len() == 1 && replicaZonesList[0] == regionalPDZonesAuto {
+			// User requested automatic zone selection.
+			autoZoneSelection = true
+		} else {
+			return fmt.Errorf(
+				"replica-zones specifies %d zones. It must specify %d zones or the keyword \"auto\" to let Kubernetes select zones.",
+				replicaZones.Len(),
+				maxRegionalPDZones)
+		}
+	}
+
+	selectedReplicaZones := replicaZones
+	if autoZoneSelection {
+		selectedReplicaZones = volume.ChooseZonesForVolume(
+			replicaZones, pvcName, maxRegionalPDZones)
+	}
+
+	if err := cloud.CreateRegionalDisk(
+		diskName,
+		diskType,
+		selectedReplicaZones,
+		int64(requestGB),
+		*cloudTags); err != nil {
+		return err
+	}
+
+	return nil
+}
+
 // Returns the first path that exists, or empty string if none exist.
 func verifyDevicePath(devicePaths []string, sdBeforeSet sets.String) (string, error) {
 	if err := udevadmChangeToNewDrives(sdBeforeSet); err != nil {
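The replica-zones parameter handling added above accepts exactly maxRegionalPDZones (2) explicit zones, or the single keyword "auto", in which case ChooseZonesForVolume picks the zones. A sketch of just that acceptance rule (checkReplicaZones is an invented name; the real logic lives in createRegionalPD above):

package main

import "fmt"

const (
	maxRegionalPDZones  = 2
	regionalPDZonesAuto = "auto"
)

// checkReplicaZones reports whether zone selection should be automatic,
// or errors when replica-zones is neither two zones nor "auto".
func checkReplicaZones(zones []string) (auto bool, err error) {
	if len(zones) == maxRegionalPDZones {
		return false, nil // use the two zones as given
	}
	if len(zones) == 1 && zones[0] == regionalPDZonesAuto {
		return true, nil // ChooseZonesForVolume will pick the zones
	}
	return false, fmt.Errorf("replica-zones specifies %d zones; want %d zones or %q",
		len(zones), maxRegionalPDZones, regionalPDZonesAuto)
}

func main() {
	fmt.Println(checkReplicaZones([]string{regionalPDZonesAuto}))              // true <nil>
	fmt.Println(checkReplicaZones([]string{"us-central1-a", "us-central1-b"})) // false <nil>
	fmt.Println(checkReplicaZones([]string{"us-central1-a"}))                  // error
}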
diff --git a/pkg/volume/util.go b/pkg/volume/util.go
index 6999b91beca..b976ce947f5 100644
--- a/pkg/volume/util.go
+++ b/pkg/volume/util.go
@@ -298,9 +298,52 @@ func GetPath(mounter Mounter) (string, error) {
 func ChooseZoneForVolume(zones sets.String, pvcName string) string {
 	// We create the volume in a zone determined by the name
 	// Eventually the scheduler will coordinate placement into an available zone
-	var hash uint32
-	var index uint32
+	hash, index := getPVCNameHashAndIndexOffset(pvcName)
 
+	// Zones.List returns zones in a consistent order (sorted)
+	// We do have a potential failure case where volumes will not be properly spread,
+	// if the set of zones changes during StatefulSet volume creation. However, this is
+	// probably relatively unlikely because we expect the set of zones to be essentially
+	// static for clusters.
+	// Hopefully we can address this problem if/when we do full scheduler integration of
+	// PVC placement (which could also e.g. avoid putting volumes in overloaded or
+	// unhealthy zones)
+	zoneSlice := zones.List()
+	zone := zoneSlice[(hash+index)%uint32(len(zoneSlice))]
+
+	glog.V(2).Infof("Creating volume for PVC %q; chose zone=%q from zones=%q", pvcName, zone, zoneSlice)
+	return zone
+}
+
+// ChooseZonesForVolume is identical to ChooseZoneForVolume, but selects multiple zones, for multi-zone disks.
+func ChooseZonesForVolume(zones sets.String, pvcName string, numZones uint32) sets.String {
+	// We create the volume in a zone determined by the name
+	// Eventually the scheduler will coordinate placement into an available zone
+	hash, index := getPVCNameHashAndIndexOffset(pvcName)
+
+	// Zones.List returns zones in a consistent order (sorted)
+	// We do have a potential failure case where volumes will not be properly spread,
+	// if the set of zones changes during StatefulSet volume creation. However, this is
+	// probably relatively unlikely because we expect the set of zones to be essentially
+	// static for clusters.
+	// Hopefully we can address this problem if/when we do full scheduler integration of
+	// PVC placement (which could also e.g. avoid putting volumes in overloaded or
+	// unhealthy zones)
+	zoneSlice := zones.List()
+	replicaZones := sets.NewString()
+
+	startingIndex := index * numZones
+	for index = startingIndex; index < startingIndex+numZones; index++ {
+		zone := zoneSlice[(hash+index)%uint32(len(zoneSlice))]
+		replicaZones.Insert(zone)
+	}
+
+	glog.V(2).Infof("Creating volume for replicated PVC %q; chosen zones=%q from zones=%q",
+		pvcName, replicaZones.UnsortedList(), zoneSlice)
+	return replicaZones
+}
+
+func getPVCNameHashAndIndexOffset(pvcName string) (hash uint32, index uint32) {
 	if pvcName == "" {
 		// We should always be called with a name; this shouldn't happen
 		glog.Warningf("No name defined during volume create; choosing random zone")
@@ -349,19 +392,7 @@ func ChooseZoneForVolume(zones sets.String, pvcName string) string {
 		hash = h.Sum32()
 	}
 
-	// Zones.List returns zones in a consistent order (sorted)
-	// We do have a potential failure case where volumes will not be properly spread,
-	// if the set of zones changes during StatefulSet volume creation. However, this is
-	// probably relatively unlikely because we expect the set of zones to be essentially
-	// static for clusters.
-	// Hopefully we can address this problem if/when we do full scheduler integration of
-	// PVC placement (which could also e.g. avoid putting volumes in overloaded or
-	// unhealthy zones)
-	zoneSlice := zones.List()
-	zone := zoneSlice[(hash+index)%uint32(len(zoneSlice))]
-
-	glog.V(2).Infof("Creating volume for PVC %q; chose zone=%q from zones=%q", pvcName, zone, zoneSlice)
-	return zone
+	return hash, index
 }
 
 // UnmountViaEmptyDir delegates the tear down operation for secret, configmap, git_repo and downwardapi
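The striping arithmetic in ChooseZonesForVolume is easiest to see with concrete numbers. For PVC name "henley-2", getPVCNameHashAndIndexOffset returns hash("henley") = 1180403676 (which is 0 mod 3, per the checkFnv32 assertions in the tests further down) and index 2; with numZones = 2 the loop starts at startingIndex = 2*2 = 4. A self-contained sketch of just that arithmetic (not the patch code itself):

package main

import "fmt"

func main() {
	zoneSlice := []string{"a", "b", "c"} // sorted, as zones.List() returns
	hash := uint32(1180403676)           // fnv-32a of "henley"
	index, numZones := uint32(2), uint32(2)

	replicaZones := []string{}
	startingIndex := index * numZones // 4
	for i := startingIndex; i < startingIndex+numZones; i++ {
		replicaZones = append(replicaZones, zoneSlice[(hash+i)%uint32(len(zoneSlice))])
	}
	// Prints [b c], matching the "henley-2"/NumZones=2 case in TestChooseZonesForVolume.
	fmt.Println(replicaZones)
}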
@@ -419,20 +450,6 @@ func JoinMountOptions(userOptions []string, systemOptions []string) []string {
 	return allMountOptions.UnsortedList()
 }
 
-// ZonesToSet converts a string containing a comma separated list of zones to set
-func ZonesToSet(zonesString string) (sets.String, error) {
-	zonesSlice := strings.Split(zonesString, ",")
-	zonesSet := make(sets.String)
-	for _, zone := range zonesSlice {
-		trimmedZone := strings.TrimSpace(zone)
-		if trimmedZone == "" {
-			return make(sets.String), fmt.Errorf("comma separated list of zones (%q) must not contain an empty zone", zonesString)
-		}
-		zonesSet.Insert(trimmedZone)
-	}
-	return zonesSet, nil
-}
-
 // ValidateZone returns:
 // - an error in case zone is an empty string or contains only any combination of spaces and tab characters
 // - nil otherwise
diff --git a/pkg/volume/util/BUILD b/pkg/volume/util/BUILD
index 6c2c7ac2f17..445cb425fb2 100644
--- a/pkg/volume/util/BUILD
+++ b/pkg/volume/util/BUILD
@@ -30,6 +30,7 @@ go_library(
     deps = [
         "//pkg/api:go_default_library",
         "//pkg/api/v1/helper:go_default_library",
+        "//pkg/kubelet/apis:go_default_library",
         "//pkg/util/mount:go_default_library",
         "//vendor/github.com/golang/glog:go_default_library",
         "//vendor/github.com/prometheus/client_golang/prometheus:go_default_library",
@@ -69,9 +70,9 @@ go_test(
        "//pkg/api/v1/helper:go_default_library",
        "//vendor/k8s.io/api/core/v1:go_default_library",
        "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
+       "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
     ] + select({
         "@io_bazel_rules_go//go/platform:linux_amd64": [
-            "//vendor/k8s.io/apimachinery/pkg/util/sets:go_default_library",
             "//vendor/k8s.io/client-go/util/testing:go_default_library",
         ],
         "//conditions:default": [],
diff --git a/pkg/volume/util/util.go b/pkg/volume/util/util.go
index 0d402bf194e..976ad96890a 100644
--- a/pkg/volume/util/util.go
+++ b/pkg/volume/util/util.go
@@ -22,15 +22,19 @@ import (
 	"os"
 	"path"
+	"strings"
+
 	"github.com/golang/glog"
 	"k8s.io/api/core/v1"
 	storage "k8s.io/api/storage/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/apimachinery/pkg/util/sets"
 	clientset "k8s.io/client-go/kubernetes"
 	"k8s.io/kubernetes/pkg/api"
 	v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
+	kubeletapis "k8s.io/kubernetes/pkg/kubelet/apis"
 	"k8s.io/kubernetes/pkg/util/mount"
 )
@@ -235,3 +239,34 @@ func LoadPodFromFile(filePath string) (*v1.Pod, error) {
 	}
 	return pod, nil
 }
+
+func ZonesSetToLabelValue(strSet sets.String) string {
+	return strings.Join(strSet.UnsortedList(), kubeletapis.LabelMultiZoneDelimiter)
+}
+
+// ZonesToSet converts a string containing a comma separated list of zones to set
+func ZonesToSet(zonesString string) (sets.String, error) {
+	return stringToSet(zonesString, ",")
+}
+
+// LabelZonesToSet converts a PV label value from string containing a delimited list of zones to set
+func LabelZonesToSet(labelZonesValue string) (sets.String, error) {
+	return stringToSet(labelZonesValue, kubeletapis.LabelMultiZoneDelimiter)
+}
+
+// stringToSet converts a string containing a list separated by the specified delimiter to a set
+func stringToSet(str, delimiter string) (sets.String, error) {
+	zonesSlice := strings.Split(str, delimiter)
+	zonesSet := make(sets.String)
+	for _, zone := range zonesSlice {
+		trimmedZone := strings.TrimSpace(zone)
+		if trimmedZone == "" {
+			return make(sets.String), fmt.Errorf(
+				"%q separated list (%q) must not contain an empty string",
+				delimiter,
+				str)
+		}
+		zonesSet.Insert(trimmedZone)
+	}
+	return zonesSet, nil
+}
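These helpers give regional PDs a reversible zone encoding: ZonesSetToLabelValue joins replica zones with LabelMultiZoneDelimiter ("__") into a single failure-domain label value, and LabelZonesToSet splits it back apart for consumers such as the scheduler predicate changed later in this patch. A sketch that mirrors (rather than calls) that round trip:

package main

import (
	"fmt"
	"strings"
)

// labelMultiZoneDelimiter mirrors kubeletapis.LabelMultiZoneDelimiter.
const labelMultiZoneDelimiter = "__"

func main() {
	zones := []string{"us-west1-c", "us-west1-a"}

	// ZonesSetToLabelValue equivalent: join the set into one label value.
	label := strings.Join(zones, labelMultiZoneDelimiter)
	fmt.Println(label) // us-west1-c__us-west1-a

	// LabelZonesToSet equivalent: split it back (empty-string validation omitted).
	fmt.Println(strings.Split(label, labelMultiZoneDelimiter)) // [us-west1-c us-west1-a]
}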
diff --git a/pkg/volume/util/util_test.go b/pkg/volume/util/util_test.go
index 280895cee79..480e5762739 100644
--- a/pkg/volume/util/util_test.go
+++ b/pkg/volume/util/util_test.go
@@ -23,6 +23,7 @@ import (
 	"k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/sets"
 	// util.go uses api.Codecs.LegacyCodec so import this package to do some
 	// resource initialization.
 	_ "k8s.io/kubernetes/pkg/api/install"
@@ -229,3 +230,36 @@ spec:
 		}
 	}
 }
+func TestZonesToSet(t *testing.T) {
+	functionUnderTest := "ZonesToSet"
+	// First part: want an error
+	sliceOfZones := []string{"", ",", "us-east-1a, , us-east-1d", ", us-west-1b", "us-west-2b,"}
+	for _, zones := range sliceOfZones {
+		if got, err := ZonesToSet(zones); err == nil {
+			t.Errorf("%v(%v) returned (%v), want (%v)", functionUnderTest, zones, got, "an error")
+		}
+	}
+
+	// Second part: want no error
+	tests := []struct {
+		zones string
+		want  sets.String
+	}{
+		{
+			zones: "us-east-1a",
+			want:  sets.String{"us-east-1a": sets.Empty{}},
+		},
+		{
+			zones: "us-east-1a, us-west-2a",
+			want: sets.String{
+				"us-east-1a": sets.Empty{},
+				"us-west-2a": sets.Empty{},
+			},
+		},
+	}
+	for _, tt := range tests {
+		if got, err := ZonesToSet(tt.zones); err != nil || !got.Equal(tt.want) {
+			t.Errorf("%v(%v) returned (%v), want (%v)", functionUnderTest, tt.zones, got, tt.want)
+		}
+	}
+}
diff --git a/pkg/volume/util_test.go b/pkg/volume/util_test.go
index 826f9843cb8..6eeb00a7543 100644
--- a/pkg/volume/util_test.go
+++ b/pkg/volume/util_test.go
@@ -396,164 +396,473 @@ func TestChooseZoneForVolume(t *testing.T) {
 	checkFnv32(t, "", 2166136261)
 
 	tests := []struct {
-		Zones      []string
+		Zones      sets.String
 		VolumeName string
 		Expected   string
 	}{
 		// Test for PVC names that don't have a dash
 		{
-			Zones:      []string{"a", "b", "c"},
+			Zones:      sets.NewString("a", "b", "c"),
 			VolumeName: "henley",
 			Expected:   "a", // hash("henley") == 0
 		},
 		// Tests for PVC names that end in - number, but don't look like statefulset PVCs
 		{
-			Zones:      []string{"a", "b", "c"},
+			Zones:      sets.NewString("a", "b", "c"),
 			VolumeName: "henley-0",
 			Expected:   "a", // hash("henley") == 0
 		},
 		{
-			Zones:      []string{"a", "b", "c"},
+			Zones:      sets.NewString("a", "b", "c"),
 			VolumeName: "henley-1",
 			Expected:   "b", // hash("henley") + 1 == 1
 		},
 		{
-			Zones:      []string{"a", "b", "c"},
+			Zones:      sets.NewString("a", "b", "c"),
 			VolumeName: "henley-2",
 			Expected:   "c", // hash("henley") + 2 == 2
 		},
 		{
-			Zones:      []string{"a", "b", "c"},
+			Zones:      sets.NewString("a", "b", "c"),
 			VolumeName: "henley-3",
 			Expected:   "a", // hash("henley") + 3 == 3 === 0 mod 3
 		},
 		{
-			Zones:      []string{"a", "b", "c"},
+			Zones:      sets.NewString("a", "b", "c"),
 			VolumeName: "henley-4",
 			Expected:   "b", // hash("henley") + 4 == 4 === 1 mod 3
 		},
 		// Tests for PVC names that are edge cases
 		{
-			Zones:      []string{"a", "b", "c"},
+			Zones:      sets.NewString("a", "b", "c"),
 			VolumeName: "henley-",
 			Expected:   "c", // hash("henley-") = 2652299129 === 2 mod 3
 		},
 		{
-			Zones:      []string{"a", "b", "c"},
+			Zones:      sets.NewString("a", "b", "c"),
 			VolumeName: "henley-a",
 			Expected:   "c", // hash("henley-a") = 1459735322
=== 2 mod 3 }, { - Zones: []string{"a", "b", "c"}, + Zones: sets.NewString("a", "b", "c"), VolumeName: "medium--1", Expected: "c", // hash("") + 1 == 2166136261 + 1 === 2 mod 3 }, // Tests for PVC names for simple StatefulSet cases { - Zones: []string{"a", "b", "c"}, + Zones: sets.NewString("a", "b", "c"), VolumeName: "medium-henley-1", Expected: "b", // hash("henley") + 1 == 1 }, { - Zones: []string{"a", "b", "c"}, + Zones: sets.NewString("a", "b", "c"), VolumeName: "loud-henley-1", Expected: "b", // hash("henley") + 1 == 1 }, { - Zones: []string{"a", "b", "c"}, + Zones: sets.NewString("a", "b", "c"), VolumeName: "quiet-henley-2", Expected: "c", // hash("henley") + 2 == 2 }, { - Zones: []string{"a", "b", "c"}, + Zones: sets.NewString("a", "b", "c"), VolumeName: "medium-henley-2", Expected: "c", // hash("henley") + 2 == 2 }, { - Zones: []string{"a", "b", "c"}, + Zones: sets.NewString("a", "b", "c"), VolumeName: "medium-henley-3", Expected: "a", // hash("henley") + 3 == 3 === 0 mod 3 }, { - Zones: []string{"a", "b", "c"}, + Zones: sets.NewString("a", "b", "c"), VolumeName: "medium-henley-4", Expected: "b", // hash("henley") + 4 == 4 === 1 mod 3 }, // Tests for statefulsets (or claims) with dashes in the names { - Zones: []string{"a", "b", "c"}, + Zones: sets.NewString("a", "b", "c"), VolumeName: "medium-alpha-henley-2", Expected: "c", // hash("henley") + 2 == 2 }, { - Zones: []string{"a", "b", "c"}, + Zones: sets.NewString("a", "b", "c"), VolumeName: "medium-beta-henley-3", Expected: "a", // hash("henley") + 3 == 3 === 0 mod 3 }, { - Zones: []string{"a", "b", "c"}, + Zones: sets.NewString("a", "b", "c"), VolumeName: "medium-gamma-henley-4", Expected: "b", // hash("henley") + 4 == 4 === 1 mod 3 }, // Tests for statefulsets name ending in - { - Zones: []string{"a", "b", "c"}, + Zones: sets.NewString("a", "b", "c"), VolumeName: "medium-henley--2", Expected: "a", // hash("") + 2 == 0 mod 3 }, { - Zones: []string{"a", "b", "c"}, + Zones: sets.NewString("a", "b", "c"), VolumeName: "medium-henley--3", Expected: "b", // hash("") + 3 == 1 mod 3 }, { - Zones: []string{"a", "b", "c"}, + Zones: sets.NewString("a", "b", "c"), VolumeName: "medium-henley--4", Expected: "c", // hash("") + 4 == 2 mod 3 }, } for _, test := range tests { - zonesSet := sets.NewString(test.Zones...) 
+ actual := ChooseZoneForVolume(test.Zones, test.VolumeName) - actual := ChooseZoneForVolume(zonesSet, test.VolumeName) - - for actual != test.Expected { + if actual != test.Expected { t.Errorf("Test %v failed, expected zone %q, actual %q", test, test.Expected, actual) } } } -func TestZonesToSet(t *testing.T) { - functionUnderTest := "ZonesToSet" - // First part: want an error - sliceOfZones := []string{"", ",", "us-east-1a, , us-east-1d", ", us-west-1b", "us-west-2b,"} - for _, zones := range sliceOfZones { - if got, err := ZonesToSet(zones); err == nil { - t.Errorf("%v(%v) returned (%v), want (%v)", functionUnderTest, zones, got, "an error") - } +func TestChooseZonesForVolume(t *testing.T) { + checkFnv32(t, "henley", 1180403676) + // 1180403676 mod 3 == 0, so the offset from "henley" is 0, which makes it easier to verify this by inspection + + // A few others + checkFnv32(t, "henley-", 2652299129) + checkFnv32(t, "henley-a", 1459735322) + checkFnv32(t, "", 2166136261) + + tests := []struct { + Zones sets.String + VolumeName string + NumZones uint32 + Expected sets.String + }{ + // Test for PVC names that don't have a dash + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "henley", + NumZones: 1, + Expected: sets.NewString("a" /* hash("henley") == 0 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "henley", + NumZones: 2, + Expected: sets.NewString("a" /* hash("henley") == 0 */, "b"), + }, + // Tests for PVC names that end in - number, but don't look like statefulset PVCs + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "henley-0", + NumZones: 1, + Expected: sets.NewString("a" /* hash("henley") == 0 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "henley-0", + NumZones: 2, + Expected: sets.NewString("a" /* hash("henley") == 0 */, "b"), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "henley-1", + NumZones: 1, + Expected: sets.NewString("b" /* hash("henley") + 1 == 1 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "henley-1", + NumZones: 2, + Expected: sets.NewString("c" /* hash("henley") + 1 + 1(startingIndex) == 2 */, "a"), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "henley-2", + NumZones: 1, + Expected: sets.NewString("c" /* hash("henley") + 2 == 2 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "henley-2", + NumZones: 2, + Expected: sets.NewString("b" /* hash("henley") + 2 + 2(startingIndex) == 4 */, "c"), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "henley-3", + NumZones: 1, + Expected: sets.NewString("a" /* hash("henley") + 3 == 3 === 0 mod 3 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "henley-3", + NumZones: 2, + Expected: sets.NewString("a" /* hash("henley") + 3 + 3(startingIndex) == 6 */, "b"), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "henley-4", + NumZones: 1, + Expected: sets.NewString("b" /* hash("henley") + 4 == 4 === 1 mod 3 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "henley-4", + NumZones: 2, + Expected: sets.NewString("c" /* hash("henley") + 4 + 4(startingIndex) == 8 */, "a"), + }, + // Tests for PVC names that are edge cases + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "henley-", + NumZones: 1, + Expected: sets.NewString("c" /* hash("henley-") = 2652299129 === 2 mod 3 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "henley-", + NumZones: 2, + Expected: sets.NewString("c" /* hash("henley-") = 2652299129 === 2 mod 3 = 2 */, 
"a"), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "henley-a", + NumZones: 1, + Expected: sets.NewString("c" /* hash("henley-a") = 1459735322 === 2 mod 3 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "henley-a", + NumZones: 2, + Expected: sets.NewString("c" /* hash("henley-a") = 1459735322 === 2 mod 3 = 2 */, "a"), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium--1", + NumZones: 1, + Expected: sets.NewString("c" /* hash("") + 1 == 2166136261 + 1 === 2 mod 3 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium--1", + NumZones: 2, + Expected: sets.NewString("a" /* hash("") + 1 + 1(startingIndex) == 2166136261 + 1 + 1 === 3 mod 3 = 0 */, "b"), + }, + // Tests for PVC names for simple StatefulSet cases + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-henley-1", + NumZones: 1, + Expected: sets.NewString("b" /* hash("henley") + 1 == 1 */), + }, + // Tests for PVC names for simple StatefulSet cases + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-henley-1", + NumZones: 2, + Expected: sets.NewString("c" /* hash("henley") + 1 + 1(startingIndex) == 2 */, "a"), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "loud-henley-1", + NumZones: 1, + Expected: sets.NewString("b" /* hash("henley") + 1 == 1 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "loud-henley-1", + NumZones: 2, + Expected: sets.NewString("c" /* hash("henley") + 1 + 1(startingIndex) == 2 */, "a"), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "quiet-henley-2", + NumZones: 1, + Expected: sets.NewString("c" /* hash("henley") + 2 == 2 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "quiet-henley-2", + NumZones: 2, + Expected: sets.NewString("b" /* hash("henley") + 2 + 2(startingIndex) == 4 */, "c"), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-henley-2", + NumZones: 1, + Expected: sets.NewString("c" /* hash("henley") + 2 == 2 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-henley-2", + NumZones: 2, + Expected: sets.NewString("b" /* hash("henley") + 2 + 2(startingIndex) == 4 */, "c"), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-henley-3", + NumZones: 1, + Expected: sets.NewString("a" /* hash("henley") + 3 == 3 === 0 mod 3 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-henley-3", + NumZones: 2, + Expected: sets.NewString("a" /* hash("henley") + 3 + 3(startingIndex) == 6 === 6 mod 3 = 0 */, "b"), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-henley-4", + NumZones: 1, + Expected: sets.NewString("b" /* hash("henley") + 4 == 4 === 1 mod 3 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-henley-4", + NumZones: 2, + Expected: sets.NewString("c" /* hash("henley") + 4 + 4(startingIndex) == 8 === 2 mod 3 */, "a"), + }, + // Tests for statefulsets (or claims) with dashes in the names + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-alpha-henley-2", + NumZones: 1, + Expected: sets.NewString("c" /* hash("henley") + 2 == 2 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-alpha-henley-2", + NumZones: 2, + Expected: sets.NewString("b" /* hash("henley") + 2 + 2(startingIndex) == 4 */, "c"), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-beta-henley-3", + NumZones: 1, + Expected: sets.NewString("a" /* hash("henley") + 3 == 3 === 0 mod 3 */), + 
}, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-beta-henley-3", + NumZones: 2, + Expected: sets.NewString("a" /* hash("henley") + 3 + 3(startingIndex) == 6 === 0 mod 3 */, "b"), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-gamma-henley-4", + NumZones: 1, + Expected: sets.NewString("b" /* hash("henley") + 4 == 4 === 1 mod 3 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-gamma-henley-4", + NumZones: 2, + Expected: sets.NewString("c" /* hash("henley") + 4 + 4(startingIndex) == 8 === 2 mod 3 */, "a"), + }, + // Tests for statefulsets name ending in - + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-henley--2", + NumZones: 1, + Expected: sets.NewString("a" /* hash("") + 2 == 0 mod 3 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-henley--2", + NumZones: 2, + Expected: sets.NewString("c" /* hash("") + 2 + 2(startingIndex) == 2 mod 3 */, "a"), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-henley--3", + NumZones: 1, + Expected: sets.NewString("b" /* hash("") + 3 == 1 mod 3 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-henley--3", + NumZones: 2, + Expected: sets.NewString("b" /* hash("") + 3 + 3(startingIndex) == 1 mod 3 */, "c"), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-henley--4", + NumZones: 1, + Expected: sets.NewString("c" /* hash("") + 4 == 2 mod 3 */), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-henley--4", + NumZones: 2, + Expected: sets.NewString("a" /* hash("") + 4 + 4(startingIndex) == 0 mod 3 */, "b"), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-henley--4", + NumZones: 3, + Expected: sets.NewString("c" /* hash("") + 4 == 2 mod 3 */, "a", "b"), + }, + { + Zones: sets.NewString("a", "b", "c"), + VolumeName: "medium-henley--4", + NumZones: 4, + Expected: sets.NewString("c" /* hash("") + 4 + 9(startingIndex) == 2 mod 3 */, "a", "b", "c"), + }, + { + Zones: sets.NewString("a", "b", "c", "d", "e", "f", "g", "h", "i"), + VolumeName: "henley-0", + NumZones: 2, + Expected: sets.NewString("a" /* hash("henley") == 0 */, "b"), + }, + { + Zones: sets.NewString("a", "b", "c", "d", "e", "f", "g", "h", "i"), + VolumeName: "henley-1", + NumZones: 2, + Expected: sets.NewString("c" /* hash("henley") == 0 + 2 */, "d"), + }, + { + Zones: sets.NewString("a", "b", "c", "d", "e", "f", "g", "h", "i"), + VolumeName: "henley-2", + NumZones: 2, + Expected: sets.NewString("e" /* hash("henley") == 0 + 2 + 2(startingIndex) */, "f"), + }, + { + Zones: sets.NewString("a", "b", "c", "d", "e", "f", "g", "h", "i"), + VolumeName: "henley-3", + NumZones: 2, + Expected: sets.NewString("g" /* hash("henley") == 0 + 2 + 4(startingIndex) */, "h"), + }, + { + Zones: sets.NewString("a", "b", "c", "d", "e", "f", "g", "h", "i"), + VolumeName: "henley-0", + NumZones: 3, + Expected: sets.NewString("a" /* hash("henley") == 0 */, "b", "c"), + }, + { + Zones: sets.NewString("a", "b", "c", "d", "e", "f", "g", "h", "i"), + VolumeName: "henley-1", + NumZones: 3, + Expected: sets.NewString("d" /* hash("henley") == 0 + 1 + 2(startingIndex) */, "e", "f"), + }, + { + Zones: sets.NewString("a", "b", "c", "d", "e", "f", "g", "h", "i"), + VolumeName: "henley-2", + NumZones: 3, + Expected: sets.NewString("g" /* hash("henley") == 0 + 2 + 4(startingIndex) */, "h", "i"), + }, + { + Zones: sets.NewString("a", "b", "c", "d", "e", "f", "g", "h", "i"), + VolumeName: "henley-3", + NumZones: 3, + 
Expected: sets.NewString("a" /* hash("henley") == 0 + 3 + 6(startingIndex) */, "b", "c"), + }, } - // Second part: want no error - tests := []struct { - zones string - want sets.String - }{ - { - zones: "us-east-1a", - want: sets.String{"us-east-1a": sets.Empty{}}, - }, - { - zones: "us-east-1a, us-west-2a", - want: sets.String{ - "us-east-1a": sets.Empty{}, - "us-west-2a": sets.Empty{}, - }, - }, - } - for _, tt := range tests { - if got, err := ZonesToSet(tt.zones); err != nil || !got.Equal(tt.want) { - t.Errorf("%v(%v) returned (%v), want (%v)", functionUnderTest, tt.zones, got, tt.want) + for _, test := range tests { + actual := ChooseZonesForVolume(test.Zones, test.VolumeName, test.NumZones) + + if !actual.Equal(test.Expected) { + t.Errorf("Test %v failed, expected zone %#v, actual %#v", test, test.Expected, actual) } } } diff --git a/plugin/pkg/scheduler/algorithm/predicates/predicates.go b/plugin/pkg/scheduler/algorithm/predicates/predicates.go index dc811b8e80f..427700b5300 100644 --- a/plugin/pkg/scheduler/algorithm/predicates/predicates.go +++ b/plugin/pkg/scheduler/algorithm/predicates/predicates.go @@ -435,7 +435,13 @@ func (c *VolumeZoneChecker) predicate(pod *v1.Pod, meta interface{}, nodeInfo *s continue } nodeV, _ := nodeConstraints[k] - if v != nodeV { + volumeVSet, err := volumeutil.LabelZonesToSet(v) + if err != nil { + glog.Warningf("Failed to parse label for %q: %q. Ignoring the label. err=%v. ", k, v, err) + continue + } + + if !volumeVSet.Has(nodeV) { glog.V(10).Infof("Won't schedule pod %q onto node %q due to volume %q (mismatch on %q)", pod.Name, node.Name, pvName, k) return false, []algorithm.PredicateFailureReason{ErrVolumeZoneConflict}, nil } diff --git a/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go b/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go index 383107861a8..4f5c877a866 100644 --- a/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go +++ b/plugin/pkg/scheduler/algorithm/predicates/predicates_test.go @@ -3495,13 +3495,13 @@ func createPodWithVolume(pod, pv, pvc string) *v1.Pod { func TestVolumeZonePredicate(t *testing.T) { pvInfo := FakePersistentVolumeInfo{ { - ObjectMeta: metav1.ObjectMeta{Name: "Vol_1", Labels: map[string]string{kubeletapis.LabelZoneFailureDomain: "zone_1"}}, + ObjectMeta: metav1.ObjectMeta{Name: "Vol_1", Labels: map[string]string{kubeletapis.LabelZoneFailureDomain: "us-west1-a"}}, }, { - ObjectMeta: metav1.ObjectMeta{Name: "Vol_2", Labels: map[string]string{kubeletapis.LabelZoneRegion: "zone_2", "uselessLabel": "none"}}, + ObjectMeta: metav1.ObjectMeta{Name: "Vol_2", Labels: map[string]string{kubeletapis.LabelZoneRegion: "us-west1-b", "uselessLabel": "none"}}, }, { - ObjectMeta: metav1.ObjectMeta{Name: "Vol_3", Labels: map[string]string{kubeletapis.LabelZoneRegion: "zone_3"}}, + ObjectMeta: metav1.ObjectMeta{Name: "Vol_3", Labels: map[string]string{kubeletapis.LabelZoneRegion: "us-west1-c"}}, }, } @@ -3538,7 +3538,7 @@ func TestVolumeZonePredicate(t *testing.T) { Node: &v1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: "host1", - Labels: map[string]string{kubeletapis.LabelZoneFailureDomain: "zone_1"}, + Labels: map[string]string{kubeletapis.LabelZoneFailureDomain: "us-west1-a"}, }, }, Fits: true, @@ -3559,7 +3559,7 @@ func TestVolumeZonePredicate(t *testing.T) { Node: &v1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: "host1", - Labels: map[string]string{kubeletapis.LabelZoneFailureDomain: "zone_1", "uselessLabel": "none"}, + Labels: map[string]string{kubeletapis.LabelZoneFailureDomain: "us-west1-a", 
"uselessLabel": "none"}, }, }, Fits: true, @@ -3570,7 +3570,7 @@ func TestVolumeZonePredicate(t *testing.T) { Node: &v1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: "host1", - Labels: map[string]string{kubeletapis.LabelZoneRegion: "zone_2", "uselessLabel": "none"}, + Labels: map[string]string{kubeletapis.LabelZoneRegion: "us-west1-b", "uselessLabel": "none"}, }, }, Fits: true, @@ -3581,7 +3581,7 @@ func TestVolumeZonePredicate(t *testing.T) { Node: &v1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: "host1", - Labels: map[string]string{kubeletapis.LabelZoneRegion: "no_zone_2", "uselessLabel": "none"}, + Labels: map[string]string{kubeletapis.LabelZoneRegion: "no_us-west1-b", "uselessLabel": "none"}, }, }, Fits: false, @@ -3592,7 +3592,100 @@ func TestVolumeZonePredicate(t *testing.T) { Node: &v1.Node{ ObjectMeta: metav1.ObjectMeta{ Name: "host1", - Labels: map[string]string{kubeletapis.LabelZoneFailureDomain: "no_zone_1", "uselessLabel": "none"}, + Labels: map[string]string{kubeletapis.LabelZoneFailureDomain: "no_us-west1-a", "uselessLabel": "none"}, + }, + }, + Fits: false, + }, + } + + expectedFailureReasons := []algorithm.PredicateFailureReason{ErrVolumeZoneConflict} + + for _, test := range tests { + fit := NewVolumeZonePredicate(pvInfo, pvcInfo) + node := &schedulercache.NodeInfo{} + node.SetNode(test.Node) + + fits, reasons, err := fit(test.Pod, nil, node) + if err != nil { + t.Errorf("%s: unexpected error: %v", test.Name, err) + } + if !fits && !reflect.DeepEqual(reasons, expectedFailureReasons) { + t.Errorf("%s: unexpected failure reasons: %v, want: %v", test.Name, reasons, expectedFailureReasons) + } + if fits != test.Fits { + t.Errorf("%s: expected %v got %v", test.Name, test.Fits, fits) + } + + } +} + +func TestVolumeZonePredicateMultiZone(t *testing.T) { + pvInfo := FakePersistentVolumeInfo{ + { + ObjectMeta: metav1.ObjectMeta{Name: "Vol_1", Labels: map[string]string{kubeletapis.LabelZoneFailureDomain: "us-west1-a"}}, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "Vol_2", Labels: map[string]string{kubeletapis.LabelZoneFailureDomain: "us-west1-b", "uselessLabel": "none"}}, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "Vol_3", Labels: map[string]string{kubeletapis.LabelZoneFailureDomain: "us-west1-c__us-west1-a"}}, + }, + } + + pvcInfo := FakePersistentVolumeClaimInfo{ + { + ObjectMeta: metav1.ObjectMeta{Name: "PVC_1", Namespace: "default"}, + Spec: v1.PersistentVolumeClaimSpec{VolumeName: "Vol_1"}, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "PVC_2", Namespace: "default"}, + Spec: v1.PersistentVolumeClaimSpec{VolumeName: "Vol_2"}, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "PVC_3", Namespace: "default"}, + Spec: v1.PersistentVolumeClaimSpec{VolumeName: "Vol_3"}, + }, + { + ObjectMeta: metav1.ObjectMeta{Name: "PVC_4", Namespace: "default"}, + Spec: v1.PersistentVolumeClaimSpec{VolumeName: "Vol_not_exist"}, + }, + } + + tests := []struct { + Name string + Pod *v1.Pod + Fits bool + Node *v1.Node + }{ + { + Name: "node without labels", + Pod: createPodWithVolume("pod_1", "Vol_3", "PVC_3"), + Node: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "host1", + }, + }, + Fits: true, + }, + { + Name: "label zone failure domain matched", + Pod: createPodWithVolume("pod_1", "Vol_3", "PVC_3"), + Node: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "host1", + Labels: map[string]string{kubeletapis.LabelZoneFailureDomain: "us-west1-a", "uselessLabel": "none"}, + }, + }, + Fits: true, + }, + { + Name: "label zone failure domain failed match", + Pod: createPodWithVolume("pod_1", "vol_1", 
"PVC_1"), + Node: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Name: "host1", + Labels: map[string]string{kubeletapis.LabelZoneFailureDomain: "us-west1-b", "uselessLabel": "none"}, }, }, Fits: false,