Scheduler predicate for capping node volume count

For certain volume types (e.g. AWS EBS or GCE PD), a limited
number of such volumes can be attached to a given node.  This commit
introduces a predicate which allows cluster admins to cap
the maximum number of volumes matching a particular type attached to a
given node.

The volume type is configurable by passing a pair of filter functions,
and the maximum number of such volumes is configurable to allow node
admins to reserve a certain number of volumes for system use.

By default, the predicate is exposed as MaxEBSVolumeCount and
MaxGCEPDVolumeCount (for AWS ElasticBlockStore and GCE PersistentDisk
volumes, respectively), each of which can be configured using the
`KUBE_MAX_PD_VOLS` environment variable.

Fixes #7835
This commit is contained in:
Solly Ross
2016-01-14 15:45:08 -05:00
parent 7489cb6e79
commit 2d436ff080
4 changed files with 427 additions and 0 deletions

View File

@@ -145,6 +145,141 @@ func NoDiskConflict(pod *api.Pod, existingPods []*api.Pod, node string) (bool, e
return true, nil
}
// MaxPDVolumeCountChecker carries the configuration used by the volume-count
// predicate: which volumes are relevant, how many of them are allowed, and how
// to resolve PersistentVolumeClaims to the PersistentVolumes backing them.
type MaxPDVolumeCountChecker struct {
// filter decides whether a volume counts toward the cap and supplies its ID.
filter VolumeFilter
// maxVolumes is the largest number of unique relevant volumes permitted.
maxVolumes int
// pvInfo looks up a PersistentVolume by name.
pvInfo PersistentVolumeInfo
// pvcInfo looks up a PersistentVolumeClaim by namespace and name.
pvcInfo PersistentVolumeClaimInfo
}
// VolumeFilter contains information on how to filter PD Volumes when checking PD Volume caps.
// Each function reports whether its argument is relevant and, if so, the ID
// under which it is counted; volumes sharing an ID are counted once.
type VolumeFilter struct {
// FilterVolume inspects a volume declared directly in a pod spec.
FilterVolume func(vol *api.Volume) (id string, relevant bool)
// FilterPersistentVolume inspects a PersistentVolume reached through a
// pod's PersistentVolumeClaim.
FilterPersistentVolume func(pv *api.PersistentVolume) (id string, relevant bool)
}
// NewMaxPDVolumeCountPredicate builds a fit predicate that limits how many
// volumes matching filter may be in use once the candidate pod is added to the
// already-present pods. maxVolumes is the cap, so different systems can be
// given different limits.
//
// Both volumes named directly in a pod spec and volumes reached through a
// PersistentVolumeClaim (resolved with pvcInfo and pvInfo) are considered.
// Volumes are de-duplicated by the ID the filter reports, and the pod is
// rejected when the number of unique volumes would exceed maxVolumes.
func NewMaxPDVolumeCountPredicate(filter VolumeFilter, maxVolumes int, pvInfo PersistentVolumeInfo, pvcInfo PersistentVolumeClaimInfo) algorithm.FitPredicate {
	return (&MaxPDVolumeCountChecker{
		filter:     filter,
		maxVolumes: maxVolumes,
		pvInfo:     pvInfo,
		pvcInfo:    pvcInfo,
	}).predicate
}
// filterVolumes records in filteredVolumes the ID of every volume in volumes
// that the checker's filter considers relevant.
//
// A volume is first offered to FilterVolume. If that does not match and the
// volume is a PersistentVolumeClaim reference, the claim is looked up in
// namespace, the PersistentVolume it is bound to is fetched, and that volume
// is offered to FilterPersistentVolume. Volumes matching neither are ignored.
//
// An error is returned when a claim reference has no name, when the claim is
// not bound to a volume, when either lookup fails, or when a lookup reports
// success but yields no object. filteredVolumes may already have been
// partially updated when an error is returned.
func (c *MaxPDVolumeCountChecker) filterVolumes(volumes []api.Volume, namespace string, filteredVolumes map[string]bool) error {
	for _, vol := range volumes {
		if id, ok := c.filter.FilterVolume(&vol); ok {
			filteredVolumes[id] = true
			continue
		}
		if vol.PersistentVolumeClaim == nil {
			// Neither a directly relevant volume nor a claim: not counted.
			continue
		}

		pvcName := vol.PersistentVolumeClaim.ClaimName
		if pvcName == "" {
			return fmt.Errorf("PersistentVolumeClaim had no name: %q", pvcName)
		}

		pvc, err := c.pvcInfo.GetPersistentVolumeClaimInfo(namespace, pvcName)
		if err != nil {
			return err
		}
		// Guard against a lookup that returns (nil, nil); dereferencing it
		// below would otherwise panic instead of failing the predicate.
		if pvc == nil {
			return fmt.Errorf("PersistentVolumeClaim not found: %q", pvcName)
		}

		pvName := pvc.Spec.VolumeName
		if pvName == "" {
			return fmt.Errorf("PersistentVolumeClaim is not bound: %q", pvcName)
		}

		pv, err := c.pvInfo.GetPersistentVolumeInfo(pvName)
		if err != nil {
			return err
		}
		// Same guard for the volume lookup: the filter functions dereference
		// the PersistentVolume they are handed.
		if pv == nil {
			return fmt.Errorf("PersistentVolume not found: %q", pvName)
		}

		if id, ok := c.filter.FilterPersistentVolume(pv); ok {
			filteredVolumes[id] = true
		}
	}
	return nil
}
// predicate reports whether pod can be placed alongside existingPods without
// the number of unique relevant volumes exceeding c.maxVolumes. The node
// argument is not consulted. Any error from resolving a pod's volumes is
// returned together with false.
func (c *MaxPDVolumeCountChecker) predicate(pod *api.Pod, existingPods []*api.Pod, node string) (bool, error) {
	requested := map[string]bool{}
	if err := c.filterVolumes(pod.Spec.Volumes, pod.Namespace, requested); err != nil {
		return false, err
	}
	// A pod that asks for no relevant volumes always fits, regardless of how
	// many are already in use.
	if len(requested) == 0 {
		return true, nil
	}

	// Gather the unique relevant volumes used by the pods already present.
	inUse := map[string]bool{}
	for _, other := range existingPods {
		if err := c.filterVolumes(other.Spec.Volumes, other.Namespace, inUse); err != nil {
			return false, err
		}
	}

	// Volumes that are already in use do not count a second time; deleting a
	// key that is absent is a no-op.
	for id := range inUse {
		delete(requested, id)
	}

	return len(inUse)+len(requested) <= c.maxVolumes, nil
}
// EBSVolumeFilter is a VolumeFilter that selects AWS ElasticBlockStore
// volumes, identifying each by its VolumeID.
var EBSVolumeFilter = VolumeFilter{
	FilterVolume: func(vol *api.Volume) (string, bool) {
		ebs := vol.AWSElasticBlockStore
		if ebs == nil {
			return "", false
		}
		return ebs.VolumeID, true
	},

	FilterPersistentVolume: func(pv *api.PersistentVolume) (string, bool) {
		ebs := pv.Spec.AWSElasticBlockStore
		if ebs == nil {
			return "", false
		}
		return ebs.VolumeID, true
	},
}
// GCEPDVolumeFilter is a VolumeFilter that selects GCE PersistentDisk
// volumes, identifying each by its PDName.
var GCEPDVolumeFilter = VolumeFilter{
	FilterVolume: func(vol *api.Volume) (string, bool) {
		disk := vol.GCEPersistentDisk
		if disk == nil {
			return "", false
		}
		return disk.PDName, true
	},

	FilterPersistentVolume: func(pv *api.PersistentVolume) (string, bool) {
		disk := pv.Spec.GCEPersistentDisk
		if disk == nil {
			return "", false
		}
		return disk.PDName, true
	},
}
type VolumeZoneChecker struct {
nodeInfo NodeInfo
pvInfo PersistentVolumeInfo

View File

@@ -44,6 +44,28 @@ func (nodes FakeNodeListInfo) GetNodeInfo(nodeName string) (*api.Node, error) {
return nil, fmt.Errorf("Unable to find node: %s", nodeName)
}
// FakePersistentVolumeClaimInfo is an in-memory list of claims that can be
// searched by namespace and name.
type FakePersistentVolumeClaimInfo []api.PersistentVolumeClaim

// GetPersistentVolumeClaimInfo returns a copy of the first claim whose
// namespace and name match the arguments, or an error when none does.
func (pvcs FakePersistentVolumeClaimInfo) GetPersistentVolumeClaimInfo(namespace string, pvcID string) (*api.PersistentVolumeClaim, error) {
	for i := range pvcs {
		if pvcs[i].Name != pvcID || pvcs[i].Namespace != namespace {
			continue
		}
		// Hand back a copy so the caller never aliases the fake's storage.
		found := pvcs[i]
		return &found, nil
	}
	return nil, fmt.Errorf("Unable to find persistent volume claim: %s/%s", namespace, pvcID)
}
// FakePersistentVolumeInfo is an in-memory list of persistent volumes that
// can be searched by name.
type FakePersistentVolumeInfo []api.PersistentVolume

// GetPersistentVolumeInfo returns a copy of the first volume named pvID, or
// an error when no volume has that name.
func (pvs FakePersistentVolumeInfo) GetPersistentVolumeInfo(pvID string) (*api.PersistentVolume, error) {
	for i := range pvs {
		if pvs[i].Name != pvID {
			continue
		}
		// Hand back a copy so the caller never aliases the fake's storage.
		found := pvs[i]
		return &found, nil
	}
	return nil, fmt.Errorf("Unable to find persistent volume: %s", pvID)
}
func makeResources(milliCPU int64, memory int64, pods int64) api.NodeResources {
return api.NodeResources{
Capacity: api.ResourceList{
@@ -771,3 +793,224 @@ func TestServiceAffinity(t *testing.T) {
}
}
}
// TestEBSVolumeCountConflicts runs the max-volume-count predicate, configured
// with an EBS filter, against a table of candidate pods, existing pods and
// volume caps, and checks the fit decision for each case.
func TestEBSVolumeCountConflicts(t *testing.T) {
	// Pod with a single directly-declared EBS volume.
	oneVolPod := &api.Pod{
		Spec: api.PodSpec{
			Volumes: []api.Volume{
				{
					VolumeSource: api.VolumeSource{
						AWSElasticBlockStore: &api.AWSElasticBlockStoreVolumeSource{VolumeID: "ovp"},
					},
				},
			},
		},
	}
	// Pod whose only volume is a claim bound to an EBS-backed volume.
	ebsPVCPod := &api.Pod{
		Spec: api.PodSpec{
			Volumes: []api.Volume{
				{
					VolumeSource: api.VolumeSource{
						PersistentVolumeClaim: &api.PersistentVolumeClaimVolumeSource{
							ClaimName: "someEBSVol",
						},
					},
				},
			},
		},
	}
	// Pod with one claim bound to a non-EBS volume and one bound to an EBS volume.
	splitPVCPod := &api.Pod{
		Spec: api.PodSpec{
			Volumes: []api.Volume{
				{
					VolumeSource: api.VolumeSource{
						PersistentVolumeClaim: &api.PersistentVolumeClaimVolumeSource{
							ClaimName: "someNonEBSVol",
						},
					},
				},
				{
					VolumeSource: api.VolumeSource{
						PersistentVolumeClaim: &api.PersistentVolumeClaimVolumeSource{
							ClaimName: "someEBSVol",
						},
					},
				},
			},
		},
	}
	// Pod with two distinct directly-declared EBS volumes.
	twoVolPod := &api.Pod{
		Spec: api.PodSpec{
			Volumes: []api.Volume{
				{
					VolumeSource: api.VolumeSource{
						AWSElasticBlockStore: &api.AWSElasticBlockStoreVolumeSource{VolumeID: "tvp1"},
					},
				},
				{
					VolumeSource: api.VolumeSource{
						AWSElasticBlockStore: &api.AWSElasticBlockStoreVolumeSource{VolumeID: "tvp2"},
					},
				},
			},
		},
	}
	// Pod mixing a host-path volume with one EBS volume.
	splitVolsPod := &api.Pod{
		Spec: api.PodSpec{
			Volumes: []api.Volume{
				{
					VolumeSource: api.VolumeSource{
						HostPath: &api.HostPathVolumeSource{},
					},
				},
				{
					VolumeSource: api.VolumeSource{
						AWSElasticBlockStore: &api.AWSElasticBlockStoreVolumeSource{VolumeID: "svp"},
					},
				},
			},
		},
	}
	// Pod with only a host-path volume, which the EBS filter ignores.
	nonApplicablePod := &api.Pod{
		Spec: api.PodSpec{
			Volumes: []api.Volume{
				{
					VolumeSource: api.VolumeSource{
						HostPath: &api.HostPathVolumeSource{},
					},
				},
			},
		},
	}
	// Pod with no volumes at all.
	emptyPod := &api.Pod{
		Spec: api.PodSpec{},
	}

	tests := []struct {
		newPod       *api.Pod
		existingPods []*api.Pod
		maxVols      int
		fits         bool
		test         string
	}{
		{
			newPod:       oneVolPod,
			existingPods: []*api.Pod{twoVolPod, oneVolPod},
			maxVols:      4,
			fits:         true,
			test:         "fits when node capacity >= new pod's EBS volumes",
		},
		{
			newPod:       twoVolPod,
			existingPods: []*api.Pod{oneVolPod},
			maxVols:      2,
			fits:         false,
			test:         "doesn't fit when node capacity < new pod's EBS volumes",
		},
		{
			newPod:       splitVolsPod,
			existingPods: []*api.Pod{twoVolPod},
			maxVols:      3,
			fits:         true,
			test:         "new pod's count ignores non-EBS volumes",
		},
		{
			newPod:       twoVolPod,
			existingPods: []*api.Pod{splitVolsPod, nonApplicablePod, emptyPod},
			maxVols:      3,
			fits:         true,
			test:         "existing pods' counts ignore non-EBS volumes",
		},
		{
			newPod:       ebsPVCPod,
			existingPods: []*api.Pod{splitVolsPod, nonApplicablePod, emptyPod},
			maxVols:      3,
			fits:         true,
			test:         "new pod's count considers PVCs backed by EBS volumes",
		},
		{
			newPod:       splitPVCPod,
			existingPods: []*api.Pod{splitVolsPod, oneVolPod},
			maxVols:      3,
			fits:         true,
			test:         "new pod's count ignores PVCs not backed by EBS volumes",
		},
		{
			newPod:       twoVolPod,
			existingPods: []*api.Pod{oneVolPod, ebsPVCPod},
			maxVols:      3,
			fits:         false,
			test:         "existing pods' counts considers PVCs backed by EBS volumes",
		},
		{
			newPod:       twoVolPod,
			existingPods: []*api.Pod{oneVolPod, twoVolPod, ebsPVCPod},
			maxVols:      4,
			fits:         true,
			test:         "already-mounted EBS volumes are always ok to allow",
		},
		{
			newPod:       splitVolsPod,
			existingPods: []*api.Pod{oneVolPod, oneVolPod, ebsPVCPod},
			maxVols:      3,
			fits:         true,
			test:         "the same EBS volumes are not counted multiple times",
		},
	}

	// Persistent volumes the fake lookups can resolve: one EBS-backed (with an
	// empty VolumeID) and one with no volume source set.
	pvInfo := FakePersistentVolumeInfo{
		{
			ObjectMeta: api.ObjectMeta{Name: "someEBSVol"},
			Spec: api.PersistentVolumeSpec{
				PersistentVolumeSource: api.PersistentVolumeSource{
					AWSElasticBlockStore: &api.AWSElasticBlockStoreVolumeSource{},
				},
			},
		},
		{
			ObjectMeta: api.ObjectMeta{Name: "someNonEBSVol"},
			Spec: api.PersistentVolumeSpec{
				PersistentVolumeSource: api.PersistentVolumeSource{},
			},
		},
	}

	// Claims bound to the volumes above; each claim shares its volume's name.
	pvcInfo := FakePersistentVolumeClaimInfo{
		{
			ObjectMeta: api.ObjectMeta{Name: "someEBSVol"},
			Spec:       api.PersistentVolumeClaimSpec{VolumeName: "someEBSVol"},
		},
		{
			ObjectMeta: api.ObjectMeta{Name: "someNonEBSVol"},
			Spec:       api.PersistentVolumeClaimSpec{VolumeName: "someNonEBSVol"},
		},
	}

	// Filter that treats only AWS ElasticBlockStore volumes as relevant.
	filter := VolumeFilter{
		FilterVolume: func(vol *api.Volume) (string, bool) {
			if vol.AWSElasticBlockStore != nil {
				return vol.AWSElasticBlockStore.VolumeID, true
			}
			return "", false
		},

		FilterPersistentVolume: func(pv *api.PersistentVolume) (string, bool) {
			if pv.Spec.AWSElasticBlockStore != nil {
				return pv.Spec.AWSElasticBlockStore.VolumeID, true
			}
			return "", false
		},
	}

	for _, test := range tests {
		pred := NewMaxPDVolumeCountPredicate(filter, test.maxVols, pvInfo, pvcInfo)
		fits, err := pred(test.newPod, test.existingPods, "some-node")
		if err != nil {
			// Name the case so a failure can be traced to its table entry.
			t.Errorf("%s: unexpected error: %v", test.test, err)
		}

		if fits != test.fits {
			t.Errorf("%s: expected %v, got %v", test.test, test.fits, fits)
		}
	}
}