move NoDiskConflict predicate to its filter plugin
@@ -22,7 +22,6 @@ import (
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
-	"k8s.io/kubernetes/pkg/scheduler/framework/plugins/migration"
 	framework "k8s.io/kubernetes/pkg/scheduler/framework/v1alpha1"
 	"k8s.io/kubernetes/pkg/scheduler/nodeinfo"
 )
@@ -40,11 +39,90 @@ func (pl *VolumeRestrictions) Name() string {
 	return Name
 }
 
+func isVolumeConflict(volume v1.Volume, pod *v1.Pod) bool {
+	// fast path if there is no conflict checking targets.
+	if volume.GCEPersistentDisk == nil && volume.AWSElasticBlockStore == nil && volume.RBD == nil && volume.ISCSI == nil {
+		return false
+	}
+
+	for _, existingVolume := range pod.Spec.Volumes {
+		// Same GCE disk mounted by multiple pods conflicts unless all pods mount it read-only.
+		if volume.GCEPersistentDisk != nil && existingVolume.GCEPersistentDisk != nil {
+			disk, existingDisk := volume.GCEPersistentDisk, existingVolume.GCEPersistentDisk
+			if disk.PDName == existingDisk.PDName && !(disk.ReadOnly && existingDisk.ReadOnly) {
+				return true
+			}
+		}
+
+		if volume.AWSElasticBlockStore != nil && existingVolume.AWSElasticBlockStore != nil {
+			if volume.AWSElasticBlockStore.VolumeID == existingVolume.AWSElasticBlockStore.VolumeID {
+				return true
+			}
+		}
+
+		if volume.ISCSI != nil && existingVolume.ISCSI != nil {
+			iqn := volume.ISCSI.IQN
+			eiqn := existingVolume.ISCSI.IQN
+			// two ISCSI volumes are same, if they share the same iqn. As iscsi volumes are of type
+			// RWO or ROX, we could permit only one RW mount. Same iscsi volume mounted by multiple Pods
+			// conflict unless all other pods mount as read only.
+			if iqn == eiqn && !(volume.ISCSI.ReadOnly && existingVolume.ISCSI.ReadOnly) {
+				return true
+			}
+		}
+
+		if volume.RBD != nil && existingVolume.RBD != nil {
+			mon, pool, image := volume.RBD.CephMonitors, volume.RBD.RBDPool, volume.RBD.RBDImage
+			emon, epool, eimage := existingVolume.RBD.CephMonitors, existingVolume.RBD.RBDPool, existingVolume.RBD.RBDImage
+			// two RBDs images are the same if they share the same Ceph monitor, are in the same RADOS Pool, and have the same image name
+			// only one read-write mount is permitted for the same RBD image.
+			// same RBD image mounted by multiple Pods conflicts unless all Pods mount the image read-only
+			if haveOverlap(mon, emon) && pool == epool && image == eimage && !(volume.RBD.ReadOnly && existingVolume.RBD.ReadOnly) {
+				return true
+			}
+		}
+	}
+
+	return false
+}
+
+// haveOverlap searches two arrays and returns true if they have at least one common element; returns false otherwise.
+func haveOverlap(a1, a2 []string) bool {
+	if len(a1) > len(a2) {
+		a1, a2 = a2, a1
+	}
+	m := map[string]bool{}
+
+	for _, val := range a1 {
+		m[val] = true
+	}
+	for _, val := range a2 {
+		if _, ok := m[val]; ok {
+			return true
+		}
+	}
+
+	return false
+}
+
 // Filter invoked at the filter extension point.
+// It evaluates if a pod can fit due to the volumes it requests, and those that
+// are already mounted. If there is already a volume mounted on that node, another pod that uses the same volume
+// can't be scheduled there.
+// This is GCE, Amazon EBS, ISCSI and Ceph RBD specific for now:
+// - GCE PD allows multiple mounts as long as they're all read-only
+// - AWS EBS forbids any two pods mounting the same volume ID
+// - Ceph RBD forbids if any two pods share at least same monitor, and match pool and image, and the image is read-only
+// - ISCSI forbids if any two pods share at least same IQN and ISCSI volume is read-only
 func (pl *VolumeRestrictions) Filter(ctx context.Context, _ *framework.CycleState, pod *v1.Pod, nodeInfo *nodeinfo.NodeInfo) *framework.Status {
-	// metadata is not needed for NoDiskConflict
-	_, reasons, err := predicates.NoDiskConflict(pod, nil, nodeInfo)
-	return migration.PredicateResultToFrameworkStatus(reasons, err)
+	for _, v := range pod.Spec.Volumes {
+		for _, ev := range nodeInfo.Pods() {
+			if isVolumeConflict(v, ev) {
+				return framework.NewStatus(framework.Unschedulable, predicates.ErrDiskConflict.GetReason())
+			}
+		}
+	}
+	return nil
 }
 
 // New initializes a new plugin and returns it.
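The constructor that this trailing comment introduces is not shown in the diff above. For a stateless plugin under the v1alpha1 framework imported in this file, a constructor would plausibly look like the sketch below; the exact signature and body are an assumption for illustration, not taken from this commit:

// Assumed shape of the constructor (not part of the diff shown above).
// New initializes a new plugin and returns it.
func New(_ *runtime.Unknown, _ framework.FrameworkHandle) (framework.Plugin, error) {
	// VolumeRestrictions keeps no per-cycle state, so the zero value is enough.
	return &VolumeRestrictions{}, nil
}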
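To make the read-only rule that the new Filter enforces concrete, here is a minimal test-style sketch of the conflict check. The test name and disk name are hypothetical, and it assumes it lives in the plugin's own package so the unexported isVolumeConflict is reachable:

package volumerestrictions

import (
	"testing"

	v1 "k8s.io/api/core/v1"
)

// TestGCEDiskConflictSketch (hypothetical): two read-write mounts of the same
// GCE PD conflict; if every mount of the PD is read-only, they do not.
func TestGCEDiskConflictSketch(t *testing.T) {
	// Incoming pod's volume: GCE PD "data-disk", mounted read-write.
	rw := v1.Volume{
		VolumeSource: v1.VolumeSource{
			GCEPersistentDisk: &v1.GCEPersistentDiskVolumeSource{PDName: "data-disk"},
		},
	}
	// An already-scheduled pod on the node mounting the same PD read-write.
	existing := &v1.Pod{Spec: v1.PodSpec{Volumes: []v1.Volume{rw}}}
	if !isVolumeConflict(rw, existing) {
		t.Errorf("expected a conflict for two read-write mounts of the same PD")
	}

	// The same PD mounted read-only on both sides does not conflict.
	ro := v1.Volume{
		VolumeSource: v1.VolumeSource{
			GCEPersistentDisk: &v1.GCEPersistentDiskVolumeSource{PDName: "data-disk", ReadOnly: true},
		},
	}
	existingRO := &v1.Pod{Spec: v1.PodSpec{Volumes: []v1.Volume{ro}}}
	if isVolumeConflict(ro, existingRO) {
		t.Errorf("expected no conflict when every mount of the PD is read-only")
	}
}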