Fail fast in PreFilter phase and return UnschedulableAndUnresolvable if immediate PVCs are not bound

This commit is contained in:
Yecheng Fu
2020-06-04 21:55:25 +08:00
parent 930b3a4df0
commit c4138361e4
12 changed files with 123 additions and 41 deletions

View File

@@ -78,17 +78,19 @@ type InTreeToCSITranslator interface {
//
// This integrates into the existing scheduler workflow as follows:
// 1. The scheduler takes a Pod off the scheduler queue and processes it serially:
// a. Invokes all filter plugins, parallelized across nodes. FindPodVolumes() is invoked here.
// b. Invokes all score plugins. Future/TBD
// c. Selects the best node for the Pod.
// d. Invokes all reserve plugins. AssumePodVolumes() is invoked here.
// a. Invokes all pre-filter plugins for the pod. GetPodVolumes() is invoked
// here, pod volume information will be saved in current scheduling cycle state for later use.
// b. Invokes all filter plugins, parallelized across nodes. FindPodVolumes() is invoked here.
// c. Invokes all score plugins. Future/TBD
// d. Selects the best node for the Pod.
// e. Invokes all reserve plugins. AssumePodVolumes() is invoked here.
// i. If PVC binding is required, cache in-memory only:
// * For manual binding: update PV objects for prebinding to the corresponding PVCs.
// * For dynamic provisioning: update PVC object with a selected node from c)
// * For the pod, which PVCs and PVs need API updates.
// ii. Afterwards, the main scheduler caches the Pod->Node binding in the scheduler's pod cache,
// This is handled in the scheduler and not here.
// e. Asynchronously bind volumes and pod in a separate goroutine
// f. Asynchronously bind volumes and pod in a separate goroutine
// i. BindPodVolumes() is called first in PreBind phase. It makes all the necessary API updates and waits for
// PV controller to fully bind and provision the PVCs. If binding fails, the Pod is sent
// back through the scheduler.
@@ -96,6 +98,10 @@ type InTreeToCSITranslator interface {
// 2. Once all the assume operations are done in d), the scheduler processes the next Pod in the scheduler queue
// while the actual binding operation occurs in the background.
type SchedulerVolumeBinder interface {
// GetPodVolumes returns a pod's PVCs separated into bound, unbound with delayed binding (including provisioning)
// and unbound with immediate binding (including prebound)
GetPodVolumes(pod *v1.Pod) (boundClaims, unboundClaimsDelayBinding, unboundClaimsImmediate []*v1.PersistentVolumeClaim, err error)
// FindPodVolumes checks if all of a Pod's PVCs can be satisfied by the node.
//
// If a PVC is bound, it checks if the PV's NodeAffinity matches the Node.
@@ -105,7 +111,7 @@ type SchedulerVolumeBinder interface {
// (currently) not usable for the pod.
//
// This function is called by the volume binding scheduler predicate and can be called in parallel
FindPodVolumes(pod *v1.Pod, node *v1.Node) (reasons ConflictReasons, err error)
FindPodVolumes(pod *v1.Pod, boundClaims, claimsToBind []*v1.PersistentVolumeClaim, node *v1.Node) (reasons ConflictReasons, err error)
// AssumePodVolumes will:
// 1. Take the PV matches for unbound PVCs and update the PV cache assuming
@@ -194,7 +200,7 @@ func (b *volumeBinder) DeletePodBindings(pod *v1.Pod) {
// FindPodVolumes caches the matching PVs and PVCs to provision per node in podBindingCache.
// This method intentionally takes in a *v1.Node object instead of using volumebinder.nodeInformer.
// That's necessary because some operations will need to pass in to the predicate fake node objects.
func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (reasons ConflictReasons, err error) {
func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, boundClaims, claimsToBind []*v1.PersistentVolumeClaim, node *v1.Node) (reasons ConflictReasons, err error) {
podName := getPodName(pod)
// Warning: Below log needs high verbosity as it can be printed several times (#60933).
@@ -248,20 +254,10 @@ func (b *volumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (reasons Confl
b.podBindingCache.UpdateBindings(pod, node.Name, matchedBindings, provisionedClaims)
}()
// The pod's volumes need to be processed in one call to avoid the race condition where
// volumes can get bound/provisioned in between calls.
boundClaims, claimsToBind, unboundClaimsImmediate, err := b.getPodVolumes(pod)
if err != nil {
return nil, err
}
// Immediate claims should be bound
if len(unboundClaimsImmediate) > 0 {
return nil, fmt.Errorf("pod has unbound immediate PersistentVolumeClaims")
}
// Check PV node affinity on bound volumes
if len(boundClaims) > 0 {
// TODO if node affinity does not match, we should
// UnschedulableAndUnresolvable error back to scheduler framework
boundVolumesSatisfied, err = b.checkBoundClaims(boundClaims, node, podName)
if err != nil {
return nil, err
@@ -684,9 +680,9 @@ func (b *volumeBinder) arePodVolumesBound(pod *v1.Pod) bool {
return true
}
// getPodVolumes returns a pod's PVCs separated into bound, unbound with delayed binding (including provisioning)
// GetPodVolumes returns a pod's PVCs separated into bound, unbound with delayed binding (including provisioning)
// and unbound with immediate binding (including prebound)
func (b *volumeBinder) getPodVolumes(pod *v1.Pod) (boundClaims []*v1.PersistentVolumeClaim, unboundClaimsDelayBinding []*v1.PersistentVolumeClaim, unboundClaimsImmediate []*v1.PersistentVolumeClaim, err error) {
func (b *volumeBinder) GetPodVolumes(pod *v1.Pod) (boundClaims []*v1.PersistentVolumeClaim, unboundClaimsDelayBinding []*v1.PersistentVolumeClaim, unboundClaimsImmediate []*v1.PersistentVolumeClaim, err error) {
boundClaims = []*v1.PersistentVolumeClaim{}
unboundClaimsImmediate = []*v1.PersistentVolumeClaim{}
unboundClaimsDelayBinding = []*v1.PersistentVolumeClaim{}

View File

@@ -42,8 +42,13 @@ type FakeVolumeBinder struct {
BindCalled bool
}
// GetPodVolumes implements SchedulerVolumeBinder.GetPodVolumes.
func (b *FakeVolumeBinder) GetPodVolumes(pod *v1.Pod) (boundClaims, unboundClaimsDelayBinding, unboundClaimsImmediate []*v1.PersistentVolumeClaim, err error) {
return nil, nil, nil, nil
}
// FindPodVolumes implements SchedulerVolumeBinder.FindPodVolumes.
func (b *FakeVolumeBinder) FindPodVolumes(pod *v1.Pod, node *v1.Node) (reasons ConflictReasons, err error) {
func (b *FakeVolumeBinder) FindPodVolumes(pod *v1.Pod, _, _ []*v1.PersistentVolumeClaim, node *v1.Node) (reasons ConflictReasons, err error) {
return b.config.FindReasons, b.config.FindErr
}

View File

@@ -771,6 +771,18 @@ func checkReasons(t *testing.T, actual, expected ConflictReasons) {
}
}
// findPodVolumes gets and finds volumes for given pod and node
func findPodVolumes(binder SchedulerVolumeBinder, pod *v1.Pod, node *v1.Node) (ConflictReasons, error) {
boundClaims, claimsToBind, unboundClaimsImmediate, err := binder.GetPodVolumes(pod)
if err != nil {
return nil, err
}
if len(unboundClaimsImmediate) > 0 {
return nil, fmt.Errorf("pod has unbound immediate PersistentVolumeClaims")
}
return binder.FindPodVolumes(pod, boundClaims, claimsToBind, node)
}
func TestFindPodVolumesWithoutProvisioning(t *testing.T) {
type scenarioType struct {
// Inputs
@@ -907,7 +919,7 @@ func TestFindPodVolumesWithoutProvisioning(t *testing.T) {
}
// Execute
reasons, err := testEnv.binder.FindPodVolumes(scenario.pod, testNode)
reasons, err := findPodVolumes(testEnv.binder, scenario.pod, testNode)
// Validate
if !scenario.shouldFail && err != nil {
@@ -1012,7 +1024,7 @@ func TestFindPodVolumesWithProvisioning(t *testing.T) {
}
// Execute
reasons, err := testEnv.binder.FindPodVolumes(scenario.pod, testNode)
reasons, err := findPodVolumes(testEnv.binder, scenario.pod, testNode)
// Validate
if !scenario.shouldFail && err != nil {
@@ -1112,7 +1124,7 @@ func TestFindPodVolumesWithCSIMigration(t *testing.T) {
}
// Execute
reasons, err := testEnv.binder.FindPodVolumes(scenario.pod, node)
reasons, err := findPodVolumes(testEnv.binder, scenario.pod, node)
// Validate
if !scenario.shouldFail && err != nil {
@@ -1933,7 +1945,7 @@ func TestFindAssumeVolumes(t *testing.T) {
// Execute
// 1. Find matching PVs
reasons, err := testEnv.binder.FindPodVolumes(pod, testNode)
reasons, err := findPodVolumes(testEnv.binder, pod, testNode)
if err != nil {
t.Errorf("Test failed: FindPodVolumes returned error: %v", err)
}
@@ -1959,7 +1971,7 @@ func TestFindAssumeVolumes(t *testing.T) {
// This should always return the original chosen pv
// Run this many times in case sorting returns different orders for the two PVs.
for i := 0; i < 50; i++ {
reasons, err := testEnv.binder.FindPodVolumes(pod, testNode)
reasons, err := findPodVolumes(testEnv.binder, pod, testNode)
if err != nil {
t.Errorf("Test failed: FindPodVolumes returned error: %v", err)
}