Files
kubernetes/pkg/volume/aws_ebs/attacher.go
Jing Xu abbde43374 Add sync state loop in master's volume reconciler
At master volume reconciler, the information about which volumes are
attached to nodes is cached in actual state of world. However, this
information might be out of date in case that node is terminated (volume
is detached automatically). In this situation, reconciler assume volume
is still attached and will not issue attach operation when node comes
back. Pods created on those nodes will fail to mount.

This PR adds the logic to periodically sync up the truth for attached volumes kept in the actual state cache. If the volume is no longer attached to the node, the actual state will be updated to reflect the truth. In turn, reconciler will take actions if needed.

To avoid issuing many concurrent operations on cloud provider, this PR
tries to add batch operation to check whether a list of volumes are
attached to the node instead of one request per volume.

More details are explained in PR #33760
2016-10-28 09:24:53 -07:00

271 lines
7.9 KiB
Go

/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package aws_ebs
import (
"fmt"
"os"
"path"
"strconv"
"time"
"github.com/golang/glog"
"k8s.io/kubernetes/pkg/cloudprovider/providers/aws"
"k8s.io/kubernetes/pkg/types"
"k8s.io/kubernetes/pkg/util/exec"
"k8s.io/kubernetes/pkg/util/mount"
"k8s.io/kubernetes/pkg/volume"
volumeutil "k8s.io/kubernetes/pkg/volume/util"
)
type awsElasticBlockStoreAttacher struct {
host volume.VolumeHost
awsVolumes aws.Volumes
}
var _ volume.Attacher = &awsElasticBlockStoreAttacher{}
var _ volume.AttachableVolumePlugin = &awsElasticBlockStorePlugin{}
func (plugin *awsElasticBlockStorePlugin) NewAttacher() (volume.Attacher, error) {
awsCloud, err := getCloudProvider(plugin.host.GetCloudProvider())
if err != nil {
return nil, err
}
return &awsElasticBlockStoreAttacher{
host: plugin.host,
awsVolumes: awsCloud,
}, nil
}
func (plugin *awsElasticBlockStorePlugin) GetDeviceMountRefs(deviceMountPath string) ([]string, error) {
mounter := plugin.host.GetMounter()
return mount.GetMountRefs(mounter, deviceMountPath)
}
func (attacher *awsElasticBlockStoreAttacher) Attach(spec *volume.Spec, nodeName types.NodeName) (string, error) {
volumeSource, readOnly, err := getVolumeSource(spec)
if err != nil {
return "", err
}
volumeID := volumeSource.VolumeID
// awsCloud.AttachDisk checks if disk is already attached to node and
// succeeds in that case, so no need to do that separately.
devicePath, err := attacher.awsVolumes.AttachDisk(volumeID, nodeName, readOnly)
if err != nil {
glog.Errorf("Error attaching volume %q: %+v", volumeID, err)
return "", err
}
return devicePath, nil
}
func (attacher *awsElasticBlockStoreAttacher) VolumesAreAttached(specs []*volume.Spec, nodeName types.NodeName) (map[*volume.Spec]bool, error) {
volumesAttachedCheck := make(map[*volume.Spec]bool)
volumeSpecMap := make(map[string]*volume.Spec)
volumeIDList := []string{}
for _, spec := range specs {
volumeSource, _, err := getVolumeSource(spec)
if err != nil {
glog.Errorf("Error getting volume (%q) source : %v", spec.Name(), err)
continue
}
volumeIDList = append(volumeIDList, volumeSource.VolumeID)
volumesAttachedCheck[spec] = true
volumeSpecMap[volumeSource.VolumeID] = spec
}
attachedResult, err := attacher.awsVolumes.DisksAreAttached(volumeIDList, nodeName)
if err != nil {
// Log error and continue with attach
glog.Errorf(
"Error checking if volumes (%v) is already attached to current node (%q). err=%v",
volumeIDList, nodeName, err)
return volumesAttachedCheck, err
}
for volumeID, attached := range attachedResult {
if !attached {
spec := volumeSpecMap[volumeID]
volumesAttachedCheck[spec] = false
glog.V(2).Infof("VolumesAreAttached: check volume %q (specName: %q) is no longer attached", volumeID, spec.Name())
}
}
return volumesAttachedCheck, nil
}
func (attacher *awsElasticBlockStoreAttacher) WaitForAttach(spec *volume.Spec, devicePath string, timeout time.Duration) (string, error) {
volumeSource, _, err := getVolumeSource(spec)
if err != nil {
return "", err
}
volumeID := volumeSource.VolumeID
partition := ""
if volumeSource.Partition != 0 {
partition = strconv.Itoa(int(volumeSource.Partition))
}
if devicePath == "" {
return "", fmt.Errorf("WaitForAttach failed for AWS Volume %q: devicePath is empty.", volumeID)
}
ticker := time.NewTicker(checkSleepDuration)
defer ticker.Stop()
timer := time.NewTimer(timeout)
defer timer.Stop()
for {
select {
case <-ticker.C:
glog.V(5).Infof("Checking AWS Volume %q is attached.", volumeID)
if devicePath != "" {
devicePaths := getDiskByIdPaths(partition, devicePath)
path, err := verifyDevicePath(devicePaths)
if err != nil {
// Log error, if any, and continue checking periodically. See issue #11321
glog.Errorf("Error verifying AWS Volume (%q) is attached: %v", volumeID, err)
} else if path != "" {
// A device path has successfully been created for the PD
glog.Infof("Successfully found attached AWS Volume %q.", volumeID)
return path, nil
}
} else {
glog.V(5).Infof("AWS Volume (%q) is not attached yet", volumeID)
}
case <-timer.C:
return "", fmt.Errorf("Could not find attached AWS Volume %q. Timeout waiting for mount paths to be created.", volumeID)
}
}
}
func (attacher *awsElasticBlockStoreAttacher) GetDeviceMountPath(
spec *volume.Spec) (string, error) {
volumeSource, _, err := getVolumeSource(spec)
if err != nil {
return "", err
}
return makeGlobalPDPath(attacher.host, volumeSource.VolumeID), nil
}
// FIXME: this method can be further pruned.
func (attacher *awsElasticBlockStoreAttacher) MountDevice(spec *volume.Spec, devicePath string, deviceMountPath string) error {
mounter := attacher.host.GetMounter()
notMnt, err := mounter.IsLikelyNotMountPoint(deviceMountPath)
if err != nil {
if os.IsNotExist(err) {
if err := os.MkdirAll(deviceMountPath, 0750); err != nil {
return err
}
notMnt = true
} else {
return err
}
}
volumeSource, readOnly, err := getVolumeSource(spec)
if err != nil {
return err
}
options := []string{}
if readOnly {
options = append(options, "ro")
}
if notMnt {
diskMounter := &mount.SafeFormatAndMount{Interface: mounter, Runner: exec.New()}
err = diskMounter.FormatAndMount(devicePath, deviceMountPath, volumeSource.FSType, options)
if err != nil {
os.Remove(deviceMountPath)
return err
}
}
return nil
}
type awsElasticBlockStoreDetacher struct {
mounter mount.Interface
awsVolumes aws.Volumes
}
var _ volume.Detacher = &awsElasticBlockStoreDetacher{}
func (plugin *awsElasticBlockStorePlugin) NewDetacher() (volume.Detacher, error) {
awsCloud, err := getCloudProvider(plugin.host.GetCloudProvider())
if err != nil {
return nil, err
}
return &awsElasticBlockStoreDetacher{
mounter: plugin.host.GetMounter(),
awsVolumes: awsCloud,
}, nil
}
func (detacher *awsElasticBlockStoreDetacher) Detach(deviceMountPath string, nodeName types.NodeName) error {
volumeID := path.Base(deviceMountPath)
attached, err := detacher.awsVolumes.DiskIsAttached(volumeID, nodeName)
if err != nil {
// Log error and continue with detach
glog.Errorf(
"Error checking if volume (%q) is already attached to current node (%q). Will continue and try detach anyway. err=%v",
volumeID, nodeName, err)
}
if err == nil && !attached {
// Volume is already detached from node.
glog.Infof("detach operation was successful. volume %q is already detached from node %q.", volumeID, nodeName)
return nil
}
if _, err = detacher.awsVolumes.DetachDisk(volumeID, nodeName); err != nil {
glog.Errorf("Error detaching volumeID %q: %v", volumeID, err)
return err
}
return nil
}
func (detacher *awsElasticBlockStoreDetacher) WaitForDetach(devicePath string, timeout time.Duration) error {
ticker := time.NewTicker(checkSleepDuration)
defer ticker.Stop()
timer := time.NewTimer(timeout)
defer timer.Stop()
for {
select {
case <-ticker.C:
glog.V(5).Infof("Checking device %q is detached.", devicePath)
if pathExists, err := volumeutil.PathExists(devicePath); err != nil {
return fmt.Errorf("Error checking if device path exists: %v", err)
} else if !pathExists {
return nil
}
case <-timer.C:
return fmt.Errorf("Timeout reached; PD Device %v is still attached", devicePath)
}
}
}
func (detacher *awsElasticBlockStoreDetacher) UnmountDevice(deviceMountPath string) error {
return volumeutil.UnmountPath(deviceMountPath, detacher.mounter)
}