Merge pull request #65223 from jsafrane/aws-inaccessible-key
Automatic merge from submit-queue (batch tested with PRs 65187, 65206, 65223, 64752, 65238). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Fixed detection of inaccessible AWS encryption key. AWS provisioner now checks if created encrypted volume gets "Available" or it gets silently deleted by AWS because StorageClass referenced invalid (e.g. non-existing) KMS key for encryption. This seems to be the only way how to detect such invalid key, because Kubernetes may not have enough permission to check if the key exists. **Which issue(s) this PR fixes** Fixes #62171 **Special notes for your reviewer**: **Release note**: ```release-note AWS now checks for validity of ecryption key when creating encrypted volumes. Dynamic provisioning of encrypted volume may get slower due to these checks. ``` /sig aws /sig storage @kubernetes/sig-aws-misc
This commit is contained in:
@@ -220,6 +220,13 @@ const (
|
||||
createTagFactor = 2.0
|
||||
createTagSteps = 9
|
||||
|
||||
// encryptedCheck* is configuration of poll for created volume to check
|
||||
// it has not been silently removed by AWS.
|
||||
// On a random AWS account (shared among several developers) it took 4s on
|
||||
// average.
|
||||
encryptedCheckInterval = 1 * time.Second
|
||||
encryptedCheckTimeout = 30 * time.Second
|
||||
|
||||
// Number of node names that can be added to a filter. The AWS limit is 200
|
||||
// but we are using a lower limit on purpose
|
||||
filterNodeLimit = 150
|
||||
@@ -2186,14 +2193,6 @@ func (c *Cloud) CreateDisk(volumeOptions *VolumeOptions) (KubernetesVolumeID, er
|
||||
request.VolumeType = aws.String(createType)
|
||||
request.Encrypted = aws.Bool(volumeOptions.Encrypted)
|
||||
if len(volumeOptions.KmsKeyId) > 0 {
|
||||
if missing, err := c.checkEncryptionKey(volumeOptions.KmsKeyId); err != nil {
|
||||
if missing {
|
||||
// KSM key is missing, provisioning would fail
|
||||
return "", err
|
||||
}
|
||||
// Log checkEncryptionKey error and try provisioning anyway.
|
||||
glog.Warningf("Cannot check KSM key %s: %v", volumeOptions.KmsKeyId, err)
|
||||
}
|
||||
request.KmsKeyId = aws.String(volumeOptions.KmsKeyId)
|
||||
request.Encrypted = aws.Bool(true)
|
||||
}
|
||||
@@ -2222,24 +2221,50 @@ func (c *Cloud) CreateDisk(volumeOptions *VolumeOptions) (KubernetesVolumeID, er
|
||||
return "", fmt.Errorf("error tagging volume %s: %q", volumeName, err)
|
||||
}
|
||||
|
||||
// AWS has a bad habbit of reporting success when creating a volume with
|
||||
// encryption keys that either don't exists or have wrong permissions.
|
||||
// Such volume lives for couple of seconds and then it's silently deleted
|
||||
// by AWS. There is no other check to ensure that given KMS key is correct,
|
||||
// because Kubernetes may have limited permissions to the key.
|
||||
if len(volumeOptions.KmsKeyId) > 0 {
|
||||
err := c.waitUntilVolumeAvailable(volumeName)
|
||||
if err != nil {
|
||||
if isAWSErrorVolumeNotFound(err) {
|
||||
err = fmt.Errorf("failed to create encrypted volume: the volume disappeared after creation, most likely due to inaccessible KMS encryption key")
|
||||
}
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
return volumeName, nil
|
||||
}
|
||||
|
||||
// checkEncryptionKey tests that given encryption key exists.
|
||||
func (c *Cloud) checkEncryptionKey(keyId string) (missing bool, err error) {
|
||||
input := &kms.DescribeKeyInput{
|
||||
KeyId: aws.String(keyId),
|
||||
func (c *Cloud) waitUntilVolumeAvailable(volumeName KubernetesVolumeID) error {
|
||||
disk, err := newAWSDisk(c, volumeName)
|
||||
if err != nil {
|
||||
// Unreachable code
|
||||
return err
|
||||
}
|
||||
_, err = c.kms.DescribeKey(input)
|
||||
if err == nil {
|
||||
return false, nil
|
||||
}
|
||||
if awsError, ok := err.(awserr.Error); ok {
|
||||
if awsError.Code() == "NotFoundException" {
|
||||
return true, fmt.Errorf("KMS key %s not found: %q", keyId, err)
|
||||
|
||||
err = wait.Poll(encryptedCheckInterval, encryptedCheckTimeout, func() (done bool, err error) {
|
||||
vol, err := disk.describeVolume()
|
||||
if err != nil {
|
||||
return true, err
|
||||
}
|
||||
}
|
||||
return false, fmt.Errorf("Error checking KSM key %s: %q", keyId, err)
|
||||
if vol.State != nil {
|
||||
switch *vol.State {
|
||||
case "available":
|
||||
// The volume is Available, it won't be deleted now.
|
||||
return true, nil
|
||||
case "creating":
|
||||
return false, nil
|
||||
default:
|
||||
return true, fmt.Errorf("unexpected State of newly created AWS EBS volume %s: %q", volumeName, *vol.State)
|
||||
}
|
||||
}
|
||||
return false, nil
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
// DeleteDisk implements Volumes.DeleteDisk
|
||||
|
@@ -851,6 +851,53 @@ var _ = utils.SIGDescribe("Dynamic Provisioning", func() {
|
||||
testDynamicProvisioning(test, c, claim, nil)
|
||||
})
|
||||
})
|
||||
Describe("Invalid AWS KMS key", func() {
|
||||
It("should report an error and create no PV", func() {
|
||||
framework.SkipUnlessProviderIs("aws")
|
||||
test := storageClassTest{
|
||||
name: "AWS EBS with invalid KMS key",
|
||||
provisioner: "kubernetes.io/aws-ebs",
|
||||
claimSize: "2Gi",
|
||||
parameters: map[string]string{"kmsKeyId": "arn:aws:kms:us-east-1:123456789012:key/55555555-5555-5555-5555-555555555555"},
|
||||
}
|
||||
|
||||
By("creating a StorageClass")
|
||||
suffix := fmt.Sprintf("invalid-aws")
|
||||
class := newStorageClass(test, ns, suffix)
|
||||
class, err := c.StorageV1().StorageClasses().Create(class)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
defer func() {
|
||||
framework.Logf("deleting storage class %s", class.Name)
|
||||
framework.ExpectNoError(c.StorageV1().StorageClasses().Delete(class.Name, nil))
|
||||
}()
|
||||
|
||||
By("creating a claim object with a suffix for gluster dynamic provisioner")
|
||||
claim := newClaim(test, ns, suffix)
|
||||
claim.Spec.StorageClassName = &class.Name
|
||||
claim, err = c.CoreV1().PersistentVolumeClaims(claim.Namespace).Create(claim)
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
defer func() {
|
||||
framework.Logf("deleting claim %q/%q", claim.Namespace, claim.Name)
|
||||
err = c.CoreV1().PersistentVolumeClaims(claim.Namespace).Delete(claim.Name, nil)
|
||||
if err != nil && !apierrs.IsNotFound(err) {
|
||||
framework.Failf("Error deleting claim %q. Error: %v", claim.Name, err)
|
||||
}
|
||||
}()
|
||||
|
||||
// Watch events until the message about invalid key appears
|
||||
err = wait.Poll(time.Second, framework.ClaimProvisionTimeout, func() (bool, error) {
|
||||
events, err := c.CoreV1().Events(claim.Namespace).List(metav1.ListOptions{})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
for _, event := range events.Items {
|
||||
if strings.Contains(event.Message, "failed to create encrypted volume: the volume disappeared after creation, most likely due to inaccessible KMS encryption key") {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return false, nil
|
||||
})
|
||||
Expect(err).NotTo(HaveOccurred())
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
func getDefaultStorageClassName(c clientset.Interface) string {
|
||||
|
Reference in New Issue
Block a user