Merge pull request #4824 from dcantah/dcantah/reuse-scratch

Add scratch space re-use functionality to LCOW snapshotter
This commit is contained in:
Derek McGowan 2021-01-21 17:21:31 -08:00 committed by GitHub
commit 9b9de47eb9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -60,6 +60,9 @@ func init() {
const ( const (
rootfsSizeLabel = "containerd.io/snapshot/io.microsoft.container.storage.rootfs.size-gb" rootfsSizeLabel = "containerd.io/snapshot/io.microsoft.container.storage.rootfs.size-gb"
rootfsLocLabel = "containerd.io/snapshot/io.microsoft.container.storage.rootfs.location"
reuseScratchLabel = "containerd.io/snapshot/io.microsoft.container.storage.reuse-scratch"
reuseScratchOwnerKeyLabel = "containerd.io/snapshot/io.microsoft.owner.key"
) )
type snapshotter struct { type snapshotter struct {
@ -306,7 +309,7 @@ func (s *snapshotter) getSnapshotDir(id string) string {
return filepath.Join(s.root, "snapshots", id) return filepath.Join(s.root, "snapshots", id)
} }
func (s *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, key, parent string, opts []snapshots.Opt) ([]mount.Mount, error) { func (s *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, key, parent string, opts []snapshots.Opt) (_ []mount.Mount, err error) {
ctx, t, err := s.ms.TransactionContext(ctx, true) ctx, t, err := s.ms.TransactionContext(ctx, true)
if err != nil { if err != nil {
return nil, err return nil, err
@ -330,33 +333,54 @@ func (s *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, k
for _, o := range opts { for _, o := range opts {
o(&snapshotInfo) o(&snapshotInfo)
} }
defer func() {
if err != nil {
os.RemoveAll(snDir)
}
}()
// IO/disk space optimization // IO/disk space optimization
// //
// We only need one sandbox.vhd for the container. Skip making one for this // We only need one sandbox.vhd for the container. Skip making one for this
// snapshot if this isn't the snapshot that just houses the final sandbox.vhd // snapshot if this isn't the snapshot that just houses the final sandbox.vhd
// that will be mounted as the containers scratch. Currently the key for a snapshot // that will be mounted as the containers scratch. The key for a snapshot
// where a layer.vhd will be extracted to it will have the string `extract-` in it. // where a layer.vhd will be extracted to it will have the substring `extract-` in it.
// If this is changed this will also need to be changed. // If this is changed this will also need to be changed.
// //
// We save about 17MB per layer (if the default scratch vhd size of 20GB is used) and of // We save about 17MB per layer (if the default scratch vhd size of 20GB is used) and of
// course the time to copy the vhd per snapshot. // course the time to copy the vhdx per snapshot.
if !strings.Contains(key, snapshots.UnpackKeyPrefix) { if !strings.Contains(key, snapshots.UnpackKeyPrefix) {
// This is the code path that handles re-using a scratch disk that has already been
// made/mounted for an LCOW UVM. In the non-sharing case, we create a new disk and mount this
// into the LCOW UVM for every container but there are certain scenarios where we'd rather
// just mount a single disk and then have every container share this one storage space instead of
// every container having its own xGB of space to play around with.
//
// This is accomplished by just making a symlink to the disk that we'd like to share and then
// using ref counting later on down the stack in hcsshim if we see that we've already mounted this
// disk.
shareScratch := snapshotInfo.Labels[reuseScratchLabel]
ownerKey := snapshotInfo.Labels[reuseScratchOwnerKeyLabel]
if shareScratch == "true" && ownerKey != "" {
if err = s.handleSharing(ctx, ownerKey, snDir); err != nil {
return nil, err
}
} else {
var sizeGB int var sizeGB int
if sizeGBstr, ok := snapshotInfo.Labels[rootfsSizeLabel]; ok { if sizeGBstr, ok := snapshotInfo.Labels[rootfsSizeLabel]; ok {
i32, err := strconv.ParseInt(sizeGBstr, 10, 32) i64, _ := strconv.ParseInt(sizeGBstr, 10, 32)
if err != nil { sizeGB = int(i64)
return nil, errors.Wrapf(err, "failed to parse label %q=%q", rootfsSizeLabel, sizeGBstr)
}
sizeGB = int(i32)
} }
scratchSource, err := s.openOrCreateScratch(ctx, sizeGB) scratchLocation := snapshotInfo.Labels[rootfsLocLabel]
scratchSource, err := s.openOrCreateScratch(ctx, sizeGB, scratchLocation)
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer scratchSource.Close() defer scratchSource.Close()
// Create the sandbox.vhdx for this snapshot from the cache. // Create the sandbox.vhdx for this snapshot from the cache
destPath := filepath.Join(snDir, "sandbox.vhdx") destPath := filepath.Join(snDir, "sandbox.vhdx")
dest, err := os.OpenFile(destPath, os.O_RDWR|os.O_CREATE, 0700) dest, err := os.OpenFile(destPath, os.O_RDWR|os.O_CREATE, 0700)
if err != nil { if err != nil {
@ -370,6 +394,7 @@ func (s *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, k
} }
} }
} }
}
if err := t.Commit(); err != nil { if err := t.Commit(); err != nil {
return nil, errors.Wrap(err, "commit failed") return nil, errors.Wrap(err, "commit failed")
@ -378,8 +403,38 @@ func (s *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, k
return s.mounts(newSnapshot), nil return s.mounts(newSnapshot), nil
} }
func (s *snapshotter) openOrCreateScratch(ctx context.Context, sizeGB int) (_ *os.File, err error) { func (s *snapshotter) handleSharing(ctx context.Context, id, snDir string) error {
// Create the scratch.vhdx cache file if it doesn't already exist. var key string
if err := s.Walk(ctx, func(ctx context.Context, info snapshots.Info) error {
if strings.Contains(info.Name, id) {
key = info.Name
}
return nil
}); err != nil {
return err
}
mounts, err := s.Mounts(ctx, key)
if err != nil {
return errors.Wrap(err, "failed to get mounts for owner snapshot")
}
sandboxPath := filepath.Join(mounts[0].Source, "sandbox.vhdx")
linkPath := filepath.Join(snDir, "sandbox.vhdx")
if _, err := os.Stat(sandboxPath); err != nil {
return errors.Wrap(err, "failed to find sandbox.vhdx in snapshot directory")
}
// We've found everything we need, now just make a symlink in our new snapshot to the
// sandbox.vhdx in the scratch we're asking to share.
if err := os.Symlink(sandboxPath, linkPath); err != nil {
return errors.Wrap(err, "failed to create symlink for sandbox scratch space")
}
return nil
}
func (s *snapshotter) openOrCreateScratch(ctx context.Context, sizeGB int, scratchLoc string) (_ *os.File, err error) {
// Create the scratch.vhdx cache file if it doesn't already exist.
s.scratchLock.Lock() s.scratchLock.Lock()
defer s.scratchLock.Unlock() defer s.scratchLock.Unlock()
@ -389,13 +444,17 @@ func (s *snapshotter) openOrCreateScratch(ctx context.Context, sizeGB int) (_ *o
} }
scratchFinalPath := filepath.Join(s.root, vhdFileName) scratchFinalPath := filepath.Join(s.root, vhdFileName)
if scratchLoc != "" {
scratchFinalPath = filepath.Join(scratchLoc, vhdFileName)
}
scratchSource, err := os.OpenFile(scratchFinalPath, os.O_RDONLY, 0700) scratchSource, err := os.OpenFile(scratchFinalPath, os.O_RDONLY, 0700)
if err != nil { if err != nil {
if !os.IsNotExist(err) { if !os.IsNotExist(err) {
return nil, errors.Wrapf(err, "failed to open vhd %s for read", vhdFileName) return nil, errors.Wrapf(err, "failed to open vhd %s for read", vhdFileName)
} }
log.G(ctx).Debugf("vhd %s not found, creating a new one", vhdFileName) log.G(ctx).Debugf("vhdx %s not found, creating a new one", vhdFileName)
// Golang logic for ioutil.TempFile without the file creation // Golang logic for ioutil.TempFile without the file creation
r := uint32(time.Now().UnixNano() + int64(os.Getpid())) r := uint32(time.Now().UnixNano() + int64(os.Getpid()))
@ -417,18 +476,20 @@ func (s *snapshotter) openOrCreateScratch(ctx context.Context, sizeGB int) (_ *o
} }
if err := rhcs.CreateScratchWithOpts(ctx, scratchTempPath, &opt); err != nil { if err := rhcs.CreateScratchWithOpts(ctx, scratchTempPath, &opt); err != nil {
_ = os.Remove(scratchTempPath) os.Remove(scratchTempPath)
return nil, errors.Wrapf(err, "failed to create '%s' temp file", scratchTempName) return nil, errors.Wrapf(err, "failed to create '%s' temp file", scratchTempName)
} }
if err := os.Rename(scratchTempPath, scratchFinalPath); err != nil { if err := os.Rename(scratchTempPath, scratchFinalPath); err != nil {
_ = os.Remove(scratchTempPath) os.Remove(scratchTempPath)
return nil, errors.Wrapf(err, "failed to rename '%s' temp file to 'scratch.vhdx'", scratchTempName) return nil, errors.Wrapf(err, "failed to rename '%s' temp file to 'scratch.vhdx'", scratchTempName)
} }
scratchSource, err = os.OpenFile(scratchFinalPath, os.O_RDONLY, 0700) scratchSource, err = os.OpenFile(scratchFinalPath, os.O_RDONLY, 0700)
if err != nil { if err != nil {
_ = os.Remove(scratchFinalPath) os.Remove(scratchFinalPath)
return nil, errors.Wrap(err, "failed to open scratch.vhdx for read after creation") return nil, errors.Wrap(err, "failed to open scratch.vhdx for read after creation")
} }
} else {
log.G(ctx).Debugf("scratch vhd %s was already present. Retrieved from cache", vhdFileName)
} }
return scratchSource, nil return scratchSource, nil
} }