Add cimfs differ and snapshotter

Details about CimFs project are discussed in #8346

Signed-off-by: Amit Barve <ambarve@microsoft.com>
This commit is contained in:
Amit Barve
2023-09-14 16:18:13 -07:00
parent 643fa70a7d
commit daa1ea522b
104 changed files with 3848 additions and 2996 deletions

View File

@@ -1,3 +1,5 @@
//go:build windows
package wclayer
import (
@@ -64,7 +66,7 @@ func (r *baseLayerReader) walkUntilCancelled() error {
return nil
})
if err == errorIterationCanceled {
if err == errorIterationCanceled { //nolint:errorlint // explicitly returned
return nil
}
@@ -103,7 +105,7 @@ func (r *baseLayerReader) walkUntilCancelled() error {
return nil
})
if err == errorIterationCanceled {
if err == errorIterationCanceled { //nolint:errorlint // explicitly returned
return nil
}

View File

@@ -0,0 +1,289 @@
//go:build windows
package cim
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/Microsoft/go-winio"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/oc"
"github.com/Microsoft/hcsshim/internal/wclayer"
"github.com/Microsoft/hcsshim/osversion"
"github.com/Microsoft/hcsshim/pkg/cimfs"
"go.opencensus.io/trace"
)
// CimLayerWriter implements the wclayer.LayerWriter interface to allow writing container
// image layers in the cim format.
// A cim layer consists of cim files (which are usually stored in the `cim-layers` directory) and
// some other files which are stored in the directory of that layer (i.e. the `path` directory).
type CimLayerWriter struct {
// context returned by trace.StartSpan when the writer was created (see NewCimLayerWriter).
ctx context.Context
// trace span started in NewCimLayerWriter.
s *trace.Span
// path to the layer (i.e layer's directory) as provided by the caller.
// Even if a layer is stored as a cim in the cim directory, some files associated
// with a layer are still stored in this path.
path string
// parent layer paths
parentLayerPaths []string
// Handle to the layer cim - writes to the cim file
cimWriter *cimfs.CimFsWriter
// Handle to the writer for writing files in the local filesystem
stdFileWriter *stdFileWriter
// reference to the currently active writer: either cimWriter or stdFileWriter, or nil after
// AddLink/Remove (in which case Write panics).
activeWriter io.Writer
// denotes if this layer has the UtilityVM directory
hasUtilityVM bool
// some files are written outside the cim during initial import (via stdFileWriter) because we need to
// make some modifications to these files before writing them to the cim. The pendingOps slice
// maintains a list of such delayed modifications to the layer cim. These modifications are applied at
// the very end of layer import process.
pendingOps []pendingCimOp
}
// hive groups the file names associated with a single registry hive during layer import.
type hive struct {
// file name of the hive under wclayer.RegFilesPath
name string
// file name of the hive's base file under wclayer.HivesPath
base string
// file name of the hive's delta file under wclayer.HivesPath
delta string
}
// hives enumerates every registry hive handled during layer import together with the
// file names of its base and delta variants.
var hives = []hive{
	{name: "SYSTEM", base: "SYSTEM_BASE", delta: "SYSTEM_DELTA"},
	{name: "SOFTWARE", base: "SOFTWARE_BASE", delta: "SOFTWARE_DELTA"},
	{name: "SAM", base: "SAM_BASE", delta: "SAM_DELTA"},
	{name: "SECURITY", base: "SECURITY_BASE", delta: "SECURITY_DELTA"},
	{name: "DEFAULT", base: "DEFAULTUSER_BASE", delta: "DEFAULTUSER_DELTA"},
}
// isDeltaOrBaseHive reports whether path names, case-insensitively, either the delta file of a
// known hive under wclayer.HivesPath or the hive file itself under wclayer.RegFilesPath.
func isDeltaOrBaseHive(path string) bool {
	for i := range hives {
		deltaPath := filepath.Join(wclayer.HivesPath, hives[i].delta)
		if strings.EqualFold(path, deltaPath) {
			return true
		}
		hivePath := filepath.Join(wclayer.RegFilesPath, hives[i].name)
		if strings.EqualFold(path, hivePath) {
			return true
		}
	}
	return false
}
// isStdFile reports whether the file at path must be written with the stdFileWriter
// instead of the cimWriter. That is the case for the hive files matched by
// isDeltaOrBaseHive, the UtilityVM SYSTEM and SOFTWARE hives, the BCD file and the boot
// manager file.
func isStdFile(path string) bool {
	if isDeltaOrBaseHive(path) {
		return true
	}
	switch path {
	case filepath.Join(wclayer.UtilityVMPath, wclayer.RegFilesPath, "SYSTEM"),
		filepath.Join(wclayer.UtilityVMPath, wclayer.RegFilesPath, "SOFTWARE"),
		wclayer.BcdFilePath,
		wclayer.BootMgrFilePath:
		return true
	}
	return false
}
// Add adds a file to the layer with given metadata.
//
// Files for which isStdFile returns true are created in the layer directory through the
// stdFileWriter and an addOp is queued so that they are written into the cim when the
// pending operations are applied; every other file is added to the cim directly. Whichever
// writer received the file becomes the target of subsequent Write calls.
func (cw *CimLayerWriter) Add(name string, fileInfo *winio.FileBasicInfo, fileSize int64, securityDescriptor []byte, extendedAttributes []byte, reparseData []byte) error {
	if name == wclayer.UtilityVMPath {
		cw.hasUtilityVM = true
	}

	if !isStdFile(name) {
		if err := cw.cimWriter.AddFile(name, fileInfo, fileSize, securityDescriptor, extendedAttributes, reparseData); err != nil {
			return err
		}
		cw.activeWriter = cw.cimWriter
		return nil
	}

	// Queue the deferred add first, then create the file on the host.
	deferredAdd := &addOp{
		pathInCim:          name,
		hostPath:           filepath.Join(cw.path, name),
		fileInfo:           fileInfo,
		securityDescriptor: securityDescriptor,
		extendedAttributes: extendedAttributes,
		reparseData:        reparseData,
	}
	cw.pendingOps = append(cw.pendingOps, deferredAdd)
	if err := cw.stdFileWriter.Add(name); err != nil {
		return err
	}
	cw.activeWriter = cw.stdFileWriter
	return nil
}
// AddLink adds a hard link to the layer. The target must already have been added.
//
// A link whose target is a std file is queued as a pending linkOp, a link whose own name is
// a std file is rejected with an error, and every other link is created in the cim right away.
func (cw *CimLayerWriter) AddLink(name string, target string) error {
	// Reset the active writer so that we panic if the layer tar is incorrectly formatted.
	cw.activeWriter = nil

	switch {
	case isStdFile(target):
		// The link can only be created once the std file itself has been written to
		// the cim, so remember it as a pending operation.
		cw.pendingOps = append(cw.pendingOps, &linkOp{
			oldPath: target,
			newPath: name,
		})
		return nil
	case isStdFile(name):
		// None of the predefined std files are links; seeing one as a link is
		// unexpected, so error out.
		return fmt.Errorf("unexpected link %s in layer", name)
	default:
		return cw.cimWriter.AddLink(target, name)
	}
}
// AddAlternateStream creates another alternate stream at the given
// path. Any writes made after this call will go to that stream.
// Alternate streams on std files are not supported and produce an error.
func (cw *CimLayerWriter) AddAlternateStream(name string, size uint64) error {
	if isStdFile(name) {
		// There is no known case of a std file carrying multiple data streams. If one
		// shows up our assumptions are wrong, so error out.
		return fmt.Errorf("unexpected alternate stream %s in layer", name)
	}

	err := cw.cimWriter.CreateAlternateStream(name, size)
	if err == nil {
		cw.activeWriter = cw.cimWriter
	}
	return err
}
// Remove removes a file that was present in a parent layer from the layer.
// The removal is performed with cimWriter.Unlink and that call's error is returned as is.
func (cw *CimLayerWriter) Remove(name string) error {
// reset the active writer to nil so that we panic if the layer tar is incorrectly formatted.
cw.activeWriter = nil
return cw.cimWriter.Unlink(name)
}
// Write writes data to the current file. The data must be in the format of a Win32
// backup stream.
// The bytes are forwarded to whichever writer the most recent Add or AddAlternateStream call
// made active. AddLink and Remove reset the active writer to nil, so a Write issued directly
// after them panics.
func (cw *CimLayerWriter) Write(b []byte) (int, error) {
return cw.activeWriter.Write(b)
}
// Close finishes the layer writing process and releases any resources.
//
// It closes the stdFileWriter, records the UtilityVM OS build (when the layer contains a
// UtilityVM directory), runs the base or non-base layer processing and finally applies all
// pending operations to the cim. The cim writer is closed on every return path; if no
// other error occurred, the error from closing the cim writer is returned.
func (cw *CimLayerWriter) Close(ctx context.Context) (retErr error) {
	// cimWriter must be closed even if there are errors. The defer is registered before
	// anything that can fail so that an early return does not leak the cim writer.
	defer func() {
		if err := cw.cimWriter.Close(); retErr == nil {
			retErr = err
		}
	}()

	if err := cw.stdFileWriter.Close(ctx); err != nil {
		return err
	}

	// Find out the osversion of this layer, both base & non-base layers can have UtilityVM layer.
	processUtilityVM := false

	if cw.hasUtilityVM {
		uvmSoftwareHivePath := filepath.Join(cw.path, wclayer.UtilityVMPath, wclayer.RegFilesPath, "SOFTWARE")
		osvStr, err := getOsBuildNumberFromRegistry(uvmSoftwareHivePath)
		if err != nil {
			return fmt.Errorf("read os version string from UtilityVM SOFTWARE hive: %w", err)
		}

		osv, err := strconv.ParseUint(osvStr, 10, 16)
		if err != nil {
			return fmt.Errorf("parse os version string (%s): %w", osvStr, err)
		}

		// write this version to a file for future reference by the shim process
		if err = wclayer.WriteLayerUvmBuildFile(cw.path, uint16(osv)); err != nil {
			return fmt.Errorf("write uvm build version: %w", err)
		}

		// CIMFS for hyperV isolated is only supported after 20348, processing UtilityVM layer on 20348
		// & lower will cause failures since those images won't have CIMFS specific UVM files (mostly
		// BCD entries required for CIMFS)
		processUtilityVM = (osv > osversion.LTSC2022)
		log.G(ctx).Debugf("import image os version %d, processing UtilityVM layer: %t\n", osv, processUtilityVM)
	}

	if len(cw.parentLayerPaths) == 0 {
		if err := cw.processBaseLayer(ctx, processUtilityVM); err != nil {
			return fmt.Errorf("process base layer: %w", err)
		}
	} else {
		if err := cw.processNonBaseLayer(ctx, processUtilityVM); err != nil {
			return fmt.Errorf("process non base layer: %w", err)
		}
	}

	// Replay the delayed modifications now that all std files are in their final form.
	for _, op := range cw.pendingOps {
		if err := op.apply(cw.cimWriter); err != nil {
			return fmt.Errorf("apply pending operations: %w", err)
		}
	}
	return nil
}
// NewCimLayerWriter creates a CimLayerWriter for the layer stored at path.
//
// parentLayerPaths lists the parent layers of this layer; when it is non-empty the new cim
// is created with the cim of parentLayerPaths[0] as its parent. The directory that holds all
// cims is created if it does not exist yet. An error is returned when CimFS is not supported
// on this build or when any of the setup steps fails.
func NewCimLayerWriter(ctx context.Context, path string, parentLayerPaths []string) (_ *CimLayerWriter, err error) {
	if !cimfs.IsCimFSSupported() {
		return nil, fmt.Errorf("CimFs not supported on this build")
	}

	ctx, span := trace.StartSpan(ctx, "hcsshim::NewCimLayerWriter")
	defer func() {
		// On failure nobody else gets hold of the span, so finish it here.
		if err != nil {
			oc.SetSpanStatus(span, err)
			span.End()
		}
	}()
	span.AddAttributes(
		trace.StringAttribute("path", path),
		trace.StringAttribute("parentLayerPaths", strings.Join(parentLayerPaths, ", ")))

	parentCim := ""
	cimDirPath := GetCimDirFromLayer(path)
	if _, err = os.Stat(cimDirPath); os.IsNotExist(err) {
		// create cim directory
		if err = os.Mkdir(cimDirPath, 0755); err != nil {
			return nil, fmt.Errorf("failed while creating cim layers directory: %w", err)
		}
	} else if err != nil {
		return nil, fmt.Errorf("unable to access cim layers directory: %w", err)
	}

	if len(parentLayerPaths) > 0 {
		parentCim = GetCimNameFromLayer(parentLayerPaths[0])
	}

	cim, err := cimfs.Create(cimDirPath, parentCim, GetCimNameFromLayer(path))
	if err != nil {
		return nil, fmt.Errorf("error in creating a new cim: %w", err)
	}

	sfw, err := newStdFileWriter(path, parentLayerPaths)
	if err != nil {
		// The cim writer was already created; close it so that it is not leaked when
		// the constructor fails. The close error is only logged because the original
		// failure is the one worth reporting.
		if closeErr := cim.Close(); closeErr != nil {
			log.G(ctx).Warnf("close cim writer after standard file writer creation failure: %s", closeErr)
		}
		return nil, fmt.Errorf("error in creating new standard file writer: %w", err)
	}

	return &CimLayerWriter{
		ctx:              ctx,
		s:                span,
		path:             path,
		parentLayerPaths: parentLayerPaths,
		cimWriter:        cim,
		stdFileWriter:    sfw,
	}, nil
}
// DestroyCimLayer destroys the cim layer stored for layerPath by calling cimfs.DestroyCim on
// the layer's cim file.
// If no cim file exists for the layer this is a no-op and nil is returned. That check matters
// because containerd sometimes calls this with the root snapshot directory as the layer path,
// and we don't want to destroy everything inside the snapshots directory.
func DestroyCimLayer(ctx context.Context, layerPath string) error {
	cimPath := GetCimPathFromLayer(layerPath)

	_, statErr := os.Stat(cimPath)
	switch {
	case statErr == nil:
		return cimfs.DestroyCim(ctx, cimPath)
	case os.IsNotExist(statErr):
		// not a cim layer, nothing to do.
		return nil
	default:
		return statErr
	}
}

View File

@@ -0,0 +1,107 @@
//go:build windows
package cim
import (
"bytes"
"fmt"
"os/exec"
"github.com/Microsoft/go-winio/pkg/guid"
)
const (
// path of the UtilityVM BCD store, relative to the layer directory
bcdFilePath = "UtilityVM\\Files\\EFI\\Microsoft\\Boot\\BCD"
// ID of the BCD object created to hold the CimFS device options
cimfsDeviceOptionsID = "{763e9fea-502d-434f-aad9-5fabe9c91a7b}"
// ID used in the `vmbus=` value of the `cimfsparentdevice` option
vmbusDeviceID = "{c63c9bdf-5fa5-4208-b03f-6b458b365592}"
// ID of the BCD object created to hold the composite device options
compositeDeviceOptionsID = "{e1787220-d17f-49e7-977a-d8fe4c8537e2}"
// ID used as the first element of the `cimfs=` value of the `secondarydevice` option
bootContainerID = "{b890454c-80de-4e98-a7ab-56b74b4fbd0c}"
)
// bcdExec runs `bcdedit.exe /store <storePath> /offline <args...>` and waits for it to finish.
//
// Both output streams of the command are captured. When the command fails, whatever it
// printed is appended to the returned error so that the failure can be diagnosed; the
// underlying error stays reachable through errors.Is / errors.As.
func bcdExec(storePath string, args ...string) error {
	var out bytes.Buffer
	argsArr := []string{"/store", storePath, "/offline"}
	argsArr = append(argsArr, args...)
	cmd := exec.Command("bcdedit.exe", argsArr...)
	// Stdout and Stderr may share one writer; os/exec serializes the writes in that case.
	cmd.Stdout = &out
	cmd.Stderr = &out
	if err := cmd.Run(); err != nil {
		if msg := bytes.TrimSpace(out.Bytes()); len(msg) > 0 {
			return fmt.Errorf("bcd command (%s) failed: %w: %s", cmd, err, msg)
		}
		return fmt.Errorf("bcd command (%s) failed: %w", cmd, err)
	}
	return nil
}
// setBcdRestartOnFailure sets the `restartonfailure` option of the `{default}` entry to `yes`
// in the BCD store at storePath. This is a BCD configuration required for the uvm.
func setBcdRestartOnFailure(storePath string) error {
return bcdExec(storePath, "/set", "{default}", "restartonfailure", "yes")
}
// setBcdCimBootDevice configures the BCD store at storePath so that the `{default}` entry
// boots from a composite device made of the boot disk partition (diskID / partitionID) and
// the cim located at cimPathRelativeToVSMB. The bcdedit invocations run in order and the
// first failure is returned.
func setBcdCimBootDevice(storePath, cimPathRelativeToVSMB string, diskID, partitionID guid.GUID) error {
	steps := [][]string{
		// create options for cimfs boot device
		{"/create", cimfsDeviceOptionsID, "/d", "CimFS Device Options", "/device"},

		// Set options. For now we need to set 2 options. First is the parent device i.e
		// the device under which all cim files will be available. Second is the path of
		// the cim (from which this UVM should boot) relative to the parent device. Note
		// that even though the 2nd option is named `cimfsrootdirectory` it expects a path
		// to the cim file and not a directory path.
		{"/set", cimfsDeviceOptionsID, "cimfsparentdevice", fmt.Sprintf("vmbus=%s", vmbusDeviceID)},
		{"/set", cimfsDeviceOptionsID, "cimfsrootdirectory", fmt.Sprintf("\\%s", cimPathRelativeToVSMB)},

		// create options for the composite device
		{"/create", compositeDeviceOptionsID, "/d", "Composite Device Options", "/device"},

		// We need to specify the diskID & the partition ID of the boot disk and we need
		// to set the cimfs boot options ID
		{"/set", compositeDeviceOptionsID, "primarydevice", fmt.Sprintf("gpt_partition={%s};{%s}", diskID, partitionID)},
		{"/set", compositeDeviceOptionsID, "secondarydevice", fmt.Sprintf("cimfs=%s,%s", bootContainerID, cimfsDeviceOptionsID)},
		{"/set", "{default}", "device", fmt.Sprintf("composite=0,%s", compositeDeviceOptionsID)},
		{"/set", "{default}", "osdevice", fmt.Sprintf("composite=0,%s", compositeDeviceOptionsID)},

		// Since our UVM files are stored under the UtilityVM\Files directory inside the
		// CIM we must prepend that directory in front of paths used by bootmgr
		{"/set", "{default}", "path", "\\UtilityVM\\Files\\Windows\\System32\\winload.efi"},
		{"/set", "{default}", "systemroot", "\\UtilityVM\\Files\\Windows"},
	}

	for _, step := range steps {
		if err := bcdExec(storePath, step...); err != nil {
			return err
		}
	}
	return nil
}
// updateBcdStoreForBoot updates the bcd store at storePath to boot from the disk with the
// given diskID and partitionID. cimPathRelativeToVSMB is the path of the cim which will be
// used for booting this UVM, relative to the VSMB share. (Usually the entire snapshots
// directory is shared over VSMB, so for the cim at cim-layers\1.cim under that directory the
// value of `cimPathRelativeToVSMB` should be cim-layers\1.cim.)
func updateBcdStoreForBoot(storePath string, cimPathRelativeToVSMB string, diskID, partitionID guid.GUID) error {
	if err := setBcdRestartOnFailure(storePath); err != nil {
		return err
	}
	return setBcdCimBootDevice(storePath, cimPathRelativeToVSMB, diskID, partitionID)
}

View File

@@ -0,0 +1,41 @@
//go:build windows
package cim
import (
"os"
"path/filepath"
)
const (
// cimDir is the name of the directory in which cims are stored. GetCimDirFromLayer places it
// next to the layer directory.
cimDir = "cim-layers"
)
// Usually layers are stored at ./root/io.containerd.snapshotter.v1.windows/snapshots/<layerid>. For cimfs we
// must store all layer cims in the same directory (for forked cims to work). So all cim layers are stored in
// /root/io.containerd.snapshotter.v1.windows/snapshots/cim-layers. And the cim file representing each
// individual layer is stored at /root/io.containerd.snapshotter.v1.windows/snapshots/cim-layers/<layerid>.cim

// GetCimNameFromLayer returns the filename (<layerid>.cim) of the cim file that represents
// the layer at layerPath, i.e. the last element of layerPath with ".cim" appended.
func GetCimNameFromLayer(layerPath string) string {
	layerID := filepath.Base(layerPath)
	return layerID + ".cim"
}
// GetCimPathFromLayer returns the path of the <cim directory>/<layerid>.cim file that
// represents the cim of the layer at layerPath.
func GetCimPathFromLayer(layerPath string) string {
	cimDirPath := GetCimDirFromLayer(layerPath)
	cimName := GetCimNameFromLayer(layerPath)
	return filepath.Join(cimDirPath, cimName)
}
// GetCimDirFromLayer returns the directory inside which all cims are stored: the `cimDir`
// directory located next to the layer directory at layerPath.
func GetCimDirFromLayer(layerPath string) string {
	return filepath.Join(filepath.Dir(layerPath), cimDir)
}
// IsCimLayer returns `true` if the layer at path `layerPath` is a cim layer, i.e. if the cim
// file of that layer can be stat'ed without error. Returns `false` otherwise.
func IsCimLayer(layerPath string) bool {
	if _, err := os.Stat(GetCimPathFromLayer(layerPath)); err != nil {
		return false
	}
	return true
}

View File

@@ -0,0 +1,3 @@
// Package cim provides utilities for working with container image layers in the cim format
// via the wclayer APIs.
package cim

View File

@@ -0,0 +1,90 @@
//go:build windows
package cim
import (
"context"
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/Microsoft/go-winio"
"github.com/Microsoft/hcsshim/internal/safefile"
"github.com/Microsoft/hcsshim/internal/winapi"
)
// stdFileWriter writes the files of a layer to the layer folder instead of writing them inside the cim.
// For some files (like the Hive files or some UtilityVM files) it is necessary to write them as a normal file
// first, do some modifications on them (for example merging of hives or processing of UtilityVM files)
// and then write the modified versions into the cim. This writer is used for such files.
type stdFileWriter struct {
// file opened by the most recent Add call; nil when no file is active
activeFile *os.File
// parent layer paths
parentLayerPaths []string
// path to the current layer
path string
// the open handle to the path directory; files are created relative to it
root *os.File
}
// newStdFileWriter returns a stdFileWriter for the layer directory root. It opens a handle to
// root with safefile.OpenRoot; the error of that call is returned alongside the writer.
func newStdFileWriter(root string, parentRoots []string) (sfw *stdFileWriter, err error) {
	rootHandle, openErr := safefile.OpenRoot(root)
	return &stdFileWriter{
		path:             root,
		parentLayerPaths: parentRoots,
		root:             rootHandle,
	}, openErr
}
// closeActiveFile closes the currently active file, if any, and clears the reference to it.
// It returns the error reported by closing the file.
func (sfw *stdFileWriter) closeActiveFile() (err error) {
	if sfw.activeFile == nil {
		return nil
	}
	current := sfw.activeFile
	sfw.activeFile = nil
	return current.Close()
}
// Add creates a new file named name inside the layer directory and makes it the active
// file for subsequent Write calls. Any previously active file is closed first. The file is
// opened with the FILE_CREATE disposition.
func (sfw *stdFileWriter) Add(name string) error {
	if err := sfw.closeActiveFile(); err != nil {
		return err
	}

	// The directory of this file might be created inside the cim.
	// Make sure we have the same parent directory chain here.
	parentDir := filepath.Dir(name)
	if err := safefile.MkdirAllRelative(parentDir, sfw.root); err != nil {
		return fmt.Errorf("failed to create file %s: %w", name, err)
	}

	var (
		access      uint32 = syscall.GENERIC_READ | syscall.GENERIC_WRITE | winio.WRITE_DAC | winio.WRITE_OWNER
		share       uint32 = syscall.FILE_SHARE_READ
		disposition uint32 = winapi.FILE_CREATE
	)
	created, err := safefile.OpenRelative(name, sfw.root, access, share, disposition, 0)
	if err != nil {
		return fmt.Errorf("error creating file %s: %w", name, err)
	}
	sfw.activeFile = created
	return nil
}
// Write writes data to the current file. The data must be in the format of a Win32
// backup stream.
// The bytes are passed unchanged to the file opened by the most recent Add call.
func (sfw *stdFileWriter) Write(b []byte) (int, error) {
return sfw.activeFile.Write(b)
}
// Close finishes the layer writing process and releases any resources: the currently
// active file (if any) and the handle to the layer root directory are both closed.
// The error of closing the active file takes precedence over the error of closing the root.
func (sfw *stdFileWriter) Close(ctx context.Context) error {
	// closeActiveFile resets sfw.activeFile to nil, so the name needed for the error
	// message has to be captured before calling it.
	var activeName string
	if sfw.activeFile != nil {
		activeName = sfw.activeFile.Name()
	}
	activeErr := sfw.closeActiveFile()

	// Release the root directory handle regardless of the outcome above.
	var rootErr error
	if sfw.root != nil {
		rootErr = sfw.root.Close()
		sfw.root = nil
	}

	if activeErr != nil {
		return fmt.Errorf("failed to close active file %s : %w", activeName, activeErr)
	}
	if rootErr != nil {
		return fmt.Errorf("failed to close layer root %s: %w", sfw.path, rootErr)
	}
	return nil
}

View File

@@ -0,0 +1,89 @@
//go:build windows
package cim
import (
"context"
"fmt"
"os"
"sync"
"github.com/Microsoft/go-winio/pkg/guid"
hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
cimfs "github.com/Microsoft/hcsshim/pkg/cimfs"
)
// a cache of cim layer to its mounted volume - The mount manager plugin currently doesn't have an option of
// querying a mounted cim to get the volume at which it is mounted, so we maintain a cache of that here
var (
// maps "<containerID>_<cimPath>" to the volume returned by cimfs.Mount
cimMounts map[string]string = make(map[string]string)
// guards cimMounts
cimMountMapLock sync.Mutex
// A random GUID used as a namespace for generating cim mount volume GUIDs: 6827367b-c388-4e9b-95ec-961c6d2c936c
// NOTE(review): the Data4 literal below lists only six bytes (the remaining two are zero-filled), so it
// does not spell out the `95ec-961c6d2c936c` tail quoted above. Confirm which value is intended before
// changing it: every volume GUID generated by MountCimLayer and CleanupContainerMounts derives from it.
cimMountNamespace guid.GUID = guid.GUID{Data1: 0x6827367b, Data2: 0xc388, Data3: 0x4e9b, Data4: [8]byte{0x96, 0x1c, 0x6d, 0x2c, 0x93, 0x6c}}
)
// MountCimLayer mounts the cim at path `cimPath` and returns the mount location of that cim. This method
// uses the `CimMountFlagCacheFiles` mount flag when mounting the cim. The containerID is used to generate
// the volumeID for the volume at which this CIM is mounted. containerID is used so that if the shim process
// crashes for any reason, the mounted cim can be correctly cleaned up during `shim delete` call.
// On success the mount location is also recorded in the mount cache.
func MountCimLayer(ctx context.Context, cimPath, containerID string) (string, error) {
	volumeGUID, err := guid.NewV5(cimMountNamespace, []byte(containerID))
	if err != nil {
		return "", fmt.Errorf("generated cim mount GUID: %w", err)
	}

	mountedAt, err := cimfs.Mount(cimPath, volumeGUID, hcsschema.CimMountFlagCacheFiles)
	if err != nil {
		return "", err
	}

	cacheKey := fmt.Sprintf("%s_%s", containerID, cimPath)
	cimMountMapLock.Lock()
	cimMounts[cacheKey] = mountedAt
	cimMountMapLock.Unlock()

	return mountedAt, nil
}
// UnmountCimLayer unmounts the cim at `cimPath` that was mounted for the given container.
// It returns an error if the mount cache has no entry for that cim and container. The cache
// entry is removed before the unmount is attempted.
func UnmountCimLayer(ctx context.Context, cimPath, containerID string) error {
	cacheKey := fmt.Sprintf("%s_%s", containerID, cimPath)

	cimMountMapLock.Lock()
	defer cimMountMapLock.Unlock()

	vol, mounted := cimMounts[cacheKey]
	if !mounted {
		return fmt.Errorf("cim %s not mounted", cimPath)
	}
	delete(cimMounts, cacheKey)
	return cimfs.Unmount(vol)
}
// GetCimMountPath returns the volume at which the cim at cimPath is mounted for the given
// container, as recorded in the mount cache. If the cim is not mounted an error is returned.
func GetCimMountPath(cimPath, containerID string) (string, error) {
	cacheKey := fmt.Sprintf("%s_%s", containerID, cimPath)

	cimMountMapLock.Lock()
	defer cimMountMapLock.Unlock()

	vol, mounted := cimMounts[cacheKey]
	if !mounted {
		return "", fmt.Errorf("cim %s not mounted", cimPath)
	}
	return vol, nil
}
// CleanupContainerMounts unmounts the cim volume whose GUID is derived from containerID,
// provided that the volume path can be stat'ed. It does not consult the mount cache. If the
// volume cannot be stat'ed nothing is done and nil is returned.
func CleanupContainerMounts(containerID string) error {
	volumeGUID, err := guid.NewV5(cimMountNamespace, []byte(containerID))
	if err != nil {
		return fmt.Errorf("generated cim mount GUID: %w", err)
	}

	volPath := fmt.Sprintf("\\\\?\\Volume{%s}\\", volumeGUID.String())
	if _, statErr := os.Stat(volPath); statErr != nil {
		// nothing to unmount
		return nil
	}
	return cimfs.Unmount(volPath)
}

View File

@@ -0,0 +1,68 @@
//go:build windows
package cim
import (
"fmt"
"io"
"os"
"github.com/Microsoft/go-winio"
"github.com/Microsoft/hcsshim/pkg/cimfs"
"golang.org/x/sys/windows"
)
// pendingCimOp is a modification of the layer cim that was recorded during layer import and
// is applied later through the given cim writer.
type pendingCimOp interface {
// apply performs the recorded operation on cw.
apply(cw *cimfs.CimFsWriter) error
}
// addOp represents a pending operation of adding a new file inside the cim
type addOp struct {
// path inside the cim at which the file should be added
pathInCim string
// host path where this file was temporarily written.
hostPath string
// other file metadata fields that were provided during the add call.
fileInfo *winio.FileBasicInfo
securityDescriptor []byte
extendedAttributes []byte
reparseData []byte
}
// apply adds the file stored at o.hostPath to the cim at o.pathInCim, using the metadata
// recorded in the op and the current size of the host file. For anything that is not a
// directory the contents of the host file are then copied into the cim; a copy that
// transfers fewer bytes than the file size is reported as an error.
func (o *addOp) apply(cw *cimfs.CimFsWriter) error {
	f, err := os.Open(o.hostPath)
	if err != nil {
		return fmt.Errorf("open file %s: %w", o.hostPath, err)
	}
	defer f.Close()

	fs, err := f.Stat()
	if err != nil {
		return fmt.Errorf("stat file %s: %w", o.hostPath, err)
	}

	if err := cw.AddFile(o.pathInCim, o.fileInfo, fs.Size(), o.securityDescriptor, o.extendedAttributes, o.reparseData); err != nil {
		return fmt.Errorf("cim add file %s: %w", o.hostPath, err)
	}

	// FileAttributes is a bit mask, so test the directory bit instead of comparing the whole
	// value: a directory that carries any additional attribute must still be treated as a
	// directory and must not have its (non-existent) data stream copied.
	if o.fileInfo.FileAttributes&windows.FILE_ATTRIBUTE_DIRECTORY != 0 {
		return nil
	}

	written, err := io.Copy(cw, f)
	if err != nil {
		return fmt.Errorf("write file %s inside cim: %w", o.hostPath, err)
	} else if written != fs.Size() {
		return fmt.Errorf("short write to cim for file %s, expected %d bytes wrote %d", o.hostPath, fs.Size(), written)
	}
	return nil
}
// linkOp represents a pending link file operation inside the cim
type linkOp struct {
// old & new paths inside the cim where the link should be created:
// oldPath is the existing link target, newPath is the name of the link.
oldPath string
newPath string
}
// apply creates the recorded link inside the cim by calling cw.AddLink with the old path
// followed by the new path.
func (o *linkOp) apply(cw *cimfs.CimFsWriter) error {
return cw.AddLink(o.oldPath, o.newPath)
}

View File

@@ -0,0 +1,293 @@
//go:build windows
package cim
import (
"context"
"fmt"
"os"
"path/filepath"
"syscall"
"time"
"github.com/Microsoft/go-winio"
"github.com/Microsoft/go-winio/vhd"
"github.com/Microsoft/hcsshim/computestorage"
"github.com/Microsoft/hcsshim/internal/memory"
"github.com/Microsoft/hcsshim/internal/security"
"github.com/Microsoft/hcsshim/internal/vhdx"
"github.com/Microsoft/hcsshim/internal/wclayer"
"golang.org/x/sys/windows"
)
// defaultVHDXBlockSizeInMB is the block size, in MiB, used for the VHDX files created in this
// file; it is also the block size passed to vhd.CreateDiffVhd.
const defaultVHDXBlockSizeInMB = 1
// createContainerBaseLayerVHDs creates the container base VHD and the differencing (scratch)
// VHD inside layerPath. Existing files at either location are removed first. The base VHD
// is created with a maximum size of 20 GiB and formatted, then the differencing disk is
// created on top of it and VM group access is granted to both files. If anything fails after
// the base VHD was created, both VHD files are removed again.
func createContainerBaseLayerVHDs(ctx context.Context, layerPath string) (err error) {
	baseVhdPath := filepath.Join(layerPath, wclayer.ContainerBaseVhd)
	diffVhdPath := filepath.Join(layerPath, wclayer.ContainerScratchVhd)
	vhdSizeInGiB := uint64(20)

	// Get rid of leftovers from an earlier attempt.
	leftovers := []struct{ path, failure string }{
		{baseVhdPath, "failed to remove base vhdx path"},
		{diffVhdPath, "failed to remove differencing vhdx"},
	}
	for _, l := range leftovers {
		if _, statErr := os.Stat(l.path); statErr != nil {
			continue
		}
		if rmErr := os.RemoveAll(l.path); rmErr != nil {
			return fmt.Errorf("%s: %w", l.failure, rmErr)
		}
	}

	diskParams := &vhd.CreateVirtualDiskParameters{
		Version: 2,
		Version2: vhd.CreateVersion2{
			MaximumSize:      vhdSizeInGiB * memory.GiB,
			BlockSizeInBytes: defaultVHDXBlockSizeInMB * memory.MiB,
		},
	}
	diskHandle, err := vhd.CreateVirtualDisk(baseVhdPath, vhd.VirtualDiskAccessNone, vhd.CreateVirtualDiskFlagNone, diskParams)
	if err != nil {
		return fmt.Errorf("failed to create vhdx: %w", err)
	}

	// From here on a failure must not leave partially prepared VHDs behind.
	defer func() {
		if err != nil {
			os.RemoveAll(baseVhdPath)
			os.RemoveAll(diffVhdPath)
		}
	}()

	err = computestorage.FormatWritableLayerVhd(ctx, windows.Handle(diskHandle))
	// The handle is always closed, whether formatting succeeded or not.
	closeErr := syscall.CloseHandle(diskHandle)
	switch {
	case err != nil:
		return err
	case closeErr != nil:
		return fmt.Errorf("failed to close vhdx handle: %w", closeErr)
	}

	// Create the differencing disk that will be what's copied for the final rw layer
	// for a container.
	if err = vhd.CreateDiffVhd(diffVhdPath, baseVhdPath, defaultVHDXBlockSizeInMB); err != nil {
		return fmt.Errorf("failed to create differencing disk: %w", err)
	}

	for _, vhdPath := range []string{baseVhdPath, diffVhdPath} {
		if err = security.GrantVmGroupAccess(vhdPath); err != nil {
			return fmt.Errorf("failed to grant vm group access to %s: %w", vhdPath, err)
		}
	}
	return nil
}
// processUtilityVMLayer is similar to createContainerBaseLayerVHDs but along with the scratch creation it
// also does some BCD modifications to allow the UVM to boot from the CIM. It expects that the UVM BCD file is
// present at layerPath/`wclayer.BcdFilePath` and a UVM SYSTEM hive is present at
// layerPath/UtilityVM/`wclayer.RegFilesPath`/SYSTEM. The scratch VHDs are created under the `layerPath`
// directory.
//
// If any step fails after the base VHD was created, both VHD files are removed again. The
// result is named so that the deferred cleanup observes the error of every return path.
func processUtilityVMLayer(ctx context.Context, layerPath string) (err error) {
	baseVhdPath := filepath.Join(layerPath, wclayer.UtilityVMPath, wclayer.UtilityVMBaseVhd)
	diffVhdPath := filepath.Join(layerPath, wclayer.UtilityVMPath, wclayer.UtilityVMScratchVhd)
	defaultVhdSize := uint64(10)

	// Create the base vhdx for the utilityVM layer; it is formatted right below.
	createParams := &vhd.CreateVirtualDiskParameters{
		Version: 2,
		Version2: vhd.CreateVersion2{
			MaximumSize:      defaultVhdSize * memory.GiB,
			BlockSizeInBytes: defaultVHDXBlockSizeInMB * memory.MiB,
		},
	}

	handle, err := vhd.CreateVirtualDisk(baseVhdPath, vhd.VirtualDiskAccessNone, vhd.CreateVirtualDiskFlagNone, createParams)
	if err != nil {
		return fmt.Errorf("failed to create vhdx: %w", err)
	}

	// err is the named result, so this sees the error of every failure return below,
	// including the ones that wrap an error held in a differently named variable.
	defer func() {
		if err != nil {
			os.RemoveAll(baseVhdPath)
			os.RemoveAll(diffVhdPath)
		}
	}()

	err = computestorage.FormatWritableLayerVhd(ctx, windows.Handle(handle))
	// The handle is always closed, whether formatting succeeded or not.
	closeErr := syscall.CloseHandle(handle)
	if err != nil {
		return err
	} else if closeErr != nil {
		return fmt.Errorf("failed to close vhdx handle: %w", closeErr)
	}

	partitionInfo, err := vhdx.GetScratchVhdPartitionInfo(ctx, baseVhdPath)
	if err != nil {
		return fmt.Errorf("failed to get base vhd layout info: %w", err)
	}
	// relativeCimPath needs to be the cim path relative to the snapshots directory. The snapshots
	// directory is shared inside the UVM over VSMB, so during the UVM boot this relative path will be
	// used to find the cim file under that VSMB share.
	relativeCimPath := filepath.Join(filepath.Base(GetCimDirFromLayer(layerPath)), GetCimNameFromLayer(layerPath))
	bcdPath := filepath.Join(layerPath, bcdFilePath)
	if err = updateBcdStoreForBoot(bcdPath, relativeCimPath, partitionInfo.DiskID, partitionInfo.PartitionID); err != nil {
		return fmt.Errorf("failed to update BCD: %w", err)
	}

	if err = enableCimBoot(filepath.Join(layerPath, wclayer.UtilityVMPath, wclayer.RegFilesPath, "SYSTEM")); err != nil {
		return fmt.Errorf("failed to setup cim image for uvm boot: %w", err)
	}

	// Note: diff vhd creation and granting of vm group access must be done AFTER
	// getting the partition info of the base VHD. Otherwise it causes the vhd parent
	// chain to get corrupted.
	// TODO(ambarve): figure out why this happens so that bcd update can be moved to a separate function

	// Create the differencing disk that will be what's copied for the final rw layer
	// for a container.
	if err = vhd.CreateDiffVhd(diffVhdPath, baseVhdPath, defaultVHDXBlockSizeInMB); err != nil {
		return fmt.Errorf("failed to create differencing disk: %w", err)
	}

	if err = security.GrantVmGroupAccess(baseVhdPath); err != nil {
		return fmt.Errorf("failed to grant vm group access to %s: %w", baseVhdPath, err)
	}
	if err = security.GrantVmGroupAccess(diffVhdPath); err != nil {
		return fmt.Errorf("failed to grant vm group access to %s: %w", diffVhdPath, err)
	}
	return nil
}
// processBaseLayerHives makes the base layer specific modifications on the hives and emits the equivalent
// pendingCimOps that should be applied on the CIM. In the base layer we need to create hard links from the
// registry hives under Files/Windows/System32/config into Hives/*_BASE. This function creates these links
// on the host so that the registry hives under Hives/ are available during children layers import, and
// returns the operations that add the Hives directory and the same links inside the cim.
// On failure the operations collected so far are returned together with the error.
func processBaseLayerHives(layerPath string) ([]pendingCimOp, error) {
	pendingOps := []pendingCimOp{}

	// make hives directory both outside and in the cim
	hivesDirOnHost := filepath.Join(layerPath, wclayer.HivesPath)
	if err := os.Mkdir(hivesDirOnHost, 0755); err != nil {
		return pendingOps, fmt.Errorf("hives directory creation: %w", err)
	}
	pendingOps = append(pendingOps, &addOp{
		pathInCim: wclayer.HivesPath,
		hostPath:  filepath.Join(layerPath, wclayer.HivesPath),
		fileInfo: &winio.FileBasicInfo{
			CreationTime:   windows.NsecToFiletime(time.Now().UnixNano()),
			LastAccessTime: windows.NsecToFiletime(time.Now().UnixNano()),
			LastWriteTime:  windows.NsecToFiletime(time.Now().UnixNano()),
			ChangeTime:     windows.NsecToFiletime(time.Now().UnixNano()),
			FileAttributes: windows.FILE_ATTRIBUTE_DIRECTORY,
		},
	})

	// add hard links from base hive files.
	for i := range hives {
		linkSource := filepath.Join(wclayer.RegFilesPath, hives[i].name)
		linkName := filepath.Join(wclayer.HivesPath, hives[i].base)

		err := os.Link(filepath.Join(layerPath, linkSource), filepath.Join(layerPath, linkName))
		if err != nil {
			return pendingOps, fmt.Errorf("hive link creation: %w", err)
		}

		pendingOps = append(pendingOps, &linkOp{
			oldPath: linkSource,
			newPath: linkName,
		})
	}
	return pendingOps, nil
}
// processLayoutFile creates a file named "layout" in the root of the base layer. This allows certain
// container startup related functions to understand that the hives are a part of the container rootfs.
// The returned operation adds that file to the cim as well.
func processLayoutFile(layerPath string) ([]pendingCimOp, error) {
	layoutOnHost := filepath.Join(layerPath, "layout")
	if err := os.WriteFile(layoutOnHost, []byte("vhd-with-hives\n"), 0755); err != nil {
		return []pendingCimOp{}, fmt.Errorf("write layout file: %w", err)
	}

	addLayout := &addOp{
		pathInCim: "layout",
		hostPath:  filepath.Join(layerPath, "layout"),
		fileInfo: &winio.FileBasicInfo{
			CreationTime:   windows.NsecToFiletime(time.Now().UnixNano()),
			LastAccessTime: windows.NsecToFiletime(time.Now().UnixNano()),
			LastWriteTime:  windows.NsecToFiletime(time.Now().UnixNano()),
			ChangeTime:     windows.NsecToFiletime(time.Now().UnixNano()),
			FileAttributes: windows.FILE_ATTRIBUTE_NORMAL,
		},
	}
	return []pendingCimOp{addLayout}, nil
}
// processBaseLayer performs the processing required for a base layer: it creates the
// container base VHDs, optionally processes the UtilityVM layer, and then queues the pending
// cim operations produced by the hive processing and by the layout file creation. Files
// generated by those steps have to be added back inside the cim and some registry file links
// have to be created there; the queued operations take care of that when they are applied.
func (cw *CimLayerWriter) processBaseLayer(ctx context.Context, processUtilityVM bool) (err error) {
	if err = createContainerBaseLayerVHDs(ctx, cw.path); err != nil {
		return fmt.Errorf("failed to create container base VHDs: %w", err)
	}

	if processUtilityVM {
		if err = processUtilityVMLayer(ctx, cw.path); err != nil {
			return fmt.Errorf("process utilityVM layer: %w", err)
		}
	}

	// Each step works on the layer directory and yields the operations to replay on the cim.
	steps := []func(string) ([]pendingCimOp, error){
		processBaseLayerHives,
		processLayoutFile,
	}
	for _, step := range steps {
		ops, stepErr := step(cw.path)
		if stepErr != nil {
			return stepErr
		}
		cw.pendingOps = append(cw.pendingOps, ops...)
	}
	return nil
}
// processNonBaseLayer takes care of the processing required for a non base layer. As of now
// the only processing required for a non base layer is to merge the delta registry hives of the
// non-base layer with its parent layer, and (when `processUtilityVM` is true) to process the
// utility VM layer afterwards.
//
// For every known hive whose delta file exists in this layer, the base hive of the immediate
// parent (cw.parentLayerPaths[0]) is merged with that delta, the result is written as this
// layer's base hive, and an operation adding the merged file to the cim is queued on
// cw.pendingOps. Hives without a delta file are skipped.
func (cw *CimLayerWriter) processNonBaseLayer(ctx context.Context, processUtilityVM bool) (err error) {
	for _, hv := range hives {
		baseHive := filepath.Join(wclayer.HivesPath, hv.base)
		deltaHive := filepath.Join(wclayer.HivesPath, hv.delta)
		deltaHostPath := filepath.Join(cw.path, deltaHive)

		if _, statErr := os.Stat(deltaHostPath); statErr != nil {
			if os.IsNotExist(statErr) {
				// no delta for this hive in this layer, nothing to merge.
				continue
			}
			return fmt.Errorf("stat delta hive %s: %w", deltaHostPath, statErr)
		}

		// merge base hive of parent layer with the delta hive of this layer and write it as
		// the base hive of this layer.
		parentHostPath := filepath.Join(cw.parentLayerPaths[0], baseHive)
		mergedHostPath := filepath.Join(cw.path, baseHive)
		if mergeErr := mergeHive(parentHostPath, deltaHostPath, mergedHostPath); mergeErr != nil {
			return mergeErr
		}

		// the newly created merged file must be added to the cim
		mergedFileInfo := &winio.FileBasicInfo{
			CreationTime:   windows.NsecToFiletime(time.Now().UnixNano()),
			LastAccessTime: windows.NsecToFiletime(time.Now().UnixNano()),
			LastWriteTime:  windows.NsecToFiletime(time.Now().UnixNano()),
			ChangeTime:     windows.NsecToFiletime(time.Now().UnixNano()),
			FileAttributes: windows.FILE_ATTRIBUTE_NORMAL,
		}
		cw.pendingOps = append(cw.pendingOps, &addOp{
			pathInCim: baseHive,
			hostPath:  mergedHostPath,
			fileInfo:  mergedFileInfo,
		})
	}

	if !processUtilityVM {
		return nil
	}
	return processUtilityVMLayer(ctx, cw.path)
}

View File

@@ -0,0 +1,172 @@
//go:build windows
package cim
import (
"encoding/binary"
"fmt"
"os"
"unsafe"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/winapi"
"github.com/Microsoft/hcsshim/osversion"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/windows"
)
// enableCimBoot opens the SYSTEM registry hive at path `hivePath` and updates it with the registry
// values that prepare the uvm to boot from a cim file if requested: the boot container GUID, the
// `Start` values of the UnionFS and wcifs services, and the boot path override. The registry changes
// required to actually make the uvm boot from a cim will be added in the uvm config (look at
// addBootFromCimRegistryChanges for details). These values need to be available in the early boot
// phase and so including them in the uvm config doesn't work.
//
// The modified hive replaces the file at `hivePath`: the existing file is removed and the hive is
// then saved to the same path. The hive handle is closed on every return path (success or failure)
// and every key handle opened while applying the changes is closed as soon as its value is set.
// An error closing the hive is returned only when no earlier error occurred.
func enableCimBoot(hivePath string) (err error) {
	dataZero := make([]byte, 4)
	dataFour := make([]byte, 4)
	binary.LittleEndian.PutUint32(dataFour, 4)

	bootGUID, err := windows.UTF16FromString(bootContainerID)
	if err != nil {
		return fmt.Errorf("failed to encode boot guid to utf16: %w", err)
	}

	overrideBootPath, err := windows.UTF16FromString("\\Windows\\")
	if err != nil {
		return fmt.Errorf("failed to encode override boot path to utf16: %w", err)
	}

	// dataLen is always expressed in bytes; UTF-16 strings therefore use 2 bytes per element
	// (the slices returned by UTF16FromString include the terminating NUL).
	regChanges := []struct {
		keyPath   string
		valueName string
		valueType winapi.RegType
		data      *byte
		dataLen   uint32
	}{
		{"ControlSet001\\Control", "BootContainerGuid", winapi.REG_TYPE_SZ, (*byte)(unsafe.Pointer(&bootGUID[0])), 2 * uint32(len(bootGUID))},
		{"ControlSet001\\Services\\UnionFS", "Start", winapi.REG_TYPE_DWORD, &dataZero[0], uint32(len(dataZero))},
		{"ControlSet001\\Services\\wcifs", "Start", winapi.REG_TYPE_DWORD, &dataFour[0], uint32(len(dataFour))},
		// The bootmgr loads the uvm files from the cim and so uses the relative path
		// `UtilityVM\\Files` inside the cim to access the uvm files. However, once the cim is
		// mounted UnionFS will merge the correct directory (UtilityVM\\Files) of the cim with
		// the scratch and from that point onwards we don't need to use the relative path. The
		// registry key below tells the kernel that the boot path that was provided in BCD
		// should now be overridden with this new path.
		{"Setup", "BootPathOverride", winapi.REG_TYPE_SZ, (*byte)(unsafe.Pointer(&overrideBootPath[0])), 2 * uint32(len(overrideBootPath))},
	}

	var storeHandle winapi.ORHKey
	if err = winapi.OROpenHive(hivePath, &storeHandle); err != nil {
		return fmt.Errorf("failed to open registry store at %s: %w", hivePath, err)
	}
	// close hive irrespective of the errors; a close failure is only surfaced when nothing
	// else went wrong so that the original error is never masked.
	defer func() {
		if closeErr := winapi.ORCloseHive(storeHandle); closeErr != nil && err == nil {
			err = fmt.Errorf("error closing registry store; %w", closeErr)
		}
	}()

	for _, change := range regChanges {
		var changeKey winapi.ORHKey
		if err = winapi.ORCreateKey(storeHandle, change.keyPath, 0, 0, 0, &changeKey, nil); err != nil {
			return fmt.Errorf("failed to open reg key %s: %w", change.keyPath, err)
		}

		setErr := winapi.ORSetValue(changeKey, change.valueName, uint32(change.valueType), change.data, change.dataLen)

		// The key handle is not needed once the value has been written (or has failed to be
		// written). A failure to close it is not fatal and is only logged.
		if closeErr := winapi.ORCloseKey(changeKey); closeErr != nil {
			log.L.WithFields(logrus.Fields{
				"error": closeErr,
				"hive":  hivePath,
				"key":   change.keyPath,
			}).Warnf("failed to close hive key")
		}

		if setErr != nil {
			return fmt.Errorf("failed to set value for regkey %s\\%s : %w", change.keyPath, change.valueName, setErr)
		}
	}

	// remove the existing file first
	if err := os.Remove(hivePath); err != nil {
		return fmt.Errorf("failed to remove existing registry %s: %w", hivePath, err)
	}

	if err = winapi.ORSaveHive(storeHandle, hivePath, uint32(osversion.Get().MajorVersion), uint32(osversion.Get().MinorVersion)); err != nil {
		return fmt.Errorf("error saving the registry store: %w", err)
	}

	return nil
}
// mergeHive merges the hive located at parentHivePath with the hive located at deltaHivePath and stores
// the result into the file at mergedHivePath. If a file already exists at path `mergedHivePath` then it
// returns an error.
//
// Every hive handle opened here is closed before returning. An error from closing a handle is
// reported only when no earlier error has been recorded.
func mergeHive(parentHivePath, deltaHivePath, mergedHivePath string) (err error) {
	// closeHive closes `hive` and, when the function has not failed so far, records the
	// close result (wrapped with `msg`) as the function's error. errors.Wrap yields nil for a
	// nil close result, so a successful close leaves err untouched.
	closeHive := func(hive winapi.ORHKey, msg string) {
		closeErr := winapi.ORCloseHive(hive)
		if err == nil {
			err = errors.Wrap(closeErr, msg)
		}
	}

	var baseHive winapi.ORHKey
	if openErr := winapi.OROpenHive(parentHivePath, &baseHive); openErr != nil {
		return fmt.Errorf("failed to open base hive %s: %w", parentHivePath, openErr)
	}
	defer closeHive(baseHive, "failed to close base hive")

	var deltaHive winapi.ORHKey
	if openErr := winapi.OROpenHive(deltaHivePath, &deltaHive); openErr != nil {
		return fmt.Errorf("failed to open delta hive %s: %w", deltaHivePath, openErr)
	}
	defer closeHive(deltaHive, "failed to close delta hive")

	var mergedHive winapi.ORHKey
	if mergeErr := winapi.ORMergeHives([]winapi.ORHKey{baseHive, deltaHive}, &mergedHive); mergeErr != nil {
		return fmt.Errorf("failed to merge hives: %w", mergeErr)
	}
	defer closeHive(mergedHive, "failed to close merged hive")

	version := osversion.Get()
	if saveErr := winapi.ORSaveHive(mergedHive, mergedHivePath, uint32(version.MajorVersion), uint32(version.MinorVersion)); saveErr != nil {
		return fmt.Errorf("failed to save hive: %w", saveErr)
	}
	return nil
}
// getOsBuildNumberFromRegistry fetches the "CurrentBuild" value at path
// "Microsoft\Windows NT\CurrentVersion" from the SOFTWARE registry hive at path
// `regHivePath`. This is used to detect the build version of the uvm.
//
// It returns an error if the hive or key cannot be opened, if the value cannot be read into the
// fixed 16 byte buffer, if the value is not of string type, or if the reported data length is
// not usable. Failures to close the key or the hive are only logged.
func getOsBuildNumberFromRegistry(regHivePath string) (_ string, err error) {
	var storeHandle, keyHandle winapi.ORHKey
	var dataType, dataLen uint32
	keyPath := "Microsoft\\Windows NT\\CurrentVersion"
	valueName := "CurrentBuild"
	// 16 bytes hold up to 8 UTF-16 code units (terminator included); the build version string
	// is expected to be about 5 wide chars.
	dataLen = 16
	dataBuf := make([]byte, dataLen)

	if err = winapi.OROpenHive(regHivePath, &storeHandle); err != nil {
		return "", fmt.Errorf("failed to open registry store at %s: %w", regHivePath, err)
	}
	defer func() {
		if closeErr := winapi.ORCloseHive(storeHandle); closeErr != nil {
			log.L.WithFields(logrus.Fields{
				"error": closeErr,
				"hive":  regHivePath,
			}).Warnf("failed to close hive")
		}
	}()

	if err = winapi.OROpenKey(storeHandle, keyPath, &keyHandle); err != nil {
		return "", fmt.Errorf("failed to open key at %s: %w", keyPath, err)
	}
	defer func() {
		if closeErr := winapi.ORCloseKey(keyHandle); closeErr != nil {
			log.L.WithFields(logrus.Fields{
				"error": closeErr,
				"hive":  regHivePath,
				"key":   keyPath,
				"value": valueName,
			}).Warnf("failed to close hive key")
		}
	}()

	if err = winapi.ORGetValue(keyHandle, "", valueName, &dataType, &dataBuf[0], &dataLen); err != nil {
		return "", fmt.Errorf("failed to get value of %s: %w", valueName, err)
	}

	if dataType != uint32(winapi.REG_TYPE_SZ) {
		return "", fmt.Errorf("unexpected build number data type (%d)", dataType)
	}

	// dataLen is unsigned: without this check a reported length below 2 would wrap around in
	// the subtraction below and the slice expression would panic.
	if dataLen < 2 || dataLen > uint32(len(dataBuf)) {
		return "", fmt.Errorf("unexpected build number data length (%d)", dataLen)
	}

	// The last 2 bytes are dropped before decoding (presumably the UTF-16 NUL terminator
	// reported as part of the value length).
	strData := dataBuf[:(dataLen - 2)]
	if len(strData) == 0 {
		// nothing left to decode
		return "", nil
	}
	return winapi.ParseUtf16LE(strData), nil
}

View File

@@ -1,3 +1,5 @@
//go:build windows
package wclayer
import (

View File

@@ -11,7 +11,6 @@ import (
"github.com/Microsoft/hcsshim/internal/hcserror"
"github.com/Microsoft/hcsshim/internal/oc"
"github.com/Microsoft/hcsshim/osversion"
"go.opencensus.io/trace"
)
@@ -30,14 +29,17 @@ func ExpandScratchSize(ctx context.Context, path string, size uint64) (err error
return hcserror.New(err, title, "")
}
// Manually expand the volume now in order to work around bugs in 19H1 and
// prerelease versions of Vb. Remove once this is fixed in Windows.
if build := osversion.Build(); build >= osversion.V19H1 && build < 19020 {
err = expandSandboxVolume(ctx, path)
if err != nil {
return err
}
// Always expand the volume too. In case of legacy layers not expanding the volume here works because
// the PrepareLayer call internally handles the expansion. However, in other cases (like CimFS) we
// don't call PrepareLayer and so the volume will never be expanded. This also means in case of
// legacy layers, we might have a small perf hit because the VHD is mounted twice for expansion (once
// here and once during the PrepareLayer call). But as long as the perf hit is minimal, we should be
// okay.
err = expandSandboxVolume(ctx, path)
if err != nil {
return err
}
return nil
}

View File

@@ -154,7 +154,7 @@ func (r *legacyLayerReader) walkUntilCancelled() error {
}
return nil
})
if err == errorIterationCanceled {
if err == errorIterationCanceled { //nolint:errorlint // explicitly returned
return nil
}
if err == nil {
@@ -196,7 +196,7 @@ func findBackupStreamSize(r io.Reader) (int64, error) {
for {
hdr, err := br.Next()
if err != nil {
if err == io.EOF {
if errors.Is(err, io.EOF) {
err = nil
}
return 0, err
@@ -428,7 +428,7 @@ func (w *legacyLayerWriter) initUtilityVM() error {
// immutable.
err = cloneTree(w.parentRoots[0], w.destRoot, UtilityVMFilesPath, mutatedUtilityVMFiles)
if err != nil {
return fmt.Errorf("cloning the parent utility VM image failed: %s", err)
return fmt.Errorf("cloning the parent utility VM image failed: %w", err)
}
w.HasUtilityVM = true
}
@@ -451,7 +451,7 @@ func (w *legacyLayerWriter) reset() error {
for {
bhdr, err := br.Next()
if err == io.EOF {
if errors.Is(err, io.EOF) {
// end of backupstream data
break
}