From 86de625ece43ccf3f61f41bc0d2a78bb7a29e4be Mon Sep 17 00:00:00 2001 From: Lantao Liu Date: Sun, 1 Sep 2019 00:58:32 -0700 Subject: [PATCH] Update vendor Signed-off-by: Lantao Liu --- vendor.conf | 1 + .../Microsoft/go-winio/pkg/fs/fs_windows.go | 47 + .../pkg/security/grantvmgroupaccess.go | 159 +++ .../go-winio/pkg/security/syscall_windows.go | 7 + .../go-winio/pkg/security/zsyscall_windows.go | 81 ++ .../github.com/Microsoft/go-winio/vhd/vhd.go | 151 ++ .../github.com/Microsoft/go-winio/vhd/zvhd.go | 99 ++ .../ext4/internal/compactext4/compact.go | 1263 +++++++++++++++++ .../hcsshim/ext4/internal/format/format.go | 411 ++++++ .../hcsshim/ext4/tar2ext4/tar2ext4.go | 174 +++ .../hcsshim/ext4/tar2ext4/vhdfooter.go | 76 + .../containerd/containerd/diff/lcow/lcow.go | 210 +++ .../containerd/diff/windows/windows.go | 193 +++ .../containerd/snapshots/windows/windows.go | 338 +++++ .../go-windows-terminal-sequences/LICENSE | 9 + .../go-windows-terminal-sequences/README.md | 40 + .../go-windows-terminal-sequences/go.mod | 1 + .../sequences.go | 36 + 18 files changed, 3296 insertions(+) create mode 100644 vendor/github.com/Microsoft/go-winio/pkg/fs/fs_windows.go create mode 100644 vendor/github.com/Microsoft/go-winio/pkg/security/grantvmgroupaccess.go create mode 100644 vendor/github.com/Microsoft/go-winio/pkg/security/syscall_windows.go create mode 100644 vendor/github.com/Microsoft/go-winio/pkg/security/zsyscall_windows.go create mode 100644 vendor/github.com/Microsoft/go-winio/vhd/vhd.go create mode 100644 vendor/github.com/Microsoft/go-winio/vhd/zvhd.go create mode 100644 vendor/github.com/Microsoft/hcsshim/ext4/internal/compactext4/compact.go create mode 100644 vendor/github.com/Microsoft/hcsshim/ext4/internal/format/format.go create mode 100644 vendor/github.com/Microsoft/hcsshim/ext4/tar2ext4/tar2ext4.go create mode 100644 vendor/github.com/Microsoft/hcsshim/ext4/tar2ext4/vhdfooter.go create mode 100644 vendor/github.com/containerd/containerd/diff/lcow/lcow.go 
create mode 100644 vendor/github.com/containerd/containerd/diff/windows/windows.go create mode 100644 vendor/github.com/containerd/containerd/snapshots/windows/windows.go create mode 100644 vendor/github.com/konsorten/go-windows-terminal-sequences/LICENSE create mode 100644 vendor/github.com/konsorten/go-windows-terminal-sequences/README.md create mode 100644 vendor/github.com/konsorten/go-windows-terminal-sequences/go.mod create mode 100644 vendor/github.com/konsorten/go-windows-terminal-sequences/sequences.go diff --git a/vendor.conf b/vendor.conf index 6210e9f2b..235be017f 100644 --- a/vendor.conf +++ b/vendor.conf @@ -25,6 +25,7 @@ github.com/opencontainers/runc f4982d86f7fde0b6f953cc62ccc4022c519a10a9 # v1.0.0 github.com/opencontainers/image-spec v1.0.1 github.com/opencontainers/go-digest c9281466c8b2f606084ac71339773efd177436e7 github.com/matttproud/golang_protobuf_extensions v1.0.1 +github.com/konsorten/go-windows-terminal-sequences v1.0.1 github.com/grpc-ecosystem/go-grpc-prometheus v1.1 github.com/google/uuid v1.1.1 github.com/golang/protobuf v1.2.0 diff --git a/vendor/github.com/Microsoft/go-winio/pkg/fs/fs_windows.go b/vendor/github.com/Microsoft/go-winio/pkg/fs/fs_windows.go new file mode 100644 index 000000000..cf7de6d91 --- /dev/null +++ b/vendor/github.com/Microsoft/go-winio/pkg/fs/fs_windows.go @@ -0,0 +1,47 @@ +package fs + +import ( + "errors" + "path/filepath" + "syscall" + "unsafe" + + "golang.org/x/sys/windows" +) + +var ( + // ErrInvalidPath is returned when the location of a file path doesn't begin with a driver letter. + ErrInvalidPath = errors.New("the path provided to GetFileSystemType must start with a drive letter") +) + +// GetFileSystemType obtains the type of a file system through GetVolumeInformation. 
+// https://msdn.microsoft.com/en-us/library/windows/desktop/aa364993(v=vs.85).aspx +func GetFileSystemType(path string) (fsType string, hr error) { + drive := filepath.VolumeName(path) + if len(drive) != 2 { + return "", ErrInvalidPath + } + + var ( + modkernel32 = windows.NewLazySystemDLL("kernel32.dll") + procGetVolumeInformation = modkernel32.NewProc("GetVolumeInformationW") + buf = make([]uint16, 255) + size = windows.MAX_PATH + 1 + ) + drive += `\` + n := uintptr(unsafe.Pointer(nil)) + r0, _, _ := syscall.Syscall9(procGetVolumeInformation.Addr(), 8, uintptr(unsafe.Pointer(windows.StringToUTF16Ptr(drive))), n, n, n, n, n, uintptr(unsafe.Pointer(&buf[0])), uintptr(size), 0) + if int32(r0) < 0 { + hr = syscall.Errno(win32FromHresult(r0)) + } + fsType = windows.UTF16ToString(buf) + return +} + +// win32FromHresult is a helper function to get the win32 error code from an HRESULT. +func win32FromHresult(hr uintptr) uintptr { + if hr&0x1fff0000 == 0x00070000 { + return hr & 0xffff + } + return hr +} diff --git a/vendor/github.com/Microsoft/go-winio/pkg/security/grantvmgroupaccess.go b/vendor/github.com/Microsoft/go-winio/pkg/security/grantvmgroupaccess.go new file mode 100644 index 000000000..2df31b660 --- /dev/null +++ b/vendor/github.com/Microsoft/go-winio/pkg/security/grantvmgroupaccess.go @@ -0,0 +1,159 @@ +package security + +import ( + "os" + "syscall" + "unsafe" + + "github.com/pkg/errors" +) + +type ( + accessMask uint32 + accessMode uint32 + desiredAccess uint32 + inheritMode uint32 + objectType uint32 + shareMode uint32 + securityInformation uint32 + trusteeForm uint32 + trusteeType uint32 + + explicitAccess struct { + accessPermissions accessMask + accessMode accessMode + inheritance inheritMode + trustee trustee + } + + trustee struct { + multipleTrustee *trustee + multipleTrusteeOperation int32 + trusteeForm trusteeForm + trusteeType trusteeType + name uintptr + } +) + +const ( + accessMaskDesiredPermission accessMask = 1 << 31 // GENERIC_READ + + 
accessModeGrant accessMode = 1 + + desiredAccessReadControl desiredAccess = 0x20000 + desiredAccessWriteDac desiredAccess = 0x40000 + + gvmga = "GrantVmGroupAccess:" + + inheritModeNoInheritance inheritMode = 0x0 + inheritModeSubContainersAndObjectsInherit inheritMode = 0x3 + + objectTypeFileObject objectType = 0x1 + + securityInformationDACL securityInformation = 0x4 + + shareModeRead shareMode = 0x1 + shareModeWrite shareMode = 0x2 + + sidVmGroup = "S-1-5-83-0" + + trusteeFormIsSid trusteeForm = 0 + + trusteeTypeWellKnownGroup trusteeType = 5 +) + +// GrantVMGroupAccess sets the DACL for a specified file or directory to +// include Grant ACE entries for the VM Group SID. This is a golang re- +// implementation of the same function in vmcompute, just not exported in +// RS5. Which kind of sucks. Sucks a lot :/ +func GrantVmGroupAccess(name string) error { + // Stat (to determine if `name` is a directory). + s, err := os.Stat(name) + if err != nil { + return errors.Wrapf(err, "%s os.Stat %s", gvmga, name) + } + + // Get a handle to the file/directory. Must defer Close on success. + fd, err := createFile(name, s.IsDir()) + if err != nil { + return err // Already wrapped + } + defer syscall.CloseHandle(fd) + + // Get the current DACL and Security Descriptor. Must defer LocalFree on success. + ot := objectTypeFileObject + si := securityInformationDACL + sd := uintptr(0) + origDACL := uintptr(0) + if err := getSecurityInfo(fd, uint32(ot), uint32(si), nil, nil, &origDACL, nil, &sd); err != nil { + return errors.Wrapf(err, "%s GetSecurityInfo %s", gvmga, name) + } + defer syscall.LocalFree((syscall.Handle)(unsafe.Pointer(sd))) + + // Generate a new DACL which is the current DACL with the required ACEs added. + // Must defer LocalFree on success. 
+ newDACL, err := generateDACLWithAcesAdded(name, s.IsDir(), origDACL) + if err != nil { + return err // Already wrapped + } + defer syscall.LocalFree((syscall.Handle)(unsafe.Pointer(newDACL))) + + // And finally use SetSecurityInfo to apply the updated DACL. + if err := setSecurityInfo(fd, uint32(ot), uint32(si), uintptr(0), uintptr(0), newDACL, uintptr(0)); err != nil { + return errors.Wrapf(err, "%s SetSecurityInfo %s", gvmga, name) + } + + return nil +} + +// createFile is a helper function to call [Nt]CreateFile to get a handle to +// the file or directory. +func createFile(name string, isDir bool) (syscall.Handle, error) { + namep := syscall.StringToUTF16(name) + da := uint32(desiredAccessReadControl | desiredAccessWriteDac) + sm := uint32(shareModeRead | shareModeWrite) + fa := uint32(syscall.FILE_ATTRIBUTE_NORMAL) + if isDir { + fa = uint32(fa | syscall.FILE_FLAG_BACKUP_SEMANTICS) + } + fd, err := syscall.CreateFile(&namep[0], da, sm, nil, syscall.OPEN_EXISTING, fa, 0) + if err != nil { + return 0, errors.Wrapf(err, "%s syscall.CreateFile %s", gvmga, name) + } + return fd, nil +} + +// generateDACLWithAcesAdded generates a new DACL with the two needed ACEs added. +// The caller is responsible for LocalFree of the returned DACL on success. 
+func generateDACLWithAcesAdded(name string, isDir bool, origDACL uintptr) (uintptr, error) { + // Generate pointers to the SIDs based on the string SIDs + sid, err := syscall.StringToSid(sidVmGroup) + if err != nil { + return 0, errors.Wrapf(err, "%s syscall.StringToSid %s %s", gvmga, name, sidVmGroup) + } + + inheritance := inheritModeNoInheritance + if isDir { + inheritance = inheritModeSubContainersAndObjectsInherit + } + + eaArray := []explicitAccess{ + explicitAccess{ + accessPermissions: accessMaskDesiredPermission, + accessMode: accessModeGrant, + inheritance: inheritance, + trustee: trustee{ + trusteeForm: trusteeFormIsSid, + trusteeType: trusteeTypeWellKnownGroup, + name: uintptr(unsafe.Pointer(sid)), + }, + }, + } + + modifiedDACL := uintptr(0) + if err := setEntriesInAcl(uintptr(uint32(1)), uintptr(unsafe.Pointer(&eaArray[0])), origDACL, &modifiedDACL); err != nil { + return 0, errors.Wrapf(err, "%s SetEntriesInAcl %s", gvmga, name) + } + + return modifiedDACL, nil +} diff --git a/vendor/github.com/Microsoft/go-winio/pkg/security/syscall_windows.go b/vendor/github.com/Microsoft/go-winio/pkg/security/syscall_windows.go new file mode 100644 index 000000000..c40c2739b --- /dev/null +++ b/vendor/github.com/Microsoft/go-winio/pkg/security/syscall_windows.go @@ -0,0 +1,7 @@ +package security + +//go:generate go run mksyscall_windows.go -output zsyscall_windows.go syscall_windows.go + +//sys getSecurityInfo(handle syscall.Handle, objectType uint32, si uint32, ppsidOwner **uintptr, ppsidGroup **uintptr, ppDacl *uintptr, ppSacl *uintptr, ppSecurityDescriptor *uintptr) (err error) [failretval!=0] = advapi32.GetSecurityInfo +//sys setSecurityInfo(handle syscall.Handle, objectType uint32, si uint32, psidOwner uintptr, psidGroup uintptr, pDacl uintptr, pSacl uintptr) (err error) [failretval!=0] = advapi32.SetSecurityInfo +//sys setEntriesInAcl(count uintptr, pListOfEEs uintptr, oldAcl uintptr, newAcl *uintptr) (err error) [failretval!=0] = advapi32.SetEntriesInAclW 
diff --git a/vendor/github.com/Microsoft/go-winio/pkg/security/zsyscall_windows.go b/vendor/github.com/Microsoft/go-winio/pkg/security/zsyscall_windows.go new file mode 100644 index 000000000..0f0c0deff --- /dev/null +++ b/vendor/github.com/Microsoft/go-winio/pkg/security/zsyscall_windows.go @@ -0,0 +1,81 @@ +// Code generated mksyscall_windows.exe DO NOT EDIT + +package security + +import ( + "syscall" + "unsafe" + + "golang.org/x/sys/windows" +) + +var _ unsafe.Pointer + +// Do the interface allocations only once for common +// Errno values. +const ( + errnoERROR_IO_PENDING = 997 +) + +var ( + errERROR_IO_PENDING error = syscall.Errno(errnoERROR_IO_PENDING) +) + +// errnoErr returns common boxed Errno values, to prevent +// allocations at runtime. +func errnoErr(e syscall.Errno) error { + switch e { + case 0: + return nil + case errnoERROR_IO_PENDING: + return errERROR_IO_PENDING + } + // TODO: add more here, after collecting data on the common + // error values see on Windows. (perhaps when running + // all.bat?) 
+ return e +} + +var ( + modadvapi32 = windows.NewLazySystemDLL("advapi32.dll") + + procGetSecurityInfo = modadvapi32.NewProc("GetSecurityInfo") + procSetSecurityInfo = modadvapi32.NewProc("SetSecurityInfo") + procSetEntriesInAclW = modadvapi32.NewProc("SetEntriesInAclW") +) + +func getSecurityInfo(handle syscall.Handle, objectType uint32, si uint32, ppsidOwner **uintptr, ppsidGroup **uintptr, ppDacl *uintptr, ppSacl *uintptr, ppSecurityDescriptor *uintptr) (err error) { + r1, _, e1 := syscall.Syscall9(procGetSecurityInfo.Addr(), 8, uintptr(handle), uintptr(objectType), uintptr(si), uintptr(unsafe.Pointer(ppsidOwner)), uintptr(unsafe.Pointer(ppsidGroup)), uintptr(unsafe.Pointer(ppDacl)), uintptr(unsafe.Pointer(ppSacl)), uintptr(unsafe.Pointer(ppSecurityDescriptor)), 0) + if r1 != 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + +func setSecurityInfo(handle syscall.Handle, objectType uint32, si uint32, psidOwner uintptr, psidGroup uintptr, pDacl uintptr, pSacl uintptr) (err error) { + r1, _, e1 := syscall.Syscall9(procSetSecurityInfo.Addr(), 7, uintptr(handle), uintptr(objectType), uintptr(si), uintptr(psidOwner), uintptr(psidGroup), uintptr(pDacl), uintptr(pSacl), 0, 0) + if r1 != 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + +func setEntriesInAcl(count uintptr, pListOfEEs uintptr, oldAcl uintptr, newAcl *uintptr) (err error) { + r1, _, e1 := syscall.Syscall6(procSetEntriesInAclW.Addr(), 4, uintptr(count), uintptr(pListOfEEs), uintptr(oldAcl), uintptr(unsafe.Pointer(newAcl)), 0, 0) + if r1 != 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} diff --git a/vendor/github.com/Microsoft/go-winio/vhd/vhd.go b/vendor/github.com/Microsoft/go-winio/vhd/vhd.go new file mode 100644 index 000000000..229ac2556 --- /dev/null +++ b/vendor/github.com/Microsoft/go-winio/vhd/vhd.go @@ -0,0 +1,151 @@ +// +build windows + +package vhd + +import 
"syscall" + +//go:generate go run mksyscall_windows.go -output zvhd.go vhd.go + +//sys createVirtualDisk(virtualStorageType *virtualStorageType, path string, virtualDiskAccessMask uint32, securityDescriptor *uintptr, flags uint32, providerSpecificFlags uint32, parameters *createVirtualDiskParameters, o *syscall.Overlapped, handle *syscall.Handle) (err error) [failretval != 0] = VirtDisk.CreateVirtualDisk +//sys openVirtualDisk(virtualStorageType *virtualStorageType, path string, virtualDiskAccessMask uint32, flags uint32, parameters *openVirtualDiskParameters, handle *syscall.Handle) (err error) [failretval != 0] = VirtDisk.OpenVirtualDisk +//sys detachVirtualDisk(handle syscall.Handle, flags uint32, providerSpecificFlags uint32) (err error) [failretval != 0] = VirtDisk.DetachVirtualDisk + +type virtualStorageType struct { + DeviceID uint32 + VendorID [16]byte +} + +type ( + createVirtualDiskFlag uint32 + VirtualDiskAccessMask uint32 + VirtualDiskFlag uint32 +) + +const ( + // Flags for creating a VHD (not exported) + createVirtualDiskFlagNone createVirtualDiskFlag = 0 + createVirtualDiskFlagFullPhysicalAllocation createVirtualDiskFlag = 1 + createVirtualDiskFlagPreventWritesToSourceDisk createVirtualDiskFlag = 2 + createVirtualDiskFlagDoNotCopyMetadataFromParent createVirtualDiskFlag = 4 + + // Access Mask for opening a VHD + VirtualDiskAccessNone VirtualDiskAccessMask = 0 + VirtualDiskAccessAttachRO VirtualDiskAccessMask = 65536 + VirtualDiskAccessAttachRW VirtualDiskAccessMask = 131072 + VirtualDiskAccessDetach VirtualDiskAccessMask = 262144 + VirtualDiskAccessGetInfo VirtualDiskAccessMask = 524288 + VirtualDiskAccessCreate VirtualDiskAccessMask = 1048576 + VirtualDiskAccessMetaOps VirtualDiskAccessMask = 2097152 + VirtualDiskAccessRead VirtualDiskAccessMask = 851968 + VirtualDiskAccessAll VirtualDiskAccessMask = 4128768 + VirtualDiskAccessWritable VirtualDiskAccessMask = 3276800 + + // Flags for opening a VHD + OpenVirtualDiskFlagNone VirtualDiskFlag = 0 + 
OpenVirtualDiskFlagNoParents VirtualDiskFlag = 0x1 + OpenVirtualDiskFlagBlankFile VirtualDiskFlag = 0x2 + OpenVirtualDiskFlagBootDrive VirtualDiskFlag = 0x4 + OpenVirtualDiskFlagCachedIO VirtualDiskFlag = 0x8 + OpenVirtualDiskFlagCustomDiffChain VirtualDiskFlag = 0x10 + OpenVirtualDiskFlagParentCachedIO VirtualDiskFlag = 0x20 + OpenVirtualDiskFlagVhdSetFileOnly VirtualDiskFlag = 0x40 + OpenVirtualDiskFlagIgnoreRelativeParentLocator VirtualDiskFlag = 0x80 + OpenVirtualDiskFlagNoWriteHardening VirtualDiskFlag = 0x100 +) + +type createVersion2 struct { + UniqueID [16]byte // GUID + MaximumSize uint64 + BlockSizeInBytes uint32 + SectorSizeInBytes uint32 + ParentPath *uint16 // string + SourcePath *uint16 // string + OpenFlags uint32 + ParentVirtualStorageType virtualStorageType + SourceVirtualStorageType virtualStorageType + ResiliencyGUID [16]byte // GUID +} + +type createVirtualDiskParameters struct { + Version uint32 // Must always be set to 2 + Version2 createVersion2 +} + +type openVersion2 struct { + GetInfoOnly int32 // bool but 4-byte aligned + ReadOnly int32 // bool but 4-byte aligned + ResiliencyGUID [16]byte // GUID +} + +type openVirtualDiskParameters struct { + Version uint32 // Must always be set to 2 + Version2 openVersion2 +} + +// CreateVhdx will create a simple vhdx file at the given path using default values. 
+func CreateVhdx(path string, maxSizeInGb, blockSizeInMb uint32) error { + var ( + defaultType virtualStorageType + handle syscall.Handle + ) + + parameters := createVirtualDiskParameters{ + Version: 2, + Version2: createVersion2{ + MaximumSize: uint64(maxSizeInGb) * 1024 * 1024 * 1024, + BlockSizeInBytes: blockSizeInMb * 1024 * 1024, + }, + } + + if err := createVirtualDisk( + &defaultType, + path, + uint32(VirtualDiskAccessNone), + nil, + uint32(createVirtualDiskFlagNone), + 0, + &parameters, + nil, + &handle); err != nil { + return err + } + + if err := syscall.CloseHandle(handle); err != nil { + return err + } + + return nil +} + +// DetachVhd detaches a mounted container layer vhd found at `path`. +func DetachVhd(path string) error { + handle, err := OpenVirtualDisk( + path, + VirtualDiskAccessNone, + OpenVirtualDiskFlagCachedIO|OpenVirtualDiskFlagIgnoreRelativeParentLocator) + + if err != nil { + return err + } + defer syscall.CloseHandle(handle) + return detachVirtualDisk(handle, 0, 0) +} + +// OpenVirtualDisk obtains a handle to a VHD opened with supplied access mask and flags. +func OpenVirtualDisk(path string, accessMask VirtualDiskAccessMask, flag VirtualDiskFlag) (syscall.Handle, error) { + var ( + defaultType virtualStorageType + handle syscall.Handle + ) + parameters := openVirtualDiskParameters{Version: 2} + if err := openVirtualDisk( + &defaultType, + path, + uint32(accessMask), + uint32(flag), + &parameters, + &handle); err != nil { + return 0, err + } + return handle, nil +} diff --git a/vendor/github.com/Microsoft/go-winio/vhd/zvhd.go b/vendor/github.com/Microsoft/go-winio/vhd/zvhd.go new file mode 100644 index 000000000..00599ea49 --- /dev/null +++ b/vendor/github.com/Microsoft/go-winio/vhd/zvhd.go @@ -0,0 +1,99 @@ +// MACHINE GENERATED BY 'go generate' COMMAND; DO NOT EDIT + +package vhd + +import ( + "syscall" + "unsafe" + + "golang.org/x/sys/windows" +) + +var _ unsafe.Pointer + +// Do the interface allocations only once for common +// Errno values.
+const ( + errnoERROR_IO_PENDING = 997 +) + +var ( + errERROR_IO_PENDING error = syscall.Errno(errnoERROR_IO_PENDING) +) + +// errnoErr returns common boxed Errno values, to prevent +// allocations at runtime. +func errnoErr(e syscall.Errno) error { + switch e { + case 0: + return nil + case errnoERROR_IO_PENDING: + return errERROR_IO_PENDING + } + // TODO: add more here, after collecting data on the common + // error values see on Windows. (perhaps when running + // all.bat?) + return e +} + +var ( + modVirtDisk = windows.NewLazySystemDLL("VirtDisk.dll") + + procCreateVirtualDisk = modVirtDisk.NewProc("CreateVirtualDisk") + procOpenVirtualDisk = modVirtDisk.NewProc("OpenVirtualDisk") + procDetachVirtualDisk = modVirtDisk.NewProc("DetachVirtualDisk") +) + +func createVirtualDisk(virtualStorageType *virtualStorageType, path string, virtualDiskAccessMask uint32, securityDescriptor *uintptr, flags uint32, providerSpecificFlags uint32, parameters *createVirtualDiskParameters, o *syscall.Overlapped, handle *syscall.Handle) (err error) { + var _p0 *uint16 + _p0, err = syscall.UTF16PtrFromString(path) + if err != nil { + return + } + return _createVirtualDisk(virtualStorageType, _p0, virtualDiskAccessMask, securityDescriptor, flags, providerSpecificFlags, parameters, o, handle) +} + +func _createVirtualDisk(virtualStorageType *virtualStorageType, path *uint16, virtualDiskAccessMask uint32, securityDescriptor *uintptr, flags uint32, providerSpecificFlags uint32, parameters *createVirtualDiskParameters, o *syscall.Overlapped, handle *syscall.Handle) (err error) { + r1, _, e1 := syscall.Syscall9(procCreateVirtualDisk.Addr(), 9, uintptr(unsafe.Pointer(virtualStorageType)), uintptr(unsafe.Pointer(path)), uintptr(virtualDiskAccessMask), uintptr(unsafe.Pointer(securityDescriptor)), uintptr(flags), uintptr(providerSpecificFlags), uintptr(unsafe.Pointer(parameters)), uintptr(unsafe.Pointer(o)), uintptr(unsafe.Pointer(handle))) + if r1 != 0 { + if e1 != 0 { + err = errnoErr(e1) + } 
else { + err = syscall.EINVAL + } + } + return +} + +func openVirtualDisk(virtualStorageType *virtualStorageType, path string, virtualDiskAccessMask uint32, flags uint32, parameters *openVirtualDiskParameters, handle *syscall.Handle) (err error) { + var _p0 *uint16 + _p0, err = syscall.UTF16PtrFromString(path) + if err != nil { + return + } + return _openVirtualDisk(virtualStorageType, _p0, virtualDiskAccessMask, flags, parameters, handle) +} + +func _openVirtualDisk(virtualStorageType *virtualStorageType, path *uint16, virtualDiskAccessMask uint32, flags uint32, parameters *openVirtualDiskParameters, handle *syscall.Handle) (err error) { + r1, _, e1 := syscall.Syscall6(procOpenVirtualDisk.Addr(), 6, uintptr(unsafe.Pointer(virtualStorageType)), uintptr(unsafe.Pointer(path)), uintptr(virtualDiskAccessMask), uintptr(flags), uintptr(unsafe.Pointer(parameters)), uintptr(unsafe.Pointer(handle))) + if r1 != 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} + +func detachVirtualDisk(handle syscall.Handle, flags uint32, providerSpecificFlags uint32) (err error) { + r1, _, e1 := syscall.Syscall(procDetachVirtualDisk.Addr(), 3, uintptr(handle), uintptr(flags), uintptr(providerSpecificFlags)) + if r1 != 0 { + if e1 != 0 { + err = errnoErr(e1) + } else { + err = syscall.EINVAL + } + } + return +} diff --git a/vendor/github.com/Microsoft/hcsshim/ext4/internal/compactext4/compact.go b/vendor/github.com/Microsoft/hcsshim/ext4/internal/compactext4/compact.go new file mode 100644 index 000000000..f2274fd4c --- /dev/null +++ b/vendor/github.com/Microsoft/hcsshim/ext4/internal/compactext4/compact.go @@ -0,0 +1,1263 @@ +package compactext4 + +import ( + "bufio" + "bytes" + "encoding/binary" + "errors" + "fmt" + "io" + "path" + "sort" + "strings" + "time" + + "github.com/Microsoft/hcsshim/ext4/internal/format" +) + +// Writer writes a compact ext4 file system. 
+type Writer struct { + f io.ReadWriteSeeker + bw *bufio.Writer + inodes []*inode + curName string + curInode *inode + pos int64 + dataWritten, dataMax int64 + err error + initialized bool + supportInlineData bool + maxDiskSize int64 + gdBlocks uint32 +} + +// Mode flags for Linux files. +const ( + S_IXOTH = format.S_IXOTH + S_IWOTH = format.S_IWOTH + S_IROTH = format.S_IROTH + S_IXGRP = format.S_IXGRP + S_IWGRP = format.S_IWGRP + S_IRGRP = format.S_IRGRP + S_IXUSR = format.S_IXUSR + S_IWUSR = format.S_IWUSR + S_IRUSR = format.S_IRUSR + S_ISVTX = format.S_ISVTX + S_ISGID = format.S_ISGID + S_ISUID = format.S_ISUID + S_IFIFO = format.S_IFIFO + S_IFCHR = format.S_IFCHR + S_IFDIR = format.S_IFDIR + S_IFBLK = format.S_IFBLK + S_IFREG = format.S_IFREG + S_IFLNK = format.S_IFLNK + S_IFSOCK = format.S_IFSOCK + + TypeMask = format.TypeMask +) + +type inode struct { + Size int64 + Atime, Ctime, Mtime, Crtime uint64 + Number format.InodeNumber + Mode uint16 + Uid, Gid uint32 + LinkCount uint32 + XattrBlock uint32 + BlockCount uint32 + Devmajor, Devminor uint32 + Flags format.InodeFlag + Data []byte + XattrInline []byte + Children directory +} + +func (node *inode) FileType() uint16 { + return node.Mode & format.TypeMask +} + +func (node *inode) IsDir() bool { + return node.FileType() == S_IFDIR +} + +// A File represents a file to be added to an ext4 file system. 
+type File struct { + Linkname string + Size int64 + Mode uint16 + Uid, Gid uint32 + Atime, Ctime, Mtime, Crtime time.Time + Devmajor, Devminor uint32 + Xattrs map[string][]byte +} + +const ( + inodeFirst = 11 + inodeLostAndFound = inodeFirst + + blockSize = 4096 + blocksPerGroup = blockSize * 8 + inodeSize = 256 + maxInodesPerGroup = blockSize * 8 // Limited by the inode bitmap + inodesPerGroupIncrement = blockSize / inodeSize + + defaultMaxDiskSize = 16 * 1024 * 1024 * 1024 // 16GB + maxMaxDiskSize = 16 * 1024 * 1024 * 1024 * 1024 // 16TB + + groupDescriptorSize = 32 // Use the small group descriptor + groupsPerDescriptorBlock = blockSize / groupDescriptorSize + + maxFileSize = 128 * 1024 * 1024 * 1024 // 128GB file size maximum for now + smallSymlinkSize = 59 // max symlink size that goes directly in the inode + maxBlocksPerExtent = 0x8000 // maximum number of blocks in an extent + inodeDataSize = 60 + inodeUsedSize = 152 // fields through CrtimeExtra + inodeExtraSize = inodeSize - inodeUsedSize + xattrInodeOverhead = 4 + 4 // magic number + empty next entry value + xattrBlockOverhead = 32 + 4 // header + empty next entry value + inlineDataXattrOverhead = xattrInodeOverhead + 16 + 4 // entry + "data" + inlineDataSize = inodeDataSize + inodeExtraSize - inlineDataXattrOverhead +) + +type exceededMaxSizeError struct { + Size int64 +} + +func (err exceededMaxSizeError) Error() string { + return fmt.Sprintf("disk exceeded maximum size of %d bytes", err.Size) +} + +var directoryEntrySize = binary.Size(format.DirectoryEntry{}) +var extraIsize = uint16(inodeUsedSize - 128) + +type directory map[string]*inode + +func splitFirst(p string) (string, string) { + n := strings.IndexByte(p, '/') + if n >= 0 { + return p[:n], p[n+1:] + } + return p, "" +} + +func (w *Writer) findPath(root *inode, p string) *inode { + inode := root + for inode != nil && len(p) != 0 { + name, rest := splitFirst(p) + p = rest + inode = inode.Children[name] + } + return inode +} + +func 
timeToFsTime(t time.Time) uint64 { + if t.IsZero() { + return 0 + } + s := t.Unix() + if s < -0x80000000 { + return 0x80000000 + } + if s > 0x37fffffff { + return 0x37fffffff + } + return uint64(s) | uint64(t.Nanosecond())<<34 +} + +func fsTimeToTime(t uint64) time.Time { + if t == 0 { + return time.Time{} + } + s := int64(t & 0x3ffffffff) + if s > 0x7fffffff && s < 0x100000000 { + s = int64(int32(uint32(s))) + } + return time.Unix(s, int64(t>>34)) +} + +func (w *Writer) getInode(i format.InodeNumber) *inode { + if i == 0 || int(i) > len(w.inodes) { + return nil + } + return w.inodes[i-1] +} + +var xattrPrefixes = []struct { + Index uint8 + Prefix string +}{ + {2, "system.posix_acl_access"}, + {3, "system.posix_acl_default"}, + {8, "system.richacl"}, + {7, "system."}, + {1, "user."}, + {4, "trusted."}, + {6, "security."}, +} + +func compressXattrName(name string) (uint8, string) { + for _, p := range xattrPrefixes { + if strings.HasPrefix(name, p.Prefix) { + return p.Index, name[len(p.Prefix):] + } + } + return 0, name +} + +func decompressXattrName(index uint8, name string) string { + for _, p := range xattrPrefixes { + if index == p.Index { + return p.Prefix + name + } + } + return name +} + +func hashXattrEntry(name string, value []byte) uint32 { + var hash uint32 + for i := 0; i < len(name); i++ { + hash = (hash << 5) ^ (hash >> 27) ^ uint32(name[i]) + } + + for i := 0; i+3 < len(value); i += 4 { + hash = (hash << 16) ^ (hash >> 16) ^ binary.LittleEndian.Uint32(value[i:i+4]) + } + + if len(value)%4 != 0 { + var last [4]byte + copy(last[:], value[len(value)&^3:]) + hash = (hash << 16) ^ (hash >> 16) ^ binary.LittleEndian.Uint32(last[:]) + } + return hash +} + +type xattr struct { + Name string + Index uint8 + Value []byte +} + +func (x *xattr) EntryLen() int { + return (len(x.Name)+3)&^3 + 16 +} + +func (x *xattr) ValueLen() int { + return (len(x.Value) + 3) &^ 3 +} + +type xattrState struct { + inode, block []xattr + inodeLeft, blockLeft int +} + +func (s 
*xattrState) init() { + s.inodeLeft = inodeExtraSize - xattrInodeOverhead + s.blockLeft = blockSize - xattrBlockOverhead +} + +func (s *xattrState) addXattr(name string, value []byte) bool { + index, name := compressXattrName(name) + x := xattr{ + Index: index, + Name: name, + Value: value, + } + length := x.EntryLen() + x.ValueLen() + if s.inodeLeft >= length { + s.inode = append(s.inode, x) + s.inodeLeft -= length + } else if s.blockLeft >= length { + s.block = append(s.block, x) + s.blockLeft -= length + } else { + return false + } + return true +} + +func putXattrs(xattrs []xattr, b []byte, offsetDelta uint16) { + offset := uint16(len(b)) + offsetDelta + eb := b + db := b + for _, xattr := range xattrs { + vl := xattr.ValueLen() + offset -= uint16(vl) + eb[0] = uint8(len(xattr.Name)) + eb[1] = xattr.Index + binary.LittleEndian.PutUint16(eb[2:], offset) + binary.LittleEndian.PutUint32(eb[8:], uint32(len(xattr.Value))) + binary.LittleEndian.PutUint32(eb[12:], hashXattrEntry(xattr.Name, xattr.Value)) + copy(eb[16:], xattr.Name) + eb = eb[xattr.EntryLen():] + copy(db[len(db)-vl:], xattr.Value) + db = db[:len(db)-vl] + } +} + +func getXattrs(b []byte, xattrs map[string][]byte, offsetDelta uint16) { + eb := b + for len(eb) != 0 { + nameLen := eb[0] + if nameLen == 0 { + break + } + index := eb[1] + offset := binary.LittleEndian.Uint16(eb[2:]) - offsetDelta + valueLen := binary.LittleEndian.Uint32(eb[8:]) + attr := xattr{ + Index: index, + Name: string(eb[16 : 16+nameLen]), + Value: b[offset : uint32(offset)+valueLen], + } + xattrs[decompressXattrName(index, attr.Name)] = attr.Value + eb = eb[attr.EntryLen():] + } +} + +func (w *Writer) writeXattrs(inode *inode, state *xattrState) error { + // Write the inline attributes. 
+ if len(state.inode) != 0 { + inode.XattrInline = make([]byte, inodeExtraSize) + binary.LittleEndian.PutUint32(inode.XattrInline[0:], format.XAttrHeaderMagic) // Magic + putXattrs(state.inode, inode.XattrInline[4:], 0) + } + + // Write the block attributes. If there was previously an xattr block, then + // rewrite it even if it is now empty. + if len(state.block) != 0 || inode.XattrBlock != 0 { + sort.Slice(state.block, func(i, j int) bool { + return state.block[i].Index < state.block[j].Index || + len(state.block[i].Name) < len(state.block[j].Name) || + state.block[i].Name < state.block[j].Name + }) + + var b [blockSize]byte + binary.LittleEndian.PutUint32(b[0:], format.XAttrHeaderMagic) // Magic + binary.LittleEndian.PutUint32(b[4:], 1) // ReferenceCount + binary.LittleEndian.PutUint32(b[8:], 1) // Blocks + putXattrs(state.block, b[32:], 32) + + orig := w.block() + if inode.XattrBlock == 0 { + inode.XattrBlock = orig + inode.BlockCount++ + } else { + // Reuse the original block. + w.seekBlock(inode.XattrBlock) + defer w.seekBlock(orig) + } + + if _, err := w.write(b[:]); err != nil { + return err + } + } + + return nil +} + +func (w *Writer) write(b []byte) (int, error) { + if w.err != nil { + return 0, w.err + } + if w.pos+int64(len(b)) > w.maxDiskSize { + w.err = exceededMaxSizeError{w.maxDiskSize} + return 0, w.err + } + n, err := w.bw.Write(b) + w.pos += int64(n) + w.err = err + return n, err +} + +func (w *Writer) zero(n int64) (int64, error) { + if w.err != nil { + return 0, w.err + } + if w.pos+int64(n) > w.maxDiskSize { + w.err = exceededMaxSizeError{w.maxDiskSize} + return 0, w.err + } + n, err := io.CopyN(w.bw, zero, n) + w.pos += n + w.err = err + return n, err +} + +func (w *Writer) makeInode(f *File, node *inode) (*inode, error) { + mode := f.Mode + if mode&format.TypeMask == 0 { + mode |= format.S_IFREG + } + typ := mode & format.TypeMask + ino := format.InodeNumber(len(w.inodes) + 1) + if node == nil { + node = &inode{ + Number: ino, + } + if typ 
== S_IFDIR { + node.Children = make(directory) + node.LinkCount = 1 // A directory is linked to itself. + } + } else if node.Flags&format.InodeFlagExtents != 0 { + // Since we cannot deallocate or reuse blocks, don't allow updates that + // would invalidate data that has already been written. + return nil, errors.New("cannot overwrite file with non-inline data") + } + node.Mode = mode + node.Uid = f.Uid + node.Gid = f.Gid + node.Flags = format.InodeFlagHugeFile + node.Atime = timeToFsTime(f.Atime) + node.Ctime = timeToFsTime(f.Ctime) + node.Mtime = timeToFsTime(f.Mtime) + node.Crtime = timeToFsTime(f.Crtime) + node.Devmajor = f.Devmajor + node.Devminor = f.Devminor + node.Data = nil + node.XattrInline = nil + + var xstate xattrState + xstate.init() + + var size int64 + switch typ { + case format.S_IFREG: + size = f.Size + if f.Size > maxFileSize { + return nil, fmt.Errorf("file too big: %d > %d", f.Size, int64(maxFileSize)) + } + if f.Size <= inlineDataSize && w.supportInlineData { + node.Data = make([]byte, f.Size) + extra := 0 + if f.Size > inodeDataSize { + extra = int(f.Size - inodeDataSize) + } + // Add a dummy entry for now. + if !xstate.addXattr("system.data", node.Data[:extra]) { + panic("not enough room for inline data") + } + node.Flags |= format.InodeFlagInlineData + } + case format.S_IFLNK: + node.Mode |= 0777 // Symlinks should appear as ugw rwx + size = int64(len(f.Linkname)) + if size <= smallSymlinkSize { + // Special case: small symlinks go directly in Block without setting + // an inline data flag. + node.Data = make([]byte, len(f.Linkname)) + copy(node.Data, f.Linkname) + } + case format.S_IFDIR, format.S_IFIFO, format.S_IFSOCK, format.S_IFCHR, format.S_IFBLK: + default: + return nil, fmt.Errorf("invalid mode %o", mode) + } + + // Accumulate the extended attributes. + if len(f.Xattrs) != 0 { + // Sort the xattrs to avoid non-determinism in map iteration. 
+ var xattrs []string + for name := range f.Xattrs { + xattrs = append(xattrs, name) + } + sort.Strings(xattrs) + for _, name := range xattrs { + if !xstate.addXattr(name, f.Xattrs[name]) { + return nil, fmt.Errorf("could not fit xattr %s", name) + } + } + } + + if err := w.writeXattrs(node, &xstate); err != nil { + return nil, err + } + + node.Size = size + if typ == format.S_IFLNK && size > smallSymlinkSize { + // Write the link name as data. + w.startInode("", node, size) + if _, err := w.Write([]byte(f.Linkname)); err != nil { + return nil, err + } + if err := w.finishInode(); err != nil { + return nil, err + } + } + + if int(node.Number-1) >= len(w.inodes) { + w.inodes = append(w.inodes, node) + } + return node, nil +} + +func (w *Writer) root() *inode { + return w.getInode(format.InodeRoot) +} + +func (w *Writer) lookup(name string, mustExist bool) (*inode, *inode, string, error) { + root := w.root() + cleanname := path.Clean("/" + name)[1:] + if len(cleanname) == 0 { + return root, root, "", nil + } + dirname, childname := path.Split(cleanname) + if len(childname) == 0 || len(childname) > 0xff { + return nil, nil, "", fmt.Errorf("%s: invalid name", name) + } + dir := w.findPath(root, dirname) + if dir == nil || !dir.IsDir() { + return nil, nil, "", fmt.Errorf("%s: path not found", name) + } + child := dir.Children[childname] + if child == nil && mustExist { + return nil, nil, "", fmt.Errorf("%s: file not found", name) + } + return dir, child, childname, nil +} + +// Create adds a file to the file system. 
+func (w *Writer) Create(name string, f *File) error { + if err := w.finishInode(); err != nil { + return err + } + dir, existing, childname, err := w.lookup(name, false) + if err != nil { + return err + } + var reuse *inode + if existing != nil { + if existing.IsDir() { + if f.Mode&TypeMask != S_IFDIR { + return fmt.Errorf("%s: cannot replace a directory with a file", name) + } + reuse = existing + } else if f.Mode&TypeMask == S_IFDIR { + return fmt.Errorf("%s: cannot replace a file with a directory", name) + } else if existing.LinkCount < 2 { + reuse = existing + } + } else { + if f.Mode&TypeMask == S_IFDIR && dir.LinkCount >= format.MaxLinks { + return fmt.Errorf("%s: exceeded parent directory maximum link count", name) + } + } + child, err := w.makeInode(f, reuse) + if err != nil { + return fmt.Errorf("%s: %s", name, err) + } + if existing != child { + if existing != nil { + existing.LinkCount-- + } + dir.Children[childname] = child + child.LinkCount++ + if child.IsDir() { + dir.LinkCount++ + } + } + if child.Mode&format.TypeMask == format.S_IFREG { + w.startInode(name, child, f.Size) + } + return nil +} + +// Link adds a hard link to the file system. 
+func (w *Writer) Link(oldname, newname string) error { + if err := w.finishInode(); err != nil { + return err + } + newdir, existing, newchildname, err := w.lookup(newname, false) + if err != nil { + return err + } + if existing != nil && (existing.IsDir() || existing.LinkCount < 2) { + return fmt.Errorf("%s: cannot orphan existing file or directory", newname) + } + + _, oldfile, _, err := w.lookup(oldname, true) + if err != nil { + return err + } + switch oldfile.Mode & format.TypeMask { + case format.S_IFDIR, format.S_IFLNK: + return fmt.Errorf("%s: link target cannot be a directory or symlink: %s", newname, oldname) + } + + if existing != oldfile && oldfile.LinkCount >= format.MaxLinks { + return fmt.Errorf("%s: link target would exceed maximum link count: %s", newname, oldname) + } + + if existing != nil { + existing.LinkCount-- + } + oldfile.LinkCount++ + newdir.Children[newchildname] = oldfile + return nil +} + +// Stat returns information about a file that has been written. +func (w *Writer) Stat(name string) (*File, error) { + if err := w.finishInode(); err != nil { + return nil, err + } + _, node, _, err := w.lookup(name, true) + if err != nil { + return nil, err + } + f := &File{ + Size: node.Size, + Mode: node.Mode, + Uid: node.Uid, + Gid: node.Gid, + Atime: fsTimeToTime(node.Atime), + Ctime: fsTimeToTime(node.Ctime), + Mtime: fsTimeToTime(node.Mtime), + Crtime: fsTimeToTime(node.Crtime), + Devmajor: node.Devmajor, + Devminor: node.Devminor, + } + f.Xattrs = make(map[string][]byte) + if node.XattrBlock != 0 || len(node.XattrInline) != 0 { + if node.XattrBlock != 0 { + orig := w.block() + w.seekBlock(node.XattrBlock) + if w.err != nil { + return nil, w.err + } + var b [blockSize]byte + _, err := w.f.Read(b[:]) + w.seekBlock(orig) + if err != nil { + return nil, err + } + getXattrs(b[32:], f.Xattrs, 32) + } + if len(node.XattrInline) != 0 { + getXattrs(node.XattrInline[4:], f.Xattrs, 0) + delete(f.Xattrs, "system.data") + } + } + if node.FileType() == 
S_IFLNK { + if node.Size > smallSymlinkSize { + return nil, fmt.Errorf("%s: cannot retrieve link information", name) + } + f.Linkname = string(node.Data) + } + return f, nil +} + +func (w *Writer) Write(b []byte) (int, error) { + if len(b) == 0 { + return 0, nil + } + if w.dataWritten+int64(len(b)) > w.dataMax { + return 0, fmt.Errorf("%s: wrote too much: %d > %d", w.curName, w.dataWritten+int64(len(b)), w.dataMax) + } + + if w.curInode.Flags&format.InodeFlagInlineData != 0 { + copy(w.curInode.Data[w.dataWritten:], b) + w.dataWritten += int64(len(b)) + return len(b), nil + } + + n, err := w.write(b) + w.dataWritten += int64(n) + return n, err +} + +func (w *Writer) startInode(name string, inode *inode, size int64) { + if w.curInode != nil { + panic("inode already in progress") + } + w.curName = name + w.curInode = inode + w.dataWritten = 0 + w.dataMax = size +} + +func (w *Writer) block() uint32 { + return uint32(w.pos / blockSize) +} + +func (w *Writer) seekBlock(block uint32) { + w.pos = int64(block) * blockSize + if w.err != nil { + return + } + w.err = w.bw.Flush() + if w.err != nil { + return + } + _, w.err = w.f.Seek(w.pos, io.SeekStart) +} + +func (w *Writer) nextBlock() { + if w.pos%blockSize != 0 { + // Simplify callers; w.err is updated on failure. 
+ w.zero(blockSize - w.pos%blockSize) + } +} + +func fillExtents(hdr *format.ExtentHeader, extents []format.ExtentLeafNode, startBlock, offset, inodeSize uint32) { + *hdr = format.ExtentHeader{ + Magic: format.ExtentHeaderMagic, + Entries: uint16(len(extents)), + Max: uint16(cap(extents)), + Depth: 0, + } + for i := range extents { + block := offset + uint32(i)*maxBlocksPerExtent + length := inodeSize - block + if length > maxBlocksPerExtent { + length = maxBlocksPerExtent + } + start := startBlock + block + extents[i] = format.ExtentLeafNode{ + Block: block, + Length: uint16(length), + StartLow: start, + } + } +} + +func (w *Writer) writeExtents(inode *inode) error { + start := w.pos - w.dataWritten + if start%blockSize != 0 { + panic("unaligned") + } + w.nextBlock() + + startBlock := uint32(start / blockSize) + blocks := w.block() - startBlock + usedBlocks := blocks + + const extentNodeSize = 12 + const extentsPerBlock = blockSize/extentNodeSize - 1 + + extents := (blocks + maxBlocksPerExtent - 1) / maxBlocksPerExtent + var b bytes.Buffer + if extents == 0 { + // Nothing to do. 
+ } else if extents <= 4 { + var root struct { + hdr format.ExtentHeader + extents [4]format.ExtentLeafNode + } + fillExtents(&root.hdr, root.extents[:extents], startBlock, 0, blocks) + binary.Write(&b, binary.LittleEndian, root) + } else if extents <= 4*extentsPerBlock { + const extentsPerBlock = blockSize/extentNodeSize - 1 + extentBlocks := extents/extentsPerBlock + 1 + usedBlocks += extentBlocks + var b2 bytes.Buffer + + var root struct { + hdr format.ExtentHeader + nodes [4]format.ExtentIndexNode + } + root.hdr = format.ExtentHeader{ + Magic: format.ExtentHeaderMagic, + Entries: uint16(extentBlocks), + Max: 4, + Depth: 1, + } + for i := uint32(0); i < extentBlocks; i++ { + root.nodes[i] = format.ExtentIndexNode{ + Block: i * extentsPerBlock * maxBlocksPerExtent, + LeafLow: w.block(), + } + extentsInBlock := extents - i*extentBlocks + if extentsInBlock > extentsPerBlock { + extentsInBlock = extentsPerBlock + } + + var node struct { + hdr format.ExtentHeader + extents [extentsPerBlock]format.ExtentLeafNode + _ [blockSize - (extentsPerBlock+1)*extentNodeSize]byte + } + + offset := i * extentsPerBlock * maxBlocksPerExtent + fillExtents(&node.hdr, node.extents[:extentsInBlock], startBlock+offset, offset, blocks) + binary.Write(&b2, binary.LittleEndian, node) + if _, err := w.write(b2.Next(blockSize)); err != nil { + return err + } + } + binary.Write(&b, binary.LittleEndian, root) + } else { + panic("file too big") + } + + inode.Data = b.Bytes() + inode.Flags |= format.InodeFlagExtents + inode.BlockCount += usedBlocks + return w.err +} + +func (w *Writer) finishInode() error { + if !w.initialized { + if err := w.init(); err != nil { + return err + } + } + if w.curInode == nil { + return nil + } + if w.dataWritten != w.dataMax { + return fmt.Errorf("did not write the right amount: %d != %d", w.dataWritten, w.dataMax) + } + + if w.dataMax != 0 && w.curInode.Flags&format.InodeFlagInlineData == 0 { + if err := w.writeExtents(w.curInode); err != nil { + return err + } + 
} + + w.dataWritten = 0 + w.dataMax = 0 + w.curInode = nil + return w.err +} + +func modeToFileType(mode uint16) format.FileType { + switch mode & format.TypeMask { + default: + return format.FileTypeUnknown + case format.S_IFREG: + return format.FileTypeRegular + case format.S_IFDIR: + return format.FileTypeDirectory + case format.S_IFCHR: + return format.FileTypeCharacter + case format.S_IFBLK: + return format.FileTypeBlock + case format.S_IFIFO: + return format.FileTypeFIFO + case format.S_IFSOCK: + return format.FileTypeSocket + case format.S_IFLNK: + return format.FileTypeSymbolicLink + } +} + +type constReader byte + +var zero = constReader(0) + +func (r constReader) Read(b []byte) (int, error) { + for i := range b { + b[i] = byte(r) + } + return len(b), nil +} + +func (w *Writer) writeDirectory(dir, parent *inode) error { + if err := w.finishInode(); err != nil { + return err + } + + // The size of the directory is not known yet. + w.startInode("", dir, 0x7fffffffffffffff) + left := blockSize + finishBlock := func() error { + if left > 0 { + e := format.DirectoryEntry{ + RecordLength: uint16(left), + } + err := binary.Write(w, binary.LittleEndian, e) + if err != nil { + return err + } + left -= directoryEntrySize + if left < 4 { + panic("not enough space for trailing entry") + } + _, err = io.CopyN(w, zero, int64(left)) + if err != nil { + return err + } + } + left = blockSize + return nil + } + + writeEntry := func(ino format.InodeNumber, name string) error { + rlb := directoryEntrySize + len(name) + rl := (rlb + 3) & ^3 + if left < rl+12 { + if err := finishBlock(); err != nil { + return err + } + } + e := format.DirectoryEntry{ + Inode: ino, + RecordLength: uint16(rl), + NameLength: uint8(len(name)), + FileType: modeToFileType(w.getInode(ino).Mode), + } + err := binary.Write(w, binary.LittleEndian, e) + if err != nil { + return err + } + _, err = w.Write([]byte(name)) + if err != nil { + return err + } + var zero [4]byte + _, err = w.Write(zero[:rl-rlb]) 
+ if err != nil { + return err + } + left -= rl + return nil + } + if err := writeEntry(dir.Number, "."); err != nil { + return err + } + if err := writeEntry(parent.Number, ".."); err != nil { + return err + } + + // Follow e2fsck's convention and sort the children by inode number. + var children []string + for name := range dir.Children { + children = append(children, name) + } + sort.Slice(children, func(i, j int) bool { + return dir.Children[children[i]].Number < dir.Children[children[j]].Number + }) + + for _, name := range children { + child := dir.Children[name] + if err := writeEntry(child.Number, name); err != nil { + return err + } + } + if err := finishBlock(); err != nil { + return err + } + w.curInode.Size = w.dataWritten + w.dataMax = w.dataWritten + return nil +} + +func (w *Writer) writeDirectoryRecursive(dir, parent *inode) error { + if err := w.writeDirectory(dir, parent); err != nil { + return err + } + for _, child := range dir.Children { + if child.IsDir() { + if err := w.writeDirectoryRecursive(child, dir); err != nil { + return err + } + } + } + return nil +} + +func (w *Writer) writeInodeTable(tableSize uint32) error { + var b bytes.Buffer + for _, inode := range w.inodes { + if inode != nil { + binode := format.Inode{ + Mode: inode.Mode, + Uid: uint16(inode.Uid & 0xffff), + Gid: uint16(inode.Gid & 0xffff), + SizeLow: uint32(inode.Size & 0xffffffff), + SizeHigh: uint32(inode.Size >> 32), + LinksCount: uint16(inode.LinkCount), + BlocksLow: inode.BlockCount, + Flags: inode.Flags, + XattrBlockLow: inode.XattrBlock, + UidHigh: uint16(inode.Uid >> 16), + GidHigh: uint16(inode.Gid >> 16), + ExtraIsize: uint16(inodeUsedSize - 128), + Atime: uint32(inode.Atime), + AtimeExtra: uint32(inode.Atime >> 32), + Ctime: uint32(inode.Ctime), + CtimeExtra: uint32(inode.Ctime >> 32), + Mtime: uint32(inode.Mtime), + MtimeExtra: uint32(inode.Mtime >> 32), + Crtime: uint32(inode.Crtime), + CrtimeExtra: uint32(inode.Crtime >> 32), + } + switch inode.Mode & 
format.TypeMask { + case format.S_IFDIR, format.S_IFREG, format.S_IFLNK: + n := copy(binode.Block[:], inode.Data) + if n < len(inode.Data) { + // Rewrite the first xattr with the data. + xattr := [1]xattr{{ + Name: "data", + Index: 7, // "system." + Value: inode.Data[n:], + }} + putXattrs(xattr[:], inode.XattrInline[4:], 0) + } + case format.S_IFBLK, format.S_IFCHR: + dev := inode.Devminor&0xff | inode.Devmajor<<8 | (inode.Devminor&0xffffff00)<<12 + binary.LittleEndian.PutUint32(binode.Block[4:], dev) + } + + binary.Write(&b, binary.LittleEndian, binode) + b.Truncate(inodeUsedSize) + n, _ := b.Write(inode.XattrInline) + io.CopyN(&b, zero, int64(inodeExtraSize-n)) + } else { + io.CopyN(&b, zero, inodeSize) + } + if _, err := w.write(b.Next(inodeSize)); err != nil { + return err + } + } + rest := tableSize - uint32(len(w.inodes)*inodeSize) + if _, err := w.zero(int64(rest)); err != nil { + return err + } + return nil +} + +// NewWriter returns a Writer that writes an ext4 file system to the provided +// WriteSeeker. +func NewWriter(f io.ReadWriteSeeker, opts ...Option) *Writer { + w := &Writer{ + f: f, + bw: bufio.NewWriterSize(f, 65536*8), + maxDiskSize: defaultMaxDiskSize, + } + for _, opt := range opts { + opt(w) + } + return w +} + +// An Option provides extra options to NewWriter. +type Option func(*Writer) + +// InlineData instructs the Writer to write small files into the inode +// structures directly. This creates smaller images but currently is not +// compatible with DAX. +func InlineData(w *Writer) { + w.supportInlineData = true +} + +// MaximumDiskSize instructs the writer to reserve enough metadata space for the +// specified disk size. If not provided, then 16GB is the default. 
+func MaximumDiskSize(size int64) Option { + return func(w *Writer) { + if size < 0 || size > maxMaxDiskSize { + w.maxDiskSize = maxMaxDiskSize + } else if size == 0 { + w.maxDiskSize = defaultMaxDiskSize + } else { + w.maxDiskSize = (size + blockSize - 1) &^ (blockSize - 1) + } + } +} + +func (w *Writer) init() error { + // Skip the defective block inode. + w.inodes = make([]*inode, 1, 32) + // Create the root directory. + root, _ := w.makeInode(&File{ + Mode: format.S_IFDIR | 0755, + }, nil) + root.LinkCount++ // The root is linked to itself. + // Skip until the first non-reserved inode. + w.inodes = append(w.inodes, make([]*inode, inodeFirst-len(w.inodes)-1)...) + maxBlocks := (w.maxDiskSize-1)/blockSize + 1 + maxGroups := (maxBlocks-1)/blocksPerGroup + 1 + w.gdBlocks = uint32((maxGroups-1)/groupsPerDescriptorBlock + 1) + + // Skip past the superblock and block descriptor table. + w.seekBlock(1 + w.gdBlocks) + w.initialized = true + + // The lost+found directory is required to exist for e2fsck to pass. + if err := w.Create("lost+found", &File{Mode: format.S_IFDIR | 0700}); err != nil { + return err + } + return w.err +} + +func groupCount(blocks uint32, inodes uint32, inodesPerGroup uint32) uint32 { + inodeBlocksPerGroup := inodesPerGroup * inodeSize / blockSize + dataBlocksPerGroup := blocksPerGroup - inodeBlocksPerGroup - 2 // save room for the bitmaps + + // Increase the block count to ensure there are enough groups for all the + // inodes. 
+ minBlocks := (inodes-1)/inodesPerGroup*dataBlocksPerGroup + 1 + if blocks < minBlocks { + blocks = minBlocks + } + + return (blocks + dataBlocksPerGroup - 1) / dataBlocksPerGroup +} + +func bestGroupCount(blocks uint32, inodes uint32) (groups uint32, inodesPerGroup uint32) { + groups = 0xffffffff + for ipg := uint32(inodesPerGroupIncrement); ipg <= maxInodesPerGroup; ipg += inodesPerGroupIncrement { + g := groupCount(blocks, inodes, ipg) + if g < groups { + groups = g + inodesPerGroup = ipg + } + } + return +} + +func (w *Writer) Close() error { + if err := w.finishInode(); err != nil { + return err + } + root := w.root() + if err := w.writeDirectoryRecursive(root, root); err != nil { + return err + } + // Finish the last inode (probably a directory). + if err := w.finishInode(); err != nil { + return err + } + + // Write the inode table + inodeTableOffset := w.block() + groups, inodesPerGroup := bestGroupCount(inodeTableOffset, uint32(len(w.inodes))) + err := w.writeInodeTable(groups * inodesPerGroup * inodeSize) + if err != nil { + return err + } + + // Write the bitmaps. + bitmapOffset := w.block() + bitmapSize := groups * 2 + validDataSize := bitmapOffset + bitmapSize + diskSize := validDataSize + minSize := (groups-1)*blocksPerGroup + 1 + if diskSize < minSize { + diskSize = minSize + } + + usedGdBlocks := (groups-1)/groupDescriptorSize + 1 + if usedGdBlocks > w.gdBlocks { + return exceededMaxSizeError{w.maxDiskSize} + } + + gds := make([]format.GroupDescriptor, w.gdBlocks*groupsPerDescriptorBlock) + inodeTableSizePerGroup := inodesPerGroup * inodeSize / blockSize + var totalUsedBlocks, totalUsedInodes uint32 + for g := uint32(0); g < groups; g++ { + var b [blockSize * 2]byte + var dirCount, usedInodeCount, usedBlockCount uint16 + + // Block bitmap + if (g+1)*blocksPerGroup <= validDataSize { + // This group is fully allocated. 
+ for j := range b[:blockSize] { + b[j] = 0xff + } + usedBlockCount = blocksPerGroup + } else if g*blocksPerGroup < validDataSize { + for j := uint32(0); j < validDataSize-g*blocksPerGroup; j++ { + b[j/8] |= 1 << (j % 8) + usedBlockCount++ + } + } + if g == 0 { + // Unused group descriptor blocks should be cleared. + for j := 1 + usedGdBlocks; j < 1+w.gdBlocks; j++ { + b[j/8] &^= 1 << (j % 8) + usedBlockCount-- + } + } + if g == groups-1 && diskSize%blocksPerGroup != 0 { + // Blocks that aren't present in the disk should be marked as + // allocated. + for j := diskSize % blocksPerGroup; j < blocksPerGroup; j++ { + b[j/8] |= 1 << (j % 8) + usedBlockCount++ + } + } + // Inode bitmap + for j := uint32(0); j < inodesPerGroup; j++ { + ino := format.InodeNumber(1 + g*inodesPerGroup + j) + inode := w.getInode(ino) + if ino < inodeFirst || inode != nil { + b[blockSize+j/8] |= 1 << (j % 8) + usedInodeCount++ + } + if inode != nil && inode.Mode&format.TypeMask == format.S_IFDIR { + dirCount++ + } + } + _, err := w.write(b[:]) + if err != nil { + return err + } + gds[g] = format.GroupDescriptor{ + BlockBitmapLow: bitmapOffset + 2*g, + InodeBitmapLow: bitmapOffset + 2*g + 1, + InodeTableLow: inodeTableOffset + g*inodeTableSizePerGroup, + UsedDirsCountLow: dirCount, + FreeInodesCountLow: uint16(inodesPerGroup) - usedInodeCount, + FreeBlocksCountLow: blocksPerGroup - usedBlockCount, + } + + totalUsedBlocks += uint32(usedBlockCount) + totalUsedInodes += uint32(usedInodeCount) + } + + // Zero up to the disk size. 
+ _, err = w.zero(int64(diskSize-bitmapOffset-bitmapSize) * blockSize) + if err != nil { + return err + } + + // Write the block descriptors + w.seekBlock(1) + if w.err != nil { + return w.err + } + err = binary.Write(w.bw, binary.LittleEndian, gds) + if err != nil { + return err + } + + // Write the super block + var blk [blockSize]byte + b := bytes.NewBuffer(blk[:1024]) + sb := &format.SuperBlock{ + InodesCount: inodesPerGroup * groups, + BlocksCountLow: diskSize, + FreeBlocksCountLow: blocksPerGroup*groups - totalUsedBlocks, + FreeInodesCount: inodesPerGroup*groups - totalUsedInodes, + FirstDataBlock: 0, + LogBlockSize: 2, // 2^(10 + 2) + LogClusterSize: 2, + BlocksPerGroup: blocksPerGroup, + ClustersPerGroup: blocksPerGroup, + InodesPerGroup: inodesPerGroup, + Magic: format.SuperBlockMagic, + State: 1, // cleanly unmounted + Errors: 1, // continue on error? + CreatorOS: 0, // Linux + RevisionLevel: 1, // dynamic inode sizes + FirstInode: inodeFirst, + LpfInode: inodeLostAndFound, + InodeSize: inodeSize, + FeatureCompat: format.CompatSparseSuper2 | format.CompatExtAttr, + FeatureIncompat: format.IncompatFiletype | format.IncompatExtents | format.IncompatFlexBg, + FeatureRoCompat: format.RoCompatLargeFile | format.RoCompatHugeFile | format.RoCompatExtraIsize | format.RoCompatReadonly, + MinExtraIsize: extraIsize, + WantExtraIsize: extraIsize, + LogGroupsPerFlex: 31, + } + if w.supportInlineData { + sb.FeatureIncompat |= format.IncompatInlineData + } + binary.Write(b, binary.LittleEndian, sb) + w.seekBlock(0) + if _, err := w.write(blk[:]); err != nil { + return err + } + w.seekBlock(diskSize) + return w.err +} diff --git a/vendor/github.com/Microsoft/hcsshim/ext4/internal/format/format.go b/vendor/github.com/Microsoft/hcsshim/ext4/internal/format/format.go new file mode 100644 index 000000000..9dc4c4e16 --- /dev/null +++ b/vendor/github.com/Microsoft/hcsshim/ext4/internal/format/format.go @@ -0,0 +1,411 @@ +package format + +type SuperBlock struct { + InodesCount 
uint32 + BlocksCountLow uint32 + RootBlocksCountLow uint32 + FreeBlocksCountLow uint32 + FreeInodesCount uint32 + FirstDataBlock uint32 + LogBlockSize uint32 + LogClusterSize uint32 + BlocksPerGroup uint32 + ClustersPerGroup uint32 + InodesPerGroup uint32 + Mtime uint32 + Wtime uint32 + MountCount uint16 + MaxMountCount uint16 + Magic uint16 + State uint16 + Errors uint16 + MinorRevisionLevel uint16 + LastCheck uint32 + CheckInterval uint32 + CreatorOS uint32 + RevisionLevel uint32 + DefaultReservedUid uint16 + DefaultReservedGid uint16 + FirstInode uint32 + InodeSize uint16 + BlockGroupNr uint16 + FeatureCompat CompatFeature + FeatureIncompat IncompatFeature + FeatureRoCompat RoCompatFeature + UUID [16]uint8 + VolumeName [16]byte + LastMounted [64]byte + AlgorithmUsageBitmap uint32 + PreallocBlocks uint8 + PreallocDirBlocks uint8 + ReservedGdtBlocks uint16 + JournalUUID [16]uint8 + JournalInum uint32 + JournalDev uint32 + LastOrphan uint32 + HashSeed [4]uint32 + DefHashVersion uint8 + JournalBackupType uint8 + DescSize uint16 + DefaultMountOpts uint32 + FirstMetaBg uint32 + MkfsTime uint32 + JournalBlocks [17]uint32 + BlocksCountHigh uint32 + RBlocksCountHigh uint32 + FreeBlocksCountHigh uint32 + MinExtraIsize uint16 + WantExtraIsize uint16 + Flags uint32 + RaidStride uint16 + MmpInterval uint16 + MmpBlock uint64 + RaidStripeWidth uint32 + LogGroupsPerFlex uint8 + ChecksumType uint8 + ReservedPad uint16 + KbytesWritten uint64 + SnapshotInum uint32 + SnapshotID uint32 + SnapshotRBlocksCount uint64 + SnapshotList uint32 + ErrorCount uint32 + FirstErrorTime uint32 + FirstErrorInode uint32 + FirstErrorBlock uint64 + FirstErrorFunc [32]uint8 + FirstErrorLine uint32 + LastErrorTime uint32 + LastErrorInode uint32 + LastErrorLine uint32 + LastErrorBlock uint64 + LastErrorFunc [32]uint8 + MountOpts [64]uint8 + UserQuotaInum uint32 + GroupQuotaInum uint32 + OverheadBlocks uint32 + BackupBgs [2]uint32 + EncryptAlgos [4]uint8 + EncryptPwSalt [16]uint8 + LpfInode uint32 + 
ProjectQuotaInum uint32 + ChecksumSeed uint32 + WtimeHigh uint8 + MtimeHigh uint8 + MkfsTimeHigh uint8 + LastcheckHigh uint8 + FirstErrorTimeHigh uint8 + LastErrorTimeHigh uint8 + Pad [2]uint8 + Reserved [96]uint32 + Checksum uint32 +} + +const SuperBlockMagic uint16 = 0xef53 + +type CompatFeature uint32 +type IncompatFeature uint32 +type RoCompatFeature uint32 + +const ( + CompatDirPrealloc CompatFeature = 0x1 + CompatImagicInodes CompatFeature = 0x2 + CompatHasJournal CompatFeature = 0x4 + CompatExtAttr CompatFeature = 0x8 + CompatResizeInode CompatFeature = 0x10 + CompatDirIndex CompatFeature = 0x20 + CompatLazyBg CompatFeature = 0x40 + CompatExcludeInode CompatFeature = 0x80 + CompatExcludeBitmap CompatFeature = 0x100 + CompatSparseSuper2 CompatFeature = 0x200 + + IncompatCompression IncompatFeature = 0x1 + IncompatFiletype IncompatFeature = 0x2 + IncompatRecover IncompatFeature = 0x4 + IncompatJournalDev IncompatFeature = 0x8 + IncompatMetaBg IncompatFeature = 0x10 + IncompatExtents IncompatFeature = 0x40 + Incompat_64Bit IncompatFeature = 0x80 + IncompatMmp IncompatFeature = 0x100 + IncompatFlexBg IncompatFeature = 0x200 + IncompatEaInode IncompatFeature = 0x400 + IncompatDirdata IncompatFeature = 0x1000 + IncompatCsumSeed IncompatFeature = 0x2000 + IncompatLargedir IncompatFeature = 0x4000 + IncompatInlineData IncompatFeature = 0x8000 + IncompatEncrypt IncompatFeature = 0x10000 + + RoCompatSparseSuper RoCompatFeature = 0x1 + RoCompatLargeFile RoCompatFeature = 0x2 + RoCompatBtreeDir RoCompatFeature = 0x4 + RoCompatHugeFile RoCompatFeature = 0x8 + RoCompatGdtCsum RoCompatFeature = 0x10 + RoCompatDirNlink RoCompatFeature = 0x20 + RoCompatExtraIsize RoCompatFeature = 0x40 + RoCompatHasSnapshot RoCompatFeature = 0x80 + RoCompatQuota RoCompatFeature = 0x100 + RoCompatBigalloc RoCompatFeature = 0x200 + RoCompatMetadataCsum RoCompatFeature = 0x400 + RoCompatReplica RoCompatFeature = 0x800 + RoCompatReadonly RoCompatFeature = 0x1000 + RoCompatProject RoCompatFeature 
= 0x2000 +) + +type BlockGroupFlag uint16 + +const ( + BlockGroupInodeUninit BlockGroupFlag = 0x1 + BlockGroupBlockUninit BlockGroupFlag = 0x2 + BlockGroupInodeZeroed BlockGroupFlag = 0x4 +) + +type GroupDescriptor struct { + BlockBitmapLow uint32 + InodeBitmapLow uint32 + InodeTableLow uint32 + FreeBlocksCountLow uint16 + FreeInodesCountLow uint16 + UsedDirsCountLow uint16 + Flags BlockGroupFlag + ExcludeBitmapLow uint32 + BlockBitmapCsumLow uint16 + InodeBitmapCsumLow uint16 + ItableUnusedLow uint16 + Checksum uint16 +} + +type GroupDescriptor64 struct { + GroupDescriptor + BlockBitmapHigh uint32 + InodeBitmapHigh uint32 + InodeTableHigh uint32 + FreeBlocksCountHigh uint16 + FreeInodesCountHigh uint16 + UsedDirsCountHigh uint16 + ItableUnusedHigh uint16 + ExcludeBitmapHigh uint32 + BlockBitmapCsumHigh uint16 + InodeBitmapCsumHigh uint16 + Reserved uint32 +} + +const ( + S_IXOTH = 0x1 + S_IWOTH = 0x2 + S_IROTH = 0x4 + S_IXGRP = 0x8 + S_IWGRP = 0x10 + S_IRGRP = 0x20 + S_IXUSR = 0x40 + S_IWUSR = 0x80 + S_IRUSR = 0x100 + S_ISVTX = 0x200 + S_ISGID = 0x400 + S_ISUID = 0x800 + S_IFIFO = 0x1000 + S_IFCHR = 0x2000 + S_IFDIR = 0x4000 + S_IFBLK = 0x6000 + S_IFREG = 0x8000 + S_IFLNK = 0xA000 + S_IFSOCK = 0xC000 + + TypeMask uint16 = 0xF000 +) + +type InodeNumber uint32 + +const ( + InodeRoot = 2 +) + +type Inode struct { + Mode uint16 + Uid uint16 + SizeLow uint32 + Atime uint32 + Ctime uint32 + Mtime uint32 + Dtime uint32 + Gid uint16 + LinksCount uint16 + BlocksLow uint32 + Flags InodeFlag + Version uint32 + Block [60]byte + Generation uint32 + XattrBlockLow uint32 + SizeHigh uint32 + ObsoleteFragmentAddr uint32 + BlocksHigh uint16 + XattrBlockHigh uint16 + UidHigh uint16 + GidHigh uint16 + ChecksumLow uint16 + Reserved uint16 + ExtraIsize uint16 + ChecksumHigh uint16 + CtimeExtra uint32 + MtimeExtra uint32 + AtimeExtra uint32 + Crtime uint32 + CrtimeExtra uint32 + VersionHigh uint32 + Projid uint32 +} + +type InodeFlag uint32 + +const ( + InodeFlagSecRm InodeFlag = 0x1 + 
InodeFlagUnRm InodeFlag = 0x2 + InodeFlagCompressed InodeFlag = 0x4 + InodeFlagSync InodeFlag = 0x8 + InodeFlagImmutable InodeFlag = 0x10 + InodeFlagAppend InodeFlag = 0x20 + InodeFlagNoDump InodeFlag = 0x40 + InodeFlagNoAtime InodeFlag = 0x80 + InodeFlagDirtyCompressed InodeFlag = 0x100 + InodeFlagCompressedClusters InodeFlag = 0x200 + InodeFlagNoCompress InodeFlag = 0x400 + InodeFlagEncrypted InodeFlag = 0x800 + InodeFlagHashedIndex InodeFlag = 0x1000 + InodeFlagMagic InodeFlag = 0x2000 + InodeFlagJournalData InodeFlag = 0x4000 + InodeFlagNoTail InodeFlag = 0x8000 + InodeFlagDirSync InodeFlag = 0x10000 + InodeFlagTopDir InodeFlag = 0x20000 + InodeFlagHugeFile InodeFlag = 0x40000 + InodeFlagExtents InodeFlag = 0x80000 + InodeFlagEaInode InodeFlag = 0x200000 + InodeFlagEOFBlocks InodeFlag = 0x400000 + InodeFlagSnapfile InodeFlag = 0x01000000 + InodeFlagSnapfileDeleted InodeFlag = 0x04000000 + InodeFlagSnapfileShrunk InodeFlag = 0x08000000 + InodeFlagInlineData InodeFlag = 0x10000000 + InodeFlagProjectIDInherit InodeFlag = 0x20000000 + InodeFlagReserved InodeFlag = 0x80000000 +) + +const ( + MaxLinks = 65000 +) + +type ExtentHeader struct { + Magic uint16 + Entries uint16 + Max uint16 + Depth uint16 + Generation uint32 +} + +const ExtentHeaderMagic uint16 = 0xf30a + +type ExtentIndexNode struct { + Block uint32 + LeafLow uint32 + LeafHigh uint16 + Unused uint16 +} + +type ExtentLeafNode struct { + Block uint32 + Length uint16 + StartHigh uint16 + StartLow uint32 +} + +type ExtentTail struct { + Checksum uint32 +} + +type DirectoryEntry struct { + Inode InodeNumber + RecordLength uint16 + NameLength uint8 + FileType FileType + //Name []byte +} + +type FileType uint8 + +const ( + FileTypeUnknown FileType = 0x0 + FileTypeRegular FileType = 0x1 + FileTypeDirectory FileType = 0x2 + FileTypeCharacter FileType = 0x3 + FileTypeBlock FileType = 0x4 + FileTypeFIFO FileType = 0x5 + FileTypeSocket FileType = 0x6 + FileTypeSymbolicLink FileType = 0x7 +) + +type 
DirectoryEntryTail struct { + ReservedZero1 uint32 + RecordLength uint16 + ReservedZero2 uint8 + FileType uint8 + Checksum uint32 +} + +type DirectoryTreeRoot struct { + Dot DirectoryEntry + DotName [4]byte + DotDot DirectoryEntry + DotDotName [4]byte + ReservedZero uint32 + HashVersion uint8 + InfoLength uint8 + IndirectLevels uint8 + UnusedFlags uint8 + Limit uint16 + Count uint16 + Block uint32 + //Entries []DirectoryTreeEntry +} + +type DirectoryTreeNode struct { + FakeInode uint32 + FakeRecordLength uint16 + NameLength uint8 + FileType uint8 + Limit uint16 + Count uint16 + Block uint32 + //Entries []DirectoryTreeEntry +} + +type DirectoryTreeEntry struct { + Hash uint32 + Block uint32 +} + +type DirectoryTreeTail struct { + Reserved uint32 + Checksum uint32 +} + +type XAttrInodeBodyHeader struct { + Magic uint32 +} + +type XAttrHeader struct { + Magic uint32 + ReferenceCount uint32 + Blocks uint32 + Hash uint32 + Checksum uint32 + Reserved [3]uint32 +} + +const XAttrHeaderMagic uint32 = 0xea020000 + +type XAttrEntry struct { + NameLength uint8 + NameIndex uint8 + ValueOffset uint16 + ValueInum uint32 + ValueSize uint32 + Hash uint32 + //Name []byte +} diff --git a/vendor/github.com/Microsoft/hcsshim/ext4/tar2ext4/tar2ext4.go b/vendor/github.com/Microsoft/hcsshim/ext4/tar2ext4/tar2ext4.go new file mode 100644 index 000000000..ad0921046 --- /dev/null +++ b/vendor/github.com/Microsoft/hcsshim/ext4/tar2ext4/tar2ext4.go @@ -0,0 +1,174 @@ +package tar2ext4 + +import ( + "archive/tar" + "bufio" + "encoding/binary" + "io" + "path" + "strings" + + "github.com/Microsoft/hcsshim/ext4/internal/compactext4" +) + +type params struct { + convertWhiteout bool + appendVhdFooter bool + ext4opts []compactext4.Option +} + +// Option is the type for optional parameters to Convert. +type Option func(*params) + +// ConvertWhiteout instructs the converter to convert OCI-style whiteouts +// (beginning with .wh.) to overlay-style whiteouts. 
+func ConvertWhiteout(p *params) { + p.convertWhiteout = true +} + +// AppendVhdFooter instructs the converter to add a fixed VHD footer to the +// file. +func AppendVhdFooter(p *params) { + p.appendVhdFooter = true +} + +// InlineData instructs the converter to write small files into the inode +// structures directly. This creates smaller images but currently is not +// compatible with DAX. +func InlineData(p *params) { + p.ext4opts = append(p.ext4opts, compactext4.InlineData) +} + +// MaximumDiskSize instructs the writer to limit the disk size to the specified +// value. This also reserves enough metadata space for the specified disk size. +// If not provided, then 16GB is the default. +func MaximumDiskSize(size int64) Option { + return func(p *params) { + p.ext4opts = append(p.ext4opts, compactext4.MaximumDiskSize(size)) + } +} + +const ( + whiteoutPrefix = ".wh." + opaqueWhiteout = ".wh..wh..opq" +) + +// Convert writes a compact ext4 file system image that contains the files in the +// input tar stream. +func Convert(r io.Reader, w io.ReadWriteSeeker, options ...Option) error { + var p params + for _, opt := range options { + opt(&p) + } + t := tar.NewReader(bufio.NewReader(r)) + fs := compactext4.NewWriter(w, p.ext4opts...) + for { + hdr, err := t.Next() + if err == io.EOF { + break + } + if err != nil { + return err + } + + if p.convertWhiteout { + dir, name := path.Split(hdr.Name) + if strings.HasPrefix(name, whiteoutPrefix) { + if name == opaqueWhiteout { + // Update the directory with the appropriate xattr. + f, err := fs.Stat(dir) + if err != nil { + return err + } + f.Xattrs["trusted.overlay.opaque"] = []byte("y") + err = fs.Create(dir, f) + if err != nil { + return err + } + } else { + // Create an overlay-style whiteout. 
+ f := &compactext4.File{ + Mode: compactext4.S_IFCHR, + Devmajor: 0, + Devminor: 0, + } + err = fs.Create(path.Join(dir, name[len(whiteoutPrefix):]), f) + if err != nil { + return err + } + } + + continue + } + } + + if hdr.Typeflag == tar.TypeLink { + err = fs.Link(hdr.Linkname, hdr.Name) + if err != nil { + return err + } + } else { + f := &compactext4.File{ + Mode: uint16(hdr.Mode), + Atime: hdr.AccessTime, + Mtime: hdr.ModTime, + Ctime: hdr.ChangeTime, + Crtime: hdr.ModTime, + Size: hdr.Size, + Uid: uint32(hdr.Uid), + Gid: uint32(hdr.Gid), + Linkname: hdr.Linkname, + Devmajor: uint32(hdr.Devmajor), + Devminor: uint32(hdr.Devminor), + Xattrs: make(map[string][]byte), + } + for key, value := range hdr.PAXRecords { + const xattrPrefix = "SCHILY.xattr." + if strings.HasPrefix(key, xattrPrefix) { + f.Xattrs[key[len(xattrPrefix):]] = []byte(value) + } + } + + var typ uint16 + switch hdr.Typeflag { + case tar.TypeReg, tar.TypeRegA: + typ = compactext4.S_IFREG + case tar.TypeSymlink: + typ = compactext4.S_IFLNK + case tar.TypeChar: + typ = compactext4.S_IFCHR + case tar.TypeBlock: + typ = compactext4.S_IFBLK + case tar.TypeDir: + typ = compactext4.S_IFDIR + case tar.TypeFifo: + typ = compactext4.S_IFIFO + } + f.Mode &= ^compactext4.TypeMask + f.Mode |= typ + err = fs.Create(hdr.Name, f) + if err != nil { + return err + } + _, err = io.Copy(fs, t) + if err != nil { + return err + } + } + } + err := fs.Close() + if err != nil { + return err + } + if p.appendVhdFooter { + size, err := w.Seek(0, io.SeekEnd) + if err != nil { + return err + } + err = binary.Write(w, binary.BigEndian, makeFixedVHDFooter(size)) + if err != nil { + return err + } + } + return nil +} diff --git a/vendor/github.com/Microsoft/hcsshim/ext4/tar2ext4/vhdfooter.go b/vendor/github.com/Microsoft/hcsshim/ext4/tar2ext4/vhdfooter.go new file mode 100644 index 000000000..c98740302 --- /dev/null +++ b/vendor/github.com/Microsoft/hcsshim/ext4/tar2ext4/vhdfooter.go @@ -0,0 +1,76 @@ +package tar2ext4 + 
+import (
+	"bytes"
+	"crypto/rand"
+	"encoding/binary"
+)
+
+// Field values used when building the footer of a fixed VHD.
+const (
+	cookieMagic            = "conectix"
+	featureMask            = 0x2
+	fileFormatVersionMagic = 0x00010000
+	fixedDataOffset        = -1
+	creatorVersionMagic    = 0x000a0000
+	diskTypeFixed          = 2
+)
+
+// vhdFooter is the footer record; its fields total 512 bytes when encoded.
+type vhdFooter struct {
+	Cookie             [8]byte
+	Features           uint32
+	FileFormatVersion  uint32
+	DataOffset         int64
+	TimeStamp          uint32
+	CreatorApplication [4]byte
+	CreatorVersion     uint32
+	CreatorHostOS      [4]byte
+	OriginalSize       int64
+	CurrentSize        int64
+	DiskGeometry       uint32
+	DiskType           uint32
+	Checksum           uint32
+	UniqueID           [16]uint8
+	SavedState         uint8
+	Reserved           [427]uint8
+}
+
+// makeFixedVHDFooter returns a checksummed fixed-disk footer for the given size.
+func makeFixedVHDFooter(size int64) *vhdFooter {
+	var f vhdFooter
+	copy(f.Cookie[:], cookieMagic)
+	f.Features = featureMask
+	f.FileFormatVersion = fileFormatVersionMagic
+	f.DataOffset = fixedDataOffset
+	f.CreatorVersion = creatorVersionMagic
+	f.OriginalSize, f.CurrentSize = size, size
+	f.DiskType = diskTypeFixed
+	f.UniqueID = generateUUID()
+	f.Checksum = calculateCheckSum(&f)
+	return &f
+}
+
+// calculateCheckSum returns the one's complement of the byte sum of footer's
+// big-endian encoding, taken with Checksum zeroed; footer is left unchanged.
+func calculateCheckSum(footer *vhdFooter) uint32 {
+	saved := footer.Checksum
+	footer.Checksum = 0
+	var encoded bytes.Buffer
+	binary.Write(&encoded, binary.BigEndian, footer)
+	footer.Checksum = saved
+	var sum uint32
+	for _, b := range encoded.Bytes() {
+		sum += uint32(b)
+	}
+	return ^sum
+}
+
+// generateUUID returns 16 bytes from crypto/rand, panicking if the read fails.
+func generateUUID() [16]byte {
+	var id [16]byte
+	if _, err := rand.Read(id[:]); err != nil {
+		panic(err)
+	}
+	return id
+}
diff --git a/vendor/github.com/containerd/containerd/diff/lcow/lcow.go b/vendor/github.com/containerd/containerd/diff/lcow/lcow.go
new file mode 100644
index 000000000..05ce3fb21
--- /dev/null
+++ b/vendor/github.com/containerd/containerd/diff/lcow/lcow.go
@@ -0,0 +1,210 @@
+// +build windows
+
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/
+
+package lcow
+
+import (
+	"context"
+	"io"
+	"os"
+	"path"
+	"time"
+
+	"github.com/Microsoft/go-winio/pkg/security"
+	"github.com/Microsoft/hcsshim/ext4/tar2ext4"
+	"github.com/containerd/containerd/content"
+	"github.com/containerd/containerd/diff"
+	"github.com/containerd/containerd/errdefs"
+	"github.com/containerd/containerd/log"
+	"github.com/containerd/containerd/metadata"
+	"github.com/containerd/containerd/mount"
+	"github.com/containerd/containerd/plugin"
+	digest "github.com/opencontainers/go-digest"
+	ocispec "github.com/opencontainers/image-spec/specs-go/v1"
+	"github.com/pkg/errors"
+	"github.com/sirupsen/logrus"
+)
+
+const (
+	// maxLcowVhdSizeGB is the maximum layer VHD size given to tar2ext4.MaximumDiskSize; despite the "GB" suffix the value is the raw number 128 * 1024^3 (128 GiB if that option counts bytes - TODO confirm).
+	maxLcowVhdSizeGB = 128 * 1024 * 1024 * 1024
+)
+
+func init() { // registers the "windows-lcow" diff plugin, which requires the metadata plugin
+	plugin.Register(&plugin.Registration{
+		Type: plugin.DiffPlugin,
+		ID:   "windows-lcow",
+		Requires: []plugin.Type{
+			plugin.MetadataPlugin,
+		},
+		InitFn: func(ic *plugin.InitContext) (interface{}, error) {
+			md, err := ic.Get(plugin.MetadataPlugin)
+			if err != nil {
+				return nil, err
+			}
+			// Add linux/amd64 to the platforms recorded in the plugin's metadata.
+			ic.Meta.Platforms = append(ic.Meta.Platforms, ocispec.Platform{
+				OS:           "linux",
+				Architecture: "amd64",
+			})
+			return NewWindowsLcowDiff(md.(*metadata.DB).ContentStore()) // unchecked assertion: panics unless md is a *metadata.DB
+		},
+	})
+}
+
+// CompareApplier handles both comparison and
+// application of layer diffs.
+type CompareApplier interface { + diff.Applier + diff.Comparer +} + +// windowsLcowDiff does filesystem comparison and application +// for Windows specific Linux layer diffs. +type windowsLcowDiff struct { + store content.Store +} + +var emptyDesc = ocispec.Descriptor{} + +// NewWindowsLcowDiff is the Windows LCOW container layer implementation +// for comparing and applying Linux filesystem layers on Windows +func NewWindowsLcowDiff(store content.Store) (CompareApplier, error) { + return windowsLcowDiff{ + store: store, + }, nil +} + +// Apply applies the content associated with the provided digests onto the +// provided mounts. Archive content will be extracted and decompressed if +// necessary. +func (s windowsLcowDiff) Apply(ctx context.Context, desc ocispec.Descriptor, mounts []mount.Mount, opts ...diff.ApplyOpt) (d ocispec.Descriptor, err error) { + t1 := time.Now() + defer func() { + if err == nil { + log.G(ctx).WithFields(logrus.Fields{ + "d": time.Since(t1), + "dgst": desc.Digest, + "size": desc.Size, + "media": desc.MediaType, + }).Debugf("diff applied") + } + }() + + var config diff.ApplyConfig + for _, o := range opts { + if err := o(ctx, desc, &config); err != nil { + return emptyDesc, errors.Wrap(err, "failed to apply config opt") + } + } + + layer, _, err := mountsToLayerAndParents(mounts) + if err != nil { + return emptyDesc, err + } + + ra, err := s.store.ReaderAt(ctx, desc) + if err != nil { + return emptyDesc, errors.Wrap(err, "failed to get reader from content store") + } + defer ra.Close() + + processor := diff.NewProcessorChain(desc.MediaType, content.NewReader(ra)) + for { + if processor, err = diff.GetProcessor(ctx, processor, config.ProcessorPayloads); err != nil { + return emptyDesc, errors.Wrapf(err, "failed to get stream processor for %s", desc.MediaType) + } + if processor.MediaType() == ocispec.MediaTypeImageLayer { + break + } + } + defer processor.Close() + + // Calculate the Digest as we go + digester := digest.Canonical.Digester() 
+ rc := &readCounter{ + r: io.TeeReader(processor, digester.Hash()), + } + + layerPath := path.Join(layer, "layer.vhd") + outFile, err := os.Create(layerPath) + if err != nil { + return emptyDesc, err + } + defer func() { + if err != nil { + outFile.Close() + os.Remove(layerPath) + } + }() + + err = tar2ext4.Convert(rc, outFile, tar2ext4.ConvertWhiteout, tar2ext4.AppendVhdFooter, tar2ext4.MaximumDiskSize(maxLcowVhdSizeGB)) + if err != nil { + return emptyDesc, errors.Wrapf(err, "failed to convert tar2ext4 vhd") + } + err = outFile.Sync() + if err != nil { + return emptyDesc, errors.Wrapf(err, "failed to sync tar2ext4 vhd to disk") + } + outFile.Close() + + err = security.GrantVmGroupAccess(layerPath) + if err != nil { + return emptyDesc, errors.Wrapf(err, "failed GrantVmGroupAccess on layer vhd: %v", layerPath) + } + + return ocispec.Descriptor{ + MediaType: ocispec.MediaTypeImageLayer, + Size: rc.c, + Digest: digester.Digest(), + }, nil +} + +// Compare creates a diff between the given mounts and uploads the result +// to the content store. 
+func (s windowsLcowDiff) Compare(ctx context.Context, lower, upper []mount.Mount, opts ...diff.Opt) (d ocispec.Descriptor, err error) {
+	return emptyDesc, errdefs.ErrNotImplemented // comparison is not implemented by this differ
+}
+
+type readCounter struct {
+	r io.Reader // reader that Read delegates to
+	c int64     // running total of bytes returned by Read
+}
+
+// Read forwards to the wrapped reader and adds the bytes read to rc.c.
+func (rc *readCounter) Read(p []byte) (n int, err error) {
+	n, err = rc.r.Read(p)
+	rc.c += int64(n)
+	return n, err
+}
+
+// mountsToLayerAndParents returns the source and parent paths of the single lcow-layer mount.
+func mountsToLayerAndParents(mounts []mount.Mount) (string, []string, error) {
+	switch {
+	case len(mounts) != 1:
+		return "", nil, errors.Wrap(errdefs.ErrInvalidArgument, "number of mounts should always be 1 for Windows lcow-layers")
+	case mounts[0].Type != "lcow-layer":
+		return "", nil, errors.Wrap(errdefs.ErrInvalidArgument, "mount layer type must be lcow-layer")
+	}
+	layerMount := mounts[0]
+	parents, err := layerMount.GetParentPaths()
+	if err != nil {
+		return "", nil, err
+	}
+	return layerMount.Source, parents, nil
+}
diff --git a/vendor/github.com/containerd/containerd/diff/windows/windows.go b/vendor/github.com/containerd/containerd/diff/windows/windows.go
new file mode 100644
index 000000000..ce584dc27
--- /dev/null
+++ b/vendor/github.com/containerd/containerd/diff/windows/windows.go
@@ -0,0 +1,193 @@
+// +build windows
+
+/*
+   Copyright The containerd Authors.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+*/ + +package windows + +import ( + "context" + "io" + "io/ioutil" + "time" + + winio "github.com/Microsoft/go-winio" + "github.com/containerd/containerd/archive" + "github.com/containerd/containerd/content" + "github.com/containerd/containerd/diff" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/metadata" + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/platforms" + "github.com/containerd/containerd/plugin" + digest "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +func init() { + plugin.Register(&plugin.Registration{ + Type: plugin.DiffPlugin, + ID: "windows", + Requires: []plugin.Type{ + plugin.MetadataPlugin, + }, + InitFn: func(ic *plugin.InitContext) (interface{}, error) { + md, err := ic.Get(plugin.MetadataPlugin) + if err != nil { + return nil, err + } + + ic.Meta.Platforms = append(ic.Meta.Platforms, platforms.DefaultSpec()) + return NewWindowsDiff(md.(*metadata.DB).ContentStore()) + }, + }) +} + +// CompareApplier handles both comparison and +// application of layer diffs. +type CompareApplier interface { + diff.Applier + diff.Comparer +} + +// windowsDiff does filesystem comparison and application +// for Windows specific layer diffs. +type windowsDiff struct { + store content.Store +} + +var emptyDesc = ocispec.Descriptor{} + +// NewWindowsDiff is the Windows container layer implementation +// for comparing and applying filesystem layers +func NewWindowsDiff(store content.Store) (CompareApplier, error) { + return windowsDiff{ + store: store, + }, nil +} + +// Apply applies the content associated with the provided digests onto the +// provided mounts. Archive content will be extracted and decompressed if +// necessary. 
+func (s windowsDiff) Apply(ctx context.Context, desc ocispec.Descriptor, mounts []mount.Mount, opts ...diff.ApplyOpt) (d ocispec.Descriptor, err error) { + t1 := time.Now() + defer func() { + if err == nil { + log.G(ctx).WithFields(logrus.Fields{ + "d": time.Since(t1), + "dgst": desc.Digest, + "size": desc.Size, + "media": desc.MediaType, + }).Debugf("diff applied") + } + }() + + var config diff.ApplyConfig + for _, o := range opts { + if err := o(ctx, desc, &config); err != nil { + return emptyDesc, errors.Wrap(err, "failed to apply config opt") + } + } + + ra, err := s.store.ReaderAt(ctx, desc) + if err != nil { + return emptyDesc, errors.Wrap(err, "failed to get reader from content store") + } + defer ra.Close() + + processor := diff.NewProcessorChain(desc.MediaType, content.NewReader(ra)) + for { + if processor, err = diff.GetProcessor(ctx, processor, config.ProcessorPayloads); err != nil { + return emptyDesc, errors.Wrapf(err, "failed to get stream processor for %s", desc.MediaType) + } + if processor.MediaType() == ocispec.MediaTypeImageLayer { + break + } + } + defer processor.Close() + + digester := digest.Canonical.Digester() + rc := &readCounter{ + r: io.TeeReader(processor, digester.Hash()), + } + + layer, parentLayerPaths, err := mountsToLayerAndParents(mounts) + if err != nil { + return emptyDesc, err + } + + // TODO darrenstahlmsft: When this is done isolated, we should disable these. + // it currently cannot be disabled, unless we add ref counting. Since this is + // temporary, leaving it enabled is OK for now. 
+ if err := winio.EnableProcessPrivileges([]string{winio.SeBackupPrivilege, winio.SeRestorePrivilege}); err != nil { + return emptyDesc, err + } + + if _, err := archive.Apply(ctx, layer, rc, archive.WithParentLayers(parentLayerPaths), archive.AsWindowsContainerLayer()); err != nil { + return emptyDesc, err + } + + // Read any trailing data + if _, err := io.Copy(ioutil.Discard, rc); err != nil { + return emptyDesc, err + } + + return ocispec.Descriptor{ + MediaType: ocispec.MediaTypeImageLayer, + Size: rc.c, + Digest: digester.Digest(), + }, nil +} + +// Compare creates a diff between the given mounts and uploads the result +// to the content store. +func (s windowsDiff) Compare(ctx context.Context, lower, upper []mount.Mount, opts ...diff.Opt) (d ocispec.Descriptor, err error) { + return emptyDesc, errdefs.ErrNotImplemented +} + +type readCounter struct { + r io.Reader + c int64 +} + +func (rc *readCounter) Read(p []byte) (n int, err error) { + n, err = rc.r.Read(p) + rc.c += int64(n) + return +} + +func mountsToLayerAndParents(mounts []mount.Mount) (string, []string, error) { + if len(mounts) != 1 { + return "", nil, errors.Wrap(errdefs.ErrInvalidArgument, "number of mounts should always be 1 for Windows layers") + } + mnt := mounts[0] + if mnt.Type != "windows-layer" { + // This is a special case error. When this is received the diff service + // will attempt the next differ in the chain which for Windows is the + // lcow differ that we want. 
+ return "", nil, errdefs.ErrNotImplemented + } + + parentLayerPaths, err := mnt.GetParentPaths() + if err != nil { + return "", nil, err + } + + return mnt.Source, parentLayerPaths, nil +} diff --git a/vendor/github.com/containerd/containerd/snapshots/windows/windows.go b/vendor/github.com/containerd/containerd/snapshots/windows/windows.go new file mode 100644 index 000000000..2378b617d --- /dev/null +++ b/vendor/github.com/containerd/containerd/snapshots/windows/windows.go @@ -0,0 +1,338 @@ +// +build windows + +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +package windows + +import ( + "context" + "encoding/json" + "os" + "path/filepath" + "strings" + + winfs "github.com/Microsoft/go-winio/pkg/fs" + "github.com/Microsoft/go-winio/vhd" + "github.com/Microsoft/hcsshim" + "github.com/containerd/containerd/errdefs" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/mount" + "github.com/containerd/containerd/plugin" + "github.com/containerd/containerd/snapshots" + "github.com/containerd/containerd/snapshots/storage" + "github.com/containerd/continuity/fs" + "github.com/pkg/errors" +) + +func init() { + plugin.Register(&plugin.Registration{ + Type: plugin.SnapshotPlugin, + ID: "windows", + InitFn: func(ic *plugin.InitContext) (interface{}, error) { + return NewSnapshotter(ic.Root) + }, + }) +} + +type snapshotter struct { + root string + info hcsshim.DriverInfo + ms *storage.MetaStore +} + +// NewSnapshotter returns a new windows snapshotter +func NewSnapshotter(root string) (snapshots.Snapshotter, error) { + fsType, err := winfs.GetFileSystemType(root) + if err != nil { + return nil, err + } + if strings.ToLower(fsType) != "ntfs" { + return nil, errors.Wrapf(errdefs.ErrInvalidArgument, "%s is not on an NTFS volume - only NTFS volumes are supported", root) + } + + if err := os.MkdirAll(root, 0700); err != nil { + return nil, err + } + ms, err := storage.NewMetaStore(filepath.Join(root, "metadata.db")) + if err != nil { + return nil, err + } + + if err := os.Mkdir(filepath.Join(root, "snapshots"), 0700); err != nil && !os.IsExist(err) { + return nil, err + } + + return &snapshotter{ + info: hcsshim.DriverInfo{ + HomeDir: filepath.Join(root, "snapshots"), + }, + root: root, + ms: ms, + }, nil +} + +// Stat returns the info for an active or committed snapshot by name or +// key. +// +// Should be used for parent resolution, existence checks and to discern +// the kind of snapshot. 
+func (s *snapshotter) Stat(ctx context.Context, key string) (snapshots.Info, error) {
+	txCtx, tx, err := s.ms.TransactionContext(ctx, false)
+	if err != nil {
+		return snapshots.Info{}, err
+	}
+	defer tx.Rollback()
+
+	_, info, _, err := storage.GetInfo(txCtx, key)
+	return info, err
+}
+
+// Update runs storage.UpdateInfo in a metastore transaction and returns the
+// updated info after a successful commit.
+func (s *snapshotter) Update(ctx context.Context, info snapshots.Info, fieldpaths ...string) (snapshots.Info, error) {
+	txCtx, tx, err := s.ms.TransactionContext(ctx, true)
+	if err != nil {
+		return snapshots.Info{}, err
+	}
+	defer tx.Rollback()
+
+	updated, err := storage.UpdateInfo(txCtx, info, fieldpaths...)
+	if err == nil {
+		err = tx.Commit()
+	}
+	if err != nil {
+		return snapshots.Info{}, err
+	}
+	return updated, nil
+}
+
+// Usage returns the usage that storage.GetInfo reports for key, except that
+// snapshots of kind KindActive are always reported with zero usage.
+func (s *snapshotter) Usage(ctx context.Context, key string) (snapshots.Usage, error) {
+	txCtx, tx, err := s.ms.TransactionContext(ctx, false)
+	if err != nil {
+		return snapshots.Usage{}, err
+	}
+	defer tx.Rollback()
+
+	_, info, usage, err := storage.GetInfo(txCtx, key)
+	switch {
+	case err != nil:
+		return snapshots.Usage{}, err
+	case info.Kind == snapshots.KindActive:
+		// Active snapshots are reported with a zero fs.Usage.
+		usage = snapshots.Usage(fs.Usage{Size: 0})
+	}
+	return usage, nil
+}
+
+// Prepare creates a snapshots.KindActive snapshot through createSnapshot.
+func (s *snapshotter) Prepare(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
+	return s.createSnapshot(ctx, snapshots.KindActive, key, parent, opts)
+}
+
+// View creates a snapshots.KindView snapshot through createSnapshot.
+func (s *snapshotter) View(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
+	return s.createSnapshot(ctx, snapshots.KindView, key, parent, opts)
+}
+
+// Mounts returns the mounts for the transaction identified by key. Can be
+// called on a read-write or readonly transaction.
+//
+// This can be used to recover mounts after calling View or Prepare.
+func (s *snapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, error) { + ctx, t, err := s.ms.TransactionContext(ctx, false) + if err != nil { + return nil, err + } + defer t.Rollback() + + snapshot, err := storage.GetSnapshot(ctx, key) + if err != nil { + return nil, errors.Wrap(err, "failed to get snapshot mount") + } + return s.mounts(snapshot), nil +} + +func (s *snapshotter) Commit(ctx context.Context, name, key string, opts ...snapshots.Opt) error { + ctx, t, err := s.ms.TransactionContext(ctx, true) + if err != nil { + return err + } + defer t.Rollback() + + usage := fs.Usage{ + Size: 0, + } + + if _, err = storage.CommitActive(ctx, key, name, snapshots.Usage(usage), opts...); err != nil { + return errors.Wrap(err, "failed to commit snapshot") + } + + if err := t.Commit(); err != nil { + return err + } + return nil +} + +// Remove abandons the transaction identified by key. All resources +// associated with the key will be removed. +func (s *snapshotter) Remove(ctx context.Context, key string) error { + ctx, t, err := s.ms.TransactionContext(ctx, true) + if err != nil { + return err + } + defer t.Rollback() + + id, _, err := storage.Remove(ctx, key) + if err != nil { + return errors.Wrap(err, "failed to remove") + } + + path := s.getSnapshotDir(id) + renamedID := "rm-" + id + renamed := s.getSnapshotDir(renamedID) + if err := os.Rename(path, renamed); err != nil && !os.IsNotExist(err) { + if !os.IsPermission(err) { + return err + } + // If permission denied, it's possible that the scratch is still mounted, an + // artifact after a hard daemon crash for example. Worth a shot to try detaching it + // before retrying the rename. 
+ if detachErr := vhd.DetachVhd(filepath.Join(path, "sandbox.vhdx")); detachErr != nil { + return errors.Wrapf(err, "failed to detach VHD: %s", detachErr) + } + if renameErr := os.Rename(path, renamed); renameErr != nil && !os.IsNotExist(renameErr) { + return errors.Wrapf(err, "second rename attempt following detach failed: %s", renameErr) + } + } + + if err := t.Commit(); err != nil { + if err1 := os.Rename(renamed, path); err1 != nil { + // May cause inconsistent data on disk + log.G(ctx).WithError(err1).WithField("path", renamed).Errorf("Failed to rename after failed commit") + } + return errors.Wrap(err, "failed to commit") + } + + if err := hcsshim.DestroyLayer(s.info, renamedID); err != nil { + // Must be cleaned up, any "rm-*" could be removed if no active transactions + log.G(ctx).WithError(err).WithField("path", renamed).Warnf("Failed to remove root filesystem") + } + + return nil +} + +// Walk the committed snapshots. +func (s *snapshotter) Walk(ctx context.Context, fn func(context.Context, snapshots.Info) error) error { + ctx, t, err := s.ms.TransactionContext(ctx, false) + if err != nil { + return err + } + defer t.Rollback() + + return storage.WalkInfo(ctx, fn) +} + +// Close closes the snapshotter +func (s *snapshotter) Close() error { + return s.ms.Close() +} + +func (s *snapshotter) mounts(sn storage.Snapshot) []mount.Mount { + var ( + roFlag string + source string + parentLayerPaths []string + ) + + if sn.Kind == snapshots.KindView { + roFlag = "ro" + } else { + roFlag = "rw" + } + + if len(sn.ParentIDs) == 0 || sn.Kind == snapshots.KindActive { + source = s.getSnapshotDir(sn.ID) + parentLayerPaths = s.parentIDsToParentPaths(sn.ParentIDs) + } else { + source = s.getSnapshotDir(sn.ParentIDs[0]) + parentLayerPaths = s.parentIDsToParentPaths(sn.ParentIDs[1:]) + } + + // error is not checked here, as a string array will never fail to Marshal + parentLayersJSON, _ := json.Marshal(parentLayerPaths) + parentLayersOption := mount.ParentLayerPathsFlag + 
string(parentLayersJSON) + + var mounts []mount.Mount + mounts = append(mounts, mount.Mount{ + Source: source, + Type: "windows-layer", + Options: []string{ + roFlag, + parentLayersOption, + }, + }) + + return mounts +} + +func (s *snapshotter) getSnapshotDir(id string) string { + return filepath.Join(s.root, "snapshots", id) +} + +func (s *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, key, parent string, opts []snapshots.Opt) ([]mount.Mount, error) { + ctx, t, err := s.ms.TransactionContext(ctx, true) + if err != nil { + return nil, err + } + defer t.Rollback() + + newSnapshot, err := storage.CreateSnapshot(ctx, kind, key, parent, opts...) + if err != nil { + return nil, errors.Wrap(err, "failed to create snapshot") + } + + if kind == snapshots.KindActive { + parentLayerPaths := s.parentIDsToParentPaths(newSnapshot.ParentIDs) + + var parentPath string + if len(parentLayerPaths) != 0 { + parentPath = parentLayerPaths[0] + } + + if err := hcsshim.CreateSandboxLayer(s.info, newSnapshot.ID, parentPath, parentLayerPaths); err != nil { + return nil, errors.Wrap(err, "failed to create sandbox layer") + } + + // TODO(darrenstahlmsft): Allow changing sandbox size + } + + if err := t.Commit(); err != nil { + return nil, errors.Wrap(err, "commit failed") + } + + return s.mounts(newSnapshot), nil +} + +func (s *snapshotter) parentIDsToParentPaths(parentIDs []string) []string { + var parentLayerPaths []string + for _, ID := range parentIDs { + parentLayerPaths = append(parentLayerPaths, s.getSnapshotDir(ID)) + } + return parentLayerPaths +} diff --git a/vendor/github.com/konsorten/go-windows-terminal-sequences/LICENSE b/vendor/github.com/konsorten/go-windows-terminal-sequences/LICENSE new file mode 100644 index 000000000..14127cd83 --- /dev/null +++ b/vendor/github.com/konsorten/go-windows-terminal-sequences/LICENSE @@ -0,0 +1,9 @@ +(The MIT License) + +Copyright (c) 2017 marvin + konsorten GmbH (open-source@konsorten.de) + +Permission is hereby 
granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/vendor/github.com/konsorten/go-windows-terminal-sequences/README.md b/vendor/github.com/konsorten/go-windows-terminal-sequences/README.md
new file mode 100644
index 000000000..949b77e30
--- /dev/null
+++ b/vendor/github.com/konsorten/go-windows-terminal-sequences/README.md
@@ -0,0 +1,40 @@
+# Windows Terminal Sequences
+
+This library allows enabling Windows terminal color support for Go.
+
+See [Console Virtual Terminal Sequences](https://docs.microsoft.com/en-us/windows/console/console-virtual-terminal-sequences) for details.
+
+## Usage
+
+```go
+import (
+	"syscall"
+
+	sequences "github.com/konsorten/go-windows-terminal-sequences"
+)
+
+func main() {
+	sequences.EnableVirtualTerminalProcessing(syscall.Stdout, true)
+}
+
+```
+
+## Authors
+
+The tool is sponsored by the [marvin + konsorten GmbH](http://www.konsorten.de).
+ +We thank all the authors who provided code to this library: + +* Felix Kollmann + +## License + +(The MIT License) + +Copyright (c) 2018 marvin + konsorten GmbH (open-source@konsorten.de) + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the 'Software'), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/vendor/github.com/konsorten/go-windows-terminal-sequences/go.mod b/vendor/github.com/konsorten/go-windows-terminal-sequences/go.mod
new file mode 100644
index 000000000..716c61312
--- /dev/null
+++ b/vendor/github.com/konsorten/go-windows-terminal-sequences/go.mod
@@ -0,0 +1 @@
+module github.com/konsorten/go-windows-terminal-sequences
diff --git a/vendor/github.com/konsorten/go-windows-terminal-sequences/sequences.go b/vendor/github.com/konsorten/go-windows-terminal-sequences/sequences.go
new file mode 100644
index 000000000..ef18d8f97
--- /dev/null
+++ b/vendor/github.com/konsorten/go-windows-terminal-sequences/sequences.go
@@ -0,0 +1,36 @@
+// +build windows
+
+package sequences
+
+import "syscall"
+
+var (
+	kernel32Dll    *syscall.LazyDLL  = syscall.NewLazyDLL("Kernel32.dll")
+	setConsoleMode *syscall.LazyProc = kernel32Dll.NewProc("SetConsoleMode")
+)
+
+// EnableVirtualTerminalProcessing sets (enable == true) or clears the
+// ENABLE_VIRTUAL_TERMINAL_PROCESSING bit in the console mode of stream and
+// returns any error reported while reading or writing that mode.
+func EnableVirtualTerminalProcessing(stream syscall.Handle, enable bool) error {
+	const ENABLE_VIRTUAL_TERMINAL_PROCESSING uint32 = 0x4
+
+	// Read the current mode of the handle being modified, not of stdout, so
+	// the other mode bits written back below belong to the same console handle.
+	var mode uint32
+	if err := syscall.GetConsoleMode(stream, &mode); err != nil {
+		return err
+	}
+
+	if enable {
+		mode |= ENABLE_VIRTUAL_TERMINAL_PROCESSING
+	} else {
+		mode &^= ENABLE_VIRTUAL_TERMINAL_PROCESSING
+	}
+
+	// syscall.Handle is already an integer type; pass it as a plain uintptr.
+	if ret, _, err := setConsoleMode.Call(uintptr(stream), uintptr(mode)); ret == 0 {
+		return err
+	}
+	return nil
+}