Merge pull request #8807 from ambarve/cimfs

Add support for cimfs snapshotter & differ
This commit is contained in:
Kevin Parsons 2023-12-22 21:09:51 +00:00 committed by GitHub
commit 124bc0dcbe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
104 changed files with 3848 additions and 2996 deletions

View File

@ -22,6 +22,7 @@ import (
"github.com/Microsoft/go-winio" "github.com/Microsoft/go-winio"
"github.com/Microsoft/hcsshim/pkg/ociwclayer" "github.com/Microsoft/hcsshim/pkg/ociwclayer"
ocicimlayer "github.com/Microsoft/hcsshim/pkg/ociwclayer/cim"
) )
// applyWindowsLayer applies a tar stream of an OCI style diff tar of a Windows layer // applyWindowsLayer applies a tar stream of an OCI style diff tar of a Windows layer
@ -77,3 +78,16 @@ func WithParentLayers(p []string) WriteDiffOpt {
return nil return nil
} }
} }
// applyWindowsCimLayer applies a tar stream of an OCI style diff tar onto a
// Windows cim format layer rooted at `root`, delegating to hcsshim's cim layer
// importer. options.Parents must hold the paths of all parent layers.
func applyWindowsCimLayer(ctx context.Context, root string, r io.Reader, options ApplyOptions) (size int64, err error) {
	return ocicimlayer.ImportCimLayerFromTar(ctx, r, root, options.Parents)
}
// AsCimContainerLayer returns an ApplyOpt that marks the tar stream being
// applied as a Windows container layer stored in the cim format, switching the
// apply function to the cim-specific importer.
func AsCimContainerLayer() ApplyOpt {
	return func(opts *ApplyOptions) error {
		opts.applyFunc = applyWindowsCimLayer
		return nil
	}
}

111
diff/windows/cimfs.go Normal file
View File

@ -0,0 +1,111 @@
//go:build windows
// +build windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package windows
import (
"context"
"fmt"
"github.com/Microsoft/hcsshim/pkg/cimfs"
"github.com/containerd/containerd/v2/archive"
"github.com/containerd/containerd/v2/content"
"github.com/containerd/containerd/v2/diff"
"github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/metadata"
"github.com/containerd/containerd/v2/mount"
"github.com/containerd/containerd/v2/platforms"
"github.com/containerd/containerd/v2/plugins"
"github.com/containerd/plugin"
"github.com/containerd/plugin/registry"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)
// init registers the "cimfs" diff plugin. The plugin requires the metadata
// plugin (for access to the content store) and only initializes successfully
// on Windows hosts that support CimFS.
func init() {
	registry.Register(&plugin.Registration{
		Type: plugins.DiffPlugin,
		ID:   "cimfs",
		Requires: []plugin.Type{
			plugins.MetadataPlugin,
		},
		InitFn: func(ic *plugin.InitContext) (interface{}, error) {
			md, err := ic.GetSingle(plugins.MetadataPlugin)
			if err != nil {
				return nil, err
			}
			// Fail fast here so the plugin is reported as unavailable instead of
			// failing later at apply time.
			if !cimfs.IsCimFSSupported() {
				return nil, fmt.Errorf("host windows version doesn't support CimFS")
			}
			ic.Meta.Platforms = append(ic.Meta.Platforms, platforms.DefaultSpec())
			return NewCimDiff(md.(*metadata.DB).ContentStore())
		},
	})
}
// cimDiff does filesystem comparison and application
// for CimFS specific layer diffs.
type cimDiff struct {
	// store is the content store from which layer tar streams are read.
	store content.Store
}
// NewCimDiff is the Windows cim container layer implementation
// for comparing and applying filesystem layers backed by the given
// content store.
func NewCimDiff(store content.Store) (CompareApplier, error) {
	return cimDiff{store: store}, nil
}
// Apply applies the content associated with the provided digests onto the
// provided mounts. Archive content will be extracted and decompressed if
// necessary.
func (c cimDiff) Apply(ctx context.Context, desc ocispec.Descriptor, mounts []mount.Mount, opts ...diff.ApplyOpt) (d ocispec.Descriptor, err error) {
	layerPath, parents, merr := cimMountsToLayerAndParents(mounts)
	if merr != nil {
		return emptyDesc, merr
	}
	// Reuse the common diff-apply path with the cim-specific archive option.
	return applyDiffCommon(ctx, c.store, desc, layerPath, parents, archive.AsCimContainerLayer(), opts...)
}
// Compare creates a diff between the given mounts and uploads the result
// to the content store.
//
// Not implemented for cimfs layers yet; returning ErrNotImplemented lets the
// diff service try the next differ in the chain.
func (c cimDiff) Compare(ctx context.Context, lower, upper []mount.Mount, opts ...diff.Opt) (d ocispec.Descriptor, err error) {
	// support for generating layer diff of cimfs layers will be added later.
	return emptyDesc, errdefs.ErrNotImplemented
}
// cimMountsToLayerAndParents extracts the layer path and the parent layer
// paths from the given mounts. Exactly one mount of type "CimFS" is expected.
func cimMountsToLayerAndParents(mounts []mount.Mount) (string, []string, error) {
	if len(mounts) != 1 {
		return "", nil, fmt.Errorf("%w: number of mounts should always be 1 for Windows layers", errdefs.ErrInvalidArgument)
	}

	layerMount := mounts[0]
	if layerMount.Type != "CimFS" {
		// This is a special case error. When this is received the diff service
		// will attempt the next differ in the chain.
		return "", nil, errdefs.ErrNotImplemented
	}

	parents, err := layerMount.GetParentPaths()
	if err != nil {
		return "", nil, err
	}
	return layerMount.Source, parents, nil
}

View File

@ -88,10 +88,8 @@ func NewWindowsDiff(store content.Store) (CompareApplier, error) {
}, nil }, nil
} }
// Apply applies the content associated with the provided digests onto the // applyDiffCommon is a common function that is called by both windows & cimfs differs.
// provided mounts. Archive content will be extracted and decompressed if func applyDiffCommon(ctx context.Context, store content.Store, desc ocispec.Descriptor, layerPath string, parentLayerPaths []string, applyOpt archive.ApplyOpt, opts ...diff.ApplyOpt) (d ocispec.Descriptor, err error) {
// necessary.
func (s windowsDiff) Apply(ctx context.Context, desc ocispec.Descriptor, mounts []mount.Mount, opts ...diff.ApplyOpt) (d ocispec.Descriptor, err error) {
t1 := time.Now() t1 := time.Now()
defer func() { defer func() {
if err == nil { if err == nil {
@ -111,7 +109,7 @@ func (s windowsDiff) Apply(ctx context.Context, desc ocispec.Descriptor, mounts
} }
} }
ra, err := s.store.ReaderAt(ctx, desc) ra, err := store.ReaderAt(ctx, desc)
if err != nil { if err != nil {
return emptyDesc, fmt.Errorf("failed to get reader from content store: %w", err) return emptyDesc, fmt.Errorf("failed to get reader from content store: %w", err)
} }
@ -133,26 +131,13 @@ func (s windowsDiff) Apply(ctx context.Context, desc ocispec.Descriptor, mounts
r: io.TeeReader(processor, digester.Hash()), r: io.TeeReader(processor, digester.Hash()),
} }
layer, parentLayerPaths, err := mountsToLayerAndParents(mounts)
if err != nil {
return emptyDesc, err
}
// TODO darrenstahlmsft: When this is done isolated, we should disable these.
// it currently cannot be disabled, unless we add ref counting. Since this is
// temporary, leaving it enabled is OK for now.
// https://github.com/containerd/containerd/issues/1681
if err := winio.EnableProcessPrivileges([]string{winio.SeBackupPrivilege, winio.SeRestorePrivilege}); err != nil {
return emptyDesc, err
}
archiveOpts := []archive.ApplyOpt{ archiveOpts := []archive.ApplyOpt{
archive.WithParents(parentLayerPaths), archive.WithParents(parentLayerPaths),
archive.AsWindowsContainerLayer(),
archive.WithNoSameOwner(), // Lchown is not supported on Windows archive.WithNoSameOwner(), // Lchown is not supported on Windows
applyOpt,
} }
if _, err := archive.Apply(ctx, layer, rc, archiveOpts...); err != nil { if _, err := archive.Apply(ctx, layerPath, rc, archiveOpts...); err != nil {
return emptyDesc, err return emptyDesc, err
} }
@ -168,6 +153,26 @@ func (s windowsDiff) Apply(ctx context.Context, desc ocispec.Descriptor, mounts
}, nil }, nil
} }
// Apply applies the content associated with the provided digests onto the
// provided mounts. Archive content will be extracted and decompressed if
// necessary.
func (s windowsDiff) Apply(ctx context.Context, desc ocispec.Descriptor, mounts []mount.Mount, opts ...diff.ApplyOpt) (d ocispec.Descriptor, err error) {
layer, parentLayerPaths, err := mountsToLayerAndParents(mounts)
if err != nil {
return emptyDesc, err
}
// TODO darrenstahlmsft: When this is done isolated, we should disable these.
// it currently cannot be disabled, unless we add ref counting. Since this is
// temporary, leaving it enabled is OK for now.
// https://github.com/containerd/containerd/issues/1681
if err := winio.EnableProcessPrivileges([]string{winio.SeBackupPrivilege, winio.SeRestorePrivilege}); err != nil {
return emptyDesc, err
}
return applyDiffCommon(ctx, s.store, desc, layer, parentLayerPaths, archive.AsWindowsContainerLayer(), opts...)
}
// Compare creates a diff between the given mounts and uploads the result // Compare creates a diff between the given mounts and uploads the result
// to the content store. // to the content store.
func (s windowsDiff) Compare(ctx context.Context, lower, upper []mount.Mount, opts ...diff.Opt) (d ocispec.Descriptor, err error) { func (s windowsDiff) Compare(ctx context.Context, lower, upper []mount.Mount, opts ...diff.Opt) (d ocispec.Descriptor, err error) {

7
go.mod
View File

@ -7,8 +7,7 @@ require (
github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24
github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0 github.com/AdamKorcz/go-118-fuzz-build v0.0.0-20230306123547-8075edf89bb0
github.com/Microsoft/go-winio v0.6.1 github.com/Microsoft/go-winio v0.6.1
github.com/Microsoft/hcsshim v0.12.0-rc.1 github.com/Microsoft/hcsshim v0.12.0-rc.2
github.com/Microsoft/hcsshim/test v0.0.0-20210227013316-43a75bb4edd3
github.com/container-orchestrated-devices/container-device-interface v0.6.1 github.com/container-orchestrated-devices/container-device-interface v0.6.1
github.com/containerd/btrfs/v2 v2.0.0 github.com/containerd/btrfs/v2 v2.0.0
github.com/containerd/cgroups/v3 v3.0.2 github.com/containerd/cgroups/v3 v3.0.2
@ -30,9 +29,9 @@ require (
github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c
github.com/docker/go-metrics v0.0.1 github.com/docker/go-metrics v0.0.1
github.com/docker/go-units v0.5.0 github.com/docker/go-units v0.5.0
github.com/fsnotify/fsnotify v1.7.0 github.com/fsnotify/fsnotify v1.6.0
github.com/google/go-cmp v0.6.0 github.com/google/go-cmp v0.6.0
github.com/google/uuid v1.5.0 github.com/google/uuid v1.3.1
github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 github.com/grpc-ecosystem/go-grpc-middleware v1.4.0
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0
github.com/intel/goresctrl v0.5.0 github.com/intel/goresctrl v0.5.0

593
go.sum

File diff suppressed because it is too large Load Diff

View File

@ -22,7 +22,6 @@ import (
"path/filepath" "path/filepath"
"github.com/Microsoft/hcsshim/osversion" "github.com/Microsoft/hcsshim/osversion"
_ "github.com/Microsoft/hcsshim/test/functional/manifest" // For rsrc_amd64.syso
) )
//nolint:unused // some variables used for fuzz //nolint:unused // some variables used for fuzz

View File

@ -36,7 +36,7 @@ func snapshotterOpts(snapshotterName string, config *runtime.ContainerConfig) ([
var opts []snapshots.Opt var opts []snapshots.Opt
switch snapshotterName { switch snapshotterName {
case "windows": case "windows", "cimfs":
rootfsSize := config.GetWindows().GetResources().GetRootfsSizeInBytes() rootfsSize := config.GetWindows().GetResources().GetRootfsSizeInBytes()
if rootfsSize != 0 { if rootfsSize != 0 {
labels := map[string]string{ labels := map[string]string{

View File

@ -1 +1 @@
v0.12.0-rc.0 v0.12.0-rc.2

217
snapshots/windows/cimfs.go Normal file
View File

@ -0,0 +1,217 @@
//go:build windows
// +build windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package windows
import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/Microsoft/hcsshim"
"github.com/Microsoft/hcsshim/pkg/cimfs"
cimlayer "github.com/Microsoft/hcsshim/pkg/ociwclayer/cim"
"github.com/containerd/containerd/v2/errdefs"
"github.com/containerd/containerd/v2/mount"
"github.com/containerd/containerd/v2/platforms"
"github.com/containerd/containerd/v2/plugins"
"github.com/containerd/containerd/v2/snapshots"
"github.com/containerd/containerd/v2/snapshots/storage"
"github.com/containerd/log"
"github.com/containerd/plugin"
"github.com/containerd/plugin/registry"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)
// Composite image FileSystem (CimFS) is a new read-only filesystem (similar to overlayFS on Linux) created
// specifically for storing container image layers on windows. cimFSSnapshotter is a snapshotter that uses
// CimFS to create read-only parent layer snapshots. Each snapshot is represented by a `<snapshot-id>.cim`
// file and some other files (region & objectid files) which hold contents of that snapshot. Once a cim
// file for a layer is created it can only be used as a read-only layer by mounting it to a volume. Hence,
// CimFS will not be used when we are creating writable layers for container scratch and such. (However, in
// the future the scratch layer of a container can be exported to a cim layer and then be used as a parent
// layer for another container).
type cimFSSnapshotter struct {
	*windowsBaseSnapshotter
	// cimDir is the path to the directory which holds all of the layer cim files. CimFS needs all the
	// layer cim files to be present in the same directory. Hence, cim files of all the snapshots (even if
	// they are of different images) will be kept in the same directory.
	cimDir string
}
// init registers the "cimfs" snapshotter plugin, rooted at the plugin's
// configured root directory.
func init() {
	registry.Register(&plugin.Registration{
		Type: plugins.SnapshotPlugin,
		ID:   "cimfs",
		InitFn: func(ic *plugin.InitContext) (interface{}, error) {
			// Advertise the host's default platform for this snapshotter.
			ic.Meta.Platforms = []ocispec.Platform{platforms.DefaultSpec()}
			return NewCimFSSnapshotter(ic.Properties[plugins.PropertyRootDir])
		},
	})
}
// NewCimFSSnapshotter returns a new CimFS based windows snapshotter rooted at
// the given directory. It fails if the host windows version has no CimFS
// support.
func NewCimFSSnapshotter(root string) (snapshots.Snapshotter, error) {
	if !cimfs.IsCimFSSupported() {
		return nil, fmt.Errorf("host windows version doesn't support CimFS")
	}

	base, err := newBaseSnapshotter(root)
	if err != nil {
		return nil, err
	}

	// All layer cim files live together under <home>/cim-layers.
	sn := &cimFSSnapshotter{
		windowsBaseSnapshotter: base,
		cimDir:                 filepath.Join(base.info.HomeDir, "cim-layers"),
	}
	return sn, nil
}
// getCimLayerPath returns the path of the cim file for the given snapshot ID.
// This is pure string manipulation: it does not check that the cim actually
// exists. Use isCimLayer to verify that a snapshot really is a cim layer.
func getCimLayerPath(cimDir, snID string) string {
	return filepath.Join(cimDir, snID+".cim")
}
// isCimLayer reports whether the snapshot referred to by the given key is
// stored as a cim layer. With the CimFS snapshotter all read-only (i.e image)
// layers are stored in the cim format while VHDs are still used for scratch
// layers.
func (s *cimFSSnapshotter) isCimLayer(ctx context.Context, key string) (bool, error) {
	id, _, _, err := storage.GetInfo(ctx, key)
	if err != nil {
		return false, fmt.Errorf("get snapshot info: %w", err)
	}

	// The snapshot is a cim layer exactly when its cim file exists on disk.
	_, statErr := os.Stat(getCimLayerPath(s.cimDir, id))
	switch {
	case statErr == nil:
		return true, nil
	case os.IsNotExist(statErr):
		return false, nil
	default:
		return false, statErr
	}
}
// Usage returns the resources taken by the snapshot identified by key. The
// base snapshotter's usage (the snapshot directory) is combined with the size
// of the snapshot's cim file when the snapshot is stored as a cim layer.
func (s *cimFSSnapshotter) Usage(ctx context.Context, key string) (snapshots.Usage, error) {
	baseUsage, err := s.windowsBaseSnapshotter.Usage(ctx, key)
	if err != nil {
		return snapshots.Usage{}, err
	}

	// Open a read-only transaction for the info and cim lookups below.
	ctx, t, err := s.ms.TransactionContext(ctx, false)
	if err != nil {
		return snapshots.Usage{}, err
	}
	defer t.Rollback()

	id, _, _, err := storage.GetInfo(ctx, key)
	if err != nil {
		return snapshots.Usage{}, fmt.Errorf("failed to get snapshot info: %w", err)
	}

	if ok, err := s.isCimLayer(ctx, key); err != nil {
		return snapshots.Usage{}, err
	} else if ok {
		// Cim layer contents live outside the snapshot dir, so add the cim's
		// usage on top of the base usage.
		cimUsage, err := cimfs.GetCimUsage(ctx, getCimLayerPath(s.cimDir, id))
		if err != nil {
			return snapshots.Usage{}, err
		}
		baseUsage.Size += int64(cimUsage)
	}
	return baseUsage, nil
}
// Prepare creates an active snapshot identified by key on top of parent and
// returns its mounts, retyped to "CimFS" so they are routed to the cimfs
// differ.
func (s *cimFSSnapshotter) Prepare(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
	mounts, err := s.createSnapshot(ctx, snapshots.KindActive, key, parent, opts)
	if err != nil {
		return mounts, err
	}
	mounts[0].Type = "CimFS"
	return mounts, nil
}
// View creates a read-only (view) snapshot identified by key on top of parent
// and returns its mounts, retyped to "CimFS" so they are routed to the cimfs
// differ.
func (s *cimFSSnapshotter) View(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
	mounts, err := s.createSnapshot(ctx, snapshots.KindView, key, parent, opts)
	if err != nil {
		return mounts, err
	}
	mounts[0].Type = "CimFS"
	return mounts, nil
}
// Mounts returns the mounts for the snapshot identified by key, retyped to
// "CimFS".
func (s *cimFSSnapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, error) {
	m, err := s.windowsBaseSnapshotter.Mounts(ctx, key)
	if err != nil {
		return nil, err
	}
	m[0].Type = "CimFS"
	return m, nil
}
// Commit commits the active snapshot identified by key under name. Only
// snapshots created for image unpacking (key contains the unpack prefix) can
// be committed; converting a container's scratch layer into a read-only cim
// layer is not supported yet.
func (s *cimFSSnapshotter) Commit(ctx context.Context, name, key string, opts ...snapshots.Opt) error {
	if !strings.Contains(key, snapshots.UnpackKeyPrefix) {
		return fmt.Errorf("committing a scratch snapshot to read-only cim layer isn't supported yet")
	}

	return s.ms.WithTransaction(ctx, true, func(ctx context.Context) error {
		// Record the usage (including the cim's size) with the committed snapshot.
		usage, err := s.Usage(ctx, key)
		if err != nil {
			return fmt.Errorf("failed to get usage during commit: %w", err)
		}
		if _, err := storage.CommitActive(ctx, key, name, usage, opts...); err != nil {
			return fmt.Errorf("failed to commit snapshot: %w", err)
		}
		return nil
	})
}
// Remove abandons the transaction identified by key. All resources
// associated with the key will be removed.
func (s *cimFSSnapshotter) Remove(ctx context.Context, key string) error {
	var ID, renamedID string

	// collect original ID before preRemove
	err := s.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
		var infoErr error
		ID, _, _, infoErr = storage.GetInfo(ctx, key)
		return infoErr
	})
	if err != nil {
		return fmt.Errorf("%w: failed to get snapshot info: %s", errdefs.ErrFailedPrecondition, err)
	}

	// preRemove renames the snapshot dir to "rm-<id>" and drops the snapshot
	// from the metadata store.
	renamedID, err = s.preRemove(ctx, key)
	if err != nil {
		// wrap as ErrFailedPrecondition so that cleanup of other snapshots can continue
		return fmt.Errorf("%w: %s", errdefs.ErrFailedPrecondition, err)
	}

	// The cleanup below is best-effort: the snapshot is already gone from the
	// metadata store, so failures are logged rather than returned.
	if err := cimlayer.DestroyCimLayer(s.getSnapshotDir(ID)); err != nil {
		// Must be cleaned up, any "rm-*" could be removed if no active transactions
		log.G(ctx).WithError(err).WithField("ID", ID).Warnf("failed to cleanup cim files")
	}

	if err = hcsshim.DestroyLayer(s.info, renamedID); err != nil {
		// Must be cleaned up, any "rm-*" could be removed if no active transactions
		log.G(ctx).WithError(err).WithField("renamedID", renamedID).Warnf("failed to remove root filesystem")
	}

	return nil
}

419
snapshots/windows/common.go Normal file
View File

@ -0,0 +1,419 @@
//go:build windows
// +build windows
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package windows
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/Microsoft/hcsshim"
"github.com/containerd/containerd/v2/mount"
"github.com/containerd/containerd/v2/snapshots"
"github.com/containerd/containerd/v2/snapshots/storage"
"github.com/containerd/continuity/fs"
"github.com/containerd/log"
)
// windowsBaseSnapshotter is a type that implements common functionality required by both windows & cimfs
// snapshotters (sort of a base type that windows & cimfs snapshotter types derive from - however,
// windowsBaseSnapshotter does NOT implement the full Snapshotter interface). Some functions
// (like Stat, Update) that are identical for both snapshotters are directly implemented in this base
// snapshotter and such functions handle database transaction creation etc. However, the functions that are
// not common don't create a transaction to allow the caller the flexibility of deciding whether to commit or
// abort the transaction.
type windowsBaseSnapshotter struct {
	// root is the snapshotter's root directory; it holds metadata.db and the "snapshots" directory.
	root string
	// ms is the metadata store used for snapshot bookkeeping and transactions.
	ms *storage.MetaStore
	// info points hcsshim driver calls at the "snapshots" directory.
	info hcsshim.DriverInfo
}
// newBaseSnapshotter creates the on-disk layout for a snapshotter rooted at
// the given directory (the root itself, metadata.db and the "snapshots" dir)
// and returns a windowsBaseSnapshotter over it.
func newBaseSnapshotter(root string) (*windowsBaseSnapshotter, error) {
	if err := os.MkdirAll(root, 0700); err != nil {
		return nil, err
	}

	metaStore, err := storage.NewMetaStore(filepath.Join(root, "metadata.db"))
	if err != nil {
		return nil, err
	}

	snapshotsDir := filepath.Join(root, "snapshots")
	if err := os.Mkdir(snapshotsDir, 0700); err != nil && !os.IsExist(err) {
		return nil, err
	}

	return &windowsBaseSnapshotter{
		root: root,
		ms:   metaStore,
		info: hcsshim.DriverInfo{HomeDir: snapshotsDir},
	}, nil
}
// getSnapshotDir returns the on-disk directory for the snapshot with the given id.
func (w *windowsBaseSnapshotter) getSnapshotDir(id string) string {
	return filepath.Join(w.root, "snapshots", id)
}
// parentIDsToParentPaths maps the given snapshot IDs to their snapshot
// directory paths, preserving order.
func (w *windowsBaseSnapshotter) parentIDsToParentPaths(parentIDs []string) []string {
	paths := make([]string, len(parentIDs))
	for i, id := range parentIDs {
		paths[i] = w.getSnapshotDir(id)
	}
	return paths
}
// Stat returns the info of the snapshot identified by key.
func (w *windowsBaseSnapshotter) Stat(ctx context.Context, key string) (info snapshots.Info, err error) {
	if err = w.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
		_, info, _, err = storage.GetInfo(ctx, key)
		return err
	}); err != nil {
		return snapshots.Info{}, err
	}
	return info, nil
}
// Update updates the info of a snapshot, restricted to the given field paths,
// and returns the updated info.
func (w *windowsBaseSnapshotter) Update(ctx context.Context, info snapshots.Info, fieldpaths ...string) (_ snapshots.Info, err error) {
	if err = w.ms.WithTransaction(ctx, true, func(ctx context.Context) error {
		info, err = storage.UpdateInfo(ctx, info, fieldpaths...)
		return err
	}); err != nil {
		return snapshots.Info{}, err
	}
	return info, nil
}
// Usage returns the resources taken by the snapshot identified by key. For
// committed snapshots the usage recorded in the metadata store is returned;
// active snapshots are measured on disk since they are still mutable.
func (w *windowsBaseSnapshotter) Usage(ctx context.Context, key string) (usage snapshots.Usage, err error) {
	var (
		id   string
		info snapshots.Info
	)
	err = w.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
		id, info, usage, err = storage.GetInfo(ctx, key)
		return err
	})
	if err != nil {
		return snapshots.Usage{}, err
	}

	if info.Kind == snapshots.KindActive {
		// Active snapshots may have grown since creation; measure the directory.
		path := w.getSnapshotDir(id)
		du, err := fs.DiskUsage(ctx, path)
		if err != nil {
			return snapshots.Usage{}, err
		}

		usage = snapshots.Usage(du)
	}

	return usage, nil
}
// mounts builds the mount list for the given snapshot: a single
// "windows-layer" mount whose options carry the ro/rw flag and, when the
// snapshot has parents, the JSON-encoded parent layer paths. key is currently
// unused.
func (w *windowsBaseSnapshotter) mounts(sn storage.Snapshot, key string) []mount.Mount {
	access := "rw"
	if sn.Kind == snapshots.KindView {
		access = "ro"
	}

	options := []string{access}
	if len(sn.ParentIDs) != 0 {
		// error is not checked here, as a string array will never fail to Marshal
		parentLayersJSON, _ := json.Marshal(w.parentIDsToParentPaths(sn.ParentIDs))
		options = append(options, mount.ParentLayerPathsFlag+string(parentLayersJSON))
	}

	return []mount.Mount{
		{
			Source:  w.getSnapshotDir(sn.ID),
			Type:    "windows-layer",
			Options: options,
		},
	}
}
// Mounts returns the mounts for the transaction identified by key. Can be
// called on an read-write or readonly transaction.
//
// This can be used to recover mounts after calling View or Prepare.
func (w *windowsBaseSnapshotter) Mounts(ctx context.Context, key string) (_ []mount.Mount, err error) {
	var sn storage.Snapshot
	if err = w.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
		var terr error
		if sn, terr = storage.GetSnapshot(ctx, key); terr != nil {
			return fmt.Errorf("failed to get snapshot mount: %w", terr)
		}
		return nil
	}); err != nil {
		return nil, err
	}
	return w.mounts(sn, key), nil
}
// Walk the committed snapshots, invoking fn for each snapshot matching the
// given filters.
func (w *windowsBaseSnapshotter) Walk(ctx context.Context, fn snapshots.WalkFunc, fs ...string) error {
	// Read-only transaction: walking never mutates snapshot metadata.
	return w.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
		return storage.WalkInfo(ctx, fn, fs...)
	})
}
// preRemove prepares for removal of a snapshot by first renaming the snapshot directory and if that succeeds
// removing the snapshot info from the database. Then the caller can decide how to remove the actual renamed
// snapshot directory. Returns the new 'ID' (i.e the directory name after rename).
func (w *windowsBaseSnapshotter) preRemove(ctx context.Context, key string) (string, error) {
	var (
		renamed, path, renamedID string
		restore                  bool
	)

	err := w.ms.WithTransaction(ctx, true, func(ctx context.Context) error {
		// Drop the snapshot from the metadata store first; the rename below keeps
		// the data recoverable until the transaction commits.
		id, _, err := storage.Remove(ctx, key)
		if err != nil {
			return fmt.Errorf("failed to remove: %w", err)
		}

		path = w.getSnapshotDir(id)
		renamedID = "rm-" + id
		renamed = w.getSnapshotDir(renamedID)
		if err = os.Rename(path, renamed); err != nil && !os.IsNotExist(err) {
			if !os.IsPermission(err) {
				return err
			}
			// If permission denied, it's possible that the scratch is still mounted, an
			// artifact after a hard daemon crash for example. Worth a shot to try deactivating it
			// before retrying the rename.
			var (
				home, layerID = filepath.Split(path)
				di            = hcsshim.DriverInfo{
					HomeDir: home,
				}
			)

			if deactivateErr := hcsshim.DeactivateLayer(di, layerID); deactivateErr != nil {
				return fmt.Errorf("failed to deactivate layer following failed rename: %s: %w", deactivateErr, err)
			}

			if renameErr := os.Rename(path, renamed); renameErr != nil && !os.IsNotExist(renameErr) {
				return fmt.Errorf("second rename attempt following detach failed: %s: %w", renameErr, err)
			}
		}
		// The directory has been renamed; if the transaction fails after this
		// point the rename must be undone.
		restore = true
		return nil
	})
	if err != nil {
		if restore { // failed to commit
			if err1 := os.Rename(renamed, path); err1 != nil {
				// May cause inconsistent data on disk
				log.G(ctx).WithError(err1).WithField("path", renamed).Error("Failed to rename after failed commit")
			}
		}
		return "", err
	}
	return renamedID, nil
}
// Close closes the snapshotter. Only the metadata store needs explicit cleanup.
func (w *windowsBaseSnapshotter) Close() error {
	return w.ms.Close()
}
// createSnapshot creates a new snapshot (active or view) for key on top of
// parent, creates its on-disk directory and — for non-unpack snapshots with
// parents — its scratch VHD(s), then returns the mounts for the new snapshot.
func (w *windowsBaseSnapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, key, parent string, opts []snapshots.Opt) (_ []mount.Mount, err error) {
	var newSnapshot storage.Snapshot
	err = w.ms.WithTransaction(ctx, true, func(ctx context.Context) error {
		newSnapshot, err = storage.CreateSnapshot(ctx, kind, key, parent, opts...)
		if err != nil {
			return fmt.Errorf("failed to create snapshot: %w", err)
		}

		log.G(ctx).Debug("createSnapshot")

		// Create the new snapshot dir
		snDir := w.getSnapshotDir(newSnapshot.ID)
		if err = os.MkdirAll(snDir, 0700); err != nil {
			return fmt.Errorf("failed to create snapshot dir %s: %w", snDir, err)
		}

		if strings.Contains(key, snapshots.UnpackKeyPrefix) {
			// IO/disk space optimization: Do nothing
			//
			// We only need one sandbox.vhdx for the container. Skip making one for this
			// snapshot if this isn't the snapshot that just houses the final sandbox.vhd
			// that will be mounted as the containers scratch. Currently the key for a snapshot
			// where a layer will be extracted to will have the string `extract-` in it.
			return nil
		}

		if len(newSnapshot.ParentIDs) == 0 {
			// A parentless snapshot a new base layer. Valid base layers must have a "Files" folder.
			// When committed, there'll be some post-processing to fill in the rest
			// of the metadata.
			filesDir := filepath.Join(snDir, "Files")
			if err := os.MkdirAll(filesDir, 0700); err != nil {
				return fmt.Errorf("creating Files dir: %w", err)
			}
			return nil
		}

		parentLayerPaths := w.parentIDsToParentPaths(newSnapshot.ParentIDs)

		// Re-apply the snapshot opts to read the labels controlling scratch sizing.
		var snapshotInfo snapshots.Info
		for _, o := range opts {
			o(&snapshotInfo)
		}

		var sizeInBytes uint64
		if sizeGBstr, ok := snapshotInfo.Labels[rootfsSizeInGBLabel]; ok {
			log.G(ctx).Warnf("%q label is deprecated, please use %q instead.", rootfsSizeInGBLabel, rootfsSizeInBytesLabel)

			sizeInGB, err := strconv.ParseUint(sizeGBstr, 10, 32)
			if err != nil {
				return fmt.Errorf("failed to parse label %q=%q: %w", rootfsSizeInGBLabel, sizeGBstr, err)
			}
			sizeInBytes = sizeInGB * 1024 * 1024 * 1024
		}

		// Prefer the newer label in bytes over the deprecated Windows specific GB variant.
		if sizeBytesStr, ok := snapshotInfo.Labels[rootfsSizeInBytesLabel]; ok {
			sizeInBytes, err = strconv.ParseUint(sizeBytesStr, 10, 64)
			if err != nil {
				return fmt.Errorf("failed to parse label %q=%q: %w", rootfsSizeInBytesLabel, sizeBytesStr, err)
			}
		}

		var makeUVMScratch bool
		if _, ok := snapshotInfo.Labels[uvmScratchLabel]; ok {
			makeUVMScratch = true
		}

		// This has to be run first to avoid clashing with the containers sandbox.vhdx.
		if makeUVMScratch {
			if err = w.createUVMScratchLayer(ctx, snDir, parentLayerPaths); err != nil {
				return fmt.Errorf("failed to make UVM's scratch layer: %w", err)
			}
		}
		if err = w.createScratchLayer(ctx, snDir, parentLayerPaths, sizeInBytes); err != nil {
			return fmt.Errorf("failed to create scratch layer: %w", err)
		}
		return nil
	})
	if err != nil {
		return nil, err
	}

	return w.mounts(newSnapshot, key), nil
}
// createScratchLayer is essentially a recreation of what HCS' CreateSandboxLayer
// does with some extra bells and whistles like expanding the volume if a size is
// specified. It copies the base layer's blank.vhdx into the snapshot dir as
// sandbox.vhdx.
func (w *windowsBaseSnapshotter) createScratchLayer(ctx context.Context, snDir string, parentLayers []string, sizeInBytes uint64) error {
	if len(parentLayers) == 0 {
		return errors.New("no parent layers present")
	}

	// The template scratch disk lives in the bottom-most (base) layer.
	base := parentLayers[len(parentLayers)-1]
	if err := copyScratchDisk(filepath.Join(base, "blank.vhdx"), filepath.Join(snDir, "sandbox.vhdx")); err != nil {
		return err
	}

	if sizeInBytes == 0 {
		return nil
	}
	if err := hcsshim.ExpandSandboxSize(w.info, filepath.Base(snDir), sizeInBytes); err != nil {
		return fmt.Errorf("failed to expand sandbox vhdx size to %d bytes: %w", sizeInBytes, err)
	}
	return nil
}
// createUVMScratchLayer handles creating the UVM's scratch layer by copying the
// base layer's SystemTemplate.vhdx into a nested "vm" directory within the
// snapshot dir.
func (w *windowsBaseSnapshotter) createUVMScratchLayer(ctx context.Context, snDir string, parentLayers []string) error {
	if len(parentLayers) == 0 {
		return errors.New("no parent layers present")
	}
	baseLayer := parentLayers[len(parentLayers)-1]

	// A valid base layer must carry a UtilityVM folder.
	uvmPath := filepath.Join(baseLayer, "UtilityVM")
	if _, err := os.Stat(uvmPath); os.IsNotExist(err) {
		return fmt.Errorf("failed to find UtilityVM directory in base layer %q: %w", baseLayer, err)
	}

	// The template disk should have been created while unpacking the base layer;
	// guard against it being missing anyway.
	templateDiffDisk := filepath.Join(uvmPath, "SystemTemplate.vhdx")
	if _, err := os.Stat(templateDiffDisk); os.IsNotExist(err) {
		return fmt.Errorf("%q does not exist in Utility VM image", templateDiffDisk)
	}

	// Nest the UVM scratch under "vm" to avoid clashing with the container's own
	// sandbox.vhdx.
	vmScratchDir := filepath.Join(snDir, "vm")
	if err := os.MkdirAll(vmScratchDir, 0777); err != nil {
		return fmt.Errorf("failed to make `vm` directory for vm's scratch space: %w", err)
	}
	return copyScratchDisk(templateDiffDisk, filepath.Join(vmScratchDir, "sandbox.vhdx"))
}
func copyScratchDisk(source, dest string) error {
scratchSource, err := os.OpenFile(source, os.O_RDWR, 0700)
if err != nil {
return fmt.Errorf("failed to open %s: %w", source, err)
}
defer scratchSource.Close()
f, err := os.OpenFile(dest, os.O_RDWR|os.O_CREATE, 0700)
if err != nil {
return fmt.Errorf("failed to create sandbox.vhdx in snapshot: %w", err)
}
defer f.Close()
if _, err := io.Copy(f, scratchSource); err != nil {
os.Remove(dest)
return fmt.Errorf("failed to copy cached %q to %q in snapshot: %w", source, dest, err)
}
return nil
}

View File

@ -21,13 +21,8 @@ package windows
import ( import (
"context" "context"
"encoding/json"
"errors"
"fmt" "fmt"
"io" "io"
"os"
"path/filepath"
"strconv"
"strings" "strings"
"github.com/Microsoft/go-winio" "github.com/Microsoft/go-winio"
@ -53,7 +48,7 @@ func init() {
ID: "windows", ID: "windows",
InitFn: func(ic *plugin.InitContext) (interface{}, error) { InitFn: func(ic *plugin.InitContext) (interface{}, error) {
ic.Meta.Platforms = []ocispec.Platform{platforms.DefaultSpec()} ic.Meta.Platforms = []ocispec.Platform{platforms.DefaultSpec()}
return NewSnapshotter(ic.Properties[plugins.PropertyRootDir]) return NewWindowsSnapshotter(ic.Properties[plugins.PropertyRootDir])
}, },
}) })
} }
@ -70,14 +65,13 @@ const (
rootfsSizeInBytesLabel = "containerd.io/snapshot/windows/rootfs.sizebytes" rootfsSizeInBytesLabel = "containerd.io/snapshot/windows/rootfs.sizebytes"
) )
type snapshotter struct { // snapshotter for legacy windows layers
root string type wcowSnapshotter struct {
info hcsshim.DriverInfo *windowsBaseSnapshotter
ms *storage.MetaStore
} }
// NewSnapshotter returns a new windows snapshotter // NewWindowsSnapshotter returns a new windows snapshotter
func NewSnapshotter(root string) (snapshots.Snapshotter, error) { func NewWindowsSnapshotter(root string) (snapshots.Snapshotter, error) {
fsType, err := winfs.GetFileSystemType(root) fsType, err := winfs.GetFileSystemType(root)
if err != nil { if err != nil {
return nil, err return nil, err
@ -86,113 +80,25 @@ func NewSnapshotter(root string) (snapshots.Snapshotter, error) {
return nil, fmt.Errorf("%s is not on an NTFS volume - only NTFS volumes are supported: %w", root, errdefs.ErrInvalidArgument) return nil, fmt.Errorf("%s is not on an NTFS volume - only NTFS volumes are supported: %w", root, errdefs.ErrInvalidArgument)
} }
if err := os.MkdirAll(root, 0700); err != nil { baseSn, err := newBaseSnapshotter(root)
return nil, err
}
ms, err := storage.NewMetaStore(filepath.Join(root, "metadata.db"))
if err != nil { if err != nil {
return nil, err return nil, err
} }
if err := os.Mkdir(filepath.Join(root, "snapshots"), 0700); err != nil && !os.IsExist(err) { return &wcowSnapshotter{
return nil, err windowsBaseSnapshotter: baseSn,
}
return &snapshotter{
info: hcsshim.DriverInfo{
HomeDir: filepath.Join(root, "snapshots"),
},
root: root,
ms: ms,
}, nil }, nil
} }
// Stat returns the info for an active or committed snapshot by name or func (s *wcowSnapshotter) Prepare(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
// key.
//
// Should be used for parent resolution, existence checks and to discern
// the kind of snapshot.
func (s *snapshotter) Stat(ctx context.Context, key string) (info snapshots.Info, err error) {
err = s.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
_, info, _, err = storage.GetInfo(ctx, key)
return err
})
if err != nil {
return snapshots.Info{}, err
}
return info, nil
}
func (s *snapshotter) Update(ctx context.Context, info snapshots.Info, fieldpaths ...string) (_ snapshots.Info, err error) {
err = s.ms.WithTransaction(ctx, true, func(ctx context.Context) error {
info, err = storage.UpdateInfo(ctx, info, fieldpaths...)
return err
})
if err != nil {
return snapshots.Info{}, err
}
return info, nil
}
func (s *snapshotter) Usage(ctx context.Context, key string) (usage snapshots.Usage, err error) {
var (
id string
info snapshots.Info
)
err = s.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
id, info, usage, err = storage.GetInfo(ctx, key)
return err
})
if err != nil {
return snapshots.Usage{}, err
}
if info.Kind == snapshots.KindActive {
path := s.getSnapshotDir(id)
du, err := fs.DiskUsage(ctx, path)
if err != nil {
return snapshots.Usage{}, err
}
usage = snapshots.Usage(du)
}
return usage, nil
}
func (s *snapshotter) Prepare(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
return s.createSnapshot(ctx, snapshots.KindActive, key, parent, opts) return s.createSnapshot(ctx, snapshots.KindActive, key, parent, opts)
} }
func (s *snapshotter) View(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) { func (s *wcowSnapshotter) View(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
return s.createSnapshot(ctx, snapshots.KindView, key, parent, opts) return s.createSnapshot(ctx, snapshots.KindView, key, parent, opts)
} }
// Mounts returns the mounts for the transaction identified by key. Can be func (s *wcowSnapshotter) Commit(ctx context.Context, name, key string, opts ...snapshots.Opt) (retErr error) {
// called on an read-write or readonly transaction.
//
// This can be used to recover mounts after calling View or Prepare.
func (s *snapshotter) Mounts(ctx context.Context, key string) (_ []mount.Mount, err error) {
var snapshot storage.Snapshot
err = s.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
snapshot, err = storage.GetSnapshot(ctx, key)
if err != nil {
return fmt.Errorf("failed to get snapshot mount: %w", err)
}
return nil
})
if err != nil {
return nil, err
}
return s.mounts(snapshot, key), nil
}
func (s *snapshotter) Commit(ctx context.Context, name, key string, opts ...snapshots.Opt) (retErr error) {
return s.ms.WithTransaction(ctx, true, func(ctx context.Context) error { return s.ms.WithTransaction(ctx, true, func(ctx context.Context) error {
// grab the existing id // grab the existing id
id, _, _, err := storage.GetInfo(ctx, key) id, _, _, err := storage.GetInfo(ctx, key)
@ -234,237 +140,23 @@ func (s *snapshotter) Commit(ctx context.Context, name, key string, opts ...snap
// Remove abandons the transaction identified by key. All resources // Remove abandons the transaction identified by key. All resources
// associated with the key will be removed. // associated with the key will be removed.
func (s *snapshotter) Remove(ctx context.Context, key string) error { func (s *wcowSnapshotter) Remove(ctx context.Context, key string) error {
var ( renamedID, err := s.preRemove(ctx, key)
renamed, path, renamedID string
restore bool
)
err := s.ms.WithTransaction(ctx, true, func(ctx context.Context) error {
id, _, err := storage.Remove(ctx, key)
if err != nil {
return fmt.Errorf("failed to remove: %w", err)
}
path = s.getSnapshotDir(id)
renamedID = "rm-" + id
renamed = s.getSnapshotDir(renamedID)
if err = os.Rename(path, renamed); err != nil && !os.IsNotExist(err) {
if !os.IsPermission(err) {
return err
}
// If permission denied, it's possible that the scratch is still mounted, an
// artifact after a hard daemon crash for example. Worth a shot to try deactivating it
// before retrying the rename.
var (
home, layerID = filepath.Split(path)
di = hcsshim.DriverInfo{
HomeDir: home,
}
)
if deactivateErr := hcsshim.DeactivateLayer(di, layerID); deactivateErr != nil {
return fmt.Errorf("failed to deactivate layer following failed rename: %s: %w", deactivateErr, err)
}
if renameErr := os.Rename(path, renamed); renameErr != nil && !os.IsNotExist(renameErr) {
return fmt.Errorf("second rename attempt following detach failed: %s: %w", renameErr, err)
}
}
restore = true
return nil
})
if err != nil { if err != nil {
if restore { // failed to commit // wrap as ErrFailedPrecondition so that cleanup of other snapshots can continue
if err1 := os.Rename(renamed, path); err1 != nil { return fmt.Errorf("%w: %s", errdefs.ErrFailedPrecondition, err)
// May cause inconsistent data on disk
log.G(ctx).WithError(err1).WithField("path", renamed).Error("Failed to rename after failed commit")
}
}
// Return the error wrapped in ErrFailedPrecondition so that cleanup of other snapshots will
// still continue.
return errors.Join(errdefs.ErrFailedPrecondition, err)
} }
if err = hcsshim.DestroyLayer(s.info, renamedID); err != nil { if err = hcsshim.DestroyLayer(s.info, renamedID); err != nil {
// Must be cleaned up, any "rm-*" could be removed if no active transactions // Must be cleaned up, any "rm-*" could be removed if no active transactions
log.G(ctx).WithError(err).WithField("path", renamed).Warnf("Failed to remove root filesystem") log.G(ctx).WithError(err).WithField("renamedID", renamedID).Warnf("Failed to remove root filesystem")
} }
return nil return nil
} }
// Walk the committed snapshots.
func (s *snapshotter) Walk(ctx context.Context, fn snapshots.WalkFunc, fs ...string) error {
return s.ms.WithTransaction(ctx, false, func(ctx context.Context) error {
return storage.WalkInfo(ctx, fn, fs...)
})
}
// Close closes the snapshotter
func (s *snapshotter) Close() error {
return s.ms.Close()
}
func (s *snapshotter) mounts(sn storage.Snapshot, key string) []mount.Mount {
var (
roFlag string
)
if sn.Kind == snapshots.KindView {
roFlag = "ro"
} else {
roFlag = "rw"
}
source := s.getSnapshotDir(sn.ID)
parentLayerPaths := s.parentIDsToParentPaths(sn.ParentIDs)
mountType := "windows-layer"
// error is not checked here, as a string array will never fail to Marshal
parentLayersJSON, _ := json.Marshal(parentLayerPaths)
parentLayersOption := mount.ParentLayerPathsFlag + string(parentLayersJSON)
options := []string{
roFlag,
}
if len(sn.ParentIDs) != 0 {
options = append(options, parentLayersOption)
}
mounts := []mount.Mount{
{
Source: source,
Type: mountType,
Options: options,
},
}
return mounts
}
func (s *snapshotter) getSnapshotDir(id string) string {
return filepath.Join(s.root, "snapshots", id)
}
func (s *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, key, parent string, opts []snapshots.Opt) (_ []mount.Mount, err error) {
var newSnapshot storage.Snapshot
err = s.ms.WithTransaction(ctx, true, func(ctx context.Context) error {
newSnapshot, err = storage.CreateSnapshot(ctx, kind, key, parent, opts...)
if err != nil {
return fmt.Errorf("failed to create snapshot: %w", err)
}
log.G(ctx).Debug("createSnapshot")
// Create the new snapshot dir
snDir := s.getSnapshotDir(newSnapshot.ID)
if err = os.MkdirAll(snDir, 0700); err != nil {
return fmt.Errorf("failed to create snapshot dir %s: %w", snDir, err)
}
if strings.Contains(key, snapshots.UnpackKeyPrefix) {
// IO/disk space optimization: Do nothing
//
// We only need one sandbox.vhdx for the container. Skip making one for this
// snapshot if this isn't the snapshot that just houses the final sandbox.vhd
// that will be mounted as the containers scratch. Currently the key for a snapshot
// where a layer will be extracted to will have the string `extract-` in it.
return nil
}
if len(newSnapshot.ParentIDs) == 0 {
// A parentless snapshot a new base layer. Valid base layers must have a "Files" folder.
// When committed, there'll be some post-processing to fill in the rest
// of the metadata.
filesDir := filepath.Join(snDir, "Files")
if err := os.MkdirAll(filesDir, 0700); err != nil {
return fmt.Errorf("creating Files dir: %w", err)
}
return nil
}
parentLayerPaths := s.parentIDsToParentPaths(newSnapshot.ParentIDs)
var snapshotInfo snapshots.Info
for _, o := range opts {
o(&snapshotInfo)
}
var sizeInBytes uint64
if sizeGBstr, ok := snapshotInfo.Labels[rootfsSizeInGBLabel]; ok {
log.G(ctx).Warnf("%q label is deprecated, please use %q instead.", rootfsSizeInGBLabel, rootfsSizeInBytesLabel)
sizeInGB, err := strconv.ParseUint(sizeGBstr, 10, 32)
if err != nil {
return fmt.Errorf("failed to parse label %q=%q: %w", rootfsSizeInGBLabel, sizeGBstr, err)
}
sizeInBytes = sizeInGB * 1024 * 1024 * 1024
}
// Prefer the newer label in bytes over the deprecated Windows specific GB variant.
if sizeBytesStr, ok := snapshotInfo.Labels[rootfsSizeInBytesLabel]; ok {
sizeInBytes, err = strconv.ParseUint(sizeBytesStr, 10, 64)
if err != nil {
return fmt.Errorf("failed to parse label %q=%q: %w", rootfsSizeInBytesLabel, sizeBytesStr, err)
}
}
var makeUVMScratch bool
if _, ok := snapshotInfo.Labels[uvmScratchLabel]; ok {
makeUVMScratch = true
}
// This has to be run first to avoid clashing with the containers sandbox.vhdx.
if makeUVMScratch {
if err = s.createUVMScratchLayer(ctx, snDir, parentLayerPaths); err != nil {
return fmt.Errorf("failed to make UVM's scratch layer: %w", err)
}
}
if err = s.createScratchLayer(ctx, snDir, parentLayerPaths, sizeInBytes); err != nil {
return fmt.Errorf("failed to create scratch layer: %w", err)
}
return nil
})
if err != nil {
return nil, err
}
return s.mounts(newSnapshot, key), nil
}
func (s *snapshotter) parentIDsToParentPaths(parentIDs []string) []string {
var parentLayerPaths []string
for _, ID := range parentIDs {
parentLayerPaths = append(parentLayerPaths, s.getSnapshotDir(ID))
}
return parentLayerPaths
}
// This is essentially a recreation of what HCS' CreateSandboxLayer does with some extra bells and
// whistles like expanding the volume if a size is specified.
func (s *snapshotter) createScratchLayer(ctx context.Context, snDir string, parentLayers []string, sizeInBytes uint64) error {
parentLen := len(parentLayers)
if parentLen == 0 {
return errors.New("no parent layers present")
}
baseLayer := parentLayers[parentLen-1]
templateDiffDisk := filepath.Join(baseLayer, "blank.vhdx")
dest := filepath.Join(snDir, "sandbox.vhdx")
if err := copyScratchDisk(templateDiffDisk, dest); err != nil {
return err
}
if sizeInBytes != 0 {
if err := hcsshim.ExpandSandboxSize(s.info, filepath.Base(snDir), sizeInBytes); err != nil {
return fmt.Errorf("failed to expand sandbox vhdx size to %d bytes: %w", sizeInBytes, err)
}
}
return nil
}
// convertScratchToReadOnlyLayer reimports the layer over itself, to transfer the files from the sandbox.vhdx to the on-disk storage. // convertScratchToReadOnlyLayer reimports the layer over itself, to transfer the files from the sandbox.vhdx to the on-disk storage.
func (s *snapshotter) convertScratchToReadOnlyLayer(ctx context.Context, snapshot storage.Snapshot, path string) (retErr error) { func (s *wcowSnapshotter) convertScratchToReadOnlyLayer(ctx context.Context, snapshot storage.Snapshot, path string) (retErr error) {
// TODO darrenstahlmsft: When this is done isolated, we should disable these. // TODO darrenstahlmsft: When this is done isolated, we should disable these.
// it currently cannot be disabled, unless we add ref counting. Since this is // it currently cannot be disabled, unless we add ref counting. Since this is
@ -505,54 +197,3 @@ func (s *snapshotter) convertScratchToReadOnlyLayer(ctx context.Context, snapsho
return nil return nil
} }
// This handles creating the UVMs scratch layer.
func (s *snapshotter) createUVMScratchLayer(ctx context.Context, snDir string, parentLayers []string) error {
parentLen := len(parentLayers)
if parentLen == 0 {
return errors.New("no parent layers present")
}
baseLayer := parentLayers[parentLen-1]
// Make sure base layer has a UtilityVM folder.
uvmPath := filepath.Join(baseLayer, "UtilityVM")
if _, err := os.Stat(uvmPath); os.IsNotExist(err) {
return fmt.Errorf("failed to find UtilityVM directory in base layer %q: %w", baseLayer, err)
}
templateDiffDisk := filepath.Join(uvmPath, "SystemTemplate.vhdx")
// Check if SystemTemplate disk doesn't exist for some reason (this should be made during the unpacking
// of the base layer).
if _, err := os.Stat(templateDiffDisk); os.IsNotExist(err) {
return fmt.Errorf("%q does not exist in Utility VM image", templateDiffDisk)
}
// Move the sandbox.vhdx into a nested vm folder to avoid clashing with a containers sandbox.vhdx.
vmScratchDir := filepath.Join(snDir, "vm")
if err := os.MkdirAll(vmScratchDir, 0777); err != nil {
return fmt.Errorf("failed to make `vm` directory for vm's scratch space: %w", err)
}
return copyScratchDisk(templateDiffDisk, filepath.Join(vmScratchDir, "sandbox.vhdx"))
}
func copyScratchDisk(source, dest string) error {
scratchSource, err := os.OpenFile(source, os.O_RDWR, 0700)
if err != nil {
return fmt.Errorf("failed to open %s: %w", source, err)
}
defer scratchSource.Close()
f, err := os.OpenFile(dest, os.O_RDWR|os.O_CREATE, 0700)
if err != nil {
return fmt.Errorf("failed to create sandbox.vhdx in snapshot: %w", err)
}
defer f.Close()
if _, err := io.Copy(f, scratchSource); err != nil {
os.Remove(dest)
return fmt.Errorf("failed to copy cached %q to %q in snapshot: %w", source, dest, err)
}
return nil
}

View File

@ -29,7 +29,7 @@ import (
) )
func newSnapshotter(ctx context.Context, root string) (snapshots.Snapshotter, func() error, error) { func newSnapshotter(ctx context.Context, root string) (snapshots.Snapshotter, func() error, error) {
snapshotter, err := NewSnapshotter(root) snapshotter, err := NewWindowsSnapshotter(root)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }

View File

@ -20,6 +20,7 @@ linters:
# - typecheck # - typecheck
# - unused # - unused
- errorlint # error wrapping (eg, not using `errors.Is`, using `%s` instead of `%w` in `fmt.Errorf`)
- gofmt # whether code was gofmt-ed - gofmt # whether code was gofmt-ed
- govet # enabled by default, but just to be sure - govet # enabled by default, but just to be sure
- nolintlint # ill-formed or insufficient nolint directives - nolintlint # ill-formed or insufficient nolint directives
@ -53,6 +54,12 @@ issues:
text: "^ST1003: should not use underscores in package names$" text: "^ST1003: should not use underscores in package names$"
source: "^package cri_containerd$" source: "^package cri_containerd$"
# don't bother with proper error wrapping in test code # don't bother with proper error wrapping in test code
- path: cri-containerd
linters:
- errorlint
text: "non-wrapping format verb for fmt.Errorf"
# This repo has a LOT of generated schema files, operating system bindings, and other # This repo has a LOT of generated schema files, operating system bindings, and other
# things that ST1003 from stylecheck won't like (screaming case Windows api constants for example). # things that ST1003 from stylecheck won't like (screaming case Windows api constants for example).
# There's also some structs that we *could* change the initialisms to be Go friendly # There's also some structs that we *could* change the initialisms to be Go friendly

View File

@ -9,15 +9,18 @@ It is primarily used in the [Moby](https://github.com/moby/moby) and [Containerd
## Building ## Building
While this repository can be used as a library of sorts to call the HCS apis, there are a couple binaries built out of the repository as well. The main ones being the Linux guest agent, and an implementation of the [runtime v2 containerd shim api](https://github.com/containerd/containerd/blob/master/runtime/v2/README.md). While this repository can be used as a library of sorts to call the HCS apis, there are a couple binaries built out of the repository as well. The main ones being the Linux guest agent, and an implementation of the [runtime v2 containerd shim api](https://github.com/containerd/containerd/blob/master/runtime/v2/README.md).
### Linux Hyper-V Container Guest Agent ### Linux Hyper-V Container Guest Agent
To build the Linux guest agent itself all that's needed is to set your GOOS to "Linux" and build out of ./cmd/gcs. To build the Linux guest agent itself all that's needed is to set your GOOS to "Linux" and build out of ./cmd/gcs.
```powershell ```powershell
C:\> $env:GOOS="linux" C:\> $env:GOOS="linux"
C:\> go build .\cmd\gcs\ C:\> go build .\cmd\gcs\
``` ```
or on a Linux machine or on a Linux machine
```sh ```sh
> go build ./cmd/gcs > go build ./cmd/gcs
``` ```
@ -33,13 +36,15 @@ make all
``` ```
If the build is successful, in the `./out` folder you should see: If the build is successful, in the `./out` folder you should see:
```sh ```sh
> ls ./out/ > ls ./out/
delta.tar.gz initrd.img rootfs.tar.gz delta.tar.gz initrd.img rootfs.tar.gz
``` ```
### Containerd Shim ### Containerd Shim
For info on the Runtime V2 API: https://github.com/containerd/containerd/blob/master/runtime/v2/README.md.
For info on the [Runtime V2 API](https://github.com/containerd/containerd/blob/master/runtime/v2/README.md).
Contrary to the typical Linux architecture of shim -> runc, the runhcs shim is used both to launch and manage the lifetime of containers. Contrary to the typical Linux architecture of shim -> runc, the runhcs shim is used both to launch and manage the lifetime of containers.
@ -48,7 +53,9 @@ C:\> $env:GOOS="windows"
C:\> go build .\cmd\containerd-shim-runhcs-v1 C:\> go build .\cmd\containerd-shim-runhcs-v1
``` ```
Then place the binary in the same directory that Containerd is located at in your environment. A default Containerd configuration file can be generated by running: Then place the binary in the same directory that Containerd is located at in your environment.
A default Containerd configuration file can be generated by running:
```powershell ```powershell
.\containerd.exe config default | Out-File "C:\Program Files\containerd\config.toml" -Encoding ascii .\containerd.exe config default | Out-File "C:\Program Files\containerd\config.toml" -Encoding ascii
``` ```
@ -56,6 +63,7 @@ Then place the binary in the same directory that Containerd is located at in you
This config file will already have the shim set as the default runtime for cri interactions. This config file will already have the shim set as the default runtime for cri interactions.
To trial using the shim out with ctr.exe: To trial using the shim out with ctr.exe:
```powershell ```powershell
C:\> ctr.exe run --runtime io.containerd.runhcs.v1 --rm mcr.microsoft.com/windows/nanoserver:2004 windows-test cmd /c "echo Hello World!" C:\> ctr.exe run --runtime io.containerd.runhcs.v1 --rm mcr.microsoft.com/windows/nanoserver:2004 windows-test cmd /c "echo Hello World!"
``` ```
@ -64,16 +72,69 @@ C:\> ctr.exe run --runtime io.containerd.runhcs.v1 --rm mcr.microsoft.com/window
This project welcomes contributions and suggestions. Most contributions require you to agree to a This project welcomes contributions and suggestions. Most contributions require you to agree to a
Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us Contributor License Agreement (CLA) declaring that you have the right to, and actually do, grant us
the rights to use your contribution. For details, visit https://cla.microsoft.com. the rights to use your contribution. For details, visit [Microsoft CLA](https://cla.microsoft.com).
When you submit a pull request, a CLA-bot will automatically determine whether you need to provide When you submit a pull request, a CLA-bot will automatically determine whether you need to provide
a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the instructions
provided by the bot. You will only need to do this once across all repos using our CLA. provided by the bot. You will only need to do this once across all repos using our CLA.
We also require that contributors [sign their commits](https://git-scm.com/docs/git-commit) using `git commit -s` or `git commit --signoff` to We require that contributors sign their commits
certify they either authored the work themselves or otherwise have permission to use it in this project. Please see https://developercertificate.org/ for to certify they either authored the work themselves or otherwise have permission to use it in this project.
more info, as well as to make sure that you can attest to the rules listed. Our CI uses the [DCO Github app](https://github.com/apps/dco) to ensure
that all commits in a given PR are signed-off. We also require that contributors sign their commits using using [`git commit --signoff`][git-commit-s]
to certify they either authored the work themselves or otherwise have permission to use it in this project.
A range of commits can be signed off using [`git rebase --signoff`][git-rebase-s].
Please see [the developer certificate](https://developercertificate.org) for more info,
as well as to make sure that you can attest to the rules listed.
Our CI uses the [DCO Github app](https://github.com/apps/dco) to ensure that all commits in a given PR are signed-off.
### Linting
Code must pass a linting stage, which uses [`golangci-lint`][lint].
Since `./test` is a separate Go module, the linter is run from both the root and the
`test` directories. Additionally, the linter is run with `GOOS` set to both `windows` and
`linux`.
The linting settings are stored in [`.golangci.yaml`](./.golangci.yaml), and can be run
automatically with VSCode by adding the following to your workspace or folder settings:
```json
"go.lintTool": "golangci-lint",
"go.lintOnSave": "package",
```
Additional editor [integrations options are also available][lint-ide].
Alternatively, `golangci-lint` can be [installed][lint-install] and run locally:
```shell
# use . or specify a path to only lint a package
# to show all lint errors, use flags "--max-issues-per-linter=0 --max-same-issues=0"
> golangci-lint run
```
To run across the entire repo for both `GOOS=windows` and `linux`:
```powershell
> foreach ( $goos in ('windows', 'linux') ) {
foreach ( $repo in ('.', 'test') ) {
pwsh -Command "cd $repo && go env -w GOOS=$goos && golangci-lint.exe run --verbose"
}
}
```
### Go Generate
The pipeline checks that auto-generated code, via `go generate`, are up to date.
Similar to the [linting stage](#linting), `go generate` is run in both the root and test Go modules.
This can be done via:
```shell
> go generate ./...
> cd test && go generate ./...
```
## Code of Conduct ## Code of Conduct
@ -83,7 +144,7 @@ contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additio
## Dependencies ## Dependencies
This project requires Golang 1.17 or newer to build. This project requires Golang 1.18 or newer to build.
For system requirements to run this project, see the Microsoft docs on [Windows Container requirements](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/system-requirements). For system requirements to run this project, see the Microsoft docs on [Windows Container requirements](https://docs.microsoft.com/en-us/virtualization/windowscontainers/deploy-containers/system-requirements).
@ -100,3 +161,10 @@ For additional details, see [Report a Computer Security Vulnerability](https://t
--------------- ---------------
Copyright (c) 2018 Microsoft Corp. All rights reserved. Copyright (c) 2018 Microsoft Corp. All rights reserved.
[lint]: https://golangci-lint.run/
[lint-ide]: https://golangci-lint.run/usage/integrations/#editor-integration
[lint-install]: https://golangci-lint.run/usage/install/#local-installation
[git-commit-s]: https://git-scm.com/docs/git-commit#Documentation/git-commit.txt--s
[git-rebase-s]: https://git-scm.com/docs/git-rebase#Documentation/git-rebase.txt---signoff

View File

@ -38,3 +38,31 @@ func AttachLayerStorageFilter(ctx context.Context, layerPath string, layerData L
} }
return nil return nil
} }
// AttachOverlayFilter sets up a filter of the given type on a writable container layer. Currently the only
// supported filter types are WCIFS & UnionFS (defined in internal/hcs/schema2/layer.go)
//
// `volumePath` is volume path at which writable layer is mounted. If the
// path does not end in a `\` the platform will append it automatically.
//
// `layerData` is the parent read-only layer data.
func AttachOverlayFilter(ctx context.Context, volumePath string, layerData LayerData) (err error) {
title := "hcsshim::AttachOverlayFilter"
ctx, span := oc.StartSpan(ctx, title) //nolint:ineffassign,staticcheck
defer span.End()
defer func() { oc.SetSpanStatus(span, err) }()
span.AddAttributes(
trace.StringAttribute("volumePath", volumePath),
)
bytes, err := json.Marshal(layerData)
if err != nil {
return err
}
err = hcsAttachOverlayFilter(volumePath, string(bytes))
if err != nil {
return errors.Wrap(err, "failed to attach overlay filter")
}
return nil
}

View File

@ -4,7 +4,9 @@ package computestorage
import ( import (
"context" "context"
"encoding/json"
hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
"github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/oc"
"github.com/pkg/errors" "github.com/pkg/errors"
"go.opencensus.io/trace" "go.opencensus.io/trace"
@ -26,3 +28,27 @@ func DetachLayerStorageFilter(ctx context.Context, layerPath string) (err error)
} }
return nil return nil
} }
// DetachOverlayFilter detaches the filter on a writable container layer.
//
// `volumePath` is a path to writable container volume.
func DetachOverlayFilter(ctx context.Context, volumePath string, filterType hcsschema.FileSystemFilterType) (err error) {
title := "hcsshim::DetachOverlayFilter"
ctx, span := oc.StartSpan(ctx, title) //nolint:ineffassign,staticcheck
defer span.End()
defer func() { oc.SetSpanStatus(span, err) }()
span.AddAttributes(trace.StringAttribute("volumePath", volumePath))
layerData := LayerData{}
layerData.FilterType = filterType
bytes, err := json.Marshal(layerData)
if err != nil {
return err
}
err = hcsDetachOverlayFilter(volumePath, string(bytes))
if err != nil {
return errors.Wrap(err, "failed to detach overlay filter")
}
return nil
}

View File

@ -19,14 +19,17 @@ import (
//sys hcsFormatWritableLayerVhd(handle windows.Handle) (hr error) = computestorage.HcsFormatWritableLayerVhd? //sys hcsFormatWritableLayerVhd(handle windows.Handle) (hr error) = computestorage.HcsFormatWritableLayerVhd?
//sys hcsGetLayerVhdMountPath(vhdHandle windows.Handle, mountPath **uint16) (hr error) = computestorage.HcsGetLayerVhdMountPath? //sys hcsGetLayerVhdMountPath(vhdHandle windows.Handle, mountPath **uint16) (hr error) = computestorage.HcsGetLayerVhdMountPath?
//sys hcsSetupBaseOSVolume(layerPath string, volumePath string, options string) (hr error) = computestorage.HcsSetupBaseOSVolume? //sys hcsSetupBaseOSVolume(layerPath string, volumePath string, options string) (hr error) = computestorage.HcsSetupBaseOSVolume?
//sys hcsAttachOverlayFilter(volumePath string, layerData string) (hr error) = computestorage.HcsAttachOverlayFilter?
//sys hcsDetachOverlayFilter(volumePath string, layerData string) (hr error) = computestorage.HcsDetachOverlayFilter?
type Version = hcsschema.Version type Version = hcsschema.Version
type Layer = hcsschema.Layer type Layer = hcsschema.Layer
// LayerData is the data used to describe parent layer information. // LayerData is the data used to describe parent layer information.
type LayerData struct { type LayerData struct {
SchemaVersion Version `json:"SchemaVersion,omitempty"` SchemaVersion Version `json:"SchemaVersion,omitempty"`
Layers []Layer `json:"Layers,omitempty"` Layers []Layer `json:"Layers,omitempty"`
FilterType hcsschema.FileSystemFilterType `json:"FilterType,omitempty"`
} }
// ExportLayerOptions are the set of options that are used with the `computestorage.HcsExportLayer` syscall. // ExportLayerOptions are the set of options that are used with the `computestorage.HcsExportLayer` syscall.

View File

@ -43,8 +43,10 @@ var (
modcomputestorage = windows.NewLazySystemDLL("computestorage.dll") modcomputestorage = windows.NewLazySystemDLL("computestorage.dll")
procHcsAttachLayerStorageFilter = modcomputestorage.NewProc("HcsAttachLayerStorageFilter") procHcsAttachLayerStorageFilter = modcomputestorage.NewProc("HcsAttachLayerStorageFilter")
procHcsAttachOverlayFilter = modcomputestorage.NewProc("HcsAttachOverlayFilter")
procHcsDestroyLayer = modcomputestorage.NewProc("HcsDestroyLayer") procHcsDestroyLayer = modcomputestorage.NewProc("HcsDestroyLayer")
procHcsDetachLayerStorageFilter = modcomputestorage.NewProc("HcsDetachLayerStorageFilter") procHcsDetachLayerStorageFilter = modcomputestorage.NewProc("HcsDetachLayerStorageFilter")
procHcsDetachOverlayFilter = modcomputestorage.NewProc("HcsDetachOverlayFilter")
procHcsExportLayer = modcomputestorage.NewProc("HcsExportLayer") procHcsExportLayer = modcomputestorage.NewProc("HcsExportLayer")
procHcsFormatWritableLayerVhd = modcomputestorage.NewProc("HcsFormatWritableLayerVhd") procHcsFormatWritableLayerVhd = modcomputestorage.NewProc("HcsFormatWritableLayerVhd")
procHcsGetLayerVhdMountPath = modcomputestorage.NewProc("HcsGetLayerVhdMountPath") procHcsGetLayerVhdMountPath = modcomputestorage.NewProc("HcsGetLayerVhdMountPath")
@ -83,6 +85,35 @@ func _hcsAttachLayerStorageFilter(layerPath *uint16, layerData *uint16) (hr erro
return return
} }
func hcsAttachOverlayFilter(volumePath string, layerData string) (hr error) {
var _p0 *uint16
_p0, hr = syscall.UTF16PtrFromString(volumePath)
if hr != nil {
return
}
var _p1 *uint16
_p1, hr = syscall.UTF16PtrFromString(layerData)
if hr != nil {
return
}
return _hcsAttachOverlayFilter(_p0, _p1)
}
func _hcsAttachOverlayFilter(volumePath *uint16, layerData *uint16) (hr error) {
hr = procHcsAttachOverlayFilter.Find()
if hr != nil {
return
}
r0, _, _ := syscall.Syscall(procHcsAttachOverlayFilter.Addr(), 2, uintptr(unsafe.Pointer(volumePath)), uintptr(unsafe.Pointer(layerData)), 0)
if int32(r0) < 0 {
if r0&0x1fff0000 == 0x00070000 {
r0 &= 0xffff
}
hr = syscall.Errno(r0)
}
return
}
func hcsDestroyLayer(layerPath string) (hr error) { func hcsDestroyLayer(layerPath string) (hr error) {
var _p0 *uint16 var _p0 *uint16
_p0, hr = syscall.UTF16PtrFromString(layerPath) _p0, hr = syscall.UTF16PtrFromString(layerPath)
@ -131,6 +162,35 @@ func _hcsDetachLayerStorageFilter(layerPath *uint16) (hr error) {
return return
} }
func hcsDetachOverlayFilter(volumePath string, layerData string) (hr error) {
var _p0 *uint16
_p0, hr = syscall.UTF16PtrFromString(volumePath)
if hr != nil {
return
}
var _p1 *uint16
_p1, hr = syscall.UTF16PtrFromString(layerData)
if hr != nil {
return
}
return _hcsDetachOverlayFilter(_p0, _p1)
}
func _hcsDetachOverlayFilter(volumePath *uint16, layerData *uint16) (hr error) {
hr = procHcsDetachOverlayFilter.Find()
if hr != nil {
return
}
r0, _, _ := syscall.Syscall(procHcsDetachOverlayFilter.Addr(), 2, uintptr(unsafe.Pointer(volumePath)), uintptr(unsafe.Pointer(layerData)), 0)
if int32(r0) < 0 {
if r0&0x1fff0000 == 0x00070000 {
r0 &= 0xffff
}
hr = syscall.Errno(r0)
}
return
}
func hcsExportLayer(layerPath string, exportFolderPath string, layerData string, options string) (hr error) { func hcsExportLayer(layerPath string, exportFolderPath string, layerData string, options string) (hr error) {
var _p0 *uint16 var _p0 *uint16
_p0, hr = syscall.UTF16PtrFromString(layerPath) _p0, hr = syscall.UTF16PtrFromString(layerPath)

View File

@ -75,7 +75,7 @@ func init() {
func CreateContainer(id string, c *ContainerConfig) (Container, error) { func CreateContainer(id string, c *ContainerConfig) (Container, error) {
fullConfig, err := mergemaps.MergeJSON(c, createContainerAdditionalJSON) fullConfig, err := mergemaps.MergeJSON(c, createContainerAdditionalJSON)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to merge additional JSON '%s': %s", createContainerAdditionalJSON, err) return nil, fmt.Errorf("failed to merge additional JSON '%s': %w", createContainerAdditionalJSON, err)
} }
system, err := hcs.CreateComputeSystem(context.Background(), id, fullConfig) system, err := hcs.CreateComputeSystem(context.Background(), id, fullConfig)

View File

@ -115,6 +115,7 @@ func (e *ContainerError) Error() string {
s += " encountered an error during " + e.Operation s += " encountered an error during " + e.Operation
} }
//nolint:errorlint // legacy code
switch e.Err.(type) { switch e.Err.(type) {
case nil: case nil:
break break
@ -145,6 +146,7 @@ func (e *ProcessError) Error() string {
s += " encountered an error during " + e.Operation s += " encountered an error during " + e.Operation
} }
//nolint:errorlint // legacy code
switch e.Err.(type) { switch e.Err.(type) {
case nil: case nil:
break break
@ -166,10 +168,10 @@ func (e *ProcessError) Error() string {
// already exited, or does not exist. Both IsAlreadyStopped and IsNotExist // already exited, or does not exist. Both IsAlreadyStopped and IsNotExist
// will currently return true when the error is ErrElementNotFound. // will currently return true when the error is ErrElementNotFound.
func IsNotExist(err error) bool { func IsNotExist(err error) bool {
if _, ok := err.(EndpointNotFoundError); ok { if _, ok := err.(EndpointNotFoundError); ok { //nolint:errorlint // legacy code
return true return true
} }
if _, ok := err.(NetworkNotFoundError); ok { if _, ok := err.(NetworkNotFoundError); ok { //nolint:errorlint // legacy code
return true return true
} }
return hcs.IsNotExist(getInnerError(err)) return hcs.IsNotExist(getInnerError(err))
@ -224,6 +226,7 @@ func IsAccessIsDenied(err error) bool {
} }
func getInnerError(err error) error { func getInnerError(err error) error {
//nolint:errorlint // legacy code
switch pe := err.(type) { switch pe := err.(type) {
case nil: case nil:
return nil return nil
@ -236,14 +239,14 @@ func getInnerError(err error) error {
} }
func convertSystemError(err error, c *container) error { func convertSystemError(err error, c *container) error {
if serr, ok := err.(*hcs.SystemError); ok { if serr, ok := err.(*hcs.SystemError); ok { //nolint:errorlint // legacy code
return &ContainerError{Container: c, Operation: serr.Op, Err: serr.Err, Events: serr.Events} return &ContainerError{Container: c, Operation: serr.Op, Err: serr.Err, Events: serr.Events}
} }
return err return err
} }
func convertProcessError(err error, p *process) error { func convertProcessError(err error, p *process) error {
if perr, ok := err.(*hcs.ProcessError); ok { if perr, ok := err.(*hcs.ProcessError); ok { //nolint:errorlint // legacy code
return &ProcessError{Process: p, Operation: perr.Op, Err: perr.Err, Events: perr.Events} return &ProcessError{Process: p, Operation: perr.Op, Err: perr.Err, Events: perr.Events}
} }
return err return err

View File

@ -185,6 +185,8 @@ func ReadDMVerityInfoReader(r io.Reader) (*VerityInfo, error) {
block := make([]byte, blockSize) block := make([]byte, blockSize)
if s, err := r.Read(block); err != nil || s != blockSize { if s, err := r.Read(block); err != nil || s != blockSize {
if err != nil { if err != nil {
// TODO (go1.20): use multierror via fmt.Errorf("...: %w; ...: %w", ...)
//nolint:errorlint // non-wrapping format verb for fmt.Errorf
return nil, fmt.Errorf("%s: %w", ErrSuperBlockReadFailure, err) return nil, fmt.Errorf("%s: %w", ErrSuperBlockReadFailure, err)
} }
return nil, fmt.Errorf("unexpected bytes read expected=%d actual=%d: %w", blockSize, s, ErrSuperBlockReadFailure) return nil, fmt.Errorf("unexpected bytes read expected=%d actual=%d: %w", blockSize, s, ErrSuperBlockReadFailure)
@ -193,6 +195,8 @@ func ReadDMVerityInfoReader(r io.Reader) (*VerityInfo, error) {
dmvSB := &dmveritySuperblock{} dmvSB := &dmveritySuperblock{}
b := bytes.NewBuffer(block) b := bytes.NewBuffer(block)
if err := binary.Read(b, binary.LittleEndian, dmvSB); err != nil { if err := binary.Read(b, binary.LittleEndian, dmvSB); err != nil {
// TODO (go1.20): use multierror via fmt.Errorf("...: %w; ...: %w", ...)
//nolint:errorlint // non-wrapping format verb for fmt.Errorf
return nil, fmt.Errorf("%s: %w", ErrSuperBlockParseFailure, err) return nil, fmt.Errorf("%s: %w", ErrSuperBlockParseFailure, err)
} }
@ -202,6 +206,8 @@ func ReadDMVerityInfoReader(r io.Reader) (*VerityInfo, error) {
if s, err := r.Read(block); err != nil || s != blockSize { if s, err := r.Read(block); err != nil || s != blockSize {
if err != nil { if err != nil {
// TODO (go1.20): use multierror via fmt.Errorf("...: %w; ...: %w", ...)
//nolint:errorlint // non-wrapping format verb for fmt.Errorf
return nil, fmt.Errorf("%s: %w", ErrRootHashReadFailure, err) return nil, fmt.Errorf("%s: %w", ErrRootHashReadFailure, err)
} }
return nil, fmt.Errorf("unexpected bytes read expected=%d, actual=%d: %w", blockSize, s, ErrRootHashReadFailure) return nil, fmt.Errorf("unexpected bytes read expected=%d, actual=%d: %w", blockSize, s, ErrRootHashReadFailure)

View File

@ -604,7 +604,7 @@ func (w *Writer) Create(name string, f *File) error {
} }
child, err := w.makeInode(f, reuse) child, err := w.makeInode(f, reuse)
if err != nil { if err != nil {
return fmt.Errorf("%s: %s", name, err) return fmt.Errorf("%s: %w", name, err)
} }
if existing != child { if existing != child {
if existing != nil { if existing != nil {

View File

@ -85,7 +85,7 @@ func ConvertTarToExt4(r io.Reader, w io.ReadWriteSeeker, options ...Option) erro
fs := compactext4.NewWriter(w, p.ext4opts...) fs := compactext4.NewWriter(w, p.ext4opts...)
for { for {
hdr, err := t.Next() hdr, err := t.Next()
if err == io.EOF { if errors.Is(err, io.EOF) {
break break
} }
if err != nil { if err != nil {
@ -301,7 +301,7 @@ func Ext4FileSystemSize(r io.ReadSeeker) (int64, int, error) {
func ConvertAndComputeRootDigest(r io.Reader) (string, error) { func ConvertAndComputeRootDigest(r io.Reader) (string, error) {
out, err := os.CreateTemp("", "") out, err := os.CreateTemp("", "")
if err != nil { if err != nil {
return "", fmt.Errorf("failed to create temporary file: %s", err) return "", fmt.Errorf("failed to create temporary file: %w", err)
} }
defer func() { defer func() {
_ = os.Remove(out.Name()) _ = os.Remove(out.Name())
@ -313,16 +313,16 @@ func ConvertAndComputeRootDigest(r io.Reader) (string, error) {
MaximumDiskSize(dmverity.RecommendedVHDSizeGB), MaximumDiskSize(dmverity.RecommendedVHDSizeGB),
} }
if err := ConvertTarToExt4(r, out, options...); err != nil { if err := ConvertTarToExt4(r, out, options...); err != nil {
return "", fmt.Errorf("failed to convert tar to ext4: %s", err) return "", fmt.Errorf("failed to convert tar to ext4: %w", err)
} }
if _, err := out.Seek(0, io.SeekStart); err != nil { if _, err := out.Seek(0, io.SeekStart); err != nil {
return "", fmt.Errorf("failed to seek start on temp file when creating merkle tree: %s", err) return "", fmt.Errorf("failed to seek start on temp file when creating merkle tree: %w", err)
} }
tree, err := dmverity.MerkleTree(bufio.NewReaderSize(out, dmverity.MerkleTreeBufioSize)) tree, err := dmverity.MerkleTree(bufio.NewReaderSize(out, dmverity.MerkleTreeBufioSize))
if err != nil { if err != nil {
return "", fmt.Errorf("failed to create merkle tree: %s", err) return "", fmt.Errorf("failed to create merkle tree: %w", err)
} }
hash := dmverity.RootHash(tree) hash := dmverity.RootHash(tree)

View File

@ -6,11 +6,12 @@ import (
"errors" "errors"
"fmt" "fmt"
"github.com/sirupsen/logrus"
"golang.org/x/sys/windows"
"github.com/Microsoft/hcsshim/internal/hcs" "github.com/Microsoft/hcsshim/internal/hcs"
"github.com/Microsoft/hcsshim/internal/hcserror" "github.com/Microsoft/hcsshim/internal/hcserror"
"github.com/Microsoft/hcsshim/internal/interop" "github.com/Microsoft/hcsshim/internal/interop"
"github.com/sirupsen/logrus"
"golang.org/x/sys/windows"
) )
var ( var (
@ -63,8 +64,8 @@ func (e *HcnError) Error() string {
} }
func CheckErrorWithCode(err error, code ErrorCode) bool { func CheckErrorWithCode(err error, code ErrorCode) bool {
hcnError, ok := err.(*HcnError) var hcnError *HcnError
if ok { if errors.As(err, &hcnError) {
return hcnError.code == code return hcnError.code == code
} }
return false return false
@ -81,7 +82,7 @@ func IsPortAlreadyExistsError(err error) bool {
func new(hr error, title string, rest string) error { func new(hr error, title string, rest string) error {
err := &HcnError{} err := &HcnError{}
hcsError := hcserror.New(hr, title, rest) hcsError := hcserror.New(hr, title, rest)
err.HcsError = hcsError.(*hcserror.HcsError) err.HcsError = hcsError.(*hcserror.HcsError) //nolint:errorlint
err.code = ErrorCode(hcserror.Win32FromError(hr)) err.code = ErrorCode(hcserror.Win32FromError(hr))
return err return err
} }
@ -97,6 +98,8 @@ type NetworkNotFoundError struct {
NetworkID string NetworkID string
} }
var _ error = NetworkNotFoundError{}
func (e NetworkNotFoundError) Error() string { func (e NetworkNotFoundError) Error() string {
if e.NetworkName != "" { if e.NetworkName != "" {
return fmt.Sprintf("Network name %q not found", e.NetworkName) return fmt.Sprintf("Network name %q not found", e.NetworkName)
@ -110,6 +113,8 @@ type EndpointNotFoundError struct {
EndpointID string EndpointID string
} }
var _ error = EndpointNotFoundError{}
func (e EndpointNotFoundError) Error() string { func (e EndpointNotFoundError) Error() string {
if e.EndpointName != "" { if e.EndpointName != "" {
return fmt.Sprintf("Endpoint name %q not found", e.EndpointName) return fmt.Sprintf("Endpoint name %q not found", e.EndpointName)
@ -122,6 +127,8 @@ type NamespaceNotFoundError struct {
NamespaceID string NamespaceID string
} }
var _ error = NamespaceNotFoundError{}
func (e NamespaceNotFoundError) Error() string { func (e NamespaceNotFoundError) Error() string {
return fmt.Sprintf("Namespace ID %q not found", e.NamespaceID) return fmt.Sprintf("Namespace ID %q not found", e.NamespaceID)
} }
@ -131,6 +138,8 @@ type LoadBalancerNotFoundError struct {
LoadBalancerId string LoadBalancerId string
} }
var _ error = LoadBalancerNotFoundError{}
func (e LoadBalancerNotFoundError) Error() string { func (e LoadBalancerNotFoundError) Error() string {
return fmt.Sprintf("LoadBalancer %q not found", e.LoadBalancerId) return fmt.Sprintf("LoadBalancer %q not found", e.LoadBalancerId)
} }
@ -140,6 +149,8 @@ type RouteNotFoundError struct {
RouteId string RouteId string
} }
var _ error = RouteNotFoundError{}
func (e RouteNotFoundError) Error() string { func (e RouteNotFoundError) Error() string {
return fmt.Sprintf("SDN Route %q not found", e.RouteId) return fmt.Sprintf("SDN Route %q not found", e.RouteId)
} }
@ -147,19 +158,31 @@ func (e RouteNotFoundError) Error() string {
// IsNotFoundError returns a boolean indicating whether the error was caused by // IsNotFoundError returns a boolean indicating whether the error was caused by
// a resource not being found. // a resource not being found.
func IsNotFoundError(err error) bool { func IsNotFoundError(err error) bool {
switch pe := err.(type) { // Calling [errors.As] in a loop over `[]error{NetworkNotFoundError{}, ...}` will not work,
case NetworkNotFoundError: // since the loop variable will be an interface type (ie, `error`) and `errors.As(error, *error)` will
// always succeed.
// Unless golang adds loops over (or arrays of) types, we need to manually call `errors.As` for
// each potential error type.
//
// Also, for T = NetworkNotFoundError and co, the error implementation is for T, not *T
if e := (NetworkNotFoundError{}); errors.As(err, &e) {
return true return true
case EndpointNotFoundError:
return true
case NamespaceNotFoundError:
return true
case LoadBalancerNotFoundError:
return true
case RouteNotFoundError:
return true
case *hcserror.HcsError:
return pe.Err == hcs.ErrElementNotFound
} }
if e := (EndpointNotFoundError{}); errors.As(err, &e) {
return true
}
if e := (NamespaceNotFoundError{}); errors.As(err, &e) {
return true
}
if e := (LoadBalancerNotFoundError{}); errors.As(err, &e) {
return true
}
if e := (RouteNotFoundError{}); errors.As(err, &e) {
return true
}
if e := (&hcserror.HcsError{}); errors.As(err, &e) {
return errors.Is(e.Err, hcs.ErrElementNotFound)
}
return false return false
} }

View File

@ -4,6 +4,7 @@ package hcn
import ( import (
"encoding/json" "encoding/json"
"errors"
"os" "os"
"syscall" "syscall"
@ -378,7 +379,8 @@ func (namespace *HostComputeNamespace) Sync() error {
shimPath := runhcs.VMPipePath(cfg.HostUniqueID) shimPath := runhcs.VMPipePath(cfg.HostUniqueID)
if err := runhcs.IssueVMRequest(shimPath, &req); err != nil { if err := runhcs.IssueVMRequest(shimPath, &req); err != nil {
// The shim is likely gone. Simply ignore the sync as if it didn't exist. // The shim is likely gone. Simply ignore the sync as if it didn't exist.
if perr, ok := err.(*os.PathError); ok && perr.Err == syscall.ERROR_FILE_NOT_FOUND { var perr *os.PathError
if errors.As(err, &perr) && errors.Is(perr.Err, syscall.ERROR_FILE_NOT_FOUND) {
// Remove the reg key there is no point to try again // Remove the reg key there is no point to try again
_ = cfg.Remove() _ = cfg.Remove()
return nil return nil

View File

@ -63,7 +63,7 @@ func (process *Process) SystemID() string {
} }
func (process *Process) processSignalResult(ctx context.Context, err error) (bool, error) { func (process *Process) processSignalResult(ctx context.Context, err error) (bool, error) {
switch err { switch err { //nolint:errorlint
case nil: case nil:
return true, nil return true, nil
case ErrVmcomputeOperationInvalidState, ErrComputeSystemDoesNotExist, ErrElementNotFound: case ErrVmcomputeOperationInvalidState, ErrComputeSystemDoesNotExist, ErrElementNotFound:

View File

@ -9,6 +9,13 @@
package hcsschema package hcsschema
type FileSystemFilterType string
const (
UnionFS FileSystemFilterType = "UnionFS"
WCIFS FileSystemFilterType = "WCIFS"
)
type Layer struct { type Layer struct {
Id string `json:"Id,omitempty"` Id string `json:"Id,omitempty"`

View File

@ -0,0 +1,13 @@
package hcsschema
// NOTE: manually added
type RegistryHive string
// List of RegistryHive
const (
RegistryHive_SYSTEM RegistryHive = "System"
RegistryHive_SOFTWARE RegistryHive = "Software"
RegistryHive_SECURITY RegistryHive = "Security"
RegistryHive_SAM RegistryHive = "Sam"
)

View File

@ -10,7 +10,7 @@
package hcsschema package hcsschema
type RegistryKey struct { type RegistryKey struct {
Hive string `json:"Hive,omitempty"` Hive RegistryHive `json:"Hive,omitempty"`
Name string `json:"Name,omitempty"` Name string `json:"Name,omitempty"`

View File

@ -14,7 +14,7 @@ type RegistryValue struct {
Name string `json:"Name,omitempty"` Name string `json:"Name,omitempty"`
Type_ string `json:"Type,omitempty"` Type_ RegistryValueType `json:"Type,omitempty"`
// One and only one value type must be set. // One and only one value type must be set.
StringValue string `json:"StringValue,omitempty"` StringValue string `json:"StringValue,omitempty"`

View File

@ -0,0 +1,17 @@
package hcsschema
// NOTE: manually added
type RegistryValueType string
// List of RegistryValueType
const (
RegistryValueType_NONE RegistryValueType = "None"
RegistryValueType_STRING RegistryValueType = "String"
RegistryValueType_EXPANDED_STRING RegistryValueType = "ExpandedString"
RegistryValueType_MULTI_STRING RegistryValueType = "MultiString"
RegistryValueType_BINARY RegistryValueType = "Binary"
RegistryValueType_D_WORD RegistryValueType = "DWord"
RegistryValueType_Q_WORD RegistryValueType = "QWord"
RegistryValueType_CUSTOM_TYPE RegistryValueType = "CustomType"
)

View File

@ -97,7 +97,7 @@ func CreateComputeSystem(ctx context.Context, id string, hcsDocumentInterface in
events, err := processAsyncHcsResult(ctx, createError, resultJSON, computeSystem.callbackNumber, events, err := processAsyncHcsResult(ctx, createError, resultJSON, computeSystem.callbackNumber,
hcsNotificationSystemCreateCompleted, &timeout.SystemCreate) hcsNotificationSystemCreateCompleted, &timeout.SystemCreate)
if err != nil { if err != nil {
if err == ErrTimeout { if errors.Is(err, ErrTimeout) {
// Terminate the compute system if it still exists. We're okay to // Terminate the compute system if it still exists. We're okay to
// ignore a failure here. // ignore a failure here.
_ = computeSystem.Terminate(ctx) _ = computeSystem.Terminate(ctx)
@ -238,7 +238,7 @@ func (computeSystem *System) Shutdown(ctx context.Context) error {
resultJSON, err := vmcompute.HcsShutdownComputeSystem(ctx, computeSystem.handle, "") resultJSON, err := vmcompute.HcsShutdownComputeSystem(ctx, computeSystem.handle, "")
events := processHcsResult(ctx, resultJSON) events := processHcsResult(ctx, resultJSON)
switch err { switch err { //nolint:errorlint
case nil, ErrVmcomputeAlreadyStopped, ErrComputeSystemDoesNotExist, ErrVmcomputeOperationPending: case nil, ErrVmcomputeAlreadyStopped, ErrComputeSystemDoesNotExist, ErrVmcomputeOperationPending:
default: default:
return makeSystemError(computeSystem, operation, err, events) return makeSystemError(computeSystem, operation, err, events)
@ -259,7 +259,7 @@ func (computeSystem *System) Terminate(ctx context.Context) error {
resultJSON, err := vmcompute.HcsTerminateComputeSystem(ctx, computeSystem.handle, "") resultJSON, err := vmcompute.HcsTerminateComputeSystem(ctx, computeSystem.handle, "")
events := processHcsResult(ctx, resultJSON) events := processHcsResult(ctx, resultJSON)
switch err { switch err { //nolint:errorlint
case nil, ErrVmcomputeAlreadyStopped, ErrComputeSystemDoesNotExist, ErrVmcomputeOperationPending: case nil, ErrVmcomputeAlreadyStopped, ErrComputeSystemDoesNotExist, ErrVmcomputeOperationPending:
default: default:
return makeSystemError(computeSystem, operation, err, events) return makeSystemError(computeSystem, operation, err, events)
@ -279,7 +279,7 @@ func (computeSystem *System) waitBackground() {
span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) span.AddAttributes(trace.StringAttribute("cid", computeSystem.id))
err := waitForNotification(ctx, computeSystem.callbackNumber, hcsNotificationSystemExited, nil) err := waitForNotification(ctx, computeSystem.callbackNumber, hcsNotificationSystemExited, nil)
switch err { switch err { //nolint:errorlint
case nil: case nil:
log.G(ctx).Debug("system exited") log.G(ctx).Debug("system exited")
case ErrVmcomputeUnexpectedExit: case ErrVmcomputeUnexpectedExit:

View File

@ -31,7 +31,7 @@ func hnsCallRawResponse(method, path, request string) (*hnsResponse, error) {
func hnsCall(method, path, request string, returnResponse interface{}) error { func hnsCall(method, path, request string, returnResponse interface{}) error {
hnsresponse, err := hnsCallRawResponse(method, path, request) hnsresponse, err := hnsCallRawResponse(method, path, request)
if err != nil { if err != nil {
return fmt.Errorf("failed during hnsCallRawResponse: %v", err) return fmt.Errorf("failed during hnsCallRawResponse: %w", err)
} }
if !hnsresponse.Success { if !hnsresponse.Success {
return fmt.Errorf("hns failed with error : %s", hnsresponse.Error) return fmt.Errorf("hns failed with error : %s", hnsresponse.Error)

View File

@ -56,7 +56,7 @@ func issueNamespaceRequest(id *string, method, subpath string, request interface
if strings.Contains(err.Error(), "Element not found.") { if strings.Contains(err.Error(), "Element not found.") {
return nil, os.ErrNotExist return nil, os.ErrNotExist
} }
return nil, fmt.Errorf("%s %s: %s", method, hnspath, err) return nil, fmt.Errorf("%s %s: %w", method, hnspath, err)
} }
return &ns, err return &ns, err
} }
@ -86,7 +86,7 @@ func GetNamespaceEndpoints(id string) ([]string, error) {
var endpoint namespaceEndpointRequest var endpoint namespaceEndpointRequest
err = json.Unmarshal(rsrc.Data, &endpoint) err = json.Unmarshal(rsrc.Data, &endpoint)
if err != nil { if err != nil {
return nil, fmt.Errorf("unmarshal endpoint: %s", err) return nil, fmt.Errorf("unmarshal endpoint: %w", err)
} }
endpoints = append(endpoints, endpoint.ID) endpoints = append(endpoints, endpoint.ID)
} }

View File

@ -4,6 +4,7 @@ package jobobject
import ( import (
"context" "context"
"errors"
"fmt" "fmt"
"sync" "sync"
"unsafe" "unsafe"
@ -59,7 +60,7 @@ func pollIOCP(ctx context.Context, iocpHandle windows.Handle) {
}).Warn("failed to parse job object message") }).Warn("failed to parse job object message")
continue continue
} }
if err := msq.Enqueue(notification); err == queue.ErrQueueClosed { if err := msq.Enqueue(notification); errors.Is(err, queue.ErrQueueClosed) {
// Write will only return an error when the queue is closed. // Write will only return an error when the queue is closed.
// The only time a queue would ever be closed is when we call `Close` on // The only time a queue would ever be closed is when we call `Close` on
// the job it belongs to which also removes it from the jobMap, so something // the job it belongs to which also removes it from the jobMap, so something

View File

@ -374,7 +374,7 @@ func (job *JobObject) Pids() ([]uint32, error) {
return []uint32{}, nil return []uint32{}, nil
} }
if err != winapi.ERROR_MORE_DATA { if err != winapi.ERROR_MORE_DATA { //nolint:errorlint
return nil, fmt.Errorf("failed initial query for PIDs in job object: %w", err) return nil, fmt.Errorf("failed initial query for PIDs in job object: %w", err)
} }

View File

@ -143,6 +143,13 @@ func (job *JobObject) SetCPUAffinity(affinityBitMask uint64) error {
return err return err
} }
info.BasicLimitInformation.LimitFlags |= uint32(windows.JOB_OBJECT_LIMIT_AFFINITY) info.BasicLimitInformation.LimitFlags |= uint32(windows.JOB_OBJECT_LIMIT_AFFINITY)
// We really, really shouldn't be running on 32 bit, but just in case (and to satisfy CodeQL) ...
const maxUintptr = ^uintptr(0)
if affinityBitMask > uint64(maxUintptr) {
return fmt.Errorf("affinity bitmask (%d) exceeds max allowable value (%d)", affinityBitMask, maxUintptr)
}
info.BasicLimitInformation.Affinity = uintptr(affinityBitMask) info.BasicLimitInformation.Affinity = uintptr(affinityBitMask)
return job.setExtendedInformation(info) return job.setExtendedInformation(info)
} }

View File

@ -104,6 +104,7 @@ func encode(v interface{}) (_ []byte, err error) {
if jErr := enc.Encode(v); jErr != nil { if jErr := enc.Encode(v); jErr != nil {
if err != nil { if err != nil {
// TODO (go1.20): use multierror via fmt.Errorf("...: %w; ...: %w", ...) // TODO (go1.20): use multierror via fmt.Errorf("...: %w; ...: %w", ...)
//nolint:errorlint // non-wrapping format verb for fmt.Errorf
return nil, fmt.Errorf("protojson encoding: %v; json encoding: %w", err, jErr) return nil, fmt.Errorf("protojson encoding: %v; json encoding: %w", err, jErr)
} }
return nil, fmt.Errorf("json encoding: %w", jErr) return nil, fmt.Errorf("json encoding: %w", jErr)

View File

@ -46,6 +46,7 @@ const (
ExpectedType = "expected-type" ExpectedType = "expected-type"
Bool = "bool" Bool = "bool"
Int32 = "int32"
Uint32 = "uint32" Uint32 = "uint32"
Uint64 = "uint64" Uint64 = "uint64"

View File

@ -126,7 +126,7 @@ func (pa *PoolAllocator) Allocate(size uint64) (MappedRegion, error) {
// this means that there are no more regions for the current class, try expanding // this means that there are no more regions for the current class, try expanding
if nextCls != memCls { if nextCls != memCls {
if err := pa.split(memCls); err != nil { if err := pa.split(memCls); err != nil {
if err == ErrInvalidMemoryClass { if errors.Is(err, ErrInvalidMemoryClass) {
return nil, ErrNotEnoughSpace return nil, ErrNotEnoughSpace
} }
return nil, err return nil, err
@ -147,7 +147,7 @@ func (pa *PoolAllocator) Allocate(size uint64) (MappedRegion, error) {
} }
// Release marks a memory region of class `memCls` and offset `offset` as free and tries to merge smaller regions into // Release marks a memory region of class `memCls` and offset `offset` as free and tries to merge smaller regions into
// a bigger one // a bigger one.
func (pa *PoolAllocator) Release(reg MappedRegion) error { func (pa *PoolAllocator) Release(reg MappedRegion) error {
mp := pa.pools[reg.Type()] mp := pa.pools[reg.Type()]
if mp == nil { if mp == nil {
@ -164,7 +164,7 @@ func (pa *PoolAllocator) Release(reg MappedRegion) error {
return ErrNotAllocated return ErrNotAllocated
} }
if err := pa.merge(n.parent); err != nil { if err := pa.merge(n.parent); err != nil {
if err != ErrEarlyMerge { if !errors.Is(err, ErrEarlyMerge) {
return err return err
} }
} }

View File

@ -4,6 +4,7 @@ package regstate
import ( import (
"encoding/json" "encoding/json"
"errors"
"fmt" "fmt"
"net/url" "net/url"
"os" "os"
@ -44,8 +45,8 @@ func (err *NotFoundError) Error() string {
} }
func IsNotFoundError(err error) bool { func IsNotFoundError(err error) bool {
_, ok := err.(*NotFoundError) var e *NotFoundError
return ok return errors.As(err, &e)
} }
type NoStateError struct { type NoStateError struct {
@ -152,7 +153,8 @@ func (k *Key) openid(id string) (*Key, error) {
escaped := url.PathEscape(id) escaped := url.PathEscape(id)
fullpath := filepath.Join(k.Name, escaped) fullpath := filepath.Join(k.Name, escaped)
nk, err := k.open(escaped) nk, err := k.open(escaped)
if perr, ok := err.(*os.PathError); ok && perr.Err == syscall.ERROR_FILE_NOT_FOUND { var perr *os.PathError
if errors.As(err, &perr) && errors.Is(perr.Err, syscall.ERROR_FILE_NOT_FOUND) {
return nil, &NotFoundError{id} return nil, &NotFoundError{id}
} }
if err != nil { if err != nil {
@ -165,7 +167,7 @@ func (k *Key) Remove(id string) error {
escaped := url.PathEscape(id) escaped := url.PathEscape(id)
err := registry.DeleteKey(k.Key, escaped) err := registry.DeleteKey(k.Key, escaped)
if err != nil { if err != nil {
if err == syscall.ERROR_FILE_NOT_FOUND { if err == syscall.ERROR_FILE_NOT_FOUND { //nolint:errorlint
return &NotFoundError{id} return &NotFoundError{id}
} }
return &os.PathError{Op: "RegDeleteKey", Path: filepath.Join(k.Name, escaped), Err: err} return &os.PathError{Op: "RegDeleteKey", Path: filepath.Join(k.Name, escaped), Err: err}
@ -215,7 +217,7 @@ func (k *Key) set(id string, create bool, key string, state interface{}) error {
err = sk.SetBinaryValue(key, js) err = sk.SetBinaryValue(key, js)
} }
if err != nil { if err != nil {
if err == syscall.ERROR_FILE_NOT_FOUND { if err == syscall.ERROR_FILE_NOT_FOUND { //nolint:errorlint
return &NoStateError{id, key} return &NoStateError{id, key}
} }
return &os.PathError{Op: "RegSetValueEx", Path: sk.Name + ":" + key, Err: err} return &os.PathError{Op: "RegSetValueEx", Path: sk.Name + ":" + key, Err: err}
@ -239,7 +241,7 @@ func (k *Key) Clear(id, key string) error {
defer sk.Close() defer sk.Close()
err = sk.DeleteValue(key) err = sk.DeleteValue(key)
if err != nil { if err != nil {
if err == syscall.ERROR_FILE_NOT_FOUND { if err == syscall.ERROR_FILE_NOT_FOUND { //nolint:errorlint
return &NoStateError{id, key} return &NoStateError{id, key}
} }
return &os.PathError{Op: "RegDeleteValue", Path: sk.Name + ":" + key, Err: err} return &os.PathError{Op: "RegDeleteValue", Path: sk.Name + ":" + key, Err: err}
@ -278,7 +280,7 @@ func (k *Key) Get(id, key string, state interface{}) error {
js, _, err = sk.GetBinaryValue(key) js, _, err = sk.GetBinaryValue(key)
} }
if err != nil { if err != nil {
if err == syscall.ERROR_FILE_NOT_FOUND { if err == syscall.ERROR_FILE_NOT_FOUND { //nolint:errorlint
return &NoStateError{id, key} return &NoStateError{id, key}
} }
return &os.PathError{Op: "RegQueryValueEx", Path: sk.Name + ":" + key, Err: err} return &os.PathError{Op: "RegQueryValueEx", Path: sk.Name + ":" + key, Err: err}

View File

@ -243,7 +243,7 @@ func RemoveRelative(path string, root *os.File) error {
if err == nil { if err == nil {
defer f.Close() defer f.Close()
err = deleteOnClose(f) err = deleteOnClose(f)
if err == syscall.ERROR_ACCESS_DENIED { if err == syscall.ERROR_ACCESS_DENIED { //nolint:errorlint
// Maybe the file is marked readonly. Clear the bit and retry. // Maybe the file is marked readonly. Clear the bit and retry.
_ = clearReadOnly(f) _ = clearReadOnly(f)
err = deleteOnClose(f) err = deleteOnClose(f)

View File

@ -0,0 +1,3 @@
// vhdx package adds the utility methods necessary to deal with the vhdx that are used as the scratch
// space for the containers and the uvm.
package vhdx

View File

@ -0,0 +1,233 @@
//go:build windows
package vhdx
import (
"bytes"
"context"
"encoding/binary"
"fmt"
"os"
"syscall"
"unsafe"
"github.com/Microsoft/go-winio/pkg/guid"
"github.com/Microsoft/go-winio/vhd"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/oc"
"github.com/sirupsen/logrus"
"go.opencensus.io/trace"
"golang.org/x/sys/windows"
)
// Device I/O control code for IOCTL_DISK_GET_DRIVE_LAYOUT_EX, used below to
// query a disk's partition table.
const _IOCTL_DISK_GET_DRIVE_LAYOUT_EX = 0x00070050

// GUID of a "basic data" partition (the well-known PARTITION_BASIC_DATA_GUID);
// used to validate the data partition found on a scratch vhdx.
var partitionBasicDataGUID = guid.GUID{
	Data1: 0xebd0a0a2,
	Data2: 0xb9e5,
	Data3: 0x4433,
	Data4: [8]byte{0x87, 0xc0, 0x68, 0xb6, 0xb7, 0x26, 0x99, 0xc7},
}

// Partition style values reported in the PartitionStyle fields below
// (mirror the Win32 PARTITION_STYLE enumeration).
const (
	partitionStyleMBR uint32 = iota
	partitionStyleGPT
	partitionStyleRaw
)

// Kept for reference: layout of the MBR arm of the partition info union.
// type partitionInformationMBR struct {
// PartitionType uint8
// BootIndicator uint8
// RecognizedPartition uint8
// HiddenSectors uint32
// PartitionId guid.GUID
// }

// Mirrors the Win32 PARTITION_INFORMATION_GPT structure.
type partitionInformationGPT struct {
	PartitionType guid.GUID
	PartitionId   guid.GUID
	Attributes    uint64
	Name          [72]byte // wide char
}

// Mirrors the Win32 PARTITION_INFORMATION_EX structure.
type partitionInformationEx struct {
	PartitionStyle     uint32
	StartingOffset     int64
	PartitionLength    int64
	PartitionNumber    uint32
	RewritePartition   uint8
	IsServicePartition uint8
	_                  uint16
	// A union of partitionInformationMBR and partitionInformationGPT
	// since partitionInformationGPT is largest with 112 bytes
	GptMbrUnion [112]byte
}

// Mirrors the Win32 DRIVE_LAYOUT_INFORMATION_GPT structure.
type driveLayoutInformationGPT struct {
	DiskID               guid.GUID
	StartingUsableOffset int64
	UsableLength         int64
	MaxPartitionCount    uint32
}

// Kept for reference: layout of the MBR arm of the drive layout union.
// type driveLayoutInformationMBR struct {
// Signature uint32
// Checksum uint32
// }

// Mirrors the Win32 DRIVE_LAYOUT_INFORMATION_EX structure. The real struct
// ends in a flexible PartitionEntry array; here it is declared with a single
// element and callers over-allocate as needed (see getDriveLayout).
type driveLayoutInformationEx struct {
	PartitionStyle uint32
	PartitionCount uint32
	// A union of driveLayoutInformationGPT and driveLayoutInformationMBR
	// since driveLayoutInformationGPT is largest with 40 bytes
	GptMbrUnion    [40]byte
	PartitionEntry [1]partitionInformationEx
}
// Takes the physical path of a disk and retrieves the drive layout information of that disk by issuing
// an IOCTL_DISK_GET_DRIVE_LAYOUT_EX on it. Returns the driveLayoutInformationEx struct and a slice of
// partitionInformationEx structs containing one element for each partition found on the vhdx.
// Note: some of the members (like GptMbrUnion) of these structs are raw byte arrays and it is the
// responsibility of the calling function to properly parse them.
func getDriveLayout(ctx context.Context, drivePhysicalPath string) (driveLayoutInformationEx, []partitionInformationEx, error) {
	// driveLayoutInformationEx has a flexible array member at the end. The data returned
	// by IOCTL_DISK_GET_DRIVE_LAYOUT_EX usually has driveLayoutInformationEx.PartitionCount
	// number of elements in this array. For all practical purposes we don't expect to have
	// more than 64 partitions in a container/uvm vhdx, so reserve room for 63 extra entries
	// on top of the one embedded in driveLayoutInformationEx itself.
	layoutData := struct {
		info       driveLayoutInformationEx
		partitions [63]partitionInformationEx
	}{}

	volume, err := os.OpenFile(drivePhysicalPath, os.O_RDONLY, 0)
	if err != nil {
		return layoutData.info, layoutData.partitions[:0], fmt.Errorf("failed to open drive: %w", err)
	}
	defer volume.Close()

	var outBytes uint32
	err = windows.DeviceIoControl(windows.Handle(volume.Fd()),
		_IOCTL_DISK_GET_DRIVE_LAYOUT_EX,
		nil,
		0,
		(*byte)(unsafe.Pointer(&layoutData)),
		uint32(unsafe.Sizeof(layoutData)),
		&outBytes,
		nil)
	if err != nil {
		return layoutData.info, layoutData.partitions[:0], fmt.Errorf("IOCTL to get disk layout failed: %w", err)
	}

	if layoutData.info.PartitionCount == 0 {
		return layoutData.info, []partitionInformationEx{}, nil
	}

	// Flatten the entry embedded in the layout struct plus the trailing array
	// into one slice of exactly PartitionCount entries.
	partitions := make([]partitionInformationEx, layoutData.info.PartitionCount)
	partitions[0] = layoutData.info.PartitionEntry[0]
	copy(partitions[1:], layoutData.partitions[:layoutData.info.PartitionCount-1])
	return layoutData.info, partitions, nil
}
// Scratch VHDs are formatted with GPT style and have 1 MSFT_RESERVED
// partition and 1 BASIC_DATA partition. This struct contains the
// partitionID of this BASIC_DATA partition and the DiskID of this
// scratch vhdx.
type ScratchVhdxPartitionInfo struct {
	DiskID      guid.GUID
	PartitionID guid.GUID
}

// GetScratchVhdPartitionInfo returns the disk/partition GUIDs of a GPT vhdx at
// path vhdxPath. The vhdx is temporarily attached (read-only is not required by
// the API used here) for the duration of the query and detached before return.
func GetScratchVhdPartitionInfo(ctx context.Context, vhdxPath string) (_ ScratchVhdxPartitionInfo, err error) {
	var (
		diskHandle       syscall.Handle
		driveLayout      driveLayoutInformationEx
		partitions       []partitionInformationEx
		gptDriveLayout   driveLayoutInformationGPT
		gptPartitionInfo partitionInformationGPT
		volumePath       string
	)

	title := "hcsshim::GetScratchVhdPartitionInfo"
	ctx, span := trace.StartSpan(ctx, title)
	defer span.End()
	// record the final error (named result) on the span before it ends
	defer func() { oc.SetSpanStatus(span, err) }()
	span.AddAttributes(
		trace.StringAttribute("path", vhdxPath))

	diskHandle, err = vhd.OpenVirtualDisk(vhdxPath, vhd.VirtualDiskAccessNone, vhd.OpenVirtualDiskFlagNone)
	if err != nil {
		return ScratchVhdxPartitionInfo{}, fmt.Errorf("get scratch vhd info failed: %w", err)
	}
	// Deferred cleanups run LIFO: the disk is detached first (defer below),
	// then this handle is closed. Failures are logged but not returned.
	defer func() {
		if closeErr := syscall.CloseHandle(diskHandle); closeErr != nil {
			log.G(ctx).WithFields(logrus.Fields{
				"disk path": vhdxPath,
				"error":     closeErr,
			}).Warn("failed to close vhd handle")
		}
	}()

	err = vhd.AttachVirtualDisk(diskHandle, vhd.AttachVirtualDiskFlagNone, &vhd.AttachVirtualDiskParameters{Version: 2})
	if err != nil {
		return ScratchVhdxPartitionInfo{}, fmt.Errorf("get scratch vhd info failed: %w", err)
	}
	defer func() {
		if detachErr := vhd.DetachVirtualDisk(diskHandle); detachErr != nil {
			log.G(ctx).WithFields(logrus.Fields{
				"disk path": vhdxPath,
				"error":     detachErr,
			}).Warn("failed to detach vhd")
		}
	}()

	volumePath, err = vhd.GetVirtualDiskPhysicalPath(diskHandle)
	if err != nil {
		return ScratchVhdxPartitionInfo{}, fmt.Errorf("get vhd physical path: %w", err)
	}

	driveLayout, partitions, err = getDriveLayout(ctx, volumePath)
	if err != nil {
		return ScratchVhdxPartitionInfo{}, err
	}

	// Validate the expected scratch layout: GPT style, exactly 2 partitions
	// (MSFT_RESERVED + BASIC_DATA), with the data partition in slot 1.
	if driveLayout.PartitionStyle != partitionStyleGPT {
		return ScratchVhdxPartitionInfo{}, fmt.Errorf("drive Layout:Expected partition style GPT(%d) found %d", partitionStyleGPT, driveLayout.PartitionStyle)
	}

	if driveLayout.PartitionCount != 2 || len(partitions) != 2 {
		return ScratchVhdxPartitionInfo{}, fmt.Errorf("expected exactly 2 partitions. Got %d partitions and partition count of %d", len(partitions), driveLayout.PartitionCount)
	}

	if partitions[1].PartitionStyle != partitionStyleGPT {
		return ScratchVhdxPartitionInfo{}, fmt.Errorf("partition Info:Expected partition style GPT(%d) found %d", partitionStyleGPT, partitions[1].PartitionStyle)
	}

	// The GPT arms of the unions are raw bytes; decode them explicitly.
	bufReader := bytes.NewBuffer(driveLayout.GptMbrUnion[:])
	if err := binary.Read(bufReader, binary.LittleEndian, &gptDriveLayout); err != nil {
		return ScratchVhdxPartitionInfo{}, fmt.Errorf("failed to parse drive GPT layout: %w", err)
	}

	bufReader = bytes.NewBuffer(partitions[1].GptMbrUnion[:])
	if err := binary.Read(bufReader, binary.LittleEndian, &gptPartitionInfo); err != nil {
		return ScratchVhdxPartitionInfo{}, fmt.Errorf("failed to parse GPT partition info: %w", err)
	}

	if gptPartitionInfo.PartitionType != partitionBasicDataGUID {
		return ScratchVhdxPartitionInfo{}, fmt.Errorf("expected partition type to have %s GUID found %s instead", partitionBasicDataGUID, gptPartitionInfo.PartitionType)
	}

	log.G(ctx).WithFields(logrus.Fields{
		"Disk ID":          gptDriveLayout.DiskID,
		"GPT Partition ID": gptPartitionInfo.PartitionId,
	}).Debug("Scratch VHD partition info")

	return ScratchVhdxPartitionInfo{DiskID: gptDriveLayout.DiskID, PartitionID: gptPartitionInfo.PartitionId}, nil
}

View File

@ -104,7 +104,7 @@ func execute(ctx gcontext.Context, timeout time.Duration, f func() error) error
}() }()
select { select {
case <-ctx.Done(): case <-ctx.Done():
if ctx.Err() == gcontext.DeadlineExceeded { if ctx.Err() == gcontext.DeadlineExceeded { //nolint:errorlint
log.G(ctx).WithField(logfields.Timeout, trueTimeout). log.G(ctx).WithField(logfields.Timeout, trueTimeout).
Warning("Syscall did not complete within operation timeout. This may indicate a platform issue. " + Warning("Syscall did not complete within operation timeout. This may indicate a platform issue. " +
"If it appears to be making no forward progress, obtain the stacks and see if there is a syscall " + "If it appears to be making no forward progress, obtain the stacks and see if there is a syscall " +
@ -150,7 +150,7 @@ func HcsCreateComputeSystem(ctx gcontext.Context, id string, configuration strin
if result != "" { if result != "" {
span.AddAttributes(trace.StringAttribute("result", result)) span.AddAttributes(trace.StringAttribute("result", result))
} }
if hr != errVmcomputeOperationPending { if hr != errVmcomputeOperationPending { //nolint:errorlint // explicitly returned
oc.SetSpanStatus(span, hr) oc.SetSpanStatus(span, hr)
} }
}() }()
@ -205,7 +205,7 @@ func HcsStartComputeSystem(ctx gcontext.Context, computeSystem HcsSystem, option
if result != "" { if result != "" {
span.AddAttributes(trace.StringAttribute("result", result)) span.AddAttributes(trace.StringAttribute("result", result))
} }
if hr != errVmcomputeOperationPending { if hr != errVmcomputeOperationPending { //nolint:errorlint // explicitly returned
oc.SetSpanStatus(span, hr) oc.SetSpanStatus(span, hr)
} }
}() }()
@ -228,7 +228,7 @@ func HcsShutdownComputeSystem(ctx gcontext.Context, computeSystem HcsSystem, opt
if result != "" { if result != "" {
span.AddAttributes(trace.StringAttribute("result", result)) span.AddAttributes(trace.StringAttribute("result", result))
} }
if hr != errVmcomputeOperationPending { if hr != errVmcomputeOperationPending { //nolint:errorlint // explicitly returned
oc.SetSpanStatus(span, hr) oc.SetSpanStatus(span, hr)
} }
}() }()
@ -251,7 +251,7 @@ func HcsTerminateComputeSystem(ctx gcontext.Context, computeSystem HcsSystem, op
if result != "" { if result != "" {
span.AddAttributes(trace.StringAttribute("result", result)) span.AddAttributes(trace.StringAttribute("result", result))
} }
if hr != errVmcomputeOperationPending { if hr != errVmcomputeOperationPending { //nolint:errorlint // explicitly returned
oc.SetSpanStatus(span, hr) oc.SetSpanStatus(span, hr)
} }
}() }()
@ -274,7 +274,7 @@ func HcsPauseComputeSystem(ctx gcontext.Context, computeSystem HcsSystem, option
if result != "" { if result != "" {
span.AddAttributes(trace.StringAttribute("result", result)) span.AddAttributes(trace.StringAttribute("result", result))
} }
if hr != errVmcomputeOperationPending { if hr != errVmcomputeOperationPending { //nolint:errorlint // explicitly returned
oc.SetSpanStatus(span, hr) oc.SetSpanStatus(span, hr)
} }
}() }()
@ -297,7 +297,7 @@ func HcsResumeComputeSystem(ctx gcontext.Context, computeSystem HcsSystem, optio
if result != "" { if result != "" {
span.AddAttributes(trace.StringAttribute("result", result)) span.AddAttributes(trace.StringAttribute("result", result))
} }
if hr != errVmcomputeOperationPending { if hr != errVmcomputeOperationPending { //nolint:errorlint // explicitly returned
oc.SetSpanStatus(span, hr) oc.SetSpanStatus(span, hr)
} }
}() }()
@ -621,7 +621,7 @@ func HcsSaveComputeSystem(ctx gcontext.Context, computeSystem HcsSystem, options
if result != "" { if result != "" {
span.AddAttributes(trace.StringAttribute("result", result)) span.AddAttributes(trace.StringAttribute("result", result))
} }
if hr != errVmcomputeOperationPending { if hr != errVmcomputeOperationPending { //nolint:errorlint // explicitly returned
oc.SetSpanStatus(span, hr) oc.SetSpanStatus(span, hr)
} }
}() }()

View File

@ -1,3 +1,5 @@
//go:build windows
package wclayer package wclayer
import ( import (
@ -64,7 +66,7 @@ func (r *baseLayerReader) walkUntilCancelled() error {
return nil return nil
}) })
if err == errorIterationCanceled { if err == errorIterationCanceled { //nolint:errorlint // explicitly returned
return nil return nil
} }
@ -103,7 +105,7 @@ func (r *baseLayerReader) walkUntilCancelled() error {
return nil return nil
}) })
if err == errorIterationCanceled { if err == errorIterationCanceled { //nolint:errorlint // explicitly returned
return nil return nil
} }

View File

@ -0,0 +1,289 @@
//go:build windows
package cim
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/Microsoft/go-winio"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/oc"
"github.com/Microsoft/hcsshim/internal/wclayer"
"github.com/Microsoft/hcsshim/osversion"
"github.com/Microsoft/hcsshim/pkg/cimfs"
"go.opencensus.io/trace"
)
// A CimLayerWriter implements the wclayer.LayerWriter interface to allow writing container
// image layers in the cim format.
// A cim layer consist of cim files (which are usually stored in the `cim-layers` directory and
// some other files which are stored in the directory of that layer (i.e the `path` directory).
type CimLayerWriter struct {
	// context and span under which this import runs
	ctx context.Context
	s   *trace.Span
	// path to the layer (i.e layer's directory) as provided by the caller.
	// Even if a layer is stored as a cim in the cim directory, some files associated
	// with a layer are still stored in this path.
	path string
	// parent layer paths
	parentLayerPaths []string
	// Handle to the layer cim - writes to the cim file
	cimWriter *cimfs.CimFsWriter
	// Handle to the writer for writing files in the local filesystem
	stdFileWriter *stdFileWriter
	// reference to currently active writer either cimWriter or stdFileWriter;
	// Write calls are forwarded to it
	activeWriter io.Writer
	// denotes if this layer has the UtilityVM directory
	hasUtilityVM bool
	// some files are written outside the cim during initial import (via stdFileWriter) because we need to
	// make some modifications to these files before writing them to the cim. The pendingOps slice
	// maintains a list of such delayed modifications to the layer cim. These modifications are applied at
	// the very end of layer import process.
	pendingOps []pendingCimOp
}

// hive names a registry hive together with its base and delta file names
// inside a layer.
type hive struct {
	name  string
	base  string
	delta string
}

// The set of registry hives that receive the base/delta merge treatment
// during layer import.
var (
	hives = []hive{
		{"SYSTEM", "SYSTEM_BASE", "SYSTEM_DELTA"},
		{"SOFTWARE", "SOFTWARE_BASE", "SOFTWARE_DELTA"},
		{"SAM", "SAM_BASE", "SAM_DELTA"},
		{"SECURITY", "SECURITY_BASE", "SECURITY_DELTA"},
		{"DEFAULT", "DEFAULTUSER_BASE", "DEFAULTUSER_DELTA"},
	}
)
// isDeltaOrBaseHive reports whether `path` refers to one of the delta hive
// files (under wclayer.HivesPath) or one of the registry hive files (under
// wclayer.RegFilesPath) of a layer.
func isDeltaOrBaseHive(path string) bool {
	for _, hv := range hives {
		deltaHivePath := filepath.Join(wclayer.HivesPath, hv.delta)
		regHivePath := filepath.Join(wclayer.RegFilesPath, hv.name)
		if strings.EqualFold(path, deltaHivePath) || strings.EqualFold(path, regHivePath) {
			return true
		}
	}
	return false
}

// isStdFile reports whether this particular file should be written with a
// stdFileWriter instead of using the cimWriter.
func isStdFile(path string) bool {
	if isDeltaOrBaseHive(path) {
		return true
	}
	switch path {
	case filepath.Join(wclayer.UtilityVMPath, wclayer.RegFilesPath, "SYSTEM"),
		filepath.Join(wclayer.UtilityVMPath, wclayer.RegFilesPath, "SOFTWARE"),
		wclayer.BcdFilePath,
		wclayer.BootMgrFilePath:
		return true
	}
	return false
}
// Add adds a file to the layer with given metadata.
func (cw *CimLayerWriter) Add(name string, fileInfo *winio.FileBasicInfo, fileSize int64, securityDescriptor []byte, extendedAttributes []byte, reparseData []byte) error {
	if name == wclayer.UtilityVMPath {
		cw.hasUtilityVM = true
	}

	if !isStdFile(name) {
		// Regular layer content goes straight into the cim.
		if err := cw.cimWriter.AddFile(name, fileInfo, fileSize, securityDescriptor, extendedAttributes, reparseData); err != nil {
			return err
		}
		cw.activeWriter = cw.cimWriter
		return nil
	}

	// Std files are first written into the layer directory and only make it
	// into the cim at Close time, so queue a pending op carrying the metadata
	// needed for that final add.
	cw.pendingOps = append(cw.pendingOps, &addOp{
		pathInCim:          name,
		hostPath:           filepath.Join(cw.path, name),
		fileInfo:           fileInfo,
		securityDescriptor: securityDescriptor,
		extendedAttributes: extendedAttributes,
		reparseData:        reparseData,
	})
	if err := cw.stdFileWriter.Add(name); err != nil {
		return err
	}
	cw.activeWriter = cw.stdFileWriter
	return nil
}
// AddLink adds a hard link to the layer. The target must already have been added.
func (cw *CimLayerWriter) AddLink(name string, target string) error {
	// Reset the active writer so that a stray Write after a link entry panics
	// instead of silently corrupting a previously added file (links carry no data).
	cw.activeWriter = nil
	switch {
	case isStdFile(target):
		// A link to a std file can only be created once that std file has been
		// written into the cim, so queue it as a pending op.
		cw.pendingOps = append(cw.pendingOps, &linkOp{
			oldPath: target,
			newPath: name,
		})
		return nil
	case isStdFile(name):
		// None of the predefined std files are links. If they show up as links
		// this is unexpected behavior. Error out.
		return fmt.Errorf("unexpected link %s in layer", name)
	default:
		return cw.cimWriter.AddLink(target, name)
	}
}
// AddAlternateStream creates another alternate stream at the given
// path. Any writes made after this call will go to that stream.
func (cw *CimLayerWriter) AddAlternateStream(name string, size uint64) error {
	if isStdFile(name) {
		// As of now there is no known case of a std file having multiple data
		// streams. If such a file is encountered our assumptions are wrong. Error out.
		return fmt.Errorf("unexpected alternate stream %s in layer", name)
	}

	err := cw.cimWriter.CreateAlternateStream(name, size)
	if err == nil {
		// Subsequent Write calls target the new stream inside the cim.
		cw.activeWriter = cw.cimWriter
	}
	return err
}
// Remove removes a file that was present in a parent layer from the layer.
func (cw *CimLayerWriter) Remove(name string) error {
	// set active writer to nil so that we panic if layer tar is incorrectly formatted
	// (a Write must never directly follow a Remove entry).
	cw.activeWriter = nil
	return cw.cimWriter.Unlink(name)
}

// Write writes data to the current file. The data must be in the format of a Win32
// backup stream. The destination (cim or layer directory) is whichever writer the
// preceding Add/AddAlternateStream selected as activeWriter.
func (cw *CimLayerWriter) Write(b []byte) (int, error) {
	return cw.activeWriter.Write(b)
}
// Close finishes the layer writing process and releases any resources.
// It flushes the std files, derives the UVM build number (if a UtilityVM
// directory was seen), runs the base/non-base layer post-processing and
// finally applies all pending cim operations.
func (cw *CimLayerWriter) Close(ctx context.Context) (retErr error) {
	if err := cw.stdFileWriter.Close(ctx); err != nil {
		return err
	}

	// cimWriter must be closed even if there are errors.
	defer func() {
		if err := cw.cimWriter.Close(); retErr == nil {
			retErr = err
		}
	}()

	// Find out the osversion of this layer, both base & non-base layers can have UtilityVM layer.
	processUtilityVM := false
	if cw.hasUtilityVM {
		uvmSoftwareHivePath := filepath.Join(cw.path, wclayer.UtilityVMPath, wclayer.RegFilesPath, "SOFTWARE")
		osvStr, err := getOsBuildNumberFromRegistry(uvmSoftwareHivePath)
		if err != nil {
			return fmt.Errorf("read os version string from UtilityVM SOFTWARE hive: %w", err)
		}

		osv, err := strconv.ParseUint(osvStr, 10, 16)
		if err != nil {
			return fmt.Errorf("parse os version string (%s): %w", osvStr, err)
		}

		// write this version to a file for future reference by the shim process
		if err = wclayer.WriteLayerUvmBuildFile(cw.path, uint16(osv)); err != nil {
			return fmt.Errorf("write uvm build version: %w", err)
		}

		// CIMFS for hyperV isolated is only supported after 20348, processing UtilityVM layer on 20348
		// & lower will cause failures since those images won't have CIMFS specific UVM files (mostly
		// BCD entries required for CIMFS)
		processUtilityVM = (osv > osversion.LTSC2022)
		log.G(ctx).Debugf("import image os version %d, processing UtilityVM layer: %t\n", osv, processUtilityVM)
	}

	if len(cw.parentLayerPaths) == 0 {
		if err := cw.processBaseLayer(ctx, processUtilityVM); err != nil {
			return fmt.Errorf("process base layer: %w", err)
		}
	} else {
		if err := cw.processNonBaseLayer(ctx, processUtilityVM); err != nil {
			return fmt.Errorf("process non base layer: %w", err)
		}
	}

	// Now that all std files are in their final shape, replay the deferred
	// cim operations (adds of std files and links to them) in order.
	for _, op := range cw.pendingOps {
		if err := op.apply(cw.cimWriter); err != nil {
			return fmt.Errorf("apply pending operations: %w", err)
		}
	}
	return nil
}
// NewCimLayerWriter creates a CimLayerWriter for importing a layer at `path`
// with the given parent layers. It creates the shared cim-layers directory
// (sibling of the layer directories) if needed, opens a new cim forked from
// the first parent's cim (if any) and sets up the std file writer.
// Returns an error if CimFS is not supported on the current build.
func NewCimLayerWriter(ctx context.Context, path string, parentLayerPaths []string) (_ *CimLayerWriter, err error) {
	if !cimfs.IsCimFSSupported() {
		return nil, fmt.Errorf("CimFs not supported on this build")
	}

	ctx, span := trace.StartSpan(ctx, "hcsshim::NewCimLayerWriter")
	// On success the span is kept open (stored on the writer); it is only
	// ended here on the error paths.
	defer func() {
		if err != nil {
			oc.SetSpanStatus(span, err)
			span.End()
		}
	}()
	span.AddAttributes(
		trace.StringAttribute("path", path),
		trace.StringAttribute("parentLayerPaths", strings.Join(parentLayerPaths, ", ")))

	parentCim := ""
	cimDirPath := GetCimDirFromLayer(path)
	if _, err = os.Stat(cimDirPath); os.IsNotExist(err) {
		// create cim directory
		if err = os.Mkdir(cimDirPath, 0755); err != nil {
			return nil, fmt.Errorf("failed while creating cim layers directory: %w", err)
		}
	} else if err != nil {
		return nil, fmt.Errorf("unable to access cim layers directory: %w", err)
	}

	if len(parentLayerPaths) > 0 {
		// all layer cims share one directory, so only the parent's cim name is needed
		parentCim = GetCimNameFromLayer(parentLayerPaths[0])
	}

	cim, err := cimfs.Create(cimDirPath, parentCim, GetCimNameFromLayer(path))
	if err != nil {
		return nil, fmt.Errorf("error in creating a new cim: %w", err)
	}

	sfw, err := newStdFileWriter(path, parentLayerPaths)
	if err != nil {
		return nil, fmt.Errorf("error in creating new standard file writer: %w", err)
	}
	return &CimLayerWriter{
		ctx:              ctx,
		s:                span,
		path:             path,
		parentLayerPaths: parentLayerPaths,
		cimWriter:        cim,
		stdFileWriter:    sfw,
	}, nil
}
// DestroyCimLayer destroys a cim layer i.e it removes all the cimfs files for the given layer as well as
// all of the other files that are stored in the layer directory (at path `layerPath`).
// If this is not a cimfs layer (i.e a cim file for the given layer does not exist) then nothing is done.
func DestroyCimLayer(ctx context.Context, layerPath string) error {
	cimPath := GetCimPathFromLayer(layerPath)

	// Verify that such a cim exists first; sometimes containerd tries to call
	// this with the root snapshot directory as the layer path and we must not
	// destroy everything inside the snapshots directory.
	_, err := os.Stat(cimPath)
	switch {
	case os.IsNotExist(err):
		return nil
	case err != nil:
		return err
	}

	return cimfs.DestroyCim(ctx, cimPath)
}

View File

@ -0,0 +1,107 @@
//go:build windows
package cim
import (
"bytes"
"fmt"
"os/exec"
"github.com/Microsoft/go-winio/pkg/guid"
)
const (
	// path of the BCD store inside a layer, relative to the layer root
	bcdFilePath = "UtilityVM\\Files\\EFI\\Microsoft\\Boot\\BCD"
	// fixed GUID strings naming the BCD objects that the bcdedit calls below
	// create and wire together (device options / vmbus / composite / container)
	cimfsDeviceOptionsID     = "{763e9fea-502d-434f-aad9-5fabe9c91a7b}"
	vmbusDeviceID            = "{c63c9bdf-5fa5-4208-b03f-6b458b365592}"
	compositeDeviceOptionsID = "{e1787220-d17f-49e7-977a-d8fe4c8537e2}"
	bootContainerID          = "{b890454c-80de-4e98-a7ab-56b74b4fbd0c}"
)
// bcdExec runs `bcdedit.exe` in offline mode against the BCD store at
// `storePath` with the given extra arguments. On failure the combined
// stdout/stderr of bcdedit is included in the returned error (the original
// collected stdout into a buffer but never surfaced it, making failures
// impossible to diagnose).
func bcdExec(storePath string, args ...string) error {
	var out bytes.Buffer
	argsArr := []string{"/store", storePath, "/offline"}
	argsArr = append(argsArr, args...)
	cmd := exec.Command("bcdedit.exe", argsArr...)
	// capture both streams so the error below can report what bcdedit printed
	cmd.Stdout = &out
	cmd.Stderr = &out
	if err := cmd.Run(); err != nil {
		return fmt.Errorf("bcd command (%s) failed, output: %q: %w", cmd, out.String(), err)
	}
	return nil
}
// setBcdRestartOnFailure configures the default boot entry of the given store
// to automatically restart the OS on a boot failure (a configuration required
// for the uvm).
func setBcdRestartOnFailure(storePath string) error {
	return bcdExec(storePath, "/set", "{default}", "restartonfailure", "yes")
}

// setBcdCimBootDevice rewrites the default boot entry of the store at
// `storePath` so the UVM boots from the cim (addressed relative to the VSMB
// share) layered on top of the given GPT disk/partition via a composite device.
func setBcdCimBootDevice(storePath, cimPathRelativeToVSMB string, diskID, partitionID guid.GUID) error {
	// create options for cimfs boot device
	if err := bcdExec(storePath, "/create", cimfsDeviceOptionsID, "/d", "CimFS Device Options", "/device"); err != nil {
		return err
	}

	// Set options. For now we need to set 2 options. First is the parent device i.e the device under
	// which all cim files will be available. Second is the path of the cim (from which this UVM should
	// boot) relative to the parent device. Note that even though the 2nd option is named
	// `cimfsrootdirectory` it expects a path to the cim file and not a directory path.
	if err := bcdExec(storePath, "/set", cimfsDeviceOptionsID, "cimfsparentdevice", fmt.Sprintf("vmbus=%s", vmbusDeviceID)); err != nil {
		return err
	}

	if err := bcdExec(storePath, "/set", cimfsDeviceOptionsID, "cimfsrootdirectory", fmt.Sprintf("\\%s", cimPathRelativeToVSMB)); err != nil {
		return err
	}

	// create options for the composite device
	if err := bcdExec(storePath, "/create", compositeDeviceOptionsID, "/d", "Composite Device Options", "/device"); err != nil {
		return err
	}

	// We need to specify the diskID & the partition ID of the boot disk and we need to set the cimfs boot
	// options ID
	partitionStr := fmt.Sprintf("gpt_partition={%s};{%s}", diskID, partitionID)
	if err := bcdExec(storePath, "/set", compositeDeviceOptionsID, "primarydevice", partitionStr); err != nil {
		return err
	}

	if err := bcdExec(storePath, "/set", compositeDeviceOptionsID, "secondarydevice", fmt.Sprintf("cimfs=%s,%s", bootContainerID, cimfsDeviceOptionsID)); err != nil {
		return err
	}

	if err := bcdExec(storePath, "/set", "{default}", "device", fmt.Sprintf("composite=0,%s", compositeDeviceOptionsID)); err != nil {
		return err
	}

	if err := bcdExec(storePath, "/set", "{default}", "osdevice", fmt.Sprintf("composite=0,%s", compositeDeviceOptionsID)); err != nil {
		return err
	}

	// Since our UVM file are stored under UtilityVM\Files directory inside the CIM we must prepend that
	// directory in front of paths used by bootmgr
	if err := bcdExec(storePath, "/set", "{default}", "path", "\\UtilityVM\\Files\\Windows\\System32\\winload.efi"); err != nil {
		return err
	}

	if err := bcdExec(storePath, "/set", "{default}", "systemroot", "\\UtilityVM\\Files\\Windows"); err != nil {
		return err
	}

	return nil
}

// updateBcdStoreForBoot Updates the bcd store at path layerPath + UtilityVM\Files\EFI\Microsoft\Boot\BCD` to
// boot with the disk with given ID and given partitionID. cimPathRelativeToVSMB is the path of the cim which
// will be used for booting this UVM relative to the VSMB share. (Usually, the entire snapshots directory will
// be shared over VSMB, so if this is the cim-layers\1.cim under that directory, the value of
// `cimPathRelativeToVSMB` should be cim-layers\1.cim)
func updateBcdStoreForBoot(storePath string, cimPathRelativeToVSMB string, diskID, partitionID guid.GUID) error {
	if err := setBcdRestartOnFailure(storePath); err != nil {
		return err
	}

	if err := setBcdCimBootDevice(storePath, cimPathRelativeToVSMB, diskID, partitionID); err != nil {
		return err
	}
	return nil
}

View File

@ -0,0 +1,41 @@
//go:build windows
package cim
import (
"os"
"path/filepath"
)
const (
	// name of the directory in which cims are stored
	cimDir = "cim-layers"
)

// Usually layers are stored at ./root/io.containerd.snapshotter.v1.windows/snapshots/<layerid>. For cimfs we
// must store all layer cims in the same directory (for forked cims to work). So all cim layers are stored in
// /root/io.containerd.snapshotter.v1.windows/snapshots/cim-layers. And the cim file representing each
// individual layer is stored at /root/io.containerd.snapshotter.v1.windows/snapshots/cim-layers/<layerid>.cim

// GetCimNameFromLayer returns the filename (<layerid>.cim) of the file
// representing the cim for the layer at `layerPath`.
func GetCimNameFromLayer(layerPath string) string {
	return filepath.Base(layerPath) + ".cim"
}

// GetCimPathFromLayer returns the path of the CimDir/<layerid>.cim file that
// represents the layer cim.
func GetCimPathFromLayer(layerPath string) string {
	dir := GetCimDirFromLayer(layerPath)
	name := GetCimNameFromLayer(layerPath)
	return filepath.Join(dir, name)
}

// GetCimDirFromLayer returns the directory (a sibling of the layer directory)
// inside which all cims are stored.
func GetCimDirFromLayer(layerPath string) string {
	return filepath.Join(filepath.Dir(layerPath), cimDir)
}

// IsCimLayer returns `true` if the layer at path `layerPath` is a cim layer.
// Returns `false` otherwise.
func IsCimLayer(layerPath string) bool {
	_, err := os.Stat(GetCimPathFromLayer(layerPath))
	return err == nil
}

View File

@ -0,0 +1,3 @@
// This package provides utilities for working with container image layers in the cim format
// via the wclayer APIs.
package cim

View File

@ -0,0 +1,90 @@
//go:build windows
package cim
import (
"context"
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/Microsoft/go-winio"
"github.com/Microsoft/hcsshim/internal/safefile"
"github.com/Microsoft/hcsshim/internal/winapi"
)
// stdFileWriter writes the files of a layer to the layer folder instead of writing them inside the cim.
// For some files (like the Hive files or some UtilityVM files) it is necessary to write them as a normal file
// first, do some modifications on them (for example merging of hives or processing of UtilityVM files)
// and then write the modified versions into the cim. This writer is used for such files.
type stdFileWriter struct {
	// the file currently being written (set by Add, closed by closeActiveFile)
	activeFile *os.File
	// parent layer paths
	parentLayerPaths []string
	// path to the current layer
	path string
	// the open handle to the path directory
	root *os.File
}
// newStdFileWriter creates a stdFileWriter that writes files under the layer
// directory `root`. Returns an error (wrapped, with no partially-constructed
// writer — the original's naked return handed back a half-built struct) if the
// layer root cannot be opened.
func newStdFileWriter(root string, parentRoots []string) (sfw *stdFileWriter, err error) {
	rootDir, err := safefile.OpenRoot(root)
	if err != nil {
		return nil, fmt.Errorf("open layer root %s: %w", root, err)
	}
	return &stdFileWriter{
		path:             root,
		parentLayerPaths: parentRoots,
		root:             rootDir,
	}, nil
}
// closeActiveFile closes the file currently being written (if any) and clears
// the reference to it, so Add can safely open the next one.
func (sfw *stdFileWriter) closeActiveFile() error {
	if sfw.activeFile == nil {
		return nil
	}
	err := sfw.activeFile.Close()
	sfw.activeFile = nil
	return err
}
// Add creates a new file (or an alternate data stream of an existing file)
// at relative path `name` inside the layer directory and makes it the active
// file for subsequent Write calls. Any previously active file is closed first.
func (sfw *stdFileWriter) Add(name string) error {
	if err := sfw.closeActiveFile(); err != nil {
		return err
	}

	// The directory of this file might be created inside the cim.
	// make sure we have the same parent directory chain here
	if err := safefile.MkdirAllRelative(filepath.Dir(name), sfw.root); err != nil {
		return fmt.Errorf("failed to create file %s: %w", name, err)
	}

	// open relative to the layer root handle to avoid path traversal outside the layer
	f, err := safefile.OpenRelative(
		name,
		sfw.root,
		syscall.GENERIC_READ|syscall.GENERIC_WRITE|winio.WRITE_DAC|winio.WRITE_OWNER,
		syscall.FILE_SHARE_READ,
		winapi.FILE_CREATE,
		0,
	)
	if err != nil {
		return fmt.Errorf("error creating file %s: %w", name, err)
	}
	sfw.activeFile = f
	return nil
}
// Write writes data to the current file. The data must be in the format of a Win32
// backup stream. The current file is whichever file the last Add call opened.
func (sfw *stdFileWriter) Write(b []byte) (int, error) {
	return sfw.activeFile.Write(b)
}
// Close finishes the layer writing process and releases any resources.
func (sfw *stdFileWriter) Close(ctx context.Context) error {
	// Note: closeActiveFile clears sfw.activeFile before returning, so the
	// error message must not dereference sfw.activeFile here — the original
	// called sfw.activeFile.Name() on the failure path, which would nil-deref.
	if err := sfw.closeActiveFile(); err != nil {
		return fmt.Errorf("failed to close active file: %w", err)
	}
	// Release the handle on the layer root directory opened by newStdFileWriter;
	// the original leaked it.
	return sfw.root.Close()
}

View File

@ -0,0 +1,89 @@
//go:build windows
package cim
import (
"context"
"fmt"
"os"
"sync"
"github.com/Microsoft/go-winio/pkg/guid"
hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2"
cimfs "github.com/Microsoft/hcsshim/pkg/cimfs"
)
// a cache of cim layer to its mounted volume - The mount manager plugin currently doesn't have an option of
// querying a mounted cim to get the volume at which it is mounted, so we maintain a cache of that here
var (
	cimMounts       map[string]string = make(map[string]string)
	cimMountMapLock sync.Mutex
	// A random GUID used as a namespace for generating cim mount volume GUIDs: 6827367b-c388-4e9b-95ec-961c6d2c936c
	// NOTE: Data4 previously listed only 6 of its 8 bytes (dropping 0x95, 0xec), which
	// produced a namespace GUID that did not match the documented value above.
	cimMountNamespace guid.GUID = guid.GUID{Data1: 0x6827367b, Data2: 0xc388, Data3: 0x4e9b, Data4: [8]byte{0x95, 0xec, 0x96, 0x1c, 0x6d, 0x2c, 0x93, 0x6c}}
)
// MountCimLayer mounts the cim at path `cimPath` and returns the mount location of that cim. This method
// uses the `CimMountFlagCacheFiles` mount flag when mounting the cim. The containerID is used to generated
// the volumeID for the volume at which this CIM is mounted. containerID is used so that if the shim process
// crashes for any reason, the mounted cim can be correctly cleaned up during `shim delete` call.
func MountCimLayer(ctx context.Context, cimPath, containerID string) (string, error) {
	volumeGUID, err := guid.NewV5(cimMountNamespace, []byte(containerID))
	if err != nil {
		return "", fmt.Errorf("generated cim mount GUID: %w", err)
	}

	volume, err := cimfs.Mount(cimPath, volumeGUID, hcsschema.CimMountFlagCacheFiles)
	if err != nil {
		return "", err
	}

	// Remember where this cim got mounted so GetCimMountPath can answer later.
	cimMountMapLock.Lock()
	defer cimMountMapLock.Unlock()
	cimMounts[fmt.Sprintf("%s_%s", containerID, cimPath)] = volume

	return volume, nil
}
// UnmountCimLayer unmounts the cim mounted for the given container.
// Returns an error if no mount is recorded for this (container, cim) pair.
func UnmountCimLayer(ctx context.Context, cimPath, containerID string) error {
	cimMountMapLock.Lock()
	defer cimMountMapLock.Unlock()
	key := fmt.Sprintf("%s_%s", containerID, cimPath)
	vol, ok := cimMounts[key]
	if !ok {
		return fmt.Errorf("cim %s not mounted", cimPath)
	}
	if err := cimfs.Unmount(vol); err != nil {
		return err
	}
	// Only drop the cache entry once the unmount actually succeeded; the
	// original deleted it first, losing track of still-mounted volumes when
	// cimfs.Unmount failed.
	delete(cimMounts, key)
	return nil
}
// GetCimMountPath returns the volume at which a cim is mounted. If the cim is not mounted returns error.
func GetCimMountPath(cimPath, containerID string) (string, error) {
	cimMountMapLock.Lock()
	defer cimMountMapLock.Unlock()

	// guard clause instead of if/else so the happy path reads straight through
	vol, ok := cimMounts[fmt.Sprintf("%s_%s", containerID, cimPath)]
	if !ok {
		return "", fmt.Errorf("cim %s not mounted", cimPath)
	}
	return vol, nil
}
// CleanupContainerMounts unmounts any cim volume left mounted for the given
// container. Because the volume GUID is derived deterministically from the
// containerID (via the same v5-GUID namespace used in MountCimLayer), this
// works even after a shim crash when the in-memory cimMounts cache is gone.
func CleanupContainerMounts(containerID string) error {
	volumeGUID, err := guid.NewV5(cimMountNamespace, []byte(containerID))
	if err != nil {
		return fmt.Errorf("generated cim mount GUID: %w", err)
	}

	volPath := fmt.Sprintf("\\\\?\\Volume{%s}\\", volumeGUID.String())
	// best effort: only unmount if the volume actually exists
	if _, err := os.Stat(volPath); err == nil {
		err = cimfs.Unmount(volPath)
		if err != nil {
			return err
		}
	}
	return nil
}

View File

@ -0,0 +1,68 @@
//go:build windows
package cim
import (
"fmt"
"io"
"os"
"github.com/Microsoft/go-winio"
"github.com/Microsoft/hcsshim/pkg/cimfs"
"golang.org/x/sys/windows"
)
// pendingCimOp is an operation against the layer cim that had to be deferred
// until the std files it depends on were fully written to the layer directory;
// queued ops are applied in order at the end of the layer import.
type pendingCimOp interface {
	apply(cw *cimfs.CimFsWriter) error
}

// addOp represents a pending operation of adding a new file inside the cim
type addOp struct {
	// path inside the cim at which the file should be added
	pathInCim string
	// host path where this file was temporarily written.
	hostPath string
	// other file metadata fields that were provided during the add call.
	fileInfo           *winio.FileBasicInfo
	securityDescriptor []byte
	extendedAttributes []byte
	reparseData        []byte
}
// apply adds the file staged at o.hostPath into the cim at o.pathInCim using
// the metadata captured when the file was first seen, and copies its contents
// in unless the entry is a directory.
func (o *addOp) apply(cw *cimfs.CimFsWriter) error {
	f, err := os.Open(o.hostPath)
	if err != nil {
		return fmt.Errorf("open file %s: %w", o.hostPath, err)
	}
	defer f.Close()

	fs, err := f.Stat()
	if err != nil {
		return fmt.Errorf("stat file %s: %w", o.hostPath, err)
	}

	if err := cw.AddFile(o.pathInCim, o.fileInfo, fs.Size(), o.securityDescriptor, o.extendedAttributes, o.reparseData); err != nil {
		return fmt.Errorf("cim add file %s: %w", o.hostPath, err)
	}

	// FileAttributes is a bitmask: test the directory bit rather than comparing
	// for equality, so a directory that also carries other attribute bits
	// (hidden, system, ...) is not mistakenly treated as a data file.
	if o.fileInfo.FileAttributes&windows.FILE_ATTRIBUTE_DIRECTORY == 0 {
		written, err := io.Copy(cw, f)
		if err != nil {
			return fmt.Errorf("write file %s inside cim: %w", o.hostPath, err)
		} else if written != fs.Size() {
			return fmt.Errorf("short write to cim for file %s, expected %d bytes wrote %d", o.hostPath, fs.Size(), written)
		}
	}
	return nil
}
// linkOp represents a pending hard-link creation inside the cim.
type linkOp struct {
	// old & new paths inside the cim where the link should be created
	oldPath string
	newPath string
}

// apply creates the recorded hard link inside the cim via cw.
func (o *linkOp) apply(cw *cimfs.CimFsWriter) error {
	return cw.AddLink(o.oldPath, o.newPath)
}

View File

@ -0,0 +1,293 @@
//go:build windows
package cim
import (
"context"
"fmt"
"os"
"path/filepath"
"syscall"
"time"
"github.com/Microsoft/go-winio"
"github.com/Microsoft/go-winio/vhd"
"github.com/Microsoft/hcsshim/computestorage"
"github.com/Microsoft/hcsshim/internal/memory"
"github.com/Microsoft/hcsshim/internal/security"
"github.com/Microsoft/hcsshim/internal/vhdx"
"github.com/Microsoft/hcsshim/internal/wclayer"
"golang.org/x/sys/windows"
)
// defaultVHDXBlockSizeInMB is the block size (in MB) used when creating the
// base & differencing VHDX files for container and utility VM layers.
const defaultVHDXBlockSizeInMB = 1
// createContainerBaseLayerVHDs creates the base VHDX and the differencing VHDX
// for a container base layer under `layerPath`. The base VHD is formatted as a
// writable layer; the differencing disk on top of it is what gets copied for a
// container's final rw (scratch) layer. Both disks are granted VM group access
// so they can be attached to VMs. Pre-existing disks at the target paths are
// removed first, and both files are cleaned up again if any step fails.
func createContainerBaseLayerVHDs(ctx context.Context, layerPath string) (err error) {
	baseVhdPath := filepath.Join(layerPath, wclayer.ContainerBaseVhd)
	diffVhdPath := filepath.Join(layerPath, wclayer.ContainerScratchVhd)
	defaultVhdSize := uint64(20) // in GiB; scaled by memory.GiB below

	// Remove any leftovers from a previous (possibly failed) creation.
	if _, err := os.Stat(baseVhdPath); err == nil {
		if err := os.RemoveAll(baseVhdPath); err != nil {
			return fmt.Errorf("failed to remove base vhdx path: %w", err)
		}
	}
	if _, err := os.Stat(diffVhdPath); err == nil {
		if err := os.RemoveAll(diffVhdPath); err != nil {
			return fmt.Errorf("failed to remove differencing vhdx: %w", err)
		}
	}

	createParams := &vhd.CreateVirtualDiskParameters{
		Version: 2,
		Version2: vhd.CreateVersion2{
			MaximumSize:      defaultVhdSize * memory.GiB,
			BlockSizeInBytes: defaultVHDXBlockSizeInMB * memory.MiB,
		},
	}
	handle, err := vhd.CreateVirtualDisk(baseVhdPath, vhd.VirtualDiskAccessNone, vhd.CreateVirtualDiskFlagNone, createParams)
	if err != nil {
		return fmt.Errorf("failed to create vhdx: %w", err)
	}

	// If anything below fails, remove both disks so a half-created layer isn't
	// left behind. The closure observes the named return `err`.
	defer func() {
		if err != nil {
			os.RemoveAll(baseVhdPath)
			os.RemoveAll(diffVhdPath)
		}
	}()

	err = computestorage.FormatWritableLayerVhd(ctx, windows.Handle(handle))
	// we always want to close the handle whether format succeeds or not.
	closeErr := syscall.CloseHandle(handle)
	if err != nil {
		return err
	} else if closeErr != nil {
		return fmt.Errorf("failed to close vhdx handle: %w", closeErr)
	}

	// Create the differencing disk that will be what's copied for the final rw layer
	// for a container.
	if err = vhd.CreateDiffVhd(diffVhdPath, baseVhdPath, defaultVHDXBlockSizeInMB); err != nil {
		return fmt.Errorf("failed to create differencing disk: %w", err)
	}

	if err = security.GrantVmGroupAccess(baseVhdPath); err != nil {
		return fmt.Errorf("failed to grant vm group access to %s: %w", baseVhdPath, err)
	}
	if err = security.GrantVmGroupAccess(diffVhdPath); err != nil {
		return fmt.Errorf("failed to grant vm group access to %s: %w", diffVhdPath, err)
	}
	return nil
}
// processUtilityVMLayer creates the utility VM base & scratch VHDs under
// `layerPath` and, along with the scratch creation, does the BCD modifications
// required to allow the UVM to boot from the CIM. It expects that the UVM BCD
// file is present at layerPath/`bcdFilePath` and a UVM SYSTEM hive is present
// at layerPath/UtilityVM/`wclayer.RegFilesPath`/SYSTEM. The scratch VHDs are
// created under the `layerPath` directory.
func processUtilityVMLayer(ctx context.Context, layerPath string) (err error) {
	baseVhdPath := filepath.Join(layerPath, wclayer.UtilityVMPath, wclayer.UtilityVMBaseVhd)
	diffVhdPath := filepath.Join(layerPath, wclayer.UtilityVMPath, wclayer.UtilityVMScratchVhd)
	defaultVhdSize := uint64(10) // in GiB; scaled by memory.GiB below

	createParams := &vhd.CreateVirtualDiskParameters{
		Version: 2,
		Version2: vhd.CreateVersion2{
			MaximumSize:      defaultVhdSize * memory.GiB,
			BlockSizeInBytes: defaultVHDXBlockSizeInMB * memory.MiB,
		},
	}
	handle, err := vhd.CreateVirtualDisk(baseVhdPath, vhd.VirtualDiskAccessNone, vhd.CreateVirtualDiskFlagNone, createParams)
	if err != nil {
		return fmt.Errorf("failed to create vhdx: %w", err)
	}

	// Remove both disks if anything below fails. Every error path below
	// assigns to `err` (no `:=` shadowing inside `if` statements), otherwise
	// this cleanup would not observe the failure.
	defer func() {
		if err != nil {
			os.RemoveAll(baseVhdPath)
			os.RemoveAll(diffVhdPath)
		}
	}()

	err = computestorage.FormatWritableLayerVhd(ctx, windows.Handle(handle))
	// always close the handle whether format succeeded or not.
	closeErr := syscall.CloseHandle(handle)
	if err != nil {
		return err
	} else if closeErr != nil {
		return fmt.Errorf("failed to close vhdx handle: %w", closeErr)
	}

	partitionInfo, err := vhdx.GetScratchVhdPartitionInfo(ctx, baseVhdPath)
	if err != nil {
		return fmt.Errorf("failed to get base vhd layout info: %w", err)
	}
	// relativeCimPath needs to be the cim path relative to the snapshots directory. The snapshots
	// directory is shared inside the UVM over VSMB, so during the UVM boot this relative path will be
	// used to find the cim file under that VSMB share.
	relativeCimPath := filepath.Join(filepath.Base(GetCimDirFromLayer(layerPath)), GetCimNameFromLayer(layerPath))
	bcdPath := filepath.Join(layerPath, bcdFilePath)
	if err = updateBcdStoreForBoot(bcdPath, relativeCimPath, partitionInfo.DiskID, partitionInfo.PartitionID); err != nil {
		return fmt.Errorf("failed to update BCD: %w", err)
	}

	if err = enableCimBoot(filepath.Join(layerPath, wclayer.UtilityVMPath, wclayer.RegFilesPath, "SYSTEM")); err != nil {
		return fmt.Errorf("failed to setup cim image for uvm boot: %w", err)
	}

	// Note: diff vhd creation and granting of vm group access must be done AFTER
	// getting the partition info of the base VHD. Otherwise it causes the vhd parent
	// chain to get corrupted.
	// TODO(ambarve): figure out why this happens so that bcd update can be moved to a separate function

	// Create the differencing disk that will be what's copied for the final rw layer
	// for a container.
	if err = vhd.CreateDiffVhd(diffVhdPath, baseVhdPath, defaultVHDXBlockSizeInMB); err != nil {
		return fmt.Errorf("failed to create differencing disk: %w", err)
	}

	if err = security.GrantVmGroupAccess(baseVhdPath); err != nil {
		return fmt.Errorf("failed to grant vm group access to %s: %w", baseVhdPath, err)
	}
	if err = security.GrantVmGroupAccess(diffVhdPath); err != nil {
		return fmt.Errorf("failed to grant vm group access to %s: %w", diffVhdPath, err)
	}
	return nil
}
// processBaseLayerHives makes the base-layer-specific modifications on the
// hives and emits the equivalent pendingCimOps that should be applied on the
// CIM. In the base layer we need to create hard links from the registry hives
// under Files/Windows/System32/config into Hives/*_BASE. This function creates
// these links outside the cim so that the registry hives under Hives/ are
// available during children layers import. Then we write these hive files
// inside the cim and create the corresponding links inside the cim.
func processBaseLayerHives(layerPath string) ([]pendingCimOp, error) {
	pendingOps := []pendingCimOp{}

	// make hives directory both outside and in the cim
	if err := os.Mkdir(filepath.Join(layerPath, wclayer.HivesPath), 0755); err != nil {
		return pendingOps, fmt.Errorf("hives directory creation: %w", err)
	}

	// Use a single timestamp for all four time fields so the directory
	// metadata is internally consistent (the original sampled time.Now once
	// per field).
	now := windows.NsecToFiletime(time.Now().UnixNano())
	hivesDirInfo := &winio.FileBasicInfo{
		CreationTime:   now,
		LastAccessTime: now,
		LastWriteTime:  now,
		ChangeTime:     now,
		FileAttributes: windows.FILE_ATTRIBUTE_DIRECTORY,
	}
	pendingOps = append(pendingOps, &addOp{
		pathInCim: wclayer.HivesPath,
		hostPath:  filepath.Join(layerPath, wclayer.HivesPath),
		fileInfo:  hivesDirInfo,
	})

	// add hard links from base hive files.
	for _, hv := range hives {
		oldHivePathRelative := filepath.Join(wclayer.RegFilesPath, hv.name)
		newHivePathRelative := filepath.Join(wclayer.HivesPath, hv.base)
		if err := os.Link(filepath.Join(layerPath, oldHivePathRelative), filepath.Join(layerPath, newHivePathRelative)); err != nil {
			return pendingOps, fmt.Errorf("hive link creation: %w", err)
		}

		pendingOps = append(pendingOps, &linkOp{
			oldPath: oldHivePathRelative,
			newPath: newHivePathRelative,
		})
	}
	return pendingOps, nil
}
// processLayoutFile creates a file named "layout" in the root of the base
// layer. This allows certain container startup related functions to understand
// that the hives are a part of the container rootfs. It returns the pending op
// that writes the same file back into the cim.
func processLayoutFile(layerPath string) ([]pendingCimOp, error) {
	fileContents := "vhd-with-hives\n"
	if err := os.WriteFile(filepath.Join(layerPath, "layout"), []byte(fileContents), 0755); err != nil {
		return []pendingCimOp{}, fmt.Errorf("write layout file: %w", err)
	}

	// Single timestamp so the file's four time fields agree (the original
	// sampled time.Now once per field).
	now := windows.NsecToFiletime(time.Now().UnixNano())
	layoutFileInfo := &winio.FileBasicInfo{
		CreationTime:   now,
		LastAccessTime: now,
		LastWriteTime:  now,
		ChangeTime:     now,
		FileAttributes: windows.FILE_ATTRIBUTE_NORMAL,
	}

	op := &addOp{
		pathInCim: "layout",
		hostPath:  filepath.Join(layerPath, "layout"),
		fileInfo:  layoutFileInfo,
	}
	return []pendingCimOp{op}, nil
}
// processBaseLayer performs the fixups required after importing a base layer:
// the container scratch VHDs are created (plus the utility VM layer when
// requested), and the layer files generated during base layer processing
// (hive links, layout file) are queued as pending ops so they get written
// back into the cim. This function opens the cim file for writing and updates it.
func (cw *CimLayerWriter) processBaseLayer(ctx context.Context, processUtilityVM bool) (err error) {
	if err = createContainerBaseLayerVHDs(ctx, cw.path); err != nil {
		return fmt.Errorf("failed to create container base VHDs: %w", err)
	}

	if processUtilityVM {
		if err = processUtilityVMLayer(ctx, cw.path); err != nil {
			return fmt.Errorf("process utilityVM layer: %w", err)
		}
	}

	// Gather the pending cim ops emitted by each base-layer processing step,
	// in order: hive links first, then the layout file.
	for _, step := range []func(string) ([]pendingCimOp, error){
		processBaseLayerHives,
		processLayoutFile,
	} {
		ops, stepErr := step(cw.path)
		if stepErr != nil {
			return stepErr
		}
		cw.pendingOps = append(cw.pendingOps, ops...)
	}
	return nil
}
// processNonBaseLayer takes care of the processing required for a non base layer. As of now
// the only processing required for a non base layer is to merge the delta registry hives of the
// non-base layer with its parent layer (and to process the utility VM layer when requested).
func (cw *CimLayerWriter) processNonBaseLayer(ctx context.Context, processUtilityVM bool) (err error) {
	for _, hv := range hives {
		baseHive := filepath.Join(wclayer.HivesPath, hv.base)
		deltaHive := filepath.Join(wclayer.HivesPath, hv.delta)
		_, err := os.Stat(filepath.Join(cw.path, deltaHive))
		// merge with parent layer if delta exists; a missing delta hive simply
		// means this layer did not modify that hive, so it is skipped.
		if err != nil && !os.IsNotExist(err) {
			return fmt.Errorf("stat delta hive %s: %w", filepath.Join(cw.path, deltaHive), err)
		} else if err == nil {
			// merge base hive of parent layer with the delta hive of this layer and write it as
			// the base hive of this layer. Only the first parent layer's base hive is
			// consulted (parentLayerPaths[0] is the immediate parent).
			err = mergeHive(filepath.Join(cw.parentLayerPaths[0], baseHive), filepath.Join(cw.path, deltaHive), filepath.Join(cw.path, baseHive))
			if err != nil {
				return err
			}

			// the newly created merged file must be added to the cim
			cw.pendingOps = append(cw.pendingOps, &addOp{
				pathInCim: baseHive,
				hostPath:  filepath.Join(cw.path, baseHive),
				fileInfo: &winio.FileBasicInfo{
					CreationTime:   windows.NsecToFiletime(time.Now().UnixNano()),
					LastAccessTime: windows.NsecToFiletime(time.Now().UnixNano()),
					LastWriteTime:  windows.NsecToFiletime(time.Now().UnixNano()),
					ChangeTime:     windows.NsecToFiletime(time.Now().UnixNano()),
					FileAttributes: windows.FILE_ATTRIBUTE_NORMAL,
				},
			})
		}
	}

	if processUtilityVM {
		return processUtilityVMLayer(ctx, cw.path)
	}
	return nil
}

View File

@ -0,0 +1,172 @@
//go:build windows
package cim
import (
"encoding/binary"
"fmt"
"os"
"unsafe"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/winapi"
"github.com/Microsoft/hcsshim/osversion"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
"golang.org/x/sys/windows"
)
// enableCimBoot opens the SYSTEM registry hive at path `hivePath` and updates it to include a CIMFS Start
// registry key. This prepares the uvm to boot from a cim file if requested. The registry changes required to
// actually make the uvm boot from a cim will be added in the uvm config (look at
// addBootFromCimRegistryChanges for details). This registry key needs to be available in the early boot
// phase and so including it in the uvm config doesn't work.
func enableCimBoot(hivePath string) (err error) {
	// DWORD value buffers for the Start values written below.
	dataZero := make([]byte, 4)
	dataFour := make([]byte, 4)
	binary.LittleEndian.PutUint32(dataFour, 4)

	bootGUID, err := windows.UTF16FromString(bootContainerID)
	if err != nil {
		return fmt.Errorf("failed to encode boot guid to utf16: %w", err)
	}

	overrideBootPath, err := windows.UTF16FromString("\\Windows\\")
	if err != nil {
		return fmt.Errorf("failed to encode override boot path to utf16: %w", err)
	}

	regChanges := []struct {
		keyPath   string
		valueName string
		valueType winapi.RegType
		data      *byte
		dataLen   uint32
	}{
		{"ControlSet001\\Control", "BootContainerGuid", winapi.REG_TYPE_SZ, (*byte)(unsafe.Pointer(&bootGUID[0])), 2 * uint32(len(bootGUID))},
		{"ControlSet001\\Services\\UnionFS", "Start", winapi.REG_TYPE_DWORD, &dataZero[0], uint32(len(dataZero))},
		// use len(dataFour) here — the original passed len(dataZero), which only
		// worked because both buffers happen to be 4 bytes.
		{"ControlSet001\\Services\\wcifs", "Start", winapi.REG_TYPE_DWORD, &dataFour[0], uint32(len(dataFour))},
		// The bootmgr loads the uvm files from the cim and so uses the relative path `UtilityVM\\Files` inside the cim to access the uvm files. However, once the cim is mounted UnionFS will merge the correct directory (UtilityVM\\Files) of the cim with the scratch and then that point onwards we don't need to use the relative path. Below registry key tells the kernel that the boot path that was provided in BCD should now be overriden with this new path.
		{"Setup", "BootPathOverride", winapi.REG_TYPE_SZ, (*byte)(unsafe.Pointer(&overrideBootPath[0])), 2 * uint32(len(overrideBootPath))},
	}

	var storeHandle winapi.ORHKey
	if err = winapi.OROpenHive(hivePath, &storeHandle); err != nil {
		return fmt.Errorf("failed to open registry store at %s: %w", hivePath, err)
	}

	for _, change := range regChanges {
		var changeKey winapi.ORHKey
		if err = winapi.ORCreateKey(storeHandle, change.keyPath, 0, 0, 0, &changeKey, nil); err != nil {
			return fmt.Errorf("failed to open reg key %s: %w", change.keyPath, err)
		}
		err = winapi.ORSetValue(changeKey, change.valueName, uint32(change.valueType), change.data, change.dataLen)
		// always close the per-key handle, whether the set succeeded or not, to
		// avoid leaking offline-registry key handles across iterations.
		closeErr := winapi.ORCloseKey(changeKey)
		if err != nil {
			return fmt.Errorf("failed to set value for regkey %s\\%s : %w", change.keyPath, change.valueName, err)
		}
		if closeErr != nil {
			return fmt.Errorf("failed to close reg key %s: %w", change.keyPath, closeErr)
		}
	}

	// remove the existing file first
	if err := os.Remove(hivePath); err != nil {
		return fmt.Errorf("failed to remove existing registry %s: %w", hivePath, err)
	}

	if err = winapi.ORSaveHive(storeHandle, hivePath, uint32(osversion.Get().MajorVersion), uint32(osversion.Get().MinorVersion)); err != nil {
		return fmt.Errorf("error saving the registry store: %w", err)
	}

	// close hive irrespective of the errors
	if err := winapi.ORCloseHive(storeHandle); err != nil {
		return fmt.Errorf("error closing registry store; %w", err)
	}
	return nil
}
// mergeHive merges the hive located at parentHivePath with the hive located at deltaHivePath and stores
// the result into the file at mergedHivePath. If a file already exists at path `mergedHivePath` then it
// throws an error.
func mergeHive(parentHivePath, deltaHivePath, mergedHivePath string) (err error) {
	var baseHive, deltaHive, mergedHive winapi.ORHKey
	if err := winapi.OROpenHive(parentHivePath, &baseHive); err != nil {
		return fmt.Errorf("failed to open base hive %s: %w", parentHivePath, err)
	}
	// Each handle is closed on the way out; a close failure is only surfaced
	// when no earlier error occurred (matching the original pkg/errors
	// Wrap-on-nil behavior, but using the file's fmt.Errorf %w convention).
	defer func() {
		if err2 := winapi.ORCloseHive(baseHive); err2 != nil && err == nil {
			err = fmt.Errorf("failed to close base hive: %w", err2)
		}
	}()
	if err := winapi.OROpenHive(deltaHivePath, &deltaHive); err != nil {
		return fmt.Errorf("failed to open delta hive %s: %w", deltaHivePath, err)
	}
	defer func() {
		if err2 := winapi.ORCloseHive(deltaHive); err2 != nil && err == nil {
			err = fmt.Errorf("failed to close delta hive: %w", err2)
		}
	}()
	if err := winapi.ORMergeHives([]winapi.ORHKey{baseHive, deltaHive}, &mergedHive); err != nil {
		return fmt.Errorf("failed to merge hives: %w", err)
	}
	defer func() {
		if err2 := winapi.ORCloseHive(mergedHive); err2 != nil && err == nil {
			err = fmt.Errorf("failed to close merged hive: %w", err2)
		}
	}()
	if err := winapi.ORSaveHive(mergedHive, mergedHivePath, uint32(osversion.Get().MajorVersion), uint32(osversion.Get().MinorVersion)); err != nil {
		return fmt.Errorf("failed to save hive: %w", err)
	}
	return
}
// getOsBuildNumberFromRegistry fetches the "CurrentBuild" value at path
// "Microsoft\Windows NT\CurrentVersion" from the SOFTWARE registry hive at path
// `regHivePath`. This is used to detect the build version of the uvm.
func getOsBuildNumberFromRegistry(regHivePath string) (_ string, err error) {
	var storeHandle, keyHandle winapi.ORHKey
	var dataType, dataLen uint32
	keyPath := "Microsoft\\Windows NT\\CurrentVersion"
	valueName := "CurrentBuild"
	// 16 bytes = 8 UTF-16 code units; build version string can't be more than 5
	// wide chars (plus terminator)? — assumption from the original author;
	// TODO confirm ORGetValue behavior when the value is larger than this buffer.
	dataLen = 16
	dataBuf := make([]byte, dataLen)

	if err = winapi.OROpenHive(regHivePath, &storeHandle); err != nil {
		return "", fmt.Errorf("failed to open registry store at %s: %w", regHivePath, err)
	}
	// close failures are logged (best effort) rather than returned.
	defer func() {
		if closeErr := winapi.ORCloseHive(storeHandle); closeErr != nil {
			log.L.WithFields(logrus.Fields{
				"error": closeErr,
				"hive":  regHivePath,
			}).Warnf("failed to close hive")
		}
	}()

	if err = winapi.OROpenKey(storeHandle, keyPath, &keyHandle); err != nil {
		return "", fmt.Errorf("failed to open key at %s: %w", keyPath, err)
	}
	defer func() {
		if closeErr := winapi.ORCloseKey(keyHandle); closeErr != nil {
			log.L.WithFields(logrus.Fields{
				"error": closeErr,
				"hive":  regHivePath,
				"key":   keyPath,
				"value": valueName,
			}).Warnf("failed to close hive key")
		}
	}()

	if err = winapi.ORGetValue(keyHandle, "", valueName, &dataType, &dataBuf[0], &dataLen); err != nil {
		return "", fmt.Errorf("failed to get value of %s: %w", valueName, err)
	}

	if dataType != uint32(winapi.REG_TYPE_SZ) {
		return "", fmt.Errorf("unexpected build number data type (%d)", dataType)
	}

	// dataLen now holds the actual value size; the last 2 bytes are dropped —
	// presumably the UTF-16 NUL terminator of the REG_SZ value (verify).
	return winapi.ParseUtf16LE(dataBuf[:(dataLen - 2)]), nil
}

View File

@ -1,3 +1,5 @@
//go:build windows
package wclayer package wclayer
import ( import (

View File

@ -11,7 +11,6 @@ import (
"github.com/Microsoft/hcsshim/internal/hcserror" "github.com/Microsoft/hcsshim/internal/hcserror"
"github.com/Microsoft/hcsshim/internal/oc" "github.com/Microsoft/hcsshim/internal/oc"
"github.com/Microsoft/hcsshim/osversion"
"go.opencensus.io/trace" "go.opencensus.io/trace"
) )
@ -30,14 +29,17 @@ func ExpandScratchSize(ctx context.Context, path string, size uint64) (err error
return hcserror.New(err, title, "") return hcserror.New(err, title, "")
} }
// Manually expand the volume now in order to work around bugs in 19H1 and // Always expand the volume too. In case of legacy layers not expanding the volume here works because
// prerelease versions of Vb. Remove once this is fixed in Windows. // the PrepareLayer call internally handles the expansion. However, in other cases (like CimFS) we
if build := osversion.Build(); build >= osversion.V19H1 && build < 19020 { // don't call PrepareLayer and so the volume will never be expanded. This also means in case of
err = expandSandboxVolume(ctx, path) // legacy layers, we might have a small perf hit because the VHD is mounted twice for expansion (once
if err != nil { // here and once during the PrepareLayer call). But as long as the perf hit is minimal, we should be
return err // okay.
} err = expandSandboxVolume(ctx, path)
if err != nil {
return err
} }
return nil return nil
} }

View File

@ -154,7 +154,7 @@ func (r *legacyLayerReader) walkUntilCancelled() error {
} }
return nil return nil
}) })
if err == errorIterationCanceled { if err == errorIterationCanceled { //nolint:errorlint // explicitly returned
return nil return nil
} }
if err == nil { if err == nil {
@ -196,7 +196,7 @@ func findBackupStreamSize(r io.Reader) (int64, error) {
for { for {
hdr, err := br.Next() hdr, err := br.Next()
if err != nil { if err != nil {
if err == io.EOF { if errors.Is(err, io.EOF) {
err = nil err = nil
} }
return 0, err return 0, err
@ -428,7 +428,7 @@ func (w *legacyLayerWriter) initUtilityVM() error {
// immutable. // immutable.
err = cloneTree(w.parentRoots[0], w.destRoot, UtilityVMFilesPath, mutatedUtilityVMFiles) err = cloneTree(w.parentRoots[0], w.destRoot, UtilityVMFilesPath, mutatedUtilityVMFiles)
if err != nil { if err != nil {
return fmt.Errorf("cloning the parent utility VM image failed: %s", err) return fmt.Errorf("cloning the parent utility VM image failed: %w", err)
} }
w.HasUtilityVM = true w.HasUtilityVM = true
} }
@ -451,7 +451,7 @@ func (w *legacyLayerWriter) reset() error {
for { for {
bhdr, err := br.Next() bhdr, err := br.Next()
if err == io.EOF { if errors.Is(err, io.EOF) {
// end of backupstream data // end of backupstream data
break break
} }

View File

@ -1,3 +1,5 @@
//go:build windows
package winapi package winapi
import ( import (
@ -34,7 +36,7 @@ type CimFsFileMetadata struct {
//sys CimDismountImage(volumeID *g) (hr error) = cimfs.CimDismountImage? //sys CimDismountImage(volumeID *g) (hr error) = cimfs.CimDismountImage?
//sys CimCreateImage(imagePath string, oldFSName *uint16, newFSName *uint16, cimFSHandle *FsHandle) (hr error) = cimfs.CimCreateImage? //sys CimCreateImage(imagePath string, oldFSName *uint16, newFSName *uint16, cimFSHandle *FsHandle) (hr error) = cimfs.CimCreateImage?
//sys CimCloseImage(cimFSHandle FsHandle) (hr error) = cimfs.CimCloseImage? //sys CimCloseImage(cimFSHandle FsHandle) = cimfs.CimCloseImage?
//sys CimCommitImage(cimFSHandle FsHandle) (hr error) = cimfs.CimCommitImage? //sys CimCommitImage(cimFSHandle FsHandle) (hr error) = cimfs.CimCommitImage?
//sys CimCreateFile(cimFSHandle FsHandle, path string, file *CimFsFileMetadata, cimStreamHandle *StreamHandle) (hr error) = cimfs.CimCreateFile? //sys CimCreateFile(cimFSHandle FsHandle, path string, file *CimFsFileMetadata, cimStreamHandle *StreamHandle) (hr error) = cimfs.CimCreateFile?

View File

@ -184,18 +184,12 @@ func _CMLocateDevNode(pdnDevInst *uint32, pDeviceID *uint16, uFlags uint32) (hr
return return
} }
func CimCloseImage(cimFSHandle FsHandle) (hr error) { func CimCloseImage(cimFSHandle FsHandle) (err error) {
hr = procCimCloseImage.Find() err = procCimCloseImage.Find()
if hr != nil { if err != nil {
return return
} }
r0, _, _ := syscall.Syscall(procCimCloseImage.Addr(), 1, uintptr(cimFSHandle), 0, 0) syscall.Syscall(procCimCloseImage.Addr(), 1, uintptr(cimFSHandle), 0, 0)
if int32(r0) < 0 {
if r0&0x1fff0000 == 0x00070000 {
r0 &= 0xffff
}
hr = syscall.Errno(r0)
}
return return
} }

View File

@ -0,0 +1,291 @@
//go:build windows
// +build windows
package cimfs
import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"unsafe"
"github.com/Microsoft/go-winio"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/winapi"
"github.com/sirupsen/logrus"
"golang.org/x/sys/windows"
)
// CimFsWriter represents a writer to a single CimFS filesystem instance. On disk, the
// image is composed of a filesystem file and several object ID and region files.
// Note: The CimFsWriter isn't thread safe!
type CimFsWriter struct {
	// name of this cim. Usually a <name>.cim file will be created to represent this cim.
	name string
	// handle is the CIMFS_IMAGE_HANDLE that must be passed when calling CIMFS APIs.
	handle winapi.FsHandle
	// name of the active file i.e the file to which we are currently writing.
	activeName string
	// stream to currently active file.
	activeStream winapi.StreamHandle
	// amount of bytes that can be written to the activeStream.
	activeLeft uint64
}
// Create creates a new cim image at imagePath. The returned CimFsWriter can
// then be used to add files, links and streams to this cim.
func Create(imagePath string, oldFSName string, newFSName string) (_ *CimFsWriter, err error) {
	// CimCreateImage behaves differently when passed an empty string versus a
	// nil pointer, so only convert non-empty names to UTF-16 pointers.
	var oldNameBytes, newNameBytes *uint16
	fsName := oldFSName
	if oldFSName != "" {
		if oldNameBytes, err = windows.UTF16PtrFromString(oldFSName); err != nil {
			return nil, err
		}
	}
	if newFSName != "" {
		fsName = newFSName
		if newNameBytes, err = windows.UTF16PtrFromString(newFSName); err != nil {
			return nil, err
		}
	}

	var handle winapi.FsHandle
	if err = winapi.CimCreateImage(imagePath, oldNameBytes, newNameBytes, &handle); err != nil {
		return nil, fmt.Errorf("failed to create cim image at path %s, oldName: %s, newName: %s: %w", imagePath, oldFSName, newFSName, err)
	}
	return &CimFsWriter{handle: handle, name: filepath.Join(imagePath, fsName)}, nil
}
// CreateAlternateStream creates an alternate stream of the given size at the
// given path inside the cim and makes it the active stream. Always finish
// writing the current active stream before creating an alternate stream; this
// call closes the previous active stream first.
func (c *CimFsWriter) CreateAlternateStream(path string, size uint64) error {
	if err := c.closeStream(); err != nil {
		return err
	}
	if err := winapi.CimCreateAlternateStream(c.handle, path, size, &c.activeStream); err != nil {
		return fmt.Errorf("failed to create alternate stream for path %s: %w", path, err)
	}
	c.activeName = path
	return nil
}
// closeStream closes the currently active stream, if any. It reports an error
// when the stream is closed before the declared number of bytes was written
// (CimCloseStream itself does not check this). The active-stream bookkeeping
// fields are always reset, even when an error is returned.
func (c *CimFsWriter) closeStream() error {
	if c.activeStream == 0 {
		return nil
	}
	err := winapi.CimCloseStream(c.activeStream)
	if err == nil && c.activeLeft > 0 {
		// Validate here because CimCloseStream does not and this improves error
		// reporting. Otherwise the error will occur in the context of
		// cimWriteStream.
		err = fmt.Errorf("incomplete write, %d bytes left in the stream %s", c.activeLeft, c.activeName)
	}
	if err != nil {
		err = &PathError{Cim: c.name, Op: "closeStream", Path: c.activeName, Err: err}
	}
	// reset state regardless of the close outcome.
	c.activeLeft = 0
	c.activeStream = 0
	c.activeName = ""
	return err
}
// AddFile adds a new file to the image. The file is added at the specified path. After
// calling this function, the file is set as the active stream for the image, so data can
// be written by calling `Write`.
func (c *CimFsWriter) AddFile(path string, info *winio.FileBasicInfo, fileSize int64, securityDescriptor []byte, extendedAttributes []byte, reparseData []byte) error {
	err := c.closeStream()
	if err != nil {
		return err
	}
	fileMetadata := &winapi.CimFsFileMetadata{
		Attributes:     info.FileAttributes,
		FileSize:       fileSize,
		CreationTime:   info.CreationTime,
		LastWriteTime:  info.LastWriteTime,
		ChangeTime:     info.ChangeTime,
		LastAccessTime: info.LastAccessTime,
	}
	if len(securityDescriptor) == 0 {
		// Passing an empty security descriptor creates a CIM in a weird state.
		// Pass the NULL DACL.
		securityDescriptor = nullSd
	}
	fileMetadata.SecurityDescriptorBuffer = unsafe.Pointer(&securityDescriptor[0])
	fileMetadata.SecurityDescriptorSize = uint32(len(securityDescriptor))
	if len(reparseData) > 0 {
		fileMetadata.ReparseDataBuffer = unsafe.Pointer(&reparseData[0])
		fileMetadata.ReparseDataSize = uint32(len(reparseData))
	}
	if len(extendedAttributes) > 0 {
		fileMetadata.ExtendedAttributes = unsafe.Pointer(&extendedAttributes[0])
		// NOTE(review): EACount is set to the byte length of the EA buffer —
		// confirm the CIMFS API expects a byte count here rather than the
		// number of EA entries.
		fileMetadata.EACount = uint32(len(extendedAttributes))
	}
	// remove the trailing `\` if present, otherwise it trips off the cim writer
	path = strings.TrimSuffix(path, "\\")
	err = winapi.CimCreateFile(c.handle, path, fileMetadata, &c.activeStream)
	if err != nil {
		return &PathError{Cim: c.name, Op: "addFile", Path: path, Err: err}
	}
	c.activeName = path
	// only regular files get a writable stream budget; directories carry no data.
	if info.FileAttributes&(windows.FILE_ATTRIBUTE_DIRECTORY) == 0 {
		c.activeLeft = uint64(fileSize)
	}
	return nil
}
// Write writes bytes to the active stream. It fails when there is no active
// stream or when the write would exceed the size declared for the active file.
func (c *CimFsWriter) Write(p []byte) (int, error) {
	if c.activeStream == 0 {
		return 0, fmt.Errorf("no active stream")
	}
	// A zero-length write is a no-op. Guard it explicitly: the io.Writer
	// contract permits len(p) == 0, and &p[0] below would panic on an empty
	// slice.
	if len(p) == 0 {
		return 0, nil
	}
	if uint64(len(p)) > c.activeLeft {
		return 0, &PathError{Cim: c.name, Op: "write", Path: c.activeName, Err: fmt.Errorf("wrote too much")}
	}
	err := winapi.CimWriteStream(c.activeStream, uintptr(unsafe.Pointer(&p[0])), uint32(len(p)))
	if err != nil {
		err = &PathError{Cim: c.name, Op: "write", Path: c.activeName, Err: err}
		return 0, err
	}
	c.activeLeft -= uint64(len(p))
	return len(p), nil
}
// AddLink adds a hard link from `oldPath` to `newPath` in the image. The
// currently active stream (if any) is closed first.
func (c *CimFsWriter) AddLink(oldPath string, newPath string) error {
	if err := c.closeStream(); err != nil {
		return err
	}
	if err := winapi.CimCreateHardLink(c.handle, newPath, oldPath); err != nil {
		return &LinkError{Cim: c.name, Op: "addLink", Old: oldPath, New: newPath, Err: err}
	}
	return nil
}
// Unlink deletes the file at `path` from the image. Deleting a path that does
// not exist is treated as success: the not-exist error from CimDeletePath is
// deliberately swallowed.
func (c *CimFsWriter) Unlink(path string) error {
	err := c.closeStream()
	if err != nil {
		return err
	}
	//TODO(ambarve): CimDeletePath currently returns an error if the file isn't found but we ideally want
	// to put a tombstone at that path so that when cims are merged it removes that file from the lower
	// layer
	err = winapi.CimDeletePath(c.handle, path)
	if err != nil && !os.IsNotExist(err) {
		err = &PathError{Cim: c.name, Op: "unlink", Path: path, Err: err}
		return err
	}
	return nil
}
// commit closes the active stream (if any) and finalizes the contents of the
// image. Failures from the commit call itself are wrapped in an OpError.
func (c *CimFsWriter) commit() error {
	if err := c.closeStream(); err != nil {
		return err
	}
	if err := winapi.CimCommitImage(c.handle); err != nil {
		return &OpError{Cim: c.name, Op: "commit", Err: err}
	}
	return nil
}
// Close commits the image and closes the CimFS filesystem handle. The writer
// must not be used after Close returns successfully.
func (c *CimFsWriter) Close() error {
	if c.handle == 0 {
		return fmt.Errorf("invalid writer")
	}
	// commit already wraps its own failures (OpError with Op "commit" /
	// closeStream's PathError); wrapping again here produced a doubly-wrapped
	// "commit" error, so return it as-is.
	if err := c.commit(); err != nil {
		return err
	}
	if err := winapi.CimCloseImage(c.handle); err != nil {
		return &OpError{Cim: c.name, Op: "close", Err: err}
	}
	c.handle = 0
	return nil
}
// DestroyCim finds out the region files & object files of this cim and then
// deletes them along with the <layer-id>.cim file itself. Removal is best
// effort: every file is attempted even after a failure, and the first error
// encountered is what gets returned.
func DestroyCim(ctx context.Context, cimPath string) (retErr error) {
	// record keeps the first error seen while allowing cleanup to continue.
	record := func(err error) {
		if retErr == nil {
			retErr = err
		}
	}

	regionFilePaths, err := getRegionFilePaths(ctx, cimPath)
	if err != nil {
		log.G(ctx).WithError(err).Warnf("get region files for cim %s", cimPath)
		record(err)
	}
	objectFilePaths, err := getObjectIDFilePaths(ctx, cimPath)
	if err != nil {
		log.G(ctx).WithError(err).Warnf("get objectid file for cim %s", cimPath)
		record(err)
	}

	log.G(ctx).WithFields(logrus.Fields{
		"cimPath":     cimPath,
		"regionFiles": regionFilePaths,
		"objectFiles": objectFilePaths,
	}).Debug("destroy cim")

	// remove region files first, then objectID files, then the cim itself
	// (same order as enumerated above).
	toRemove := append(append([]string{}, regionFilePaths...), objectFilePaths...)
	for _, p := range toRemove {
		if err := os.Remove(p); err != nil {
			log.G(ctx).WithError(err).Warnf("remove file %s", p)
			record(err)
		}
	}
	if err := os.Remove(cimPath); err != nil {
		log.G(ctx).WithError(err).Warnf("remove file %s", cimPath)
		record(err)
	}
	return retErr
}
// GetCimUsage returns the total disk usage in bytes by the cim at path
// `cimPath`, computed as the sum of the sizes of all its region & objectID
// files.
func GetCimUsage(ctx context.Context, cimPath string) (uint64, error) {
	regionFilePaths, err := getRegionFilePaths(ctx, cimPath)
	if err != nil {
		return 0, fmt.Errorf("get region file paths for cim %s: %w", cimPath, err)
	}
	objectFilePaths, err := getObjectIDFilePaths(ctx, cimPath)
	if err != nil {
		return 0, fmt.Errorf("get objectid file for cim %s: %w", cimPath, err)
	}

	var totalUsage uint64
	for _, group := range [][]string{regionFilePaths, objectFilePaths} {
		for _, f := range group {
			fi, err := os.Stat(f)
			if err != nil {
				return 0, fmt.Errorf("stat file %s: %w", f, err)
			}
			totalUsage += uint64(fi.Size())
		}
	}
	return totalUsage, nil
}

17
vendor/github.com/Microsoft/hcsshim/pkg/cimfs/cimfs.go generated vendored Normal file
View File

@ -0,0 +1,17 @@
//go:build windows
// +build windows
package cimfs
import (
"github.com/Microsoft/hcsshim/osversion"
"github.com/sirupsen/logrus"
)
// IsCimFSSupported reports whether the host OS build supports CimFS. Only
// build 20348 with revision >= 2031 qualifies.
func IsCimFSSupported() bool {
	rv, err := osversion.BuildRevision()
	if err != nil {
		// best effort: log and treat an unknown revision as 0.
		logrus.WithError(err).Warn("get build revision")
	}
	build := osversion.Build()
	return build == 20348 && rv >= 2031
}

134
vendor/github.com/Microsoft/hcsshim/pkg/cimfs/common.go generated vendored Normal file
View File

@ -0,0 +1,134 @@
//go:build windows
// +build windows
package cimfs
import (
"bytes"
"context"
"encoding/binary"
"fmt"
"os"
"path/filepath"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/pkg/cimfs/format"
)
var (
	// Equivalent to SDDL of "D:NO_ACCESS_CONTROL".
	// nullSd is a pre-built self-relative security descriptor blob; presumably
	// applied to cim files that carry no explicit security information —
	// confirm at call sites (none are visible in this file).
	nullSd = []byte{1, 0, 4, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
)
type OpError struct {
Cim string
Op string
Err error
}
func (e *OpError) Error() string {
s := "cim " + e.Op + " " + e.Cim
s += ": " + e.Err.Error()
return s
}
// PathError is the error type returned by most functions in this package.
type PathError struct {
Cim string
Op string
Path string
Err error
}
func (e *PathError) Error() string {
s := "cim " + e.Op + " " + e.Cim
s += ":" + e.Path
s += ": " + e.Err.Error()
return s
}
type LinkError struct {
Cim string
Op string
Old string
New string
Err error
}
func (e *LinkError) Error() string {
return "cim " + e.Op + " " + e.Old + " " + e.New + ": " + e.Err.Error()
}
// validateHeader checks that `h` carries the cim magic value and that its
// major version falls within the range this package understands
// ([MinSupportedVersion.Major, CurrentVersion.Major]).
func validateHeader(h *format.CommonHeader) error {
	if !bytes.Equal(h.Magic[:], format.MagicValue[:]) {
		return fmt.Errorf("not a cim file")
	}
	major := h.Version.Major
	if major < format.MinSupportedVersion.Major || major > format.CurrentVersion.Major {
		return fmt.Errorf("unsupported cim version. cim version %v must be between %v & %v", h.Version, format.MinSupportedVersion, format.CurrentVersion)
	}
	return nil
}
// readFilesystemHeader decodes a little-endian FilesystemHeader from the
// current position of `f` and validates its common header before returning it.
func readFilesystemHeader(f *os.File) (format.FilesystemHeader, error) {
	var hdr format.FilesystemHeader
	if err := binary.Read(f, binary.LittleEndian, &hdr); err != nil {
		return hdr, fmt.Errorf("reading filesystem header: %w", err)
	}
	if err := validateHeader(&hdr.Common); err != nil {
		return hdr, fmt.Errorf("validating filesystem header: %w", err)
	}
	return hdr, nil
}
// Returns the paths of all the objectID files associated with the cim at `cimPath`.
//
// The cim's filesystem header records a region set (an ID plus a count); for
// each index in that set an objectID file named `objectid_<ID>_<index>` is
// expected in the same directory as the cim. Missing files are skipped with a
// warning rather than treated as an error.
func getObjectIDFilePaths(ctx context.Context, cimPath string) ([]string, error) {
	f, err := os.Open(cimPath)
	if err != nil {
		return []string{}, fmt.Errorf("open cim file %s: %w", cimPath, err)
	}
	defer f.Close()
	fsh, err := readFilesystemHeader(f)
	if err != nil {
		// Fixed typo in the error message ("readingp" -> "reading") so it
		// matches the equivalent message in getRegionFilePaths.
		return []string{}, fmt.Errorf("reading cim header: %w", err)
	}
	paths := []string{}
	for i := 0; i < int(fsh.Regions.Count); i++ {
		path := filepath.Join(filepath.Dir(cimPath), fmt.Sprintf("%s_%v_%d", format.ObjectIDFileName, fsh.Regions.ID, i))
		if _, err := os.Stat(path); err == nil {
			paths = append(paths, path)
		} else {
			log.G(ctx).WithError(err).Warnf("stat for object file %s", path)
		}
	}
	return paths, nil
}
// Returns the paths of all the region files associated with the cim at `cimPath`.
//
// Region files live next to the cim file and are named
// `region_<region-set-ID>_<index>`. Files that cannot be stat'd are skipped
// with a warning rather than treated as an error.
func getRegionFilePaths(ctx context.Context, cimPath string) ([]string, error) {
	f, err := os.Open(cimPath)
	if err != nil {
		return []string{}, fmt.Errorf("open cim file %s: %w", cimPath, err)
	}
	defer f.Close()
	fsh, err := readFilesystemHeader(f)
	if err != nil {
		return []string{}, fmt.Errorf("reading cim header: %w", err)
	}
	count := int(fsh.Regions.Count)
	dir := filepath.Dir(cimPath)
	paths := []string{}
	for i := 0; i < count; i++ {
		p := filepath.Join(dir, fmt.Sprintf("%s_%v_%d", format.RegionFileName, fsh.Regions.ID, i))
		if _, statErr := os.Stat(p); statErr != nil {
			log.G(ctx).WithError(statErr).Warnf("stat for region file %s", p)
			continue
		}
		paths = append(paths, p)
	}
	return paths, nil
}

3
vendor/github.com/Microsoft/hcsshim/pkg/cimfs/doc.go generated vendored Normal file
View File

@ -0,0 +1,3 @@
// This package provides simple go wrappers on top of the win32 CIMFS mount APIs.
// The mounting/unmounting of cim layers is done by the cim mount functions in the internal/wclayer/cim package.
package cimfs

View File

@ -0,0 +1,4 @@
// Package format maintains some basic structures to allow us to read the header of a cim file. This is mostly
// required to understand the region & objectid files associated with a particular cim. Otherwise, we don't
// need to parse the cim format.
package format

View File

@ -0,0 +1,61 @@
//go:build windows
// +build windows
package format
import "github.com/Microsoft/go-winio/pkg/guid"
const (
	// Base names of the files that accompany a cim on disk; the full names
	// are built as `<base>_<region-set-ID>_<index>` (see the cimfs package).
	RegionFileName   = "region"
	ObjectIDFileName = "objectid"
)

// Magic specifies the magic number at the beginning of a file.
type Magic [8]uint8

// MagicValue is the expected magic ("cimfile0") at the start of every cim file.
var MagicValue = Magic([8]uint8{'c', 'i', 'm', 'f', 'i', 'l', 'e', '0'})

// Version is a major.minor format version number.
type Version struct {
	Major, Minor uint32
}

// CurrentVersion is the highest cim format version this package understands.
var CurrentVersion = Version{3, 0}

// MinSupportedVersion is the lowest cim format version this package accepts.
var MinSupportedVersion = Version{2, 0}

// FileType identifies the type of a CIM-related file (see CommonHeader.Type).
type FileType uint8

// RegionOffset encodes an offset to objects as index of the region file
// containing the object and the byte offset within that file.
type RegionOffset uint64

// CommonHeader is the common header for all CIM-related files.
// Field order/size mirrors the on-disk layout; decoded via binary.Read.
type CommonHeader struct {
	Magic        Magic
	HeaderLength uint32
	Type         FileType
	Reserved     uint8
	Reserved2    uint16
	Version      Version
	Reserved3    uint64
}

// RegionSet identifies the set of region files backing a cim: a shared GUID
// plus the number of files in the set.
type RegionSet struct {
	ID        guid.GUID
	Count     uint16
	Reserved  uint16
	Reserved1 uint32
}

// FilesystemHeader is the header for a filesystem file.
//
// The filesystem file points to the filesystem object inside a region
// file and specifies regions sets.
type FilesystemHeader struct {
	Common           CommonHeader
	Regions          RegionSet
	FilesystemOffset RegionOffset
	Reserved         uint32
	Reserved1        uint16
	ParentCount      uint16
}

View File

@ -0,0 +1,65 @@
//go:build windows
// +build windows
package cimfs
import (
"fmt"
"path/filepath"
"strings"
"github.com/Microsoft/go-winio/pkg/guid"
"github.com/Microsoft/hcsshim/internal/winapi"
"github.com/pkg/errors"
)
// MountError describes a failure to mount or unmount a cim volume.
type MountError struct {
	Cim        string
	Op         string
	VolumeGUID guid.GUID
	Err        error
}

// Error formats the failure as "cim <op> [<cim>] <volume-guid>: <err>"; the
// cim path is omitted when it is not known (e.g. during unmount).
func (e *MountError) Error() string {
	var b strings.Builder
	b.WriteString("cim " + e.Op)
	if e.Cim != "" {
		b.WriteString(" " + e.Cim)
	}
	b.WriteString(" " + e.VolumeGUID.String() + ": " + e.Err.Error())
	return b.String()
}
// Mount mounts the given cim at a volume with given GUID. Returns the full volume
// path (`\\?\Volume{<GUID>}\`) if mount is successful.
func Mount(cimPath string, volumeGUID guid.GUID, mountFlags uint32) (string, error) {
	dir := filepath.Dir(cimPath)
	name := filepath.Base(cimPath)
	if err := winapi.CimMountImage(dir, name, mountFlags, &volumeGUID); err != nil {
		return "", &MountError{Cim: cimPath, Op: "Mount", VolumeGUID: volumeGUID, Err: err}
	}
	return fmt.Sprintf("\\\\?\\Volume{%s}\\", volumeGUID.String()), nil
}
// Unmount unmounts the cim mounted at path `volumePath`.
//
// `volumePath` must be in the `\\?\Volume{GUID}\` format; a missing trailing
// backslash is tolerated and added automatically.
func Unmount(volumePath string) error {
	// Normalize the trailing backslash. Using strings.HasSuffix (rather than
	// indexing the last byte) also handles an empty input safely instead of
	// panicking; an empty path then fails the format check below.
	if !strings.HasSuffix(volumePath, "\\") {
		volumePath += "\\"
	}
	if !(strings.HasPrefix(volumePath, "\\\\?\\Volume{") && strings.HasSuffix(volumePath, "}\\")) {
		return errors.Errorf("volume path %s is not in the expected format", volumePath)
	}
	// Extract the GUID text between `\\?\Volume{` and `}\`.
	trimmedStr := strings.TrimPrefix(volumePath, "\\\\?\\Volume{")
	trimmedStr = strings.TrimSuffix(trimmedStr, "}\\")
	volGUID, err := guid.FromString(trimmedStr)
	if err != nil {
		return errors.Wrapf(err, "guid parsing failed for %s", trimmedStr)
	}
	if err := winapi.CimDismountImage(&volGUID); err != nil {
		return &MountError{VolumeGUID: volGUID, Op: "Unmount", Err: err}
	}
	return nil
}

View File

@ -89,7 +89,7 @@ func putBuf(b *bytes.Buffer) {
bytesBufferPool.Put(b) bytesBufferPool.Put(b)
} }
// Runhcs is the client to the runhcs cli // Runhcs is the client to the runhcs cli.
type Runhcs struct { type Runhcs struct {
// Debug enables debug output for logging. // Debug enables debug output for logging.
Debug bool Debug bool
@ -130,8 +130,8 @@ func (r *Runhcs) args() []string {
return out return out
} }
func (r *Runhcs) command(context context.Context, args ...string) *exec.Cmd { func (r *Runhcs) command(ctx context.Context, args ...string) *exec.Cmd {
cmd := exec.CommandContext(context, getCommandPath(), append(r.args(), args...)...) cmd := exec.CommandContext(ctx, getCommandPath(), append(r.args(), args...)...)
cmd.Env = os.Environ() cmd.Env = os.Environ()
return cmd return cmd
} }
@ -139,7 +139,7 @@ func (r *Runhcs) command(context context.Context, args ...string) *exec.Cmd {
// runOrError will run the provided command. If an error is // runOrError will run the provided command. If an error is
// encountered and neither Stdout or Stderr was set the error and the // encountered and neither Stdout or Stderr was set the error and the
// stderr of the command will be returned in the format of <error>: // stderr of the command will be returned in the format of <error>:
// <stderr> // <stderr>.
func (r *Runhcs) runOrError(cmd *exec.Cmd) error { func (r *Runhcs) runOrError(cmd *exec.Cmd) error {
if cmd.Stdout != nil || cmd.Stderr != nil { if cmd.Stdout != nil || cmd.Stderr != nil {
ec, err := runc.Monitor.Start(cmd) ec, err := runc.Monitor.Start(cmd)
@ -154,7 +154,7 @@ func (r *Runhcs) runOrError(cmd *exec.Cmd) error {
} }
data, err := cmdOutput(cmd, true) data, err := cmdOutput(cmd, true)
if err != nil { if err != nil {
return fmt.Errorf("%s: %s", err, data) return fmt.Errorf("%s: %s", err, data) //nolint:errorlint // legacy code
} }
return nil return nil
} }

View File

@ -10,8 +10,8 @@ import (
) )
// CreateScratch creates a scratch vhdx at 'destpath' that is ext4 formatted. // CreateScratch creates a scratch vhdx at 'destpath' that is ext4 formatted.
func (r *Runhcs) CreateScratch(context context.Context, destpath string) error { func (r *Runhcs) CreateScratch(ctx context.Context, destpath string) error {
return r.CreateScratchWithOpts(context, destpath, nil) return r.CreateScratchWithOpts(ctx, destpath, nil)
} }
// CreateScratchOpts is the set of options that can be used with the // CreateScratchOpts is the set of options that can be used with the
@ -43,7 +43,7 @@ func (opt *CreateScratchOpts) args() ([]string, error) {
// CreateScratchWithOpts creates a scratch vhdx at 'destpath' that is ext4 // CreateScratchWithOpts creates a scratch vhdx at 'destpath' that is ext4
// formatted based on `opts`. // formatted based on `opts`.
func (r *Runhcs) CreateScratchWithOpts(context context.Context, destpath string, opts *CreateScratchOpts) error { func (r *Runhcs) CreateScratchWithOpts(ctx context.Context, destpath string, opts *CreateScratchOpts) error {
args := []string{"create-scratch", "--destpath", destpath} args := []string{"create-scratch", "--destpath", destpath}
if opts != nil { if opts != nil {
oargs, err := opts.args() oargs, err := opts.args()
@ -52,5 +52,5 @@ func (r *Runhcs) CreateScratchWithOpts(context context.Context, destpath string,
} }
args = append(args, oargs...) args = append(args, oargs...)
} }
return r.runOrError(r.command(context, args...)) return r.runOrError(r.command(ctx, args...))
} }

View File

@ -64,7 +64,7 @@ func (opt *CreateOpts) args() ([]string, error) {
// Create creates a new container and returns its pid if it was created // Create creates a new container and returns its pid if it was created
// successfully. // successfully.
func (r *Runhcs) Create(context context.Context, id, bundle string, opts *CreateOpts) error { func (r *Runhcs) Create(ctx context.Context, id, bundle string, opts *CreateOpts) error {
args := []string{"create", "--bundle", bundle} args := []string{"create", "--bundle", bundle}
if opts != nil { if opts != nil {
oargs, err := opts.args() oargs, err := opts.args()
@ -73,14 +73,14 @@ func (r *Runhcs) Create(context context.Context, id, bundle string, opts *Create
} }
args = append(args, oargs...) args = append(args, oargs...)
} }
cmd := r.command(context, append(args, id)...) cmd := r.command(ctx, append(args, id)...)
if opts != nil && opts.IO != nil { if opts != nil && opts.IO != nil {
opts.Set(cmd) opts.Set(cmd)
} }
if cmd.Stdout == nil && cmd.Stderr == nil { if cmd.Stdout == nil && cmd.Stderr == nil {
data, err := cmdOutput(cmd, true) data, err := cmdOutput(cmd, true)
if err != nil { if err != nil {
return fmt.Errorf("%s: %s", err, data) return fmt.Errorf("%s: %s", err, data) //nolint:errorlint // legacy code
} }
return nil return nil
} }

View File

@ -22,7 +22,7 @@ func (opt *DeleteOpts) args() ([]string, error) {
// Delete any resources held by the container often used with detached // Delete any resources held by the container often used with detached
// containers. // containers.
func (r *Runhcs) Delete(context context.Context, id string, opts *DeleteOpts) error { func (r *Runhcs) Delete(ctx context.Context, id string, opts *DeleteOpts) error {
args := []string{"delete"} args := []string{"delete"}
if opts != nil { if opts != nil {
oargs, err := opts.args() oargs, err := opts.args()
@ -31,5 +31,5 @@ func (r *Runhcs) Delete(context context.Context, id string, opts *DeleteOpts) er
} }
args = append(args, oargs...) args = append(args, oargs...)
} }
return r.runOrError(r.command(context, append(args, id)...)) return r.runOrError(r.command(ctx, append(args, id)...))
} }

View File

@ -51,7 +51,7 @@ func (opt *ExecOpts) args() ([]string, error) {
// Exec executes an additional process inside the container based on the // Exec executes an additional process inside the container based on the
// oci.Process spec found at processFile. // oci.Process spec found at processFile.
func (r *Runhcs) Exec(context context.Context, id, processFile string, opts *ExecOpts) error { func (r *Runhcs) Exec(ctx context.Context, id, processFile string, opts *ExecOpts) error {
args := []string{"exec", "--process", processFile} args := []string{"exec", "--process", processFile}
if opts != nil { if opts != nil {
oargs, err := opts.args() oargs, err := opts.args()
@ -60,14 +60,14 @@ func (r *Runhcs) Exec(context context.Context, id, processFile string, opts *Exe
} }
args = append(args, oargs...) args = append(args, oargs...)
} }
cmd := r.command(context, append(args, id)...) cmd := r.command(ctx, append(args, id)...)
if opts != nil && opts.IO != nil { if opts != nil && opts.IO != nil {
opts.Set(cmd) opts.Set(cmd)
} }
if cmd.Stdout == nil && cmd.Stderr == nil { if cmd.Stdout == nil && cmd.Stderr == nil {
data, err := cmdOutput(cmd, true) data, err := cmdOutput(cmd, true)
if err != nil { if err != nil {
return fmt.Errorf("%s: %s", err, data) return fmt.Errorf("%s: %s", err, data) //nolint:errorlint // legacy code
} }
return nil return nil
} }

View File

@ -8,6 +8,6 @@ import (
// Kill sends the specified signal (default: SIGTERM) to the container's init // Kill sends the specified signal (default: SIGTERM) to the container's init
// process. // process.
func (r *Runhcs) Kill(context context.Context, id, signal string) error { func (r *Runhcs) Kill(ctx context.Context, id, signal string) error {
return r.runOrError(r.command(context, "kill", id, signal)) return r.runOrError(r.command(ctx, "kill", id, signal))
} }

View File

@ -17,8 +17,8 @@ type ContainerState = irunhcs.ContainerState
// //
// Note: This is specific to the Runhcs.Root namespace provided in the global // Note: This is specific to the Runhcs.Root namespace provided in the global
// settings. // settings.
func (r *Runhcs) List(context context.Context) ([]*ContainerState, error) { func (r *Runhcs) List(ctx context.Context) ([]*ContainerState, error) {
data, err := cmdOutput(r.command(context, "list", "--format=json"), false) data, err := cmdOutput(r.command(ctx, "list", "--format=json"), false)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -7,6 +7,6 @@ import (
) )
// Pause suspends all processes inside the container. // Pause suspends all processes inside the container.
func (r *Runhcs) Pause(context context.Context, id string) error { func (r *Runhcs) Pause(ctx context.Context, id string) error {
return r.runOrError(r.command(context, "pause", id)) return r.runOrError(r.command(ctx, "pause", id))
} }

View File

@ -9,10 +9,10 @@ import (
) )
// Ps displays the processes running inside a container. // Ps displays the processes running inside a container.
func (r *Runhcs) Ps(context context.Context, id string) ([]int, error) { func (r *Runhcs) Ps(ctx context.Context, id string) ([]int, error) {
data, err := cmdOutput(r.command(context, "ps", "--format=json", id), true) data, err := cmdOutput(r.command(ctx, "ps", "--format=json", id), true)
if err != nil { if err != nil {
return nil, fmt.Errorf("%s: %s", err, data) return nil, fmt.Errorf("%s: %s", err, data) //nolint:errorlint // legacy code
} }
var out []int var out []int
if err := json.Unmarshal(data, &out); err != nil { if err := json.Unmarshal(data, &out); err != nil {

View File

@ -22,7 +22,7 @@ func (opt *ResizeTTYOpts) args() ([]string, error) {
} }
// ResizeTTY updates the terminal size for a container process. // ResizeTTY updates the terminal size for a container process.
func (r *Runhcs) ResizeTTY(context context.Context, id string, width, height uint16, opts *ResizeTTYOpts) error { func (r *Runhcs) ResizeTTY(ctx context.Context, id string, width, height uint16, opts *ResizeTTYOpts) error {
args := []string{"resize-tty"} args := []string{"resize-tty"}
if opts != nil { if opts != nil {
oargs, err := opts.args() oargs, err := opts.args()
@ -31,5 +31,5 @@ func (r *Runhcs) ResizeTTY(context context.Context, id string, width, height uin
} }
args = append(args, oargs...) args = append(args, oargs...)
} }
return r.runOrError(r.command(context, append(args, id, strconv.FormatUint(uint64(width), 10), strconv.FormatUint(uint64(height), 10))...)) return r.runOrError(r.command(ctx, append(args, id, strconv.FormatUint(uint64(width), 10), strconv.FormatUint(uint64(height), 10))...))
} }

View File

@ -7,6 +7,6 @@ import (
) )
// Resume resumes all processes that have been previously paused. // Resume resumes all processes that have been previously paused.
func (r *Runhcs) Resume(context context.Context, id string) error { func (r *Runhcs) Resume(ctx context.Context, id string) error {
return r.runOrError(r.command(context, "resume", id)) return r.runOrError(r.command(ctx, "resume", id))
} }

View File

@ -7,6 +7,6 @@ import (
) )
// Start will start an already created container. // Start will start an already created container.
func (r *Runhcs) Start(context context.Context, id string) error { func (r *Runhcs) Start(ctx context.Context, id string) error {
return r.runOrError(r.command(context, "start", id)) return r.runOrError(r.command(ctx, "start", id))
} }

View File

@ -9,10 +9,10 @@ import (
) )
// State outputs the state of a container. // State outputs the state of a container.
func (r *Runhcs) State(context context.Context, id string) (*ContainerState, error) { func (r *Runhcs) State(ctx context.Context, id string) (*ContainerState, error) {
data, err := cmdOutput(r.command(context, "state", id), true) data, err := cmdOutput(r.command(ctx, "state", id), true)
if err != nil { if err != nil {
return nil, fmt.Errorf("%s: %s", err, data) return nil, fmt.Errorf("%s: %s", err, data) //nolint:errorlint // legacy code
} }
var out ContainerState var out ContainerState
if err := json.Unmarshal(data, &out); err != nil { if err := json.Unmarshal(data, &out); err != nil {

View File

@ -0,0 +1,166 @@
//go:build windows
// +build windows
package cim
import (
"archive/tar"
"bufio"
"context"
"errors"
"fmt"
"io"
"os"
"path"
"path/filepath"
"strings"
"github.com/Microsoft/go-winio/backuptar"
"github.com/Microsoft/hcsshim/internal/log"
"github.com/Microsoft/hcsshim/internal/wclayer/cim"
"github.com/Microsoft/hcsshim/pkg/ociwclayer"
"golang.org/x/sys/windows"
)
// ImportCimLayerFromTar reads a layer from an OCI layer tar stream and extracts it into
// the CIM format at the specified path. The caller must specify the parent layers, if
// any, ordered from lowest to highest layer.
// This function expects that the layer paths (both the layer that is being imported & the parent layers) are
// formatted like `.../snapshots/<id>` and the corresponding layer CIMs are located/will be created at
// `.../snapshots/cim-layers/<id>.cim`. Each CIM file also has corresponding region & objectID files and those
// files will also be stored inside the `cim-layers` directory.
//
// This function returns the total size of the layer's files, in bytes.
func ImportCimLayerFromTar(ctx context.Context, r io.Reader, layerPath string, parentLayerPaths []string) (int64, error) {
	if err := os.MkdirAll(layerPath, 0); err != nil {
		return 0, err
	}
	w, err := cim.NewCimLayerWriter(ctx, layerPath, parentLayerPaths)
	if err != nil {
		return 0, err
	}
	// Always close the writer, but a write failure takes precedence over a
	// close failure when reporting.
	size, writeErr := writeCimLayerFromTar(ctx, r, w, layerPath)
	closeErr := w.Close(ctx)
	switch {
	case writeErr != nil:
		return 0, writeErr
	case closeErr != nil:
		return 0, closeErr
	}
	return size, nil
}
// writeCimLayerFromTar streams the OCI layer tar read from `r` into the cim
// layer writer `w`, returning the sum of the sizes reported for all entries
// added via w.Add, in bytes. Whiteout entries become w.Remove calls, hard
// links become w.AddLink calls, and alternate data stream (ADS) entries that
// immediately follow a file are attached to that file via w.AddAlternateStream.
// The `layerPath` parameter is currently unused in this function.
func writeCimLayerFromTar(ctx context.Context, r io.Reader, w *cim.CimLayerWriter, layerPath string) (int64, error) {
	tr := tar.NewReader(r)
	// Buffer the data writes to the layer writer; flushed once per outer iteration.
	buf := bufio.NewWriter(w)
	size := int64(0)
	// Iterate through the files in the archive.
	hdr, loopErr := tr.Next()
	for loopErr == nil {
		// Bail out promptly if the caller cancelled.
		select {
		case <-ctx.Done():
			return 0, ctx.Err()
		default:
		}
		// Note: path is used instead of filepath to prevent OS specific handling
		// of the tar path
		base := path.Base(hdr.Name)
		if strings.HasPrefix(base, ociwclayer.WhiteoutPrefix) {
			// Whiteout entry: remove the named file from the layer.
			name := path.Join(path.Dir(hdr.Name), base[len(ociwclayer.WhiteoutPrefix):])
			if rErr := w.Remove(filepath.FromSlash(name)); rErr != nil {
				return 0, rErr
			}
			hdr, loopErr = tr.Next()
		} else if hdr.Typeflag == tar.TypeLink {
			// Hard link entry: no data payload, just record the link.
			if linkErr := w.AddLink(filepath.FromSlash(hdr.Name), filepath.FromSlash(hdr.Linkname)); linkErr != nil {
				return 0, linkErr
			}
			hdr, loopErr = tr.Next()
		} else {
			// All other entries: decode the Win32 file metadata carried in the
			// tar header and add the file to the layer.
			name, fileSize, fileInfo, err := backuptar.FileInfoFromHeader(hdr)
			if err != nil {
				return 0, err
			}
			sddl, err := backuptar.SecurityDescriptorFromTarHeader(hdr)
			if err != nil {
				return 0, err
			}
			eadata, err := backuptar.ExtendedAttributesFromTarHeader(hdr)
			if err != nil {
				return 0, err
			}
			var reparse []byte
			// As of now the only valid reparse data in a layer will be for a symlink. If file is
			// a symlink set reparse attribute and ensure reparse data buffer isn't
			// empty. Otherwise remove the reparse attribute.
			fileInfo.FileAttributes &^= uint32(windows.FILE_ATTRIBUTE_REPARSE_POINT)
			if hdr.Typeflag == tar.TypeSymlink {
				reparse = backuptar.EncodeReparsePointFromTarHeader(hdr)
				if len(reparse) > 0 {
					fileInfo.FileAttributes |= uint32(windows.FILE_ATTRIBUTE_REPARSE_POINT)
				}
			}
			if addErr := w.Add(filepath.FromSlash(name), fileInfo, fileSize, sddl, eadata, reparse); addErr != nil {
				return 0, addErr
			}
			if hdr.Typeflag == tar.TypeReg {
				// Only regular files carry a data payload in the tar stream.
				if _, cpErr := io.Copy(buf, tr); cpErr != nil {
					return 0, cpErr
				}
			}
			size += fileSize
			// Copy all the alternate data streams and return the next non-ADS header.
			var ahdr *tar.Header
			for {
				ahdr, loopErr = tr.Next()
				if loopErr != nil {
					break
				}
				if ahdr.Typeflag != tar.TypeReg || !strings.HasPrefix(ahdr.Name, hdr.Name+":") {
					// Not an ADS of the current file; continue the outer loop
					// with this header.
					hdr = ahdr
					break
				}
				// stream names have following format: '<filename>:<stream name>:$DATA'
				// $DATA is one of the valid types of streams. We currently only support
				// data streams so fail if this is some other type of stream.
				if !strings.HasSuffix(ahdr.Name, ":$DATA") {
					return 0, fmt.Errorf("stream types other than $DATA are not supported, found: %s", ahdr.Name)
				}
				if addErr := w.AddAlternateStream(filepath.FromSlash(ahdr.Name), uint64(ahdr.Size)); addErr != nil {
					return 0, addErr
				}
				if _, cpErr := io.Copy(buf, tr); cpErr != nil {
					return 0, cpErr
				}
			}
		}
		// Flush once per entry so a flush failure surfaces close to the writes
		// that caused it; don't mask an earlier loop error with it.
		if flushErr := buf.Flush(); flushErr != nil {
			if loopErr == nil {
				loopErr = flushErr
			} else {
				log.G(ctx).WithError(flushErr).Warn("flush buffer during layer write failed")
			}
		}
	}
	// io.EOF is the normal termination of the header loop.
	if !errors.Is(loopErr, io.EOF) {
		return 0, loopErr
	}
	return size, nil
}
// DestroyCimLayer removes the cim layer at `layerPath` by delegating to
// cim.DestroyCimLayer with a background context.
func DestroyCimLayer(layerPath string) error {
	ctx := context.Background()
	return cim.DestroyCimLayer(ctx, layerPath)
}

View File

@ -5,6 +5,7 @@ package ociwclayer
import ( import (
"archive/tar" "archive/tar"
"context" "context"
"errors"
"io" "io"
"path/filepath" "path/filepath"
@ -62,7 +63,7 @@ func writeTarFromLayer(ctx context.Context, r wclayer.LayerReader, w io.Writer)
} }
name, size, fileInfo, err := r.Next() name, size, fileInfo, err := r.Next()
if err == io.EOF { if errors.Is(err, io.EOF) {
break break
} }
if err != nil { if err != nil {

View File

@ -6,6 +6,7 @@ import (
"archive/tar" "archive/tar"
"bufio" "bufio"
"context" "context"
"errors"
"io" "io"
"os" "os"
"path" "path"
@ -102,7 +103,7 @@ func writeLayerFromTar(ctx context.Context, r io.Reader, w wclayer.LayerWriter,
totalSize += size totalSize += size
} }
} }
if err != io.EOF { if !errors.Is(err, io.EOF) {
return 0, err return 0, err
} }
return totalSize, nil return totalSize, nil

View File

@ -1,21 +0,0 @@
The MIT License (MIT)
Copyright (c) 2015 Microsoft
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,4 +0,0 @@
package manifest
// This is so that tests can include the .syso to manifest them to pick up the right Windows build
// TODO: Auto-generation of the .syso through rsrc or similar.

View File

@ -1,13 +0,0 @@
freebsd_task:
name: 'FreeBSD'
freebsd_instance:
image_family: freebsd-13-2
install_script:
- pkg update -f
- pkg install -y go
test_script:
# run tests as user "cirrus" instead of root
- pw useradd cirrus -m
- chown -R cirrus:cirrus .
- FSNOTIFY_BUFFER=4096 sudo --preserve-env=FSNOTIFY_BUFFER -u cirrus go test -parallel 1 -race ./...
- sudo --preserve-env=FSNOTIFY_BUFFER -u cirrus go test -parallel 1 -race ./...

View File

@ -4,4 +4,3 @@
# Output of go build ./cmd/fsnotify # Output of go build ./cmd/fsnotify
/fsnotify /fsnotify
/fsnotify.exe

View File

@ -1,87 +1,16 @@
# Changelog # Changelog
Unreleased All notable changes to this project will be documented in this file.
----------
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [Unreleased]
Nothing yet. Nothing yet.
1.7.0 - 2023-10-22 ## [1.6.0] - 2022-10-13
------------------
This version of fsnotify needs Go 1.17.
### Additions
- illumos: add FEN backend to support illumos and Solaris. ([#371])
- all: add `NewBufferedWatcher()` to use a buffered channel, which can be useful
in cases where you can't control the kernel buffer and receive a large number
of events in bursts. ([#550], [#572])
- all: add `AddWith()`, which is identical to `Add()` but allows passing
options. ([#521])
- windows: allow setting the ReadDirectoryChangesW() buffer size with
`fsnotify.WithBufferSize()`; the default of 64K is the highest value that
works on all platforms and is enough for most purposes, but in some cases a
highest buffer is needed. ([#521])
### Changes and fixes
- inotify: remove watcher if a watched path is renamed ([#518])
After a rename the reported name wasn't updated, or even an empty string.
Inotify doesn't provide any good facilities to update it, so just remove the
watcher. This is already how it worked on kqueue and FEN.
On Windows this does work, and remains working.
- windows: don't listen for file attribute changes ([#520])
File attribute changes are sent as `FILE_ACTION_MODIFIED` by the Windows API,
with no way to see if they're a file write or attribute change, so would show
up as a fsnotify.Write event. This is never useful, and could result in many
spurious Write events.
- windows: return `ErrEventOverflow` if the buffer is full ([#525])
Before it would merely return "short read", making it hard to detect this
error.
- kqueue: make sure events for all files are delivered properly when removing a
watched directory ([#526])
Previously they would get sent with `""` (empty string) or `"."` as the path
name.
- kqueue: don't emit spurious Create events for symbolic links ([#524])
The link would get resolved but kqueue would "forget" it already saw the link
itself, resulting on a Create for every Write event for the directory.
- all: return `ErrClosed` on `Add()` when the watcher is closed ([#516])
- other: add `Watcher.Errors` and `Watcher.Events` to the no-op `Watcher` in
`backend_other.go`, making it easier to use on unsupported platforms such as
WASM, AIX, etc. ([#528])
- other: use the `backend_other.go` no-op if the `appengine` build tag is set;
Google AppEngine forbids usage of the unsafe package so the inotify backend
won't compile there.
[#371]: https://github.com/fsnotify/fsnotify/pull/371
[#516]: https://github.com/fsnotify/fsnotify/pull/516
[#518]: https://github.com/fsnotify/fsnotify/pull/518
[#520]: https://github.com/fsnotify/fsnotify/pull/520
[#521]: https://github.com/fsnotify/fsnotify/pull/521
[#524]: https://github.com/fsnotify/fsnotify/pull/524
[#525]: https://github.com/fsnotify/fsnotify/pull/525
[#526]: https://github.com/fsnotify/fsnotify/pull/526
[#528]: https://github.com/fsnotify/fsnotify/pull/528
[#537]: https://github.com/fsnotify/fsnotify/pull/537
[#550]: https://github.com/fsnotify/fsnotify/pull/550
[#572]: https://github.com/fsnotify/fsnotify/pull/572
1.6.0 - 2022-10-13
------------------
This version of fsnotify needs Go 1.16 (this was already the case since 1.5.1, This version of fsnotify needs Go 1.16 (this was already the case since 1.5.1,
but not documented). It also increases the minimum Linux version to 2.6.32. but not documented). It also increases the minimum Linux version to 2.6.32.

View File

@ -1,31 +1,29 @@
fsnotify is a Go library to provide cross-platform filesystem notifications on fsnotify is a Go library to provide cross-platform filesystem notifications on
Windows, Linux, macOS, BSD, and illumos. Windows, Linux, macOS, and BSD systems.
Go 1.17 or newer is required; the full documentation is at Go 1.16 or newer is required; the full documentation is at
https://pkg.go.dev/github.com/fsnotify/fsnotify https://pkg.go.dev/github.com/fsnotify/fsnotify
**It's best to read the documentation at pkg.go.dev, as it's pinned to the last
released version, whereas this README is for the last development version which
may include additions/changes.**
--- ---
Platform support: Platform support:
| Backend | OS | Status | | Adapter | OS | Status |
| :-------------------- | :--------- | :------------------------------------------------------------------------ | | --------------------- | ---------------| -------------------------------------------------------------|
| inotify | Linux | Supported | | inotify | Linux 2.6.32+ | Supported |
| kqueue | BSD, macOS | Supported | | kqueue | BSD, macOS | Supported |
| ReadDirectoryChangesW | Windows | Supported | | ReadDirectoryChangesW | Windows | Supported |
| FEN | illumos | Supported | | FSEvents | macOS | [Planned](https://github.com/fsnotify/fsnotify/issues/11) |
| fanotify | Linux 5.9+ | [Not yet](https://github.com/fsnotify/fsnotify/issues/114) | | FEN | Solaris 11 | [In Progress](https://github.com/fsnotify/fsnotify/pull/371) |
| AHAFS | AIX | [aix branch]; experimental due to lack of maintainer and test environment | | fanotify | Linux 5.9+ | [Maybe](https://github.com/fsnotify/fsnotify/issues/114) |
| FSEvents | macOS | [Needs support in x/sys/unix][fsevents] | | USN Journals | Windows | [Maybe](https://github.com/fsnotify/fsnotify/issues/53) |
| USN Journals | Windows | [Needs support in x/sys/windows][usn] | | Polling | *All* | [Maybe](https://github.com/fsnotify/fsnotify/issues/9) |
| Polling | *All* | [Not yet](https://github.com/fsnotify/fsnotify/issues/9) |
Linux and illumos should include Android and Solaris, but these are currently Linux and macOS should include Android and iOS, but these are currently untested.
untested.
[fsevents]: https://github.com/fsnotify/fsnotify/issues/11#issuecomment-1279133120
[usn]: https://github.com/fsnotify/fsnotify/issues/53#issuecomment-1279829847
[aix branch]: https://github.com/fsnotify/fsnotify/issues/353#issuecomment-1284590129
Usage Usage
----- -----
@ -85,23 +83,20 @@ run with:
% go run ./cmd/fsnotify % go run ./cmd/fsnotify
Further detailed documentation can be found in godoc:
https://pkg.go.dev/github.com/fsnotify/fsnotify
FAQ FAQ
--- ---
### Will a file still be watched when it's moved to another directory? ### Will a file still be watched when it's moved to another directory?
No, not unless you are watching the location it was moved to. No, not unless you are watching the location it was moved to.
### Are subdirectories watched? ### Are subdirectories watched too?
No, you must add watches for any directory you want to watch (a recursive No, you must add watches for any directory you want to watch (a recursive
watcher is on the roadmap: [#18]). watcher is on the roadmap: [#18]).
[#18]: https://github.com/fsnotify/fsnotify/issues/18 [#18]: https://github.com/fsnotify/fsnotify/issues/18
### Do I have to watch the Error and Event channels in a goroutine? ### Do I have to watch the Error and Event channels in a goroutine?
Yes. You can read both channels in the same goroutine using `select` (you don't As of now, yes (you can read both channels in the same goroutine using `select`,
need a separate goroutine for both channels; see the example). you don't need a separate goroutine for both channels; see the example).
### Why don't notifications work with NFS, SMB, FUSE, /proc, or /sys? ### Why don't notifications work with NFS, SMB, FUSE, /proc, or /sys?
fsnotify requires support from underlying OS to work. The current NFS and SMB fsnotify requires support from underlying OS to work. The current NFS and SMB
@ -112,32 +107,6 @@ This could be fixed with a polling watcher ([#9]), but it's not yet implemented.
[#9]: https://github.com/fsnotify/fsnotify/issues/9 [#9]: https://github.com/fsnotify/fsnotify/issues/9
### Why do I get many Chmod events?
Some programs may generate a lot of attribute changes; for example Spotlight on
macOS, anti-virus programs, backup applications, and some others are known to do
this. As a rule, it's typically best to ignore Chmod events. They're often not
useful, and tend to cause problems.
Spotlight indexing on macOS can result in multiple events (see [#15]). A
temporary workaround is to add your folder(s) to the *Spotlight Privacy
settings* until we have a native FSEvents implementation (see [#11]).
[#11]: https://github.com/fsnotify/fsnotify/issues/11
[#15]: https://github.com/fsnotify/fsnotify/issues/15
### Watching a file doesn't work well
Watching individual files (rather than directories) is generally not recommended
as many programs (especially editors) update files atomically: it will write to
a temporary file which is then moved to to destination, overwriting the original
(or some variant thereof). The watcher on the original file is now lost, as that
no longer exists.
The upshot of this is that a power failure or crash won't leave a half-written
file.
Watch the parent directory and use `Event.Name` to filter out files you're not
interested in. There is an example of this in `cmd/fsnotify/file.go`.
Platform-specific notes Platform-specific notes
----------------------- -----------------------
### Linux ### Linux
@ -182,3 +151,11 @@ these platforms.
The sysctl variables `kern.maxfiles` and `kern.maxfilesperproc` can be used to The sysctl variables `kern.maxfiles` and `kern.maxfilesperproc` can be used to
control the maximum number of open files. control the maximum number of open files.
### macOS
Spotlight indexing on macOS can result in multiple events (see [#15]). A temporary
workaround is to add your folder(s) to the *Spotlight Privacy settings* until we
have a native FSEvents implementation (see [#11]).
[#11]: https://github.com/fsnotify/fsnotify/issues/11
[#15]: https://github.com/fsnotify/fsnotify/issues/15

View File

@ -1,19 +1,10 @@
//go:build solaris //go:build solaris
// +build solaris // +build solaris
// Note: the documentation on the Watcher type and methods is generated from
// mkdoc.zsh
package fsnotify package fsnotify
import ( import (
"errors" "errors"
"fmt"
"os"
"path/filepath"
"sync"
"golang.org/x/sys/unix"
) )
// Watcher watches a set of paths, delivering events on a channel. // Watcher watches a set of paths, delivering events on a channel.
@ -26,9 +17,9 @@ import (
// When a file is removed a Remove event won't be emitted until all file // When a file is removed a Remove event won't be emitted until all file
// descriptors are closed, and deletes will always emit a Chmod. For example: // descriptors are closed, and deletes will always emit a Chmod. For example:
// //
// fp := os.Open("file") // fp := os.Open("file")
// os.Remove("file") // Triggers Chmod // os.Remove("file") // Triggers Chmod
// fp.Close() // Triggers Remove // fp.Close() // Triggers Remove
// //
// This is the event that inotify sends, so not much can be changed about this. // This is the event that inotify sends, so not much can be changed about this.
// //
@ -42,16 +33,16 @@ import (
// //
// To increase them you can use sysctl or write the value to the /proc file: // To increase them you can use sysctl or write the value to the /proc file:
// //
// # Default values on Linux 5.18 // # Default values on Linux 5.18
// sysctl fs.inotify.max_user_watches=124983 // sysctl fs.inotify.max_user_watches=124983
// sysctl fs.inotify.max_user_instances=128 // sysctl fs.inotify.max_user_instances=128
// //
// To make the changes persist on reboot edit /etc/sysctl.conf or // To make the changes persist on reboot edit /etc/sysctl.conf or
// /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check // /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check
// your distro's documentation): // your distro's documentation):
// //
// fs.inotify.max_user_watches=124983 // fs.inotify.max_user_watches=124983
// fs.inotify.max_user_instances=128 // fs.inotify.max_user_instances=128
// //
// Reaching the limit will result in a "no space left on device" or "too many open // Reaching the limit will result in a "no space left on device" or "too many open
// files" error. // files" error.
@ -67,20 +58,14 @@ import (
// control the maximum number of open files, as well as /etc/login.conf on BSD // control the maximum number of open files, as well as /etc/login.conf on BSD
// systems. // systems.
// //
// # Windows notes // # macOS notes
// //
// Paths can be added as "C:\path\to\dir", but forward slashes // Spotlight indexing on macOS can result in multiple events (see [#15]). A
// ("C:/path/to/dir") will also work. // temporary workaround is to add your folder(s) to the "Spotlight Privacy
// Settings" until we have a native FSEvents implementation (see [#11]).
// //
// When a watched directory is removed it will always send an event for the // [#11]: https://github.com/fsnotify/fsnotify/issues/11
// directory itself, but may not send events for all files in that directory. // [#15]: https://github.com/fsnotify/fsnotify/issues/15
// Sometimes it will send events for all times, sometimes it will send no
// events, and often only for some files.
//
// The default ReadDirectoryChangesW() buffer size is 64K, which is the largest
// value that is guaranteed to work with SMB filesystems. If you have many
// events in quick succession this may not be enough, and you will have to use
// [WithBufferSize] to increase the value.
type Watcher struct { type Watcher struct {
// Events sends the filesystem change events. // Events sends the filesystem change events.
// //
@ -107,129 +92,44 @@ type Watcher struct {
// initiated by the user may show up as one or multiple // initiated by the user may show up as one or multiple
// writes, depending on when the system syncs things to // writes, depending on when the system syncs things to
// disk. For example when compiling a large Go program // disk. For example when compiling a large Go program
// you may get hundreds of Write events, and you may // you may get hundreds of Write events, so you
// want to wait until you've stopped receiving them // probably want to wait until you've stopped receiving
// (see the dedup example in cmd/fsnotify). // them (see the dedup example in cmd/fsnotify).
//
// Some systems may send Write event for directories
// when the directory content changes.
// //
// fsnotify.Chmod Attributes were changed. On Linux this is also sent // fsnotify.Chmod Attributes were changed. On Linux this is also sent
// when a file is removed (or more accurately, when a // when a file is removed (or more accurately, when a
// link to an inode is removed). On kqueue it's sent // link to an inode is removed). On kqueue it's sent
// when a file is truncated. On Windows it's never // and on kqueue when a file is truncated. On Windows
// sent. // it's never sent.
Events chan Event Events chan Event
// Errors sends any errors. // Errors sends any errors.
//
// ErrEventOverflow is used to indicate there are too many events:
//
// - inotify: There are too many queued events (fs.inotify.max_queued_events sysctl)
// - windows: The buffer size is too small; WithBufferSize() can be used to increase it.
// - kqueue, fen: Not used.
Errors chan error Errors chan error
mu sync.Mutex
port *unix.EventPort
done chan struct{} // Channel for sending a "quit message" to the reader goroutine
dirs map[string]struct{} // Explicitly watched directories
watches map[string]struct{} // Explicitly watched non-directories
} }
// NewWatcher creates a new Watcher. // NewWatcher creates a new Watcher.
func NewWatcher() (*Watcher, error) { func NewWatcher() (*Watcher, error) {
return NewBufferedWatcher(0) return nil, errors.New("FEN based watcher not yet supported for fsnotify\n")
} }
// NewBufferedWatcher creates a new Watcher with a buffered Watcher.Events // Close removes all watches and closes the events channel.
// channel.
//
// The main use case for this is situations with a very large number of events
// where the kernel buffer size can't be increased (e.g. due to lack of
// permissions). An unbuffered Watcher will perform better for almost all use
// cases, and whenever possible you will be better off increasing the kernel
// buffers instead of adding a large userspace buffer.
func NewBufferedWatcher(sz uint) (*Watcher, error) {
w := &Watcher{
Events: make(chan Event, sz),
Errors: make(chan error),
dirs: make(map[string]struct{}),
watches: make(map[string]struct{}),
done: make(chan struct{}),
}
var err error
w.port, err = unix.NewEventPort()
if err != nil {
return nil, fmt.Errorf("fsnotify.NewWatcher: %w", err)
}
go w.readEvents()
return w, nil
}
// sendEvent attempts to send an event to the user, returning true if the event
// was put in the channel successfully and false if the watcher has been closed.
func (w *Watcher) sendEvent(name string, op Op) (sent bool) {
select {
case w.Events <- Event{Name: name, Op: op}:
return true
case <-w.done:
return false
}
}
// sendError attempts to send an error to the user, returning true if the error
// was put in the channel successfully and false if the watcher has been closed.
func (w *Watcher) sendError(err error) (sent bool) {
select {
case w.Errors <- err:
return true
case <-w.done:
return false
}
}
func (w *Watcher) isClosed() bool {
select {
case <-w.done:
return true
default:
return false
}
}
// Close removes all watches and closes the Events channel.
func (w *Watcher) Close() error { func (w *Watcher) Close() error {
// Take the lock used by associateFile to prevent lingering events from return nil
// being processed after the close
w.mu.Lock()
defer w.mu.Unlock()
if w.isClosed() {
return nil
}
close(w.done)
return w.port.Close()
} }
// Add starts monitoring the path for changes. // Add starts monitoring the path for changes.
// //
// A path can only be watched once; watching it more than once is a no-op and will // A path can only be watched once; attempting to watch it more than once will
// not return an error. Paths that do not yet exist on the filesystem cannot be // return an error. Paths that do not yet exist on the filesystem cannot be
// watched. // added. A watch will be automatically removed if the path is deleted.
// //
// A watch will be automatically removed if the watched path is deleted or // A path will remain watched if it gets renamed to somewhere else on the same
// renamed. The exception is the Windows backend, which doesn't remove the // filesystem, but the monitor will get removed if the path gets deleted and
// watcher on renames. // re-created, or if it's moved to a different filesystem.
// //
// Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special // Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special
// filesystems (/proc, /sys, etc.) generally don't work. // filesystems (/proc, /sys, etc.) generally don't work.
// //
// Returns [ErrClosed] if [Watcher.Close] was called.
//
// See [Watcher.AddWith] for a version that allows adding options.
//
// # Watching directories // # Watching directories
// //
// All files in a directory are monitored, including new files that are created // All files in a directory are monitored, including new files that are created
@ -239,63 +139,15 @@ func (w *Watcher) Close() error {
// # Watching files // # Watching files
// //
// Watching individual files (rather than directories) is generally not // Watching individual files (rather than directories) is generally not
// recommended as many programs (especially editors) update files atomically: it // recommended as many tools update files atomically. Instead of "just" writing
// will write to a temporary file which is then moved to to destination, // to the file a temporary file will be written to first, and if successful the
// overwriting the original (or some variant thereof). The watcher on the // temporary file is moved to to destination removing the original, or some
// original file is now lost, as that no longer exists. // variant thereof. The watcher on the original file is now lost, as it no
// longer exists.
// //
// The upshot of this is that a power failure or crash won't leave a // Instead, watch the parent directory and use Event.Name to filter out files
// half-written file. // you're not interested in. There is an example of this in [cmd/fsnotify/file.go].
// func (w *Watcher) Add(name string) error {
// Watch the parent directory and use Event.Name to filter out files you're not
// interested in. There is an example of this in cmd/fsnotify/file.go.
func (w *Watcher) Add(name string) error { return w.AddWith(name) }
// AddWith is like [Watcher.Add], but allows adding options. When using Add()
// the defaults described below are used.
//
// Possible options are:
//
// - [WithBufferSize] sets the buffer size for the Windows backend; no-op on
// other platforms. The default is 64K (65536 bytes).
func (w *Watcher) AddWith(name string, opts ...addOpt) error {
if w.isClosed() {
return ErrClosed
}
if w.port.PathIsWatched(name) {
return nil
}
_ = getOptions(opts...)
// Currently we resolve symlinks that were explicitly requested to be
// watched. Otherwise we would use LStat here.
stat, err := os.Stat(name)
if err != nil {
return err
}
// Associate all files in the directory.
if stat.IsDir() {
err := w.handleDirectory(name, stat, true, w.associateFile)
if err != nil {
return err
}
w.mu.Lock()
w.dirs[name] = struct{}{}
w.mu.Unlock()
return nil
}
err = w.associateFile(name, stat, true)
if err != nil {
return err
}
w.mu.Lock()
w.watches[name] = struct{}{}
w.mu.Unlock()
return nil return nil
} }
@ -305,336 +157,6 @@ func (w *Watcher) AddWith(name string, opts ...addOpt) error {
// /tmp/dir and /tmp/dir/subdir then you will need to remove both. // /tmp/dir and /tmp/dir/subdir then you will need to remove both.
// //
// Removing a path that has not yet been added returns [ErrNonExistentWatch]. // Removing a path that has not yet been added returns [ErrNonExistentWatch].
//
// Returns nil if [Watcher.Close] was called.
func (w *Watcher) Remove(name string) error { func (w *Watcher) Remove(name string) error {
if w.isClosed() {
return nil
}
if !w.port.PathIsWatched(name) {
return fmt.Errorf("%w: %s", ErrNonExistentWatch, name)
}
// The user has expressed an intent. Immediately remove this name from
// whichever watch list it might be in. If it's not in there the delete
// doesn't cause harm.
w.mu.Lock()
delete(w.watches, name)
delete(w.dirs, name)
w.mu.Unlock()
stat, err := os.Stat(name)
if err != nil {
return err
}
// Remove associations for every file in the directory.
if stat.IsDir() {
err := w.handleDirectory(name, stat, false, w.dissociateFile)
if err != nil {
return err
}
return nil
}
err = w.port.DissociatePath(name)
if err != nil {
return err
}
return nil return nil
} }
// readEvents contains the main loop that runs in a goroutine watching for events.
func (w *Watcher) readEvents() {
// If this function returns, the watcher has been closed and we can close
// these channels
defer func() {
close(w.Errors)
close(w.Events)
}()
pevents := make([]unix.PortEvent, 8)
for {
count, err := w.port.Get(pevents, 1, nil)
if err != nil && err != unix.ETIME {
// Interrupted system call (count should be 0) ignore and continue
if errors.Is(err, unix.EINTR) && count == 0 {
continue
}
// Get failed because we called w.Close()
if errors.Is(err, unix.EBADF) && w.isClosed() {
return
}
// There was an error not caused by calling w.Close()
if !w.sendError(err) {
return
}
}
p := pevents[:count]
for _, pevent := range p {
if pevent.Source != unix.PORT_SOURCE_FILE {
// Event from unexpected source received; should never happen.
if !w.sendError(errors.New("Event from unexpected source received")) {
return
}
continue
}
err = w.handleEvent(&pevent)
if err != nil {
if !w.sendError(err) {
return
}
}
}
}
}
func (w *Watcher) handleDirectory(path string, stat os.FileInfo, follow bool, handler func(string, os.FileInfo, bool) error) error {
files, err := os.ReadDir(path)
if err != nil {
return err
}
// Handle all children of the directory.
for _, entry := range files {
finfo, err := entry.Info()
if err != nil {
return err
}
err = handler(filepath.Join(path, finfo.Name()), finfo, false)
if err != nil {
return err
}
}
// And finally handle the directory itself.
return handler(path, stat, follow)
}
// handleEvent might need to emit more than one fsnotify event if the events
// bitmap matches more than one event type (e.g. the file was both modified and
// had the attributes changed between when the association was created and the
// when event was returned)
func (w *Watcher) handleEvent(event *unix.PortEvent) error {
var (
events = event.Events
path = event.Path
fmode = event.Cookie.(os.FileMode)
reRegister = true
)
w.mu.Lock()
_, watchedDir := w.dirs[path]
_, watchedPath := w.watches[path]
w.mu.Unlock()
isWatched := watchedDir || watchedPath
if events&unix.FILE_DELETE != 0 {
if !w.sendEvent(path, Remove) {
return nil
}
reRegister = false
}
if events&unix.FILE_RENAME_FROM != 0 {
if !w.sendEvent(path, Rename) {
return nil
}
// Don't keep watching the new file name
reRegister = false
}
if events&unix.FILE_RENAME_TO != 0 {
// We don't report a Rename event for this case, because Rename events
// are interpreted as referring to the _old_ name of the file, and in
// this case the event would refer to the new name of the file. This
// type of rename event is not supported by fsnotify.
// inotify reports a Remove event in this case, so we simulate this
// here.
if !w.sendEvent(path, Remove) {
return nil
}
// Don't keep watching the file that was removed
reRegister = false
}
// The file is gone, nothing left to do.
if !reRegister {
if watchedDir {
w.mu.Lock()
delete(w.dirs, path)
w.mu.Unlock()
}
if watchedPath {
w.mu.Lock()
delete(w.watches, path)
w.mu.Unlock()
}
return nil
}
// If we didn't get a deletion the file still exists and we're going to have
// to watch it again. Let's Stat it now so that we can compare permissions
// and have what we need to continue watching the file
stat, err := os.Lstat(path)
if err != nil {
// This is unexpected, but we should still emit an event. This happens
// most often on "rm -r" of a subdirectory inside a watched directory We
// get a modify event of something happening inside, but by the time we
// get here, the sudirectory is already gone. Clearly we were watching
// this path but now it is gone. Let's tell the user that it was
// removed.
if !w.sendEvent(path, Remove) {
return nil
}
// Suppress extra write events on removed directories; they are not
// informative and can be confusing.
return nil
}
// resolve symlinks that were explicitly watched as we would have at Add()
// time. this helps suppress spurious Chmod events on watched symlinks
if isWatched {
stat, err = os.Stat(path)
if err != nil {
// The symlink still exists, but the target is gone. Report the
// Remove similar to above.
if !w.sendEvent(path, Remove) {
return nil
}
// Don't return the error
}
}
if events&unix.FILE_MODIFIED != 0 {
if fmode.IsDir() {
if watchedDir {
if err := w.updateDirectory(path); err != nil {
return err
}
} else {
if !w.sendEvent(path, Write) {
return nil
}
}
} else {
if !w.sendEvent(path, Write) {
return nil
}
}
}
if events&unix.FILE_ATTRIB != 0 && stat != nil {
// Only send Chmod if perms changed
if stat.Mode().Perm() != fmode.Perm() {
if !w.sendEvent(path, Chmod) {
return nil
}
}
}
if stat != nil {
// If we get here, it means we've hit an event above that requires us to
// continue watching the file or directory
return w.associateFile(path, stat, isWatched)
}
return nil
}
func (w *Watcher) updateDirectory(path string) error {
// The directory was modified, so we must find unwatched entities and watch
// them. If something was removed from the directory, nothing will happen,
// as everything else should still be watched.
files, err := os.ReadDir(path)
if err != nil {
return err
}
for _, entry := range files {
path := filepath.Join(path, entry.Name())
if w.port.PathIsWatched(path) {
continue
}
finfo, err := entry.Info()
if err != nil {
return err
}
err = w.associateFile(path, finfo, false)
if err != nil {
if !w.sendError(err) {
return nil
}
}
if !w.sendEvent(path, Create) {
return nil
}
}
return nil
}
func (w *Watcher) associateFile(path string, stat os.FileInfo, follow bool) error {
if w.isClosed() {
return ErrClosed
}
// This is primarily protecting the call to AssociatePath but it is
// important and intentional that the call to PathIsWatched is also
// protected by this mutex. Without this mutex, AssociatePath has been seen
// to error out that the path is already associated.
w.mu.Lock()
defer w.mu.Unlock()
if w.port.PathIsWatched(path) {
// Remove the old association in favor of this one If we get ENOENT,
// then while the x/sys/unix wrapper still thought that this path was
// associated, the underlying event port did not. This call will have
// cleared up that discrepancy. The most likely cause is that the event
// has fired but we haven't processed it yet.
err := w.port.DissociatePath(path)
if err != nil && err != unix.ENOENT {
return err
}
}
// FILE_NOFOLLOW means we watch symlinks themselves rather than their
// targets.
events := unix.FILE_MODIFIED | unix.FILE_ATTRIB | unix.FILE_NOFOLLOW
if follow {
// We *DO* follow symlinks for explicitly watched entries.
events = unix.FILE_MODIFIED | unix.FILE_ATTRIB
}
return w.port.AssociatePath(path, stat,
events,
stat.Mode())
}
func (w *Watcher) dissociateFile(path string, stat os.FileInfo, unused bool) error {
if !w.port.PathIsWatched(path) {
return nil
}
return w.port.DissociatePath(path)
}
// WatchList returns all paths explicitly added with [Watcher.Add] (and are not
// yet removed).
//
// Returns nil if [Watcher.Close] was called.
func (w *Watcher) WatchList() []string {
if w.isClosed() {
return nil
}
w.mu.Lock()
defer w.mu.Unlock()
entries := make([]string, 0, len(w.watches)+len(w.dirs))
for pathname := range w.dirs {
entries = append(entries, pathname)
}
for pathname := range w.watches {
entries = append(entries, pathname)
}
return entries
}

View File

@ -1,8 +1,5 @@
//go:build linux && !appengine //go:build linux
// +build linux,!appengine // +build linux
// Note: the documentation on the Watcher type and methods is generated from
// mkdoc.zsh
package fsnotify package fsnotify
@ -29,9 +26,9 @@ import (
// When a file is removed a Remove event won't be emitted until all file // When a file is removed a Remove event won't be emitted until all file
// descriptors are closed, and deletes will always emit a Chmod. For example: // descriptors are closed, and deletes will always emit a Chmod. For example:
// //
// fp := os.Open("file") // fp := os.Open("file")
// os.Remove("file") // Triggers Chmod // os.Remove("file") // Triggers Chmod
// fp.Close() // Triggers Remove // fp.Close() // Triggers Remove
// //
// This is the event that inotify sends, so not much can be changed about this. // This is the event that inotify sends, so not much can be changed about this.
// //
@ -45,16 +42,16 @@ import (
// //
// To increase them you can use sysctl or write the value to the /proc file: // To increase them you can use sysctl or write the value to the /proc file:
// //
// # Default values on Linux 5.18 // # Default values on Linux 5.18
// sysctl fs.inotify.max_user_watches=124983 // sysctl fs.inotify.max_user_watches=124983
// sysctl fs.inotify.max_user_instances=128 // sysctl fs.inotify.max_user_instances=128
// //
// To make the changes persist on reboot edit /etc/sysctl.conf or // To make the changes persist on reboot edit /etc/sysctl.conf or
// /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check // /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check
// your distro's documentation): // your distro's documentation):
// //
// fs.inotify.max_user_watches=124983 // fs.inotify.max_user_watches=124983
// fs.inotify.max_user_instances=128 // fs.inotify.max_user_instances=128
// //
// Reaching the limit will result in a "no space left on device" or "too many open // Reaching the limit will result in a "no space left on device" or "too many open
// files" error. // files" error.
@ -70,20 +67,14 @@ import (
// control the maximum number of open files, as well as /etc/login.conf on BSD // control the maximum number of open files, as well as /etc/login.conf on BSD
// systems. // systems.
// //
// # Windows notes // # macOS notes
// //
// Paths can be added as "C:\path\to\dir", but forward slashes // Spotlight indexing on macOS can result in multiple events (see [#15]). A
// ("C:/path/to/dir") will also work. // temporary workaround is to add your folder(s) to the "Spotlight Privacy
// Settings" until we have a native FSEvents implementation (see [#11]).
// //
// When a watched directory is removed it will always send an event for the // [#11]: https://github.com/fsnotify/fsnotify/issues/11
// directory itself, but may not send events for all files in that directory. // [#15]: https://github.com/fsnotify/fsnotify/issues/15
// Sometimes it will send events for all times, sometimes it will send no
// events, and often only for some files.
//
// The default ReadDirectoryChangesW() buffer size is 64K, which is the largest
// value that is guaranteed to work with SMB filesystems. If you have many
// events in quick succession this may not be enough, and you will have to use
// [WithBufferSize] to increase the value.
type Watcher struct { type Watcher struct {
// Events sends the filesystem change events. // Events sends the filesystem change events.
// //
@ -110,148 +101,36 @@ type Watcher struct {
// initiated by the user may show up as one or multiple // initiated by the user may show up as one or multiple
// writes, depending on when the system syncs things to // writes, depending on when the system syncs things to
// disk. For example when compiling a large Go program // disk. For example when compiling a large Go program
// you may get hundreds of Write events, and you may // you may get hundreds of Write events, so you
// want to wait until you've stopped receiving them // probably want to wait until you've stopped receiving
// (see the dedup example in cmd/fsnotify). // them (see the dedup example in cmd/fsnotify).
//
// Some systems may send Write event for directories
// when the directory content changes.
// //
// fsnotify.Chmod Attributes were changed. On Linux this is also sent // fsnotify.Chmod Attributes were changed. On Linux this is also sent
// when a file is removed (or more accurately, when a // when a file is removed (or more accurately, when a
// link to an inode is removed). On kqueue it's sent // link to an inode is removed). On kqueue it's sent
// when a file is truncated. On Windows it's never // and on kqueue when a file is truncated. On Windows
// sent. // it's never sent.
Events chan Event Events chan Event
// Errors sends any errors. // Errors sends any errors.
//
// ErrEventOverflow is used to indicate there are too many events:
//
// - inotify: There are too many queued events (fs.inotify.max_queued_events sysctl)
// - windows: The buffer size is too small; WithBufferSize() can be used to increase it.
// - kqueue, fen: Not used.
Errors chan error Errors chan error
// Store fd here as os.File.Read() will no longer return on close after // Store fd here as os.File.Read() will no longer return on close after
// calling Fd(). See: https://github.com/golang/go/issues/26439 // calling Fd(). See: https://github.com/golang/go/issues/26439
fd int fd int
mu sync.Mutex // Map access
inotifyFile *os.File inotifyFile *os.File
watches *watches watches map[string]*watch // Map of inotify watches (key: path)
done chan struct{} // Channel for sending a "quit message" to the reader goroutine paths map[int]string // Map of watched paths (key: watch descriptor)
closeMu sync.Mutex done chan struct{} // Channel for sending a "quit message" to the reader goroutine
doneResp chan struct{} // Channel to respond to Close doneResp chan struct{} // Channel to respond to Close
}
type (
watches struct {
mu sync.RWMutex
wd map[uint32]*watch // wd → watch
path map[string]uint32 // pathname → wd
}
watch struct {
wd uint32 // Watch descriptor (as returned by the inotify_add_watch() syscall)
flags uint32 // inotify flags of this watch (see inotify(7) for the list of valid flags)
path string // Watch path.
}
)
func newWatches() *watches {
return &watches{
wd: make(map[uint32]*watch),
path: make(map[string]uint32),
}
}
func (w *watches) len() int {
w.mu.RLock()
defer w.mu.RUnlock()
return len(w.wd)
}
func (w *watches) add(ww *watch) {
w.mu.Lock()
defer w.mu.Unlock()
w.wd[ww.wd] = ww
w.path[ww.path] = ww.wd
}
func (w *watches) remove(wd uint32) {
w.mu.Lock()
defer w.mu.Unlock()
delete(w.path, w.wd[wd].path)
delete(w.wd, wd)
}
func (w *watches) removePath(path string) (uint32, bool) {
w.mu.Lock()
defer w.mu.Unlock()
wd, ok := w.path[path]
if !ok {
return 0, false
}
delete(w.path, path)
delete(w.wd, wd)
return wd, true
}
func (w *watches) byPath(path string) *watch {
w.mu.RLock()
defer w.mu.RUnlock()
return w.wd[w.path[path]]
}
func (w *watches) byWd(wd uint32) *watch {
w.mu.RLock()
defer w.mu.RUnlock()
return w.wd[wd]
}
func (w *watches) updatePath(path string, f func(*watch) (*watch, error)) error {
w.mu.Lock()
defer w.mu.Unlock()
var existing *watch
wd, ok := w.path[path]
if ok {
existing = w.wd[wd]
}
upd, err := f(existing)
if err != nil {
return err
}
if upd != nil {
w.wd[upd.wd] = upd
w.path[upd.path] = upd.wd
if upd.wd != wd {
delete(w.wd, wd)
}
}
return nil
} }
// NewWatcher creates a new Watcher. // NewWatcher creates a new Watcher.
func NewWatcher() (*Watcher, error) { func NewWatcher() (*Watcher, error) {
return NewBufferedWatcher(0) // Create inotify fd
} // Need to set the FD to nonblocking mode in order for SetDeadline methods to work
// Otherwise, blocking i/o operations won't terminate on close
// NewBufferedWatcher creates a new Watcher with a buffered Watcher.Events
// channel.
//
// The main use case for this is situations with a very large number of events
// where the kernel buffer size can't be increased (e.g. due to lack of
// permissions). An unbuffered Watcher will perform better for almost all use
// cases, and whenever possible you will be better off increasing the kernel
// buffers instead of adding a large userspace buffer.
func NewBufferedWatcher(sz uint) (*Watcher, error) {
// Need to set nonblocking mode for SetDeadline to work, otherwise blocking
// I/O operations won't terminate on close.
fd, errno := unix.InotifyInit1(unix.IN_CLOEXEC | unix.IN_NONBLOCK) fd, errno := unix.InotifyInit1(unix.IN_CLOEXEC | unix.IN_NONBLOCK)
if fd == -1 { if fd == -1 {
return nil, errno return nil, errno
@ -260,8 +139,9 @@ func NewBufferedWatcher(sz uint) (*Watcher, error) {
w := &Watcher{ w := &Watcher{
fd: fd, fd: fd,
inotifyFile: os.NewFile(uintptr(fd), ""), inotifyFile: os.NewFile(uintptr(fd), ""),
watches: newWatches(), watches: make(map[string]*watch),
Events: make(chan Event, sz), paths: make(map[int]string),
Events: make(chan Event),
Errors: make(chan error), Errors: make(chan error),
done: make(chan struct{}), done: make(chan struct{}),
doneResp: make(chan struct{}), doneResp: make(chan struct{}),
@ -277,8 +157,8 @@ func (w *Watcher) sendEvent(e Event) bool {
case w.Events <- e: case w.Events <- e:
return true return true
case <-w.done: case <-w.done:
return false
} }
return false
} }
// Returns true if the error was sent, or false if watcher is closed. // Returns true if the error was sent, or false if watcher is closed.
@ -300,15 +180,17 @@ func (w *Watcher) isClosed() bool {
} }
} }
// Close removes all watches and closes the Events channel. // Close removes all watches and closes the events channel.
func (w *Watcher) Close() error { func (w *Watcher) Close() error {
w.closeMu.Lock() w.mu.Lock()
if w.isClosed() { if w.isClosed() {
w.closeMu.Unlock() w.mu.Unlock()
return nil return nil
} }
// Send 'close' signal to goroutine, and set the Watcher to closed.
close(w.done) close(w.done)
w.closeMu.Unlock() w.mu.Unlock()
// Causes any blocking reads to return with an error, provided the file // Causes any blocking reads to return with an error, provided the file
// still supports deadline operations. // still supports deadline operations.
@ -325,21 +207,17 @@ func (w *Watcher) Close() error {
// Add starts monitoring the path for changes. // Add starts monitoring the path for changes.
// //
// A path can only be watched once; watching it more than once is a no-op and will // A path can only be watched once; attempting to watch it more than once will
// not return an error. Paths that do not yet exist on the filesystem cannot be // return an error. Paths that do not yet exist on the filesystem cannot be
// watched. // added. A watch will be automatically removed if the path is deleted.
// //
// A watch will be automatically removed if the watched path is deleted or // A path will remain watched if it gets renamed to somewhere else on the same
// renamed. The exception is the Windows backend, which doesn't remove the // filesystem, but the monitor will get removed if the path gets deleted and
// watcher on renames. // re-created, or if it's moved to a different filesystem.
// //
// Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special // Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special
// filesystems (/proc, /sys, etc.) generally don't work. // filesystems (/proc, /sys, etc.) generally don't work.
// //
// Returns [ErrClosed] if [Watcher.Close] was called.
//
// See [Watcher.AddWith] for a version that allows adding options.
//
// # Watching directories // # Watching directories
// //
// All files in a directory are monitored, including new files that are created // All files in a directory are monitored, including new files that are created
@ -349,59 +227,44 @@ func (w *Watcher) Close() error {
// # Watching files // # Watching files
// //
// Watching individual files (rather than directories) is generally not // Watching individual files (rather than directories) is generally not
// recommended as many programs (especially editors) update files atomically: it // recommended as many tools update files atomically. Instead of "just" writing
// will write to a temporary file which is then moved to to destination, // to the file a temporary file will be written to first, and if successful the
// overwriting the original (or some variant thereof). The watcher on the // temporary file is moved to to destination removing the original, or some
// original file is now lost, as that no longer exists. // variant thereof. The watcher on the original file is now lost, as it no
// longer exists.
// //
// The upshot of this is that a power failure or crash won't leave a // Instead, watch the parent directory and use Event.Name to filter out files
// half-written file. // you're not interested in. There is an example of this in [cmd/fsnotify/file.go].
// func (w *Watcher) Add(name string) error {
// Watch the parent directory and use Event.Name to filter out files you're not
// interested in. There is an example of this in cmd/fsnotify/file.go.
func (w *Watcher) Add(name string) error { return w.AddWith(name) }
// AddWith is like [Watcher.Add], but allows adding options. When using Add()
// the defaults described below are used.
//
// Possible options are:
//
// - [WithBufferSize] sets the buffer size for the Windows backend; no-op on
// other platforms. The default is 64K (65536 bytes).
func (w *Watcher) AddWith(name string, opts ...addOpt) error {
if w.isClosed() {
return ErrClosed
}
name = filepath.Clean(name) name = filepath.Clean(name)
_ = getOptions(opts...) if w.isClosed() {
return errors.New("inotify instance already closed")
}
var flags uint32 = unix.IN_MOVED_TO | unix.IN_MOVED_FROM | var flags uint32 = unix.IN_MOVED_TO | unix.IN_MOVED_FROM |
unix.IN_CREATE | unix.IN_ATTRIB | unix.IN_MODIFY | unix.IN_CREATE | unix.IN_ATTRIB | unix.IN_MODIFY |
unix.IN_MOVE_SELF | unix.IN_DELETE | unix.IN_DELETE_SELF unix.IN_MOVE_SELF | unix.IN_DELETE | unix.IN_DELETE_SELF
return w.watches.updatePath(name, func(existing *watch) (*watch, error) { w.mu.Lock()
if existing != nil { defer w.mu.Unlock()
flags |= existing.flags | unix.IN_MASK_ADD watchEntry := w.watches[name]
} if watchEntry != nil {
flags |= watchEntry.flags | unix.IN_MASK_ADD
}
wd, errno := unix.InotifyAddWatch(w.fd, name, flags)
if wd == -1 {
return errno
}
wd, err := unix.InotifyAddWatch(w.fd, name, flags) if watchEntry == nil {
if wd == -1 { w.watches[name] = &watch{wd: uint32(wd), flags: flags}
return nil, err w.paths[wd] = name
} } else {
watchEntry.wd = uint32(wd)
watchEntry.flags = flags
}
if existing == nil { return nil
return &watch{
wd: uint32(wd),
path: name,
flags: flags,
}, nil
}
existing.wd = uint32(wd)
existing.flags = flags
return existing, nil
})
} }
// Remove stops monitoring the path for changes. // Remove stops monitoring the path for changes.
@ -410,22 +273,32 @@ func (w *Watcher) AddWith(name string, opts ...addOpt) error {
// /tmp/dir and /tmp/dir/subdir then you will need to remove both. // /tmp/dir and /tmp/dir/subdir then you will need to remove both.
// //
// Removing a path that has not yet been added returns [ErrNonExistentWatch]. // Removing a path that has not yet been added returns [ErrNonExistentWatch].
//
// Returns nil if [Watcher.Close] was called.
func (w *Watcher) Remove(name string) error { func (w *Watcher) Remove(name string) error {
if w.isClosed() { name = filepath.Clean(name)
return nil
}
return w.remove(filepath.Clean(name))
}
func (w *Watcher) remove(name string) error { // Fetch the watch.
wd, ok := w.watches.removePath(name) w.mu.Lock()
defer w.mu.Unlock()
watch, ok := w.watches[name]
// Remove it from inotify.
if !ok { if !ok {
return fmt.Errorf("%w: %s", ErrNonExistentWatch, name) return fmt.Errorf("%w: %s", ErrNonExistentWatch, name)
} }
success, errno := unix.InotifyRmWatch(w.fd, wd) // We successfully removed the watch if InotifyRmWatch doesn't return an
// error, we need to clean up our internal state to ensure it matches
// inotify's kernel state.
delete(w.paths, int(watch.wd))
delete(w.watches, name)
// inotify_rm_watch will return EINVAL if the file has been deleted;
// the inotify will already have been removed.
// watches and pathes are deleted in ignoreLinux() implicitly and asynchronously
// by calling inotify_rm_watch() below. e.g. readEvents() goroutine receives IN_IGNORE
// so that EINVAL means that the wd is being rm_watch()ed or its file removed
// by another thread and we have not received IN_IGNORE event.
success, errno := unix.InotifyRmWatch(w.fd, watch.wd)
if success == -1 { if success == -1 {
// TODO: Perhaps it's not helpful to return an error here in every case; // TODO: Perhaps it's not helpful to return an error here in every case;
// The only two possible errors are: // The only two possible errors are:
@ -439,28 +312,28 @@ func (w *Watcher) remove(name string) error {
// are watching is deleted. // are watching is deleted.
return errno return errno
} }
return nil return nil
} }
// WatchList returns all paths explicitly added with [Watcher.Add] (and are not // WatchList returns all paths added with [Add] (and are not yet removed).
// yet removed).
//
// Returns nil if [Watcher.Close] was called.
func (w *Watcher) WatchList() []string { func (w *Watcher) WatchList() []string {
if w.isClosed() { w.mu.Lock()
return nil defer w.mu.Unlock()
}
entries := make([]string, 0, w.watches.len()) entries := make([]string, 0, len(w.watches))
w.watches.mu.RLock() for pathname := range w.watches {
for pathname := range w.watches.path {
entries = append(entries, pathname) entries = append(entries, pathname)
} }
w.watches.mu.RUnlock()
return entries return entries
} }
type watch struct {
wd uint32 // Watch descriptor (as returned by the inotify_add_watch() syscall)
flags uint32 // inotify flags of this watch (see inotify(7) for the list of valid flags)
}
// readEvents reads from the inotify file descriptor, converts the // readEvents reads from the inotify file descriptor, converts the
// received events into Event objects and sends them via the Events channel // received events into Event objects and sends them via the Events channel
func (w *Watcher) readEvents() { func (w *Watcher) readEvents() {
@ -494,11 +367,14 @@ func (w *Watcher) readEvents() {
if n < unix.SizeofInotifyEvent { if n < unix.SizeofInotifyEvent {
var err error var err error
if n == 0 { if n == 0 {
err = io.EOF // If EOF is received. This should really never happen. // If EOF is received. This should really never happen.
err = io.EOF
} else if n < 0 { } else if n < 0 {
err = errno // If an error occurred while reading. // If an error occurred while reading.
err = errno
} else { } else {
err = errors.New("notify: short read in readEvents()") // Read was too short. // Read was too short.
err = errors.New("notify: short read in readEvents()")
} }
if !w.sendError(err) { if !w.sendError(err) {
return return
@ -527,29 +403,18 @@ func (w *Watcher) readEvents() {
// doesn't append the filename to the event, but we would like to always fill the // doesn't append the filename to the event, but we would like to always fill the
// the "Name" field with a valid filename. We retrieve the path of the watch from // the "Name" field with a valid filename. We retrieve the path of the watch from
// the "paths" map. // the "paths" map.
watch := w.watches.byWd(uint32(raw.Wd)) w.mu.Lock()
name, ok := w.paths[int(raw.Wd)]
// IN_DELETE_SELF occurs when the file/directory being watched is removed.
// This is a sign to clean up the maps, otherwise we are no longer in sync
// with the inotify kernel state which has already deleted the watch
// automatically.
if ok && mask&unix.IN_DELETE_SELF == unix.IN_DELETE_SELF {
delete(w.paths, int(raw.Wd))
delete(w.watches, name)
}
w.mu.Unlock()
// inotify will automatically remove the watch on deletes; just need
// to clean our state here.
if watch != nil && mask&unix.IN_DELETE_SELF == unix.IN_DELETE_SELF {
w.watches.remove(watch.wd)
}
// We can't really update the state when a watched path is moved;
// only IN_MOVE_SELF is sent and not IN_MOVED_{FROM,TO}. So remove
// the watch.
if watch != nil && mask&unix.IN_MOVE_SELF == unix.IN_MOVE_SELF {
err := w.remove(watch.path)
if err != nil && !errors.Is(err, ErrNonExistentWatch) {
if !w.sendError(err) {
return
}
}
}
var name string
if watch != nil {
name = watch.path
}
if nameLen > 0 { if nameLen > 0 {
// Point "bytes" at the first byte of the filename // Point "bytes" at the first byte of the filename
bytes := (*[unix.PathMax]byte)(unsafe.Pointer(&buf[offset+unix.SizeofInotifyEvent]))[:nameLen:nameLen] bytes := (*[unix.PathMax]byte)(unsafe.Pointer(&buf[offset+unix.SizeofInotifyEvent]))[:nameLen:nameLen]

View File

@ -1,14 +1,12 @@
//go:build freebsd || openbsd || netbsd || dragonfly || darwin //go:build freebsd || openbsd || netbsd || dragonfly || darwin
// +build freebsd openbsd netbsd dragonfly darwin // +build freebsd openbsd netbsd dragonfly darwin
// Note: the documentation on the Watcher type and methods is generated from
// mkdoc.zsh
package fsnotify package fsnotify
import ( import (
"errors" "errors"
"fmt" "fmt"
"io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
"sync" "sync"
@ -26,9 +24,9 @@ import (
// When a file is removed a Remove event won't be emitted until all file // When a file is removed a Remove event won't be emitted until all file
// descriptors are closed, and deletes will always emit a Chmod. For example: // descriptors are closed, and deletes will always emit a Chmod. For example:
// //
// fp := os.Open("file") // fp := os.Open("file")
// os.Remove("file") // Triggers Chmod // os.Remove("file") // Triggers Chmod
// fp.Close() // Triggers Remove // fp.Close() // Triggers Remove
// //
// This is the event that inotify sends, so not much can be changed about this. // This is the event that inotify sends, so not much can be changed about this.
// //
@ -42,16 +40,16 @@ import (
// //
// To increase them you can use sysctl or write the value to the /proc file: // To increase them you can use sysctl or write the value to the /proc file:
// //
// # Default values on Linux 5.18 // # Default values on Linux 5.18
// sysctl fs.inotify.max_user_watches=124983 // sysctl fs.inotify.max_user_watches=124983
// sysctl fs.inotify.max_user_instances=128 // sysctl fs.inotify.max_user_instances=128
// //
// To make the changes persist on reboot edit /etc/sysctl.conf or // To make the changes persist on reboot edit /etc/sysctl.conf or
// /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check // /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check
// your distro's documentation): // your distro's documentation):
// //
// fs.inotify.max_user_watches=124983 // fs.inotify.max_user_watches=124983
// fs.inotify.max_user_instances=128 // fs.inotify.max_user_instances=128
// //
// Reaching the limit will result in a "no space left on device" or "too many open // Reaching the limit will result in a "no space left on device" or "too many open
// files" error. // files" error.
@ -67,20 +65,14 @@ import (
// control the maximum number of open files, as well as /etc/login.conf on BSD // control the maximum number of open files, as well as /etc/login.conf on BSD
// systems. // systems.
// //
// # Windows notes // # macOS notes
// //
// Paths can be added as "C:\path\to\dir", but forward slashes // Spotlight indexing on macOS can result in multiple events (see [#15]). A
// ("C:/path/to/dir") will also work. // temporary workaround is to add your folder(s) to the "Spotlight Privacy
// Settings" until we have a native FSEvents implementation (see [#11]).
// //
// When a watched directory is removed it will always send an event for the // [#11]: https://github.com/fsnotify/fsnotify/issues/11
// directory itself, but may not send events for all files in that directory. // [#15]: https://github.com/fsnotify/fsnotify/issues/15
// Sometimes it will send events for all times, sometimes it will send no
// events, and often only for some files.
//
// The default ReadDirectoryChangesW() buffer size is 64K, which is the largest
// value that is guaranteed to work with SMB filesystems. If you have many
// events in quick succession this may not be enough, and you will have to use
// [WithBufferSize] to increase the value.
type Watcher struct { type Watcher struct {
// Events sends the filesystem change events. // Events sends the filesystem change events.
// //
@ -107,27 +99,18 @@ type Watcher struct {
// initiated by the user may show up as one or multiple // initiated by the user may show up as one or multiple
// writes, depending on when the system syncs things to // writes, depending on when the system syncs things to
// disk. For example when compiling a large Go program // disk. For example when compiling a large Go program
// you may get hundreds of Write events, and you may // you may get hundreds of Write events, so you
// want to wait until you've stopped receiving them // probably want to wait until you've stopped receiving
// (see the dedup example in cmd/fsnotify). // them (see the dedup example in cmd/fsnotify).
//
// Some systems may send Write event for directories
// when the directory content changes.
// //
// fsnotify.Chmod Attributes were changed. On Linux this is also sent // fsnotify.Chmod Attributes were changed. On Linux this is also sent
// when a file is removed (or more accurately, when a // when a file is removed (or more accurately, when a
// link to an inode is removed). On kqueue it's sent // link to an inode is removed). On kqueue it's sent
// when a file is truncated. On Windows it's never // and on kqueue when a file is truncated. On Windows
// sent. // it's never sent.
Events chan Event Events chan Event
// Errors sends any errors. // Errors sends any errors.
//
// ErrEventOverflow is used to indicate there are too many events:
//
// - inotify: There are too many queued events (fs.inotify.max_queued_events sysctl)
// - windows: The buffer size is too small; WithBufferSize() can be used to increase it.
// - kqueue, fen: Not used.
Errors chan error Errors chan error
done chan struct{} done chan struct{}
@ -150,18 +133,6 @@ type pathInfo struct {
// NewWatcher creates a new Watcher. // NewWatcher creates a new Watcher.
func NewWatcher() (*Watcher, error) { func NewWatcher() (*Watcher, error) {
return NewBufferedWatcher(0)
}
// NewBufferedWatcher creates a new Watcher with a buffered Watcher.Events
// channel.
//
// The main use case for this is situations with a very large number of events
// where the kernel buffer size can't be increased (e.g. due to lack of
// permissions). An unbuffered Watcher will perform better for almost all use
// cases, and whenever possible you will be better off increasing the kernel
// buffers instead of adding a large userspace buffer.
func NewBufferedWatcher(sz uint) (*Watcher, error) {
kq, closepipe, err := newKqueue() kq, closepipe, err := newKqueue()
if err != nil { if err != nil {
return nil, err return nil, err
@ -176,7 +147,7 @@ func NewBufferedWatcher(sz uint) (*Watcher, error) {
paths: make(map[int]pathInfo), paths: make(map[int]pathInfo),
fileExists: make(map[string]struct{}), fileExists: make(map[string]struct{}),
userWatches: make(map[string]struct{}), userWatches: make(map[string]struct{}),
Events: make(chan Event, sz), Events: make(chan Event),
Errors: make(chan error), Errors: make(chan error),
done: make(chan struct{}), done: make(chan struct{}),
} }
@ -226,8 +197,8 @@ func (w *Watcher) sendEvent(e Event) bool {
case w.Events <- e: case w.Events <- e:
return true return true
case <-w.done: case <-w.done:
return false
} }
return false
} }
// Returns true if the error was sent, or false if watcher is closed. // Returns true if the error was sent, or false if watcher is closed.
@ -236,11 +207,11 @@ func (w *Watcher) sendError(err error) bool {
case w.Errors <- err: case w.Errors <- err:
return true return true
case <-w.done: case <-w.done:
return false
} }
return false
} }
// Close removes all watches and closes the Events channel. // Close removes all watches and closes the events channel.
func (w *Watcher) Close() error { func (w *Watcher) Close() error {
w.mu.Lock() w.mu.Lock()
if w.isClosed { if w.isClosed {
@ -268,21 +239,17 @@ func (w *Watcher) Close() error {
// Add starts monitoring the path for changes. // Add starts monitoring the path for changes.
// //
// A path can only be watched once; watching it more than once is a no-op and will // A path can only be watched once; attempting to watch it more than once will
// not return an error. Paths that do not yet exist on the filesystem cannot be // return an error. Paths that do not yet exist on the filesystem cannot be
// watched. // added. A watch will be automatically removed if the path is deleted.
// //
// A watch will be automatically removed if the watched path is deleted or // A path will remain watched if it gets renamed to somewhere else on the same
// renamed. The exception is the Windows backend, which doesn't remove the // filesystem, but the monitor will get removed if the path gets deleted and
// watcher on renames. // re-created, or if it's moved to a different filesystem.
// //
// Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special // Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special
// filesystems (/proc, /sys, etc.) generally don't work. // filesystems (/proc, /sys, etc.) generally don't work.
// //
// Returns [ErrClosed] if [Watcher.Close] was called.
//
// See [Watcher.AddWith] for a version that allows adding options.
//
// # Watching directories // # Watching directories
// //
// All files in a directory are monitored, including new files that are created // All files in a directory are monitored, including new files that are created
@ -292,28 +259,15 @@ func (w *Watcher) Close() error {
// # Watching files // # Watching files
// //
// Watching individual files (rather than directories) is generally not // Watching individual files (rather than directories) is generally not
// recommended as many programs (especially editors) update files atomically: it // recommended as many tools update files atomically. Instead of "just" writing
// will write to a temporary file which is then moved to to destination, // to the file a temporary file will be written to first, and if successful the
// overwriting the original (or some variant thereof). The watcher on the // temporary file is moved to to destination removing the original, or some
// original file is now lost, as that no longer exists. // variant thereof. The watcher on the original file is now lost, as it no
// longer exists.
// //
// The upshot of this is that a power failure or crash won't leave a // Instead, watch the parent directory and use Event.Name to filter out files
// half-written file. // you're not interested in. There is an example of this in [cmd/fsnotify/file.go].
// func (w *Watcher) Add(name string) error {
// Watch the parent directory and use Event.Name to filter out files you're not
// interested in. There is an example of this in cmd/fsnotify/file.go.
func (w *Watcher) Add(name string) error { return w.AddWith(name) }
// AddWith is like [Watcher.Add], but allows adding options. When using Add()
// the defaults described below are used.
//
// Possible options are:
//
// - [WithBufferSize] sets the buffer size for the Windows backend; no-op on
// other platforms. The default is 64K (65536 bytes).
func (w *Watcher) AddWith(name string, opts ...addOpt) error {
_ = getOptions(opts...)
w.mu.Lock() w.mu.Lock()
w.userWatches[name] = struct{}{} w.userWatches[name] = struct{}{}
w.mu.Unlock() w.mu.Unlock()
@ -327,19 +281,9 @@ func (w *Watcher) AddWith(name string, opts ...addOpt) error {
// /tmp/dir and /tmp/dir/subdir then you will need to remove both. // /tmp/dir and /tmp/dir/subdir then you will need to remove both.
// //
// Removing a path that has not yet been added returns [ErrNonExistentWatch]. // Removing a path that has not yet been added returns [ErrNonExistentWatch].
//
// Returns nil if [Watcher.Close] was called.
func (w *Watcher) Remove(name string) error { func (w *Watcher) Remove(name string) error {
return w.remove(name, true)
}
func (w *Watcher) remove(name string, unwatchFiles bool) error {
name = filepath.Clean(name) name = filepath.Clean(name)
w.mu.Lock() w.mu.Lock()
if w.isClosed {
w.mu.Unlock()
return nil
}
watchfd, ok := w.watches[name] watchfd, ok := w.watches[name]
w.mu.Unlock() w.mu.Unlock()
if !ok { if !ok {
@ -371,7 +315,7 @@ func (w *Watcher) remove(name string, unwatchFiles bool) error {
w.mu.Unlock() w.mu.Unlock()
// Find all watched paths that are in this directory that are not external. // Find all watched paths that are in this directory that are not external.
if unwatchFiles && isDir { if isDir {
var pathsToRemove []string var pathsToRemove []string
w.mu.Lock() w.mu.Lock()
for fd := range w.watchesByDir[name] { for fd := range w.watchesByDir[name] {
@ -382,25 +326,20 @@ func (w *Watcher) remove(name string, unwatchFiles bool) error {
} }
w.mu.Unlock() w.mu.Unlock()
for _, name := range pathsToRemove { for _, name := range pathsToRemove {
// Since these are internal, not much sense in propagating error to // Since these are internal, not much sense in propagating error
// the user, as that will just confuse them with an error about a // to the user, as that will just confuse them with an error about
// path they did not explicitly watch themselves. // a path they did not explicitly watch themselves.
w.Remove(name) w.Remove(name)
} }
} }
return nil return nil
} }
// WatchList returns all paths explicitly added with [Watcher.Add] (and are not // WatchList returns all paths added with [Add] (and are not yet removed).
// yet removed).
//
// Returns nil if [Watcher.Close] was called.
func (w *Watcher) WatchList() []string { func (w *Watcher) WatchList() []string {
w.mu.Lock() w.mu.Lock()
defer w.mu.Unlock() defer w.mu.Unlock()
if w.isClosed {
return nil
}
entries := make([]string, 0, len(w.userWatches)) entries := make([]string, 0, len(w.userWatches))
for pathname := range w.userWatches { for pathname := range w.userWatches {
@ -413,18 +352,18 @@ func (w *Watcher) WatchList() []string {
// Watch all events (except NOTE_EXTEND, NOTE_LINK, NOTE_REVOKE) // Watch all events (except NOTE_EXTEND, NOTE_LINK, NOTE_REVOKE)
const noteAllEvents = unix.NOTE_DELETE | unix.NOTE_WRITE | unix.NOTE_ATTRIB | unix.NOTE_RENAME const noteAllEvents = unix.NOTE_DELETE | unix.NOTE_WRITE | unix.NOTE_ATTRIB | unix.NOTE_RENAME
// addWatch adds name to the watched file set; the flags are interpreted as // addWatch adds name to the watched file set.
// described in kevent(2). // The flags are interpreted as described in kevent(2).
// // Returns the real path to the file which was added, if any, which may be different from the one passed in the case of symlinks.
// Returns the real path to the file which was added, with symlinks resolved.
func (w *Watcher) addWatch(name string, flags uint32) (string, error) { func (w *Watcher) addWatch(name string, flags uint32) (string, error) {
var isDir bool var isDir bool
// Make ./name and name equivalent
name = filepath.Clean(name) name = filepath.Clean(name)
w.mu.Lock() w.mu.Lock()
if w.isClosed { if w.isClosed {
w.mu.Unlock() w.mu.Unlock()
return "", ErrClosed return "", errors.New("kevent instance already closed")
} }
watchfd, alreadyWatching := w.watches[name] watchfd, alreadyWatching := w.watches[name]
// We already have a watch, but we can still override flags. // We already have a watch, but we can still override flags.
@ -444,30 +383,27 @@ func (w *Watcher) addWatch(name string, flags uint32) (string, error) {
return "", nil return "", nil
} }
// Follow Symlinks. // Follow Symlinks
//
// Linux can add unresolvable symlinks to the watch list without issue,
// and Windows can't do symlinks period. To maintain consistency, we
// will act like everything is fine if the link can't be resolved.
// There will simply be no file events for broken symlinks. Hence the
// returns of nil on errors.
if fi.Mode()&os.ModeSymlink == os.ModeSymlink { if fi.Mode()&os.ModeSymlink == os.ModeSymlink {
link, err := os.Readlink(name) name, err = filepath.EvalSymlinks(name)
if err != nil { if err != nil {
// Return nil because Linux can add unresolvable symlinks to the
// watch list without problems, so maintain consistency with
// that. There will be no file events for broken symlinks.
// TODO: more specific check; returns os.PathError; ENOENT?
return "", nil return "", nil
} }
w.mu.Lock() w.mu.Lock()
_, alreadyWatching = w.watches[link] _, alreadyWatching = w.watches[name]
w.mu.Unlock() w.mu.Unlock()
if alreadyWatching { if alreadyWatching {
// Add to watches so we don't get spurious Create events later return name, nil
// on when we diff the directories.
w.watches[name] = 0
w.fileExists[name] = struct{}{}
return link, nil
} }
name = link
fi, err = os.Lstat(name) fi, err = os.Lstat(name)
if err != nil { if err != nil {
return "", nil return "", nil
@ -475,7 +411,7 @@ func (w *Watcher) addWatch(name string, flags uint32) (string, error) {
} }
// Retry on EINTR; open() can return EINTR in practice on macOS. // Retry on EINTR; open() can return EINTR in practice on macOS.
// See #354, and Go issues 11180 and 39237. // See #354, and go issues 11180 and 39237.
for { for {
watchfd, err = unix.Open(name, openMode, 0) watchfd, err = unix.Open(name, openMode, 0)
if err == nil { if err == nil {
@ -508,13 +444,14 @@ func (w *Watcher) addWatch(name string, flags uint32) (string, error) {
w.watchesByDir[parentName] = watchesByDir w.watchesByDir[parentName] = watchesByDir
} }
watchesByDir[watchfd] = struct{}{} watchesByDir[watchfd] = struct{}{}
w.paths[watchfd] = pathInfo{name: name, isDir: isDir} w.paths[watchfd] = pathInfo{name: name, isDir: isDir}
w.mu.Unlock() w.mu.Unlock()
} }
if isDir { if isDir {
// Watch the directory if it has not been watched before, or if it was // Watch the directory if it has not been watched before,
// watched before, but perhaps only a NOTE_DELETE (watchDirectoryFiles) // or if it was watched before, but perhaps only a NOTE_DELETE (watchDirectoryFiles)
w.mu.Lock() w.mu.Lock()
watchDir := (flags&unix.NOTE_WRITE) == unix.NOTE_WRITE && watchDir := (flags&unix.NOTE_WRITE) == unix.NOTE_WRITE &&
@ -536,10 +473,13 @@ func (w *Watcher) addWatch(name string, flags uint32) (string, error) {
// Event values that it sends down the Events channel. // Event values that it sends down the Events channel.
func (w *Watcher) readEvents() { func (w *Watcher) readEvents() {
defer func() { defer func() {
err := unix.Close(w.kq)
if err != nil {
w.Errors <- err
}
unix.Close(w.closepipe[0])
close(w.Events) close(w.Events)
close(w.Errors) close(w.Errors)
_ = unix.Close(w.kq)
unix.Close(w.closepipe[0])
}() }()
eventBuffer := make([]unix.Kevent_t, 10) eventBuffer := make([]unix.Kevent_t, 10)
@ -573,8 +513,18 @@ func (w *Watcher) readEvents() {
event := w.newEvent(path.name, mask) event := w.newEvent(path.name, mask)
if path.isDir && !event.Has(Remove) {
// Double check to make sure the directory exists. This can
// happen when we do a rm -fr on a recursively watched folders
// and we receive a modification event first but the folder has
// been deleted and later receive the delete event.
if _, err := os.Lstat(event.Name); os.IsNotExist(err) {
event.Op |= Remove
}
}
if event.Has(Rename) || event.Has(Remove) { if event.Has(Rename) || event.Has(Remove) {
w.remove(event.Name, false) w.Remove(event.Name)
w.mu.Lock() w.mu.Lock()
delete(w.fileExists, event.Name) delete(w.fileExists, event.Name)
w.mu.Unlock() w.mu.Unlock()
@ -590,30 +540,26 @@ func (w *Watcher) readEvents() {
} }
if event.Has(Remove) { if event.Has(Remove) {
// Look for a file that may have overwritten this; for example, // Look for a file that may have overwritten this.
// mv f1 f2 will delete f2, then create f2. // For example, mv f1 f2 will delete f2, then create f2.
if path.isDir { if path.isDir {
fileDir := filepath.Clean(event.Name) fileDir := filepath.Clean(event.Name)
w.mu.Lock() w.mu.Lock()
_, found := w.watches[fileDir] _, found := w.watches[fileDir]
w.mu.Unlock() w.mu.Unlock()
if found { if found {
err := w.sendDirectoryChangeEvents(fileDir) // make sure the directory exists before we watch for changes. When we
if err != nil { // do a recursive watch and perform rm -fr, the parent directory might
if !w.sendError(err) { // have gone missing, ignore the missing directory and let the
closed = true // upcoming delete event remove the watch from the parent directory.
} if _, err := os.Lstat(fileDir); err == nil {
w.sendDirectoryChangeEvents(fileDir)
} }
} }
} else { } else {
filePath := filepath.Clean(event.Name) filePath := filepath.Clean(event.Name)
if fi, err := os.Lstat(filePath); err == nil { if fileInfo, err := os.Lstat(filePath); err == nil {
err := w.sendFileCreatedEventIfNew(filePath, fi) w.sendFileCreatedEventIfNew(filePath, fileInfo)
if err != nil {
if !w.sendError(err) {
closed = true
}
}
} }
} }
} }
@ -636,31 +582,21 @@ func (w *Watcher) newEvent(name string, mask uint32) Event {
if mask&unix.NOTE_ATTRIB == unix.NOTE_ATTRIB { if mask&unix.NOTE_ATTRIB == unix.NOTE_ATTRIB {
e.Op |= Chmod e.Op |= Chmod
} }
// No point sending a write and delete event at the same time: if it's gone,
// then it's gone.
if e.Op.Has(Write) && e.Op.Has(Remove) {
e.Op &^= Write
}
return e return e
} }
// watchDirectoryFiles to mimic inotify when adding a watch on a directory // watchDirectoryFiles to mimic inotify when adding a watch on a directory
func (w *Watcher) watchDirectoryFiles(dirPath string) error { func (w *Watcher) watchDirectoryFiles(dirPath string) error {
// Get all files // Get all files
files, err := os.ReadDir(dirPath) files, err := ioutil.ReadDir(dirPath)
if err != nil { if err != nil {
return err return err
} }
for _, f := range files { for _, fileInfo := range files {
path := filepath.Join(dirPath, f.Name()) path := filepath.Join(dirPath, fileInfo.Name())
fi, err := f.Info() cleanPath, err := w.internalWatch(path, fileInfo)
if err != nil {
return fmt.Errorf("%q: %w", path, err)
}
cleanPath, err := w.internalWatch(path, fi)
if err != nil { if err != nil {
// No permission to read the file; that's not a problem: just skip. // No permission to read the file; that's not a problem: just skip.
// But do add it to w.fileExists to prevent it from being picked up // But do add it to w.fileExists to prevent it from being picked up
@ -670,7 +606,7 @@ func (w *Watcher) watchDirectoryFiles(dirPath string) error {
case errors.Is(err, unix.EACCES) || errors.Is(err, unix.EPERM): case errors.Is(err, unix.EACCES) || errors.Is(err, unix.EPERM):
cleanPath = filepath.Clean(path) cleanPath = filepath.Clean(path)
default: default:
return fmt.Errorf("%q: %w", path, err) return fmt.Errorf("%q: %w", filepath.Join(dirPath, fileInfo.Name()), err)
} }
} }
@ -686,37 +622,26 @@ func (w *Watcher) watchDirectoryFiles(dirPath string) error {
// //
// This functionality is to have the BSD watcher match the inotify, which sends // This functionality is to have the BSD watcher match the inotify, which sends
// a create event for files created in a watched directory. // a create event for files created in a watched directory.
func (w *Watcher) sendDirectoryChangeEvents(dir string) error { func (w *Watcher) sendDirectoryChangeEvents(dir string) {
files, err := os.ReadDir(dir) // Get all files
files, err := ioutil.ReadDir(dir)
if err != nil { if err != nil {
// Directory no longer exists: we can ignore this safely. kqueue will if !w.sendError(fmt.Errorf("fsnotify.sendDirectoryChangeEvents: %w", err)) {
// still give us the correct events. return
if errors.Is(err, os.ErrNotExist) { }
return nil }
}
return fmt.Errorf("fsnotify.sendDirectoryChangeEvents: %w", err) // Search for new files
} for _, fi := range files {
err := w.sendFileCreatedEventIfNew(filepath.Join(dir, fi.Name()), fi)
for _, f := range files { if err != nil {
fi, err := f.Info() return
if err != nil {
return fmt.Errorf("fsnotify.sendDirectoryChangeEvents: %w", err)
}
err = w.sendFileCreatedEventIfNew(filepath.Join(dir, fi.Name()), fi)
if err != nil {
// Don't need to send an error if this file isn't readable.
if errors.Is(err, unix.EACCES) || errors.Is(err, unix.EPERM) {
return nil
}
return fmt.Errorf("fsnotify.sendDirectoryChangeEvents: %w", err)
} }
} }
return nil
} }
// sendFileCreatedEvent sends a create event if the file isn't already being tracked. // sendFileCreatedEvent sends a create event if the file isn't already being tracked.
func (w *Watcher) sendFileCreatedEventIfNew(filePath string, fi os.FileInfo) (err error) { func (w *Watcher) sendFileCreatedEventIfNew(filePath string, fileInfo os.FileInfo) (err error) {
w.mu.Lock() w.mu.Lock()
_, doesExist := w.fileExists[filePath] _, doesExist := w.fileExists[filePath]
w.mu.Unlock() w.mu.Unlock()
@ -727,7 +652,7 @@ func (w *Watcher) sendFileCreatedEventIfNew(filePath string, fi os.FileInfo) (er
} }
// like watchDirectoryFiles (but without doing another ReadDir) // like watchDirectoryFiles (but without doing another ReadDir)
filePath, err = w.internalWatch(filePath, fi) filePath, err = w.internalWatch(filePath, fileInfo)
if err != nil { if err != nil {
return err return err
} }
@ -739,10 +664,10 @@ func (w *Watcher) sendFileCreatedEventIfNew(filePath string, fi os.FileInfo) (er
return nil return nil
} }
func (w *Watcher) internalWatch(name string, fi os.FileInfo) (string, error) { func (w *Watcher) internalWatch(name string, fileInfo os.FileInfo) (string, error) {
if fi.IsDir() { if fileInfo.IsDir() {
// mimic Linux providing delete events for subdirectories, but preserve // mimic Linux providing delete events for subdirectories
// the flags used if currently watching subdirectory // but preserve the flags used if currently watching subdirectory
w.mu.Lock() w.mu.Lock()
flags := w.dirFlags[name] flags := w.dirFlags[name]
w.mu.Unlock() w.mu.Unlock()

View File

@ -1,169 +1,39 @@
//go:build appengine || (!darwin && !dragonfly && !freebsd && !openbsd && !linux && !netbsd && !solaris && !windows) //go:build !darwin && !dragonfly && !freebsd && !openbsd && !linux && !netbsd && !solaris && !windows
// +build appengine !darwin,!dragonfly,!freebsd,!openbsd,!linux,!netbsd,!solaris,!windows // +build !darwin,!dragonfly,!freebsd,!openbsd,!linux,!netbsd,!solaris,!windows
// Note: the documentation on the Watcher type and methods is generated from
// mkdoc.zsh
package fsnotify package fsnotify
import "errors" import (
"fmt"
"runtime"
)
// Watcher watches a set of paths, delivering events on a channel. // Watcher watches a set of files, delivering events to a channel.
// type Watcher struct{}
// A watcher should not be copied (e.g. pass it by pointer, rather than by
// value).
//
// # Linux notes
//
// When a file is removed a Remove event won't be emitted until all file
// descriptors are closed, and deletes will always emit a Chmod. For example:
//
// fp := os.Open("file")
// os.Remove("file") // Triggers Chmod
// fp.Close() // Triggers Remove
//
// This is the event that inotify sends, so not much can be changed about this.
//
// The fs.inotify.max_user_watches sysctl variable specifies the upper limit
// for the number of watches per user, and fs.inotify.max_user_instances
// specifies the maximum number of inotify instances per user. Every Watcher you
// create is an "instance", and every path you add is a "watch".
//
// These are also exposed in /proc as /proc/sys/fs/inotify/max_user_watches and
// /proc/sys/fs/inotify/max_user_instances
//
// To increase them you can use sysctl or write the value to the /proc file:
//
// # Default values on Linux 5.18
// sysctl fs.inotify.max_user_watches=124983
// sysctl fs.inotify.max_user_instances=128
//
// To make the changes persist on reboot edit /etc/sysctl.conf or
// /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check
// your distro's documentation):
//
// fs.inotify.max_user_watches=124983
// fs.inotify.max_user_instances=128
//
// Reaching the limit will result in a "no space left on device" or "too many open
// files" error.
//
// # kqueue notes (macOS, BSD)
//
// kqueue requires opening a file descriptor for every file that's being watched;
// so if you're watching a directory with five files then that's six file
// descriptors. You will run in to your system's "max open files" limit faster on
// these platforms.
//
// The sysctl variables kern.maxfiles and kern.maxfilesperproc can be used to
// control the maximum number of open files, as well as /etc/login.conf on BSD
// systems.
//
// # Windows notes
//
// Paths can be added as "C:\path\to\dir", but forward slashes
// ("C:/path/to/dir") will also work.
//
// When a watched directory is removed it will always send an event for the
// directory itself, but may not send events for all files in that directory.
// Sometimes it will send events for all times, sometimes it will send no
// events, and often only for some files.
//
// The default ReadDirectoryChangesW() buffer size is 64K, which is the largest
// value that is guaranteed to work with SMB filesystems. If you have many
// events in quick succession this may not be enough, and you will have to use
// [WithBufferSize] to increase the value.
type Watcher struct {
// Events sends the filesystem change events.
//
// fsnotify can send the following events; a "path" here can refer to a
// file, directory, symbolic link, or special file like a FIFO.
//
// fsnotify.Create A new path was created; this may be followed by one
// or more Write events if data also gets written to a
// file.
//
// fsnotify.Remove A path was removed.
//
// fsnotify.Rename A path was renamed. A rename is always sent with the
// old path as Event.Name, and a Create event will be
// sent with the new name. Renames are only sent for
// paths that are currently watched; e.g. moving an
// unmonitored file into a monitored directory will
// show up as just a Create. Similarly, renaming a file
// to outside a monitored directory will show up as
// only a Rename.
//
// fsnotify.Write A file or named pipe was written to. A Truncate will
// also trigger a Write. A single "write action"
// initiated by the user may show up as one or multiple
// writes, depending on when the system syncs things to
// disk. For example when compiling a large Go program
// you may get hundreds of Write events, and you may
// want to wait until you've stopped receiving them
// (see the dedup example in cmd/fsnotify).
//
// Some systems may send Write event for directories
// when the directory content changes.
//
// fsnotify.Chmod Attributes were changed. On Linux this is also sent
// when a file is removed (or more accurately, when a
// link to an inode is removed). On kqueue it's sent
// when a file is truncated. On Windows it's never
// sent.
Events chan Event
// Errors sends any errors.
//
// ErrEventOverflow is used to indicate there are too many events:
//
// - inotify: There are too many queued events (fs.inotify.max_queued_events sysctl)
// - windows: The buffer size is too small; WithBufferSize() can be used to increase it.
// - kqueue, fen: Not used.
Errors chan error
}
// NewWatcher creates a new Watcher. // NewWatcher creates a new Watcher.
func NewWatcher() (*Watcher, error) { func NewWatcher() (*Watcher, error) {
return nil, errors.New("fsnotify not supported on the current platform") return nil, fmt.Errorf("fsnotify not supported on %s", runtime.GOOS)
} }
// NewBufferedWatcher creates a new Watcher with a buffered Watcher.Events // Close removes all watches and closes the events channel.
// channel. func (w *Watcher) Close() error {
// return nil
// The main use case for this is situations with a very large number of events }
// where the kernel buffer size can't be increased (e.g. due to lack of
// permissions). An unbuffered Watcher will perform better for almost all use
// cases, and whenever possible you will be better off increasing the kernel
// buffers instead of adding a large userspace buffer.
func NewBufferedWatcher(sz uint) (*Watcher, error) { return NewWatcher() }
// Close removes all watches and closes the Events channel.
func (w *Watcher) Close() error { return nil }
// WatchList returns all paths explicitly added with [Watcher.Add] (and are not
// yet removed).
//
// Returns nil if [Watcher.Close] was called.
func (w *Watcher) WatchList() []string { return nil }
// Add starts monitoring the path for changes. // Add starts monitoring the path for changes.
// //
// A path can only be watched once; watching it more than once is a no-op and will // A path can only be watched once; attempting to watch it more than once will
// not return an error. Paths that do not yet exist on the filesystem cannot be // return an error. Paths that do not yet exist on the filesystem cannot be
// watched. // added. A watch will be automatically removed if the path is deleted.
// //
// A watch will be automatically removed if the watched path is deleted or // A path will remain watched if it gets renamed to somewhere else on the same
// renamed. The exception is the Windows backend, which doesn't remove the // filesystem, but the monitor will get removed if the path gets deleted and
// watcher on renames. // re-created, or if it's moved to a different filesystem.
// //
// Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special // Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special
// filesystems (/proc, /sys, etc.) generally don't work. // filesystems (/proc, /sys, etc.) generally don't work.
// //
// Returns [ErrClosed] if [Watcher.Close] was called.
//
// See [Watcher.AddWith] for a version that allows adding options.
//
// # Watching directories // # Watching directories
// //
// All files in a directory are monitored, including new files that are created // All files in a directory are monitored, including new files that are created
@ -173,26 +43,17 @@ func (w *Watcher) WatchList() []string { return nil }
// # Watching files // # Watching files
// //
// Watching individual files (rather than directories) is generally not // Watching individual files (rather than directories) is generally not
// recommended as many programs (especially editors) update files atomically: it // recommended as many tools update files atomically. Instead of "just" writing
// will write to a temporary file which is then moved to to destination, // to the file a temporary file will be written to first, and if successful the
// overwriting the original (or some variant thereof). The watcher on the // temporary file is moved to to destination removing the original, or some
// original file is now lost, as that no longer exists. // variant thereof. The watcher on the original file is now lost, as it no
// longer exists.
// //
// The upshot of this is that a power failure or crash won't leave a // Instead, watch the parent directory and use Event.Name to filter out files
// half-written file. // you're not interested in. There is an example of this in [cmd/fsnotify/file.go].
// func (w *Watcher) Add(name string) error {
// Watch the parent directory and use Event.Name to filter out files you're not return nil
// interested in. There is an example of this in cmd/fsnotify/file.go. }
func (w *Watcher) Add(name string) error { return nil }
// AddWith is like [Watcher.Add], but allows adding options. When using Add()
// the defaults described below are used.
//
// Possible options are:
//
// - [WithBufferSize] sets the buffer size for the Windows backend; no-op on
// other platforms. The default is 64K (65536 bytes).
func (w *Watcher) AddWith(name string, opts ...addOpt) error { return nil }
// Remove stops monitoring the path for changes. // Remove stops monitoring the path for changes.
// //
@ -200,6 +61,6 @@ func (w *Watcher) AddWith(name string, opts ...addOpt) error { return nil }
// /tmp/dir and /tmp/dir/subdir then you will need to remove both. // /tmp/dir and /tmp/dir/subdir then you will need to remove both.
// //
// Removing a path that has not yet been added returns [ErrNonExistentWatch]. // Removing a path that has not yet been added returns [ErrNonExistentWatch].
// func (w *Watcher) Remove(name string) error {
// Returns nil if [Watcher.Close] was called. return nil
func (w *Watcher) Remove(name string) error { return nil } }

View File

@ -1,13 +1,6 @@
//go:build windows //go:build windows
// +build windows // +build windows
// Windows backend based on ReadDirectoryChangesW()
//
// https://learn.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-readdirectorychangesw
//
// Note: the documentation on the Watcher type and methods is generated from
// mkdoc.zsh
package fsnotify package fsnotify
import ( import (
@ -34,9 +27,9 @@ import (
// When a file is removed a Remove event won't be emitted until all file // When a file is removed a Remove event won't be emitted until all file
// descriptors are closed, and deletes will always emit a Chmod. For example: // descriptors are closed, and deletes will always emit a Chmod. For example:
// //
// fp := os.Open("file") // fp := os.Open("file")
// os.Remove("file") // Triggers Chmod // os.Remove("file") // Triggers Chmod
// fp.Close() // Triggers Remove // fp.Close() // Triggers Remove
// //
// This is the event that inotify sends, so not much can be changed about this. // This is the event that inotify sends, so not much can be changed about this.
// //
@ -50,16 +43,16 @@ import (
// //
// To increase them you can use sysctl or write the value to the /proc file: // To increase them you can use sysctl or write the value to the /proc file:
// //
// # Default values on Linux 5.18 // # Default values on Linux 5.18
// sysctl fs.inotify.max_user_watches=124983 // sysctl fs.inotify.max_user_watches=124983
// sysctl fs.inotify.max_user_instances=128 // sysctl fs.inotify.max_user_instances=128
// //
// To make the changes persist on reboot edit /etc/sysctl.conf or // To make the changes persist on reboot edit /etc/sysctl.conf or
// /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check // /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check
// your distro's documentation): // your distro's documentation):
// //
// fs.inotify.max_user_watches=124983 // fs.inotify.max_user_watches=124983
// fs.inotify.max_user_instances=128 // fs.inotify.max_user_instances=128
// //
// Reaching the limit will result in a "no space left on device" or "too many open // Reaching the limit will result in a "no space left on device" or "too many open
// files" error. // files" error.
@ -75,20 +68,14 @@ import (
// control the maximum number of open files, as well as /etc/login.conf on BSD // control the maximum number of open files, as well as /etc/login.conf on BSD
// systems. // systems.
// //
// # Windows notes // # macOS notes
// //
// Paths can be added as "C:\path\to\dir", but forward slashes // Spotlight indexing on macOS can result in multiple events (see [#15]). A
// ("C:/path/to/dir") will also work. // temporary workaround is to add your folder(s) to the "Spotlight Privacy
// Settings" until we have a native FSEvents implementation (see [#11]).
// //
// When a watched directory is removed it will always send an event for the // [#11]: https://github.com/fsnotify/fsnotify/issues/11
// directory itself, but may not send events for all files in that directory. // [#15]: https://github.com/fsnotify/fsnotify/issues/15
// Sometimes it will send events for all times, sometimes it will send no
// events, and often only for some files.
//
// The default ReadDirectoryChangesW() buffer size is 64K, which is the largest
// value that is guaranteed to work with SMB filesystems. If you have many
// events in quick succession this may not be enough, and you will have to use
// [WithBufferSize] to increase the value.
type Watcher struct { type Watcher struct {
// Events sends the filesystem change events. // Events sends the filesystem change events.
// //
@ -115,52 +102,31 @@ type Watcher struct {
// initiated by the user may show up as one or multiple // initiated by the user may show up as one or multiple
// writes, depending on when the system syncs things to // writes, depending on when the system syncs things to
// disk. For example when compiling a large Go program // disk. For example when compiling a large Go program
// you may get hundreds of Write events, and you may // you may get hundreds of Write events, so you
// want to wait until you've stopped receiving them // probably want to wait until you've stopped receiving
// (see the dedup example in cmd/fsnotify). // them (see the dedup example in cmd/fsnotify).
//
// Some systems may send Write event for directories
// when the directory content changes.
// //
// fsnotify.Chmod Attributes were changed. On Linux this is also sent // fsnotify.Chmod Attributes were changed. On Linux this is also sent
// when a file is removed (or more accurately, when a // when a file is removed (or more accurately, when a
// link to an inode is removed). On kqueue it's sent // link to an inode is removed). On kqueue it's sent
// when a file is truncated. On Windows it's never // and on kqueue when a file is truncated. On Windows
// sent. // it's never sent.
Events chan Event Events chan Event
// Errors sends any errors. // Errors sends any errors.
//
// ErrEventOverflow is used to indicate there are too many events:
//
// - inotify: There are too many queued events (fs.inotify.max_queued_events sysctl)
// - windows: The buffer size is too small; WithBufferSize() can be used to increase it.
// - kqueue, fen: Not used.
Errors chan error Errors chan error
port windows.Handle // Handle to completion port port windows.Handle // Handle to completion port
input chan *input // Inputs to the reader are sent on this channel input chan *input // Inputs to the reader are sent on this channel
quit chan chan<- error quit chan chan<- error
mu sync.Mutex // Protects access to watches, closed mu sync.Mutex // Protects access to watches, isClosed
watches watchMap // Map of watches (key: i-number) watches watchMap // Map of watches (key: i-number)
closed bool // Set to true when Close() is first called isClosed bool // Set to true when Close() is first called
} }
// NewWatcher creates a new Watcher. // NewWatcher creates a new Watcher.
func NewWatcher() (*Watcher, error) { func NewWatcher() (*Watcher, error) {
return NewBufferedWatcher(50)
}
// NewBufferedWatcher creates a new Watcher with a buffered Watcher.Events
// channel.
//
// The main use case for this is situations with a very large number of events
// where the kernel buffer size can't be increased (e.g. due to lack of
// permissions). An unbuffered Watcher will perform better for almost all use
// cases, and whenever possible you will be better off increasing the kernel
// buffers instead of adding a large userspace buffer.
func NewBufferedWatcher(sz uint) (*Watcher, error) {
port, err := windows.CreateIoCompletionPort(windows.InvalidHandle, 0, 0, 0) port, err := windows.CreateIoCompletionPort(windows.InvalidHandle, 0, 0, 0)
if err != nil { if err != nil {
return nil, os.NewSyscallError("CreateIoCompletionPort", err) return nil, os.NewSyscallError("CreateIoCompletionPort", err)
@ -169,7 +135,7 @@ func NewBufferedWatcher(sz uint) (*Watcher, error) {
port: port, port: port,
watches: make(watchMap), watches: make(watchMap),
input: make(chan *input, 1), input: make(chan *input, 1),
Events: make(chan Event, sz), Events: make(chan Event, 50),
Errors: make(chan error), Errors: make(chan error),
quit: make(chan chan<- error, 1), quit: make(chan chan<- error, 1),
} }
@ -177,12 +143,6 @@ func NewBufferedWatcher(sz uint) (*Watcher, error) {
return w, nil return w, nil
} }
func (w *Watcher) isClosed() bool {
w.mu.Lock()
defer w.mu.Unlock()
return w.closed
}
func (w *Watcher) sendEvent(name string, mask uint64) bool { func (w *Watcher) sendEvent(name string, mask uint64) bool {
if mask == 0 { if mask == 0 {
return false return false
@ -207,14 +167,14 @@ func (w *Watcher) sendError(err error) bool {
return false return false
} }
// Close removes all watches and closes the Events channel. // Close removes all watches and closes the events channel.
func (w *Watcher) Close() error { func (w *Watcher) Close() error {
if w.isClosed() { w.mu.Lock()
if w.isClosed {
w.mu.Unlock()
return nil return nil
} }
w.isClosed = true
w.mu.Lock()
w.closed = true
w.mu.Unlock() w.mu.Unlock()
// Send "quit" message to the reader goroutine // Send "quit" message to the reader goroutine
@ -228,21 +188,17 @@ func (w *Watcher) Close() error {
// Add starts monitoring the path for changes. // Add starts monitoring the path for changes.
// //
// A path can only be watched once; watching it more than once is a no-op and will // A path can only be watched once; attempting to watch it more than once will
// not return an error. Paths that do not yet exist on the filesystem cannot be // return an error. Paths that do not yet exist on the filesystem cannot be
// watched. // added. A watch will be automatically removed if the path is deleted.
// //
// A watch will be automatically removed if the watched path is deleted or // A path will remain watched if it gets renamed to somewhere else on the same
// renamed. The exception is the Windows backend, which doesn't remove the // filesystem, but the monitor will get removed if the path gets deleted and
// watcher on renames. // re-created, or if it's moved to a different filesystem.
// //
// Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special // Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special
// filesystems (/proc, /sys, etc.) generally don't work. // filesystems (/proc, /sys, etc.) generally don't work.
// //
// Returns [ErrClosed] if [Watcher.Close] was called.
//
// See [Watcher.AddWith] for a version that allows adding options.
//
// # Watching directories // # Watching directories
// //
// All files in a directory are monitored, including new files that are created // All files in a directory are monitored, including new files that are created
@ -252,41 +208,27 @@ func (w *Watcher) Close() error {
// # Watching files // # Watching files
// //
// Watching individual files (rather than directories) is generally not // Watching individual files (rather than directories) is generally not
// recommended as many programs (especially editors) update files atomically: it // recommended as many tools update files atomically. Instead of "just" writing
// will write to a temporary file which is then moved to to destination, // to the file a temporary file will be written to first, and if successful the
// overwriting the original (or some variant thereof). The watcher on the // temporary file is moved to to destination removing the original, or some
// original file is now lost, as that no longer exists. // variant thereof. The watcher on the original file is now lost, as it no
// longer exists.
// //
// The upshot of this is that a power failure or crash won't leave a // Instead, watch the parent directory and use Event.Name to filter out files
// half-written file. // you're not interested in. There is an example of this in [cmd/fsnotify/file.go].
// func (w *Watcher) Add(name string) error {
// Watch the parent directory and use Event.Name to filter out files you're not w.mu.Lock()
// interested in. There is an example of this in cmd/fsnotify/file.go. if w.isClosed {
func (w *Watcher) Add(name string) error { return w.AddWith(name) } w.mu.Unlock()
return errors.New("watcher already closed")
// AddWith is like [Watcher.Add], but allows adding options. When using Add()
// the defaults described below are used.
//
// Possible options are:
//
// - [WithBufferSize] sets the buffer size for the Windows backend; no-op on
// other platforms. The default is 64K (65536 bytes).
func (w *Watcher) AddWith(name string, opts ...addOpt) error {
if w.isClosed() {
return ErrClosed
}
with := getOptions(opts...)
if with.bufsize < 4096 {
return fmt.Errorf("fsnotify.WithBufferSize: buffer size cannot be smaller than 4096 bytes")
} }
w.mu.Unlock()
in := &input{ in := &input{
op: opAddWatch, op: opAddWatch,
path: filepath.Clean(name), path: filepath.Clean(name),
flags: sysFSALLEVENTS, flags: sysFSALLEVENTS,
reply: make(chan error), reply: make(chan error),
bufsize: with.bufsize,
} }
w.input <- in w.input <- in
if err := w.wakeupReader(); err != nil { if err := w.wakeupReader(); err != nil {
@ -301,13 +243,7 @@ func (w *Watcher) AddWith(name string, opts ...addOpt) error {
// /tmp/dir and /tmp/dir/subdir then you will need to remove both. // /tmp/dir and /tmp/dir/subdir then you will need to remove both.
// //
// Removing a path that has not yet been added returns [ErrNonExistentWatch]. // Removing a path that has not yet been added returns [ErrNonExistentWatch].
//
// Returns nil if [Watcher.Close] was called.
func (w *Watcher) Remove(name string) error { func (w *Watcher) Remove(name string) error {
if w.isClosed() {
return nil
}
in := &input{ in := &input{
op: opRemoveWatch, op: opRemoveWatch,
path: filepath.Clean(name), path: filepath.Clean(name),
@ -320,15 +256,8 @@ func (w *Watcher) Remove(name string) error {
return <-in.reply return <-in.reply
} }
// WatchList returns all paths explicitly added with [Watcher.Add] (and are not // WatchList returns all paths added with [Add] (and are not yet removed).
// yet removed).
//
// Returns nil if [Watcher.Close] was called.
func (w *Watcher) WatchList() []string { func (w *Watcher) WatchList() []string {
if w.isClosed() {
return nil
}
w.mu.Lock() w.mu.Lock()
defer w.mu.Unlock() defer w.mu.Unlock()
@ -350,6 +279,7 @@ func (w *Watcher) WatchList() []string {
// This should all be removed at some point, and just use windows.FILE_NOTIFY_* // This should all be removed at some point, and just use windows.FILE_NOTIFY_*
const ( const (
sysFSALLEVENTS = 0xfff sysFSALLEVENTS = 0xfff
sysFSATTRIB = 0x4
sysFSCREATE = 0x100 sysFSCREATE = 0x100
sysFSDELETE = 0x200 sysFSDELETE = 0x200
sysFSDELETESELF = 0x400 sysFSDELETESELF = 0x400
@ -375,6 +305,9 @@ func (w *Watcher) newEvent(name string, mask uint32) Event {
if mask&sysFSMOVE == sysFSMOVE || mask&sysFSMOVESELF == sysFSMOVESELF || mask&sysFSMOVEDFROM == sysFSMOVEDFROM { if mask&sysFSMOVE == sysFSMOVE || mask&sysFSMOVESELF == sysFSMOVESELF || mask&sysFSMOVEDFROM == sysFSMOVEDFROM {
e.Op |= Rename e.Op |= Rename
} }
if mask&sysFSATTRIB == sysFSATTRIB {
e.Op |= Chmod
}
return e return e
} }
@ -388,11 +321,10 @@ const (
) )
type input struct { type input struct {
op int op int
path string path string
flags uint32 flags uint32
bufsize int reply chan error
reply chan error
} }
type inode struct { type inode struct {
@ -402,14 +334,13 @@ type inode struct {
} }
type watch struct { type watch struct {
ov windows.Overlapped ov windows.Overlapped
ino *inode // i-number ino *inode // i-number
recurse bool // Recursive watch? path string // Directory path
path string // Directory path mask uint64 // Directory itself is being watched with these notify flags
mask uint64 // Directory itself is being watched with these notify flags names map[string]uint64 // Map of names being watched and their notify flags
names map[string]uint64 // Map of names being watched and their notify flags rename string // Remembers the old name while renaming a file
rename string // Remembers the old name while renaming a file buf [65536]byte // 64K buffer
buf []byte // buffer, allocated later
} }
type ( type (
@ -482,10 +413,7 @@ func (m watchMap) set(ino *inode, watch *watch) {
} }
// Must run within the I/O thread. // Must run within the I/O thread.
func (w *Watcher) addWatch(pathname string, flags uint64, bufsize int) error { func (w *Watcher) addWatch(pathname string, flags uint64) error {
//pathname, recurse := recursivePath(pathname)
recurse := false
dir, err := w.getDir(pathname) dir, err := w.getDir(pathname)
if err != nil { if err != nil {
return err return err
@ -505,11 +433,9 @@ func (w *Watcher) addWatch(pathname string, flags uint64, bufsize int) error {
return os.NewSyscallError("CreateIoCompletionPort", err) return os.NewSyscallError("CreateIoCompletionPort", err)
} }
watchEntry = &watch{ watchEntry = &watch{
ino: ino, ino: ino,
path: dir, path: dir,
names: make(map[string]uint64), names: make(map[string]uint64),
recurse: recurse,
buf: make([]byte, bufsize),
} }
w.mu.Lock() w.mu.Lock()
w.watches.set(ino, watchEntry) w.watches.set(ino, watchEntry)
@ -539,8 +465,6 @@ func (w *Watcher) addWatch(pathname string, flags uint64, bufsize int) error {
// Must run within the I/O thread. // Must run within the I/O thread.
func (w *Watcher) remWatch(pathname string) error { func (w *Watcher) remWatch(pathname string) error {
pathname, recurse := recursivePath(pathname)
dir, err := w.getDir(pathname) dir, err := w.getDir(pathname)
if err != nil { if err != nil {
return err return err
@ -554,10 +478,6 @@ func (w *Watcher) remWatch(pathname string) error {
watch := w.watches.get(ino) watch := w.watches.get(ino)
w.mu.Unlock() w.mu.Unlock()
if recurse && !watch.recurse {
return fmt.Errorf("can't use \\... with non-recursive watch %q", pathname)
}
err = windows.CloseHandle(ino.handle) err = windows.CloseHandle(ino.handle)
if err != nil { if err != nil {
w.sendError(os.NewSyscallError("CloseHandle", err)) w.sendError(os.NewSyscallError("CloseHandle", err))
@ -615,11 +535,8 @@ func (w *Watcher) startRead(watch *watch) error {
return nil return nil
} }
// We need to pass the array, rather than the slice. rdErr := windows.ReadDirectoryChanges(watch.ino.handle, &watch.buf[0],
hdr := (*reflect.SliceHeader)(unsafe.Pointer(&watch.buf)) uint32(unsafe.Sizeof(watch.buf)), false, mask, nil, &watch.ov, 0)
rdErr := windows.ReadDirectoryChanges(watch.ino.handle,
(*byte)(unsafe.Pointer(hdr.Data)), uint32(hdr.Len),
watch.recurse, mask, nil, &watch.ov, 0)
if rdErr != nil { if rdErr != nil {
err := os.NewSyscallError("ReadDirectoryChanges", rdErr) err := os.NewSyscallError("ReadDirectoryChanges", rdErr)
if rdErr == windows.ERROR_ACCESS_DENIED && watch.mask&provisional == 0 { if rdErr == windows.ERROR_ACCESS_DENIED && watch.mask&provisional == 0 {
@ -646,8 +563,9 @@ func (w *Watcher) readEvents() {
runtime.LockOSThread() runtime.LockOSThread()
for { for {
// This error is handled after the watch == nil check below.
qErr := windows.GetQueuedCompletionStatus(w.port, &n, &key, &ov, windows.INFINITE) qErr := windows.GetQueuedCompletionStatus(w.port, &n, &key, &ov, windows.INFINITE)
// This error is handled after the watch == nil check below. NOTE: this
// seems odd, note sure if it's correct.
watch := (*watch)(unsafe.Pointer(ov)) watch := (*watch)(unsafe.Pointer(ov))
if watch == nil { if watch == nil {
@ -677,7 +595,7 @@ func (w *Watcher) readEvents() {
case in := <-w.input: case in := <-w.input:
switch in.op { switch in.op {
case opAddWatch: case opAddWatch:
in.reply <- w.addWatch(in.path, uint64(in.flags), in.bufsize) in.reply <- w.addWatch(in.path, uint64(in.flags))
case opRemoveWatch: case opRemoveWatch:
in.reply <- w.remWatch(in.path) in.reply <- w.remWatch(in.path)
} }
@ -687,8 +605,6 @@ func (w *Watcher) readEvents() {
} }
switch qErr { switch qErr {
case nil:
// No error
case windows.ERROR_MORE_DATA: case windows.ERROR_MORE_DATA:
if watch == nil { if watch == nil {
w.sendError(errors.New("ERROR_MORE_DATA has unexpectedly null lpOverlapped buffer")) w.sendError(errors.New("ERROR_MORE_DATA has unexpectedly null lpOverlapped buffer"))
@ -710,12 +626,13 @@ func (w *Watcher) readEvents() {
default: default:
w.sendError(os.NewSyscallError("GetQueuedCompletionPort", qErr)) w.sendError(os.NewSyscallError("GetQueuedCompletionPort", qErr))
continue continue
case nil:
} }
var offset uint32 var offset uint32
for { for {
if n == 0 { if n == 0 {
w.sendError(ErrEventOverflow) w.sendError(errors.New("short read in readEvents()"))
break break
} }
@ -786,9 +703,8 @@ func (w *Watcher) readEvents() {
// Error! // Error!
if offset >= n { if offset >= n {
//lint:ignore ST1005 Windows should be capitalized
w.sendError(errors.New( w.sendError(errors.New(
"Windows system assumed buffer larger than it is, events have likely been missed")) "Windows system assumed buffer larger than it is, events have likely been missed."))
break break
} }
} }
@ -804,6 +720,9 @@ func (w *Watcher) toWindowsFlags(mask uint64) uint32 {
if mask&sysFSMODIFY != 0 { if mask&sysFSMODIFY != 0 {
m |= windows.FILE_NOTIFY_CHANGE_LAST_WRITE m |= windows.FILE_NOTIFY_CHANGE_LAST_WRITE
} }
if mask&sysFSATTRIB != 0 {
m |= windows.FILE_NOTIFY_CHANGE_ATTRIBUTES
}
if mask&(sysFSMOVE|sysFSCREATE|sysFSDELETE) != 0 { if mask&(sysFSMOVE|sysFSCREATE|sysFSDELETE) != 0 {
m |= windows.FILE_NOTIFY_CHANGE_FILE_NAME | windows.FILE_NOTIFY_CHANGE_DIR_NAME m |= windows.FILE_NOTIFY_CHANGE_FILE_NAME | windows.FILE_NOTIFY_CHANGE_DIR_NAME
} }

View File

@ -1,18 +1,13 @@
//go:build !plan9
// +build !plan9
// Package fsnotify provides a cross-platform interface for file system // Package fsnotify provides a cross-platform interface for file system
// notifications. // notifications.
//
// Currently supported systems:
//
// Linux 2.6.32+ via inotify
// BSD, macOS via kqueue
// Windows via ReadDirectoryChangesW
// illumos via FEN
package fsnotify package fsnotify
import ( import (
"errors" "errors"
"fmt" "fmt"
"path/filepath"
"strings" "strings"
) )
@ -38,52 +33,34 @@ type Op uint32
// The operations fsnotify can trigger; see the documentation on [Watcher] for a // The operations fsnotify can trigger; see the documentation on [Watcher] for a
// full description, and check them with [Event.Has]. // full description, and check them with [Event.Has].
const ( const (
// A new pathname was created.
Create Op = 1 << iota Create Op = 1 << iota
// The pathname was written to; this does *not* mean the write has finished,
// and a write can be followed by more writes.
Write Write
// The path was removed; any watches on it will be removed. Some "remove"
// operations may trigger a Rename if the file is actually moved (for
// example "remove to trash" is often a rename).
Remove Remove
// The path was renamed to something else; any watched on it will be
// removed.
Rename Rename
// File attributes were changed.
//
// It's generally not recommended to take action on this event, as it may
// get triggered very frequently by some software. For example, Spotlight
// indexing on macOS, anti-virus software, backup software, etc.
Chmod Chmod
) )
// Common errors that can be reported. // Common errors that can be reported by a watcher
var ( var (
ErrNonExistentWatch = errors.New("fsnotify: can't remove non-existent watch") ErrNonExistentWatch = errors.New("can't remove non-existent watcher")
ErrEventOverflow = errors.New("fsnotify: queue or buffer overflow") ErrEventOverflow = errors.New("fsnotify queue overflow")
ErrClosed = errors.New("fsnotify: watcher already closed")
) )
func (o Op) String() string { func (op Op) String() string {
var b strings.Builder var b strings.Builder
if o.Has(Create) { if op.Has(Create) {
b.WriteString("|CREATE") b.WriteString("|CREATE")
} }
if o.Has(Remove) { if op.Has(Remove) {
b.WriteString("|REMOVE") b.WriteString("|REMOVE")
} }
if o.Has(Write) { if op.Has(Write) {
b.WriteString("|WRITE") b.WriteString("|WRITE")
} }
if o.Has(Rename) { if op.Has(Rename) {
b.WriteString("|RENAME") b.WriteString("|RENAME")
} }
if o.Has(Chmod) { if op.Has(Chmod) {
b.WriteString("|CHMOD") b.WriteString("|CHMOD")
} }
if b.Len() == 0 { if b.Len() == 0 {
@ -93,7 +70,7 @@ func (o Op) String() string {
} }
// Has reports if this operation has the given operation. // Has reports if this operation has the given operation.
func (o Op) Has(h Op) bool { return o&h != 0 } func (o Op) Has(h Op) bool { return o&h == h }
// Has reports if this event has the given operation. // Has reports if this event has the given operation.
func (e Event) Has(op Op) bool { return e.Op.Has(op) } func (e Event) Has(op Op) bool { return e.Op.Has(op) }
@ -102,45 +79,3 @@ func (e Event) Has(op Op) bool { return e.Op.Has(op) }
func (e Event) String() string { func (e Event) String() string {
return fmt.Sprintf("%-13s %q", e.Op.String(), e.Name) return fmt.Sprintf("%-13s %q", e.Op.String(), e.Name)
} }
type (
addOpt func(opt *withOpts)
withOpts struct {
bufsize int
}
)
var defaultOpts = withOpts{
bufsize: 65536, // 64K
}
func getOptions(opts ...addOpt) withOpts {
with := defaultOpts
for _, o := range opts {
o(&with)
}
return with
}
// WithBufferSize sets the [ReadDirectoryChangesW] buffer size.
//
// This only has effect on Windows systems, and is a no-op for other backends.
//
// The default value is 64K (65536 bytes) which is the highest value that works
// on all filesystems and should be enough for most applications, but if you
// have a large burst of events it may not be enough. You can increase it if
// you're hitting "queue or buffer overflow" errors ([ErrEventOverflow]).
//
// [ReadDirectoryChangesW]: https://learn.microsoft.com/en-gb/windows/win32/api/winbase/nf-winbase-readdirectorychangesw
func WithBufferSize(bytes int) addOpt {
return func(opt *withOpts) { opt.bufsize = bytes }
}
// Check if this path is recursive (ends with "/..." or "\..."), and return the
// path with the /... stripped.
func recursivePath(path string) (string, bool) {
if filepath.Base(path) == "..." {
return filepath.Dir(path), true
}
return path, false
}

View File

@ -2,8 +2,8 @@
[ "${ZSH_VERSION:-}" = "" ] && echo >&2 "Only works with zsh" && exit 1 [ "${ZSH_VERSION:-}" = "" ] && echo >&2 "Only works with zsh" && exit 1
setopt err_exit no_unset pipefail extended_glob setopt err_exit no_unset pipefail extended_glob
# Simple script to update the godoc comments on all watchers so you don't need # Simple script to update the godoc comments on all watchers. Probably took me
# to update the same comment 5 times. # more time to write this than doing it manually, but ah well 🙃
watcher=$(<<EOF watcher=$(<<EOF
// Watcher watches a set of paths, delivering events on a channel. // Watcher watches a set of paths, delivering events on a channel.
@ -16,9 +16,9 @@ watcher=$(<<EOF
// When a file is removed a Remove event won't be emitted until all file // When a file is removed a Remove event won't be emitted until all file
// descriptors are closed, and deletes will always emit a Chmod. For example: // descriptors are closed, and deletes will always emit a Chmod. For example:
// //
// fp := os.Open("file") // fp := os.Open("file")
// os.Remove("file") // Triggers Chmod // os.Remove("file") // Triggers Chmod
// fp.Close() // Triggers Remove // fp.Close() // Triggers Remove
// //
// This is the event that inotify sends, so not much can be changed about this. // This is the event that inotify sends, so not much can be changed about this.
// //
@ -32,16 +32,16 @@ watcher=$(<<EOF
// //
// To increase them you can use sysctl or write the value to the /proc file: // To increase them you can use sysctl or write the value to the /proc file:
// //
// # Default values on Linux 5.18 // # Default values on Linux 5.18
// sysctl fs.inotify.max_user_watches=124983 // sysctl fs.inotify.max_user_watches=124983
// sysctl fs.inotify.max_user_instances=128 // sysctl fs.inotify.max_user_instances=128
// //
// To make the changes persist on reboot edit /etc/sysctl.conf or // To make the changes persist on reboot edit /etc/sysctl.conf or
// /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check // /usr/lib/sysctl.d/50-default.conf (details differ per Linux distro; check
// your distro's documentation): // your distro's documentation):
// //
// fs.inotify.max_user_watches=124983 // fs.inotify.max_user_watches=124983
// fs.inotify.max_user_instances=128 // fs.inotify.max_user_instances=128
// //
// Reaching the limit will result in a "no space left on device" or "too many open // Reaching the limit will result in a "no space left on device" or "too many open
// files" error. // files" error.
@ -57,20 +57,14 @@ watcher=$(<<EOF
// control the maximum number of open files, as well as /etc/login.conf on BSD // control the maximum number of open files, as well as /etc/login.conf on BSD
// systems. // systems.
// //
// # Windows notes // # macOS notes
// //
// Paths can be added as "C:\\path\\to\\dir", but forward slashes // Spotlight indexing on macOS can result in multiple events (see [#15]). A
// ("C:/path/to/dir") will also work. // temporary workaround is to add your folder(s) to the "Spotlight Privacy
// Settings" until we have a native FSEvents implementation (see [#11]).
// //
// When a watched directory is removed it will always send an event for the // [#11]: https://github.com/fsnotify/fsnotify/issues/11
// directory itself, but may not send events for all files in that directory. // [#15]: https://github.com/fsnotify/fsnotify/issues/15
// Sometimes it will send events for all times, sometimes it will send no
// events, and often only for some files.
//
// The default ReadDirectoryChangesW() buffer size is 64K, which is the largest
// value that is guaranteed to work with SMB filesystems. If you have many
// events in quick succession this may not be enough, and you will have to use
// [WithBufferSize] to increase the value.
EOF EOF
) )
@ -79,36 +73,20 @@ new=$(<<EOF
EOF EOF
) )
newbuffered=$(<<EOF
// NewBufferedWatcher creates a new Watcher with a buffered Watcher.Events
// channel.
//
// The main use case for this is situations with a very large number of events
// where the kernel buffer size can't be increased (e.g. due to lack of
// permissions). An unbuffered Watcher will perform better for almost all use
// cases, and whenever possible you will be better off increasing the kernel
// buffers instead of adding a large userspace buffer.
EOF
)
add=$(<<EOF add=$(<<EOF
// Add starts monitoring the path for changes. // Add starts monitoring the path for changes.
// //
// A path can only be watched once; watching it more than once is a no-op and will // A path can only be watched once; attempting to watch it more than once will
// not return an error. Paths that do not yet exist on the filesystem cannot be // return an error. Paths that do not yet exist on the filesystem cannot be
// watched. // added. A watch will be automatically removed if the path is deleted.
// //
// A watch will be automatically removed if the watched path is deleted or // A path will remain watched if it gets renamed to somewhere else on the same
// renamed. The exception is the Windows backend, which doesn't remove the // filesystem, but the monitor will get removed if the path gets deleted and
// watcher on renames. // re-created, or if it's moved to a different filesystem.
// //
// Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special // Notifications on network filesystems (NFS, SMB, FUSE, etc.) or special
// filesystems (/proc, /sys, etc.) generally don't work. // filesystems (/proc, /sys, etc.) generally don't work.
// //
// Returns [ErrClosed] if [Watcher.Close] was called.
//
// See [Watcher.AddWith] for a version that allows adding options.
//
// # Watching directories // # Watching directories
// //
// All files in a directory are monitored, including new files that are created // All files in a directory are monitored, including new files that are created
@ -118,27 +96,14 @@ add=$(<<EOF
// # Watching files // # Watching files
// //
// Watching individual files (rather than directories) is generally not // Watching individual files (rather than directories) is generally not
// recommended as many programs (especially editors) update files atomically: it // recommended as many tools update files atomically. Instead of "just" writing
// will write to a temporary file which is then moved to to destination, // to the file a temporary file will be written to first, and if successful the
// overwriting the original (or some variant thereof). The watcher on the // temporary file is moved to to destination removing the original, or some
// original file is now lost, as that no longer exists. // variant thereof. The watcher on the original file is now lost, as it no
// longer exists.
// //
// The upshot of this is that a power failure or crash won't leave a // Instead, watch the parent directory and use Event.Name to filter out files
// half-written file. // you're not interested in. There is an example of this in [cmd/fsnotify/file.go].
//
// Watch the parent directory and use Event.Name to filter out files you're not
// interested in. There is an example of this in cmd/fsnotify/file.go.
EOF
)
addwith=$(<<EOF
// AddWith is like [Watcher.Add], but allows adding options. When using Add()
// the defaults described below are used.
//
// Possible options are:
//
// - [WithBufferSize] sets the buffer size for the Windows backend; no-op on
// other platforms. The default is 64K (65536 bytes).
EOF EOF
) )
@ -149,21 +114,16 @@ remove=$(<<EOF
// /tmp/dir and /tmp/dir/subdir then you will need to remove both. // /tmp/dir and /tmp/dir/subdir then you will need to remove both.
// //
// Removing a path that has not yet been added returns [ErrNonExistentWatch]. // Removing a path that has not yet been added returns [ErrNonExistentWatch].
//
// Returns nil if [Watcher.Close] was called.
EOF EOF
) )
close=$(<<EOF close=$(<<EOF
// Close removes all watches and closes the Events channel. // Close removes all watches and closes the events channel.
EOF EOF
) )
watchlist=$(<<EOF watchlist=$(<<EOF
// WatchList returns all paths explicitly added with [Watcher.Add] (and are not // WatchList returns all paths added with [Add] (and are not yet removed).
// yet removed).
//
// Returns nil if [Watcher.Close] was called.
EOF EOF
) )
@ -193,29 +153,20 @@ events=$(<<EOF
// initiated by the user may show up as one or multiple // initiated by the user may show up as one or multiple
// writes, depending on when the system syncs things to // writes, depending on when the system syncs things to
// disk. For example when compiling a large Go program // disk. For example when compiling a large Go program
// you may get hundreds of Write events, and you may // you may get hundreds of Write events, so you
// want to wait until you've stopped receiving them // probably want to wait until you've stopped receiving
// (see the dedup example in cmd/fsnotify). // them (see the dedup example in cmd/fsnotify).
//
// Some systems may send Write event for directories
// when the directory content changes.
// //
// fsnotify.Chmod Attributes were changed. On Linux this is also sent // fsnotify.Chmod Attributes were changed. On Linux this is also sent
// when a file is removed (or more accurately, when a // when a file is removed (or more accurately, when a
// link to an inode is removed). On kqueue it's sent // link to an inode is removed). On kqueue it's sent
// when a file is truncated. On Windows it's never // and on kqueue when a file is truncated. On Windows
// sent. // it's never sent.
EOF EOF
) )
errors=$(<<EOF errors=$(<<EOF
// Errors sends any errors. // Errors sends any errors.
//
// ErrEventOverflow is used to indicate there are too many events:
//
// - inotify: There are too many queued events (fs.inotify.max_queued_events sysctl)
// - windows: The buffer size is too small; WithBufferSize() can be used to increase it.
// - kqueue, fen: Not used.
EOF EOF
) )
@ -249,9 +200,7 @@ set-cmt() {
set-cmt '^type Watcher struct ' $watcher set-cmt '^type Watcher struct ' $watcher
set-cmt '^func NewWatcher(' $new set-cmt '^func NewWatcher(' $new
set-cmt '^func NewBufferedWatcher(' $newbuffered
set-cmt '^func (w \*Watcher) Add(' $add set-cmt '^func (w \*Watcher) Add(' $add
set-cmt '^func (w \*Watcher) AddWith(' $addwith
set-cmt '^func (w \*Watcher) Remove(' $remove set-cmt '^func (w \*Watcher) Remove(' $remove
set-cmt '^func (w \*Watcher) Close(' $close set-cmt '^func (w \*Watcher) Close(' $close
set-cmt '^func (w \*Watcher) WatchList(' $watchlist set-cmt '^func (w \*Watcher) WatchList(' $watchlist

View File

@ -1,23 +1,5 @@
# Changelog # Changelog
## [1.5.0](https://github.com/google/uuid/compare/v1.4.0...v1.5.0) (2023-12-12)
### Features
* Validate UUID without creating new UUID ([#141](https://github.com/google/uuid/issues/141)) ([9ee7366](https://github.com/google/uuid/commit/9ee7366e66c9ad96bab89139418a713dc584ae29))
## [1.4.0](https://github.com/google/uuid/compare/v1.3.1...v1.4.0) (2023-10-26)
### Features
* UUIDs slice type with Strings() convenience method ([#133](https://github.com/google/uuid/issues/133)) ([cd5fbbd](https://github.com/google/uuid/commit/cd5fbbdd02f3e3467ac18940e07e062be1f864b4))
### Fixes
* Clarify that Parse's job is to parse but not necessarily validate strings. (Documents current behavior)
## [1.3.1](https://github.com/google/uuid/compare/v1.3.0...v1.3.1) (2023-08-18) ## [1.3.1](https://github.com/google/uuid/compare/v1.3.0...v1.3.1) (2023-08-18)

View File

@ -11,7 +11,7 @@ please explain why in the pull request description.
### Releasing ### Releasing
Commits that would precipitate a SemVer change, as described in the Conventional Commits that would precipitate a SemVer change, as desrcibed in the Conventional
Commits Specification, will trigger [`release-please`](https://github.com/google-github-actions/release-please-action) Commits Specification, will trigger [`release-please`](https://github.com/google-github-actions/release-please-action)
to create a release candidate pull request. Once submitted, `release-please` to create a release candidate pull request. Once submitted, `release-please`
will create a release. will create a release.

View File

@ -108,23 +108,12 @@ func setClockSequence(seq int) {
} }
// Time returns the time in 100s of nanoseconds since 15 Oct 1582 encoded in // Time returns the time in 100s of nanoseconds since 15 Oct 1582 encoded in
// uuid. The time is only defined for version 1, 2, 6 and 7 UUIDs. // uuid. The time is only defined for version 1 and 2 UUIDs.
func (uuid UUID) Time() Time { func (uuid UUID) Time() Time {
var t Time time := int64(binary.BigEndian.Uint32(uuid[0:4]))
switch uuid.Version() { time |= int64(binary.BigEndian.Uint16(uuid[4:6])) << 32
case 6: time |= int64(binary.BigEndian.Uint16(uuid[6:8])&0xfff) << 48
time := binary.BigEndian.Uint64(uuid[:8]) // Ignore uuid[6] version b0110 return Time(time)
t = Time(time)
case 7:
time := binary.BigEndian.Uint64(uuid[:8])
t = Time((time>>16)*10000 + g1582ns100)
default: // forward compatible
time := int64(binary.BigEndian.Uint32(uuid[0:4]))
time |= int64(binary.BigEndian.Uint16(uuid[4:6])) << 32
time |= int64(binary.BigEndian.Uint16(uuid[6:8])&0xfff) << 48
t = Time(time)
}
return t
} }
// ClockSequence returns the clock sequence encoded in uuid. // ClockSequence returns the clock sequence encoded in uuid.

Some files were not shown because too many files have changed in this diff Show More