Enable btrfs/fuse-overlayfs/stargz snapshotter plugins

Signed-off-by: Brad Davidson <brad.davidson@rancher.com>
Brad Davidson 2025-01-18 00:05:29 +00:00
parent f660f4424f
commit 890953d3c6
2097 changed files with 543055 additions and 3912 deletions


@ -22,6 +22,9 @@ import (
_ "github.com/containerd/containerd/v2/core/metrics/cgroups/v2"
_ "github.com/containerd/containerd/v2/plugins/diff/walking/plugin"
_ "github.com/containerd/containerd/v2/plugins/snapshots/blockfile/plugin"
_ "github.com/containerd/containerd/v2/plugins/snapshots/btrfs/plugin"
_ "github.com/containerd/containerd/v2/plugins/snapshots/native/plugin"
_ "github.com/containerd/containerd/v2/plugins/snapshots/overlay/plugin"
_ "github.com/containerd/fuse-overlayfs-snapshotter/v2/plugin"
_ "github.com/containerd/stargz-snapshotter/service/plugin"
)
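
These blank imports are all the change needs: each imported package registers its snapshotter with containerd's plugin registry from an `init` function, so linking the package into the binary enables it. A rough sketch of that registration pattern, using fuse-overlayfs as the example (the package name, plugin ID, and use of `plugins.PropertyRootDir` are illustrative; the real registrations live in the imported plugin packages):

```go
package fuseoverlayfsplugin // hypothetical; the actual plugin package is imported above

import (
	"github.com/containerd/containerd/v2/plugins"
	fuseoverlayfs "github.com/containerd/fuse-overlayfs-snapshotter/v2"
	"github.com/containerd/plugin"
	"github.com/containerd/plugin/registry"
)

func init() {
	// Registering under plugins.SnapshotPlugin makes the snapshotter selectable
	// by ID (e.g. "fuse-overlayfs") once this package is linked into containerd.
	registry.Register(&plugin.Registration{
		Type: plugins.SnapshotPlugin,
		ID:   "fuse-overlayfs",
		InitFn: func(ic *plugin.InitContext) (interface{}, error) {
			// The init context provides a per-plugin state directory for the snapshotter root.
			return fuseoverlayfs.NewSnapshotter(ic.Properties[plugins.PropertyRootDir])
		},
	})
}
```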

go.mod

@ -14,10 +14,11 @@ require (
github.com/containerd/cgroups/v3 v3.0.3
github.com/containerd/console v1.0.4
github.com/containerd/containerd/api v1.8.0
github.com/containerd/continuity v0.4.4
github.com/containerd/continuity v0.4.5
github.com/containerd/errdefs v1.0.0
github.com/containerd/errdefs/pkg v0.3.0
github.com/containerd/fifo v1.1.0
github.com/containerd/fuse-overlayfs-snapshotter/v2 v2.1.0
github.com/containerd/go-cni v1.1.12
github.com/containerd/go-runc v1.1.0
github.com/containerd/imgcrypt/v2 v2.0.0
@ -26,6 +27,7 @@ require (
github.com/containerd/otelttrpc v0.1.0
github.com/containerd/platforms v1.0.0-rc.1
github.com/containerd/plugin v1.0.0
github.com/containerd/stargz-snapshotter v0.16.3
github.com/containerd/ttrpc v1.2.7
github.com/containerd/typeurl/v2 v2.2.3
github.com/containerd/zfs/v2 v2.0.0-rc.0
@ -40,7 +42,6 @@ require (
github.com/fsnotify/fsnotify v1.7.0
github.com/google/go-cmp v0.6.0
github.com/google/uuid v1.6.0
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1
github.com/intel/goresctrl v0.8.0
github.com/klauspost/compress v1.17.11
github.com/mdlayher/vsock v1.2.1
@ -81,7 +82,7 @@ require (
k8s.io/apimachinery v0.31.2
k8s.io/client-go v0.31.2
k8s.io/component-base v0.31.2
k8s.io/cri-api v0.31.2
k8s.io/cri-api v0.32.0-alpha.0
k8s.io/klog/v2 v2.130.1
k8s.io/kubelet v0.31.2
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8
@ -93,23 +94,37 @@ require (
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/cilium/ebpf v0.11.0 // indirect
github.com/containerd/stargz-snapshotter/estargz v0.16.3 // indirect
github.com/containers/ocicrypt v1.2.1 // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect
github.com/docker/cli v27.3.1+incompatible // indirect
github.com/docker/docker-credential-helpers v0.7.0 // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/go-jose/go-jose/v4 v4.0.5 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-openapi/jsonpointer v0.19.6 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/swag v0.22.4 // indirect
github.com/godbus/dbus/v5 v5.1.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/gnostic-models v0.6.8 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/gorilla/websocket v1.5.0 // indirect
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect
github.com/hanwen/go-fuse/v2 v2.6.3 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/go-retryablehttp v0.7.7 // indirect
github.com/imdario/mergo v0.3.13 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mdlayher/socket v0.4.1 // indirect
github.com/miekg/pkcs11 v1.1.1 // indirect
github.com/mistifyio/go-zfs/v3 v3.0.1 // indirect
@ -127,8 +142,10 @@ require (
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/sasha-s/go-deadlock v0.3.5 // indirect
github.com/smallstep/pkcs7 v0.1.1 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stefanberger/go-pkcs11uri v0.0.0-20230803200340-78284954bff6 // indirect
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
github.com/vbatts/tar-split v0.11.6 // indirect
github.com/vishvananda/netns v0.0.4 // indirect
github.com/x448/float16 v0.8.4 // indirect
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect
@ -148,6 +165,7 @@ require (
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/api v0.31.2 // indirect
k8s.io/apiserver v0.31.2 // indirect
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect

go.sum

@ -41,14 +41,16 @@ github.com/containerd/console v1.0.4 h1:F2g4+oChYvBTsASRTz8NP6iIAi97J3TtSAsLbIFn
github.com/containerd/console v1.0.4/go.mod h1:YynlIjWYF8myEu6sdkwKIvGQq+cOckRm6So2avqoYAk=
github.com/containerd/containerd/api v1.8.0 h1:hVTNJKR8fMc/2Tiw60ZRijntNMd1U+JVMyTRdsD2bS0=
github.com/containerd/containerd/api v1.8.0/go.mod h1:dFv4lt6S20wTu/hMcP4350RL87qPWLVa/OHOwmmdnYc=
github.com/containerd/continuity v0.4.4 h1:/fNVfTJ7wIl/YPMHjf+5H32uFhl63JucB34PlCpMKII=
github.com/containerd/continuity v0.4.4/go.mod h1:/lNJvtJKUQStBzpVQ1+rasXO1LAWtUQssk28EZvJ3nE=
github.com/containerd/continuity v0.4.5 h1:ZRoN1sXq9u7V6QoHMcVWGhOwDFqZ4B9i5H6un1Wh0x4=
github.com/containerd/continuity v0.4.5/go.mod h1:/lNJvtJKUQStBzpVQ1+rasXO1LAWtUQssk28EZvJ3nE=
github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk=
github.com/containerd/fifo v1.1.0 h1:4I2mbh5stb1u6ycIABlBw9zgtlK8viPI9QkQNRQEEmY=
github.com/containerd/fifo v1.1.0/go.mod h1:bmC4NWMbXlt2EZ0Hc7Fx7QzTFxgPID13eH0Qu+MAb2o=
github.com/containerd/fuse-overlayfs-snapshotter/v2 v2.1.0 h1:okk7wQXjHJhG+Y+Rs3wToje/yHJInlE3DjLNQNJ1WGI=
github.com/containerd/fuse-overlayfs-snapshotter/v2 v2.1.0/go.mod h1:yK/eAdWigKE4XsBi8WDHV52jO8MJZcTRU5tCVOnEE9w=
github.com/containerd/go-cni v1.1.12 h1:wm/5VD/i255hjM4uIZjBRiEQ7y98W9ACy/mHeLi4+94=
github.com/containerd/go-cni v1.1.12/go.mod h1:+jaqRBdtW5faJxj2Qwg1Of7GsV66xcvnCx4mSJtUlxU=
github.com/containerd/go-runc v1.1.0 h1:OX4f+/i2y5sUT7LhmcJH7GYrjjhHa1QI4e8yO0gGleA=
@ -65,6 +67,10 @@ github.com/containerd/platforms v1.0.0-rc.1 h1:83KIq4yy1erSRgOVHNk1HYdPvzdJ5CnsW
github.com/containerd/platforms v1.0.0-rc.1/go.mod h1:J71L7B+aiM5SdIEqmd9wp6THLVRzJGXfNuWCZCllLA4=
github.com/containerd/plugin v1.0.0 h1:c8Kf1TNl6+e2TtMHZt+39yAPDbouRH9WAToRjex483Y=
github.com/containerd/plugin v1.0.0/go.mod h1:hQfJe5nmWfImiqT1q8Si3jLv3ynMUIBB47bQ+KexvO8=
github.com/containerd/stargz-snapshotter v0.16.3 h1:zbQMm8dRuPHEOD4OqAYGajJJUwCeUzt4j7w9Iaw58u4=
github.com/containerd/stargz-snapshotter v0.16.3/go.mod h1:XPOl2oa9zjWidTM2IX191smolwWc3/zkKtp02TzTFb0=
github.com/containerd/stargz-snapshotter/estargz v0.16.3 h1:7evrXtoh1mSbGj/pfRccTampEyKpjpOnS3CyiV1Ebr8=
github.com/containerd/stargz-snapshotter/estargz v0.16.3/go.mod h1:uyr4BfYfOj3G9WBVE8cOlQmXAbPN9VEQpBBeJIuOipU=
github.com/containerd/ttrpc v1.2.7 h1:qIrroQvuOL9HQ1X6KHe2ohc7p+HP/0VE6XPU7elJRqQ=
github.com/containerd/ttrpc v1.2.7/go.mod h1:YCXHsb32f+Sq5/72xHubdiJRQY9inL4a4ZQrAbN1q9o=
github.com/containerd/typeurl/v2 v2.2.3 h1:yNA/94zxWdvYACdYO8zofhrTVuQY73fFU1y++dYSw40=
@ -81,11 +87,16 @@ github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc=
github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
github.com/docker/cli v27.3.1+incompatible h1:qEGdFBF3Xu6SCvCYhc7CzaQTlBmqDuzxPDpigSyeKQQ=
github.com/docker/cli v27.3.1+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8=
github.com/docker/docker-credential-helpers v0.7.0 h1:xtCHsjxogADNZcdv1pKUHXryefjlVRqWqIhk/uXJp0A=
github.com/docker/docker-credential-helpers v0.7.0/go.mod h1:rETQfLdHNT3foU5kuNkFR1R1V12OJRRO5lzt2D1b5X0=
github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c h1:+pKlWGMw7gf6bQ+oDZB4KHQFypsfjYlq/C4rfL7D3g8=
github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c/go.mod h1:Uw6UezgYA44ePAFQYUehOuCzmy5zmg/+nl2ZfMWGkpA=
github.com/docker/go-metrics v0.0.1 h1:AgB/0SvBxihN0X8OR4SjsblXkbMvalQ8cjmtKQ2rQV8=
@ -98,6 +109,8 @@ github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymF
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/frankban/quicktest v1.14.5 h1:dfYrrRyLtiqT9GyKXgdh+k4inNeTvmGbuSgZ3lx3GhA=
@ -120,6 +133,7 @@ github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn
github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs=
github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE=
github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k=
github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU=
github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
@ -171,17 +185,23 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc=
github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1 h1:qnpSQwGEnkcRpTqNOIR6bJbR0gAorgP9CSALpRcKoAA=
github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus v1.0.1/go.mod h1:lXGCsh6c22WGtjr+qGHj1otzZpV/1kwTMAqkwZsnWRU=
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0 h1:pRhl55Yx1eC7BZ1N+BBWwnKaMyD8uC+34TLdndZMAKk=
github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.1.0/go.mod h1:XKMd7iuf/RGPSMJ/U4HP0zS2Z9Fh8Ps9a+6X26m/tmI=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 h1:asbCHRVmodnJTuQ3qamDwqVOIjwqUPTYmYuemVOx+Ys=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0/go.mod h1:ggCgvZ2r7uOoQjOyu2Y1NhHmEPPzzuhWgcza5M1Ji1I=
github.com/hanwen/go-fuse/v2 v2.6.3 h1:tDcEkLRx93lXu4XyN1/j8Z74VWvhHDl6qU1kNnvFUqI=
github.com/hanwen/go-fuse/v2 v2.6.3/go.mod h1:ugNaD/iv5JYyS1Rcvi57Wz7/vrLQJo10mmketmoef48=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k=
github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU=
github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk=
github.com/imdario/mergo v0.3.13 h1:lFzP57bqS/wsqKssCGmtLAb8A0wKjLGrve2q3PPVcBk=
github.com/imdario/mergo v0.3.13/go.mod h1:4lJ1jqUDcsbIECGy0RUJAXNIhg+6ocWgb1ALK2O4oXg=
github.com/intel/goresctrl v0.8.0 h1:N3shVbS3kA1Hk2AmcbHv8805Hjbv+zqsCIZCGktxx50=
github.com/intel/goresctrl v0.8.0/go.mod h1:T3ZZnuHSNouwELB5wvOoUJaB7l/4Rm23rJy/wuWJlr0=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
@ -197,14 +217,21 @@ github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IX
github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA=
github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/mdlayher/socket v0.4.1 h1:eM9y2/jlbs1M615oshPQOHZzj6R6wMT7bX5NPiQvn2U=
github.com/mdlayher/socket v0.4.1/go.mod h1:cAqeGjoufqdxWkD7DkpyS+wcefOtmu5OQ8KuoJGIReA=
@ -325,6 +352,8 @@ github.com/tchap/go-patricia/v2 v2.3.1/go.mod h1:VZRHKAb53DLaG+nA9EaYYiaEx6YztwD
github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/urfave/cli/v2 v2.27.5 h1:WoHEJLdsXr6dDWoJgMq/CboDmyY/8HMMH1fTECbih+w=
github.com/urfave/cli/v2 v2.27.5/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ=
github.com/vbatts/tar-split v0.11.6 h1:4SjTW5+PU11n6fZenf2IPoV8/tz3AaYHMWjf23envGs=
github.com/vbatts/tar-split v0.11.6/go.mod h1:dqKNtesIOr2j2Qv3W/cHjnvk9I8+G7oAkFDFN6TCBEI=
github.com/vishvananda/netlink v1.3.0 h1:X7l42GfcV4S6E4vHTsw48qbrV+9PVojNfIhZcwQdrZk=
github.com/vishvananda/netlink v1.3.0/go.mod h1:i6NetklAujEcC6fK0JPjT8qSwWyO0HLn4UKG+hGqeJs=
github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8=
@ -536,8 +565,11 @@ gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools/v3 v3.5.0 h1:Ljk6PdHdOhAb5aDMWXjDLMMhph+BpztA4v1QdqEW2eY=
gotest.tools/v3 v3.5.0/go.mod h1:isy3WKz7GK6uNw/sbHzfKBLvlvXwUyV06n6brMxxopU=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
k8s.io/api v0.31.2 h1:3wLBbL5Uom/8Zy98GRPXpJ254nEFpl+hwndmk9RwmL0=
@ -550,8 +582,8 @@ k8s.io/client-go v0.31.2 h1:Y2F4dxU5d3AQj+ybwSMqQnpZH9F30//1ObxOKlTI9yc=
k8s.io/client-go v0.31.2/go.mod h1:NPa74jSVR/+eez2dFsEIHNa+3o09vtNaWwWwb1qSxSs=
k8s.io/component-base v0.31.2 h1:Z1J1LIaC0AV+nzcPRFqfK09af6bZ4D1nAOpWsy9owlA=
k8s.io/component-base v0.31.2/go.mod h1:9PeyyFN/drHjtJZMCTkSpQJS3U9OXORnHQqMLDz0sUQ=
k8s.io/cri-api v0.31.2 h1:O/weUnSHvM59nTio0unxIUFyRHMRKkYn96YDILSQKmo=
k8s.io/cri-api v0.31.2/go.mod h1:Po3TMAYH/+KrZabi7QiwQI4a692oZcUOUThd/rqwxrI=
k8s.io/cri-api v0.32.0-alpha.0 h1:Rs9prajcHWZAdy9ueQdD2R+OOnDD3rKYbM9hQ90iEQU=
k8s.io/cri-api v0.32.0-alpha.0/go.mod h1:Po3TMAYH/+KrZabi7QiwQI4a692oZcUOUThd/rqwxrI=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag=


@ -1,14 +1,14 @@
linters:
enable:
- staticcheck
- unconvert
- gofmt
- goimports
- govet
- ineffassign
- revive
- vet
- unused
- misspell
- revive
- staticcheck
- unconvert
- unused
disable:
- errcheck


@ -46,6 +46,7 @@ generate:
lint:
@echo "+ $@"
@golangci-lint run
@(cd cmd/continuity && golangci-lint --config=../../.golangci.yml run)
build:
@echo "+ $@"


@ -19,10 +19,36 @@
package fs
import (
"fmt"
"io/fs"
"syscall"
"time"
)
func Atime(st fs.FileInfo) (time.Time, error) {
stSys, ok := st.Sys().(*syscall.Stat_t)
if !ok {
return time.Time{}, fmt.Errorf("expected st.Sys() to be *syscall.Stat_t, got %T", st.Sys())
}
return time.Unix(stSys.Atimespec.Unix()), nil
}
func Ctime(st fs.FileInfo) (time.Time, error) {
stSys, ok := st.Sys().(*syscall.Stat_t)
if !ok {
return time.Time{}, fmt.Errorf("expected st.Sys() to be *syscall.Stat_t, got %T", st.Sys())
}
return time.Unix(stSys.Ctimespec.Unix()), nil
}
func Mtime(st fs.FileInfo) (time.Time, error) {
stSys, ok := st.Sys().(*syscall.Stat_t)
if !ok {
return time.Time{}, fmt.Errorf("expected st.Sys() to be *syscall.Stat_t, got %T", st.Sys())
}
return time.Unix(stSys.Mtimespec.Unix()), nil
}
// StatAtime returns the access time from a stat struct
func StatAtime(st *syscall.Stat_t) syscall.Timespec {
return st.Atimespec


@ -30,7 +30,7 @@ func Atime(st fs.FileInfo) (time.Time, error) {
if !ok {
return time.Time{}, fmt.Errorf("expected st.Sys() to be *syscall.Stat_t, got %T", st.Sys())
}
return StatATimeAsTime(stSys), nil
return time.Unix(stSys.Atim.Unix()), nil
}
func Ctime(st fs.FileInfo) (time.Time, error) {
@ -38,7 +38,7 @@ func Ctime(st fs.FileInfo) (time.Time, error) {
if !ok {
return time.Time{}, fmt.Errorf("expected st.Sys() to be *syscall.Stat_t, got %T", st.Sys())
}
return time.Unix(stSys.Atim.Unix()), nil
return time.Unix(stSys.Ctim.Unix()), nil
}
func Mtime(st fs.FileInfo) (time.Time, error) {


@ -56,8 +56,8 @@ func Unmarshal(p []byte) (*Manifest, error) {
func Marshal(m *Manifest) ([]byte, error) {
var bm pb.Manifest
for _, resource := range m.Resources {
bm.Resource = append(bm.Resource, toProto(resource))
for _, rsrc := range m.Resources {
bm.Resource = append(bm.Resource, toProto(rsrc))
}
return proto.Marshal(&bm)
@ -65,8 +65,8 @@ func Marshal(m *Manifest) ([]byte, error) {
func MarshalText(w io.Writer, m *Manifest) error {
var bm pb.Manifest
for _, resource := range m.Resources {
bm.Resource = append(bm.Resource, toProto(resource))
for _, rsrc := range m.Resources {
bm.Resource = append(bm.Resource, toProto(rsrc))
}
b, err := prototext.Marshal(&bm)
@ -78,11 +78,11 @@ func MarshalText(w io.Writer, m *Manifest) error {
}
// BuildManifest creates the manifest for the given context
func BuildManifest(ctx Context) (*Manifest, error) {
func BuildManifest(fsContext Context) (*Manifest, error) {
resourcesByPath := map[string]Resource{}
hardLinks := newHardlinkManager()
if err := ctx.Walk(func(p string, fi os.FileInfo, err error) error {
if err := fsContext.Walk(func(p string, fi os.FileInfo, err error) error {
if err != nil {
return fmt.Errorf("error walking %s: %w", p, err)
}
@ -92,7 +92,7 @@ func BuildManifest(ctx Context) (*Manifest, error) {
return nil
}
resource, err := ctx.Resource(p, fi)
rsrc, err := fsContext.Resource(p, fi)
if err != nil {
if err == ErrNotFound {
return nil
@ -101,7 +101,7 @@ func BuildManifest(ctx Context) (*Manifest, error) {
}
// add to the hardlink manager
if err := hardLinks.Add(fi, resource); err == nil {
if err := hardLinks.Add(fi, rsrc); err == nil {
// Resource has been accepted by hardlink manager so we don't add
// it to the resourcesByPath until we merge at the end.
return nil
@ -110,7 +110,7 @@ func BuildManifest(ctx Context) (*Manifest, error) {
return fmt.Errorf("adding hardlink %s: %w", p, err)
}
resourcesByPath[p] = resource
resourcesByPath[p] = rsrc
return nil
}); err != nil {
@ -123,13 +123,13 @@ func BuildManifest(ctx Context) (*Manifest, error) {
return nil, err
}
for _, resource := range hardLinked {
resourcesByPath[resource.Path()] = resource
for _, rsrc := range hardLinked {
resourcesByPath[rsrc.Path()] = rsrc
}
var resources []Resource
for _, resource := range resourcesByPath {
resources = append(resources, resource)
for _, rsrc := range resourcesByPath {
resources = append(resources, rsrc)
}
sort.Stable(ByPath(resources))
@ -141,9 +141,9 @@ func BuildManifest(ctx Context) (*Manifest, error) {
// VerifyManifest verifies all the resources in a manifest
// against files from the given context.
func VerifyManifest(ctx Context, manifest *Manifest) error {
for _, resource := range manifest.Resources {
if err := ctx.Verify(resource); err != nil {
func VerifyManifest(fsContext Context, manifest *Manifest) error {
for _, rsrc := range manifest.Resources {
if err := fsContext.Verify(rsrc); err != nil {
return err
}
}
@ -153,9 +153,9 @@ func VerifyManifest(ctx Context, manifest *Manifest) error {
// ApplyManifest applies on the resources in a manifest to
// the given context.
func ApplyManifest(ctx Context, manifest *Manifest) error {
for _, resource := range manifest.Resources {
if err := ctx.Apply(resource); err != nil {
func ApplyManifest(fsContext Context, manifest *Manifest) error {
for _, rsrc := range manifest.Resources {
if err := fsContext.Apply(rsrc); err != nil {
return err
}
}


@ -0,0 +1,2 @@
*.test
bin


@ -0,0 +1,3 @@
*.test
bin
/_output


@ -0,0 +1,65 @@
# Copyright The containerd Authors.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG FUSEOVERLAYFS_COMMIT=main
ARG ROOTLESSKIT_COMMIT=v1.1.0
ARG GO_VERSION=1.22
ARG DEBIAN_VERSION=11
ARG ALPINE_VERSION=3.18
FROM golang:${GO_VERSION}-alpine AS containerd-fuse-overlayfs-test
COPY . /go/src/github.com/containerd/fuse-overlayfs-snapshotter
WORKDIR /go/src/github.com/containerd/fuse-overlayfs-snapshotter
ENV CGO_ENABLED=0
ENV GO111MODULE=on
RUN go build ./...
RUN mkdir /out && go test -c -o /out/containerd-fuse-overlayfs.test
# from https://github.com/containers/fuse-overlayfs/blob/53c17dab78b43de1cd121bf9260b20b76371bbaf/Dockerfile.static.ubuntu
FROM debian:${DEBIAN_VERSION} AS fuse-overlayfs
RUN apt-get update && \
apt-get install --no-install-recommends -y \
git ca-certificates libc6-dev gcc g++ make automake autoconf clang pkgconf libfuse3-dev
RUN git clone https://github.com/containers/fuse-overlayfs
WORKDIR fuse-overlayfs
ARG FUSEOVERLAYFS_COMMIT
RUN git pull && git checkout ${FUSEOVERLAYFS_COMMIT}
RUN ./autogen.sh && \
LIBS="-ldl" LDFLAGS="-static" ./configure && \
make && mkdir /out && cp fuse-overlayfs /out
FROM golang:${GO_VERSION}-alpine AS rootlesskit
RUN apk add --no-cache git
RUN git clone https://github.com/rootless-containers/rootlesskit.git /go/src/github.com/rootless-containers/rootlesskit
WORKDIR /go/src/github.com/rootless-containers/rootlesskit
ARG ROOTLESSKIT_COMMIT
RUN git pull && git checkout ${ROOTLESSKIT_COMMIT}
ENV CGO_ENABLED=0
RUN mkdir /out && go build -o /out/rootlesskit github.com/rootless-containers/rootlesskit/cmd/rootlesskit
FROM alpine:${ALPINE_VERSION}
COPY --from=containerd-fuse-overlayfs-test /out/containerd-fuse-overlayfs.test /usr/local/bin
COPY --from=rootlesskit /out/rootlesskit /usr/local/bin
COPY --from=fuse-overlayfs /out/fuse-overlayfs /usr/local/bin
RUN apk add --no-cache fuse3 libcap shadow-uidmap && \
setcap CAP_SETUID=ep /usr/bin/newuidmap && \
setcap CAP_SETGID=ep /usr/bin/newgidmap && \
adduser -D -u 1000 testuser && \
echo testuser:100000:65536 | tee /etc/subuid | tee /etc/subgid
USER testuser
# If /tmp is a real overlayfs, some tests fail. Mount a volume to ensure /tmp is a sane filesystem.
VOLUME /tmp
# requires --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/fuse
CMD ["rootlesskit", "containerd-fuse-overlayfs.test", "-test.root", "-test.v"]


@ -0,0 +1,191 @@
Apache License
Version 2.0, January 2004
https://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Copyright The containerd Authors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.


@ -0,0 +1,4 @@
# fuse-overlayfs-snapshotter maintainers
#
# As a containerd sub-project, containerd maintainers are also included from https://github.com/containerd/project/blob/master/MAINTAINERS.
# See https://github.com/containerd/project/blob/master/GOVERNANCE.md for description of maintainer role


@ -0,0 +1,108 @@
# Copyright The containerd Authors.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Deliverables path
DESTDIR ?= /usr/local
BINDIR ?= $(DESTDIR)/bin
# Tools path
ECHO ?= echo
DOCKER ?= docker
GO ?= go
MKDIR ?= mkdir
TAR ?= tar
INSTALL ?= install
GIT ?= git
TARGET_BIN=containerd-fuse-overlayfs-grpc
VERSION ?= $(shell $(GIT) describe --match 'v[0-9]*' --dirty='.m' --always --tags)
VERSION_TRIMMED := $(VERSION:v%=%)
REVISION ?= $(shell $(GIT) rev-parse HEAD)$(shell if ! $(GIT) diff --no-ext-diff --quiet --exit-code; then $(ECHO) .m; fi)
PKG_MAIN := github.com/containerd/fuse-overlayfs-snapshotter/v2/cmd/$(TARGET_BIN)
PKG_VERSION := github.com/containerd/fuse-overlayfs-snapshotter/v2/cmd/$(TARGET_BIN)/version
export GO_BUILD=GO111MODULE=on CGO_ENABLED=0 $(GO) build -ldflags "-s -w -X $(PKG_VERSION).Version=$(VERSION) -X $(PKG_VERSION).Revision=$(REVISION)"
bin/$(TARGET_BIN):
$(GO_BUILD) -o $@ $(PKG_MAIN)
all: binaries
help:
@$(ECHO) "Usage: make <target>"
@$(ECHO)
@$(ECHO) " * 'install' - Install binaries to system locations."
@$(ECHO) " * 'uninstall' - Uninstall binaries from system."
@$(ECHO) " * 'binaries' - Build $(TARGET_BIN)."
@$(ECHO) " * 'test' - Run tests."
@$(ECHO) " * 'clean' - Clean artifacts."
@$(ECHO) " * 'help' - Show this help message."
binaries: bin/$(TARGET_BIN)
$(TARGET_BIN):
$(GO_BUILD) -o $(CURDIR)/bin/$@ $(PKG_MAIN)
binaries: $(TARGET_BIN)
install:
$(INSTALL) -D -m 755 $(CURDIR)/bin/$(TARGET_BIN) $(BINDIR)/$(TARGET_BIN)
uninstall:
$(RM) $(BINDIR)/$(TARGET_BIN)
clean:
$(RM) -r $(CURDIR)/bin $(CURDIR)/_output
TEST_DOCKER_IMG_TAG=containerd-fuse-overlayfs-test
test:
DOCKER_BUILDKIT=1 $(DOCKER) build -t $(TEST_DOCKER_IMG_TAG) --build-arg FUSEOVERLAYFS_COMMIT=${FUSEOVERLAYFS_COMMIT} .
$(DOCKER) run --rm $(TEST_DOCKER_IMG_TAG) fuse-overlayfs -V
$(DOCKER) run --rm --security-opt seccomp=unconfined --security-opt apparmor=unconfined --device /dev/fuse $(TEST_DOCKER_IMG_TAG)
$(DOCKER) rmi $(TEST_DOCKER_IMG_TAG)
_test:
$(GO) test -exec rootlesskit -test.v -test.root
TAR_FLAGS=--transform 's/.*\///g' --owner=0 --group=0
ARTIFACT_NAME=containerd-fuse-overlayfs-$(VERSION_TRIMMED)
artifacts: clean
$(MKDIR) -p _output
GOOS=linux GOARCH=amd64 make -B
$(TAR) $(TAR_FLAGS) -czvf _output/$(ARTIFACT_NAME)-linux-amd64.tar.gz $(CURDIR)/bin/*
GOOS=linux GOARCH=arm64 make -B
$(TAR) $(TAR_FLAGS) -czvf _output/$(ARTIFACT_NAME)-linux-arm64.tar.gz $(CURDIR)/bin/*
GOOS=linux GOARCH=arm GOARM=7 make -B
$(TAR) $(TAR_FLAGS) -czvf _output/$(ARTIFACT_NAME)-linux-arm-v7.tar.gz $(CURDIR)/bin/*
GOOS=linux GOARCH=ppc64le make -B
$(TAR) $(TAR_FLAGS) -czvf _output/$(ARTIFACT_NAME)-linux-ppc64le.tar.gz $(CURDIR)/bin/*
GOOS=linux GOARCH=s390x make -B
$(TAR) $(TAR_FLAGS) -czvf _output/$(ARTIFACT_NAME)-linux-s390x.tar.gz $(CURDIR)/bin/*
GOOS=linux GOARCH=riscv64 make -B
$(TAR) $(TAR_FLAGS) -czvf _output/$(ARTIFACT_NAME)-linux-riscv64.tar.gz $(CURDIR)/bin/*
.PHONY: \
$(TARGET_BIN) \
install \
uninstall \
clean \
test \
_test \
artifacts \
help


@ -0,0 +1,157 @@
# [`fuse-overlayfs`](https://github.com/containers/fuse-overlayfs) snapshotter plugin for [containerd](https://containerd.io)
Unlike `overlayfs`, `fuse-overlayfs` can be used as a non-root user on almost all recent distros.
You do NOT need this `fuse-overlayfs` plugin in the following environments, because they support the real `overlayfs` for non-root users:
- [kernel >= 5.11](https://github.com/torvalds/linux/commit/459c7c565ac36ba09ffbf24231147f408fde4203)
- [Ubuntu kernel, since circa 2015](https://kernel.ubuntu.com/git/ubuntu/ubuntu-bionic.git/commit/fs/overlayfs?id=3b7da90f28fe1ed4b79ef2d994c81efbc58f1144)
fuse-overlayfs-snapshotter is a **non-core** sub-project of containerd.
## Requirements
* kernel >= 4.18
* containerd >= 1.4
* fuse-overlayfs >= 0.7.0
## Setup
Two installation options are supported:
1. Embed `fuse-overlayfs` plugin into the containerd binary
2. Execute `fuse-overlayfs` plugin as a separate binary
Choose 1 if you don't mind recompiling containerd, otherwise choose 2.
### Option 1: Embed `fuse-overlayfs` plugin into the containerd binary
Create `builtins_fuseoverlayfs_linux.go` under [`$GOPATH/src/github.com/containerd/containerd/cmd/containerd/builtins`](https://github.com/containerd/containerd/tree/master/cmd/containerd/builtins)
with the following content, and recompile the containerd binary:
```go
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package main
import _ "github.com/containerd/fuse-overlayfs-snapshotter/v2/plugin"
```
No extra configuration is needed.
See https://github.com/containerd/containerd/blob/master/docs/rootless.md for how to run containerd as a non-root user.
### Option 2: Execute `fuse-overlayfs` plugin as a separate binary
#### "Easy way"
The easiest way is to use `containerd-rootless-setuptool.sh` included in [nerdctl](https://github.com/containerd/nerdctl).
```console
$ containerd-rootless-setuptool.sh install
$ containerd-rootless-setuptool.sh install-fuse-overlayfs
[INFO] Creating "/home/exampleuser/.config/systemd/user/containerd-fuse-overlayfs.service"
...
[INFO] Installed "containerd-fuse-overlayfs.service" successfully.
[INFO] To control "containerd-fuse-overlayfs.service", run: `systemctl --user (start|stop|restart) containerd-fuse-overlayfs.service`
[INFO] Add the following lines to "/home/exampleuser/.config/containerd/config.toml" manually:
### BEGIN ###
[proxy_plugins]
[proxy_plugins."fuse-overlayfs"]
type = "snapshot"
address = "/run/user/1000/containerd-fuse-overlayfs.sock"
### END ###
[INFO] Set `export CONTAINERD_SNAPSHOTTER="fuse-overlayfs"` to use the fuse-overlayfs snapshotter.
```
Add the `[proxy_plugins."fuse-overlayfs"]` configuration shown above to `~/.config/containerd/config.toml`.
"1000" needs to be replaced with your actual UID.
#### "Hard way"
<details>
<summary>Click here to show the "hard way"</summary>
<p>
* Install `containerd-fuse-overlayfs-grpc` binary. The binary will be installed under `$DESTDIR/bin`.
```console
$ make && DESTDIR=$HOME make install
```
* Create the following configuration in `~/.config/containerd/config.toml`:
```toml
version = 2
# substitute "/home/suda" with your own $HOME
root = "/home/suda/.local/share/containerd"
# substitute "/run/user/1001" with your own $XDG_RUNTIME_DIR
state = "/run/user/1001/containerd"
[grpc]
address = "/run/user/1001/containerd/containerd.sock"
[proxy_plugins]
[proxy_plugins."fuse-overlayfs"]
type = "snapshot"
address = "/run/user/1001/containerd/fuse-overlayfs.sock"
```
* Start [RootlessKit](https://github.com/rootless-containers/rootlesskit) with `sleep infinity` (or any kind of "pause" command):
```console
$ rootlesskit \
--net=slirp4netns --disable-host-loopback \
--copy-up=/etc --copy-up=/run \
--state-dir=$XDG_RUNTIME_DIR/rootlesskit-containerd \
sh -c "rm -rf /run/containerd ; sleep infinity"
```
(Note: `rm -rf /run/containerd` is a workaround for [containerd/containerd#2767](https://github.com/containerd/containerd/issues/2767))
* Enter the RootlessKit namespaces and run `containerd-fuse-overlayfs-grpc`:
```console
$ nsenter -U --preserve-credentials -m -n -t $(cat $XDG_RUNTIME_DIR/rootlesskit-containerd/child_pid) \
containerd-fuse-overlayfs-grpc $XDG_RUNTIME_DIR/containerd/fuse-overlayfs.sock $HOME/.local/share/containerd-fuse-overlayfs
```
* Enter the same namespaces and run `containerd`:
```console
$ nsenter -U --preserve-credentials -m -n -t $(cat $XDG_RUNTIME_DIR/rootlesskit-containerd/child_pid) \
containerd -c $HOME/.config/containerd/config.toml
```
</p>
</details>
## Usage
```console
$ export CONTAINERD_SNAPSHOTTER=fuse-overlayfs
$ nerdctl run ...
```
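The same selection can be made programmatically. A rough sketch assuming the containerd v2 Go client API, using the rootless socket path from the configuration above (the image reference and namespace are placeholders):
```go
package main

import (
	"context"
	"log"

	"github.com/containerd/containerd/v2/client"
)

func main() {
	// Connect to the rootless containerd instance configured above.
	c, err := client.New("/run/user/1001/containerd/containerd.sock",
		client.WithDefaultNamespace("default"))
	if err != nil {
		log.Fatal(err)
	}
	defer c.Close()

	// Pull and unpack through the fuse-overlayfs proxy snapshotter instead of
	// relying on the CONTAINERD_SNAPSHOTTER environment variable.
	img, err := c.Pull(context.Background(), "docker.io/library/alpine:latest",
		client.WithPullUnpack,
		client.WithPullSnapshotter("fuse-overlayfs"))
	if err != nil {
		log.Fatal(err)
	}
	log.Println("unpacked", img.Name())
}
```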
## How to test
To run the test as a non-root user, [RootlessKit](https://github.com/rootless-containers/rootlesskit) needs to be installed.
```console
$ go test -exec rootlesskit -test.v -test.root
```
## Project details
fuse-overlayfs-snapshotter is a containerd **non-core** sub-project, licensed under the [Apache 2.0 license](./LICENSE).
As a containerd non-core sub-project, you will find the:
* [Project governance](https://github.com/containerd/project/blob/master/GOVERNANCE.md),
* [Maintainers](./MAINTAINERS),
* and [Contributing guidelines](https://github.com/containerd/project/blob/master/CONTRIBUTING.md)
information in our [`containerd/project`](https://github.com/containerd/project) repository.


@ -0,0 +1,82 @@
//go:build linux
// +build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package fuseoverlayfs
import (
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"github.com/containerd/containerd/v2/core/mount"
"github.com/containerd/log"
)
// supportsReadonlyMultipleLowerDir checks if a read-only mount with multiple lowerdirs can be mounted with fuse-overlayfs.
// https://github.com/containers/fuse-overlayfs/pull/133
func supportsReadonlyMultipleLowerDir(d string) error {
td, err := ioutil.TempDir(d, "fuseoverlayfs-check")
if err != nil {
return err
}
defer func() {
if err := os.RemoveAll(td); err != nil {
log.L.WithError(err).Warnf("Failed to remove check directory %v", td)
}
}()
for _, dir := range []string{"lower1", "lower2", "merged"} {
if err := os.Mkdir(filepath.Join(td, dir), 0755); err != nil {
return err
}
}
opts := []string{fmt.Sprintf("lowerdir=%s:%s", filepath.Join(td, "lower2"), filepath.Join(td, "lower1"))}
m := mount.Mount{
Type: "fuse3." + fuseoverlayfsBinary,
Source: "overlay",
Options: opts,
}
dest := filepath.Join(td, "merged")
if err := m.Mount(dest); err != nil {
return fmt.Errorf("failed to mount fuse-overlayfs (%+v) on %s: %w", m, dest, err)
}
if err := mount.UnmountAll(dest, 0); err != nil {
log.L.WithError(err).Warnf("Failed to unmount check directory %v", dest)
}
return nil
}
// Supported returns nil when the overlayfs is functional on the system with the root directory.
// Supported is not called during plugin initialization, but is exposed for downstream projects that use
// this snapshotter as a library.
func Supported(root string) error {
if _, err := exec.LookPath(fuseoverlayfsBinary); err != nil {
return fmt.Errorf("%s not installed: %w", fuseoverlayfsBinary, err)
}
if err := os.MkdirAll(root, 0700); err != nil {
return err
}
if err := supportsReadonlyMultipleLowerDir(root); err != nil {
return fmt.Errorf("fuse-overlayfs not functional, make sure running with kernel >= 4.18: %w", err)
}
return nil
}
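Since Supported is meant for downstream projects embedding this snapshotter as a library, such a project would typically probe with it before constructing the snapshotter. A minimal illustrative sketch (the root path and wrapper function are placeholders, not part of this package):

```go
package main

import (
	"fmt"

	"github.com/containerd/containerd/v2/core/snapshots"
	fuseoverlayfs "github.com/containerd/fuse-overlayfs-snapshotter/v2"
)

// newFuseOverlayfs probes for a working fuse-overlayfs binary and mount support
// under root before constructing the snapshotter.
func newFuseOverlayfs(root string) (snapshots.Snapshotter, error) {
	if err := fuseoverlayfs.Supported(root); err != nil {
		return nil, fmt.Errorf("fuse-overlayfs is not usable on this host: %w", err)
	}
	return fuseoverlayfs.NewSnapshotter(root)
}
```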


@ -0,0 +1,518 @@
//go:build linux
// +build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package fuseoverlayfs
import (
"context"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"syscall"
"github.com/containerd/containerd/v2/core/mount"
"github.com/containerd/containerd/v2/core/snapshots"
"github.com/containerd/containerd/v2/core/snapshots/storage"
"github.com/containerd/continuity/fs"
"github.com/containerd/log"
)
const (
fuseoverlayfsBinary = "fuse-overlayfs"
)
// SnapshotterConfig is used to configure the overlay snapshotter instance
type SnapshotterConfig struct {
asyncRemove bool
}
// Opt is an option to configure the overlay snapshotter
type Opt func(config *SnapshotterConfig) error
// AsynchronousRemove defers removal of filesystem content until
// the Cleanup method is called. Removals will make the snapshot
// referred to by the key unavailable and make the key immediately
// available for re-use.
//
// AsynchronousRemove is untested for fuse-overlayfs
func AsynchronousRemove(config *SnapshotterConfig) error {
config.asyncRemove = true
return nil
}
type snapshotter struct {
root string
ms *storage.MetaStore
asyncRemove bool
}
// NewSnapshotter returns a Snapshotter which uses overlayfs. The overlayfs
// diffs are stored under the provided root. A metadata file is stored under
// the root.
func NewSnapshotter(root string, opts ...Opt) (snapshots.Snapshotter, error) {
var config SnapshotterConfig
for _, opt := range opts {
if err := opt(&config); err != nil {
return nil, err
}
}
if err := os.MkdirAll(root, 0700); err != nil {
return nil, err
}
ms, err := storage.NewMetaStore(filepath.Join(root, "metadata.db"))
if err != nil {
return nil, err
}
if err := os.Mkdir(filepath.Join(root, "snapshots"), 0700); err != nil && !os.IsExist(err) {
return nil, err
}
return &snapshotter{
root: root,
ms: ms,
asyncRemove: config.asyncRemove,
}, nil
}
// Stat returns the info for an active or committed snapshot by name or
// key.
//
// Should be used for parent resolution, existence checks and to discern
// the kind of snapshot.
func (o *snapshotter) Stat(ctx context.Context, key string) (snapshots.Info, error) {
ctx, t, err := o.ms.TransactionContext(ctx, false)
if err != nil {
return snapshots.Info{}, err
}
defer t.Rollback()
_, info, _, err := storage.GetInfo(ctx, key)
if err != nil {
return snapshots.Info{}, err
}
return info, nil
}
func (o *snapshotter) Update(ctx context.Context, info snapshots.Info, fieldpaths ...string) (snapshots.Info, error) {
ctx, t, err := o.ms.TransactionContext(ctx, true)
if err != nil {
return snapshots.Info{}, err
}
info, err = storage.UpdateInfo(ctx, info, fieldpaths...)
if err != nil {
t.Rollback()
return snapshots.Info{}, err
}
if err := t.Commit(); err != nil {
return snapshots.Info{}, err
}
return info, nil
}
// Usage returns the resources taken by the snapshot identified by key.
//
// For active snapshots, this will scan the usage of the overlay "diff" (aka
// "upper") directory and may take some time.
//
// For committed snapshots, the value is returned from the metadata database.
func (o *snapshotter) Usage(ctx context.Context, key string) (snapshots.Usage, error) {
ctx, t, err := o.ms.TransactionContext(ctx, false)
if err != nil {
return snapshots.Usage{}, err
}
id, info, usage, err := storage.GetInfo(ctx, key)
t.Rollback() // transaction no longer needed at this point.
if err != nil {
return snapshots.Usage{}, err
}
upperPath := o.upperPath(id)
if info.Kind == snapshots.KindActive {
du, err := fs.DiskUsage(ctx, upperPath)
if err != nil {
// TODO(stevvooe): Consider not reporting an error in this case.
return snapshots.Usage{}, err
}
usage = snapshots.Usage(du)
}
return usage, nil
}
func (o *snapshotter) Prepare(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
return o.createSnapshot(ctx, snapshots.KindActive, key, parent, opts)
}
func (o *snapshotter) View(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
return o.createSnapshot(ctx, snapshots.KindView, key, parent, opts)
}
// Mounts returns the mounts for the transaction identified by key. Can be
// called on a read-write or read-only transaction.
//
// This can be used to recover mounts after calling View or Prepare.
func (o *snapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, error) {
ctx, t, err := o.ms.TransactionContext(ctx, false)
if err != nil {
return nil, err
}
s, err := storage.GetSnapshot(ctx, key)
if err != nil {
return nil, err
}
_, info, _, err := storage.GetInfo(ctx, key)
t.Rollback()
if err != nil {
return nil, fmt.Errorf("failed to get active mount: %w", err)
}
return o.mounts(s, info), nil
}
func (o *snapshotter) Commit(ctx context.Context, name, key string, opts ...snapshots.Opt) error {
ctx, t, err := o.ms.TransactionContext(ctx, true)
if err != nil {
return err
}
defer func() {
if err != nil {
if rerr := t.Rollback(); rerr != nil {
log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")
}
}
}()
// grab the existing id
id, _, _, err := storage.GetInfo(ctx, key)
if err != nil {
return err
}
usage, err := fs.DiskUsage(ctx, o.upperPath(id))
if err != nil {
return err
}
if _, err = storage.CommitActive(ctx, key, name, snapshots.Usage(usage), opts...); err != nil {
return fmt.Errorf("failed to commit snapshot: %w", err)
}
return t.Commit()
}
// Remove abandons the snapshot identified by key. The snapshot will
// immediately become unavailable and unrecoverable. Disk space will
// be freed up on the next call to `Cleanup`.
func (o *snapshotter) Remove(ctx context.Context, key string) (err error) {
ctx, t, err := o.ms.TransactionContext(ctx, true)
if err != nil {
return err
}
defer func() {
if err != nil {
if rerr := t.Rollback(); rerr != nil {
log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")
}
}
}()
_, _, err = storage.Remove(ctx, key)
if err != nil {
return fmt.Errorf("failed to remove: %w", err)
}
if !o.asyncRemove {
var removals []string
removals, err = o.getCleanupDirectories(ctx, t)
if err != nil {
return fmt.Errorf("unable to get directories for removal: %w", err)
}
// Remove directories after the transaction is closed; failures must not
// return an error, since the transaction has been committed and the
// removal key is no longer available.
defer func() {
if err == nil {
for _, dir := range removals {
if err := os.RemoveAll(dir); err != nil {
log.G(ctx).WithError(err).WithField("path", dir).Warn("failed to remove directory")
}
}
}
}()
}
return t.Commit()
}
// Walk the committed snapshots.
func (o *snapshotter) Walk(ctx context.Context, fn snapshots.WalkFunc, fs ...string) error {
ctx, t, err := o.ms.TransactionContext(ctx, false)
if err != nil {
return err
}
defer t.Rollback()
return storage.WalkInfo(ctx, fn, fs...)
}
// Cleanup cleans up disk resources from removed or abandoned snapshots
func (o *snapshotter) Cleanup(ctx context.Context) error {
cleanup, err := o.cleanupDirectories(ctx)
if err != nil {
return err
}
for _, dir := range cleanup {
if err := os.RemoveAll(dir); err != nil {
log.G(ctx).WithError(err).WithField("path", dir).Warn("failed to remove directory")
}
}
return nil
}
func (o *snapshotter) cleanupDirectories(ctx context.Context) ([]string, error) {
// Get a write transaction to ensure no other write transaction can be entered
// while the cleanup is scanning.
ctx, t, err := o.ms.TransactionContext(ctx, true)
if err != nil {
return nil, err
}
defer t.Rollback()
return o.getCleanupDirectories(ctx, t)
}
func (o *snapshotter) getCleanupDirectories(ctx context.Context, t storage.Transactor) ([]string, error) {
ids, err := storage.IDMap(ctx)
if err != nil {
return nil, err
}
snapshotDir := filepath.Join(o.root, "snapshots")
fd, err := os.Open(snapshotDir)
if err != nil {
return nil, err
}
defer fd.Close()
dirs, err := fd.Readdirnames(0)
if err != nil {
return nil, err
}
cleanup := []string{}
for _, d := range dirs {
if _, ok := ids[d]; ok {
continue
}
cleanup = append(cleanup, filepath.Join(snapshotDir, d))
}
return cleanup, nil
}
func (o *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, key, parent string, opts []snapshots.Opt) (_ []mount.Mount, err error) {
ctx, t, err := o.ms.TransactionContext(ctx, true)
if err != nil {
return nil, err
}
var td, path string
defer func() {
if err != nil {
if td != "" {
if err1 := os.RemoveAll(td); err1 != nil {
log.G(ctx).WithError(err1).Warn("failed to cleanup temp snapshot directory")
}
}
if path != "" {
if err1 := os.RemoveAll(path); err1 != nil {
log.G(ctx).WithError(err1).WithField("path", path).Error("failed to reclaim snapshot directory, directory may need removal")
err = fmt.Errorf("failed to remove path: %v: %w", err1, err)
}
}
}
}()
snapshotDir := filepath.Join(o.root, "snapshots")
td, err = o.prepareDirectory(ctx, snapshotDir, kind)
if err != nil {
if rerr := t.Rollback(); rerr != nil {
log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")
}
return nil, fmt.Errorf("failed to create prepare snapshot dir: %w", err)
}
rollback := true
defer func() {
if rollback {
if rerr := t.Rollback(); rerr != nil {
log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")
}
}
}()
s, err := storage.CreateSnapshot(ctx, kind, key, parent, opts...)
if err != nil {
return nil, fmt.Errorf("failed to create snapshot: %w", err)
}
if len(s.ParentIDs) > 0 {
st, err := os.Stat(o.upperPath(s.ParentIDs[0]))
if err != nil {
return nil, fmt.Errorf("failed to stat parent: %w", err)
}
stat := st.Sys().(*syscall.Stat_t)
if err := os.Lchown(filepath.Join(td, "fs"), int(stat.Uid), int(stat.Gid)); err != nil {
if rerr := t.Rollback(); rerr != nil {
log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")
}
return nil, fmt.Errorf("failed to chown: %w", err)
}
}
path = filepath.Join(snapshotDir, s.ID)
if err = os.Rename(td, path); err != nil {
return nil, fmt.Errorf("failed to rename: %w", err)
}
td = ""
_, info, _, err := storage.GetInfo(ctx, key)
if err != nil {
return nil, fmt.Errorf("failed to get snapshot info: %w", err)
}
rollback = false
if err = t.Commit(); err != nil {
return nil, fmt.Errorf("commit failed: %w", err)
}
return o.mounts(s, info), nil
}
func (o *snapshotter) prepareDirectory(ctx context.Context, snapshotDir string, kind snapshots.Kind) (string, error) {
td, err := ioutil.TempDir(snapshotDir, "new-")
if err != nil {
return "", fmt.Errorf("failed to create temp dir: %w", err)
}
if err := os.Mkdir(filepath.Join(td, "fs"), 0755); err != nil {
return td, err
}
if kind == snapshots.KindActive {
if err := os.Mkdir(filepath.Join(td, "work"), 0711); err != nil {
return td, err
}
}
return td, nil
}
func (o *snapshotter) mounts(s storage.Snapshot, info snapshots.Info) []mount.Mount {
if len(s.ParentIDs) == 0 {
// if we only have one layer/no parents then just return a bind mount as overlay
// will not work
roFlag := "rw"
if s.Kind == snapshots.KindView {
roFlag = "ro"
}
return []mount.Mount{
{
Source: o.upperPath(s.ID),
Type: "bind",
Options: []string{
roFlag,
"rbind",
},
},
}
}
var options []string
if s.Kind == snapshots.KindActive {
options = append(options,
fmt.Sprintf("workdir=%s", o.workPath(s.ID)),
fmt.Sprintf("upperdir=%s", o.upperPath(s.ID)),
)
} else if len(s.ParentIDs) == 1 {
return []mount.Mount{
{
Source: o.upperPath(s.ParentIDs[0]),
Type: "bind",
Options: []string{
"ro",
"rbind",
},
},
}
}
parentPaths := make([]string, len(s.ParentIDs))
for i := range s.ParentIDs {
parentPaths[i] = o.upperPath(s.ParentIDs[i])
}
options = append(options, fmt.Sprintf("lowerdir=%s", strings.Join(parentPaths, ":")))
if mapping, ok := info.Labels["containerd.io/snapshot/uidmapping"]; ok {
options = append(options, fmt.Sprintf("uidmapping=%s", convertIDMappingOption(mapping)))
}
if mapping, ok := info.Labels["containerd.io/snapshot/gidmapping"]; ok {
options = append(options, fmt.Sprintf("gidmapping=%s", convertIDMappingOption(mapping)))
}
return []mount.Mount{
{
Type: "fuse3." + fuseoverlayfsBinary,
Source: "overlay",
Options: options,
},
}
}
func (o *snapshotter) upperPath(id string) string {
return filepath.Join(o.root, "snapshots", id, "fs")
}
func (o *snapshotter) workPath(id string) string {
return filepath.Join(o.root, "snapshots", id, "work")
}
// Close closes the snapshotter
func (o *snapshotter) Close() error {
return o.ms.Close()
}
// convertIDMappingOption converts mapping entries joined with ',' to ':'
// This is expected by the fuse-overlayfs program:
// https://github.com/containers/fuse-overlayfs/blob/main/fuse-overlayfs.1.md
func convertIDMappingOption(label string) string {
return strings.ReplaceAll(label, ",", ":")
}
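A small runnable illustration of the label-to-option conversion above; the mapping values are made up:
```
package main

import (
    "fmt"
    "strings"
)

func main() {
    // Snapshot labels join each (container ID, host ID, length) triple with
    // ','; fuse-overlayfs expects ':' separators instead.
    label := "0,1000,65536"
    fmt.Printf("uidmapping=%s\n", strings.ReplaceAll(label, ",", ":"))
    // Output: uidmapping=0:1000:65536
}
```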

View File

@ -0,0 +1,60 @@
//go:build linux
// +build linux
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package fuseoverlayfs
import (
"errors"
"github.com/containerd/containerd/v2/plugins"
fuseoverlayfs "github.com/containerd/fuse-overlayfs-snapshotter/v2"
"github.com/containerd/platforms"
"github.com/containerd/plugin"
"github.com/containerd/plugin/registry"
)
// Config represents configuration for the fuse-overlayfs plugin.
type Config struct {
// Root directory for the plugin
RootPath string `toml:"root_path"`
}
func init() {
registry.Register(&plugin.Registration{
Type: plugins.SnapshotPlugin,
ID: "fuse-overlayfs",
Config: &Config{},
InitFn: func(ic *plugin.InitContext) (interface{}, error) {
ic.Meta.Platforms = append(ic.Meta.Platforms, platforms.DefaultSpec())
config, ok := ic.Config.(*Config)
if !ok {
return nil, errors.New("invalid fuse-overlayfs configuration")
}
root := ic.Properties[plugins.PropertyRootDir]
if config.RootPath != "" {
root = config.RootPath
}
ic.Meta.Exports["root"] = root
return fuseoverlayfs.NewSnapshotter(root)
},
})
}

View File

@ -1,4 +1,5 @@
Apache License
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
@ -198,4 +199,4 @@
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
limitations under the License.

View File

@ -0,0 +1,67 @@
The source code developed under the Stargz Snapshotter Project is licensed under Apache License 2.0.
However, the Stargz Snapshotter project contains modified subcomponents from the Container Registry Filesystem Project with separate copyright notices and license terms. Your use of the source code for these subcomponents is subject to the terms and conditions as defined by the source project. Files in these subcomponents contain the following file header.
```
Copyright 2019 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the NOTICE.md file.
```
This source code is governed by a 3-Clause BSD license. The copyright notice, list of conditions and disclaimer are the following.
```
Copyright (c) 2019 Google LLC. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
```
The Stargz Snapshotter project also contains modified benchmarking code from the HelloBench Project with separate copyright notices and license terms. Your use of the source code for the benchmarking code is subject to the terms and conditions as defined by the source project. This source code is governed by an MIT license. The copyright notice, conditions and disclaimer are the following. The file in the benchmarking code contains them as the file header.
```
The MIT License (MIT)
Copyright (c) 2015 Tintri
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
```

View File

@ -0,0 +1,440 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cache
import (
"bytes"
"fmt"
"io"
"os"
"path/filepath"
"sync"
"github.com/containerd/stargz-snapshotter/util/cacheutil"
"github.com/containerd/stargz-snapshotter/util/namedmutex"
"github.com/hashicorp/go-multierror"
)
const (
defaultMaxLRUCacheEntry = 10
defaultMaxCacheFds = 10
)
type DirectoryCacheConfig struct {
// Number of entries of LRU cache (default: 10).
// This won't be used when DataCache is specified.
MaxLRUCacheEntry int
// Number of file descriptors to cache (default: 10).
// This won't be used when FdCache is specified.
MaxCacheFds int
// On Add, wait until the data is fully written to the cache directory.
SyncAdd bool
// DataCache is an on-memory cache of the data.
// OnEvicted will be overridden and replaced for internal use.
DataCache *cacheutil.LRUCache
// FdCache is a cache for opened file descriptors.
// OnEvicted will be overridden and replaced for internal use.
FdCache *cacheutil.LRUCache
// BufPool will be used for pooling bytes.Buffer.
BufPool *sync.Pool
// Direct forcefully enables direct mode for all operations in the cache.
// Thus operations won't use on-memory caches.
Direct bool
}
// TODO: contents validation.
// BlobCache represents a cache for bytes data
type BlobCache interface {
// Add returns a writer to add contents to cache
Add(key string, opts ...Option) (Writer, error)
// Get returns a reader to read the specified contents
// from cache
Get(key string, opts ...Option) (Reader, error)
// Close closes the cache
Close() error
}
// Reader provides the data cached.
type Reader interface {
io.ReaderAt
Close() error
}
// Writer enables the client to cache byte data. Commit() must be
// called after the data has been fully written through Write(). To abort the written
// data, Abort() must be called.
type Writer interface {
io.WriteCloser
Commit() error
Abort() error
}
type cacheOpt struct {
direct bool
}
type Option func(o *cacheOpt) *cacheOpt
// Direct option lets the FetchAt and Add methods not use on-memory caches. When
// you know that the target value won't be used immediately, you can prevent
// the limited space of the on-memory caches from being polluted by these unimportant
// values.
func Direct() Option {
return func(o *cacheOpt) *cacheOpt {
o.direct = true
return o
}
}
func NewDirectoryCache(directory string, config DirectoryCacheConfig) (BlobCache, error) {
if !filepath.IsAbs(directory) {
return nil, fmt.Errorf("dir cache path must be an absolute path; got %q", directory)
}
bufPool := config.BufPool
if bufPool == nil {
bufPool = &sync.Pool{
New: func() interface{} {
return new(bytes.Buffer)
},
}
}
dataCache := config.DataCache
if dataCache == nil {
maxEntry := config.MaxLRUCacheEntry
if maxEntry == 0 {
maxEntry = defaultMaxLRUCacheEntry
}
dataCache = cacheutil.NewLRUCache(maxEntry)
dataCache.OnEvicted = func(key string, value interface{}) {
value.(*bytes.Buffer).Reset()
bufPool.Put(value)
}
}
fdCache := config.FdCache
if fdCache == nil {
maxEntry := config.MaxCacheFds
if maxEntry == 0 {
maxEntry = defaultMaxCacheFds
}
fdCache = cacheutil.NewLRUCache(maxEntry)
fdCache.OnEvicted = func(key string, value interface{}) {
value.(*os.File).Close()
}
}
if err := os.MkdirAll(directory, 0700); err != nil {
return nil, err
}
wipdir := filepath.Join(directory, "wip")
if err := os.MkdirAll(wipdir, 0700); err != nil {
return nil, err
}
dc := &directoryCache{
cache: dataCache,
fileCache: fdCache,
wipLock: new(namedmutex.NamedMutex),
directory: directory,
wipDirectory: wipdir,
bufPool: bufPool,
direct: config.Direct,
}
dc.syncAdd = config.SyncAdd
return dc, nil
}
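A hedged end-to-end sketch of the directory cache constructed above; the cache directory and key are arbitrary, and SyncAdd is set so Commit only returns once the data has reached disk:
```
package main

import (
    "fmt"
    "log"

    "github.com/containerd/stargz-snapshotter/cache"
)

func main() {
    c, err := cache.NewDirectoryCache("/tmp/blobcache", cache.DirectoryCacheConfig{
        MaxLRUCacheEntry: 5,
        SyncAdd:          true,
    })
    if err != nil {
        log.Fatal(err)
    }
    defer c.Close() // removes the cache directory

    w, err := c.Add("deadbeef") // the first two characters become a subdirectory
    if err != nil {
        log.Fatal(err)
    }
    if _, err := w.Write([]byte("hello")); err != nil {
        log.Fatal(err)
    }
    if err := w.Commit(); err != nil { // Commit must follow a complete Write
        log.Fatal(err)
    }
    w.Close()

    r, err := c.Get("deadbeef")
    if err != nil {
        log.Fatal(err)
    }
    defer r.Close()
    buf := make([]byte, 5)
    if _, err := r.ReadAt(buf, 0); err != nil {
        log.Fatal(err)
    }
    fmt.Println(string(buf)) // hello
}
```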
// directoryCache is a cache implementation whose backend is a directory.
type directoryCache struct {
cache *cacheutil.LRUCache
fileCache *cacheutil.LRUCache
wipDirectory string
directory string
wipLock *namedmutex.NamedMutex
bufPool *sync.Pool
syncAdd bool
direct bool
closed bool
closedMu sync.Mutex
}
func (dc *directoryCache) Get(key string, opts ...Option) (Reader, error) {
if dc.isClosed() {
return nil, fmt.Errorf("cache is already closed")
}
opt := &cacheOpt{}
for _, o := range opts {
opt = o(opt)
}
if !dc.direct && !opt.direct {
// Get data from memory
if b, done, ok := dc.cache.Get(key); ok {
return &reader{
ReaderAt: bytes.NewReader(b.(*bytes.Buffer).Bytes()),
closeFunc: func() error {
done()
return nil
},
}, nil
}
// Get data from disk. If the file is already opened, use it.
if f, done, ok := dc.fileCache.Get(key); ok {
return &reader{
ReaderAt: f.(*os.File),
closeFunc: func() error {
done() // file will be closed when it's evicted from the cache
return nil
},
}, nil
}
}
// Open the cache file and read the target region
// TODO: If the target cache is write-in-progress, should we wait for the completion
// or simply report the cache miss?
file, err := os.Open(dc.cachePath(key))
if err != nil {
return nil, fmt.Errorf("failed to open blob file for %q: %w", key, err)
}
// If "direct" option is specified, do not cache the file on memory.
// This option is useful for preventing memory cache from being polluted by data
// that won't be accessed immediately.
if dc.direct || opt.direct {
return &reader{
ReaderAt: file,
closeFunc: func() error { return file.Close() },
}, nil
}
// TODO: should we cache the entire file data on memory?
// but making I/O (possibly huge) on every fetching
// might be costly.
return &reader{
ReaderAt: file,
closeFunc: func() error {
_, done, added := dc.fileCache.Add(key, file)
defer done() // Release it immediately. Cleaned up on eviction.
if !added {
return file.Close() // file already exists in the cache. close it.
}
return nil
},
}, nil
}
func (dc *directoryCache) Add(key string, opts ...Option) (Writer, error) {
if dc.isClosed() {
return nil, fmt.Errorf("cache is already closed")
}
opt := &cacheOpt{}
for _, o := range opts {
opt = o(opt)
}
wip, err := dc.wipFile(key)
if err != nil {
return nil, err
}
w := &writer{
WriteCloser: wip,
commitFunc: func() error {
if dc.isClosed() {
return fmt.Errorf("cache is already closed")
}
// Commit the cache contents
c := dc.cachePath(key)
if err := os.MkdirAll(filepath.Dir(c), os.ModePerm); err != nil {
var allErr error
if err := os.Remove(wip.Name()); err != nil {
allErr = multierror.Append(allErr, err)
}
return multierror.Append(allErr,
fmt.Errorf("failed to create cache directory %q: %w", c, err))
}
return os.Rename(wip.Name(), c)
},
abortFunc: func() error {
return os.Remove(wip.Name())
},
}
// If "direct" option is specified, do not cache the passed data on memory.
// This option is useful for preventing memory cache from being polluted by data
// that won't be accessed immediately.
if dc.direct || opt.direct {
return w, nil
}
b := dc.bufPool.Get().(*bytes.Buffer)
memW := &writer{
WriteCloser: nopWriteCloser(io.Writer(b)),
commitFunc: func() error {
if dc.isClosed() {
w.Close()
return fmt.Errorf("cache is already closed")
}
cached, done, added := dc.cache.Add(key, b)
if !added {
dc.putBuffer(b) // already exists in the cache. abort it.
}
commit := func() error {
defer done()
defer w.Close()
n, err := w.Write(cached.(*bytes.Buffer).Bytes())
if err != nil || n != cached.(*bytes.Buffer).Len() {
w.Abort()
return err
}
return w.Commit()
}
if dc.syncAdd {
return commit()
}
go func() {
if err := commit(); err != nil {
fmt.Println("failed to commit to file:", err)
}
}()
return nil
},
abortFunc: func() error {
defer w.Close()
defer w.Abort()
dc.putBuffer(b) // abort it.
return nil
},
}
return memW, nil
}
func (dc *directoryCache) putBuffer(b *bytes.Buffer) {
b.Reset()
dc.bufPool.Put(b)
}
func (dc *directoryCache) Close() error {
dc.closedMu.Lock()
defer dc.closedMu.Unlock()
if dc.closed {
return nil
}
dc.closed = true
return os.RemoveAll(dc.directory)
}
func (dc *directoryCache) isClosed() bool {
dc.closedMu.Lock()
closed := dc.closed
dc.closedMu.Unlock()
return closed
}
func (dc *directoryCache) cachePath(key string) string {
return filepath.Join(dc.directory, key[:2], key)
}
func (dc *directoryCache) wipFile(key string) (*os.File, error) {
return os.CreateTemp(dc.wipDirectory, key+"-*")
}
func NewMemoryCache() BlobCache {
return &MemoryCache{
Membuf: map[string]*bytes.Buffer{},
}
}
// MemoryCache is a cache implementation whose backend is memory.
type MemoryCache struct {
Membuf map[string]*bytes.Buffer
mu sync.Mutex
}
func (mc *MemoryCache) Get(key string, opts ...Option) (Reader, error) {
mc.mu.Lock()
defer mc.mu.Unlock()
b, ok := mc.Membuf[key]
if !ok {
return nil, fmt.Errorf("Missed cache: %q", key)
}
return &reader{bytes.NewReader(b.Bytes()), func() error { return nil }}, nil
}
func (mc *MemoryCache) Add(key string, opts ...Option) (Writer, error) {
b := new(bytes.Buffer)
return &writer{
WriteCloser: nopWriteCloser(io.Writer(b)),
commitFunc: func() error {
mc.mu.Lock()
defer mc.mu.Unlock()
mc.Membuf[key] = b
return nil
},
abortFunc: func() error { return nil },
}, nil
}
func (mc *MemoryCache) Close() error {
return nil
}
type reader struct {
io.ReaderAt
closeFunc func() error
}
func (r *reader) Close() error { return r.closeFunc() }
type writer struct {
io.WriteCloser
commitFunc func() error
abortFunc func() error
}
func (w *writer) Commit() error {
return w.commitFunc()
}
func (w *writer) Abort() error {
return w.abortFunc()
}
type writeCloser struct {
io.Writer
closeFunc func() error
}
func (w *writeCloser) Close() error { return w.closeFunc() }
func nopWriteCloser(w io.Writer) io.WriteCloser {
return &writeCloser{w, func() error { return nil }}
}

View File

@ -1,4 +1,5 @@
Apache License
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
@ -198,4 +199,4 @@
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
limitations under the License.

View File

@ -0,0 +1,689 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2019 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file.
*/
package estargz
import (
"archive/tar"
"bytes"
"compress/gzip"
"context"
"errors"
"fmt"
"io"
"os"
"path"
"runtime"
"strings"
"sync"
"github.com/containerd/stargz-snapshotter/estargz/errorutil"
"github.com/klauspost/compress/zstd"
digest "github.com/opencontainers/go-digest"
"golang.org/x/sync/errgroup"
)
type options struct {
chunkSize int
compressionLevel int
prioritizedFiles []string
missedPrioritizedFiles *[]string
compression Compression
ctx context.Context
minChunkSize int
}
type Option func(o *options) error
// WithChunkSize option specifies the chunk size of eStargz blob to build.
func WithChunkSize(chunkSize int) Option {
return func(o *options) error {
o.chunkSize = chunkSize
return nil
}
}
// WithCompressionLevel option specifies the gzip compression level.
// The default is gzip.BestCompression.
// This option will be ignored if WithCompression option is used.
// See also: https://godoc.org/compress/gzip#pkg-constants
func WithCompressionLevel(level int) Option {
return func(o *options) error {
o.compressionLevel = level
return nil
}
}
// WithPrioritizedFiles option specifies the list of prioritized files.
// These files must be complete paths that are absolute or relative to "/".
// For example, all of "foo/bar", "/foo/bar", "./foo/bar" and "../foo/bar"
// are treated as "/foo/bar".
func WithPrioritizedFiles(files []string) Option {
return func(o *options) error {
o.prioritizedFiles = files
return nil
}
}
// WithAllowPrioritizeNotFound makes Build continue the execution even if some
// of prioritized files specified by WithPrioritizedFiles option aren't found
// in the input tar. Instead, this records all missed file names to the passed
// slice.
func WithAllowPrioritizeNotFound(missedFiles *[]string) Option {
return func(o *options) error {
if missedFiles == nil {
return fmt.Errorf("WithAllowPrioritizeNotFound: slice must be passed")
}
o.missedPrioritizedFiles = missedFiles
return nil
}
}
// WithCompression specifies compression algorithm to be used.
// Default is gzip.
func WithCompression(compression Compression) Option {
return func(o *options) error {
o.compression = compression
return nil
}
}
// WithContext specifies a context that can be used for clean cancellation.
func WithContext(ctx context.Context) Option {
return func(o *options) error {
o.ctx = ctx
return nil
}
}
// WithMinChunkSize option specifies the minimal number of bytes of data that
// must be written in one gzip stream.
// By increasing this number, one gzip stream can contain multiple files
// and it hopefully leads to a smaller resulting blob.
// NOTE: This adds a TOC property that old readers don't understand.
func WithMinChunkSize(minChunkSize int) Option {
return func(o *options) error {
o.minChunkSize = minChunkSize
return nil
}
}
// Blob is an eStargz blob.
type Blob struct {
io.ReadCloser
diffID digest.Digester
tocDigest digest.Digest
}
// DiffID returns the digest of uncompressed blob.
// It is only valid to call DiffID after Close.
func (b *Blob) DiffID() digest.Digest {
return b.diffID.Digest()
}
// TOCDigest returns the digest of uncompressed TOC JSON.
func (b *Blob) TOCDigest() digest.Digest {
return b.tocDigest
}
// Build builds an eStargz blob, which is an extended version of stargz, from a blob (gzip, zstd
// or plain tar) passed through the argument. If prioritized files are listed in
// the options, these files are grouped as "prioritized" and can be used for runtime optimization
// (e.g. prefetch). This function builds the blob in parallel, dividing it into several
// (at least the number of runtime.GOMAXPROCS(0)) sub-blobs.
func Build(tarBlob *io.SectionReader, opt ...Option) (_ *Blob, rErr error) {
var opts options
opts.compressionLevel = gzip.BestCompression // BestCompression by default
for _, o := range opt {
if err := o(&opts); err != nil {
return nil, err
}
}
if opts.compression == nil {
opts.compression = newGzipCompressionWithLevel(opts.compressionLevel)
}
layerFiles := newTempFiles()
ctx := opts.ctx
if ctx == nil {
ctx = context.Background()
}
done := make(chan struct{})
defer close(done)
go func() {
select {
case <-done:
// nop
case <-ctx.Done():
layerFiles.CleanupAll()
}
}()
defer func() {
if rErr != nil {
if err := layerFiles.CleanupAll(); err != nil {
rErr = fmt.Errorf("failed to cleanup tmp files: %v: %w", err, rErr)
}
}
if cErr := ctx.Err(); cErr != nil {
rErr = fmt.Errorf("error from context %q: %w", cErr, rErr)
}
}()
tarBlob, err := decompressBlob(tarBlob, layerFiles)
if err != nil {
return nil, err
}
entries, err := sortEntries(tarBlob, opts.prioritizedFiles, opts.missedPrioritizedFiles)
if err != nil {
return nil, err
}
var tarParts [][]*entry
if opts.minChunkSize > 0 {
// Each entry needs to know the size of the current gzip stream so they
// cannot be processed in parallel.
tarParts = [][]*entry{entries}
} else {
tarParts = divideEntries(entries, runtime.GOMAXPROCS(0))
}
writers := make([]*Writer, len(tarParts))
payloads := make([]*os.File, len(tarParts))
var mu sync.Mutex
var eg errgroup.Group
for i, parts := range tarParts {
i, parts := i, parts
// builds verifiable stargz sub-blobs
eg.Go(func() error {
esgzFile, err := layerFiles.TempFile("", "esgzdata")
if err != nil {
return err
}
sw := NewWriterWithCompressor(esgzFile, opts.compression)
sw.ChunkSize = opts.chunkSize
sw.MinChunkSize = opts.minChunkSize
if sw.needsOpenGzEntries == nil {
sw.needsOpenGzEntries = make(map[string]struct{})
}
for _, f := range []string{PrefetchLandmark, NoPrefetchLandmark} {
sw.needsOpenGzEntries[f] = struct{}{}
}
if err := sw.AppendTar(readerFromEntries(parts...)); err != nil {
return err
}
mu.Lock()
writers[i] = sw
payloads[i] = esgzFile
mu.Unlock()
return nil
})
}
if err := eg.Wait(); err != nil {
rErr = err
return nil, err
}
tocAndFooter, tocDgst, err := closeWithCombine(writers...)
if err != nil {
rErr = err
return nil, err
}
var rs []io.Reader
for _, p := range payloads {
fs, err := fileSectionReader(p)
if err != nil {
return nil, err
}
rs = append(rs, fs)
}
diffID := digest.Canonical.Digester()
pr, pw := io.Pipe()
go func() {
r, err := opts.compression.Reader(io.TeeReader(io.MultiReader(append(rs, tocAndFooter)...), pw))
if err != nil {
pw.CloseWithError(err)
return
}
defer r.Close()
if _, err := io.Copy(diffID.Hash(), r); err != nil {
pw.CloseWithError(err)
return
}
pw.Close()
}()
return &Blob{
ReadCloser: readCloser{
Reader: pr,
closeFunc: layerFiles.CleanupAll,
},
tocDigest: tocDgst,
diffID: diffID,
}, nil
}
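A usage sketch for Build and the Blob accessors above; the input and output paths and the prioritized file list are assumptions, and WithAllowPrioritizeNotFound keeps the build going if a prioritized file is missing:
```
package main

import (
    "io"
    "log"
    "os"

    "github.com/containerd/stargz-snapshotter/estargz"
)

func main() {
    f, err := os.Open("layer.tar") // plain, gzip or zstd tar layer
    if err != nil {
        log.Fatal(err)
    }
    defer f.Close()
    info, err := f.Stat()
    if err != nil {
        log.Fatal(err)
    }

    var missed []string
    blob, err := estargz.Build(
        io.NewSectionReader(f, 0, info.Size()),
        estargz.WithPrioritizedFiles([]string{"etc/passwd"}),
        estargz.WithAllowPrioritizeNotFound(&missed),
    )
    if err != nil {
        log.Fatal(err)
    }

    out, err := os.Create("layer.esgz")
    if err != nil {
        log.Fatal(err)
    }
    defer out.Close()
    if _, err := io.Copy(out, blob); err != nil {
        log.Fatal(err)
    }
    if err := blob.Close(); err != nil {
        log.Fatal(err)
    }
    // DiffID is only valid once the blob has been fully read and closed.
    log.Printf("TOC digest: %s DiffID: %s missed: %v", blob.TOCDigest(), blob.DiffID(), missed)
}
```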
// closeWithCombine takes unclosed Writers and closes them. It also returns the
// TOC that combines all the Writers.
// The Writers don't write the TOC and footer to the underlying writers, so they can be
// combined into a single eStargz, and the tocAndFooter returned by this function can
// be appended at the tail of that combined blob.
func closeWithCombine(ws ...*Writer) (tocAndFooterR io.Reader, tocDgst digest.Digest, err error) {
if len(ws) == 0 {
return nil, "", fmt.Errorf("at least one writer must be passed")
}
for _, w := range ws {
if w.closed {
return nil, "", fmt.Errorf("writer must be unclosed")
}
defer func(w *Writer) { w.closed = true }(w)
if err := w.closeGz(); err != nil {
return nil, "", err
}
if err := w.bw.Flush(); err != nil {
return nil, "", err
}
}
var (
mtoc = new(JTOC)
currentOffset int64
)
mtoc.Version = ws[0].toc.Version
for _, w := range ws {
for _, e := range w.toc.Entries {
// Recalculate Offset of non-empty files/chunks
if (e.Type == "reg" && e.Size > 0) || e.Type == "chunk" {
e.Offset += currentOffset
}
mtoc.Entries = append(mtoc.Entries, e)
}
if w.toc.Version > mtoc.Version {
mtoc.Version = w.toc.Version
}
currentOffset += w.cw.n
}
return tocAndFooter(ws[0].compressor, mtoc, currentOffset)
}
func tocAndFooter(compressor Compressor, toc *JTOC, offset int64) (io.Reader, digest.Digest, error) {
buf := new(bytes.Buffer)
tocDigest, err := compressor.WriteTOCAndFooter(buf, offset, toc, nil)
if err != nil {
return nil, "", err
}
return buf, tocDigest, nil
}
// divideEntries divides passed entries to the parts at least the number specified by the
// argument.
func divideEntries(entries []*entry, minPartsNum int) (set [][]*entry) {
var estimatedSize int64
for _, e := range entries {
estimatedSize += e.header.Size
}
unitSize := estimatedSize / int64(minPartsNum)
var (
nextEnd = unitSize
offset int64
)
set = append(set, []*entry{})
for _, e := range entries {
set[len(set)-1] = append(set[len(set)-1], e)
offset += e.header.Size
if offset > nextEnd {
set = append(set, []*entry{})
nextEnd += unitSize
}
}
return
}
var errNotFound = errors.New("not found")
// sortEntries reads the specified tar blob and returns a list of tar entries.
// If prioritized files are specified, the list starts with these
// files, keeping the order specified by the argument.
func sortEntries(in io.ReaderAt, prioritized []string, missedPrioritized *[]string) ([]*entry, error) {
// Import tar file.
intar, err := importTar(in)
if err != nil {
return nil, fmt.Errorf("failed to sort: %w", err)
}
// Sort the tar file respecting the prioritized files list.
sorted := &tarFile{}
for _, l := range prioritized {
if err := moveRec(l, intar, sorted); err != nil {
if errors.Is(err, errNotFound) && missedPrioritized != nil {
*missedPrioritized = append(*missedPrioritized, l)
continue // allow not found
}
return nil, fmt.Errorf("failed to sort tar entries: %w", err)
}
}
if len(prioritized) == 0 {
sorted.add(&entry{
header: &tar.Header{
Name: NoPrefetchLandmark,
Typeflag: tar.TypeReg,
Size: int64(len([]byte{landmarkContents})),
},
payload: bytes.NewReader([]byte{landmarkContents}),
})
} else {
sorted.add(&entry{
header: &tar.Header{
Name: PrefetchLandmark,
Typeflag: tar.TypeReg,
Size: int64(len([]byte{landmarkContents})),
},
payload: bytes.NewReader([]byte{landmarkContents}),
})
}
// Dump all entries and concatenate them.
return append(sorted.dump(), intar.dump()...), nil
}
// readerFromEntries returns a reader of tar archive that contains entries passed
// through the arguments.
func readerFromEntries(entries ...*entry) io.Reader {
pr, pw := io.Pipe()
go func() {
tw := tar.NewWriter(pw)
defer tw.Close()
for _, entry := range entries {
if err := tw.WriteHeader(entry.header); err != nil {
pw.CloseWithError(fmt.Errorf("Failed to write tar header: %v", err))
return
}
if _, err := io.Copy(tw, entry.payload); err != nil {
pw.CloseWithError(fmt.Errorf("Failed to write tar payload: %v", err))
return
}
}
pw.Close()
}()
return pr
}
func importTar(in io.ReaderAt) (*tarFile, error) {
tf := &tarFile{}
pw, err := newCountReadSeeker(in)
if err != nil {
return nil, fmt.Errorf("failed to make position watcher: %w", err)
}
tr := tar.NewReader(pw)
// Walk through all nodes.
for {
// Fetch and parse next header.
h, err := tr.Next()
if err != nil {
if err == io.EOF {
break
}
return nil, fmt.Errorf("failed to parse tar file, %w", err)
}
switch cleanEntryName(h.Name) {
case PrefetchLandmark, NoPrefetchLandmark:
// Ignore existing landmark
continue
}
// Add entry. If it already exists, replace it.
if _, ok := tf.get(h.Name); ok {
tf.remove(h.Name)
}
tf.add(&entry{
header: h,
payload: io.NewSectionReader(in, pw.currentPos(), h.Size),
})
}
return tf, nil
}
func moveRec(name string, in *tarFile, out *tarFile) error {
name = cleanEntryName(name)
if name == "" { // root directory. stop recursion.
if e, ok := in.get(name); ok {
// entry of the root directory exists. we should move it as well.
// this case will occur if tar entries are prefixed with "./", "/", etc.
out.add(e)
in.remove(name)
}
return nil
}
_, okIn := in.get(name)
_, okOut := out.get(name)
if !okIn && !okOut {
return fmt.Errorf("file: %q: %w", name, errNotFound)
}
parent, _ := path.Split(strings.TrimSuffix(name, "/"))
if err := moveRec(parent, in, out); err != nil {
return err
}
if e, ok := in.get(name); ok && e.header.Typeflag == tar.TypeLink {
if err := moveRec(e.header.Linkname, in, out); err != nil {
return err
}
}
if e, ok := in.get(name); ok {
out.add(e)
in.remove(name)
}
return nil
}
type entry struct {
header *tar.Header
payload io.ReadSeeker
}
type tarFile struct {
index map[string]*entry
stream []*entry
}
func (f *tarFile) add(e *entry) {
if f.index == nil {
f.index = make(map[string]*entry)
}
f.index[cleanEntryName(e.header.Name)] = e
f.stream = append(f.stream, e)
}
func (f *tarFile) remove(name string) {
name = cleanEntryName(name)
if f.index != nil {
delete(f.index, name)
}
var filtered []*entry
for _, e := range f.stream {
if cleanEntryName(e.header.Name) == name {
continue
}
filtered = append(filtered, e)
}
f.stream = filtered
}
func (f *tarFile) get(name string) (e *entry, ok bool) {
if f.index == nil {
return nil, false
}
e, ok = f.index[cleanEntryName(name)]
return
}
func (f *tarFile) dump() []*entry {
return f.stream
}
type readCloser struct {
io.Reader
closeFunc func() error
}
func (rc readCloser) Close() error {
return rc.closeFunc()
}
func fileSectionReader(file *os.File) (*io.SectionReader, error) {
info, err := file.Stat()
if err != nil {
return nil, err
}
return io.NewSectionReader(file, 0, info.Size()), nil
}
func newTempFiles() *tempFiles {
return &tempFiles{}
}
type tempFiles struct {
files []*os.File
filesMu sync.Mutex
cleanupOnce sync.Once
}
func (tf *tempFiles) TempFile(dir, pattern string) (*os.File, error) {
f, err := os.CreateTemp(dir, pattern)
if err != nil {
return nil, err
}
tf.filesMu.Lock()
tf.files = append(tf.files, f)
tf.filesMu.Unlock()
return f, nil
}
func (tf *tempFiles) CleanupAll() (err error) {
tf.cleanupOnce.Do(func() {
err = tf.cleanupAll()
})
return
}
func (tf *tempFiles) cleanupAll() error {
tf.filesMu.Lock()
defer tf.filesMu.Unlock()
var allErr []error
for _, f := range tf.files {
if err := f.Close(); err != nil {
allErr = append(allErr, err)
}
if err := os.Remove(f.Name()); err != nil {
allErr = append(allErr, err)
}
}
tf.files = nil
return errorutil.Aggregate(allErr)
}
func newCountReadSeeker(r io.ReaderAt) (*countReadSeeker, error) {
pos := int64(0)
return &countReadSeeker{r: r, cPos: &pos}, nil
}
type countReadSeeker struct {
r io.ReaderAt
cPos *int64
mu sync.Mutex
}
func (cr *countReadSeeker) Read(p []byte) (int, error) {
cr.mu.Lock()
defer cr.mu.Unlock()
n, err := cr.r.ReadAt(p, *cr.cPos)
if err == nil {
*cr.cPos += int64(n)
}
return n, err
}
func (cr *countReadSeeker) Seek(offset int64, whence int) (int64, error) {
cr.mu.Lock()
defer cr.mu.Unlock()
switch whence {
default:
return 0, fmt.Errorf("Unknown whence: %v", whence)
case io.SeekStart:
case io.SeekCurrent:
offset += *cr.cPos
case io.SeekEnd:
return 0, fmt.Errorf("Unsupported whence: %v", whence)
}
if offset < 0 {
return 0, fmt.Errorf("invalid offset")
}
*cr.cPos = offset
return offset, nil
}
func (cr *countReadSeeker) currentPos() int64 {
cr.mu.Lock()
defer cr.mu.Unlock()
return *cr.cPos
}
func decompressBlob(org *io.SectionReader, tmp *tempFiles) (*io.SectionReader, error) {
if org.Size() < 4 {
return org, nil
}
src := make([]byte, 4)
if _, err := org.Read(src); err != nil && err != io.EOF {
return nil, err
}
var dR io.Reader
if bytes.Equal([]byte{0x1F, 0x8B, 0x08}, src[:3]) {
// gzip
dgR, err := gzip.NewReader(io.NewSectionReader(org, 0, org.Size()))
if err != nil {
return nil, err
}
defer dgR.Close()
dR = io.Reader(dgR)
} else if bytes.Equal([]byte{0x28, 0xb5, 0x2f, 0xfd}, src[:4]) {
// zstd
dzR, err := zstd.NewReader(io.NewSectionReader(org, 0, org.Size()))
if err != nil {
return nil, err
}
defer dzR.Close()
dR = io.Reader(dzR)
} else {
// uncompressed
return io.NewSectionReader(org, 0, org.Size()), nil
}
b, err := tmp.TempFile("", "uncompresseddata")
if err != nil {
return nil, err
}
if _, err := io.Copy(b, dR); err != nil {
return nil, err
}
return fileSectionReader(b)
}

View File

@ -0,0 +1,40 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package errorutil
import (
"errors"
"fmt"
"strings"
)
// Aggregate combines a list of errors into a single new error.
func Aggregate(errs []error) error {
switch len(errs) {
case 0:
return nil
case 1:
return errs[0]
default:
points := make([]string, len(errs)+1)
points[0] = fmt.Sprintf("%d error(s) occurred:", len(errs))
for i, err := range errs {
points[i+1] = fmt.Sprintf("* %s", err)
}
return errors.New(strings.Join(points, "\n\t"))
}
}
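A short illustration of Aggregate's behaviour for a multi-error slice; the error messages are made up:
```
package main

import (
    "errors"
    "fmt"

    "github.com/containerd/stargz-snapshotter/estargz/errorutil"
)

func main() {
    errs := []error{
        errors.New("failed to close file"),
        errors.New("failed to remove file"),
    }
    // nil for an empty slice, the error itself for one entry,
    // and a combined multi-line error otherwise.
    fmt.Println(errorutil.Aggregate(errs))
}
```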

File diff suppressed because it is too large

View File

@ -0,0 +1,278 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2019 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file.
*/
package externaltoc
import (
"archive/tar"
"bytes"
"compress/gzip"
"encoding/binary"
"encoding/json"
"fmt"
"hash"
"io"
"sync"
"github.com/containerd/stargz-snapshotter/estargz"
digest "github.com/opencontainers/go-digest"
)
type GzipCompression struct {
*GzipCompressor
*GzipDecompressor
}
func NewGzipCompressionWithLevel(provideTOC func() ([]byte, error), level int) estargz.Compression {
return &GzipCompression{
NewGzipCompressorWithLevel(level),
NewGzipDecompressor(provideTOC),
}
}
func NewGzipCompressor() *GzipCompressor {
return &GzipCompressor{compressionLevel: gzip.BestCompression}
}
func NewGzipCompressorWithLevel(level int) *GzipCompressor {
return &GzipCompressor{compressionLevel: level}
}
type GzipCompressor struct {
compressionLevel int
buf *bytes.Buffer
}
func (gc *GzipCompressor) WriteTOCTo(w io.Writer) (int, error) {
if len(gc.buf.Bytes()) == 0 {
return 0, fmt.Errorf("TOC hasn't been registered")
}
return w.Write(gc.buf.Bytes())
}
func (gc *GzipCompressor) Writer(w io.Writer) (estargz.WriteFlushCloser, error) {
return gzip.NewWriterLevel(w, gc.compressionLevel)
}
func (gc *GzipCompressor) WriteTOCAndFooter(w io.Writer, off int64, toc *estargz.JTOC, diffHash hash.Hash) (digest.Digest, error) {
tocJSON, err := json.MarshalIndent(toc, "", "\t")
if err != nil {
return "", err
}
buf := new(bytes.Buffer)
gz, _ := gzip.NewWriterLevel(buf, gc.compressionLevel)
// TOC isn't written to layer so no effect to diff ID
tw := tar.NewWriter(gz)
if err := tw.WriteHeader(&tar.Header{
Typeflag: tar.TypeReg,
Name: estargz.TOCTarName,
Size: int64(len(tocJSON)),
}); err != nil {
return "", err
}
if _, err := tw.Write(tocJSON); err != nil {
return "", err
}
if err := tw.Close(); err != nil {
return "", err
}
if err := gz.Close(); err != nil {
return "", err
}
gc.buf = buf
footerBytes, err := gzipFooterBytes()
if err != nil {
return "", err
}
if _, err := w.Write(footerBytes); err != nil {
return "", err
}
return digest.FromBytes(tocJSON), nil
}
// The footer is an empty gzip stream with no compression and an Extra header.
//
// 46 comes from:
//
// 10 bytes gzip header
// 2 bytes XLEN (length of Extra field) = 21 (4 bytes header + len("STARGZEXTERNALTOC"))
// 2 bytes Extra: SI1 = 'S', SI2 = 'G'
// 2 bytes Extra: LEN = 17 (len("STARGZEXTERNALTOC"))
// 17 bytes Extra: subfield = "STARGZEXTERNALTOC"
// 5 bytes flate header
// 8 bytes gzip footer
// (End of the eStargz blob)
const FooterSize = 46
// gzipFooterBytes returns the FooterSize (46) bytes footer.
func gzipFooterBytes() ([]byte, error) {
buf := bytes.NewBuffer(make([]byte, 0, FooterSize))
gz, _ := gzip.NewWriterLevel(buf, gzip.NoCompression) // MUST be NoCompression to keep the footer at FooterSize bytes
// Extra header indicating the offset of TOCJSON
// https://tools.ietf.org/html/rfc1952#section-2.3.1.1
header := make([]byte, 4)
header[0], header[1] = 'S', 'G'
subfield := "STARGZEXTERNALTOC" // len("STARGZEXTERNALTOC") = 17
binary.LittleEndian.PutUint16(header[2:4], uint16(len(subfield))) // little-endian per RFC1952
gz.Header.Extra = append(header, []byte(subfield)...)
if err := gz.Close(); err != nil {
return nil, err
}
if buf.Len() != FooterSize {
panic(fmt.Sprintf("footer buffer = %d, not %d", buf.Len(), FooterSize))
}
return buf.Bytes(), nil
}
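A minimal in-package sketch (it would live in a hypothetical gzip_test.go, since gzipFooterBytes is unexported) that reads the footer back and prints the Extra subfield documented above:
```
package externaltoc

import (
    "bytes"
    "compress/gzip"
    "fmt"
)

func ExampleFooterLayout() {
    footer, err := gzipFooterBytes()
    if err != nil {
        panic(err)
    }
    zr, err := gzip.NewReader(bytes.NewReader(footer))
    if err != nil {
        panic(err)
    }
    defer zr.Close()
    // Extra = 2 bytes SI1/SI2 + 2 bytes LEN + 17 bytes "STARGZEXTERNALTOC".
    extra := zr.Header.Extra
    fmt.Println(len(footer), string(extra[0]), string(extra[1]), string(extra[4:]))
    // Output: 46 S G STARGZEXTERNALTOC
}
```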
func NewGzipDecompressor(provideTOCFunc func() ([]byte, error)) *GzipDecompressor {
return &GzipDecompressor{provideTOCFunc: provideTOCFunc}
}
type GzipDecompressor struct {
provideTOCFunc func() ([]byte, error)
rawTOC []byte // Do not access this field directly. Get this through getTOC() method.
getTOCOnce sync.Once
}
func (gz *GzipDecompressor) getTOC() ([]byte, error) {
if len(gz.rawTOC) == 0 {
var retErr error
gz.getTOCOnce.Do(func() {
if gz.provideTOCFunc == nil {
retErr = fmt.Errorf("TOC hasn't been provided")
return
}
rawTOC, err := gz.provideTOCFunc()
if err != nil {
retErr = err
return
}
gz.rawTOC = rawTOC
})
if retErr != nil {
return nil, retErr
}
if len(gz.rawTOC) == 0 {
return nil, fmt.Errorf("no TOC is provided")
}
}
return gz.rawTOC, nil
}
func (gz *GzipDecompressor) Reader(r io.Reader) (io.ReadCloser, error) {
return gzip.NewReader(r)
}
func (gz *GzipDecompressor) ParseTOC(r io.Reader) (toc *estargz.JTOC, tocDgst digest.Digest, err error) {
if r != nil {
return nil, "", fmt.Errorf("TOC must be provided externally but got internal one")
}
rawTOC, err := gz.getTOC()
if err != nil {
return nil, "", fmt.Errorf("failed to get TOC: %v", err)
}
return parseTOCEStargz(bytes.NewReader(rawTOC))
}
func (gz *GzipDecompressor) ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error) {
if len(p) != FooterSize {
return 0, 0, 0, fmt.Errorf("invalid length %d cannot be parsed", len(p))
}
zr, err := gzip.NewReader(bytes.NewReader(p))
if err != nil {
return 0, 0, 0, err
}
defer zr.Close()
extra := zr.Header.Extra
si1, si2, subfieldlen, subfield := extra[0], extra[1], extra[2:4], extra[4:]
if si1 != 'S' || si2 != 'G' {
return 0, 0, 0, fmt.Errorf("invalid subfield IDs: %q, %q; want E, S", si1, si2)
}
if slen := binary.LittleEndian.Uint16(subfieldlen); slen != uint16(len("STARGZEXTERNALTOC")) {
return 0, 0, 0, fmt.Errorf("invalid length of subfield %d; want %d", slen, 16+len("STARGZ"))
}
if string(subfield) != "STARGZEXTERNALTOC" {
return 0, 0, 0, fmt.Errorf("STARGZ magic string must be included in the footer subfield")
}
// tocOffset < 0 indicates external TOC.
// blobPayloadSize < 0 indicates the entire blob size.
return -1, -1, 0, nil
}
func (gz *GzipDecompressor) FooterSize() int64 {
return FooterSize
}
func (gz *GzipDecompressor) DecompressTOC(r io.Reader) (tocJSON io.ReadCloser, err error) {
if r != nil {
return nil, fmt.Errorf("TOC must be provided externally but got internal one")
}
rawTOC, err := gz.getTOC()
if err != nil {
return nil, fmt.Errorf("failed to get TOC: %v", err)
}
return decompressTOCEStargz(bytes.NewReader(rawTOC))
}
func parseTOCEStargz(r io.Reader) (toc *estargz.JTOC, tocDgst digest.Digest, err error) {
tr, err := decompressTOCEStargz(r)
if err != nil {
return nil, "", err
}
dgstr := digest.Canonical.Digester()
toc = new(estargz.JTOC)
if err := json.NewDecoder(io.TeeReader(tr, dgstr.Hash())).Decode(&toc); err != nil {
return nil, "", fmt.Errorf("error decoding TOC JSON: %v", err)
}
if err := tr.Close(); err != nil {
return nil, "", err
}
return toc, dgstr.Digest(), nil
}
func decompressTOCEStargz(r io.Reader) (tocJSON io.ReadCloser, err error) {
zr, err := gzip.NewReader(r)
if err != nil {
return nil, fmt.Errorf("malformed TOC gzip header: %v", err)
}
zr.Multistream(false)
tr := tar.NewReader(zr)
h, err := tr.Next()
if err != nil {
return nil, fmt.Errorf("failed to find tar header in TOC gzip stream: %v", err)
}
if h.Name != estargz.TOCTarName {
return nil, fmt.Errorf("TOC tar entry had name %q; expected %q", h.Name, estargz.TOCTarName)
}
return readCloser{tr, zr.Close}, nil
}
type readCloser struct {
io.Reader
closeFunc func() error
}
func (rc readCloser) Close() error {
return rc.closeFunc()
}

View File

@ -0,0 +1,237 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2019 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file.
*/
package estargz
import (
"archive/tar"
"bytes"
"compress/gzip"
"encoding/binary"
"encoding/json"
"fmt"
"hash"
"io"
"strconv"
digest "github.com/opencontainers/go-digest"
)
type gzipCompression struct {
*GzipCompressor
*GzipDecompressor
}
func newGzipCompressionWithLevel(level int) Compression {
return &gzipCompression{
&GzipCompressor{level},
&GzipDecompressor{},
}
}
func NewGzipCompressor() *GzipCompressor {
return &GzipCompressor{gzip.BestCompression}
}
func NewGzipCompressorWithLevel(level int) *GzipCompressor {
return &GzipCompressor{level}
}
type GzipCompressor struct {
compressionLevel int
}
func (gc *GzipCompressor) Writer(w io.Writer) (WriteFlushCloser, error) {
return gzip.NewWriterLevel(w, gc.compressionLevel)
}
func (gc *GzipCompressor) WriteTOCAndFooter(w io.Writer, off int64, toc *JTOC, diffHash hash.Hash) (digest.Digest, error) {
tocJSON, err := json.MarshalIndent(toc, "", "\t")
if err != nil {
return "", err
}
gz, _ := gzip.NewWriterLevel(w, gc.compressionLevel)
gw := io.Writer(gz)
if diffHash != nil {
gw = io.MultiWriter(gz, diffHash)
}
tw := tar.NewWriter(gw)
if err := tw.WriteHeader(&tar.Header{
Typeflag: tar.TypeReg,
Name: TOCTarName,
Size: int64(len(tocJSON)),
}); err != nil {
return "", err
}
if _, err := tw.Write(tocJSON); err != nil {
return "", err
}
if err := tw.Close(); err != nil {
return "", err
}
if err := gz.Close(); err != nil {
return "", err
}
if _, err := w.Write(gzipFooterBytes(off)); err != nil {
return "", err
}
return digest.FromBytes(tocJSON), nil
}
// gzipFooterBytes returns the 51 bytes footer.
func gzipFooterBytes(tocOff int64) []byte {
buf := bytes.NewBuffer(make([]byte, 0, FooterSize))
gz, _ := gzip.NewWriterLevel(buf, gzip.NoCompression) // MUST be NoCompression to keep 51 bytes
// Extra header indicating the offset of TOCJSON
// https://tools.ietf.org/html/rfc1952#section-2.3.1.1
header := make([]byte, 4)
header[0], header[1] = 'S', 'G'
subfield := fmt.Sprintf("%016xSTARGZ", tocOff)
binary.LittleEndian.PutUint16(header[2:4], uint16(len(subfield))) // little-endian per RFC1952
gz.Header.Extra = append(header, []byte(subfield)...)
gz.Close()
if buf.Len() != FooterSize {
panic(fmt.Sprintf("footer buffer = %d, not %d", buf.Len(), FooterSize))
}
return buf.Bytes()
}
type GzipDecompressor struct{}
func (gz *GzipDecompressor) Reader(r io.Reader) (io.ReadCloser, error) {
return gzip.NewReader(r)
}
func (gz *GzipDecompressor) ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error) {
return parseTOCEStargz(r)
}
func (gz *GzipDecompressor) ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error) {
if len(p) != FooterSize {
return 0, 0, 0, fmt.Errorf("invalid length %d cannot be parsed", len(p))
}
zr, err := gzip.NewReader(bytes.NewReader(p))
if err != nil {
return 0, 0, 0, err
}
defer zr.Close()
extra := zr.Header.Extra
si1, si2, subfieldlen, subfield := extra[0], extra[1], extra[2:4], extra[4:]
if si1 != 'S' || si2 != 'G' {
return 0, 0, 0, fmt.Errorf("invalid subfield IDs: %q, %q; want E, S", si1, si2)
}
if slen := binary.LittleEndian.Uint16(subfieldlen); slen != uint16(16+len("STARGZ")) {
return 0, 0, 0, fmt.Errorf("invalid length of subfield %d; want %d", slen, 16+len("STARGZ"))
}
if string(subfield[16:]) != "STARGZ" {
return 0, 0, 0, fmt.Errorf("STARGZ magic string must be included in the footer subfield")
}
tocOffset, err = strconv.ParseInt(string(subfield[:16]), 16, 64)
if err != nil {
return 0, 0, 0, fmt.Errorf("legacy: failed to parse toc offset: %w", err)
}
return tocOffset, tocOffset, 0, nil
}
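A companion in-package sketch (again a hypothetical gzip_test.go) of the footer round trip: the 16 hex digits in the Extra subfield carry the TOC offset that ParseFooter recovers; the offset value is arbitrary:
```
package estargz

import "fmt"

func ExampleFooterRoundTrip() {
    footer := gzipFooterBytes(0x1234)
    blobPayloadSize, tocOffset, _, err := (&GzipDecompressor{}).ParseFooter(footer)
    if err != nil {
        panic(err)
    }
    fmt.Println(blobPayloadSize, tocOffset)
    // Output: 4660 4660
}
```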
func (gz *GzipDecompressor) FooterSize() int64 {
return FooterSize
}
func (gz *GzipDecompressor) DecompressTOC(r io.Reader) (tocJSON io.ReadCloser, err error) {
return decompressTOCEStargz(r)
}
type LegacyGzipDecompressor struct{}
func (gz *LegacyGzipDecompressor) Reader(r io.Reader) (io.ReadCloser, error) {
return gzip.NewReader(r)
}
func (gz *LegacyGzipDecompressor) ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error) {
return parseTOCEStargz(r)
}
func (gz *LegacyGzipDecompressor) ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error) {
if len(p) != legacyFooterSize {
return 0, 0, 0, fmt.Errorf("legacy: invalid length %d cannot be parsed", len(p))
}
zr, err := gzip.NewReader(bytes.NewReader(p))
if err != nil {
return 0, 0, 0, fmt.Errorf("legacy: failed to get footer gzip reader: %w", err)
}
defer zr.Close()
extra := zr.Header.Extra
if len(extra) != 16+len("STARGZ") {
return 0, 0, 0, fmt.Errorf("legacy: invalid stargz's extra field size")
}
if string(extra[16:]) != "STARGZ" {
return 0, 0, 0, fmt.Errorf("legacy: magic string STARGZ not found")
}
tocOffset, err = strconv.ParseInt(string(extra[:16]), 16, 64)
if err != nil {
return 0, 0, 0, fmt.Errorf("legacy: failed to parse toc offset: %w", err)
}
return tocOffset, tocOffset, 0, nil
}
func (gz *LegacyGzipDecompressor) FooterSize() int64 {
return legacyFooterSize
}
func (gz *LegacyGzipDecompressor) DecompressTOC(r io.Reader) (tocJSON io.ReadCloser, err error) {
return decompressTOCEStargz(r)
}
func parseTOCEStargz(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error) {
tr, err := decompressTOCEStargz(r)
if err != nil {
return nil, "", err
}
dgstr := digest.Canonical.Digester()
toc = new(JTOC)
if err := json.NewDecoder(io.TeeReader(tr, dgstr.Hash())).Decode(&toc); err != nil {
return nil, "", fmt.Errorf("error decoding TOC JSON: %v", err)
}
if err := tr.Close(); err != nil {
return nil, "", err
}
return toc, dgstr.Digest(), nil
}
func decompressTOCEStargz(r io.Reader) (tocJSON io.ReadCloser, err error) {
zr, err := gzip.NewReader(r)
if err != nil {
return nil, fmt.Errorf("malformed TOC gzip header: %v", err)
}
zr.Multistream(false)
tr := tar.NewReader(zr)
h, err := tr.Next()
if err != nil {
return nil, fmt.Errorf("failed to find tar header in TOC gzip stream: %v", err)
}
if h.Name != TOCTarName {
return nil, fmt.Errorf("TOC tar entry had name %q; expected %q", h.Name, TOCTarName)
}
return readCloser{tr, zr.Close}, nil
}

File diff suppressed because it is too large

View File

@ -0,0 +1,342 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2019 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the LICENSE file.
*/
package estargz
import (
"archive/tar"
"hash"
"io"
"os"
"path"
"time"
digest "github.com/opencontainers/go-digest"
)
const (
// TOCTarName is the name of the JSON file in the tar archive in the
// table of contents gzip stream.
TOCTarName = "stargz.index.json"
// FooterSize is the number of bytes in the footer
//
// The footer is an empty gzip stream with no compression and an Extra
// header of the form "%016xSTARGZ", where the 64 bit hex-encoded
// number is the offset to the gzip stream of JSON TOC.
//
// 51 comes from:
//
// 10 bytes gzip header
// 2 bytes XLEN (length of Extra field) = 26 (4 bytes header + 16 hex digits + len("STARGZ"))
// 2 bytes Extra: SI1 = 'S', SI2 = 'G'
// 2 bytes Extra: LEN = 22 (16 hex digits + len("STARGZ"))
// 22 bytes Extra: subfield = fmt.Sprintf("%016xSTARGZ", offsetOfTOC)
// 5 bytes flate header
// 8 bytes gzip footer
// (End of the eStargz blob)
//
// NOTE: For Extra fields, subfield IDs SI1='S' SI2='G' is used for eStargz.
FooterSize = 51
// legacyFooterSize is the number of bytes in the legacy stargz footer.
//
// 47 comes from:
//
// 10 byte gzip header +
// 2 byte (LE16) length of extra, encoding 22 (16 hex digits + len("STARGZ")) == "\x16\x00" +
// 22 bytes of extra (fmt.Sprintf("%016xSTARGZ", tocGzipOffset))
// 5 byte flate header
// 8 byte gzip footer (two little endian uint32s: digest, size)
legacyFooterSize = 47
// TOCJSONDigestAnnotation is an annotation for an image layer. This stores the
// digest of the TOC JSON.
// This annotation is valid only when it is specified in `.[]layers.annotations`
// of an image manifest.
TOCJSONDigestAnnotation = "containerd.io/snapshot/stargz/toc.digest"
// StoreUncompressedSizeAnnotation is an additional annotation key for eStargz to enable lazy
// pulling on containers/storage. Stargz Store is required to expose the layer's uncompressed size
// to the runtime, but the current OCI image format doesn't ship this information by default. So we store it
// in this special annotation.
StoreUncompressedSizeAnnotation = "io.containers.estargz.uncompressed-size"
// PrefetchLandmark is a file entry which indicates the end position of
// prefetch in the stargz file.
PrefetchLandmark = ".prefetch.landmark"
// NoPrefetchLandmark is a file entry which indicates that no prefetch should
// occur in the stargz file.
NoPrefetchLandmark = ".no.prefetch.landmark"
landmarkContents = 0xf
)
// JTOC is the JSON-serialized table of contents index of the files in the stargz file.
type JTOC struct {
Version int `json:"version"`
Entries []*TOCEntry `json:"entries"`
}
// TOCEntry is an entry in the stargz file's TOC (Table of Contents).
type TOCEntry struct {
// Name is the tar entry's name. It is the complete path
// stored in the tar file, not just the base name.
Name string `json:"name"`
// Type is one of "dir", "reg", "symlink", "hardlink", "char",
// "block", "fifo", or "chunk".
// The "chunk" type is used for regular file data chunks past the first
// TOCEntry; the 2nd chunk and on have only Type ("chunk"), Offset,
// ChunkOffset, and ChunkSize populated.
Type string `json:"type"`
// Size, for regular files, is the logical size of the file.
Size int64 `json:"size,omitempty"`
// ModTime3339 is the modification time of the tar entry. Empty
// means zero or unknown. Otherwise it's in UTC RFC3339
// format. Use the ModTime method to access the time.Time value.
ModTime3339 string `json:"modtime,omitempty"`
modTime time.Time
// LinkName, for symlinks and hardlinks, is the link target.
LinkName string `json:"linkName,omitempty"`
// Mode is the permission and mode bits.
Mode int64 `json:"mode,omitempty"`
// UID is the user ID of the owner.
UID int `json:"uid,omitempty"`
// GID is the group ID of the owner.
GID int `json:"gid,omitempty"`
// Uname is the username of the owner.
//
// In the serialized JSON, this field may only be present for
// the first entry with the same UID.
Uname string `json:"userName,omitempty"`
// Gname is the group name of the owner.
//
// In the serialized JSON, this field may only be present for
// the first entry with the same GID.
Gname string `json:"groupName,omitempty"`
// Offset, for regular files, provides the offset in the
// stargz file to the file's data bytes. See ChunkOffset and
// ChunkSize.
Offset int64 `json:"offset,omitempty"`
// InnerOffset is an optional field that indicates the uncompressed offset
// of this "reg" or "chunk" payload within the stream that starts at Offset.
// This field makes it possible to put multiple "reg" or "chunk" payloads
// in one chunk, sharing the same Offset but with different InnerOffset values.
InnerOffset int64 `json:"innerOffset,omitempty"`
nextOffset int64 // the Offset of the next entry with a non-zero Offset
// DevMajor is the major device number for "char" and "block" types.
DevMajor int `json:"devMajor,omitempty"`
// DevMinor is the minor device number for "char" and "block" types.
DevMinor int `json:"devMinor,omitempty"`
// NumLink is the number of entry names pointing to this entry.
// Zero means one name references this entry.
// This field is calculated during runtime and not recorded in TOC JSON.
NumLink int `json:"-"`
// Xattrs are the extended attributes for the entry.
Xattrs map[string][]byte `json:"xattrs,omitempty"`
// Digest stores the OCI checksum for the regular file's payload.
// It has the form "sha256:abcdef01234....".
Digest string `json:"digest,omitempty"`
// ChunkOffset is non-zero if this is a chunk of a large,
// regular file. If so, the Offset is where the gzip header of
// ChunkSize bytes at ChunkOffset in Name begins.
//
// In serialized form, a "chunkSize" JSON field of zero means
// that the chunk goes to the end of the file. After reading
// from the stargz TOC, though, the ChunkSize is initialized
// to a non-zero value when Type is either "reg" or
// "chunk".
ChunkOffset int64 `json:"chunkOffset,omitempty"`
ChunkSize int64 `json:"chunkSize,omitempty"`
// ChunkDigest stores an OCI digest of the chunk. This must be formed
// as "sha256:0123abcd...".
ChunkDigest string `json:"chunkDigest,omitempty"`
children map[string]*TOCEntry
// chunkTopIndex is the index of the entry where Offset starts in the blob.
chunkTopIndex int
}
// ModTime returns the entry's modification time.
func (e *TOCEntry) ModTime() time.Time { return e.modTime }
// NextOffset returns the position (relative to the start of the
// stargz file) of the next gzip boundary after e.Offset.
func (e *TOCEntry) NextOffset() int64 { return e.nextOffset }
func (e *TOCEntry) addChild(baseName string, child *TOCEntry) {
if e.children == nil {
e.children = make(map[string]*TOCEntry)
}
if child.Type == "dir" {
e.NumLink++ // Entry ".." in the subdirectory links to this directory
}
e.children[baseName] = child
}
// isDataType reports whether TOCEntry is a regular file or chunk (something that
// contains regular file data).
func (e *TOCEntry) isDataType() bool { return e.Type == "reg" || e.Type == "chunk" }
// Stat returns a FileInfo value representing e.
func (e *TOCEntry) Stat() os.FileInfo { return fileInfo{e} }
// ForeachChild calls f for each child item. If f returns false, iteration ends.
// If e is not a directory, f is not called.
func (e *TOCEntry) ForeachChild(f func(baseName string, ent *TOCEntry) bool) {
for name, ent := range e.children {
if !f(name, ent) {
return
}
}
}
// LookupChild returns the directory e's child by its base name.
func (e *TOCEntry) LookupChild(baseName string) (child *TOCEntry, ok bool) {
child, ok = e.children[baseName]
return
}
// fileInfo implements os.FileInfo using the wrapped *TOCEntry.
type fileInfo struct{ e *TOCEntry }
var _ os.FileInfo = fileInfo{}
func (fi fileInfo) Name() string { return path.Base(fi.e.Name) }
func (fi fileInfo) IsDir() bool { return fi.e.Type == "dir" }
func (fi fileInfo) Size() int64 { return fi.e.Size }
func (fi fileInfo) ModTime() time.Time { return fi.e.ModTime() }
func (fi fileInfo) Sys() interface{} { return fi.e }
func (fi fileInfo) Mode() (m os.FileMode) {
// TOCEntry.Mode is tar.Header.Mode, so we can interpret these bits using the `tar` package.
m = (&tar.Header{Mode: fi.e.Mode}).FileInfo().Mode() &
(os.ModePerm | os.ModeSetuid | os.ModeSetgid | os.ModeSticky)
switch fi.e.Type {
case "dir":
m |= os.ModeDir
case "symlink":
m |= os.ModeSymlink
case "char":
m |= os.ModeDevice | os.ModeCharDevice
case "block":
m |= os.ModeDevice
case "fifo":
m |= os.ModeNamedPipe
}
return m
}
// TOCEntryVerifier holds verifiers that are usable for verifying chunks contained
// in an eStargz blob.
type TOCEntryVerifier interface {
// Verifier provides a content verifier that can be used for verifying the
// contents of the specified TOCEntry.
Verifier(ce *TOCEntry) (digest.Verifier, error)
}
// Compression provides the compression helper to be used when creating and parsing eStargz.
// This package provides gzip-based Compression by default, but any compression
// algorithm (e.g. zstd) can be used as long as it implements Compression.
type Compression interface {
Compressor
Decompressor
}
// Compressor represents the helper methods to be used for creating eStargz.
type Compressor interface {
// Writer returns a WriteCloser to be used for writing a chunk to eStargz.
// Every time a chunk is written, the WriteCloser is closed and Writer is
// called again for writing the next chunk.
//
// The returned writer should implement "Flush() error" function that flushes
// any pending compressed data to the underlying writer.
Writer(w io.Writer) (WriteFlushCloser, error)
// WriteTOCAndFooter is called to write the JTOC to the passed Writer.
// diffHash calculates the DiffID (uncompressed sha256 hash) of the blob.
// WriteTOCAndFooter can optionally write anything that affects the DiffID calculation
// (e.g. uncompressed TOC JSON).
//
// This function returns tocDgst that represents the digest of TOC that will be used
// to verify this blob when it's parsed.
WriteTOCAndFooter(w io.Writer, off int64, toc *JTOC, diffHash hash.Hash) (tocDgst digest.Digest, err error)
}
// Decompressor represents the helper methods to be used for parsing eStargz.
type Decompressor interface {
// Reader returns ReadCloser to be used for decompressing file payload.
Reader(r io.Reader) (io.ReadCloser, error)
// FooterSize returns the size of the footer of this blob.
FooterSize() int64
// ParseFooter parses the footer and returns the offset and (compressed) size of the TOC.
// blobPayloadSize is the (compressed) size of the blob payload (i.e. the size from the
// top of the blob up to the TOC JSON).
//
// If tocOffset < 0, we assume that the TOC isn't contained in the blob, and a nil reader
// is passed to ParseTOC. In that case ParseTOC is expected to acquire the TOC from an
// external location and return it.
//
// tocSize is optional. If tocSize <= 0, it defaults to the size of the range from tocOffset
// until the beginning of the footer (blob size - tocOffset - FooterSize).
// If blobPayloadSize < 0, blobPayloadSize becomes the blob size.
ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error)
// ParseTOC parses the TOC from the passed reader. The reader provides the partial contents
// of the underlying blob within the range specified by the ParseFooter method.
//
// This function returns tocDgst, the digest of the TOC that will be used
// to verify this blob. It must match the value returned from
// Compressor.WriteTOCAndFooter that was used when creating this blob.
//
// If the tocOffset returned by ParseFooter is < 0, we assume that the TOC isn't contained in
// the blob. A nil reader is then passed to ParseTOC, which is expected to acquire the TOC
// from an external location and return it.
ParseTOC(r io.Reader) (toc *JTOC, tocDgst digest.Digest, err error)
}
type WriteFlushCloser interface {
io.WriteCloser
Flush() error
}
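As a side note on the footer layout documented in the constants above: the 51-byte eStargz footer is itself an empty gzip stream whose Extra field carries the hex-encoded TOC offset. Below is a minimal decoding sketch using only the standard library; parseEStargzFooter is an illustrative name, not part of this package.

package example

import (
	"bytes"
	"compress/gzip"
	"fmt"
	"strconv"
)

// parseEStargzFooter extracts the TOC offset from a 51-byte eStargz footer.
// It assumes the documented layout: a gzip header whose Extra field holds the
// subfield SI1='S', SI2='G', LEN=22 and the payload "%016xSTARGZ".
func parseEStargzFooter(footer []byte) (tocOffset int64, err error) {
	zr, err := gzip.NewReader(bytes.NewReader(footer))
	if err != nil {
		return 0, err
	}
	defer zr.Close()
	extra := zr.Header.Extra
	if len(extra) != 4+22 || extra[0] != 'S' || extra[1] != 'G' {
		return 0, fmt.Errorf("unexpected extra field in eStargz footer")
	}
	payload := extra[4:]
	if string(payload[16:]) != "STARGZ" {
		return 0, fmt.Errorf("missing STARGZ suffix in eStargz footer")
	}
	// The first 16 bytes are the zero-padded hex offset of the TOC gzip stream.
	return strconv.ParseInt(string(payload[:16]), 16, 64)
}

(The legacy 47-byte footer encodes the same "%016xSTARGZ" payload without the SI1/SI2 subfield header, so a real parser would fall back accordingly.)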

View File

@ -0,0 +1,201 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package zstdchunked
import (
"bufio"
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"hash"
"io"
"sync"
"github.com/containerd/stargz-snapshotter/estargz"
"github.com/klauspost/compress/zstd"
digest "github.com/opencontainers/go-digest"
)
const (
// ManifestChecksumAnnotation is an annotation that contains the compressed TOC Digest
ManifestChecksumAnnotation = "io.containers.zstd-chunked.manifest-checksum"
// ManifestPositionAnnotation is an annotation that contains the offset to the TOC.
ManifestPositionAnnotation = "io.containers.zstd-chunked.manifest-position"
// FooterSize is the size of the footer
FooterSize = 40
manifestTypeCRFS = 1
)
var (
skippableFrameMagic = []byte{0x50, 0x2a, 0x4d, 0x18}
zstdFrameMagic = []byte{0x28, 0xb5, 0x2f, 0xfd}
zstdChunkedFrameMagic = []byte{0x47, 0x6e, 0x55, 0x6c, 0x49, 0x6e, 0x55, 0x78}
)
type Decompressor struct{}
func (zz *Decompressor) Reader(r io.Reader) (io.ReadCloser, error) {
decoder, err := zstd.NewReader(r)
if err != nil {
return nil, err
}
return &zstdReadCloser{decoder}, nil
}
func (zz *Decompressor) ParseTOC(r io.Reader) (toc *estargz.JTOC, tocDgst digest.Digest, err error) {
zr, err := zstd.NewReader(r)
if err != nil {
return nil, "", err
}
defer zr.Close()
dgstr := digest.Canonical.Digester()
toc = new(estargz.JTOC)
if err := json.NewDecoder(io.TeeReader(zr, dgstr.Hash())).Decode(&toc); err != nil {
return nil, "", fmt.Errorf("error decoding TOC JSON: %w", err)
}
return toc, dgstr.Digest(), nil
}
func (zz *Decompressor) ParseFooter(p []byte) (blobPayloadSize, tocOffset, tocSize int64, err error) {
offset := binary.LittleEndian.Uint64(p[0:8])
compressedLength := binary.LittleEndian.Uint64(p[8:16])
if !bytes.Equal(zstdChunkedFrameMagic, p[32:40]) {
return 0, 0, 0, fmt.Errorf("invalid magic number")
}
// 8 is the size of the zstd skippable frame header + the frame size (see WriteTOCAndFooter)
return int64(offset - 8), int64(offset), int64(compressedLength), nil
}
func (zz *Decompressor) FooterSize() int64 {
return FooterSize
}
func (zz *Decompressor) DecompressTOC(r io.Reader) (tocJSON io.ReadCloser, err error) {
decoder, err := zstd.NewReader(r)
if err != nil {
return nil, err
}
br := bufio.NewReader(decoder)
if _, err := br.Peek(1); err != nil {
return nil, err
}
return &reader{br, decoder.Close}, nil
}
type reader struct {
io.Reader
closeFunc func()
}
func (r *reader) Close() error { r.closeFunc(); return nil }
type zstdReadCloser struct{ *zstd.Decoder }
func (z *zstdReadCloser) Close() error {
z.Decoder.Close()
return nil
}
type Compressor struct {
CompressionLevel zstd.EncoderLevel
Metadata map[string]string
pool sync.Pool
}
func (zc *Compressor) Writer(w io.Writer) (estargz.WriteFlushCloser, error) {
if wc := zc.pool.Get(); wc != nil {
ec := wc.(*zstd.Encoder)
ec.Reset(w)
return &poolEncoder{ec, zc}, nil
}
ec, err := zstd.NewWriter(w, zstd.WithEncoderLevel(zc.CompressionLevel), zstd.WithLowerEncoderMem(true))
if err != nil {
return nil, err
}
return &poolEncoder{ec, zc}, nil
}
type poolEncoder struct {
*zstd.Encoder
zc *Compressor
}
func (w *poolEncoder) Close() error {
if err := w.Encoder.Close(); err != nil {
return err
}
w.zc.pool.Put(w.Encoder)
return nil
}
func (zc *Compressor) WriteTOCAndFooter(w io.Writer, off int64, toc *estargz.JTOC, diffHash hash.Hash) (digest.Digest, error) {
tocJSON, err := json.MarshalIndent(toc, "", "\t")
if err != nil {
return "", err
}
buf := new(bytes.Buffer)
encoder, err := zstd.NewWriter(buf, zstd.WithEncoderLevel(zc.CompressionLevel))
if err != nil {
return "", err
}
if _, err := encoder.Write(tocJSON); err != nil {
return "", err
}
if err := encoder.Close(); err != nil {
return "", err
}
compressedTOC := buf.Bytes()
_, err = io.Copy(w, bytes.NewReader(appendSkippableFrameMagic(compressedTOC)))
// 8 is the size of the zstd skippable frame header + the frame size
tocOff := uint64(off) + 8
if _, err := w.Write(appendSkippableFrameMagic(
zstdFooterBytes(tocOff, uint64(len(tocJSON)), uint64(len(compressedTOC)))),
); err != nil {
return "", err
}
if zc.Metadata != nil {
zc.Metadata[ManifestChecksumAnnotation] = digest.FromBytes(compressedTOC).String()
zc.Metadata[ManifestPositionAnnotation] = fmt.Sprintf("%d:%d:%d:%d",
tocOff, len(compressedTOC), len(tocJSON), manifestTypeCRFS)
}
return digest.FromBytes(tocJSON), err
}
// zstdFooterBytes returns the 40 bytes footer.
func zstdFooterBytes(tocOff, tocRawSize, tocCompressedSize uint64) []byte {
footer := make([]byte, FooterSize)
binary.LittleEndian.PutUint64(footer, tocOff)
binary.LittleEndian.PutUint64(footer[8:], tocCompressedSize)
binary.LittleEndian.PutUint64(footer[16:], tocRawSize)
binary.LittleEndian.PutUint64(footer[24:], manifestTypeCRFS)
copy(footer[32:40], zstdChunkedFrameMagic)
return footer
}
func appendSkippableFrameMagic(b []byte) []byte {
size := make([]byte, 4)
binary.LittleEndian.PutUint32(size, uint32(len(b)))
return append(append(skippableFrameMagic, size...), b...)
}
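For completeness, the 40-byte footer emitted by zstdFooterBytes above can be decoded with nothing but encoding/binary. A hedged sketch mirroring that layout follows; parseZstdChunkedFooter is an illustrative name, not part of this package.

package example

import (
	"bytes"
	"encoding/binary"
	"fmt"
)

// parseZstdChunkedFooter decodes the footer layout used above: LE64 TOC offset,
// LE64 compressed TOC size, LE64 raw TOC size, LE64 manifest type, and the
// 8-byte zstd:chunked magic.
func parseZstdChunkedFooter(footer []byte) (tocOffset, tocCompressedSize, tocRawSize uint64, err error) {
	if len(footer) != 40 {
		return 0, 0, 0, fmt.Errorf("footer must be 40 bytes, got %d", len(footer))
	}
	magic := []byte{0x47, 0x6e, 0x55, 0x6c, 0x49, 0x6e, 0x55, 0x78}
	if !bytes.Equal(footer[32:40], magic) {
		return 0, 0, 0, fmt.Errorf("invalid zstd:chunked footer magic")
	}
	return binary.LittleEndian.Uint64(footer[0:8]),
		binary.LittleEndian.Uint64(footer[8:16]),
		binary.LittleEndian.Uint64(footer[16:24]),
		nil
}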

View File

@ -0,0 +1,151 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2019 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the NOTICE.md file.
*/
package config
const (
// TargetSkipVerifyLabel is a snapshot label key that indicates to skip content
// verification for the layer.
TargetSkipVerifyLabel = "containerd.io/snapshot/remote/stargz.skipverify"
// TargetPrefetchSizeLabel is a snapshot label key that indicates size to prefetch
// the layer. If the layer is eStargz and contains prefetch landmarks, those landmarks
// will be respected.
TargetPrefetchSizeLabel = "containerd.io/snapshot/remote/stargz.prefetch"
)
// Config is configuration for stargz snapshotter filesystem.
type Config struct {
// Type of cache for compressed contents fetched from the registry. "memory" stores them on memory.
// Other values default to cache them on disk.
HTTPCacheType string `toml:"http_cache_type"`
// Type of cache for uncompressed files contents. "memory" stores them on memory. Other values
// default to cache them on disk.
FSCacheType string `toml:"filesystem_cache_type"`
// ResolveResultEntryTTLSec is TTL (in sec) to cache resolved layers for
// future use. (default 120s)
ResolveResultEntryTTLSec int `toml:"resolve_result_entry_ttl_sec"`
// PrefetchSize is the default size (in bytes) to prefetch when mounting a layer. Default is 0. Stargz-snapshotter still
// uses the value specified by the image using "containerd.io/snapshot/remote/stargz.prefetch" or the landmark file.
PrefetchSize int64 `toml:"prefetch_size"`
// PrefetchTimeoutSec is the timeout (in seconds) applied when prefetching takes too long. Default is 10s.
PrefetchTimeoutSec int64 `toml:"prefetch_timeout_sec"`
// NoPrefetch disables prefetching. Default is false.
NoPrefetch bool `toml:"noprefetch"`
// NoBackgroundFetch disables the behaviour of fetching the entire layer contents in background. Default is false.
NoBackgroundFetch bool `toml:"no_background_fetch"`
// Debug enables filesystem debug log.
Debug bool `toml:"debug"`
// AllowNoVerification allows mounting images without verification. Default is false.
AllowNoVerification bool `toml:"allow_no_verification"`
// DisableVerification disables verifying layer contents. Default is false.
DisableVerification bool `toml:"disable_verification"`
// MaxConcurrency is max number of concurrent background tasks for fetching layer contents. Default is 2.
MaxConcurrency int64 `toml:"max_concurrency"`
// NoPrometheus disables exposing filesystem-related metrics. Default is false.
NoPrometheus bool `toml:"no_prometheus"`
// BlobConfig is config for layer blob management.
BlobConfig `toml:"blob"`
// DirectoryCacheConfig is config for directory-based cache.
DirectoryCacheConfig `toml:"directory_cache"`
// FuseConfig is configurations for FUSE fs.
FuseConfig `toml:"fuse"`
// ResolveResultEntry is a deprecated field.
ResolveResultEntry int `toml:"resolve_result_entry"` // deprecated
}
// BlobConfig is configuration for the logic to fetching blobs.
type BlobConfig struct {
// ValidInterval specifies a duration (in seconds) during which the layer can be reused without
// checking the connection to the registry. Default is 60.
ValidInterval int64 `toml:"valid_interval"`
// CheckAlways overwrites ValidInterval to 0 if it's true. Default is false.
CheckAlways bool `toml:"check_always"`
// ChunkSize is the granularity (in bytes) at which background fetch and on-demand reads
// are fetched from the remote registry. Default is 50000.
ChunkSize int64 `toml:"chunk_size"`
// FetchTimeoutSec is a timeout duration (in seconds) for fetching chunks from the registry. Default is 300.
FetchTimeoutSec int64 `toml:"fetching_timeout_sec"`
// ForceSingleRangeMode disables the use of multiple ranges in a Range Request and always specifies one larger
// region that covers them. Default is false.
ForceSingleRangeMode bool `toml:"force_single_range_mode"`
// PrefetchChunkSize is the maximum bytes transferred per http GET from remote registry
// during prefetch. It is recommended to have PrefetchChunkSize > ChunkSize.
// If PrefetchChunkSize < ChunkSize prefetch bytes will be fetched as a single http GET,
// else total GET requests for prefetch = ceil(PrefetchSize / PrefetchChunkSize).
// Default is 0.
PrefetchChunkSize int64 `toml:"prefetch_chunk_size"`
// MaxRetries is the maximum number of retries for an HTTP request. Default is 5.
MaxRetries int `toml:"max_retries"`
// MinWaitMSec is the minimum delay (in milliseconds) before retrying after a request failure. Default is 30.
MinWaitMSec int `toml:"min_wait_msec"`
// MaxWaitMSec is the maximum delay (in milliseconds) before retrying after a request failure. Default is 30.
MaxWaitMSec int `toml:"max_wait_msec"`
}
// DirectoryCacheConfig is configuration for the disk-based cache.
type DirectoryCacheConfig struct {
// MaxLRUCacheEntry is the number of entries in the LRU cache used to cache data on memory. Default is 10.
MaxLRUCacheEntry int `toml:"max_lru_cache_entry"`
// MaxCacheFds is the number of entries in the LRU cache used to hold fds of cached content files. Default is 10.
MaxCacheFds int `toml:"max_cache_fds"`
// SyncAdd, when true, makes each addition of data to the cache block until the data is fully written to the
// cache directory. Default is false.
SyncAdd bool `toml:"sync_add"`
// Direct disables on-memory data cache. Default is true for saving memory usage.
Direct bool `toml:"direct" default:"true"`
}
// FuseConfig is configuration for FUSE fs.
type FuseConfig struct {
// AttrTimeout defines the attribute caching timeout for the filesystem, in seconds.
AttrTimeout int64 `toml:"attr_timeout"`
// EntryTimeout defines the TTL for directory entry (name lookup) caching, in seconds.
EntryTimeout int64 `toml:"entry_timeout"`
}
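The toml struct tags above map one-to-one onto the snapshotter's TOML configuration. A hedged example fragment follows; the keys are taken from the tags above, the values are placeholders rather than recommendations, and the exact nesting within the snapshotter's config file may differ.

http_cache_type = "directory"
filesystem_cache_type = "directory"
resolve_result_entry_ttl_sec = 120
prefetch_timeout_sec = 10
noprefetch = false
max_concurrency = 2

[blob]
valid_interval = 60
chunk_size = 50000
fetching_timeout_sec = 300

[directory_cache]
max_lru_cache_entry = 10
max_cache_fds = 10
direct = true

[fuse]
attr_timeout = 1
entry_timeout = 1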

View File

@ -0,0 +1,506 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2019 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the NOTICE.md file.
*/
//
// Example implementation of FileSystem.
//
// This implementation uses stargz by CRFS (https://github.com/google/crfs) as the
// image format, which has the following features:
// - We can use a docker registry as a backend store (i.e. no additional layer
//   store is required).
// - The stargz-formatted image is still docker-compatible (i.e. normal
//   runtimes can still use the formatted image).
//
// Currently, we have reimplemented a CRFS-like filesystem for ease of integration.
// But in the near future, we intend to integrate it with CRFS.
//
package fs
import (
"context"
"fmt"
"os/exec"
"strconv"
"sync"
"time"
"github.com/containerd/containerd/v2/core/remotes/docker"
"github.com/containerd/containerd/v2/pkg/reference"
"github.com/containerd/log"
"github.com/containerd/stargz-snapshotter/estargz"
"github.com/containerd/stargz-snapshotter/fs/config"
"github.com/containerd/stargz-snapshotter/fs/layer"
commonmetrics "github.com/containerd/stargz-snapshotter/fs/metrics/common"
layermetrics "github.com/containerd/stargz-snapshotter/fs/metrics/layer"
"github.com/containerd/stargz-snapshotter/fs/remote"
"github.com/containerd/stargz-snapshotter/fs/source"
"github.com/containerd/stargz-snapshotter/metadata"
memorymetadata "github.com/containerd/stargz-snapshotter/metadata/memory"
"github.com/containerd/stargz-snapshotter/snapshot"
"github.com/containerd/stargz-snapshotter/task"
metrics "github.com/docker/go-metrics"
fusefs "github.com/hanwen/go-fuse/v2/fs"
"github.com/hanwen/go-fuse/v2/fuse"
digest "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"golang.org/x/sys/unix"
)
const (
defaultFuseTimeout = time.Second
defaultMaxConcurrency = 2
)
var fusermountBin = []string{"fusermount", "fusermount3"}
type Option func(*options)
type options struct {
getSources source.GetSources
resolveHandlers map[string]remote.Handler
metadataStore metadata.Store
metricsLogLevel *log.Level
overlayOpaqueType layer.OverlayOpaqueType
additionalDecompressors func(context.Context, source.RegistryHosts, reference.Spec, ocispec.Descriptor) []metadata.Decompressor
}
func WithGetSources(s source.GetSources) Option {
return func(opts *options) {
opts.getSources = s
}
}
func WithResolveHandler(name string, handler remote.Handler) Option {
return func(opts *options) {
if opts.resolveHandlers == nil {
opts.resolveHandlers = make(map[string]remote.Handler)
}
opts.resolveHandlers[name] = handler
}
}
func WithMetadataStore(metadataStore metadata.Store) Option {
return func(opts *options) {
opts.metadataStore = metadataStore
}
}
func WithMetricsLogLevel(logLevel log.Level) Option {
return func(opts *options) {
opts.metricsLogLevel = &logLevel
}
}
func WithOverlayOpaqueType(overlayOpaqueType layer.OverlayOpaqueType) Option {
return func(opts *options) {
opts.overlayOpaqueType = overlayOpaqueType
}
}
func WithAdditionalDecompressors(d func(context.Context, source.RegistryHosts, reference.Spec, ocispec.Descriptor) []metadata.Decompressor) Option {
return func(opts *options) {
opts.additionalDecompressors = d
}
}
func NewFilesystem(root string, cfg config.Config, opts ...Option) (_ snapshot.FileSystem, err error) {
var fsOpts options
for _, o := range opts {
o(&fsOpts)
}
maxConcurrency := cfg.MaxConcurrency
if maxConcurrency == 0 {
maxConcurrency = defaultMaxConcurrency
}
attrTimeout := time.Duration(cfg.FuseConfig.AttrTimeout) * time.Second
if attrTimeout == 0 {
attrTimeout = defaultFuseTimeout
}
entryTimeout := time.Duration(cfg.FuseConfig.EntryTimeout) * time.Second
if entryTimeout == 0 {
entryTimeout = defaultFuseTimeout
}
metadataStore := fsOpts.metadataStore
if metadataStore == nil {
metadataStore = memorymetadata.NewReader
}
getSources := fsOpts.getSources
if getSources == nil {
getSources = source.FromDefaultLabels(func(refspec reference.Spec) (hosts []docker.RegistryHost, _ error) {
return docker.ConfigureDefaultRegistries(docker.WithPlainHTTP(docker.MatchLocalhost))(refspec.Hostname())
})
}
tm := task.NewBackgroundTaskManager(maxConcurrency, 5*time.Second)
r, err := layer.NewResolver(root, tm, cfg, fsOpts.resolveHandlers, metadataStore, fsOpts.overlayOpaqueType, fsOpts.additionalDecompressors)
if err != nil {
return nil, fmt.Errorf("failed to setup resolver: %w", err)
}
var ns *metrics.Namespace
if !cfg.NoPrometheus {
ns = metrics.NewNamespace("stargz", "fs", nil)
logLevel := log.DebugLevel
if fsOpts.metricsLogLevel != nil {
logLevel = *fsOpts.metricsLogLevel
}
commonmetrics.Register(logLevel) // Register common metrics. This will happen only once.
}
c := layermetrics.NewLayerMetrics(ns)
if ns != nil {
metrics.Register(ns) // Register layer metrics.
}
return &filesystem{
resolver: r,
getSources: getSources,
prefetchSize: cfg.PrefetchSize,
noprefetch: cfg.NoPrefetch,
noBackgroundFetch: cfg.NoBackgroundFetch,
debug: cfg.Debug,
layer: make(map[string]layer.Layer),
backgroundTaskManager: tm,
allowNoVerification: cfg.AllowNoVerification,
disableVerification: cfg.DisableVerification,
metricsController: c,
attrTimeout: attrTimeout,
entryTimeout: entryTimeout,
}, nil
}
type filesystem struct {
resolver *layer.Resolver
prefetchSize int64
noprefetch bool
noBackgroundFetch bool
debug bool
layer map[string]layer.Layer
layerMu sync.Mutex
backgroundTaskManager *task.BackgroundTaskManager
allowNoVerification bool
disableVerification bool
getSources source.GetSources
metricsController *layermetrics.Controller
attrTimeout time.Duration
entryTimeout time.Duration
}
func (fs *filesystem) Mount(ctx context.Context, mountpoint string, labels map[string]string) (retErr error) {
// Setting the start time to measure the Mount operation duration.
start := time.Now()
// This is a prioritized task; all background tasks are paused during its
// execution so that it is not disturbed by network traffic generated by
// background tasks.
fs.backgroundTaskManager.DoPrioritizedTask()
defer fs.backgroundTaskManager.DonePrioritizedTask()
ctx = log.WithLogger(ctx, log.G(ctx).WithField("mountpoint", mountpoint))
// Get source information of this layer.
src, err := fs.getSources(labels)
if err != nil {
return err
} else if len(src) == 0 {
return fmt.Errorf("source must be passed")
}
defaultPrefetchSize := fs.prefetchSize
if psStr, ok := labels[config.TargetPrefetchSizeLabel]; ok {
if ps, err := strconv.ParseInt(psStr, 10, 64); err == nil {
defaultPrefetchSize = ps
}
}
// Resolve the target layer
var (
resultChan = make(chan layer.Layer)
errChan = make(chan error)
)
go func() {
rErr := fmt.Errorf("failed to resolve target")
for _, s := range src {
l, err := fs.resolver.Resolve(ctx, s.Hosts, s.Name, s.Target)
if err == nil {
resultChan <- l
fs.prefetch(ctx, l, defaultPrefetchSize, start)
return
}
rErr = fmt.Errorf("failed to resolve layer %q from %q: %v: %w", s.Target.Digest, s.Name, err, rErr)
}
errChan <- rErr
}()
// Also resolve and cache other layers in parallel
preResolve := src[0] // TODO: should we pre-resolve blobs in other sources as well?
for _, desc := range neighboringLayers(preResolve.Manifest, preResolve.Target) {
desc := desc
go func() {
// Avoid being canceled by the client.
ctx := log.WithLogger(context.Background(), log.G(ctx).WithField("mountpoint", mountpoint))
l, err := fs.resolver.Resolve(ctx, preResolve.Hosts, preResolve.Name, desc)
if err != nil {
log.G(ctx).WithError(err).Debug("failed to pre-resolve")
return
}
fs.prefetch(ctx, l, defaultPrefetchSize, start)
// Release this layer because it isn't the target and we don't use it anymore here.
// However, it will remain in the resolver cache until eviction.
l.Done()
}()
}
// Wait for resolving completion
var l layer.Layer
select {
case l = <-resultChan:
case err := <-errChan:
log.G(ctx).WithError(err).Debug("failed to resolve layer")
return fmt.Errorf("failed to resolve layer: %w", err)
case <-time.After(30 * time.Second):
log.G(ctx).Debug("failed to resolve layer (timeout)")
return fmt.Errorf("failed to resolve layer (timeout)")
}
defer func() {
if retErr != nil {
l.Done() // don't use this layer.
}
}()
// Verify layer's content
if fs.disableVerification {
// Skip if verification is disabled completely
l.SkipVerify()
log.G(ctx).Infof("Verification forcefully skipped")
} else if tocDigest, ok := labels[estargz.TOCJSONDigestAnnotation]; ok {
// Verify this layer using the TOC JSON digest passed through label.
dgst, err := digest.Parse(tocDigest)
if err != nil {
log.G(ctx).WithError(err).Debugf("failed to parse passed TOC digest %q", tocDigest)
return fmt.Errorf("invalid TOC digest: %v: %w", tocDigest, err)
}
if err := l.Verify(dgst); err != nil {
log.G(ctx).WithError(err).Debugf("invalid layer")
return fmt.Errorf("invalid stargz layer: %w", err)
}
log.G(ctx).Debugf("verified")
} else if _, ok := labels[config.TargetSkipVerifyLabel]; ok && fs.allowNoVerification {
// If unverified layer is allowed, use it with warning.
// This mode is for legacy stargz archives which don't contain digests
// necessary for layer verification.
l.SkipVerify()
log.G(ctx).Warningf("No verification is held for layer")
} else {
// Verification must be done. Don't mount this layer.
return fmt.Errorf("digest of TOC JSON must be passed")
}
node, err := l.RootNode(0)
if err != nil {
log.G(ctx).WithError(err).Warnf("Failed to get root node")
return fmt.Errorf("failed to get root node: %w", err)
}
// Measuring duration of Mount operation for resolved layer.
digest := l.Info().Digest // get layer sha
defer commonmetrics.MeasureLatencyInMilliseconds(commonmetrics.Mount, digest, start)
// Register the mountpoint layer
fs.layerMu.Lock()
fs.layer[mountpoint] = l
fs.layerMu.Unlock()
fs.metricsController.Add(mountpoint, l)
// mount the node to the specified mountpoint
// TODO: bind mount the state directory as a read-only fs on snapshotter's side
rawFS := fusefs.NewNodeFS(node, &fusefs.Options{
AttrTimeout: &fs.attrTimeout,
EntryTimeout: &fs.entryTimeout,
NullPermissions: true,
})
mountOpts := &fuse.MountOptions{
AllowOther: true, // allow users other than root&mounter to access fs
FsName: "stargz", // name this filesystem as "stargz"
Debug: fs.debug,
}
if isFusermountBinExist() {
log.G(ctx).Infof("fusermount detected")
mountOpts.Options = []string{"suid"} // option for fusermount; allow setuid inside container
} else {
log.G(ctx).WithError(err).Infof("%s not installed; trying direct mount", fusermountBin)
mountOpts.DirectMount = true
}
server, err := fuse.NewServer(rawFS, mountpoint, mountOpts)
if err != nil {
log.G(ctx).WithError(err).Debug("failed to make filesystem server")
return err
}
go server.Serve()
return server.WaitMount()
}
func (fs *filesystem) Check(ctx context.Context, mountpoint string, labels map[string]string) error {
// This is a prioritized task; all background tasks are paused during its
// execution so that it is not disturbed by network traffic generated by
// background tasks.
fs.backgroundTaskManager.DoPrioritizedTask()
defer fs.backgroundTaskManager.DonePrioritizedTask()
defer commonmetrics.MeasureLatencyInMilliseconds(commonmetrics.PrefetchesCompleted, digest.FromString(""), time.Now()) // measuring the time the container launch is blocked on prefetch to complete
ctx = log.WithLogger(ctx, log.G(ctx).WithField("mountpoint", mountpoint))
fs.layerMu.Lock()
l := fs.layer[mountpoint]
fs.layerMu.Unlock()
if l == nil {
log.G(ctx).Debug("layer not registered")
return fmt.Errorf("layer not registered")
}
if l.Info().FetchedSize < l.Info().Size {
// Image contents haven't been fully cached yet.
// Check the blob connectivity and try to refresh the connection on failure
if err := fs.check(ctx, l, labels); err != nil {
log.G(ctx).WithError(err).Warn("check failed")
return err
}
}
// Wait for prefetch completion
if !fs.noprefetch {
if err := l.WaitForPrefetchCompletion(); err != nil {
log.G(ctx).WithError(err).Warn("failed to sync with prefetch completion")
}
}
return nil
}
func (fs *filesystem) check(ctx context.Context, l layer.Layer, labels map[string]string) error {
err := l.Check()
if err == nil {
return nil
}
log.G(ctx).WithError(err).Warn("failed to connect to blob")
// Check failed. Try to refresh the connection with fresh source information
src, err := fs.getSources(labels)
if err != nil {
return err
}
var (
retrynum = 1
rErr = fmt.Errorf("failed to refresh connection")
)
for retry := 0; retry < retrynum; retry++ {
log.G(ctx).Warnf("refreshing(%d)...", retry)
for _, s := range src {
err := l.Refresh(ctx, s.Hosts, s.Name, s.Target)
if err == nil {
log.G(ctx).Debug("Successfully refreshed connection")
return nil
}
log.G(ctx).WithError(err).Warnf("failed to refresh the layer %q from %q", s.Target.Digest, s.Name)
rErr = fmt.Errorf("failed(layer:%q, ref:%q): %v: %w", s.Target.Digest, s.Name, err, rErr)
}
}
return rErr
}
func (fs *filesystem) Unmount(ctx context.Context, mountpoint string) error {
if mountpoint == "" {
return fmt.Errorf("mount point must be specified")
}
fs.layerMu.Lock()
l, ok := fs.layer[mountpoint]
if !ok {
fs.layerMu.Unlock()
return fmt.Errorf("specified path %q isn't a mountpoint", mountpoint)
}
delete(fs.layer, mountpoint) // unregisters the corresponding layer
l.Done()
fs.layerMu.Unlock()
fs.metricsController.Remove(mountpoint)
if err := unmount(mountpoint, 0); err != nil {
if err != unix.EBUSY {
return err
}
// Try force unmount
log.G(ctx).WithError(err).Debugf("trying force unmount %q", mountpoint)
if err := unmount(mountpoint, unix.MNT_FORCE); err != nil {
return err
}
}
return nil
}
func unmount(target string, flags int) error {
for {
if err := unix.Unmount(target, flags); err != unix.EINTR {
return err
}
}
}
func (fs *filesystem) prefetch(ctx context.Context, l layer.Layer, defaultPrefetchSize int64, start time.Time) {
// Prefetch a layer. The first Check() for this layer waits for the prefetch completion.
if !fs.noprefetch {
go l.Prefetch(defaultPrefetchSize)
}
// Fetch whole layer aggressively in background.
if !fs.noBackgroundFetch {
go func() {
if err := l.BackgroundFetch(); err == nil {
// write log record for the latency between mount start and last on demand fetch
commonmetrics.LogLatencyForLastOnDemandFetch(ctx, l.Info().Digest, start, l.Info().ReadTime)
}
}()
}
}
// neighboringLayers returns layer descriptors except the `target` layer in the specified manifest.
func neighboringLayers(manifest ocispec.Manifest, target ocispec.Descriptor) (descs []ocispec.Descriptor) {
for _, desc := range manifest.Layers {
if desc.Digest.String() != target.Digest.String() {
descs = append(descs, desc)
}
}
return
}
func isFusermountBinExist() bool {
for _, b := range fusermountBin {
if _, err := exec.LookPath(b); err == nil {
return true
}
}
return false
}
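To illustrate how the pieces above fit together, here is a hedged usage sketch of the constructor and the Mount/Check/Unmount flow. The root directory, mountpoint, and labels are placeholders; in practice the snapshotter supplies labels carrying the image reference and layer digest expected by source.FromDefaultLabels, and the import paths are assumed to follow the ones used elsewhere in this commit.

package example

import (
	"context"

	"github.com/containerd/stargz-snapshotter/fs"
	"github.com/containerd/stargz-snapshotter/fs/config"
)

// mountLayer drives the filesystem defined above for a single layer.
func mountLayer(ctx context.Context, labels map[string]string) error {
	sgzFS, err := fs.NewFilesystem("/var/lib/stargz-example", config.Config{})
	if err != nil {
		return err
	}
	mountpoint := "/tmp/stargz-example-mnt" // placeholder
	if err := sgzFS.Mount(ctx, mountpoint, labels); err != nil {
		return err
	}
	// Check verifies blob connectivity and waits for prefetch completion.
	if err := sgzFS.Check(ctx, mountpoint, labels); err != nil {
		return err
	}
	return sgzFS.Unmount(ctx, mountpoint)
}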

View File

@ -0,0 +1,681 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2019 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the NOTICE.md file.
*/
package layer
import (
"bytes"
"context"
"fmt"
"io"
"os"
"path/filepath"
"sync"
"time"
"github.com/containerd/containerd/v2/pkg/reference"
"github.com/containerd/log"
"github.com/containerd/stargz-snapshotter/cache"
"github.com/containerd/stargz-snapshotter/estargz"
"github.com/containerd/stargz-snapshotter/estargz/zstdchunked"
"github.com/containerd/stargz-snapshotter/fs/config"
commonmetrics "github.com/containerd/stargz-snapshotter/fs/metrics/common"
"github.com/containerd/stargz-snapshotter/fs/reader"
"github.com/containerd/stargz-snapshotter/fs/remote"
"github.com/containerd/stargz-snapshotter/fs/source"
"github.com/containerd/stargz-snapshotter/metadata"
"github.com/containerd/stargz-snapshotter/task"
"github.com/containerd/stargz-snapshotter/util/cacheutil"
"github.com/containerd/stargz-snapshotter/util/namedmutex"
fusefs "github.com/hanwen/go-fuse/v2/fs"
digest "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)
const (
defaultResolveResultEntryTTLSec = 120
defaultMaxLRUCacheEntry = 10
defaultMaxCacheFds = 10
defaultPrefetchTimeoutSec = 10
memoryCacheType = "memory"
)
// Layer represents a layer.
type Layer interface {
// Info returns the information of this layer.
Info() Info
// RootNode returns the root node of this layer.
RootNode(baseInode uint32) (fusefs.InodeEmbedder, error)
// Check checks if the layer is still connectable.
Check() error
// Refresh refreshes the layer connection.
Refresh(ctx context.Context, hosts source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error
// Verify verifies this layer using the passed TOC Digest.
// Nop if Verify() or SkipVerify() was already called.
Verify(tocDigest digest.Digest) (err error)
// SkipVerify skips verification for this layer.
// Nop if Verify() or SkipVerify() was already called.
SkipVerify()
// Prefetch prefetches the specified size. If the layer is eStargz and contains landmark files,
// the range indicated by these files is respected.
Prefetch(prefetchSize int64) error
// ReadAt reads this layer.
ReadAt([]byte, int64, ...remote.Option) (int, error)
// WaitForPrefetchCompletion waits until Prefetch completes.
WaitForPrefetchCompletion() error
// BackgroundFetch fetches the entire layer contents to the cache.
// Fetching contents is done as a background task.
BackgroundFetch() error
// Done releases the reference to this layer. The resources related to this layer will be
// discarded sooner or later. Queries after calling this function won't be serviced.
Done()
}
// Info is the current status of a layer.
type Info struct {
Digest digest.Digest
Size int64 // layer size in bytes
FetchedSize int64 // layer fetched size in bytes
PrefetchSize int64 // layer prefetch size in bytes
ReadTime time.Time // last time the layer was read
TOCDigest digest.Digest
}
// Resolver resolves the layer location and provides the handler of that layer.
type Resolver struct {
rootDir string
resolver *remote.Resolver
prefetchTimeout time.Duration
layerCache *cacheutil.TTLCache
layerCacheMu sync.Mutex
blobCache *cacheutil.TTLCache
blobCacheMu sync.Mutex
backgroundTaskManager *task.BackgroundTaskManager
resolveLock *namedmutex.NamedMutex
config config.Config
metadataStore metadata.Store
overlayOpaqueType OverlayOpaqueType
additionalDecompressors func(context.Context, source.RegistryHosts, reference.Spec, ocispec.Descriptor) []metadata.Decompressor
}
// NewResolver returns a new layer resolver.
func NewResolver(root string, backgroundTaskManager *task.BackgroundTaskManager, cfg config.Config, resolveHandlers map[string]remote.Handler, metadataStore metadata.Store, overlayOpaqueType OverlayOpaqueType, additionalDecompressors func(context.Context, source.RegistryHosts, reference.Spec, ocispec.Descriptor) []metadata.Decompressor) (*Resolver, error) {
resolveResultEntryTTL := time.Duration(cfg.ResolveResultEntryTTLSec) * time.Second
if resolveResultEntryTTL == 0 {
resolveResultEntryTTL = defaultResolveResultEntryTTLSec * time.Second
}
prefetchTimeout := time.Duration(cfg.PrefetchTimeoutSec) * time.Second
if prefetchTimeout == 0 {
prefetchTimeout = defaultPrefetchTimeoutSec * time.Second
}
// layerCache caches resolved layers for future use. This is useful in a use-case where
// the filesystem resolves and caches all layers in an image (not only the queried one) in parallel,
// before they are actually queried.
layerCache := cacheutil.NewTTLCache(resolveResultEntryTTL)
layerCache.OnEvicted = func(key string, value interface{}) {
if err := value.(*layer).close(); err != nil {
log.L.WithField("key", key).WithError(err).Warnf("failed to clean up layer")
return
}
log.L.WithField("key", key).Debugf("cleaned up layer")
}
// blobCache caches resolved blobs for future use. This is especially useful when a layer
// isn't eStargz/stargz (the *layer object won't be created/cached in this case).
blobCache := cacheutil.NewTTLCache(resolveResultEntryTTL)
blobCache.OnEvicted = func(key string, value interface{}) {
if err := value.(remote.Blob).Close(); err != nil {
log.L.WithField("key", key).WithError(err).Warnf("failed to clean up blob")
return
}
log.L.WithField("key", key).Debugf("cleaned up blob")
}
if err := os.MkdirAll(root, 0700); err != nil {
return nil, err
}
return &Resolver{
rootDir: root,
resolver: remote.NewResolver(cfg.BlobConfig, resolveHandlers),
layerCache: layerCache,
blobCache: blobCache,
prefetchTimeout: prefetchTimeout,
backgroundTaskManager: backgroundTaskManager,
config: cfg,
resolveLock: new(namedmutex.NamedMutex),
metadataStore: metadataStore,
overlayOpaqueType: overlayOpaqueType,
additionalDecompressors: additionalDecompressors,
}, nil
}
func newCache(root string, cacheType string, cfg config.Config) (cache.BlobCache, error) {
if cacheType == memoryCacheType {
return cache.NewMemoryCache(), nil
}
dcc := cfg.DirectoryCacheConfig
maxDataEntry := dcc.MaxLRUCacheEntry
if maxDataEntry == 0 {
maxDataEntry = defaultMaxLRUCacheEntry
}
maxFdEntry := dcc.MaxCacheFds
if maxFdEntry == 0 {
maxFdEntry = defaultMaxCacheFds
}
bufPool := &sync.Pool{
New: func() interface{} {
return new(bytes.Buffer)
},
}
dCache, fCache := cacheutil.NewLRUCache(maxDataEntry), cacheutil.NewLRUCache(maxFdEntry)
dCache.OnEvicted = func(key string, value interface{}) {
value.(*bytes.Buffer).Reset()
bufPool.Put(value)
}
fCache.OnEvicted = func(key string, value interface{}) {
value.(*os.File).Close()
}
// create a cache in a unique directory
if err := os.MkdirAll(root, 0700); err != nil {
return nil, err
}
cachePath, err := os.MkdirTemp(root, "")
if err != nil {
return nil, fmt.Errorf("failed to initialize directory cache: %w", err)
}
return cache.NewDirectoryCache(
cachePath,
cache.DirectoryCacheConfig{
SyncAdd: dcc.SyncAdd,
DataCache: dCache,
FdCache: fCache,
BufPool: bufPool,
Direct: dcc.Direct,
},
)
}
// Resolve resolves a layer based on the passed layer blob information.
func (r *Resolver) Resolve(ctx context.Context, hosts source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor, esgzOpts ...metadata.Option) (_ Layer, retErr error) {
name := refspec.String() + "/" + desc.Digest.String()
// Wait if resolving this layer is already in progress. The result
// can then hopefully be retrieved from the cache.
r.resolveLock.Lock(name)
defer r.resolveLock.Unlock(name)
ctx = log.WithLogger(ctx, log.G(ctx).WithField("src", name))
// First, try to retrieve this layer from the underlying cache.
r.layerCacheMu.Lock()
c, done, ok := r.layerCache.Get(name)
r.layerCacheMu.Unlock()
if ok {
if l := c.(*layer); l.Check() == nil {
log.G(ctx).Debugf("hit layer cache %q", name)
return &layerRef{l, done}, nil
}
// Cached layer is invalid
done()
r.layerCacheMu.Lock()
r.layerCache.Remove(name)
r.layerCacheMu.Unlock()
}
log.G(ctx).Debugf("resolving")
// Resolve the blob.
blobR, err := r.resolveBlob(ctx, hosts, refspec, desc)
if err != nil {
return nil, fmt.Errorf("failed to resolve the blob: %w", err)
}
defer func() {
if retErr != nil {
blobR.done()
}
}()
fsCache, err := newCache(filepath.Join(r.rootDir, "fscache"), r.config.FSCacheType, r.config)
if err != nil {
return nil, fmt.Errorf("failed to create fs cache: %w", err)
}
defer func() {
if retErr != nil {
fsCache.Close()
}
}()
// Get a reader for stargz archive.
// Each file's read operation is a prioritized task; all background tasks are
// paused during its execution so that it is not disturbed by network traffic
// generated by background tasks.
sr := io.NewSectionReader(readerAtFunc(func(p []byte, offset int64) (n int, err error) {
r.backgroundTaskManager.DoPrioritizedTask()
defer r.backgroundTaskManager.DonePrioritizedTask()
return blobR.ReadAt(p, offset)
}), 0, blobR.Size())
// define telemetry hooks to measure latency metrics inside estargz package
telemetry := metadata.Telemetry{
GetFooterLatency: func(start time.Time) {
commonmetrics.MeasureLatencyInMilliseconds(commonmetrics.StargzFooterGet, desc.Digest, start)
},
GetTocLatency: func(start time.Time) {
commonmetrics.MeasureLatencyInMilliseconds(commonmetrics.StargzTocGet, desc.Digest, start)
},
DeserializeTocLatency: func(start time.Time) {
commonmetrics.MeasureLatencyInMilliseconds(commonmetrics.DeserializeTocJSON, desc.Digest, start)
},
}
additionalDecompressors := []metadata.Decompressor{new(zstdchunked.Decompressor)}
if r.additionalDecompressors != nil {
additionalDecompressors = append(additionalDecompressors, r.additionalDecompressors(ctx, hosts, refspec, desc)...)
}
meta, err := r.metadataStore(sr,
append(esgzOpts, metadata.WithTelemetry(&telemetry), metadata.WithDecompressors(additionalDecompressors...))...)
if err != nil {
return nil, err
}
vr, err := reader.NewReader(meta, fsCache, desc.Digest)
if err != nil {
return nil, fmt.Errorf("failed to read layer: %w", err)
}
// Combine layer information together and cache it.
l := newLayer(r, desc, blobR, vr)
r.layerCacheMu.Lock()
cachedL, done2, added := r.layerCache.Add(name, l)
r.layerCacheMu.Unlock()
if !added {
l.close() // layer already exists in the cache. discard this.
}
log.G(ctx).Debugf("resolved")
return &layerRef{cachedL.(*layer), done2}, nil
}
// resolveBlob resolves a blob based on the passed layer blob information.
func (r *Resolver) resolveBlob(ctx context.Context, hosts source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) (_ *blobRef, retErr error) {
name := refspec.String() + "/" + desc.Digest.String()
// Try to retrieve the blob from the underlying cache.
r.blobCacheMu.Lock()
c, done, ok := r.blobCache.Get(name)
r.blobCacheMu.Unlock()
if ok {
if blob := c.(remote.Blob); blob.Check() == nil {
return &blobRef{blob, done}, nil
}
// invalid blob. discard this.
done()
r.blobCacheMu.Lock()
r.blobCache.Remove(name)
r.blobCacheMu.Unlock()
}
httpCache, err := newCache(filepath.Join(r.rootDir, "httpcache"), r.config.HTTPCacheType, r.config)
if err != nil {
return nil, fmt.Errorf("failed to create http cache: %w", err)
}
defer func() {
if retErr != nil {
httpCache.Close()
}
}()
// Resolve the blob and cache the result.
b, err := r.resolver.Resolve(ctx, hosts, refspec, desc, httpCache)
if err != nil {
return nil, fmt.Errorf("failed to resolve the source: %w", err)
}
r.blobCacheMu.Lock()
cachedB, done, added := r.blobCache.Add(name, b)
r.blobCacheMu.Unlock()
if !added {
b.Close() // blob already exists in the cache. discard this.
}
return &blobRef{cachedB.(remote.Blob), done}, nil
}
func newLayer(
resolver *Resolver,
desc ocispec.Descriptor,
blob *blobRef,
vr *reader.VerifiableReader,
) *layer {
return &layer{
resolver: resolver,
desc: desc,
blob: blob,
verifiableReader: vr,
prefetchWaiter: newWaiter(),
}
}
type layer struct {
resolver *Resolver
desc ocispec.Descriptor
blob *blobRef
verifiableReader *reader.VerifiableReader
prefetchWaiter *waiter
prefetchSize int64
prefetchSizeMu sync.Mutex
r reader.Reader
closed bool
closedMu sync.Mutex
prefetchOnce sync.Once
backgroundFetchOnce sync.Once
}
func (l *layer) Info() Info {
var readTime time.Time
if l.r != nil {
readTime = l.r.LastOnDemandReadTime()
}
return Info{
Digest: l.desc.Digest,
Size: l.blob.Size(),
FetchedSize: l.blob.FetchedSize(),
PrefetchSize: l.prefetchedSize(),
ReadTime: readTime,
TOCDigest: l.verifiableReader.Metadata().TOCDigest(),
}
}
func (l *layer) prefetchedSize() int64 {
l.prefetchSizeMu.Lock()
sz := l.prefetchSize
l.prefetchSizeMu.Unlock()
return sz
}
func (l *layer) Check() error {
if l.isClosed() {
return fmt.Errorf("layer is already closed")
}
return l.blob.Check()
}
func (l *layer) Refresh(ctx context.Context, hosts source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error {
if l.isClosed() {
return fmt.Errorf("layer is already closed")
}
return l.blob.Refresh(ctx, hosts, refspec, desc)
}
func (l *layer) Verify(tocDigest digest.Digest) (err error) {
if l.isClosed() {
return fmt.Errorf("layer is already closed")
}
if l.r != nil {
return nil
}
l.r, err = l.verifiableReader.VerifyTOC(tocDigest)
return
}
func (l *layer) SkipVerify() {
if l.r != nil {
return
}
l.r = l.verifiableReader.SkipVerify()
}
func (l *layer) Prefetch(prefetchSize int64) (err error) {
l.prefetchOnce.Do(func() {
ctx := context.Background()
l.resolver.backgroundTaskManager.DoPrioritizedTask()
defer l.resolver.backgroundTaskManager.DonePrioritizedTask()
err = l.prefetch(ctx, prefetchSize)
if err != nil {
log.G(ctx).WithError(err).Warnf("failed to prefetch layer=%v", l.desc.Digest)
return
}
log.G(ctx).Debug("completed to prefetch")
})
return
}
func (l *layer) prefetch(ctx context.Context, prefetchSize int64) error {
defer l.prefetchWaiter.done() // Notify the completion
// Measuring the total time to complete prefetch (use defer func() because l.Info().PrefetchSize is set later)
start := time.Now()
defer func() {
commonmetrics.WriteLatencyWithBytesLogValue(ctx, l.desc.Digest, commonmetrics.PrefetchTotal, start, commonmetrics.PrefetchSize, l.prefetchedSize())
}()
if l.isClosed() {
return fmt.Errorf("layer is already closed")
}
rootID := l.verifiableReader.Metadata().RootID()
if _, _, err := l.verifiableReader.Metadata().GetChild(rootID, estargz.NoPrefetchLandmark); err == nil {
// do not prefetch this layer
return nil
} else if id, _, err := l.verifiableReader.Metadata().GetChild(rootID, estargz.PrefetchLandmark); err == nil {
offset, err := l.verifiableReader.Metadata().GetOffset(id)
if err != nil {
return fmt.Errorf("failed to get offset of prefetch landmark: %w", err)
}
// override the prefetch size with optimized value
prefetchSize = offset
} else if prefetchSize > l.blob.Size() {
// adjust prefetch size not to exceed the whole layer size
prefetchSize = l.blob.Size()
}
// Fetch the target range
downloadStart := time.Now()
err := l.blob.Cache(0, prefetchSize)
commonmetrics.WriteLatencyLogValue(ctx, l.desc.Digest, commonmetrics.PrefetchDownload, downloadStart) // time to download prefetch data
if err != nil {
return fmt.Errorf("failed to prefetch layer: %w", err)
}
// Set prefetch size for metrics after prefetch completed
l.prefetchSizeMu.Lock()
l.prefetchSize = prefetchSize
l.prefetchSizeMu.Unlock()
// Cache uncompressed contents of the prefetched range
decompressStart := time.Now()
err = l.verifiableReader.Cache(reader.WithFilter(func(offset int64) bool {
return offset < prefetchSize // Cache only prefetch target
}))
commonmetrics.WriteLatencyLogValue(ctx, l.desc.Digest, commonmetrics.PrefetchDecompress, decompressStart) // time to decompress prefetch data
if err != nil {
return fmt.Errorf("failed to cache prefetched layer: %w", err)
}
return nil
}
func (l *layer) WaitForPrefetchCompletion() error {
if l.isClosed() {
return fmt.Errorf("layer is already closed")
}
return l.prefetchWaiter.wait(l.resolver.prefetchTimeout)
}
func (l *layer) BackgroundFetch() (err error) {
l.backgroundFetchOnce.Do(func() {
ctx := context.Background()
err = l.backgroundFetch(ctx)
if err != nil {
log.G(ctx).WithError(err).Warnf("failed to fetch whole layer=%v", l.desc.Digest)
return
}
log.G(ctx).Debug("completed to fetch all layer data in background")
})
return
}
func (l *layer) backgroundFetch(ctx context.Context) error {
defer commonmetrics.WriteLatencyLogValue(ctx, l.desc.Digest, commonmetrics.BackgroundFetchTotal, time.Now())
if l.isClosed() {
return fmt.Errorf("layer is already closed")
}
br := io.NewSectionReader(readerAtFunc(func(p []byte, offset int64) (retN int, retErr error) {
l.resolver.backgroundTaskManager.InvokeBackgroundTask(func(ctx context.Context) {
// Measuring the time to download background fetch data (in milliseconds)
defer commonmetrics.MeasureLatencyInMilliseconds(commonmetrics.BackgroundFetchDownload, l.Info().Digest, time.Now()) // time to download background fetch data
retN, retErr = l.blob.ReadAt(
p,
offset,
remote.WithContext(ctx), // Make cancellable
remote.WithCacheOpts(cache.Direct()), // Do not pollute mem cache
)
}, 120*time.Second)
return
}), 0, l.blob.Size())
defer commonmetrics.WriteLatencyLogValue(ctx, l.desc.Digest, commonmetrics.BackgroundFetchDecompress, time.Now()) // time to decompress background fetch data (in milliseconds)
return l.verifiableReader.Cache(
reader.WithReader(br), // Read contents in background
reader.WithCacheOpts(cache.Direct()), // Do not pollute mem cache
)
}
func (l *layerRef) Done() {
l.done()
}
func (l *layer) RootNode(baseInode uint32) (fusefs.InodeEmbedder, error) {
if l.isClosed() {
return nil, fmt.Errorf("layer is already closed")
}
if l.r == nil {
return nil, fmt.Errorf("layer hasn't been verified yet")
}
return newNode(l.desc.Digest, l.r, l.blob, baseInode, l.resolver.overlayOpaqueType)
}
func (l *layer) ReadAt(p []byte, offset int64, opts ...remote.Option) (int, error) {
return l.blob.ReadAt(p, offset, opts...)
}
func (l *layer) close() error {
l.closedMu.Lock()
defer l.closedMu.Unlock()
if l.closed {
return nil
}
l.closed = true
defer l.blob.done() // Close reader first, then close the blob
l.verifiableReader.Close()
if l.r != nil {
return l.r.Close()
}
return nil
}
func (l *layer) isClosed() bool {
l.closedMu.Lock()
closed := l.closed
l.closedMu.Unlock()
return closed
}
// blobRef is a reference to the blob in the cache. Calling `done` decreases the reference counter
// of this blob in the underlying cache. When nobody refers to the blob in the cache, resources bound
// to this blob will be discarded.
type blobRef struct {
remote.Blob
done func()
}
// layerRef is a reference to the layer in the cache. Calling `Done` or `done` decreases the
// reference counter of this layer in the underlying cache. When nobody refers to the layer in the
// cache, resources bound to this layer will be discarded.
type layerRef struct {
*layer
done func()
}
func newWaiter() *waiter {
return &waiter{
completionCond: sync.NewCond(&sync.Mutex{}),
}
}
type waiter struct {
isDone bool
isDoneMu sync.Mutex
completionCond *sync.Cond
}
func (w *waiter) done() {
w.isDoneMu.Lock()
w.isDone = true
w.isDoneMu.Unlock()
w.completionCond.Broadcast()
}
func (w *waiter) wait(timeout time.Duration) error {
wait := func() <-chan struct{} {
ch := make(chan struct{})
go func() {
w.isDoneMu.Lock()
isDone := w.isDone
w.isDoneMu.Unlock()
w.completionCond.L.Lock()
if !isDone {
w.completionCond.Wait()
}
w.completionCond.L.Unlock()
ch <- struct{}{}
}()
return ch
}
select {
case <-time.After(timeout):
w.isDoneMu.Lock()
w.isDone = true
w.isDoneMu.Unlock()
w.completionCond.Broadcast()
return fmt.Errorf("timeout(%v)", timeout)
case <-wait():
return nil
}
}
type readerAtFunc func([]byte, int64) (int, error)
func (f readerAtFunc) ReadAt(p []byte, offset int64) (int, error) { return f(p, offset) }
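A hedged sketch of how a caller might drive the Resolver and Layer APIs defined above: the hosts, refspec, and descriptor are assumed to come from the snapshotter, tocDigest from the TOC digest annotation/label, and the import paths follow the ones used elsewhere in this commit.

package example

import (
	"context"
	"time"

	"github.com/containerd/containerd/v2/pkg/reference"
	"github.com/containerd/stargz-snapshotter/fs/config"
	"github.com/containerd/stargz-snapshotter/fs/layer"
	"github.com/containerd/stargz-snapshotter/fs/source"
	memorymetadata "github.com/containerd/stargz-snapshotter/metadata/memory"
	"github.com/containerd/stargz-snapshotter/task"
	digest "github.com/opencontainers/go-digest"
	ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)

// resolveAndPrefetch resolves one layer, verifies it against tocDigest, and
// prefetches its landmark range, mirroring what the filesystem's Mount does.
func resolveAndPrefetch(ctx context.Context, hosts source.RegistryHosts, refspec reference.Spec,
	desc ocispec.Descriptor, tocDigest digest.Digest, cfg config.Config) error {
	tm := task.NewBackgroundTaskManager(2, 5*time.Second)
	r, err := layer.NewResolver("/var/lib/stargz-example", tm, cfg, nil,
		memorymetadata.NewReader, layer.OverlayOpaqueAll, nil)
	if err != nil {
		return err
	}
	l, err := r.Resolve(ctx, hosts, refspec, desc)
	if err != nil {
		return err
	}
	defer l.Done() // release the cache reference when finished
	if err := l.Verify(tocDigest); err != nil {
		return err
	}
	if err := l.Prefetch(cfg.PrefetchSize); err != nil {
		return err
	}
	return l.WaitForPrefetchCompletion()
}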

View File

@ -0,0 +1,806 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2019 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the NOTICE.md file.
*/
package layer
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"sort"
"strings"
"sync"
"syscall"
"time"
"github.com/containerd/log"
"github.com/containerd/stargz-snapshotter/estargz"
commonmetrics "github.com/containerd/stargz-snapshotter/fs/metrics/common"
"github.com/containerd/stargz-snapshotter/fs/reader"
"github.com/containerd/stargz-snapshotter/fs/remote"
"github.com/containerd/stargz-snapshotter/metadata"
fusefs "github.com/hanwen/go-fuse/v2/fs"
"github.com/hanwen/go-fuse/v2/fuse"
digest "github.com/opencontainers/go-digest"
"golang.org/x/sys/unix"
)
const (
blockSize = 4096
physicalBlockSize = 512
// physicalBlockRatio is the ratio of blockSize to physicalBlockSize.
// It can be used to convert from # blockSize-byte blocks to # physicalBlockSize-byte blocks
physicalBlockRatio = blockSize / physicalBlockSize
whiteoutPrefix = ".wh."
whiteoutOpaqueDir = whiteoutPrefix + whiteoutPrefix + ".opq"
opaqueXattrValue = "y"
stateDirName = ".stargz-snapshotter"
statFileMode = syscall.S_IFREG | 0400 // -r--------
stateDirMode = syscall.S_IFDIR | 0500 // dr-x------
)
type OverlayOpaqueType int
const (
OverlayOpaqueAll OverlayOpaqueType = iota
OverlayOpaqueTrusted
OverlayOpaqueUser
)
var opaqueXattrs = map[OverlayOpaqueType][]string{
OverlayOpaqueAll: {"trusted.overlay.opaque", "user.overlay.opaque"},
OverlayOpaqueTrusted: {"trusted.overlay.opaque"},
OverlayOpaqueUser: {"user.overlay.opaque"},
}
func newNode(layerDgst digest.Digest, r reader.Reader, blob remote.Blob, baseInode uint32, opaque OverlayOpaqueType) (fusefs.InodeEmbedder, error) {
rootID := r.Metadata().RootID()
rootAttr, err := r.Metadata().GetAttr(rootID)
if err != nil {
return nil, err
}
opq, ok := opaqueXattrs[opaque]
if !ok {
return nil, fmt.Errorf("Unknown overlay opaque type")
}
ffs := &fs{
r: r,
layerDigest: layerDgst,
baseInode: baseInode,
rootID: rootID,
opaqueXattrs: opq,
}
ffs.s = ffs.newState(layerDgst, blob)
return &node{
id: rootID,
attr: rootAttr,
fs: ffs,
}, nil
}
// fs contains global metadata used by nodes
type fs struct {
r reader.Reader
s *state
layerDigest digest.Digest
baseInode uint32
rootID uint32
opaqueXattrs []string
}
func (fs *fs) inodeOfState() uint64 {
return (uint64(fs.baseInode) << 32) | 1 // reserved
}
func (fs *fs) inodeOfStatFile() uint64 {
return (uint64(fs.baseInode) << 32) | 2 // reserved
}
func (fs *fs) inodeOfID(id uint32) (uint64, error) {
// 0 is reserved by go-fuse; 1 and 2 are reserved by the state dir
if id > ^uint32(0)-3 {
return 0, fmt.Errorf("too many inodes")
}
return (uint64(fs.baseInode) << 32) | uint64(3+id), nil
}
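// exampleInodeLayout is an editor-added illustration (not part of upstream):
// the upper 32 bits of every inode select the layer via baseInode, and the
// lower 32 bits carry the metadata ID shifted past the three reserved values
// (0 for go-fuse, 1 for the state dir, 2 for the stat file). With baseInode=2
// and id=5 this yields (2<<32)|8.
func exampleInodeLayout(baseInode, id uint32) uint64 {
	return (uint64(baseInode) << 32) | uint64(3+id)
}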
// node is a filesystem inode abstraction.
type node struct {
fusefs.Inode
fs *fs
id uint32
attr metadata.Attr
ents []fuse.DirEntry
entsCached bool
entsMu sync.Mutex
}
func (n *node) isRootNode() bool {
return n.id == n.fs.rootID
}
func (n *node) isOpaque() bool {
if _, _, err := n.fs.r.Metadata().GetChild(n.id, whiteoutOpaqueDir); err == nil {
return true
}
return false
}
var _ = (fusefs.InodeEmbedder)((*node)(nil))
var _ = (fusefs.NodeReaddirer)((*node)(nil))
func (n *node) Readdir(ctx context.Context) (fusefs.DirStream, syscall.Errno) {
ents, errno := n.readdir()
if errno != 0 {
return nil, errno
}
return fusefs.NewListDirStream(ents), 0
}
func (n *node) readdir() ([]fuse.DirEntry, syscall.Errno) {
// Measure how long node_readdir operation takes (in microseconds).
start := time.Now() // set start time
defer commonmetrics.MeasureLatencyInMicroseconds(commonmetrics.NodeReaddir, n.fs.layerDigest, start)
n.entsMu.Lock()
if n.entsCached {
ents := n.ents
n.entsMu.Unlock()
return ents, 0
}
n.entsMu.Unlock()
isRoot := n.isRootNode()
var ents []fuse.DirEntry
whiteouts := map[string]uint32{}
normalEnts := map[string]bool{}
var lastErr error
if err := n.fs.r.Metadata().ForeachChild(n.id, func(name string, id uint32, mode os.FileMode) bool {
// We don't want to show prefetch landmarks in "/".
if isRoot && (name == estargz.PrefetchLandmark || name == estargz.NoPrefetchLandmark) {
return true
}
// We don't want to show whiteouts.
if strings.HasPrefix(name, whiteoutPrefix) {
if name == whiteoutOpaqueDir {
return true
}
// Add the overlayfs-compliant whiteout later.
whiteouts[name] = id
return true
}
// This is a normal entry.
normalEnts[name] = true
ino, err := n.fs.inodeOfID(id)
if err != nil {
lastErr = err
return false
}
ents = append(ents, fuse.DirEntry{
Mode: fileModeToSystemMode(mode),
Name: name,
Ino: ino,
})
return true
}); err != nil || lastErr != nil {
n.fs.s.report(fmt.Errorf("node.Readdir: err = %v; lastErr = %v", err, lastErr))
return nil, syscall.EIO
}
// Append whiteouts if no entry replaces the target entry in the lower layer.
for w, id := range whiteouts {
if !normalEnts[w[len(whiteoutPrefix):]] {
ino, err := n.fs.inodeOfID(id)
if err != nil {
n.fs.s.report(fmt.Errorf("node.Readdir: err = %v; lastErr = %v", err, lastErr))
return nil, syscall.EIO
}
ents = append(ents, fuse.DirEntry{
Mode: syscall.S_IFCHR,
Name: w[len(whiteoutPrefix):],
Ino: ino,
})
}
}
// Avoid non-deterministic order of entries on each call
sort.Slice(ents, func(i, j int) bool {
return ents[i].Name < ents[j].Name
})
n.entsMu.Lock()
defer n.entsMu.Unlock()
n.ents, n.entsCached = ents, true // cache it
return ents, 0
}
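// exampleWhiteoutName is an editor-added illustration (not part of upstream):
// a tar entry named ".wh.foo" hides "foo" from the lower layer, and readdir
// above surfaces it as a character device named "foo" with device number 0/0,
// which is the whiteout representation overlayfs understands. The special
// entry ".wh..wh..opq" is never listed; it only marks the directory as opaque
// via the xattrs handled in Getxattr/Listxattr below.
func exampleWhiteoutName(tarName string) (overlayfsName string, isWhiteout bool) {
	if strings.HasPrefix(tarName, whiteoutPrefix) && tarName != whiteoutOpaqueDir {
		return tarName[len(whiteoutPrefix):], true
	}
	return tarName, false
}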
var _ = (fusefs.NodeLookuper)((*node)(nil))
func (n *node) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fusefs.Inode, syscall.Errno) {
isRoot := n.isRootNode()
// We don't want to show prefetch landmarks in "/".
if isRoot && (name == estargz.PrefetchLandmark || name == estargz.NoPrefetchLandmark) {
return nil, syscall.ENOENT
}
// We don't want to show whiteouts.
if strings.HasPrefix(name, whiteoutPrefix) {
return nil, syscall.ENOENT
}
// state directory
if isRoot && name == stateDirName {
return n.NewInode(ctx, n.fs.s, n.fs.stateToAttr(&out.Attr)), 0
}
// lookup on memory nodes
if cn := n.GetChild(name); cn != nil {
switch tn := cn.Operations().(type) {
case *node:
ino, err := n.fs.inodeOfID(tn.id)
if err != nil {
n.fs.s.report(fmt.Errorf("node.Lookup: %v", err))
return nil, syscall.EIO
}
entryToAttr(ino, tn.attr, &out.Attr)
case *whiteout:
ino, err := n.fs.inodeOfID(tn.id)
if err != nil {
n.fs.s.report(fmt.Errorf("node.Lookup: %v", err))
return nil, syscall.EIO
}
entryToAttr(ino, tn.attr, &out.Attr)
default:
n.fs.s.report(fmt.Errorf("node.Lookup: uknown node type detected"))
return nil, syscall.EIO
}
return cn, 0
}
// early return if this entry doesn't exist
n.entsMu.Lock()
if n.entsCached {
var found bool
for _, e := range n.ents {
if e.Name == name {
found = true
}
}
if !found {
n.entsMu.Unlock()
return nil, syscall.ENOENT
}
}
n.entsMu.Unlock()
id, ce, err := n.fs.r.Metadata().GetChild(n.id, name)
if err != nil {
// If the entry exists as a whiteout, show an overlayfs-styled whiteout node.
if whID, wh, err := n.fs.r.Metadata().GetChild(n.id, fmt.Sprintf("%s%s", whiteoutPrefix, name)); err == nil {
ino, err := n.fs.inodeOfID(whID)
if err != nil {
n.fs.s.report(fmt.Errorf("node.Lookup: %v", err))
return nil, syscall.EIO
}
return n.NewInode(ctx, &whiteout{
id: whID,
fs: n.fs,
attr: wh,
}, entryToWhAttr(ino, wh, &out.Attr)), 0
}
n.readdir() // This code path is very expensive. Cache child entries here so that the next call doesn't reach here.
return nil, syscall.ENOENT
}
ino, err := n.fs.inodeOfID(id)
if err != nil {
n.fs.s.report(fmt.Errorf("node.Lookup: %v", err))
return nil, syscall.EIO
}
return n.NewInode(ctx, &node{
id: id,
fs: n.fs,
attr: ce,
}, entryToAttr(ino, ce, &out.Attr)), 0
}
var _ = (fusefs.NodeOpener)((*node)(nil))
func (n *node) Open(ctx context.Context, flags uint32) (fh fusefs.FileHandle, fuseFlags uint32, errno syscall.Errno) {
ra, err := n.fs.r.OpenFile(n.id)
if err != nil {
n.fs.s.report(fmt.Errorf("node.Open: %v", err))
return nil, 0, syscall.EIO
}
return &file{
n: n,
ra: ra,
}, fuse.FOPEN_KEEP_CACHE, 0
}
var _ = (fusefs.NodeGetattrer)((*node)(nil))
func (n *node) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrOut) syscall.Errno {
ino, err := n.fs.inodeOfID(n.id)
if err != nil {
n.fs.s.report(fmt.Errorf("node.Getattr: %v", err))
return syscall.EIO
}
entryToAttr(ino, n.attr, &out.Attr)
return 0
}
var _ = (fusefs.NodeGetxattrer)((*node)(nil))
func (n *node) Getxattr(ctx context.Context, attr string, dest []byte) (uint32, syscall.Errno) {
ent := n.attr
opq := n.isOpaque()
for _, opaqueXattr := range n.fs.opaqueXattrs {
if attr == opaqueXattr && opq {
// This node is an opaque directory so give overlayfs-compliant indicator.
if len(dest) < len(opaqueXattrValue) {
return uint32(len(opaqueXattrValue)), syscall.ERANGE
}
return uint32(copy(dest, opaqueXattrValue)), 0
}
}
if v, ok := ent.Xattrs[attr]; ok {
if len(dest) < len(v) {
return uint32(len(v)), syscall.ERANGE
}
return uint32(copy(dest, v)), 0
}
return 0, syscall.ENODATA
}
var _ = (fusefs.NodeListxattrer)((*node)(nil))
func (n *node) Listxattr(ctx context.Context, dest []byte) (uint32, syscall.Errno) {
ent := n.attr
opq := n.isOpaque()
var attrs []byte
if opq {
// This node is an opaque directory so add overlayfs-compliant indicator.
for _, opaqueXattr := range n.fs.opaqueXattrs {
attrs = append(attrs, []byte(opaqueXattr+"\x00")...)
}
}
for k := range ent.Xattrs {
attrs = append(attrs, []byte(k+"\x00")...)
}
if len(dest) < len(attrs) {
return uint32(len(attrs)), syscall.ERANGE
}
return uint32(copy(dest, attrs)), 0
}
var _ = (fusefs.NodeReadlinker)((*node)(nil))
func (n *node) Readlink(ctx context.Context) ([]byte, syscall.Errno) {
ent := n.attr
return []byte(ent.LinkName), 0
}
var _ = (fusefs.NodeStatfser)((*node)(nil))
func (n *node) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Errno {
defaultStatfs(out)
return 0
}
// file is a file abstraction which implements file handle in go-fuse.
type file struct {
n *node
ra io.ReaderAt
}
var _ = (fusefs.FileReader)((*file)(nil))
func (f *file) Read(ctx context.Context, dest []byte, off int64) (fuse.ReadResult, syscall.Errno) {
defer commonmetrics.MeasureLatencyInMicroseconds(commonmetrics.ReadOnDemand, f.n.fs.layerDigest, time.Now()) // measure time for on-demand file reads (in microseconds)
defer commonmetrics.IncOperationCount(commonmetrics.OnDemandReadAccessCount, f.n.fs.layerDigest) // increment the counter for on-demand file accesses
n, err := f.ra.ReadAt(dest, off)
if err != nil && err != io.EOF {
f.n.fs.s.report(fmt.Errorf("file.Read: %v", err))
return nil, syscall.EIO
}
return fuse.ReadResultData(dest[:n]), 0
}
var _ = (fusefs.FileGetattrer)((*file)(nil))
func (f *file) Getattr(ctx context.Context, out *fuse.AttrOut) syscall.Errno {
ino, err := f.n.fs.inodeOfID(f.n.id)
if err != nil {
f.n.fs.s.report(fmt.Errorf("file.Getattr: %v", err))
return syscall.EIO
}
entryToAttr(ino, f.n.attr, &out.Attr)
return 0
}
// whiteout is a whiteout abstraction compliant to overlayfs.
type whiteout struct {
fusefs.Inode
id uint32
fs *fs
attr metadata.Attr
}
var _ = (fusefs.NodeGetattrer)((*whiteout)(nil))
func (w *whiteout) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrOut) syscall.Errno {
ino, err := w.fs.inodeOfID(w.id)
if err != nil {
w.fs.s.report(fmt.Errorf("whiteout.Getattr: %v", err))
return syscall.EIO
}
entryToWhAttr(ino, w.attr, &out.Attr)
return 0
}
var _ = (fusefs.NodeStatfser)((*whiteout)(nil))
func (w *whiteout) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Errno {
defaultStatfs(out)
return 0
}
// newState provides new state directory node.
// It creates statFile at the same time to give it stable inode number.
func (fs *fs) newState(layerDigest digest.Digest, blob remote.Blob) *state {
return &state{
statFile: &statFile{
name: layerDigest.String() + ".json",
statJSON: statJSON{
Digest: layerDigest.String(),
Size: blob.Size(),
},
blob: blob,
fs: fs,
},
fs: fs,
}
}
// state is a directory which contains a "state file" of this layer, aimed at
// observability. This filesystem uses it to report information (e.g. errors) to
// clients (e.g. Kubernetes's livenessProbe).
// This directory has mode "dr-x------ root root".
type state struct {
fusefs.Inode
statFile *statFile
fs *fs
}
var _ = (fusefs.NodeReaddirer)((*state)(nil))
func (s *state) Readdir(ctx context.Context) (fusefs.DirStream, syscall.Errno) {
return fusefs.NewListDirStream([]fuse.DirEntry{
{
Mode: statFileMode,
Name: s.statFile.name,
Ino: s.fs.inodeOfStatFile(),
},
}), 0
}
var _ = (fusefs.NodeLookuper)((*state)(nil))
func (s *state) Lookup(ctx context.Context, name string, out *fuse.EntryOut) (*fusefs.Inode, syscall.Errno) {
if name != s.statFile.name {
return nil, syscall.ENOENT
}
attr, errno := s.statFile.attr(&out.Attr)
if errno != 0 {
return nil, errno
}
return s.NewInode(ctx, s.statFile, attr), 0
}
var _ = (fusefs.NodeGetattrer)((*state)(nil))
func (s *state) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrOut) syscall.Errno {
s.fs.stateToAttr(&out.Attr)
return 0
}
var _ = (fusefs.NodeStatfser)((*state)(nil))
func (s *state) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Errno {
defaultStatfs(out)
return 0
}
func (s *state) report(err error) {
s.statFile.report(err)
}
type statJSON struct {
Error string `json:"error,omitempty"`
Digest string `json:"digest"`
// URL is excluded for potential security reason
Size int64 `json:"size"`
FetchedSize int64 `json:"fetchedSize"`
FetchedPercent float64 `json:"fetchedPercent"` // Fetched / Size * 100.0
}
// statFile is a file which contains information to be reported from this layer.
// This filesystem uses statFile.report() to report information (e.g. errors) to
// clients (e.g. Kubernetes's livenessProbe).
// This file has mode "-r-------- root root".
type statFile struct {
fusefs.Inode
name string
blob remote.Blob
statJSON statJSON
mu sync.Mutex
fs *fs
}
var _ = (fusefs.NodeOpener)((*statFile)(nil))
func (sf *statFile) Open(ctx context.Context, flags uint32) (fh fusefs.FileHandle, fuseFlags uint32, errno syscall.Errno) {
return nil, 0, 0
}
var _ = (fusefs.NodeReader)((*statFile)(nil))
func (sf *statFile) Read(ctx context.Context, f fusefs.FileHandle, dest []byte, off int64) (fuse.ReadResult, syscall.Errno) {
sf.mu.Lock()
defer sf.mu.Unlock()
st, err := sf.updateStatUnlocked()
if err != nil {
return nil, syscall.EIO
}
n, err := bytes.NewReader(st).ReadAt(dest, off)
if err != nil && err != io.EOF {
return nil, syscall.EIO
}
return fuse.ReadResultData(dest[:n]), 0
}
var _ = (fusefs.NodeGetattrer)((*statFile)(nil))
func (sf *statFile) Getattr(ctx context.Context, f fusefs.FileHandle, out *fuse.AttrOut) syscall.Errno {
_, errno := sf.attr(&out.Attr)
return errno
}
var _ = (fusefs.NodeStatfser)((*statFile)(nil))
func (sf *statFile) Statfs(ctx context.Context, out *fuse.StatfsOut) syscall.Errno {
defaultStatfs(out)
return 0
}
// logContents puts the contents of statFile in the log
// to keep that information accessible for troubleshooting.
// The entry names are kept consistent with the field names in statJSON.
func (sf *statFile) logContents() {
ctx := context.Background()
log.G(ctx).WithFields(log.Fields{
"digest": sf.statJSON.Digest, "size": sf.statJSON.Size,
"fetchedSize": sf.statJSON.FetchedSize, "fetchedPercent": sf.statJSON.FetchedPercent,
}).WithError(errors.New(sf.statJSON.Error)).Error("statFile error")
}
func (sf *statFile) report(err error) {
sf.mu.Lock()
defer sf.mu.Unlock()
sf.statJSON.Error = err.Error()
sf.logContents()
}
func (sf *statFile) attr(out *fuse.Attr) (fusefs.StableAttr, syscall.Errno) {
sf.mu.Lock()
defer sf.mu.Unlock()
st, err := sf.updateStatUnlocked()
if err != nil {
return fusefs.StableAttr{}, syscall.EIO
}
return sf.fs.statFileToAttr(uint64(len(st)), out), 0
}
func (sf *statFile) updateStatUnlocked() ([]byte, error) {
sf.statJSON.FetchedSize = sf.blob.FetchedSize()
sf.statJSON.FetchedPercent = float64(sf.statJSON.FetchedSize) / float64(sf.statJSON.Size) * 100.0
j, err := json.Marshal(&sf.statJSON)
if err != nil {
return nil, err
}
j = append(j, []byte("\n")...)
return j, nil
}
// entryToAttr converts metadata.Attr to go-fuse's Attr.
func entryToAttr(ino uint64, e metadata.Attr, out *fuse.Attr) fusefs.StableAttr {
out.Ino = ino
out.Size = uint64(e.Size)
if e.Mode&os.ModeSymlink != 0 {
out.Size = uint64(len(e.LinkName))
}
out.Blksize = blockSize
out.Blocks = (out.Size + uint64(out.Blksize) - 1) / uint64(out.Blksize) * physicalBlockRatio
mtime := e.ModTime
out.SetTimes(nil, &mtime, nil)
out.Mode = fileModeToSystemMode(e.Mode)
out.Owner = fuse.Owner{Uid: uint32(e.UID), Gid: uint32(e.GID)}
out.Rdev = uint32(unix.Mkdev(uint32(e.DevMajor), uint32(e.DevMinor)))
out.Nlink = uint32(e.NumLink)
if out.Nlink == 0 {
out.Nlink = 1 // zero "NumLink" means one.
}
out.Padding = 0 // TODO
return fusefs.StableAttr{
Mode: out.Mode,
Ino: out.Ino,
// NOTE: The inode number is unique throughout the lifetime of
// this filesystem, so we don't take generation into account at this
// moment.
}
}
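// exampleBlockCount is an editor-added illustration (not part of upstream) of
// the st_blocks accounting above: sizes are rounded up to 4096-byte blocks and
// then reported in 512-byte physical blocks, so a 6000-byte file is
// ceil(6000/4096)=2 blocks, i.e. 2*physicalBlockRatio=16 physical blocks.
func exampleBlockCount(size uint64) uint64 {
	return (size + blockSize - 1) / blockSize * physicalBlockRatio
}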
// entryToWhAttr converts metadata.Attr to go-fuse's Attr of whiteouts.
func entryToWhAttr(ino uint64, e metadata.Attr, out *fuse.Attr) fusefs.StableAttr {
out.Ino = ino
out.Size = 0
out.Blksize = blockSize
out.Blocks = 0
mtime := e.ModTime
out.SetTimes(nil, &mtime, nil)
out.Mode = syscall.S_IFCHR
out.Owner = fuse.Owner{Uid: 0, Gid: 0}
out.Rdev = uint32(unix.Mkdev(0, 0))
out.Nlink = 1
out.Padding = 0 // TODO
return fusefs.StableAttr{
Mode: out.Mode,
Ino: out.Ino,
// NOTE: The inode number is unique throughout the lifetime of
// this filesystem, so we don't take generation into account at this
// moment.
}
}
// stateToAttr converts state directory to go-fuse's Attr.
func (fs *fs) stateToAttr(out *fuse.Attr) fusefs.StableAttr {
out.Ino = fs.inodeOfState()
out.Size = 0
out.Blksize = blockSize
out.Blocks = 0
out.Nlink = 1
// root can read and open it (dr-x------ root root).
out.Mode = stateDirMode
out.Owner = fuse.Owner{Uid: 0, Gid: 0}
// dummy
out.Mtime = 0
out.Mtimensec = 0
out.Rdev = 0
out.Padding = 0
return fusefs.StableAttr{
Mode: out.Mode,
Ino: out.Ino,
// NOTE: The inode number is unique throughout the lifetime of
// this filesystem, so we don't take generation into account at this
// moment.
}
}
// statFileToAttr converts stat file to go-fuse's Attr.
// func statFileToAttr(id uint64, sf *statFile, size uint64, out *fuse.Attr) fusefs.StableAttr {
func (fs *fs) statFileToAttr(size uint64, out *fuse.Attr) fusefs.StableAttr {
out.Ino = fs.inodeOfStatFile()
out.Size = size
out.Blksize = blockSize
out.Blocks = (out.Size + uint64(out.Blksize) - 1) / uint64(out.Blksize) * physicalBlockRatio
out.Nlink = 1
// Root can read it ("-r-------- root root").
out.Mode = statFileMode
out.Owner = fuse.Owner{Uid: 0, Gid: 0}
// dummy
out.Mtime = 0
out.Mtimensec = 0
out.Rdev = 0
out.Padding = 0
return fusefs.StableAttr{
Mode: out.Mode,
Ino: out.Ino,
// NOTE: The inode number is unique throughout the lifetime of
// this filesystem, so we don't take generation into account at this
// moment.
}
}
func fileModeToSystemMode(m os.FileMode) uint32 {
// Permission bits
res := uint32(m & os.ModePerm)
// File type bits
switch m & os.ModeType {
case os.ModeDevice:
res |= syscall.S_IFBLK
case os.ModeDevice | os.ModeCharDevice:
res |= syscall.S_IFCHR
case os.ModeDir:
res |= syscall.S_IFDIR
case os.ModeNamedPipe:
res |= syscall.S_IFIFO
case os.ModeSymlink:
res |= syscall.S_IFLNK
case os.ModeSocket:
res |= syscall.S_IFSOCK
default: // regular file.
res |= syscall.S_IFREG
}
// suid, sgid, sticky bits
if m&os.ModeSetuid != 0 {
res |= syscall.S_ISUID
}
if m&os.ModeSetgid != 0 {
res |= syscall.S_ISGID
}
if m&os.ModeSticky != 0 {
res |= syscall.S_ISVTX
}
return res
}
func defaultStatfs(stat *fuse.StatfsOut) {
// http://man7.org/linux/man-pages/man2/statfs.2.html
stat.Blocks = 0 // dummy
stat.Bfree = 0
stat.Bavail = 0
stat.Files = 0 // dummy
stat.Ffree = 0
stat.Bsize = blockSize
stat.NameLen = 1<<32 - 1
stat.Frsize = blockSize
stat.Padding = 0
stat.Spare = [6]uint32{}
}
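The state directory and stat file above exist purely for observability: each mounted layer exposes .stargz-snapshotter/<layer-digest>.json whose contents follow the statJSON structure. A self-contained sketch of how a health check might poll it (the mountpoint path and digest are hypothetical placeholders):

package main

import (
	"encoding/json"
	"fmt"
	"os"
	"path/filepath"
)

// statJSON mirrors the structure served by the stat file above.
type statJSON struct {
	Error          string  `json:"error,omitempty"`
	Digest         string  `json:"digest"`
	Size           int64   `json:"size"`
	FetchedSize    int64   `json:"fetchedSize"`
	FetchedPercent float64 `json:"fetchedPercent"`
}

func main() {
	// Hypothetical layer mountpoint; the real path depends on the snapshotter's root.
	mountpoint := "/var/lib/containerd-stargz-grpc/snapshotter/snapshots/1/fs"
	layerDigest := "sha256:deadbeef" // hypothetical layer digest
	b, err := os.ReadFile(filepath.Join(mountpoint, ".stargz-snapshotter", layerDigest+".json"))
	if err != nil {
		fmt.Fprintln(os.Stderr, "stat file not readable:", err)
		os.Exit(1)
	}
	var st statJSON
	if err := json.Unmarshal(b, &st); err != nil {
		fmt.Fprintln(os.Stderr, "malformed stat file:", err)
		os.Exit(1)
	}
	if st.Error != "" {
		fmt.Fprintln(os.Stderr, "layer reported error:", st.Error)
		os.Exit(1)
	}
	fmt.Printf("layer %s: %.1f%% fetched (%d/%d bytes)\n", st.Digest, st.FetchedPercent, st.FetchedSize, st.Size)
}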

File diff suppressed because it is too large

View File

@ -0,0 +1,216 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package commonmetrics
import (
"context"
"sync"
"time"
"github.com/containerd/log"
digest "github.com/opencontainers/go-digest"
"github.com/prometheus/client_golang/prometheus"
)
const (
// OperationLatencyKeyMilliseconds is the key for stargz operation latency metrics in milliseconds.
OperationLatencyKeyMilliseconds = "operation_duration_milliseconds"
// OperationLatencyKeyMicroseconds is the key for stargz operation latency metrics in microseconds.
OperationLatencyKeyMicroseconds = "operation_duration_microseconds"
// OperationCountKey is the key for stargz operation count metrics.
OperationCountKey = "operation_count"
// BytesServedKey is the key for any metric related to counting bytes served as part of a specific operation.
BytesServedKey = "bytes_served"
// Keep namespace as stargz and subsystem as fs.
namespace = "stargz"
subsystem = "fs"
)
// Lists all metric labels.
const (
// prometheus metrics
Mount = "mount"
RemoteRegistryGet = "remote_registry_get"
NodeReaddir = "node_readdir"
StargzHeaderGet = "stargz_header_get"
StargzFooterGet = "stargz_footer_get"
StargzTocGet = "stargz_toc_get"
DeserializeTocJSON = "stargz_toc_json_deserialize"
PrefetchesCompleted = "all_prefetches_completed"
ReadOnDemand = "read_on_demand"
MountLayerToLastOnDemandFetch = "mount_layer_to_last_on_demand_fetch"
OnDemandReadAccessCount = "on_demand_read_access_count"
OnDemandRemoteRegistryFetchCount = "on_demand_remote_registry_fetch_count"
OnDemandBytesServed = "on_demand_bytes_served"
OnDemandBytesFetched = "on_demand_bytes_fetched"
// logs metrics
PrefetchTotal = "prefetch_total"
PrefetchDownload = "prefetch_download"
PrefetchDecompress = "prefetch_decompress"
BackgroundFetchTotal = "background_fetch_total"
BackgroundFetchDownload = "background_fetch_download"
BackgroundFetchDecompress = "background_fetch_decompress"
PrefetchSize = "prefetch_size"
)
var (
// Buckets for OperationLatency metrics.
latencyBucketsMilliseconds = []float64{1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384} // in milliseconds
latencyBucketsMicroseconds = []float64{1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024} // in microseconds
// operationLatencyMilliseconds collects operation latency numbers in milliseconds grouped by
// operation, type and layer digest.
operationLatencyMilliseconds = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: OperationLatencyKeyMilliseconds,
Help: "Latency in milliseconds of stargz snapshotter operations. Broken down by operation type and layer sha.",
Buckets: latencyBucketsMilliseconds,
},
[]string{"operation_type", "layer"},
)
// operationLatencyMicroseconds collects operation latency numbers in microseconds grouped by
// operation, type and layer digest.
operationLatencyMicroseconds = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: OperationLatencyKeyMicroseconds,
Help: "Latency in microseconds of stargz snapshotter operations. Broken down by operation type and layer sha.",
Buckets: latencyBucketsMicroseconds,
},
[]string{"operation_type", "layer"},
)
// operationCount collects operation count numbers by operation
// type and layer sha.
operationCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: OperationCountKey,
Help: "The count of stargz snapshotter operations. Broken down by operation type and layer sha.",
},
[]string{"operation_type", "layer"},
)
// bytesCount reflects the number of bytes served as part of a specific operation type per layer sha.
bytesCount = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: subsystem,
Name: BytesServedKey,
Help: "The number of bytes served per stargz snapshotter operations. Broken down by operation type and layer sha.",
},
[]string{"operation_type", "layer"},
)
)
var register sync.Once
var logLevel = log.DebugLevel
// sinceInMilliseconds gets the time since the specified start in milliseconds.
// The division by 1e6 is done so that the milliseconds value is a floating point number, since the native method
// .Milliseconds() returns an integer value and would lose precision for sub-millisecond values.
func sinceInMilliseconds(start time.Time) float64 {
return float64(time.Since(start).Nanoseconds()) / 1e6
}
// sinceInMicroseconds gets the time since the specified start in microseconds.
// The division by 1e3 is done so that the microseconds value is a floating point number, since the native method
// .Microseconds() returns an integer value and would lose precision for sub-microsecond values.
func sinceInMicroseconds(start time.Time) float64 {
return float64(time.Since(start).Nanoseconds()) / 1e3
}
// Register registers metrics. Registration runs only once regardless of how many times this is called.
func Register(l log.Level) {
register.Do(func() {
logLevel = l
prometheus.MustRegister(operationLatencyMilliseconds)
prometheus.MustRegister(operationLatencyMicroseconds)
prometheus.MustRegister(operationCount)
prometheus.MustRegister(bytesCount)
})
}
// MeasureLatencyInMilliseconds wraps the labels attachment as well as calling Observe into a single method.
// Right now we attach the operation and layer digest, so it's possible to see the breakdown for latency
// by operation and individual layers.
// If you want this to be layer agnostic, just pass a digest derived from the empty string, e.g.
// layerDigest := digest.FromString("")
func MeasureLatencyInMilliseconds(operation string, layer digest.Digest, start time.Time) {
operationLatencyMilliseconds.WithLabelValues(operation, layer.String()).Observe(sinceInMilliseconds(start))
}
// MeasureLatencyInMicroseconds wraps the labels attachment as well as calling Observe into a single method.
// Right now we attach the operation and layer digest, so it's possible to see the breakdown for latency
// by operation and individual layers.
// If you want this to be layer agnostic, just pass a digest derived from the empty string, e.g.
// layerDigest := digest.FromString("")
func MeasureLatencyInMicroseconds(operation string, layer digest.Digest, start time.Time) {
operationLatencyMicroseconds.WithLabelValues(operation, layer.String()).Observe(sinceInMicroseconds(start))
}
// IncOperationCount wraps the labels attachment as well as calling Inc into a single method.
func IncOperationCount(operation string, layer digest.Digest) {
operationCount.WithLabelValues(operation, layer.String()).Inc()
}
// AddBytesCount wraps the labels attachment as well as calling Add into a single method.
func AddBytesCount(operation string, layer digest.Digest, bytes int64) {
bytesCount.WithLabelValues(operation, layer.String()).Add(float64(bytes))
}
// WriteLatencyLogValue wraps writing the log info record for latency in milliseconds. The log record breaks down by operation and layer digest.
func WriteLatencyLogValue(ctx context.Context, layer digest.Digest, operation string, start time.Time) {
ctx = log.WithLogger(ctx, log.G(ctx).WithField("metrics", "latency").WithField("operation", operation).WithField("layer_sha", layer.String()))
log.G(ctx).Logf(logLevel, "value=%v milliseconds", sinceInMilliseconds(start))
}
// WriteLatencyWithBytesLogValue wraps writing the log info record for latency in milliseconds with adding the size in bytes.
// The log record breaks down by operation, layer digest and byte value.
func WriteLatencyWithBytesLogValue(ctx context.Context, layer digest.Digest, latencyOperation string, start time.Time, bytesMetricName string, bytesMetricValue int64) {
ctx = log.WithLogger(ctx, log.G(ctx).WithField("metrics", "latency").WithField("operation", latencyOperation).WithField("layer_sha", layer.String()))
log.G(ctx).Logf(logLevel, "value=%v milliseconds; %v=%v bytes", sinceInMilliseconds(start), bytesMetricName, bytesMetricValue)
}
// LogLatencyForLastOnDemandFetch implements a special case for measuring the latency of last on demand fetch, which must be invoked at the end of
// background fetch operation only. Since this is expected to happen only once per container launch, it writes a log line,
// instead of directly emitting a metric.
// We do that in the following way:
// 1. We record the mount start time
// 2. We keep recording the timestamp of the last on-demand fetch for each layer sha
// 3. When the background fetch completes, we measure the difference between the last on-demand fetch and the mount start time
// and write it out as a log line
func LogLatencyForLastOnDemandFetch(ctx context.Context, layer digest.Digest, start time.Time, end time.Time) {
diffInMilliseconds := float64(end.Sub(start).Milliseconds())
// value can be negative if we pass the default value for time.Time as `end`
// this can happen if there were no on-demand fetches for the particular layer
if diffInMilliseconds > 0 {
ctx = log.WithLogger(ctx, log.G(ctx).WithField("metrics", "latency").WithField("operation", MountLayerToLastOnDemandFetch).WithField("layer_sha", layer.String()))
log.G(ctx).Logf(logLevel, "value=%v milliseconds", diffInMilliseconds)
}
}
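The helpers above are meant to be used with defer so that the latency observation happens when the operation returns. A minimal, self-contained sketch of the calling convention (the operation body is a placeholder, not the snapshotter's real mount path):

package main

import (
	"time"

	"github.com/containerd/log"
	commonmetrics "github.com/containerd/stargz-snapshotter/fs/metrics/common"
	digest "github.com/opencontainers/go-digest"
)

func main() {
	// Register the collectors once; log-based metrics are emitted at this level.
	commonmetrics.Register(log.DebugLevel)

	layerDigest := digest.FromString("example-layer") // placeholder digest
	doMount(layerDigest)
}

func doMount(layerDigest digest.Digest) {
	// Observe how long the (placeholder) mount takes, labelled by layer digest.
	defer commonmetrics.MeasureLatencyInMilliseconds(commonmetrics.Mount, layerDigest, time.Now())
	commonmetrics.IncOperationCount(commonmetrics.OnDemandReadAccessCount, layerDigest)
	time.Sleep(10 * time.Millisecond) // stand-in for real work
}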

View File

@ -0,0 +1,65 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package layermetrics
import (
"github.com/containerd/stargz-snapshotter/fs/layer"
metrics "github.com/docker/go-metrics"
"github.com/prometheus/client_golang/prometheus"
)
var layerMetrics = []*metric{
{
name: "layer_fetched_size",
help: "Total fetched size of the layer",
unit: metrics.Bytes,
vt: prometheus.CounterValue,
getValues: func(l layer.Layer) []value {
return []value{
{
v: float64(l.Info().FetchedSize),
},
}
},
},
{
name: "layer_prefetch_size",
help: "Total prefetched size of the layer",
unit: metrics.Bytes,
vt: prometheus.CounterValue,
getValues: func(l layer.Layer) []value {
return []value{
{
v: float64(l.Info().PrefetchSize),
},
}
},
},
{
name: "layer_size",
help: "Total size of the layer",
unit: metrics.Bytes,
vt: prometheus.CounterValue,
getValues: func(l layer.Layer) []value {
return []value{
{
v: float64(l.Info().Size),
},
}
},
},
}
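// exampleLayerMetric is an editor-added illustration (not part of upstream) of
// how another per-layer value could be declared using the metric/value types
// defined alongside the Controller in this package: getValues pulls whatever
// it needs from layer.Layer.Info() and the Controller handles registration and
// collection. The metric name and unit here are hypothetical.
var exampleLayerMetric = &metric{
	name: "layer_fetched_percent",
	help: "Percentage of the layer fetched so far (illustrative only)",
	unit: metrics.Unit("percent"),
	vt:   prometheus.GaugeValue,
	getValues: func(l layer.Layer) []value {
		info := l.Info()
		if info.Size == 0 {
			return []value{{v: 0}}
		}
		return []value{{v: float64(info.FetchedSize) / float64(info.Size) * 100.0}}
	},
}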

View File

@ -0,0 +1,113 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package layermetrics
import (
"sync"
"github.com/containerd/stargz-snapshotter/fs/layer"
metrics "github.com/docker/go-metrics"
"github.com/prometheus/client_golang/prometheus"
)
func NewLayerMetrics(ns *metrics.Namespace) *Controller {
if ns == nil {
return &Controller{}
}
c := &Controller{
ns: ns,
layer: make(map[string]layer.Layer),
}
c.metrics = append(c.metrics, layerMetrics...)
ns.Add(c)
return c
}
type Controller struct {
ns *metrics.Namespace
metrics []*metric
layer map[string]layer.Layer
layerMu sync.RWMutex
}
func (c *Controller) Describe(ch chan<- *prometheus.Desc) {
for _, e := range c.metrics {
ch <- e.desc(c.ns)
}
}
func (c *Controller) Collect(ch chan<- prometheus.Metric) {
c.layerMu.RLock()
wg := &sync.WaitGroup{}
for mp, l := range c.layer {
mp, l := mp, l
wg.Add(1)
go func() {
defer wg.Done()
for _, e := range c.metrics {
e.collect(mp, l, c.ns, ch)
}
}()
}
c.layerMu.RUnlock()
wg.Wait()
}
func (c *Controller) Add(key string, l layer.Layer) {
if c.ns == nil {
return
}
c.layerMu.Lock()
c.layer[key] = l
c.layerMu.Unlock()
}
func (c *Controller) Remove(key string) {
if c.ns == nil {
return
}
c.layerMu.Lock()
delete(c.layer, key)
c.layerMu.Unlock()
}
type value struct {
v float64
l []string
}
type metric struct {
name string
help string
unit metrics.Unit
vt prometheus.ValueType
labels []string
// getValues returns the value and labels for the data
getValues func(l layer.Layer) []value
}
func (m *metric) desc(ns *metrics.Namespace) *prometheus.Desc {
return ns.NewDesc(m.name, m.help, m.unit, append([]string{"digest", "mountpoint"}, m.labels...)...)
}
func (m *metric) collect(mountpoint string, l layer.Layer, ns *metrics.Namespace, ch chan<- prometheus.Metric) {
values := m.getValues(l)
for _, v := range values {
ch <- prometheus.MustNewConstMetric(m.desc(ns), m.vt, v.v, append([]string{l.Info().Digest.String(), mountpoint}, v.l...)...)
}
}
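To actually expose these collectors, the snapshotter creates a docker/go-metrics namespace, builds the Controller from it, and serves the shared Prometheus registry over HTTP. A self-contained sketch of that wiring (the listen address is a placeholder, and the import path for this package is assumed to be fs/metrics/layer as in the vendored tree):

package main

import (
	"net/http"

	layermetrics "github.com/containerd/stargz-snapshotter/fs/metrics/layer"
	metrics "github.com/docker/go-metrics"
)

func main() {
	// Group the per-layer metrics under the "stargz"/"fs" namespace; labels are optional.
	ns := metrics.NewNamespace("stargz", "fs", nil)
	c := layermetrics.NewLayerMetrics(ns) // registers the Controller with the namespace
	_ = c                                 // c.Add(mountpoint, layer) is called by the filesystem when a layer is mounted

	// Register the namespace with the shared prometheus registry and serve it.
	metrics.Register(ns)
	http.Handle("/metrics", metrics.Handler())
	_ = http.ListenAndServe("127.0.0.1:9402", nil) // placeholder address
}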

View File

@ -0,0 +1,579 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2019 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the NOTICE.md file.
*/
package reader
import (
"bufio"
"bytes"
"context"
"crypto/sha256"
"fmt"
"io"
"os"
"runtime"
"sync"
"time"
"github.com/containerd/stargz-snapshotter/cache"
"github.com/containerd/stargz-snapshotter/estargz"
commonmetrics "github.com/containerd/stargz-snapshotter/fs/metrics/common"
"github.com/containerd/stargz-snapshotter/metadata"
"github.com/hashicorp/go-multierror"
digest "github.com/opencontainers/go-digest"
"golang.org/x/sync/errgroup"
"golang.org/x/sync/semaphore"
)
const maxWalkDepth = 10000
type Reader interface {
OpenFile(id uint32) (io.ReaderAt, error)
Metadata() metadata.Reader
Close() error
LastOnDemandReadTime() time.Time
}
// VerifiableReader produces a Reader with a given verifier.
type VerifiableReader struct {
r *reader
lastVerifyErr error
lastVerifyErrMu sync.Mutex
prohibitVerifyFailure bool
prohibitVerifyFailureMu sync.RWMutex
closed bool
closedMu sync.Mutex
verifier func(uint32, string) (digest.Verifier, error)
}
func (vr *VerifiableReader) storeLastVerifyErr(err error) {
vr.lastVerifyErrMu.Lock()
vr.lastVerifyErr = err
vr.lastVerifyErrMu.Unlock()
}
func (vr *VerifiableReader) loadLastVerifyErr() error {
vr.lastVerifyErrMu.Lock()
err := vr.lastVerifyErr
vr.lastVerifyErrMu.Unlock()
return err
}
func (vr *VerifiableReader) SkipVerify() Reader {
return vr.r
}
func (vr *VerifiableReader) VerifyTOC(tocDigest digest.Digest) (Reader, error) {
if vr.isClosed() {
return nil, fmt.Errorf("reader is already closed")
}
vr.prohibitVerifyFailureMu.Lock()
vr.prohibitVerifyFailure = true
lastVerifyErr := vr.loadLastVerifyErr()
vr.prohibitVerifyFailureMu.Unlock()
if err := lastVerifyErr; err != nil {
return nil, fmt.Errorf("content error occurs during caching contents: %w", err)
}
if actual := vr.r.r.TOCDigest(); actual != tocDigest {
return nil, fmt.Errorf("invalid TOC JSON %q; want %q", actual, tocDigest)
}
vr.r.verify = true
return vr.r, nil
}
func (vr *VerifiableReader) Metadata() metadata.Reader {
// TODO: this shouldn't be called before verification completes
return vr.r.r
}
func (vr *VerifiableReader) Cache(opts ...CacheOption) (err error) {
if vr.isClosed() {
return fmt.Errorf("reader is already closed")
}
var cacheOpts cacheOptions
for _, o := range opts {
o(&cacheOpts)
}
gr := vr.r
r := gr.r
if cacheOpts.reader != nil {
r, err = r.Clone(cacheOpts.reader)
if err != nil {
return err
}
}
rootID := r.RootID()
filter := func(int64) bool {
return true
}
if cacheOpts.filter != nil {
filter = cacheOpts.filter
}
eg, egCtx := errgroup.WithContext(context.Background())
eg.Go(func() error {
return vr.cacheWithReader(egCtx,
0, eg, semaphore.NewWeighted(int64(runtime.GOMAXPROCS(0))),
rootID, r, filter, cacheOpts.cacheOpts...)
})
return eg.Wait()
}
func (vr *VerifiableReader) cacheWithReader(ctx context.Context, currentDepth int, eg *errgroup.Group, sem *semaphore.Weighted, dirID uint32, r metadata.Reader, filter func(int64) bool, opts ...cache.Option) (rErr error) {
if currentDepth > maxWalkDepth {
return fmt.Errorf("tree is too deep (depth:%d)", currentDepth)
}
rootID := r.RootID()
r.ForeachChild(dirID, func(name string, id uint32, mode os.FileMode) bool {
e, err := r.GetAttr(id)
if err != nil {
rErr = err
return false
}
if mode.IsDir() {
// Walk through all files in this stargz file.
// Ignore the entry of "./" (formatted as "" by the stargz lib) on the root directory
// because this points to the root directory itself.
if dirID == rootID && name == "" {
return true
}
if err := vr.cacheWithReader(ctx, currentDepth+1, eg, sem, id, r, filter, opts...); err != nil {
rErr = err
return false
}
return true
} else if !mode.IsRegular() {
// Only cache regular files
return true
} else if dirID == rootID && name == estargz.TOCTarName {
// We don't need to cache the TOC JSON file
return true
}
offset, err := r.GetOffset(id)
if err != nil {
rErr = err
return false
}
if !filter(offset) {
// This entry needs to be filtered out
return true
}
fr, err := r.OpenFileWithPreReader(id, func(nid uint32, chunkOffset, chunkSize int64, chunkDigest string, r io.Reader) (retErr error) {
return vr.readAndCache(nid, r, chunkOffset, chunkSize, chunkDigest, opts...)
})
if err != nil {
rErr = err
return false
}
var nr int64
for nr < e.Size {
chunkOffset, chunkSize, chunkDigestStr, ok := fr.ChunkEntryForOffset(nr)
if !ok {
break
}
nr += chunkSize
if err := sem.Acquire(ctx, 1); err != nil {
rErr = err
return false
}
eg.Go(func() error {
defer sem.Release(1)
err := vr.readAndCache(id, io.NewSectionReader(fr, chunkOffset, chunkSize), chunkOffset, chunkSize, chunkDigestStr, opts...)
if err != nil {
return fmt.Errorf("failed to read %q (off:%d,size:%d): %w", name, chunkOffset, chunkSize, err)
}
return nil
})
}
return true
})
return
}
func (vr *VerifiableReader) readAndCache(id uint32, fr io.Reader, chunkOffset, chunkSize int64, chunkDigest string, opts ...cache.Option) (retErr error) {
gr := vr.r
if retErr != nil {
vr.storeLastVerifyErr(retErr)
}
// Check if it already exists in the cache
cacheID := genID(id, chunkOffset, chunkSize)
if r, err := gr.cache.Get(cacheID); err == nil {
r.Close()
return nil
}
// cache miss; fetch the chunk and add it to the cache
br := bufio.NewReaderSize(fr, int(chunkSize))
if _, err := br.Peek(int(chunkSize)); err != nil {
return fmt.Errorf("cacheWithReader.peek: %v", err)
}
w, err := gr.cache.Add(cacheID, opts...)
if err != nil {
return err
}
defer w.Close()
v, err := vr.verifier(id, chunkDigest)
if err != nil {
vr.prohibitVerifyFailureMu.RLock()
if vr.prohibitVerifyFailure {
vr.prohibitVerifyFailureMu.RUnlock()
return fmt.Errorf("verifier not found: %w", err)
}
vr.storeLastVerifyErr(err)
vr.prohibitVerifyFailureMu.RUnlock()
}
tee := io.Discard
if v != nil {
tee = io.Writer(v) // verification is required
}
if _, err := io.CopyN(w, io.TeeReader(br, tee), chunkSize); err != nil {
w.Abort()
return fmt.Errorf("failed to cache file payload: %w", err)
}
if v != nil && !v.Verified() {
err := fmt.Errorf("invalid chunk")
vr.prohibitVerifyFailureMu.RLock()
if vr.prohibitVerifyFailure {
vr.prohibitVerifyFailureMu.RUnlock()
w.Abort()
return err
}
vr.storeLastVerifyErr(err)
vr.prohibitVerifyFailureMu.RUnlock()
}
return w.Commit()
}
func (vr *VerifiableReader) Close() error {
vr.closedMu.Lock()
defer vr.closedMu.Unlock()
if vr.closed {
return nil
}
vr.closed = true
return vr.r.Close()
}
func (vr *VerifiableReader) isClosed() bool {
vr.closedMu.Lock()
closed := vr.closed
vr.closedMu.Unlock()
return closed
}
// NewReader creates a Reader based on the given stargz blob and cache implementation.
// It returns a VerifiableReader so the caller must provide a metadata.ChunkVerifier
// to use for verifying files or chunks contained in this stargz blob.
func NewReader(r metadata.Reader, cache cache.BlobCache, layerSha digest.Digest) (*VerifiableReader, error) {
vr := &reader{
r: r,
cache: cache,
bufPool: sync.Pool{
New: func() interface{} {
return new(bytes.Buffer)
},
},
layerSha: layerSha,
verifier: digestVerifier,
}
return &VerifiableReader{r: vr, verifier: digestVerifier}, nil
}
type reader struct {
r metadata.Reader
cache cache.BlobCache
bufPool sync.Pool
layerSha digest.Digest
lastReadTime time.Time
lastReadTimeMu sync.Mutex
closed bool
closedMu sync.Mutex
verify bool
verifier func(uint32, string) (digest.Verifier, error)
}
func (gr *reader) Metadata() metadata.Reader {
return gr.r
}
func (gr *reader) setLastReadTime(lastReadTime time.Time) {
gr.lastReadTimeMu.Lock()
gr.lastReadTime = lastReadTime
gr.lastReadTimeMu.Unlock()
}
func (gr *reader) LastOnDemandReadTime() time.Time {
gr.lastReadTimeMu.Lock()
t := gr.lastReadTime
gr.lastReadTimeMu.Unlock()
return t
}
func (gr *reader) OpenFile(id uint32) (io.ReaderAt, error) {
if gr.isClosed() {
return nil, fmt.Errorf("reader is already closed")
}
var fr metadata.File
fr, err := gr.r.OpenFileWithPreReader(id, func(nid uint32, chunkOffset, chunkSize int64, chunkDigest string, r io.Reader) error {
// Check if it already exists in the cache
cacheID := genID(nid, chunkOffset, chunkSize)
if r, err := gr.cache.Get(cacheID); err == nil {
r.Close()
return nil
}
// Read and cache
b := gr.bufPool.Get().(*bytes.Buffer)
b.Reset()
b.Grow(int(chunkSize))
ip := b.Bytes()[:chunkSize]
if _, err := io.ReadFull(r, ip); err != nil {
gr.putBuffer(b)
return err
}
err := gr.verifyAndCache(nid, ip, chunkDigest, cacheID)
gr.putBuffer(b)
return err
})
if err != nil {
return nil, fmt.Errorf("failed to open file %d: %w", id, err)
}
return &file{
id: id,
fr: fr,
gr: gr,
}, nil
}
func (gr *reader) Close() (retErr error) {
gr.closedMu.Lock()
defer gr.closedMu.Unlock()
if gr.closed {
return nil
}
gr.closed = true
if err := gr.cache.Close(); err != nil {
retErr = multierror.Append(retErr, err)
}
if err := gr.r.Close(); err != nil {
retErr = multierror.Append(retErr, err)
}
return
}
func (gr *reader) isClosed() bool {
gr.closedMu.Lock()
closed := gr.closed
gr.closedMu.Unlock()
return closed
}
func (gr *reader) putBuffer(b *bytes.Buffer) {
b.Reset()
gr.bufPool.Put(b)
}
type file struct {
id uint32
fr metadata.File
gr *reader
}
// ReadAt reads chunks from the stargz file, trying to fetch as many chunks
// as possible from the cache.
func (sf *file) ReadAt(p []byte, offset int64) (int, error) {
nr := 0
for nr < len(p) {
chunkOffset, chunkSize, chunkDigestStr, ok := sf.fr.ChunkEntryForOffset(offset + int64(nr))
if !ok {
break
}
var (
id = genID(sf.id, chunkOffset, chunkSize)
lowerDiscard = positive(offset - chunkOffset)
upperDiscard = positive(chunkOffset + chunkSize - (offset + int64(len(p))))
expectedSize = chunkSize - upperDiscard - lowerDiscard
)
// Check if the content exists in the cache
if r, err := sf.gr.cache.Get(id); err == nil {
n, err := r.ReadAt(p[nr:int64(nr)+expectedSize], lowerDiscard)
if (err == nil || err == io.EOF) && int64(n) == expectedSize {
nr += n
r.Close()
continue
}
r.Close()
}
// Cache miss. Take the chunk from the underlying reader.
// We read the whole chunk here and add it to the cache so that subsequent
// reads against neighboring chunks can take the data without decompression.
if lowerDiscard == 0 && upperDiscard == 0 {
// We can directly store the result to the given buffer
ip := p[nr : int64(nr)+chunkSize]
n, err := sf.fr.ReadAt(ip, chunkOffset)
if err != nil && err != io.EOF {
return 0, fmt.Errorf("failed to read data: %w", err)
}
if err := sf.gr.verifyAndCache(sf.id, ip, chunkDigestStr, id); err != nil {
return 0, err
}
nr += n
continue
}
// Use a temporary buffer for aligning this chunk
b := sf.gr.bufPool.Get().(*bytes.Buffer)
b.Reset()
b.Grow(int(chunkSize))
ip := b.Bytes()[:chunkSize]
if _, err := sf.fr.ReadAt(ip, chunkOffset); err != nil && err != io.EOF {
sf.gr.putBuffer(b)
return 0, fmt.Errorf("failed to read data: %w", err)
}
if err := sf.gr.verifyAndCache(sf.id, ip, chunkDigestStr, id); err != nil {
sf.gr.putBuffer(b)
return 0, err
}
n := copy(p[nr:], ip[lowerDiscard:chunkSize-upperDiscard])
sf.gr.putBuffer(b)
if int64(n) != expectedSize {
return 0, fmt.Errorf("unexpected final data size %d; want %d", n, expectedSize)
}
nr += n
}
commonmetrics.AddBytesCount(commonmetrics.OnDemandBytesServed, sf.gr.layerSha, int64(nr)) // measure the number of on demand bytes served
return nr, nil
}
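// exampleChunkWindow is an editor-added illustration (not part of upstream) of
// the alignment arithmetic above: for a 10-byte read at offset 5 against a
// chunk covering [3,13) (chunkOffset=3, chunkSize=10), lowerDiscard=2,
// upperDiscard=0 and expectedSize=8, i.e. 8 bytes of that chunk land in the
// caller's buffer.
func exampleChunkWindow(offset, length, chunkOffset, chunkSize int64) (lower, upper, expected int64) {
	lower = positive(offset - chunkOffset)
	upper = positive(chunkOffset + chunkSize - (offset + length))
	expected = chunkSize - upper - lower
	return lower, upper, expected
}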
func (gr *reader) verifyAndCache(entryID uint32, ip []byte, chunkDigestStr string, cacheID string) error {
// We can end up doing an on-demand registry fetch when aligning the chunk
commonmetrics.IncOperationCount(commonmetrics.OnDemandRemoteRegistryFetchCount, gr.layerSha) // increment the number of on demand file fetches from remote registry
commonmetrics.AddBytesCount(commonmetrics.OnDemandBytesFetched, gr.layerSha, int64(len(ip))) // record total bytes fetched
gr.setLastReadTime(time.Now())
// Verify this chunk
if err := gr.verifyChunk(entryID, ip, chunkDigestStr); err != nil {
return fmt.Errorf("invalid chunk: %w", err)
}
// Cache this chunk
if w, err := gr.cache.Add(cacheID); err == nil {
if cn, err := w.Write(ip); err != nil || cn != len(ip) {
w.Abort()
} else {
w.Commit()
}
w.Close()
}
return nil
}
func (gr *reader) verifyChunk(id uint32, p []byte, chunkDigestStr string) error {
if !gr.verify {
return nil // verification is not required
}
v, err := gr.verifier(id, chunkDigestStr)
if err != nil {
return fmt.Errorf("invalid chunk: %w", err)
}
if _, err := v.Write(p); err != nil {
return fmt.Errorf("invalid chunk: failed to write to verifier: %w", err)
}
if !v.Verified() {
return fmt.Errorf("invalid chunk: not verified")
}
return nil
}
func genID(id uint32, offset, size int64) string {
sum := sha256.Sum256([]byte(fmt.Sprintf("%d-%d-%d", id, offset, size)))
return fmt.Sprintf("%x", sum)
}
func positive(n int64) int64 {
if n < 0 {
return 0
}
return n
}
type CacheOption func(*cacheOptions)
type cacheOptions struct {
cacheOpts []cache.Option
filter func(int64) bool
reader *io.SectionReader
}
func WithCacheOpts(cacheOpts ...cache.Option) CacheOption {
return func(opts *cacheOptions) {
opts.cacheOpts = cacheOpts
}
}
func WithFilter(filter func(int64) bool) CacheOption {
return func(opts *cacheOptions) {
opts.filter = filter
}
}
func WithReader(sr *io.SectionReader) CacheOption {
return func(opts *cacheOptions) {
opts.reader = sr
}
}
func digestVerifier(id uint32, chunkDigestStr string) (digest.Verifier, error) {
chunkDigest, err := digest.Parse(chunkDigestStr)
if err != nil {
return nil, fmt.Errorf("invalid chunk: no digest is recorded(len=%d): %w", len(chunkDigestStr), err)
}
return chunkDigest.Verifier(), nil
}
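digestVerifier above is the default verifier: each chunk carries a digest recorded in the eStargz TOC, and verification is just streaming the chunk bytes through the digest's Verifier. A self-contained sketch of the go-digest flow (sample data only):

package main

import (
	"fmt"

	digest "github.com/opencontainers/go-digest"
)

func main() {
	chunk := []byte("0123456789")
	// In the real reader the expected digest comes from the eStargz TOC.
	expected := digest.FromBytes(chunk)

	// Matching content verifies successfully.
	v := expected.Verifier()
	if _, err := v.Write(chunk); err != nil {
		panic(err)
	}
	fmt.Println("verified:", v.Verified()) // true

	// Tampered content fails verification.
	v = expected.Verifier()
	if _, err := v.Write([]byte("tampered!!")); err != nil {
		panic(err)
	}
	fmt.Println("verified:", v.Verified()) // false
}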

View File

@ -0,0 +1,821 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2019 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the NOTICE.md file.
*/
package reader
import (
"bytes"
"compress/gzip"
"fmt"
"io"
"os"
"path"
"path/filepath"
"strings"
"sync"
"testing"
"time"
"github.com/containerd/stargz-snapshotter/cache"
"github.com/containerd/stargz-snapshotter/estargz"
"github.com/containerd/stargz-snapshotter/metadata"
tutil "github.com/containerd/stargz-snapshotter/util/testutil"
"github.com/klauspost/compress/zstd"
digest "github.com/opencontainers/go-digest"
"golang.org/x/sync/errgroup"
)
type region struct{ b, e int64 }
const (
sampleChunkSize = 3
sampleMiddleOffset = sampleChunkSize / 2
sampleData1 = "0123456789"
lastChunkOffset1 = sampleChunkSize * (int64(len(sampleData1)) / sampleChunkSize)
)
var srcCompressions = map[string]tutil.CompressionFactory{
"zstd-fastest": tutil.ZstdCompressionWithLevel(zstd.SpeedFastest),
"gzip-bestspeed": tutil.GzipCompressionWithLevel(gzip.BestSpeed),
"externaltoc-gzip-bestspeed": tutil.ExternalTOCGzipCompressionWithLevel(gzip.BestSpeed),
}
func TestSuiteReader(t *testing.T, store metadata.Store) {
testFileReadAt(t, store)
testCacheVerify(t, store)
testFailReader(t, store)
testPreReader(t, store)
}
func testFileReadAt(t *testing.T, factory metadata.Store) {
sizeCond := map[string]int64{
"single_chunk": sampleChunkSize - sampleMiddleOffset,
"multi_chunks": sampleChunkSize + sampleMiddleOffset,
}
innerOffsetCond := map[string]int64{
"at_top": 0,
"at_middle": sampleMiddleOffset,
}
baseOffsetCond := map[string]int64{
"of_1st_chunk": sampleChunkSize * 0,
"of_2nd_chunk": sampleChunkSize * 1,
"of_last_chunk": lastChunkOffset1,
}
fileSizeCond := map[string]int64{
"in_1_chunk_file": sampleChunkSize * 1,
"in_2_chunks_file": sampleChunkSize * 2,
"in_max_size_file": int64(len(sampleData1)),
}
cacheCond := map[string][]region{
"with_clean_cache": nil,
"with_edge_filled_cache": {
region{0, sampleChunkSize - 1},
region{lastChunkOffset1, int64(len(sampleData1)) - 1},
},
"with_sparse_cache": {
region{0, sampleChunkSize - 1},
region{2 * sampleChunkSize, 3*sampleChunkSize - 1},
},
}
for sn, size := range sizeCond {
for in, innero := range innerOffsetCond {
for bo, baseo := range baseOffsetCond {
for fn, filesize := range fileSizeCond {
for cc, cacheExcept := range cacheCond {
for srcCompressionName, srcCompression := range srcCompressions {
srcCompression := srcCompression()
t.Run(fmt.Sprintf("reading_%s_%s_%s_%s_%s_%s", sn, in, bo, fn, cc, srcCompressionName), func(t *testing.T) {
if filesize > int64(len(sampleData1)) {
t.Fatal("sample file size is larger than sample data")
}
wantN := size
offset := baseo + innero
if remain := filesize - offset; remain < wantN {
if wantN = remain; wantN < 0 {
wantN = 0
}
}
// use constant string value as a data source.
want := strings.NewReader(sampleData1)
// data we want to get.
wantData := make([]byte, wantN)
_, err := want.ReadAt(wantData, offset)
if err != nil && err != io.EOF {
t.Fatalf("want.ReadAt (offset=%d,size=%d): %v", offset, wantN, err)
}
// data we get through a file.
f, closeFn := makeFile(t, []byte(sampleData1)[:filesize], sampleChunkSize, factory, srcCompression)
defer closeFn()
f.fr = newExceptFile(t, f.fr, cacheExcept...)
for _, reg := range cacheExcept {
id := genID(f.id, reg.b, reg.e-reg.b+1)
w, err := f.gr.cache.Add(id)
if err != nil {
w.Close()
t.Fatalf("failed to add cache %v: %v", id, err)
}
if _, err := w.Write([]byte(sampleData1[reg.b : reg.e+1])); err != nil {
w.Close()
t.Fatalf("failed to write cache %v: %v", id, err)
}
if err := w.Commit(); err != nil {
w.Close()
t.Fatalf("failed to commit cache %v: %v", id, err)
}
w.Close()
}
respData := make([]byte, size)
n, err := f.ReadAt(respData, offset)
if err != nil {
t.Errorf("failed to read off=%d, size=%d, filesize=%d: %v", offset, size, filesize, err)
return
}
respData = respData[:n]
if !bytes.Equal(wantData, respData) {
t.Errorf("off=%d, filesize=%d; read data{size=%d,data=%q}; want (size=%d,data=%q)",
offset, filesize, len(respData), string(respData), wantN, string(wantData))
return
}
// check cache has valid contents.
cn := 0
nr := 0
for int64(nr) < wantN {
chunkOffset, chunkSize, _, ok := f.fr.ChunkEntryForOffset(offset + int64(nr))
if !ok {
break
}
data := make([]byte, chunkSize)
id := genID(f.id, chunkOffset, chunkSize)
r, err := f.gr.cache.Get(id)
if err != nil {
t.Errorf("missed cache of offset=%d, size=%d: %v(got size=%d)", chunkOffset, chunkSize, err, n)
return
}
defer r.Close()
if n, err := r.ReadAt(data, 0); (err != nil && err != io.EOF) || n != int(chunkSize) {
t.Errorf("failed to read cache of offset=%d, size=%d: %v(got size=%d)", chunkOffset, chunkSize, err, n)
return
}
nr += n
cn++
}
})
}
}
}
}
}
}
}
func newExceptFile(t *testing.T, fr metadata.File, except ...region) metadata.File {
er := exceptFile{fr: fr, t: t}
er.except = map[region]bool{}
for _, reg := range except {
er.except[reg] = true
}
return &er
}
type exceptFile struct {
fr metadata.File
except map[region]bool
t *testing.T
}
func (er *exceptFile) ReadAt(p []byte, offset int64) (int, error) {
if er.except[region{offset, offset + int64(len(p)) - 1}] {
er.t.Fatalf("Requested prohibited region of chunk: (%d, %d)", offset, offset+int64(len(p))-1)
}
return er.fr.ReadAt(p, offset)
}
func (er *exceptFile) ChunkEntryForOffset(offset int64) (off int64, size int64, dgst string, ok bool) {
return er.fr.ChunkEntryForOffset(offset)
}
func makeFile(t *testing.T, contents []byte, chunkSize int, factory metadata.Store, comp tutil.Compression) (*file, func() error) {
testName := "test"
sr, dgst, err := tutil.BuildEStargz([]tutil.TarEntry{
tutil.File(testName, string(contents)),
}, tutil.WithEStargzOptions(estargz.WithChunkSize(chunkSize), estargz.WithCompression(comp)))
if err != nil {
t.Fatalf("failed to build sample estargz")
}
mr, err := factory(sr, metadata.WithDecompressors(comp))
if err != nil {
t.Fatalf("failed to create reader: %v", err)
}
vr, err := NewReader(mr, cache.NewMemoryCache(), digest.FromString(""))
if err != nil {
mr.Close()
t.Fatalf("failed to make new reader: %v", err)
}
r, err := vr.VerifyTOC(dgst)
if err != nil {
vr.Close()
t.Fatalf("failed to verify TOC: %v", err)
}
tid, _, err := r.Metadata().GetChild(r.Metadata().RootID(), testName)
if err != nil {
vr.Close()
t.Fatalf("failed to get %q: %v", testName, err)
}
ra, err := r.OpenFile(tid)
if err != nil {
vr.Close()
t.Fatalf("Failed to open testing file: %v", err)
}
f, ok := ra.(*file)
if !ok {
vr.Close()
t.Fatalf("invalid type of file %q", tid)
}
return f, vr.Close
}
func testCacheVerify(t *testing.T, factory metadata.Store) {
for _, skipVerify := range [2]bool{true, false} {
for _, invalidChunkBeforeVerify := range [2]bool{true, false} {
for _, invalidChunkAfterVerify := range [2]bool{true, false} {
for srcCompressionName, srcCompression := range srcCompressions {
srcCompression := srcCompression()
name := fmt.Sprintf("test_cache_verify_%v_%v_%v_%v",
skipVerify, invalidChunkBeforeVerify, invalidChunkAfterVerify, srcCompressionName)
t.Run(name, func(t *testing.T) {
sr, tocDgst, err := tutil.BuildEStargz([]tutil.TarEntry{
tutil.File("a", sampleData1+"a"),
tutil.File("b", sampleData1+"b"),
}, tutil.WithEStargzOptions(estargz.WithChunkSize(sampleChunkSize), estargz.WithCompression(srcCompression)))
if err != nil {
t.Fatalf("failed to build sample estargz")
}
// Determine the expected behaviour
var wantVerifyFail, wantCacheFail, wantCacheFail2 bool
if skipVerify {
// always no error if verification is disabled
wantVerifyFail, wantCacheFail, wantCacheFail2 = false, false, false
} else if invalidChunkBeforeVerify {
// errors that occur before verifying the TOC must be reported via VerifyTOC()
wantVerifyFail = true
} else if invalidChunkAfterVerify {
// errors that occur after verifying the TOC must be reported via Cache()
wantVerifyFail, wantCacheFail, wantCacheFail2 = false, true, true
} else {
// otherwise no verification error
wantVerifyFail, wantCacheFail, wantCacheFail2 = false, false, false
}
// Prepare reader
verifier := &failIDVerifier{}
mr, err := factory(sr, metadata.WithDecompressors(srcCompression))
if err != nil {
t.Fatalf("failed to prepare reader %v", err)
}
defer mr.Close()
vr, err := NewReader(mr, cache.NewMemoryCache(), digest.FromString(""))
if err != nil {
t.Fatalf("failed to make new reader: %v", err)
}
vr.verifier = verifier.verifier
vr.r.verifier = verifier.verifier
off2id, id2path, err := prepareMap(vr.Metadata(), vr.Metadata().RootID(), "")
if err != nil || off2id == nil || id2path == nil {
t.Fatalf("failed to prepare offset map %v, off2id = %+v, id2path = %+v", err, off2id, id2path)
}
// Perform Cache() before verification
// 1. Either of "a" or "b" is read and verified
// 2. VerifyTOC/SkipVerify is called
// 3. Another entry ("a" or "b") is read
verifyDone := make(chan struct{})
var firstEntryCalled bool
var eg errgroup.Group
var mu sync.Mutex
eg.Go(func() error {
return vr.Cache(WithFilter(func(off int64) bool {
id, ok := off2id[off]
if !ok {
t.Fatalf("no ID is assigned to offset %d", off)
}
name, ok := id2path[id]
if !ok {
t.Fatalf("no name is assigned to id %d", id)
}
if name == "a" || name == "b" {
mu.Lock()
if !firstEntryCalled {
firstEntryCalled = true
if invalidChunkBeforeVerify {
verifier.registerFails([]uint32{id})
}
mu.Unlock()
return true
}
mu.Unlock()
<-verifyDone
if invalidChunkAfterVerify {
verifier.registerFails([]uint32{id})
}
return true
}
return false
}))
})
if invalidChunkBeforeVerify {
// wait until the error from the first chunk read has been registered
start := time.Now()
for {
if err := vr.loadLastVerifyErr(); err != nil {
break
}
if time.Since(start) > time.Second {
t.Fatalf("timeout(1s): failed to wait for read error is registered")
}
time.Sleep(10 * time.Millisecond)
}
}
// Perform verification
if skipVerify {
vr.SkipVerify()
} else {
_, err = vr.VerifyTOC(tocDgst)
}
if checkErr := checkError(wantVerifyFail, err); checkErr != nil {
t.Errorf("verify: %v", checkErr)
return
}
if err != nil {
return
}
close(verifyDone)
// Check the result of Cache()
if checkErr := checkError(wantCacheFail, eg.Wait()); checkErr != nil {
t.Errorf("cache: %v", checkErr)
return
}
// Call Cache() again and check the result
if checkErr := checkError(wantCacheFail2, vr.Cache()); checkErr != nil {
t.Errorf("cache(2): %v", checkErr)
return
}
})
}
}
}
}
}
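// failIDVerifier is a chunk verifier for tests: verification fails for any
// node ID registered via registerFails and succeeds for everything else.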
type failIDVerifier struct {
fails []uint32
failsMu sync.Mutex
}
func (f *failIDVerifier) registerFails(fails []uint32) {
f.failsMu.Lock()
defer f.failsMu.Unlock()
f.fails = fails
}
func (f *failIDVerifier) verifier(id uint32, chunkDigest string) (digest.Verifier, error) {
f.failsMu.Lock()
defer f.failsMu.Unlock()
success := true
for _, n := range f.fails {
if n == id {
success = false
break
}
}
return &testVerifier{success}, nil
}
type testVerifier struct {
success bool
}
func (bv *testVerifier) Write(p []byte) (n int, err error) {
return len(p), nil
}
func (bv *testVerifier) Verified() bool {
return bv.success
}
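// checkError translates the (wantFail, err) pair into a test error when the
// observed outcome does not match the expectation.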
func checkError(wantFail bool, err error) error {
if wantFail && err == nil {
return fmt.Errorf("wanted to fail but succeeded")
} else if !wantFail && err != nil {
return fmt.Errorf("wanted to succeed verification but failed: %w", err)
}
return nil
}
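// prepareMap recursively walks the metadata tree rooted at id (whose path is p)
// and returns a map from content offset to node ID and a map from node ID to path,
// so tests can translate offsets passed to Cache() filters back into file names.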
func prepareMap(mr metadata.Reader, id uint32, p string) (off2id map[int64]uint32, id2path map[uint32]string, _ error) {
attr, err := mr.GetAttr(id)
if err != nil {
return nil, nil, err
}
id2path = map[uint32]string{id: p}
off2id = make(map[int64]uint32)
if attr.Mode.IsRegular() {
off, err := mr.GetOffset(id)
if err != nil {
return nil, nil, err
}
off2id[off] = id
}
var retErr error
mr.ForeachChild(id, func(name string, id uint32, mode os.FileMode) bool {
o2i, i2p, err := prepareMap(mr, id, path.Join(p, name))
if err != nil {
retErr = err
return false
}
for k, v := range o2i {
off2id[k] = v
}
for k, v := range i2p {
id2path[k] = v
}
return true
})
if retErr != nil {
return nil, nil, retErr
}
return off2id, id2path, nil
}
func testFailReader(t *testing.T, factory metadata.Store) {
testFileName := "test"
for srcCompressionName, srcCompression := range srcCompressions {
srcCompression := srcCompression()
t.Run(fmt.Sprintf("%v", srcCompressionName), func(t *testing.T) {
for _, rs := range []bool{true, false} {
for _, vs := range []bool{true, false} {
stargzFile, tocDigest, err := tutil.BuildEStargz([]tutil.TarEntry{
tutil.File(testFileName, sampleData1),
}, tutil.WithEStargzOptions(estargz.WithChunkSize(sampleChunkSize), estargz.WithCompression(srcCompression)))
if err != nil {
t.Fatalf("failed to build sample estargz")
}
br := &breakReaderAt{
ReaderAt: stargzFile,
success: true,
}
bev := &testChunkVerifier{true}
mcache := cache.NewMemoryCache()
mr, err := factory(io.NewSectionReader(br, 0, stargzFile.Size()), metadata.WithDecompressors(srcCompression))
if err != nil {
t.Fatalf("failed to prepare metadata reader")
}
defer mr.Close()
vr, err := NewReader(mr, mcache, digest.FromString(""))
if err != nil {
t.Fatalf("failed to make new reader: %v", err)
}
defer vr.Close()
vr.verifier = bev.verifier
vr.r.verifier = bev.verifier
gr, err := vr.VerifyTOC(tocDigest)
if err != nil {
t.Fatalf("failed to verify TOC: %v", err)
}
notexist := uint32(0)
found := false
for i := uint32(0); i < 1000000; i++ {
if _, err := gr.Metadata().GetAttr(i); err != nil {
notexist, found = i, true
break
}
}
if !found {
t.Fatalf("free ID not found")
}
// tests for opening non-existing file
_, err = gr.OpenFile(notexist)
if err == nil {
t.Errorf("succeeded to open file but wanted to fail")
return
}
// tests failure behaviour of a file read
tid, _, err := gr.Metadata().GetChild(gr.Metadata().RootID(), testFileName)
if err != nil {
t.Errorf("failed to get %q: %v", testFileName, err)
return
}
fr, err := gr.OpenFile(tid)
if err != nil {
t.Errorf("failed to open file but wanted to succeed: %v", err)
return
}
mcache.(*cache.MemoryCache).Membuf = map[string]*bytes.Buffer{}
br.success = rs
bev.success = vs
// tests for reading file
p := make([]byte, len(sampleData1))
n, err := fr.ReadAt(p, 0)
if rs && vs {
if err != nil || n != len(sampleData1) || !bytes.Equal([]byte(sampleData1), p) {
t.Errorf("failed to read data but wanted to succeed: %v", err)
return
}
} else {
if err == nil {
t.Errorf("succeeded to read data but wanted to fail (reader:%v,verify:%v)", rs, vs)
return
}
}
}
}
})
}
}
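// breakReaderAt wraps an io.ReaderAt and fails every read once success is set
// to false, simulating a broken underlying blob.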
type breakReaderAt struct {
io.ReaderAt
success bool
}
func (br *breakReaderAt) ReadAt(p []byte, off int64) (int, error) {
if br.success {
return br.ReaderAt.ReadAt(p, off)
}
return 0, fmt.Errorf("failed")
}
type testChunkVerifier struct {
success bool
}
func (bev *testChunkVerifier) verifier(id uint32, chunkDigest string) (digest.Verifier, error) {
return &testVerifier{bev.success}, nil
}
func testPreReader(t *testing.T, factory metadata.Store) {
data64KB := string(tutil.RandomBytes(t, 64000))
tests := []struct {
name string
chunkSize int
minChunkSize int
in []tutil.TarEntry
want []check
}{
{
name: "several_files_in_chunk",
minChunkSize: 8000,
in: []tutil.TarEntry{
tutil.Dir("foo/"),
tutil.File("foo/foo1", data64KB),
tutil.File("foo2", "bb"),
tutil.File("foo22", "ccc"),
tutil.Dir("bar/"),
tutil.File("bar/bar.txt", "aaa"),
tutil.File("foo3", data64KB),
},
// NOTE: we assume that the compressed "data64KB" is still larger than 8KB
// landmark+dir+foo1, foo2+foo22+dir+bar.txt+foo3, TOC, footer
want: []check{
hasFileContentsWithPreCached("foo22", 0, "ccc", chunkInfo{"foo2", "bb", 0, 2}, chunkInfo{"bar/bar.txt", "aaa", 0, 3}, chunkInfo{"foo3", data64KB, 0, 64000}),
hasFileContentsOffset("foo2", 0, "bb", true),
hasFileContentsOffset("bar/bar.txt", 0, "aaa", true),
hasFileContentsOffset("bar/bar.txt", 1, "aa", true),
hasFileContentsOffset("bar/bar.txt", 2, "a", true),
hasFileContentsOffset("foo3", 0, data64KB, true),
hasFileContentsOffset("foo22", 0, "ccc", true),
hasFileContentsOffset("foo/foo1", 0, data64KB, false),
hasFileContentsOffset("foo/foo1", 0, data64KB, true),
hasFileContentsOffset("foo/foo1", 1, data64KB[1:], true),
hasFileContentsOffset("foo/foo1", 2, data64KB[2:], true),
hasFileContentsOffset("foo/foo1", 3, data64KB[3:], true),
},
},
{
name: "several_files_in_chunk_chunked",
minChunkSize: 8000,
chunkSize: 32000,
in: []tutil.TarEntry{
tutil.Dir("foo/"),
tutil.File("foo/foo1", data64KB),
tutil.File("foo2", "bb"),
tutil.Dir("bar/"),
tutil.File("foo3", data64KB),
},
// NOTE: we assume that the compressed chunk of "data64KB" is still larger than 8KB
// landmark+dir+foo1(1), foo1(2), foo2+dir+foo3(1), foo3(2), TOC, footer
want: []check{
hasFileContentsWithPreCached("foo2", 0, "bb", chunkInfo{"foo3", data64KB[:32000], 0, 32000}),
hasFileContentsOffset("foo2", 0, "bb", true),
hasFileContentsOffset("foo2", 1, "b", true),
hasFileContentsOffset("foo3", 0, data64KB[:len(data64KB)/2], true),
hasFileContentsOffset("foo3", 1, data64KB[1:len(data64KB)/2], true),
hasFileContentsOffset("foo3", 2, data64KB[2:len(data64KB)/2], true),
hasFileContentsOffset("foo3", int64(len(data64KB)/2), data64KB[len(data64KB)/2:], false),
hasFileContentsOffset("foo3", int64(len(data64KB)-1), data64KB[len(data64KB)-1:], true),
hasFileContentsOffset("foo/foo1", 0, data64KB, false),
hasFileContentsOffset("foo/foo1", 1, data64KB[1:], true),
hasFileContentsOffset("foo/foo1", 2, data64KB[2:], true),
hasFileContentsOffset("foo/foo1", int64(len(data64KB)/2), data64KB[len(data64KB)/2:], true),
hasFileContentsOffset("foo/foo1", int64(len(data64KB)-1), data64KB[len(data64KB)-1:], true),
},
},
}
for _, tt := range tests {
for srcCompressionName, srcCompression := range srcCompressions {
srcCompression := srcCompression()
t.Run(tt.name+"-"+srcCompressionName, func(t *testing.T) {
opts := []tutil.BuildEStargzOption{
tutil.WithEStargzOptions(estargz.WithCompression(srcCompression)),
}
if tt.chunkSize > 0 {
opts = append(opts, tutil.WithEStargzOptions(estargz.WithChunkSize(tt.chunkSize)))
}
if tt.minChunkSize > 0 {
t.Logf("minChunkSize = %d", tt.minChunkSize)
opts = append(opts, tutil.WithEStargzOptions(estargz.WithMinChunkSize(tt.minChunkSize)))
}
esgz, tocDgst, err := tutil.BuildEStargz(tt.in, opts...)
if err != nil {
t.Fatalf("failed to build sample eStargz: %v", err)
}
testR := &calledReaderAt{esgz, nil}
mr, err := factory(io.NewSectionReader(testR, 0, esgz.Size()), metadata.WithDecompressors(srcCompression))
if err != nil {
t.Fatalf("failed to create new reader: %v", err)
}
defer mr.Close()
memcache := cache.NewMemoryCache()
vr, err := NewReader(mr, memcache, digest.FromString(""))
if err != nil {
t.Fatalf("failed to make new reader: %v", err)
}
rr, err := vr.VerifyTOC(tocDgst)
if err != nil {
t.Fatalf("failed to verify TOC: %v", err)
}
r := rr.(*reader)
for _, want := range tt.want {
want(t, r, testR)
}
})
}
}
}
type check func(*testing.T, *reader, *calledReaderAt)
type chunkInfo struct {
name string
data string
chunkOffset int64
chunkSize int64
}
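// hasFileContentsOffset returns a check that reads len(contents) bytes of name
// at off, verifies the data, and asserts whether the read was served entirely
// from the cache (fromCache) or required at least one call to the underlying reader.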
func hasFileContentsOffset(name string, off int64, contents string, fromCache bool) check {
return func(t *testing.T, r *reader, cr *calledReaderAt) {
tid, err := lookup(r, name)
if err != nil {
t.Fatalf("failed to lookup %q", name)
}
ra, err := r.OpenFile(tid)
if err != nil {
t.Fatalf("Failed to open testing file: %v", err)
}
cr.called = nil // reset test
buf := make([]byte, len(contents))
n, err := ra.ReadAt(buf, off)
if err != nil {
t.Fatalf("failed to readat %q: %v", name, err)
}
if n != len(contents) {
t.Fatalf("failed to read contents %q (off:%d, want:%q) got %q", name, off, longBytesView([]byte(contents)), longBytesView(buf))
}
if string(buf) != contents {
t.Fatalf("unexpected content of %q: %q want %q", name, longBytesView(buf), longBytesView([]byte(contents)))
}
t.Logf("reader calls for %q: offsets: %+v", name, cr.called)
if fromCache {
if len(cr.called) != 0 {
t.Fatalf("unexpected read on %q: offsets: %v", name, cr.called)
}
} else {
if len(cr.called) == 0 {
t.Fatalf("no call happened to reader for %q", name)
}
}
}
}
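// hasFileContentsWithPreCached returns a check that reads name at off, verifies
// the data, and then asserts that each extra chunk has already been stored in
// the cache with the expected contents.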
func hasFileContentsWithPreCached(name string, off int64, contents string, extra ...chunkInfo) check {
return func(t *testing.T, r *reader, cr *calledReaderAt) {
tid, err := lookup(r, name)
if err != nil {
t.Fatalf("failed to lookup %q", name)
}
ra, err := r.OpenFile(tid)
if err != nil {
t.Fatalf("Failed to open testing file: %v", err)
}
buf := make([]byte, len(contents))
n, err := ra.ReadAt(buf, off)
if err != nil {
t.Fatalf("failed to readat %q: %v", name, err)
}
if n != len(contents) {
t.Fatalf("failed to read contents %q (off:%d, want:%q) got %q", name, off, longBytesView([]byte(contents)), longBytesView(buf))
}
if string(buf) != contents {
t.Fatalf("unexpected content of %q: %q want %q", name, longBytesView(buf), longBytesView([]byte(contents)))
}
for _, e := range extra {
eid, err := lookup(r, e.name)
if err != nil {
t.Fatalf("failed to lookup %q", e.name)
}
cacheID := genID(eid, e.chunkOffset, e.chunkSize)
er, err := r.cache.Get(cacheID)
if err != nil {
t.Fatalf("failed to get cache %q: %+v", cacheID, e)
}
data, err := io.ReadAll(io.NewSectionReader(er, 0, e.chunkSize))
er.Close()
if err != nil {
t.Fatalf("failed to read cache %q: %+v", cacheID, e)
}
if string(data) != e.data {
t.Fatalf("unexpected contents of cache %q (%+v): %q; wanted %q", cacheID, e, longBytesView(data), longBytesView([]byte(e.data)))
}
}
}
}
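// lookup resolves a slash-separated path to its node ID by walking the
// metadata tree down from the root.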
func lookup(r *reader, name string) (uint32, error) {
name = strings.TrimPrefix(path.Clean("/"+name), "/")
if name == "" {
return r.Metadata().RootID(), nil
}
dir, base := filepath.Split(name)
pid, err := lookup(r, dir)
if err != nil {
return 0, err
}
id, _, err := r.Metadata().GetChild(pid, base)
return id, err
}
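// calledReaderAt wraps an io.ReaderAt and records the offset of every ReadAt
// call so tests can tell cache hits from reads against the underlying blob.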
type calledReaderAt struct {
io.ReaderAt
called []int64
}
func (r *calledReaderAt) ReadAt(p []byte, off int64) (int, error) {
r.called = append(r.called, off)
return r.ReaderAt.ReadAt(p, off)
}
// longBytesView is an alias of []byte that prints long data in a truncated form so that huge payloads don't flood test output.
type longBytesView []byte
func (b longBytesView) String() string {
if len(b) < 100 {
return string(b)
}
return string(b[:50]) + "...(omit)..." + string(b[len(b)-50:])
}

View File

@ -0,0 +1,535 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2019 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the NOTICE.md file.
*/
package remote
import (
"context"
"fmt"
"io"
"regexp"
"sort"
"strings"
"sync"
"time"
"github.com/containerd/containerd/v2/pkg/reference"
"github.com/containerd/stargz-snapshotter/cache"
"github.com/containerd/stargz-snapshotter/fs/source"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
"golang.org/x/sync/errgroup"
"golang.org/x/sync/singleflight"
)
var contentRangeRegexp = regexp.MustCompile(`bytes ([0-9]+)-([0-9]+)/([0-9]+|\*)`)
type Blob interface {
Check() error
Size() int64
FetchedSize() int64
ReadAt(p []byte, offset int64, opts ...Option) (int, error)
Cache(offset int64, size int64, opts ...Option) error
Refresh(ctx context.Context, host source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error
Close() error
}
type blob struct {
fetcher fetcher
fetcherMu sync.Mutex
size int64
chunkSize int64
prefetchChunkSize int64
cache cache.BlobCache
lastCheck time.Time
lastCheckMu sync.Mutex
checkInterval time.Duration
fetchTimeout time.Duration
fetchedRegionSet regionSet
fetchedRegionSetMu sync.Mutex
fetchedRegionGroup singleflight.Group
fetchedRegionCopyMu sync.Mutex
resolver *Resolver
closed bool
closedMu sync.Mutex
}
func makeBlob(fetcher fetcher, size int64, chunkSize int64, prefetchChunkSize int64,
blobCache cache.BlobCache, lastCheck time.Time, checkInterval time.Duration,
r *Resolver, fetchTimeout time.Duration) *blob {
return &blob{
fetcher: fetcher,
size: size,
chunkSize: chunkSize,
prefetchChunkSize: prefetchChunkSize,
cache: blobCache,
lastCheck: lastCheck,
checkInterval: checkInterval,
resolver: r,
fetchTimeout: fetchTimeout,
}
}
func (b *blob) Close() error {
b.closedMu.Lock()
defer b.closedMu.Unlock()
if b.closed {
return nil
}
b.closed = true
return b.cache.Close()
}
func (b *blob) isClosed() bool {
b.closedMu.Lock()
closed := b.closed
b.closedMu.Unlock()
return closed
}
func (b *blob) Refresh(ctx context.Context, hosts source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) error {
if b.isClosed() {
return fmt.Errorf("blob is already closed")
}
// refresh the fetcher
f, newSize, err := b.resolver.resolveFetcher(ctx, hosts, refspec, desc)
if err != nil {
return err
}
if newSize != b.size {
return fmt.Errorf("Invalid size of new blob %d; want %d", newSize, b.size)
}
// update the blob's fetcher with new one
b.fetcherMu.Lock()
b.fetcher = f
b.fetcherMu.Unlock()
b.lastCheckMu.Lock()
b.lastCheck = time.Now()
b.lastCheckMu.Unlock()
return nil
}
func (b *blob) Check() error {
if b.isClosed() {
return fmt.Errorf("blob is already closed")
}
now := time.Now()
b.lastCheckMu.Lock()
lastCheck := b.lastCheck
b.lastCheckMu.Unlock()
if now.Sub(lastCheck) < b.checkInterval {
// do nothing if not expired
return nil
}
b.fetcherMu.Lock()
fr := b.fetcher
b.fetcherMu.Unlock()
err := fr.check()
if err == nil {
// update lastCheck only if the check succeeded.
// on failure, we should check this layer again next time.
b.lastCheckMu.Lock()
b.lastCheck = now
b.lastCheckMu.Unlock()
}
return err
}
func (b *blob) Size() int64 {
return b.size
}
func (b *blob) FetchedSize() int64 {
b.fetchedRegionSetMu.Lock()
sz := b.fetchedRegionSet.totalSize()
b.fetchedRegionSetMu.Unlock()
return sz
}
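// makeSyncKey builds a deterministic key from the set of requested regions so
// that identical range requests share a single in-flight fetch via singleflight.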
func makeSyncKey(allData map[region]io.Writer) string {
keys := make([]string, len(allData))
keysIndex := 0
for key := range allData {
keys[keysIndex] = fmt.Sprintf("[%d,%d]", key.b, key.e)
keysIndex++
}
sort.Strings(keys)
return strings.Join(keys, ",")
}
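// cacheAt warms the cache for the chunk-aligned region covering
// [offset, offset+size): chunks already present are skipped and the rest are
// fetched with their contents written to io.Discard, so only the cache is populated.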
func (b *blob) cacheAt(offset int64, size int64, fr fetcher, cacheOpts *options) error {
fetchReg := region{floor(offset, b.chunkSize), ceil(offset+size-1, b.chunkSize) - 1}
discard := make(map[region]io.Writer)
err := b.walkChunks(fetchReg, func(reg region) error {
if r, err := b.cache.Get(fr.genID(reg), cacheOpts.cacheOpts...); err == nil {
return r.Close() // nop if the cache hits
}
discard[reg] = io.Discard
return nil
})
if err != nil {
return err
}
return b.fetchRange(discard, cacheOpts)
}
func (b *blob) Cache(offset int64, size int64, opts ...Option) error {
if b.isClosed() {
return fmt.Errorf("blob is already closed")
}
var cacheOpts options
for _, o := range opts {
o(&cacheOpts)
}
b.fetcherMu.Lock()
fr := b.fetcher
b.fetcherMu.Unlock()
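// If prefetchChunkSize does not exceed chunkSize, cache the whole range in a
// single pass. Otherwise split the range into prefetch-sized pieces
// (prefetchChunkSize rounded down to a multiple of chunkSize) and cache them concurrently.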
if b.prefetchChunkSize <= b.chunkSize {
return b.cacheAt(offset, size, fr, &cacheOpts)
}
eg, _ := errgroup.WithContext(context.Background())
fetchSize := b.chunkSize * (b.prefetchChunkSize / b.chunkSize)
end := offset + size
for i := offset; i < end; i += fetchSize {
i, l := i, fetchSize
if i+l > end {
l = end - i
}
eg.Go(func() error {
return b.cacheAt(i, l, fr, &cacheOpts)
})
}
return eg.Wait()
}
// ReadAt reads remote chunks from the specified offset into p.
// It fetches as many chunks as possible from the local cache.
// The behaviour can be configured with options.
func (b *blob) ReadAt(p []byte, offset int64, opts ...Option) (int, error) {
if b.isClosed() {
return 0, fmt.Errorf("blob is already closed")
}
if len(p) == 0 || offset > b.size {
return 0, nil
}
// Make the buffer chunk aligned
allRegion := region{floor(offset, b.chunkSize), ceil(offset+int64(len(p))-1, b.chunkSize) - 1}
allData := make(map[region]io.Writer)
var readAtOpts options
for _, o := range opts {
o(&readAtOpts)
}
// Fetcher can be suddenly updated so we take and use the snapshot of it for
// consistency.
b.fetcherMu.Lock()
fr := b.fetcher
b.fetcherMu.Unlock()
b.walkChunks(allRegion, func(chunk region) error {
var (
base = positive(chunk.b - offset)
lowerUnread = positive(offset - chunk.b)
upperUnread = positive(chunk.e + 1 - (offset + int64(len(p))))
expectedSize = chunk.size() - upperUnread - lowerUnread
)
// Check if the content exists in the cache
r, err := b.cache.Get(fr.genID(chunk), readAtOpts.cacheOpts...)
if err == nil {
defer r.Close()
n, err := r.ReadAt(p[base:base+expectedSize], lowerUnread)
if (err == nil || err == io.EOF) && int64(n) == expectedSize {
return nil
}
}
// We missed cache. Take it from remote registry.
// We get the whole chunk here and add it to the cache so that following
// reads against neighboring chunks can take the data without making HTTP requests.
allData[chunk] = newBytesWriter(p[base:base+expectedSize], lowerUnread)
return nil
})
// Read required data
if err := b.fetchRange(allData, &readAtOpts); err != nil {
return 0, err
}
// Adjust the buffer size according to the blob size
if remain := b.size - offset; int64(len(p)) >= remain {
if remain < 0 {
remain = 0
}
p = p[:remain]
}
return len(p), nil
}
// fetchRegions fetches all specified chunks from the remote blob and puts them in the local cache.
// It must be called from within fetchRange and needs to run inside the singleflight `Do` operation.
func (b *blob) fetchRegions(allData map[region]io.Writer, fetched map[region]bool, opts *options) error {
if len(allData) == 0 {
return nil
}
// Fetcher can be suddenly updated so we take and use the snapshot of it for
// consistency.
b.fetcherMu.Lock()
fr := b.fetcher
b.fetcherMu.Unlock()
// request missed regions
var req []region
for reg := range allData {
req = append(req, reg)
fetched[reg] = false
}
fetchCtx, cancel := context.WithTimeout(context.Background(), b.fetchTimeout)
defer cancel()
if opts.ctx != nil {
fetchCtx = opts.ctx
}
mr, err := fr.fetch(fetchCtx, req, true)
if err != nil {
return err
}
defer mr.Close()
// Update the check timer because we succeeded in accessing the blob
b.lastCheckMu.Lock()
b.lastCheck = time.Now()
b.lastCheckMu.Unlock()
// chunk and cache the response data. Regions must be aligned by chunk size.
// TODO: Reorganize remoteData to make it aligned by chunk size
for {
reg, p, err := mr.Next()
if err == io.EOF {
break
} else if err != nil {
return fmt.Errorf("failed to read multipart resp: %w", err)
}
if err := b.walkChunks(reg, func(chunk region) (retErr error) {
id := fr.genID(chunk)
cw, err := b.cache.Add(id, opts.cacheOpts...)
if err != nil {
return err
}
defer cw.Close()
w := io.Writer(cw)
// If this chunk is one of the targets, write the content to the
// passed reader too.
if _, ok := fetched[chunk]; ok {
w = io.MultiWriter(w, allData[chunk])
}
// Copy the target chunk
if _, err := io.CopyN(w, p, chunk.size()); err != nil {
cw.Abort()
return err
}
// Add the target chunk to the cache
if err := cw.Commit(); err != nil {
return err
}
b.fetchedRegionSetMu.Lock()
b.fetchedRegionSet.add(chunk)
b.fetchedRegionSetMu.Unlock()
fetched[chunk] = true
return nil
}); err != nil {
return fmt.Errorf("failed to get chunks: %w", err)
}
}
// Check all chunks are fetched
var unfetched []region
for c, b := range fetched {
if !b {
unfetched = append(unfetched, c)
}
}
if unfetched != nil {
return fmt.Errorf("failed to fetch region %v", unfetched)
}
return nil
}
// fetchRange fetches all specified chunks from local cache and remote blob.
func (b *blob) fetchRange(allData map[region]io.Writer, opts *options) error {
if len(allData) == 0 {
return nil
}
// We build a key based on regions we need to fetch and pass it to singleflightGroup.Do(...)
// to block simultaneous same requests. Once the request is finished and the data is ready,
// all blocked callers will be unblocked and that same data will be returned by all blocked callers.
key := makeSyncKey(allData)
fetched := make(map[region]bool)
_, err, shared := b.fetchedRegionGroup.Do(key, func() (interface{}, error) {
return nil, b.fetchRegions(allData, fetched, opts)
})
// When unblocked, try to read from the cache if there were no errors.
// If reading from the cache fails, fetch from the remote registry again.
if err == nil && shared {
for reg := range allData {
if _, ok := fetched[reg]; ok {
continue
}
err = b.walkChunks(reg, func(chunk region) error {
b.fetcherMu.Lock()
fr := b.fetcher
b.fetcherMu.Unlock()
// Check if the content exists in the cache
// And if exists, read from cache
r, err := b.cache.Get(fr.genID(chunk), opts.cacheOpts...)
if err != nil {
return err
}
defer r.Close()
rr := io.NewSectionReader(r, 0, chunk.size())
// Copy the target chunk
b.fetchedRegionCopyMu.Lock()
defer b.fetchedRegionCopyMu.Unlock()
if _, err := io.CopyN(allData[chunk], rr, chunk.size()); err != nil {
return err
}
return nil
})
if err != nil {
break
}
}
// if we cannot read the data from cache, do fetch again
if err != nil {
return b.fetchRange(allData, opts)
}
}
return err
}
type walkFunc func(reg region) error
// walkChunks walks chunks in order from begin to end within the specified region.
// The specified region must be aligned to the chunk size.
func (b *blob) walkChunks(allRegion region, walkFn walkFunc) error {
if allRegion.b%b.chunkSize != 0 {
return fmt.Errorf("region (%d, %d) must be aligned by chunk size",
allRegion.b, allRegion.e)
}
for i := allRegion.b; i <= allRegion.e && i < b.size; i += b.chunkSize {
reg := region{i, i + b.chunkSize - 1}
if reg.e >= b.size {
reg.e = b.size - 1
}
if err := walkFn(reg); err != nil {
return err
}
}
return nil
}
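// newBytesWriter returns an io.Writer that copies the written stream into dest,
// treating dest as starting at stream offset destOff and silently dropping
// bytes that fall outside that window. This lets a whole-chunk fetch fill a
// caller buffer that begins mid-chunk.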
func newBytesWriter(dest []byte, destOff int64) io.Writer {
return &bytesWriter{
dest: dest,
destOff: destOff,
current: 0,
}
}
type bytesWriter struct {
dest []byte
destOff int64
current int64
}
func (bw *bytesWriter) Write(p []byte) (int, error) {
defer func() { bw.current = bw.current + int64(len(p)) }()
var (
destBase = positive(bw.current - bw.destOff)
pBegin = positive(bw.destOff - bw.current)
pEnd = positive(bw.destOff + int64(len(bw.dest)) - bw.current)
)
if destBase > int64(len(bw.dest)) {
return len(p), nil
}
if pBegin >= int64(len(p)) {
return len(p), nil
}
if pEnd > int64(len(p)) {
pEnd = int64(len(p))
}
copy(bw.dest[destBase:], p[pBegin:pEnd])
return len(p), nil
}
func floor(n int64, unit int64) int64 {
return (n / unit) * unit
}
func ceil(n int64, unit int64) int64 {
return (n/unit + 1) * unit
}
func positive(n int64) int64 {
if n < 0 {
return 0
}
return n
}

View File

@ -0,0 +1,728 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2019 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the NOTICE.md file.
*/
package remote
import (
"context"
"crypto/rand"
"crypto/sha256"
"fmt"
"io"
"math/big"
"mime"
"mime/multipart"
"net/http"
"path"
"strconv"
"strings"
"sync"
"time"
"github.com/containerd/containerd/v2/core/remotes/docker"
"github.com/containerd/containerd/v2/pkg/reference"
"github.com/containerd/errdefs"
"github.com/containerd/log"
"github.com/containerd/stargz-snapshotter/cache"
"github.com/containerd/stargz-snapshotter/fs/config"
commonmetrics "github.com/containerd/stargz-snapshotter/fs/metrics/common"
"github.com/containerd/stargz-snapshotter/fs/source"
"github.com/hashicorp/go-multierror"
rhttp "github.com/hashicorp/go-retryablehttp"
digest "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)
const (
defaultChunkSize = 50000
defaultValidIntervalSec = 60
defaultFetchTimeoutSec = 300
defaultMaxRetries = 5
defaultMinWaitMSec = 30
defaultMaxWaitMSec = 300000
)
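// NewResolver returns a Resolver for remote blobs. Zero-valued fields in cfg
// are filled with the defaults defined above; CheckAlways forces the blob to
// be re-checked on every access by zeroing the valid interval.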
func NewResolver(cfg config.BlobConfig, handlers map[string]Handler) *Resolver {
if cfg.ChunkSize == 0 { // zero means "use default chunk size"
cfg.ChunkSize = defaultChunkSize
}
if cfg.ValidInterval == 0 { // zero means "use default interval"
cfg.ValidInterval = defaultValidIntervalSec
}
if cfg.CheckAlways {
cfg.ValidInterval = 0
}
if cfg.FetchTimeoutSec == 0 {
cfg.FetchTimeoutSec = defaultFetchTimeoutSec
}
if cfg.MaxRetries == 0 {
cfg.MaxRetries = defaultMaxRetries
}
if cfg.MinWaitMSec == 0 {
cfg.MinWaitMSec = defaultMinWaitMSec
}
if cfg.MaxWaitMSec == 0 {
cfg.MaxWaitMSec = defaultMaxWaitMSec
}
return &Resolver{
blobConfig: cfg,
handlers: handlers,
}
}
type Resolver struct {
blobConfig config.BlobConfig
handlers map[string]Handler
}
type fetcher interface {
fetch(ctx context.Context, rs []region, retry bool) (multipartReadCloser, error)
check() error
genID(reg region) string
}
func (r *Resolver) Resolve(ctx context.Context, hosts source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor, blobCache cache.BlobCache) (Blob, error) {
f, size, err := r.resolveFetcher(ctx, hosts, refspec, desc)
if err != nil {
return nil, err
}
blobConfig := &r.blobConfig
return makeBlob(f,
size,
blobConfig.ChunkSize,
blobConfig.PrefetchChunkSize,
blobCache,
time.Now(),
time.Duration(blobConfig.ValidInterval)*time.Second,
r,
time.Duration(blobConfig.FetchTimeoutSec)*time.Second), nil
}
func (r *Resolver) resolveFetcher(ctx context.Context, hosts source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) (f fetcher, size int64, err error) {
blobConfig := &r.blobConfig
fc := &fetcherConfig{
hosts: hosts,
refspec: refspec,
desc: desc,
maxRetries: blobConfig.MaxRetries,
minWaitMSec: time.Duration(blobConfig.MinWaitMSec) * time.Millisecond,
maxWaitMSec: time.Duration(blobConfig.MaxWaitMSec) * time.Millisecond,
}
var handlersErr error
for name, p := range r.handlers {
// TODO: allow to configure the selection of readers based on the hostname in refspec
r, size, err := p.Handle(ctx, desc)
if err != nil {
handlersErr = multierror.Append(handlersErr, err)
continue
}
log.G(ctx).WithField("handler name", name).WithField("ref", refspec.String()).WithField("digest", desc.Digest).
Debugf("contents is provided by a handler")
return &remoteFetcher{r}, size, nil
}
log.G(ctx).WithError(handlersErr).WithField("ref", refspec.String()).WithField("digest", desc.Digest).Debugf("using default handler")
hf, size, err := newHTTPFetcher(ctx, fc)
if err != nil {
return nil, 0, err
}
if blobConfig.ForceSingleRangeMode {
hf.singleRangeMode()
}
return hf, size, err
}
type fetcherConfig struct {
hosts source.RegistryHosts
refspec reference.Spec
desc ocispec.Descriptor
maxRetries int
minWaitMSec time.Duration
maxWaitMSec time.Duration
}
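// jitter returns a random duration in [duration, 2*duration) so that
// concurrent retries do not hit the registry at the same moment.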
func jitter(duration time.Duration) time.Duration {
if duration <= 0 {
return duration
}
b, err := rand.Int(rand.Reader, big.NewInt(int64(duration)))
if err != nil {
panic(err)
}
return time.Duration(b.Int64() + int64(duration))
}
// backoffStrategy extends retryablehttp's DefaultBackoff to add a random jitter to avoid overwhelming the repository
// when it comes back online
// DefaultBackoff either parses the 'Retry-After' header of the response or uses an exponential backoff
// of 2^numAttempts, limited by max
func backoffStrategy(min, max time.Duration, attemptNum int, resp *http.Response) time.Duration {
delayTime := rhttp.DefaultBackoff(min, max, attemptNum, resp)
return jitter(delayTime)
}
// retryStrategy extends retryablehttp's DefaultRetryPolicy to debug log the error when retrying
// DefaultRetryPolicy retries whenever err is non-nil (except for some url errors) or if returned
// status code is 429 or 5xx (except 501)
func retryStrategy(ctx context.Context, resp *http.Response, err error) (bool, error) {
retry, err2 := rhttp.DefaultRetryPolicy(ctx, resp, err)
if retry {
log.G(ctx).WithError(err).Debugf("Retrying request")
}
return retry, err2
}
func newHTTPFetcher(ctx context.Context, fc *fetcherConfig) (*httpFetcher, int64, error) {
reghosts, err := fc.hosts(fc.refspec)
if err != nil {
return nil, 0, err
}
desc := fc.desc
if desc.Digest.String() == "" {
return nil, 0, fmt.Errorf("Digest is mandatory in layer descriptor")
}
digest := desc.Digest
pullScope, err := docker.RepositoryScope(fc.refspec, false)
if err != nil {
return nil, 0, err
}
// Try to create fetcher until succeeded
rErr := fmt.Errorf("failed to resolve")
for _, host := range reghosts {
if host.Host == "" || strings.Contains(host.Host, "/") {
rErr = fmt.Errorf("invalid destination (host %q, ref:%q, digest:%q): %w", host.Host, fc.refspec, digest, rErr)
continue // Try another
}
// Prepare transport with authorization functionality
tr := host.Client.Transport
timeout := host.Client.Timeout
if rt, ok := tr.(*rhttp.RoundTripper); ok {
rt.Client.RetryMax = fc.maxRetries
rt.Client.RetryWaitMin = fc.minWaitMSec
rt.Client.RetryWaitMax = fc.maxWaitMSec
rt.Client.Backoff = backoffStrategy
rt.Client.CheckRetry = retryStrategy
timeout = rt.Client.HTTPClient.Timeout
}
if host.Authorizer != nil {
tr = &transport{
inner: tr,
auth: host.Authorizer,
scope: pullScope,
}
}
// Resolve redirection and get blob URL
blobURL := fmt.Sprintf("%s://%s/%s/blobs/%s",
host.Scheme,
path.Join(host.Host, host.Path),
strings.TrimPrefix(fc.refspec.Locator, fc.refspec.Hostname()+"/"),
digest)
url, header, err := redirect(ctx, blobURL, tr, timeout, host.Header)
if err != nil {
rErr = fmt.Errorf("failed to redirect (host %q, ref:%q, digest:%q): %v: %w", host.Host, fc.refspec, digest, err, rErr)
continue // Try another
}
// Get size information
// TODO: we should try to use the Size field in the descriptor here.
start := time.Now() // start time before getting layer header
size, err := getSize(ctx, url, tr, timeout, header)
commonmetrics.MeasureLatencyInMilliseconds(commonmetrics.StargzHeaderGet, digest, start) // time to get layer header
if err != nil {
rErr = fmt.Errorf("failed to get size (host %q, ref:%q, digest:%q): %v: %w", host.Host, fc.refspec, digest, err, rErr)
continue // Try another
}
// Hit one destination
return &httpFetcher{
url: url,
tr: tr,
blobURL: blobURL,
digest: digest,
timeout: timeout,
header: header,
orgHeader: host.Header,
}, size, nil
}
return nil, 0, fmt.Errorf("cannot resolve layer: %w", rErr)
}
type transport struct {
inner http.RoundTripper
auth docker.Authorizer
scope string
}
func (tr *transport) RoundTrip(req *http.Request) (*http.Response, error) {
ctx := docker.WithScope(req.Context(), tr.scope)
roundTrip := func(req *http.Request) (*http.Response, error) {
// authorize the request using docker.Authorizer
if err := tr.auth.Authorize(ctx, req); err != nil {
return nil, err
}
// send the request
return tr.inner.RoundTrip(req)
}
resp, err := roundTrip(req)
if err != nil {
return nil, err
}
// TODO: support more status codes and retries
if resp.StatusCode == http.StatusUnauthorized {
log.G(ctx).Infof("Received status code: %v. Refreshing creds...", resp.Status)
// prepare authorization for the target host using docker.Authorizer
if err := tr.auth.AddResponses(ctx, []*http.Response{resp}); err != nil {
if errdefs.IsNotImplemented(err) {
return resp, nil
}
return nil, err
}
// re-authorize and send the request
return roundTrip(req.Clone(ctx))
}
return resp, nil
}
func redirect(ctx context.Context, blobURL string, tr http.RoundTripper, timeout time.Duration, header http.Header) (url string, withHeader http.Header, err error) {
if timeout > 0 {
var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ctx, timeout)
defer cancel()
}
// We use GET request for redirect.
// gcr.io returns 200 on HEAD without Location header (2020).
// ghcr.io returns 200 on HEAD without Location header (2020).
req, err := http.NewRequestWithContext(ctx, "GET", blobURL, nil)
if err != nil {
return "", nil, fmt.Errorf("failed to make request to the registry: %w", err)
}
req.Header = http.Header{}
for k, v := range header {
req.Header[k] = v
}
req.Close = false
req.Header.Set("Range", "bytes=0-1")
res, err := tr.RoundTrip(req)
if err != nil {
return "", nil, fmt.Errorf("failed to request: %w", err)
}
defer func() {
io.Copy(io.Discard, res.Body)
res.Body.Close()
}()
if res.StatusCode/100 == 2 {
url = blobURL
withHeader = header
} else if redir := res.Header.Get("Location"); redir != "" && res.StatusCode/100 == 3 {
// TODO: Support nested redirection
url = redir
// Do not pass headers to the redirected location.
} else {
return "", nil, fmt.Errorf("failed to access to the registry with code %v", res.StatusCode)
}
return
}
func getSize(ctx context.Context, url string, tr http.RoundTripper, timeout time.Duration, header http.Header) (int64, error) {
if timeout > 0 {
var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ctx, timeout)
defer cancel()
}
req, err := http.NewRequestWithContext(ctx, "HEAD", url, nil)
if err != nil {
return 0, err
}
req.Header = http.Header{}
for k, v := range header {
req.Header[k] = v
}
req.Close = false
res, err := tr.RoundTrip(req)
if err != nil {
return 0, err
}
defer res.Body.Close()
if res.StatusCode == http.StatusOK {
return strconv.ParseInt(res.Header.Get("Content-Length"), 10, 64)
}
headStatusCode := res.StatusCode
// Failed to do HEAD request. Fall back to GET.
// ghcr.io (https://github-production-container-registry.s3.amazonaws.com) doesn't allow
// HEAD request (2020).
req, err = http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return 0, fmt.Errorf("failed to make request to the registry: %w", err)
}
req.Header = http.Header{}
for k, v := range header {
req.Header[k] = v
}
req.Close = false
req.Header.Set("Range", "bytes=0-1")
res, err = tr.RoundTrip(req)
if err != nil {
return 0, fmt.Errorf("failed to request: %w", err)
}
defer func() {
io.Copy(io.Discard, res.Body)
res.Body.Close()
}()
if res.StatusCode == http.StatusOK {
return strconv.ParseInt(res.Header.Get("Content-Length"), 10, 64)
} else if res.StatusCode == http.StatusPartialContent {
_, size, err := parseRange(res.Header.Get("Content-Range"))
return size, err
}
return 0, fmt.Errorf("failed to get size with code (HEAD=%v, GET=%v)",
headStatusCode, res.StatusCode)
}
type httpFetcher struct {
url string
urlMu sync.Mutex
tr http.RoundTripper
blobURL string
digest digest.Digest
singleRange bool
singleRangeMu sync.Mutex
timeout time.Duration
header http.Header
orgHeader http.Header
}
type multipartReadCloser interface {
Next() (region, io.Reader, error)
Close() error
}
func (f *httpFetcher) fetch(ctx context.Context, rs []region, retry bool) (multipartReadCloser, error) {
if len(rs) == 0 {
return nil, fmt.Errorf("no request queried")
}
var (
tr = f.tr
singleRangeMode = f.isSingleRangeMode()
)
// squash the requested chunks to reduce the total size of the request header
// (servers generally limit the size of headers)
// TODO: when a request has too many ranges, we need to divide it into
// multiple requests to avoid huge headers.
var s regionSet
for _, reg := range rs {
s.add(reg)
}
requests := s.rs
if singleRangeMode {
// Squash requests if the layer doesn't support multi range.
requests = []region{superRegion(requests)}
}
// Request to the registry
f.urlMu.Lock()
url := f.url
f.urlMu.Unlock()
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return nil, err
}
req.Header = http.Header{}
for k, v := range f.header {
req.Header[k] = v
}
var ranges string
for _, reg := range requests {
ranges += fmt.Sprintf("%d-%d,", reg.b, reg.e)
}
req.Header.Add("Range", fmt.Sprintf("bytes=%s", ranges[:len(ranges)-1]))
req.Header.Add("Accept-Encoding", "identity")
req.Close = false
// Recording the roundtrip latency for remote registry GET operation.
start := time.Now()
res, err := tr.RoundTrip(req) // NOT DefaultClient; don't want redirects
commonmetrics.MeasureLatencyInMilliseconds(commonmetrics.RemoteRegistryGet, f.digest, start)
if err != nil {
return nil, err
}
if res.StatusCode == http.StatusOK {
// We are getting the whole blob in one part (= status 200)
size, err := strconv.ParseInt(res.Header.Get("Content-Length"), 10, 64)
if err != nil {
return nil, fmt.Errorf("failed to parse Content-Length: %w", err)
}
return newSinglePartReader(region{0, size - 1}, res.Body), nil
} else if res.StatusCode == http.StatusPartialContent {
mediaType, params, err := mime.ParseMediaType(res.Header.Get("Content-Type"))
if err != nil {
return nil, fmt.Errorf("invalid media type %q: %w", mediaType, err)
}
if strings.HasPrefix(mediaType, "multipart/") {
// We are getting a set of chunks as a multipart body.
return newMultiPartReader(res.Body, params["boundary"]), nil
}
// We are getting single range
reg, _, err := parseRange(res.Header.Get("Content-Range"))
if err != nil {
return nil, fmt.Errorf("failed to parse Content-Range: %w", err)
}
return newSinglePartReader(reg, res.Body), nil
} else if retry && res.StatusCode == http.StatusForbidden {
log.G(ctx).Infof("Received status code: %v. Refreshing URL and retrying...", res.Status)
// re-redirect and retry this once.
if err := f.refreshURL(ctx); err != nil {
return nil, fmt.Errorf("failed to refresh URL on %v: %w", res.Status, err)
}
return f.fetch(ctx, rs, false)
} else if retry && res.StatusCode == http.StatusBadRequest && !singleRangeMode {
log.G(ctx).Infof("Received status code: %v. Setting single range mode and retrying...", res.Status)
// gcr.io (https://storage.googleapis.com) returns 400 on multi-range request (2020 #81)
f.singleRangeMode() // falls back to single range request mode
return f.fetch(ctx, rs, false) // retries with the single range mode
}
return nil, fmt.Errorf("unexpected status code: %v", res.Status)
}
func (f *httpFetcher) check() error {
ctx := context.Background()
if f.timeout > 0 {
var cancel context.CancelFunc
ctx, cancel = context.WithTimeout(ctx, f.timeout)
defer cancel()
}
f.urlMu.Lock()
url := f.url
f.urlMu.Unlock()
req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
if err != nil {
return fmt.Errorf("check failed: failed to make request: %w", err)
}
req.Header = http.Header{}
for k, v := range f.header {
req.Header[k] = v
}
req.Close = false
req.Header.Set("Range", "bytes=0-1")
res, err := f.tr.RoundTrip(req)
if err != nil {
return fmt.Errorf("check failed: failed to request to registry: %w", err)
}
defer func() {
io.Copy(io.Discard, res.Body)
res.Body.Close()
}()
if res.StatusCode == http.StatusOK || res.StatusCode == http.StatusPartialContent {
return nil
} else if res.StatusCode == http.StatusForbidden {
// Try to re-redirect this blob
rCtx := context.Background()
if f.timeout > 0 {
var rCancel context.CancelFunc
rCtx, rCancel = context.WithTimeout(rCtx, f.timeout)
defer rCancel()
}
if err := f.refreshURL(rCtx); err == nil {
return nil
}
return fmt.Errorf("failed to refresh URL on status %v", res.Status)
}
return fmt.Errorf("unexpected status code %v", res.StatusCode)
}
func (f *httpFetcher) refreshURL(ctx context.Context) error {
newURL, headers, err := redirect(ctx, f.blobURL, f.tr, f.timeout, f.orgHeader)
if err != nil {
return err
}
f.urlMu.Lock()
f.url = newURL
f.header = headers
f.urlMu.Unlock()
return nil
}
func (f *httpFetcher) genID(reg region) string {
sum := sha256.Sum256([]byte(fmt.Sprintf("%s-%d-%d", f.blobURL, reg.b, reg.e)))
return fmt.Sprintf("%x", sum)
}
func (f *httpFetcher) singleRangeMode() {
f.singleRangeMu.Lock()
f.singleRange = true
f.singleRangeMu.Unlock()
}
func (f *httpFetcher) isSingleRangeMode() bool {
f.singleRangeMu.Lock()
r := f.singleRange
f.singleRangeMu.Unlock()
return r
}
func newSinglePartReader(reg region, rc io.ReadCloser) multipartReadCloser {
return &singlepartReader{
r: rc,
Closer: rc,
reg: reg,
}
}
type singlepartReader struct {
io.Closer
r io.Reader
reg region
called bool
}
func (sr *singlepartReader) Next() (region, io.Reader, error) {
if !sr.called {
sr.called = true
return sr.reg, sr.r, nil
}
return region{}, nil, io.EOF
}
func newMultiPartReader(rc io.ReadCloser, boundary string) multipartReadCloser {
return &multipartReader{
m: multipart.NewReader(rc, boundary),
Closer: rc,
}
}
type multipartReader struct {
io.Closer
m *multipart.Reader
}
func (sr *multipartReader) Next() (region, io.Reader, error) {
p, err := sr.m.NextPart()
if err != nil {
return region{}, nil, err
}
reg, _, err := parseRange(p.Header.Get("Content-Range"))
if err != nil {
return region{}, nil, fmt.Errorf("failed to parse Content-Range: %w", err)
}
return reg, p, nil
}
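// parseRange parses a Content-Range header of the form "bytes <begin>-<end>/<size>"
// and returns the corresponding region together with the total blob size.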
func parseRange(header string) (region, int64, error) {
submatches := contentRangeRegexp.FindStringSubmatch(header)
if len(submatches) < 4 {
return region{}, 0, fmt.Errorf("Content-Range %q doesn't have enough information", header)
}
begin, err := strconv.ParseInt(submatches[1], 10, 64)
if err != nil {
return region{}, 0, fmt.Errorf("failed to parse beginning offset %q: %w", submatches[1], err)
}
end, err := strconv.ParseInt(submatches[2], 10, 64)
if err != nil {
return region{}, 0, fmt.Errorf("failed to parse end offset %q: %w", submatches[2], err)
}
blobSize, err := strconv.ParseInt(submatches[3], 10, 64)
if err != nil {
return region{}, 0, fmt.Errorf("failed to parse blob size %q: %w", submatches[3], err)
}
return region{begin, end}, blobSize, nil
}
type Option func(*options)
type options struct {
ctx context.Context
cacheOpts []cache.Option
}
func WithContext(ctx context.Context) Option {
return func(opts *options) {
opts.ctx = ctx
}
}
func WithCacheOpts(cacheOpts ...cache.Option) Option {
return func(opts *options) {
opts.cacheOpts = cacheOpts
}
}
type remoteFetcher struct {
r Fetcher
}
func (r *remoteFetcher) fetch(ctx context.Context, rs []region, retry bool) (multipartReadCloser, error) {
var s regionSet
for _, reg := range rs {
s.add(reg)
}
reg := superRegion(s.rs)
rc, err := r.r.Fetch(ctx, reg.b, reg.size())
if err != nil {
return nil, err
}
return newSinglePartReader(reg, rc), nil
}
func (r *remoteFetcher) check() error {
return r.r.Check()
}
func (r *remoteFetcher) genID(reg region) string {
return r.r.GenID(reg.b, reg.size())
}
type Handler interface {
Handle(ctx context.Context, desc ocispec.Descriptor) (fetcher Fetcher, size int64, err error)
}
type Fetcher interface {
Fetch(ctx context.Context, off int64, size int64) (io.ReadCloser, error)
Check() error
GenID(off int64, size int64) string
}

View File

@ -0,0 +1,109 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Copyright 2019 The Go Authors. All rights reserved.
Use of this source code is governed by a BSD-style
license that can be found in the NOTICE.md file.
*/
package remote
// region is an HTTP-range-request-compliant range.
// "b" is the beginning byte of the range and "e" is the end.
// "e" must be inclusive, matching HTTP's range expression.
type region struct{ b, e int64 }
func (c region) size() int64 {
return c.e - c.b + 1
}
func superRegion(regs []region) region {
s := regs[0]
for _, reg := range regs {
if reg.b < s.b {
s.b = reg.b
}
if reg.e > s.e {
s.e = reg.e
}
}
return s
}
// regionSet is a set of regions
type regionSet struct {
rs []region // must be kept sorted
}
// add attempts to merge r into rs.rs, squashing the regions into as
// few entries as possible. This operation takes O(n).
// TODO: more efficient way to do it.
func (rs *regionSet) add(r region) {
// Iterate over the sorted region slice from the tail.
// a) When an overlap occurs, adjust `r` to fully contain the region `l`
// being examined and remove `l` from the region slice.
// b) Once l.e becomes less than r.b, no overlap can occur again, so immediately
// insert `r`, which fully contains all overlapping regions, into the region slice.
// Here, `r` is inserted into the region slice keeping it sorted, without
// overlapping any regions.
// *) If any `l` contains `r`, we don't need to do anything, so return immediately.
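// For example, with three successive calls
//   add(region{0, 3}); add(region{8, 11}); add(region{2, 9})
// the slice ends up as []region{{0, 11}}: the third region bridges the
// first two, so all of them are squashed into a single range.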
for i := len(rs.rs) - 1; i >= 0; i-- {
l := &rs.rs[i]
// *) l contains r
if l.b <= r.b && r.e <= l.e {
return
}
// a) r overlaps l, so adjust r to fully contain l and remove l
// from the region slice.
if l.b <= r.b && r.b <= l.e+1 && l.e <= r.e {
r.b = l.b
rs.rs = append(rs.rs[:i], rs.rs[i+1:]...)
continue
}
if r.b <= l.b && l.b <= r.e+1 && r.e <= l.e {
r.e = l.e
rs.rs = append(rs.rs[:i], rs.rs[i+1:]...)
continue
}
if r.b <= l.b && l.e <= r.e {
rs.rs = append(rs.rs[:i], rs.rs[i+1:]...)
continue
}
// b) No overlap will occur after this iteration. Insert r into the
// region slice immediately.
if l.e < r.b {
rs.rs = append(rs.rs[:i+1], append([]region{r}, rs.rs[i+1:]...)...)
return
}
// No overlap yet. See the next region.
}
// r is the topmost region among regions in the slice.
rs.rs = append([]region{r}, rs.rs...)
}
func (rs *regionSet) totalSize() int64 {
var sz int64
for _, f := range rs.rs {
sz += f.size()
}
return sz
}

View File

@ -0,0 +1,271 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package source
import (
"context"
"fmt"
"strings"
"github.com/containerd/containerd/v2/core/images"
"github.com/containerd/containerd/v2/core/remotes/docker"
"github.com/containerd/containerd/v2/pkg/labels"
"github.com/containerd/containerd/v2/pkg/reference"
"github.com/containerd/stargz-snapshotter/fs/config"
digest "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)
// GetSources is a function for converting snapshot labels into typed blob sources
// information. This package defines a default converter which provides source
// information based on some labels but implementations aren't required to use labels.
// Implementations are allowed to return several sources (registry config + image refs)
// about the blob.
type GetSources func(labels map[string]string) (source []Source, err error)
// RegistryHosts returns a list of registries that provides the specified image.
type RegistryHosts func(reference.Spec) ([]docker.RegistryHost, error)
// Source is a typed blob source information. This contains information about
// a blob stored in registries and some contexts of the blob.
type Source struct {
// Hosts is a registry configuration where this blob is stored.
Hosts RegistryHosts
// Name is an image reference which contains this blob.
Name reference.Spec
// Target is a descriptor of this blob.
Target ocispec.Descriptor
// Manifest is an image manifest which contains the blob. This will
// be used by the filesystem to pre-resolve some layers contained in
// the manifest.
// Currently, only layer digests (Manifest.Layers.Digest) will be used.
Manifest ocispec.Manifest
}
const (
// targetRefLabel is a label which contains image reference.
targetRefLabel = "containerd.io/snapshot/remote/stargz.reference"
// targetDigestLabel is a label which contains layer digest.
targetDigestLabel = "containerd.io/snapshot/remote/stargz.digest"
// targetImageLayersLabel is a label which contains layer digests contained in
// the target image.
targetImageLayersLabel = "containerd.io/snapshot/remote/stargz.layers"
// targetImageURLsLabelPrefix is a label prefix which constructs a map from the layer index to
// urls of the layer descriptor.
targetImageURLsLabelPrefix = "containerd.io/snapshot/remote/urls."
// targetURLsLabel is a label which contains the layer URL. This is only used to pass the URL from containerd
// to the snapshotter.
targetURLsLabel = "containerd.io/snapshot/remote/urls"
)
// FromDefaultLabels returns a function for converting snapshot labels to
// source information based on labels.
func FromDefaultLabels(hosts RegistryHosts) GetSources {
return func(labels map[string]string) ([]Source, error) {
refStr, ok := labels[targetRefLabel]
if !ok {
return nil, fmt.Errorf("reference hasn't been passed")
}
refspec, err := reference.Parse(refStr)
if err != nil {
return nil, err
}
digestStr, ok := labels[targetDigestLabel]
if !ok {
return nil, fmt.Errorf("digest hasn't been passed")
}
target, err := digest.Parse(digestStr)
if err != nil {
return nil, err
}
var neighboringLayers []ocispec.Descriptor
if l, ok := labels[targetImageLayersLabel]; ok {
layersStr := strings.Split(l, ",")
for i, l := range layersStr {
d, err := digest.Parse(l)
if err != nil {
return nil, err
}
if d.String() != target.String() {
desc := ocispec.Descriptor{Digest: d}
if urls, ok := labels[targetImageURLsLabelPrefix+fmt.Sprintf("%d", i)]; ok {
desc.URLs = strings.Split(urls, ",")
}
neighboringLayers = append(neighboringLayers, desc)
}
}
}
targetDesc := ocispec.Descriptor{
Digest: target,
Annotations: labels,
}
if targetURLs, ok := labels[targetURLsLabel]; ok {
targetDesc.URLs = append(targetDesc.URLs, strings.Split(targetURLs, ",")...)
}
return []Source{
{
Hosts: hosts,
Name: refspec,
Target: targetDesc,
Manifest: ocispec.Manifest{Layers: append([]ocispec.Descriptor{targetDesc}, neighboringLayers...)},
},
}, nil
}
}
// AppendDefaultLabelsHandlerWrapper makes a handler which appends image's basic
// information to each layer descriptor as annotations during unpack. These
// annotations will be passed to this remote snapshotter as labels and used to
// construct source information.
func AppendDefaultLabelsHandlerWrapper(ref string, prefetchSize int64) func(f images.Handler) images.Handler {
return func(f images.Handler) images.Handler {
return images.HandlerFunc(func(ctx context.Context, desc ocispec.Descriptor) ([]ocispec.Descriptor, error) {
children, err := f.Handle(ctx, desc)
if err != nil {
return nil, err
}
switch desc.MediaType {
case ocispec.MediaTypeImageManifest, images.MediaTypeDockerSchema2Manifest:
for i := range children {
c := &children[i]
if images.IsLayerType(c.MediaType) {
if c.Annotations == nil {
c.Annotations = make(map[string]string)
}
c.Annotations[targetRefLabel] = ref
c.Annotations[targetDigestLabel] = c.Digest.String()
var layers string
for i, l := range children[i:] {
if images.IsLayerType(l.MediaType) {
ls := fmt.Sprintf("%s,", l.Digest.String())
// This avoids the label hitting the size limitation.
// Skipping layers is allowed here and only affects performance.
if err := labels.Validate(targetImageLayersLabel, layers+ls); err != nil {
break
}
layers += ls
// Store URLs of the neighbouring layer as well.
urlsKey := targetImageURLsLabelPrefix + fmt.Sprintf("%d", i)
c.Annotations[urlsKey] = appendWithValidation(urlsKey, l.URLs)
}
}
c.Annotations[targetImageLayersLabel] = strings.TrimSuffix(layers, ",")
c.Annotations[config.TargetPrefetchSizeLabel] = fmt.Sprintf("%d", prefetchSize)
// store the URL in an annotation to let containerd pass it to the snapshotter
c.Annotations[targetURLsLabel] = appendWithValidation(targetURLsLabel, c.URLs)
}
}
}
return children, nil
})
}
}
func appendWithValidation(key string, values []string) string {
var v string
for _, u := range values {
s := fmt.Sprintf("%s,", u)
if err := labels.Validate(key, v+s); err != nil {
break
}
v += s
}
return strings.TrimSuffix(v, ",")
}
// TODO: switch to "github.com/containerd/containerd/pkg/snapshotters" once all tools using
//
// stargz-snapshotter (e.g. k3s) move to containerd version where that pkg is available.
const (
// targetImageLayersLabel is a label which contains layer digests contained in
// the target image and will be passed to snapshotters for preparing layers in
// parallel. Skipping some layers is allowed and only affects performance.
targetImageLayersLabelContainerd = "containerd.io/snapshot/cri.image-layers"
)
// AppendExtraLabelsHandler adds optional labels that aren't provided by
// "github.com/containerd/containerd/pkg/snapshotters" but can be used for stargz snapshotter's extra functionalities.
func AppendExtraLabelsHandler(prefetchSize int64, wrapper func(images.Handler) images.Handler) func(images.Handler) images.Handler {
return func(f images.Handler) images.Handler {
return images.HandlerFunc(func(ctx context.Context, desc ocispec.Descriptor) ([]ocispec.Descriptor, error) {
children, err := wrapper(f).Handle(ctx, desc)
if err != nil {
return nil, err
}
switch desc.MediaType {
case ocispec.MediaTypeImageManifest, images.MediaTypeDockerSchema2Manifest:
for i := range children {
c := &children[i]
if !images.IsLayerType(c.MediaType) {
continue
}
if _, ok := c.Annotations[targetURLsLabel]; !ok { // nop if this key is already set
c.Annotations[targetURLsLabel] = appendWithValidation(targetURLsLabel, c.URLs)
}
if _, ok := c.Annotations[config.TargetPrefetchSizeLabel]; !ok { // nop if this key is already set
c.Annotations[config.TargetPrefetchSizeLabel] = fmt.Sprintf("%d", prefetchSize)
}
// Store URLs of the neighbouring layer as well.
nlayers, ok := c.Annotations[targetImageLayersLabelContainerd]
if !ok {
continue
}
for j, dstr := range strings.Split(nlayers, ",") {
d, err := digest.Parse(dstr)
if err != nil {
return nil, err
}
l, ok := layerFromDigest(children, d)
if !ok {
continue
}
urlsKey := targetImageURLsLabelPrefix + fmt.Sprintf("%d", j)
if _, ok := c.Annotations[urlsKey]; !ok { // nop if this key is already set
c.Annotations[urlsKey] = appendWithValidation(urlsKey, l.URLs)
}
}
}
}
return children, nil
})
}
}
func layerFromDigest(layers []ocispec.Descriptor, target digest.Digest) (ocispec.Descriptor, bool) {
for _, l := range layers {
if l.Digest == target {
return l, images.IsLayerType(l.MediaType)
}
}
return ocispec.Descriptor{}, false
}

View File

@ -0,0 +1,283 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package memory
import (
"fmt"
"io"
"math"
"os"
"time"
"github.com/containerd/stargz-snapshotter/estargz"
"github.com/containerd/stargz-snapshotter/metadata"
digest "github.com/opencontainers/go-digest"
)
type reader struct {
r *estargz.Reader
rootID uint32
idMap map[uint32]*estargz.TOCEntry
idOfEntry map[string]uint32
estargzOpts []estargz.OpenOption
}
func newReader(er *estargz.Reader, rootID uint32, idMap map[uint32]*estargz.TOCEntry, idOfEntry map[string]uint32, estargzOpts []estargz.OpenOption) *reader {
return &reader{r: er, rootID: rootID, idMap: idMap, idOfEntry: idOfEntry, estargzOpts: estargzOpts}
}
func NewReader(sr *io.SectionReader, opts ...metadata.Option) (metadata.Reader, error) {
var rOpts metadata.Options
for _, o := range opts {
if err := o(&rOpts); err != nil {
return nil, fmt.Errorf("failed to apply option: %w", err)
}
}
telemetry := &estargz.Telemetry{}
if rOpts.Telemetry != nil {
telemetry.GetFooterLatency = estargz.MeasureLatencyHook(rOpts.Telemetry.GetFooterLatency)
telemetry.GetTocLatency = estargz.MeasureLatencyHook(rOpts.Telemetry.GetTocLatency)
telemetry.DeserializeTocLatency = estargz.MeasureLatencyHook(rOpts.Telemetry.DeserializeTocLatency)
}
var decompressors []estargz.Decompressor
for _, d := range rOpts.Decompressors {
decompressors = append(decompressors, d)
}
erOpts := []estargz.OpenOption{
estargz.WithTOCOffset(rOpts.TOCOffset),
estargz.WithTelemetry(telemetry),
estargz.WithDecompressors(decompressors...),
}
er, err := estargz.Open(sr, erOpts...)
if err != nil {
return nil, err
}
root, ok := er.Lookup("")
if !ok {
return nil, fmt.Errorf("failed to get root node")
}
rootID, idMap, idOfEntry, err := assignIDs(er, root)
if err != nil {
return nil, err
}
r := newReader(er, rootID, idMap, idOfEntry, erOpts)
return r, nil
}
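// Illustrative usage sketch (the file path and error handling are placeholders, not
// part of this package): open an eStargz blob from disk and build an in-memory
// metadata reader over it.
//
//    f, err := os.Open("layer.esgz")
//    // ... handle err ...
//    fi, _ := f.Stat()
//    mr, err := NewReader(io.NewSectionReader(f, 0, fi.Size()))
//    // ... handle err ...
//    rootID := mr.RootID()
//    id, attr, err := mr.GetChild(rootID, "etc")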
// assignIDs assigns an ID to each TOC entry and returns a mapping from ID to entry and vice versa.
func assignIDs(er *estargz.Reader, e *estargz.TOCEntry) (rootID uint32, idMap map[uint32]*estargz.TOCEntry, idOfEntry map[string]uint32, err error) {
idMap = make(map[uint32]*estargz.TOCEntry)
idOfEntry = make(map[string]uint32)
curID := uint32(0)
nextID := func() (uint32, error) {
if curID == math.MaxUint32 {
return 0, fmt.Errorf("sequence id too large")
}
curID++
return curID, nil
}
var mapChildren func(e *estargz.TOCEntry) (uint32, error)
mapChildren = func(e *estargz.TOCEntry) (uint32, error) {
if e.Type == "hardlink" {
return 0, fmt.Errorf("unexpected type \"hardlink\": this should be replaced to the destination entry")
}
var ok bool
id, ok := idOfEntry[e.Name]
if !ok {
id, err = nextID()
if err != nil {
return 0, err
}
idMap[id] = e
idOfEntry[e.Name] = id
}
e.ForeachChild(func(_ string, ent *estargz.TOCEntry) bool {
_, err = mapChildren(ent)
return err == nil
})
if err != nil {
return 0, err
}
return id, nil
}
rootID, err = mapChildren(e)
if err != nil {
return 0, nil, nil, err
}
return rootID, idMap, idOfEntry, nil
}
func (r *reader) RootID() uint32 {
return r.rootID
}
func (r *reader) TOCDigest() digest.Digest {
return r.r.TOCDigest()
}
func (r *reader) GetOffset(id uint32) (offset int64, err error) {
e, ok := r.idMap[id]
if !ok {
return 0, fmt.Errorf("entry %d not found", id)
}
return e.Offset, nil
}
func (r *reader) GetAttr(id uint32) (attr metadata.Attr, err error) {
e, ok := r.idMap[id]
if !ok {
err = fmt.Errorf("entry %d not found", id)
return
}
// TODO: zero copy
attrFromTOCEntry(e, &attr)
return
}
func (r *reader) GetChild(pid uint32, base string) (id uint32, attr metadata.Attr, err error) {
e, ok := r.idMap[pid]
if !ok {
err = fmt.Errorf("parent entry %d not found", pid)
return
}
child, ok := e.LookupChild(base)
if !ok {
err = fmt.Errorf("child %q of entry %d not found", base, pid)
return
}
cid, ok := r.idOfEntry[child.Name]
if !ok {
err = fmt.Errorf("id of entry %q not found", base)
return
}
// TODO: zero copy
attrFromTOCEntry(child, &attr)
return cid, attr, nil
}
func (r *reader) ForeachChild(id uint32, f func(name string, id uint32, mode os.FileMode) bool) error {
e, ok := r.idMap[id]
if !ok {
return fmt.Errorf("parent entry %d not found", id)
}
var err error
e.ForeachChild(func(baseName string, ent *estargz.TOCEntry) bool {
id, ok := r.idOfEntry[ent.Name]
if !ok {
err = fmt.Errorf("id of child entry %q not found", baseName)
return false
}
return f(baseName, id, ent.Stat().Mode())
})
return err
}
func (r *reader) OpenFile(id uint32) (metadata.File, error) {
e, ok := r.idMap[id]
if !ok {
return nil, fmt.Errorf("entry %d not found", id)
}
sr, err := r.r.OpenFile(e.Name)
if err != nil {
return nil, err
}
return &file{r, e, sr}, nil
}
func (r *reader) OpenFileWithPreReader(id uint32, preRead func(id uint32, chunkOffset, chunkSize int64, chunkDigest string, r io.Reader) error) (metadata.File, error) {
e, ok := r.idMap[id]
if !ok {
return nil, fmt.Errorf("entry %d not found", id)
}
sr, err := r.r.OpenFileWithPreReader(e.Name, func(e *estargz.TOCEntry, chunkR io.Reader) error {
cid, ok := r.idOfEntry[e.Name]
if !ok {
return fmt.Errorf("id of entry %q not found", e.Name)
}
return preRead(cid, e.ChunkOffset, e.ChunkSize, e.ChunkDigest, chunkR)
})
if err != nil {
return nil, err
}
return &file{r, e, sr}, nil
}
func (r *reader) Clone(sr *io.SectionReader) (metadata.Reader, error) {
er, err := estargz.Open(sr, r.estargzOpts...)
if err != nil {
return nil, err
}
return newReader(er, r.rootID, r.idMap, r.idOfEntry, r.estargzOpts), nil
}
func (r *reader) Close() error {
return nil
}
type file struct {
r *reader
e *estargz.TOCEntry
sr *io.SectionReader
}
func (r *file) ChunkEntryForOffset(offset int64) (off int64, size int64, dgst string, ok bool) {
e, ok := r.r.r.ChunkEntryForOffset(r.e.Name, offset)
if !ok {
return 0, 0, "", false
}
dgst = e.Digest
if e.ChunkDigest != "" {
// NOTE: "reg" can also contain ChunkDigest (e.g. when "reg" is the first entry of
// a chunked file)
dgst = e.ChunkDigest
}
return e.ChunkOffset, e.ChunkSize, dgst, true
}
func (r *file) ReadAt(p []byte, off int64) (n int, err error) {
return r.sr.ReadAt(p, off)
}
func (r *reader) NumOfNodes() (i int, _ error) {
return len(r.idMap), nil
}
// TODO: share it with db pkg
func attrFromTOCEntry(src *estargz.TOCEntry, dst *metadata.Attr) *metadata.Attr {
dst.Size = src.Size
dst.ModTime, _ = time.Parse(time.RFC3339, src.ModTime3339)
dst.LinkName = src.LinkName
dst.Mode = src.Stat().Mode()
dst.UID = src.UID
dst.GID = src.GID
dst.DevMajor = src.DevMajor
dst.DevMinor = src.DevMinor
dst.Xattrs = src.Xattrs
dst.NumLink = src.NumLink
return dst
}

View File

@ -0,0 +1,139 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metadata
import (
"io"
"os"
"time"
"github.com/containerd/stargz-snapshotter/estargz"
digest "github.com/opencontainers/go-digest"
)
// Attr represents the attributes of a node.
type Attr struct {
// Size, for regular files, is the logical size of the file.
Size int64
// ModTime is the modification time of the node.
ModTime time.Time
// LinkName, for symlinks, is the link target.
LinkName string
// Mode is the permission and mode bits.
Mode os.FileMode
// UID is the user ID of the owner.
UID int
// GID is the group ID of the owner.
GID int
// DevMajor is the major device number for the device.
DevMajor int
// DevMinor is the minor device number for the device.
DevMinor int
// Xattrs are the extended attributes for the node.
Xattrs map[string][]byte
// NumLink is the number of names pointing to this node.
NumLink int
}
// Store reads the provided eStargz blob and creates a metadata reader.
type Store func(sr *io.SectionReader, opts ...Option) (Reader, error)
// Reader provides access to file metadata of a blob.
type Reader interface {
RootID() uint32
TOCDigest() digest.Digest
GetOffset(id uint32) (offset int64, err error)
GetAttr(id uint32) (attr Attr, err error)
GetChild(pid uint32, base string) (id uint32, attr Attr, err error)
ForeachChild(id uint32, f func(name string, id uint32, mode os.FileMode) bool) error
OpenFile(id uint32) (File, error)
OpenFileWithPreReader(id uint32, preRead func(id uint32, chunkOffset, chunkSize int64, chunkDigest string, r io.Reader) error) (File, error)
Clone(sr *io.SectionReader) (Reader, error)
Close() error
}
type File interface {
ChunkEntryForOffset(offset int64) (off int64, size int64, dgst string, ok bool)
ReadAt(p []byte, off int64) (n int, err error)
}
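// Illustrative sketch (r is any Reader implementation; printing is only for
// demonstration): recursively walk the file tree of a blob starting at the root.
//
//    var walk func(id uint32, path string) error
//    walk = func(id uint32, path string) error {
//        return r.ForeachChild(id, func(name string, cid uint32, mode os.FileMode) bool {
//            fmt.Println(path + "/" + name)
//            if mode.IsDir() {
//                _ = walk(cid, path+"/"+name)
//            }
//            return true
//        })
//    }
//    _ = walk(r.RootID(), "")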
type Decompressor interface {
estargz.Decompressor
// DecompressTOC decompresses the passed blob and returns a reader of TOC JSON.
//
// If the tocOffset returned by ParseFooter is < 0, we assume the TOC isn't contained in the blob.
// In that case a nil reader is passed to DecompressTOC, which is expected to acquire the TOC from an
// external location and return it.
DecompressTOC(io.Reader) (tocJSON io.ReadCloser, err error)
}
type Options struct {
TOCOffset int64
Telemetry *Telemetry
Decompressors []Decompressor
}
// Option is an option to configure the behaviour of reader.
type Option func(o *Options) error
// WithTOCOffset option specifies the offset of TOC
func WithTOCOffset(tocOffset int64) Option {
return func(o *Options) error {
o.TOCOffset = tocOffset
return nil
}
}
// WithTelemetry option specifies the telemetry hooks
func WithTelemetry(telemetry *Telemetry) Option {
return func(o *Options) error {
o.Telemetry = telemetry
return nil
}
}
// WithDecompressors option specifies decompressors to use.
// Default is gzip-based decompressor.
func WithDecompressors(decompressors ...Decompressor) Option {
return func(o *Options) error {
o.Decompressors = decompressors
return nil
}
}
// MeasureLatencyHook is a func which takes a start time and records the elapsed time since then.
type MeasureLatencyHook func(time.Time)
// Telemetry defines telemetry hooks. By implementing these hooks you should be able to record
// the latency metrics of the respective steps of the estargz open operation.
type Telemetry struct {
GetFooterLatency MeasureLatencyHook // measure time to get stargz footer (in milliseconds)
GetTocLatency MeasureLatencyHook // measure time to GET TOC JSON (in milliseconds)
DeserializeTocLatency MeasureLatencyHook // measure time to deserialize TOC JSON (in milliseconds)
}
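// Illustrative sketch of composing these options when constructing a reader (store is
// any Store implementation, e.g. the memory package's NewReader; recordMS and the
// custom decompressor myDecompressor are assumptions for illustration):
//
//    r, err := store(sr,
//        WithTOCOffset(tocOff),
//        WithTelemetry(&Telemetry{GetTocLatency: func(start time.Time) { recordMS(time.Since(start)) }}),
//        WithDecompressors(myDecompressor),
//    )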

View File

@ -0,0 +1,154 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package estargz
import (
"context"
"fmt"
"io"
"github.com/containerd/containerd/v2/core/content"
"github.com/containerd/containerd/v2/core/images"
"github.com/containerd/containerd/v2/core/images/converter"
"github.com/containerd/containerd/v2/core/images/converter/uncompress"
"github.com/containerd/containerd/v2/pkg/archive/compression"
"github.com/containerd/containerd/v2/pkg/labels"
"github.com/containerd/errdefs"
"github.com/containerd/stargz-snapshotter/estargz"
"github.com/containerd/stargz-snapshotter/util/ioutils"
"github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)
// LayerConvertWithLayerAndCommonOptsFunc converts legacy tar.gz layers into eStargz tar.gz
// layers. Media type is unchanged. Should be used in conjunction with WithDockerToOCI(). See
// LayerConvertFunc for more details. The difference between this function and
// LayerConvertFunc is that this allows specifying additional eStargz options per layer.
func LayerConvertWithLayerAndCommonOptsFunc(opts map[digest.Digest][]estargz.Option, commonOpts ...estargz.Option) converter.ConvertFunc {
if opts == nil {
return LayerConvertFunc(commonOpts...)
}
return func(ctx context.Context, cs content.Store, desc ocispec.Descriptor) (*ocispec.Descriptor, error) {
// TODO: enable specifying options per layer "index" because it's possible that there are
// two layers having the same digest in an image (but this should be a rare case)
return LayerConvertFunc(append(commonOpts, opts[desc.Digest]...)...)(ctx, cs, desc)
}
}
// LayerConvertFunc converts legacy tar.gz layers into eStargz tar.gz layers.
// Media type is unchanged.
//
// Should be used in conjunction with WithDockerToOCI().
//
// Otherwise "containerd.io/snapshot/stargz/toc.digest" annotation will be lost,
// because the Docker media type does not support layer annotations.
func LayerConvertFunc(opts ...estargz.Option) converter.ConvertFunc {
return func(ctx context.Context, cs content.Store, desc ocispec.Descriptor) (*ocispec.Descriptor, error) {
if !images.IsLayerType(desc.MediaType) {
// No conversion. No need to return an error here.
return nil, nil
}
info, err := cs.Info(ctx, desc.Digest)
if err != nil {
return nil, err
}
labelz := info.Labels
if labelz == nil {
labelz = make(map[string]string)
}
ra, err := cs.ReaderAt(ctx, desc)
if err != nil {
return nil, err
}
defer ra.Close()
sr := io.NewSectionReader(ra, 0, desc.Size)
blob, err := estargz.Build(sr, append(opts, estargz.WithContext(ctx))...)
if err != nil {
return nil, err
}
defer blob.Close()
ref := fmt.Sprintf("convert-estargz-from-%s", desc.Digest)
w, err := content.OpenWriter(ctx, cs, content.WithRef(ref))
if err != nil {
return nil, err
}
defer w.Close()
// Reset the writing position
// An old writer may remain without having been aborted
// (e.g. if a conversion was interrupted by a signal)
if err := w.Truncate(0); err != nil {
return nil, err
}
// Copy and count the contents
pr, pw := io.Pipe()
c := new(ioutils.CountWriter)
doneCount := make(chan struct{})
go func() {
defer close(doneCount)
defer pr.Close()
decompressR, err := compression.DecompressStream(pr)
if err != nil {
pr.CloseWithError(err)
return
}
defer decompressR.Close()
if _, err := io.Copy(c, decompressR); err != nil {
pr.CloseWithError(err)
return
}
}()
n, err := io.Copy(w, io.TeeReader(blob, pw))
if err != nil {
return nil, err
}
if err := blob.Close(); err != nil {
return nil, err
}
if err := pw.Close(); err != nil {
return nil, err
}
<-doneCount
// update diffID label
labelz[labels.LabelUncompressed] = blob.DiffID().String()
if err = w.Commit(ctx, n, "", content.WithLabels(labelz)); err != nil && !errdefs.IsAlreadyExists(err) {
return nil, err
}
if err := w.Close(); err != nil {
return nil, err
}
newDesc := desc
if uncompress.IsUncompressedType(newDesc.MediaType) {
if images.IsDockerType(newDesc.MediaType) {
newDesc.MediaType += ".gzip"
} else {
newDesc.MediaType += "+gzip"
}
}
newDesc.Digest = w.Digest()
newDesc.Size = n
if newDesc.Annotations == nil {
newDesc.Annotations = make(map[string]string, 1)
}
newDesc.Annotations[estargz.TOCJSONDigestAnnotation] = blob.TOCDigest().String()
newDesc.Annotations[estargz.StoreUncompressedSizeAnnotation] = fmt.Sprintf("%d", c.Size())
return &newDesc, nil
}
}
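// Illustrative usage sketch (ctx, client and the image references are assumptions;
// converter.Convert, converter.WithDockerToOCI and converter.WithLayerConvertFunc come
// from the containerd converter package imported above):
//
//    newImg, err := converter.Convert(ctx, client, "example.com/foo:esgz", "example.com/foo:orig",
//        converter.WithDockerToOCI(true),
//        converter.WithLayerConvertFunc(LayerConvertFunc()),
//    )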

View File

@ -0,0 +1,398 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package externaltoc
import (
"context"
"encoding/json"
"fmt"
"io"
"sort"
"time"
"github.com/containerd/containerd/v2/core/content"
"github.com/containerd/containerd/v2/core/images"
"github.com/containerd/containerd/v2/core/images/converter"
"github.com/containerd/containerd/v2/core/images/converter/uncompress"
"github.com/containerd/containerd/v2/pkg/archive/compression"
"github.com/containerd/containerd/v2/pkg/labels"
"github.com/containerd/containerd/v2/pkg/reference"
"github.com/containerd/errdefs"
"github.com/containerd/stargz-snapshotter/estargz"
esgzexternaltoc "github.com/containerd/stargz-snapshotter/estargz/externaltoc"
estargzconvert "github.com/containerd/stargz-snapshotter/nativeconverter/estargz"
"github.com/containerd/stargz-snapshotter/util/ioutils"
"github.com/opencontainers/go-digest"
ocispecspec "github.com/opencontainers/image-spec/specs-go"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)
// LayerConvertFunc converts legacy tar.gz layers into eStargz tar.gz layers.
//
// The finalize() callback returned by this function will return the image that contains the
// external TOC of each layer. Note that the returned image isn't stored to the containerd image
// store, so the caller needs to do that.
//
// Media type is unchanged.
//
// Should be used in conjunction with WithDockerToOCI().
//
// Otherwise "containerd.io/snapshot/stargz/toc.digest" annotation will be lost,
// because the Docker media type does not support layer annotations.
//
// WithCompression() in esgzOpts will be ignored; the compression configured for the external TOC is used instead.
func LayerConvertFunc(esgzOpts []estargz.Option, compressionLevel int) (convertFunc converter.ConvertFunc, finalize func(ctx context.Context, cs content.Store, ref string, desc *ocispec.Descriptor) (*images.Image, error)) {
return layerConvert(func(c estargz.Compression) converter.ConvertFunc {
return estargzconvert.LayerConvertFunc(append(esgzOpts, estargz.WithCompression(c))...)
}, compressionLevel)
}
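// Illustrative usage sketch (ctx, client, cs and the refs are assumptions; gzip is the
// standard library compress/gzip package): convert an image, then store the image
// returned by finalize(), which carries the external TOCs, so that it becomes pullable.
//
//    convertFn, finalize := LayerConvertFunc(nil, gzip.BestCompression)
//    newImg, err := converter.Convert(ctx, client, dstRef, srcRef,
//        converter.WithDockerToOCI(true), converter.WithLayerConvertFunc(convertFn))
//    // ... handle err ...
//    tocImg, err := finalize(ctx, cs, dstRef, &newImg.Target)
//    // the caller must create/update tocImg in the containerd image store itself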
// LayerConvertWithLayerAndCommonOptsFunc converts legacy tar.gz layers into eStargz.
// Media type is unchanged. Should be used in conjunction with WithDockerToOCI(). See
// LayerConvertFunc for more details. The difference between this function and
// LayerConvertFunc is that this allows specifying additional eStargz options per layer.
func LayerConvertWithLayerAndCommonOptsFunc(opts map[digest.Digest][]estargz.Option, commonOpts []estargz.Option, compressionLevel int) (convertFunc converter.ConvertFunc, finalize func(ctx context.Context, cs content.Store, ref string, desc *ocispec.Descriptor) (*images.Image, error)) {
return layerConvert(func(c estargz.Compression) converter.ConvertFunc {
return estargzconvert.LayerConvertWithLayerAndCommonOptsFunc(opts, append(commonOpts,
estargz.WithCompression(c),
)...)
}, compressionLevel)
}
// LayerConvertLossLessConfig is configuration for LayerConvertLossLessFunc.
type LayerConvertLossLessConfig struct {
CompressionLevel int
ChunkSize int
MinChunkSize int
}
// LayerConvertLossLessFunc converts legacy tar.gz layers into eStargz tar.gz layers without changing
// the diffIDs (i.e. uncompressed digest).
//
// The finalize() callback returned by this function will return the image that contains the
// external TOC of each layer. Note that the returned image isn't stored to the containerd image
// store, so the caller needs to do that.
//
// Media type is unchanged.
//
// Should be used in conjunction with WithDockerToOCI().
//
// Otherwise "containerd.io/snapshot/stargz/toc.digest" annotation will be lost,
// because the Docker media type does not support layer annotations.
//
// WithCompression() in esgzOpts will be ignored; the compression configured for the external TOC is used instead.
func LayerConvertLossLessFunc(cfg LayerConvertLossLessConfig) (convertFunc converter.ConvertFunc, finalize func(ctx context.Context, cs content.Store, ref string, desc *ocispec.Descriptor) (*images.Image, error)) {
return layerConvert(func(c estargz.Compression) converter.ConvertFunc {
return layerLossLessConvertFunc(c, cfg.ChunkSize, cfg.MinChunkSize)
}, cfg.CompressionLevel)
}
func layerConvert(layerConvertFunc func(estargz.Compression) converter.ConvertFunc, compressionLevel int) (convertFunc converter.ConvertFunc, finalize func(ctx context.Context, cs content.Store, ref string, desc *ocispec.Descriptor) (*images.Image, error)) {
type tocInfo struct {
digest digest.Digest
size int64
}
esgzDigest2TOC := make(map[digest.Digest]tocInfo)
// TODO: currently, all layers of all platforms are combined into one TOC manifest. Maybe we can consider
// having a separate TOC manifest per platform.
converterFunc := func(ctx context.Context, cs content.Store, desc ocispec.Descriptor) (*ocispec.Descriptor, error) {
cm := esgzexternaltoc.NewGzipCompressionWithLevel(nil, compressionLevel)
c := cm.(*esgzexternaltoc.GzipCompression)
cf := layerConvertFunc(c)
desc2, err := cf(ctx, cs, desc)
if err != nil {
return desc2, err
}
var layerDgst digest.Digest
if desc2 != nil {
layerDgst = desc2.Digest
} else {
layerDgst = desc.Digest // no conversion happened
}
dgst, size, err := writeTOCTo(ctx, c, cs)
if err != nil {
return nil, err
}
esgzDigest2TOC[layerDgst] = tocInfo{dgst, size}
return desc2, nil
}
finalizeFunc := func(ctx context.Context, cs content.Store, ref string, desc *ocispec.Descriptor) (*images.Image, error) {
var layers []ocispec.Descriptor
for esgzDigest, toc := range esgzDigest2TOC {
layers = append(layers, ocispec.Descriptor{
MediaType: ocispec.MediaTypeImageLayerGzip,
Digest: toc.digest,
Size: toc.size,
Annotations: map[string]string{
"containerd.io/snapshot/stargz/layer.digest": esgzDigest.String(),
},
})
}
sort.Slice(layers, func(i, j int) bool {
return layers[i].Digest.String() < layers[j].Digest.String()
})
mfst, err := createManifest(ctx, cs, ocispec.ImageConfig{}, layers)
if err != nil {
return nil, err
}
tocImgRef, err := getTOCReference(ref)
if err != nil {
return nil, err
}
return &images.Image{
Name: tocImgRef,
Target: *mfst,
}, nil
}
return converterFunc, finalizeFunc
}
func getTOCReference(ref string) (string, error) {
refspec, err := reference.Parse(ref)
if err != nil {
return "", err
}
refspec.Object = refspec.Object + "-esgztoc" // TODO: support custom location
return refspec.String(), nil
}
func layerLossLessConvertFunc(compressor estargz.Compressor, chunkSize int, minChunkSize int) converter.ConvertFunc {
return func(ctx context.Context, cs content.Store, desc ocispec.Descriptor) (*ocispec.Descriptor, error) {
if !images.IsLayerType(desc.MediaType) {
// No conversion. No need to return an error here.
return nil, nil
}
info, err := cs.Info(ctx, desc.Digest)
if err != nil {
return nil, err
}
labelz := info.Labels
if labelz == nil {
labelz = make(map[string]string)
}
ra, err := cs.ReaderAt(ctx, desc)
if err != nil {
return nil, err
}
defer ra.Close()
sr := io.NewSectionReader(ra, 0, desc.Size)
ref := fmt.Sprintf("convert-estargz-from-%s", desc.Digest)
w, err := content.OpenWriter(ctx, cs, content.WithRef(ref))
if err != nil {
return nil, err
}
defer w.Close()
// Reset the writing position
// An old writer may remain without having been aborted
// (e.g. if a conversion was interrupted by a signal)
if err := w.Truncate(0); err != nil {
return nil, err
}
// Copy and count the contents
esgzUW, esgzUncompressedInfoCh := calcUncompression()
orgUW, orgUncompressedInfoCh := calcUncompression()
countW := new(ioutils.CountWriter)
mw := io.MultiWriter(io.MultiWriter(w, countW), esgzUW)
var ew *estargz.Writer
if compressor != nil {
ew = estargz.NewWriterWithCompressor(mw, compressor)
} else {
ew = estargz.NewWriter(mw)
}
if chunkSize > 0 {
ew.ChunkSize = chunkSize
}
ew.MinChunkSize = minChunkSize
if err := ew.AppendTarLossLess(io.TeeReader(sr, orgUW)); err != nil {
return nil, fmt.Errorf("cannot perform compression in lossless way: %w", err)
}
tocDgst, err := ew.Close()
if err != nil {
return nil, err
}
n := countW.Size()
if err := esgzUW.Close(); err != nil {
return nil, err
}
if err := orgUW.Close(); err != nil {
return nil, err
}
esgzUncompressedInfo := <-esgzUncompressedInfoCh
orgUncompressedInfo := <-orgUncompressedInfoCh
// check the lossless conversion
if esgzUncompressedInfo.diffID.String() != orgUncompressedInfo.diffID.String() {
return nil, fmt.Errorf("unexpected diffID %q; want %q",
esgzUncompressedInfo.diffID.String(), orgUncompressedInfo.diffID.String())
}
if esgzUncompressedInfo.size != orgUncompressedInfo.size {
return nil, fmt.Errorf("unexpected uncompressed size %q; want %q",
esgzUncompressedInfo.size, orgUncompressedInfo.size)
}
// write diffID label
labelz[labels.LabelUncompressed] = esgzUncompressedInfo.diffID.String()
if err = w.Commit(ctx, n, "", content.WithLabels(labelz)); err != nil && !errdefs.IsAlreadyExists(err) {
return nil, err
}
if err := w.Close(); err != nil {
return nil, err
}
newDesc := desc
if uncompress.IsUncompressedType(newDesc.MediaType) {
if images.IsDockerType(newDesc.MediaType) {
newDesc.MediaType += ".gzip"
} else {
newDesc.MediaType += "+gzip"
}
}
newDesc.Digest = w.Digest()
newDesc.Size = n
if newDesc.Annotations == nil {
newDesc.Annotations = make(map[string]string, 1)
}
newDesc.Annotations[estargz.TOCJSONDigestAnnotation] = tocDgst.String()
newDesc.Annotations[estargz.StoreUncompressedSizeAnnotation] = fmt.Sprintf("%d", esgzUncompressedInfo.size)
return &newDesc, nil
}
}
type uncompressedInfo struct {
diffID digest.Digest
size int64
}
func calcUncompression() (*io.PipeWriter, chan uncompressedInfo) {
pr, pw := io.Pipe()
infoCh := make(chan uncompressedInfo)
go func() {
defer pr.Close()
c := new(ioutils.CountWriter)
diffID := digest.Canonical.Digester()
decompressR, err := compression.DecompressStream(pr)
if err != nil {
pr.CloseWithError(err)
close(infoCh)
return
}
defer decompressR.Close()
if _, err := io.Copy(io.MultiWriter(c, diffID.Hash()), decompressR); err != nil {
pr.CloseWithError(err)
close(infoCh)
return
}
infoCh <- uncompressedInfo{
diffID: diffID.Digest(),
size: c.Size(),
}
}()
return pw, infoCh
}
func writeTOCTo(ctx context.Context, gc *esgzexternaltoc.GzipCompression, cs content.Store) (digest.Digest, int64, error) {
ref := "external-toc" + time.Now().String()
w, err := content.OpenWriter(ctx, cs, content.WithRef(ref))
if err != nil {
return "", 0, err
}
defer w.Close()
if err := w.Truncate(0); err != nil {
return "", 0, err
}
c := new(ioutils.CountWriter)
dgstr := digest.Canonical.Digester()
n, err := gc.WriteTOCTo(io.MultiWriter(io.MultiWriter(w, dgstr.Hash()), c))
if err != nil {
return "", 0, err
}
if err := w.Commit(ctx, int64(n), ""); err != nil && !errdefs.IsAlreadyExists(err) {
return "", 0, err
}
if err := w.Close(); err != nil {
return "", 0, err
}
return dgstr.Digest(), c.Size(), nil
}
func createManifest(ctx context.Context, cs content.Store, config ocispec.ImageConfig, layers []ocispec.Descriptor) (*ocispec.Descriptor, error) {
// Create config
configDgst, configSize, err := writeJSON(ctx, cs, &config, nil)
if err != nil {
return nil, err
}
// Create manifest
mfst := ocispec.Manifest{
Versioned: ocispecspec.Versioned{
SchemaVersion: 2,
},
MediaType: ocispec.MediaTypeImageManifest,
Config: ocispec.Descriptor{
MediaType: ocispec.MediaTypeImageConfig,
Digest: configDgst,
Size: configSize,
},
Layers: layers,
}
mfstLabels := make(map[string]string)
for i, ld := range mfst.Layers {
mfstLabels[fmt.Sprintf("containerd.io/gc.ref.content.l.%d", i)] = ld.Digest.String()
}
mfstLabels["containerd.io/gc.ref.content.c.0"] = configDgst.String()
mfstDgst, mfstSize, err := writeJSON(ctx, cs, &mfst, mfstLabels)
if err != nil {
return nil, err
}
return &ocispec.Descriptor{
MediaType: ocispec.MediaTypeImageManifest,
Digest: mfstDgst,
Size: mfstSize,
}, nil
}
func writeJSON(ctx context.Context, cs content.Store, data interface{}, labels map[string]string) (digest.Digest, int64, error) {
raw, err := json.Marshal(data)
if err != nil {
return "", 0, err
}
size := len(raw)
ref := "write-json-ref" + digest.FromBytes(raw).String()
w, err := content.OpenWriter(ctx, cs, content.WithRef(ref))
if err != nil {
return "", 0, err
}
defer w.Close()
if err := w.Truncate(0); err != nil {
return "", 0, err
}
if _, err := w.Write(raw); err != nil {
return "", 0, err
}
if err = w.Commit(ctx, int64(size), "", content.WithLabels(labels)); err != nil && !errdefs.IsAlreadyExists(err) {
return "", 0, err
}
dgst := w.Digest()
if err := w.Close(); err != nil {
return "", 0, err
}
return dgst, int64(size), nil
}

View File

@ -0,0 +1,90 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package externaltoc
import (
"context"
"fmt"
"io"
"github.com/containerd/containerd/v2/core/remotes"
"github.com/containerd/containerd/v2/core/remotes/docker"
"github.com/containerd/containerd/v2/pkg/reference"
"github.com/containerd/platforms"
esgzexternaltoc "github.com/containerd/stargz-snapshotter/estargz/externaltoc"
"github.com/containerd/stargz-snapshotter/fs/source"
"github.com/containerd/stargz-snapshotter/util/containerdutil"
"github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)
func NewRemoteDecompressor(ctx context.Context, hosts source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) *esgzexternaltoc.GzipDecompressor {
return esgzexternaltoc.NewGzipDecompressor(func() ([]byte, error) {
resolver := docker.NewResolver(docker.ResolverOptions{
Hosts: func(host string) ([]docker.RegistryHost, error) {
if host != refspec.Hostname() {
return nil, fmt.Errorf("unexpected host %q for image ref %q", host, refspec.String())
}
return hosts(refspec)
},
})
return fetchTOCBlob(ctx, resolver, refspec, desc.Digest)
})
}
func fetchTOCBlob(ctx context.Context, resolver remotes.Resolver, refspec reference.Spec, dgst digest.Digest) ([]byte, error) {
// TODO: support custom location of TOC manifest and TOCs using annotations, etc.
tocImgRef, err := getTOCReference(refspec.String())
if err != nil {
return nil, err
}
_, img, err := resolver.Resolve(ctx, tocImgRef)
if err != nil {
return nil, err
}
fetcher, err := resolver.Fetcher(ctx, tocImgRef)
if err != nil {
return nil, err
}
// TODO: cache this manifest
manifest, err := containerdutil.FetchManifestPlatform(ctx, fetcher, img, platforms.DefaultSpec())
if err != nil {
return nil, err
}
return fetchTOCBlobFromManifest(ctx, fetcher, manifest, dgst)
}
func fetchTOCBlobFromManifest(ctx context.Context, fetcher remotes.Fetcher, manifest ocispec.Manifest, layerDigest digest.Digest) ([]byte, error) {
for _, l := range manifest.Layers {
if len(l.Annotations) == 0 {
continue
}
ldgst, ok := l.Annotations["containerd.io/snapshot/stargz/layer.digest"]
if !ok {
continue
}
if ldgst == layerDigest.String() {
r, err := fetcher.Fetch(ctx, l)
if err != nil {
return nil, err
}
defer r.Close()
return io.ReadAll(r)
}
}
return nil, fmt.Errorf("TOC not found")
}

View File

@ -0,0 +1,70 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package service
import (
"github.com/containerd/stargz-snapshotter/fs/config"
"github.com/containerd/stargz-snapshotter/service/resolver"
)
// Config is configuration for stargz snapshotter service.
type Config struct {
config.Config
// KubeconfigKeychainConfig is config for kubeconfig-based keychain.
KubeconfigKeychainConfig `toml:"kubeconfig_keychain"`
// CRIKeychainConfig is config for CRI-based keychain.
CRIKeychainConfig `toml:"cri_keychain"`
// ResolverConfig is config for resolving registries.
ResolverConfig `toml:"resolver"`
// SnapshotterConfig is snapshotter-related config.
SnapshotterConfig `toml:"snapshotter"`
}
// KubeconfigKeychainConfig is config for kubeconfig-based keychain.
type KubeconfigKeychainConfig struct {
// EnableKeychain enables kubeconfig-based keychain
EnableKeychain bool `toml:"enable_keychain"`
// KubeconfigPath is the path to kubeconfig which can be used to sync
// secrets on the cluster into this snapshotter.
KubeconfigPath string `toml:"kubeconfig_path"`
}
// CRIKeychainConfig is config for CRI-based keychain.
type CRIKeychainConfig struct {
// EnableKeychain enables CRI-based keychain
EnableKeychain bool `toml:"enable_keychain"`
// ImageServicePath is the path to the unix socket of backing CRI Image Service (e.g. containerd CRI plugin)
ImageServicePath string `toml:"image_service_path"`
}
// ResolverConfig is config for resolving registries.
type ResolverConfig resolver.Config
// SnapshotterConfig is snapshotter-related config.
type SnapshotterConfig struct {
// AllowInvalidMountsOnRestart allows snapshot mounts that cannot access the
// data source to remain when restarting the snapshotter.
// NOTE: User needs to manually remove the snapshots from containerd's metadata store using
// ctr (e.g. `ctr snapshot rm`).
AllowInvalidMountsOnRestart bool `toml:"allow_invalid_mounts_on_restart"`
}
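// Illustrative sketch of how the sections above map to a snapshotter TOML config
// (values are placeholders; the location and surrounding layout of the file depend
// on how the snapshotter is deployed and are assumptions here):
//
//    [kubeconfig_keychain]
//    enable_keychain = true
//    kubeconfig_path = "/etc/kubernetes/snapshotter.kubeconfig"
//
//    [cri_keychain]
//    enable_keychain = false
//
//    [snapshotter]
//    allow_invalid_mounts_on_restart = false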

View File

@ -0,0 +1,112 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package service
import (
"fmt"
"strings"
"github.com/containerd/containerd/v2/pkg/reference"
"github.com/containerd/stargz-snapshotter/fs/source"
digest "github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)
// TODO: switch to "github.com/containerd/containerd/pkg/snapshotters" once all tools using
//
// stargz-snapshotter (e.g. k3s) move to containerd version where that pkg is available.
const (
// targetRefLabel is a label which contains image reference and will be passed
// to snapshotters.
targetRefLabel = "containerd.io/snapshot/cri.image-ref"
// targetLayerDigestLabel is a label which contains layer digest and will be passed
// to snapshotters.
targetLayerDigestLabel = "containerd.io/snapshot/cri.layer-digest"
// targetImageLayersLabel is a label which contains layer digests contained in
// the target image and will be passed to snapshotters for preparing layers in
// parallel. Skipping some layers is allowed and only affects performance.
targetImageLayersLabel = "containerd.io/snapshot/cri.image-layers"
)
const (
// targetImageURLsLabelPrefix is a label prefix which constructs a map from the layer index to
// URLs of the layer descriptor. This isn't contained in the set of the labels passed from the CRI plugin, but
// some clients (e.g. nerdctl) pass this to preserve the URL field in the OCI descriptor.
targetImageURLsLabelPrefix = "containerd.io/snapshot/remote/urls."
// targetURLsLabel is a label which contains the layer URL. This is only used to pass the URL from containerd
// to the snapshotter. This isn't contained in the set of the labels passed from the CRI plugin, but
// some clients (e.g. nerdctl) pass this to preserve the URL field in the OCI descriptor.
targetURLsLabel = "containerd.io/snapshot/remote/urls"
)
func sourceFromCRILabels(hosts source.RegistryHosts) source.GetSources {
return func(labels map[string]string) ([]source.Source, error) {
refStr, ok := labels[targetRefLabel]
if !ok {
return nil, fmt.Errorf("reference hasn't been passed")
}
refspec, err := reference.Parse(refStr)
if err != nil {
return nil, err
}
digestStr, ok := labels[targetLayerDigestLabel]
if !ok {
return nil, fmt.Errorf("digest hasn't been passed")
}
target, err := digest.Parse(digestStr)
if err != nil {
return nil, err
}
var neighboringLayers []ocispec.Descriptor
if l, ok := labels[targetImageLayersLabel]; ok {
layersStr := strings.Split(l, ",")
for i, l := range layersStr {
d, err := digest.Parse(l)
if err != nil {
return nil, err
}
if d.String() != target.String() {
desc := ocispec.Descriptor{Digest: d}
if urls, ok := labels[targetImageURLsLabelPrefix+fmt.Sprintf("%d", i)]; ok {
desc.URLs = strings.Split(urls, ",")
}
neighboringLayers = append(neighboringLayers, desc)
}
}
}
targetDesc := ocispec.Descriptor{
Digest: target,
Annotations: labels,
}
if targetURLs, ok := labels[targetURLsLabel]; ok {
targetDesc.URLs = append(targetDesc.URLs, strings.Split(targetURLs, ",")...)
}
return []source.Source{
{
Hosts: hosts,
Name: refspec,
Target: targetDesc,
Manifest: ocispec.Manifest{Layers: append([]ocispec.Descriptor{targetDesc}, neighboringLayers...)},
},
}, nil
}
}

View File

@ -0,0 +1,145 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cri
import (
"context"
"errors"
"fmt"
"sync"
"time"
"github.com/containerd/containerd/v2/pkg/reference"
"github.com/containerd/log"
"github.com/containerd/stargz-snapshotter/service/resolver"
distribution "github.com/distribution/reference"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// NewCRIKeychain provides creds passed through CRI PullImage API.
// This also returns a CRI image service server that works as a proxy backed by the specified CRI service.
// This server reads all PullImageRequests and uses PullImageRequest.AuthConfig for authenticating snapshots.
func NewCRIKeychain(ctx context.Context, connectCRI func() (runtime.ImageServiceClient, error)) (resolver.Credential, runtime.ImageServiceServer) {
server := &instrumentedService{config: make(map[string]*runtime.AuthConfig)}
go func() {
log.G(ctx).Debugf("Waiting for CRI service is started...")
for i := 0; i < 100; i++ {
client, err := connectCRI()
if err == nil {
server.criMu.Lock()
server.cri = client
server.criMu.Unlock()
log.G(ctx).Info("connected to backend CRI service")
return
}
log.G(ctx).WithError(err).Warnf("failed to connect to CRI")
time.Sleep(10 * time.Second)
}
log.G(ctx).Warnf("no connection is available to CRI")
}()
return server.credentials, server
}
type instrumentedService struct {
cri runtime.ImageServiceClient
criMu sync.Mutex
config map[string]*runtime.AuthConfig
configMu sync.Mutex
}
func (in *instrumentedService) credentials(host string, refspec reference.Spec) (string, string, error) {
if host == "docker.io" || host == "registry-1.docker.io" {
// Creds for "docker.io" are stored keyed by "https://index.docker.io/v1/".
host = "index.docker.io"
}
in.configMu.Lock()
defer in.configMu.Unlock()
if cfg, ok := in.config[refspec.String()]; ok {
return resolver.ParseAuth(cfg, host)
}
return "", "", nil
}
func (in *instrumentedService) getCRI() (c runtime.ImageServiceClient) {
in.criMu.Lock()
c = in.cri
in.criMu.Unlock()
return
}
func (in *instrumentedService) ListImages(ctx context.Context, r *runtime.ListImagesRequest) (res *runtime.ListImagesResponse, err error) {
cri := in.getCRI()
if cri == nil {
return nil, errors.New("server is not initialized yet")
}
return cri.ListImages(ctx, r)
}
func (in *instrumentedService) ImageStatus(ctx context.Context, r *runtime.ImageStatusRequest) (res *runtime.ImageStatusResponse, err error) {
cri := in.getCRI()
if cri == nil {
return nil, errors.New("server is not initialized yet")
}
return cri.ImageStatus(ctx, r)
}
func (in *instrumentedService) PullImage(ctx context.Context, r *runtime.PullImageRequest) (res *runtime.PullImageResponse, err error) {
cri := in.getCRI()
if cri == nil {
return nil, errors.New("server is not initialized yet")
}
refspec, err := parseReference(r.GetImage().GetImage())
if err != nil {
return nil, err
}
in.configMu.Lock()
in.config[refspec.String()] = r.GetAuth()
in.configMu.Unlock()
return cri.PullImage(ctx, r)
}
func (in *instrumentedService) RemoveImage(ctx context.Context, r *runtime.RemoveImageRequest) (_ *runtime.RemoveImageResponse, err error) {
cri := in.getCRI()
if cri == nil {
return nil, errors.New("server is not initialized yet")
}
refspec, err := parseReference(r.GetImage().GetImage())
if err != nil {
return nil, err
}
in.configMu.Lock()
delete(in.config, refspec.String())
in.configMu.Unlock()
return cri.RemoveImage(ctx, r)
}
func (in *instrumentedService) ImageFsInfo(ctx context.Context, r *runtime.ImageFsInfoRequest) (res *runtime.ImageFsInfoResponse, err error) {
cri := in.getCRI()
if cri == nil {
return nil, errors.New("server is not initialized yet")
}
return cri.ImageFsInfo(ctx, r)
}
func parseReference(ref string) (reference.Spec, error) {
namedRef, err := distribution.ParseDockerRef(ref)
if err != nil {
return reference.Spec{}, fmt.Errorf("failed to parse image reference %q: %w", ref, err)
}
return reference.Parse(namedRef.String())
}

View File

@ -0,0 +1,49 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package dockerconfig
import (
"context"
"github.com/containerd/containerd/v2/pkg/reference"
"github.com/containerd/log"
"github.com/containerd/stargz-snapshotter/service/resolver"
"github.com/docker/cli/cli/config"
)
func NewDockerconfigKeychain(ctx context.Context) resolver.Credential {
return func(host string, refspec reference.Spec) (string, string, error) {
cf, err := config.Load("")
if err != nil {
log.G(ctx).WithError(err).Warnf("failed to load docker config file")
return "", "", nil
}
if host == "docker.io" || host == "registry-1.docker.io" {
// Creds for docker.io are stored keyed by "https://index.docker.io/v1/".
host = "https://index.docker.io/v1/"
}
ac, err := cf.GetAuthConfig(host)
if err != nil {
return "", "", err
}
if ac.IdentityToken != "" {
return "", ac.IdentityToken, nil
}
return ac.Username, ac.Password, nil
}
}
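// Illustrative usage sketch (the host and refspec are assumptions): the returned
// resolver.Credential can be called directly to look up credentials for a host.
//
//    creds := NewDockerconfigKeychain(ctx)
//    username, secret, err := creds("ghcr.io", refspec)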

View File

@ -0,0 +1,262 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package kubeconfig
import (
"bytes"
"context"
"fmt"
"os"
"sync"
"time"
"github.com/containerd/containerd/v2/pkg/reference"
"github.com/containerd/log"
"github.com/containerd/stargz-snapshotter/service/resolver"
dcfile "github.com/docker/cli/cli/config/configfile"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/client-go/util/workqueue"
)
const dockerconfigSelector = "type=" + string(corev1.SecretTypeDockerConfigJson)
type options struct {
kubeconfigPath string
}
type Option func(*options)
func WithKubeconfigPath(path string) Option {
return func(opts *options) {
opts.kubeconfigPath = path
}
}
// NewKubeconfigKeychain provides a keychain which can sync its contents with
// kubernetes API server by fetching all `kubernetes.io/dockerconfigjson`
// secrets in the cluster with the provided kubeconfig. It's OK if the config provides a
// kubeconfig path but the file doesn't exist at that moment. In this case, this
// keychain keeps trying to read the specified path periodically and, when the
// file is actually provided, tries to access the API server using the
// file. This is useful for some environments (e.g. single node cluster with
// containerized apiserver) where stargz snapshotter needs to start before
// everything, including booting containerd/kubelet/apiserver and configuring
// users/roles.
// TODO: support update of kubeconfig file
func NewKubeconfigKeychain(ctx context.Context, opts ...Option) resolver.Credential {
var kcOpts options
for _, o := range opts {
o(&kcOpts)
}
kc := newKeychain(ctx, kcOpts.kubeconfigPath)
return kc.credentials
}
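// Illustrative usage sketch (the kubeconfig path and refspec are placeholders): build
// a keychain that syncs dockerconfigjson secrets and query it like any other
// resolver.Credential.
//
//    creds := NewKubeconfigKeychain(ctx, WithKubeconfigPath("/etc/kubernetes/admin.conf"))
//    username, secret, err := creds("registry.example.com", refspec)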
func newKeychain(ctx context.Context, kubeconfigPath string) *keychain {
kc := &keychain{
config: make(map[string]*dcfile.ConfigFile),
}
ctx = log.WithLogger(ctx, log.G(ctx).WithField("kubeconfig", kubeconfigPath))
go func() {
if kubeconfigPath != "" {
log.G(ctx).Debugf("Waiting for kubeconfig being installed...")
for {
if _, err := os.Stat(kubeconfigPath); err == nil {
break
} else if !os.IsNotExist(err) {
log.G(ctx).WithError(err).
Warnf("failed to read; Disabling syncing")
return
}
time.Sleep(10 * time.Second)
}
}
// default loader for KUBECONFIG or `~/.kube/config`
// if no explicit path provided, KUBECONFIG will be used.
// if KUBECONFIG doesn't contain paths, `~/.kube/config` will be used.
loadingRule := clientcmd.NewDefaultClientConfigLoadingRules()
// explicitly provide path for kubeconfig.
// if path isn't "", this path will be respected.
loadingRule.ExplicitPath = kubeconfigPath
// load and merge config files
clientcfg, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(
loadingRule, // loader for config files
&clientcmd.ConfigOverrides{}, // no overrides for config
).ClientConfig()
if err != nil {
log.G(ctx).WithError(err).Warnf("failed to load config; Disabling syncing")
return
}
client, err := kubernetes.NewForConfig(clientcfg)
if err != nil {
log.G(ctx).WithError(err).Warnf("failed to prepare client; Disabling syncing")
return
}
if err := kc.startSyncSecrets(ctx, client); err != nil {
log.G(ctx).WithError(err).Warnf("failed to sync secrets")
}
}()
return kc
}
type keychain struct {
config map[string]*dcfile.ConfigFile
configMu sync.Mutex
// the following entries are used for syncing secrets with API server.
// these fields are lazily filled after kubeconfig file is provided.
queue *workqueue.Typed[string]
informer cache.SharedIndexInformer
}
func (kc *keychain) credentials(host string, refspec reference.Spec) (string, string, error) {
if host == "docker.io" || host == "registry-1.docker.io" {
// Creds for "docker.io" are stored keyed by "https://index.docker.io/v1/".
host = "https://index.docker.io/v1/"
}
kc.configMu.Lock()
defer kc.configMu.Unlock()
for _, cfg := range kc.config {
if acfg, err := cfg.GetAuthConfig(host); err == nil {
if acfg.IdentityToken != "" {
return "", acfg.IdentityToken, nil
} else if !(acfg.Username == "" && acfg.Password == "") {
return acfg.Username, acfg.Password, nil
}
}
}
return "", "", nil
}
func (kc *keychain) startSyncSecrets(ctx context.Context, client kubernetes.Interface) error {
// don't let panics crash the process
defer utilruntime.HandleCrash()
// get informed on `kubernetes.io/dockerconfigjson` secrets in all namespaces
informer := cache.NewSharedIndexInformer(
&cache.ListWatch{
ListFunc: func(options metav1.ListOptions) (runtime.Object, error) {
// TODO: support legacy image secret `kubernetes.io/dockercfg`
options.FieldSelector = dockerconfigSelector
return client.CoreV1().Secrets(metav1.NamespaceAll).List(ctx, options)
},
WatchFunc: func(options metav1.ListOptions) (watch.Interface, error) {
// TODO: support legacy image secret `kubernetes.io/dockercfg`
options.FieldSelector = dockerconfigSelector
return client.CoreV1().Secrets(metav1.NamespaceAll).Watch(ctx, options)
},
},
&corev1.Secret{},
0,
cache.Indexers{},
)
// use a workqueue because each task can take a long time (parsing config,
// waiting for a lock, etc.)
queue := workqueue.NewTyped[string]()
defer queue.ShutDown()
informer.AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: func(obj interface{}) {
key, err := cache.MetaNamespaceKeyFunc(obj)
if err == nil {
queue.Add(key)
}
},
UpdateFunc: func(old, new interface{}) {
key, err := cache.MetaNamespaceKeyFunc(new)
if err == nil {
queue.Add(key)
}
},
DeleteFunc: func(obj interface{}) {
key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
if err == nil {
queue.Add(key)
}
},
})
go informer.Run(ctx.Done())
if !cache.WaitForCacheSync(ctx.Done(), informer.HasSynced) {
return fmt.Errorf("Timed out for syncing cache")
}
// get informer and queue
kc.informer = informer
kc.queue = queue
// keep on syncing secrets
wait.Until(kc.runWorker, time.Second, ctx.Done())
return nil
}
func (kc *keychain) runWorker() {
for kc.processNextItem() {
// continue looping
}
}
// TODO: consider retrying?
func (kc *keychain) processNextItem() bool {
key, quit := kc.queue.Get()
if quit {
return false
}
defer kc.queue.Done(key)
obj, exists, err := kc.informer.GetIndexer().GetByKey(key)
if err != nil {
utilruntime.HandleError(fmt.Errorf("failed to get object; don't sync %q: %v", key, err))
return true
}
if !exists {
kc.configMu.Lock()
delete(kc.config, key)
kc.configMu.Unlock()
return true
}
// TODO: support legacy image secret `kubernetes.io/dockercfg`
data, ok := obj.(*corev1.Secret).Data[corev1.DockerConfigJsonKey]
if !ok {
utilruntime.HandleError(fmt.Errorf("no secret is provided; don't sync %q", key))
return true
}
configFile := dcfile.New("")
if err := configFile.LoadFromReader(bytes.NewReader(data)); err != nil {
utilruntime.HandleError(fmt.Errorf("broken data; don't sync %q: %v", key, err))
return true
}
kc.configMu.Lock()
kc.config[key] = configFile
kc.configMu.Unlock()
return true
}

View File

@ -0,0 +1,23 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package plugin
import "github.com/containerd/stargz-snapshotter/service/plugincore"
func init() {
plugincore.RegisterPlugin()
}

View File

@ -0,0 +1,152 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package plugincore
import (
"errors"
"fmt"
"net"
"os"
"path/filepath"
"time"
"github.com/containerd/containerd/v2/defaults"
"github.com/containerd/containerd/v2/pkg/dialer"
ctdplugins "github.com/containerd/containerd/v2/plugins"
"github.com/containerd/log"
"github.com/containerd/platforms"
"github.com/containerd/plugin"
"github.com/containerd/plugin/registry"
"github.com/containerd/stargz-snapshotter/service"
"github.com/containerd/stargz-snapshotter/service/keychain/cri"
"github.com/containerd/stargz-snapshotter/service/keychain/dockerconfig"
"github.com/containerd/stargz-snapshotter/service/keychain/kubeconfig"
"github.com/containerd/stargz-snapshotter/service/resolver"
grpc "google.golang.org/grpc"
"google.golang.org/grpc/backoff"
"google.golang.org/grpc/credentials/insecure"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// Config represents configuration for the stargz snapshotter plugin.
type Config struct {
service.Config
// RootPath is the directory for the plugin
RootPath string `toml:"root_path"`
// CRIKeychainImageServicePath is the path to expose CRI service wrapped by CRI keychain
CRIKeychainImageServicePath string `toml:"cri_keychain_image_service_path"`
// Registry is CRI-plugin-compatible registry configuration
Registry resolver.Registry `toml:"registry"`
}
func RegisterPlugin() {
registry.Register(&plugin.Registration{
Type: ctdplugins.SnapshotPlugin,
ID: "stargz",
Config: &Config{},
InitFn: func(ic *plugin.InitContext) (interface{}, error) {
ic.Meta.Platforms = append(ic.Meta.Platforms, platforms.DefaultSpec())
ctx := ic.Context
config, ok := ic.Config.(*Config)
if !ok {
return nil, errors.New("invalid stargz snapshotter configuration")
}
root := ic.Properties[ctdplugins.PropertyRootDir]
if config.RootPath != "" {
root = config.RootPath
}
ic.Meta.Exports["root"] = root
// Configure keychain
credsFuncs := []resolver.Credential{dockerconfig.NewDockerconfigKeychain(ctx)}
if config.Config.KubeconfigKeychainConfig.EnableKeychain {
var opts []kubeconfig.Option
if kcp := config.Config.KubeconfigKeychainConfig.KubeconfigPath; kcp != "" {
opts = append(opts, kubeconfig.WithKubeconfigPath(kcp))
}
credsFuncs = append(credsFuncs, kubeconfig.NewKubeconfigKeychain(ctx, opts...))
}
if addr := config.CRIKeychainImageServicePath; config.Config.CRIKeychainConfig.EnableKeychain && addr != "" {
// connects to the backend CRI service (defaults to containerd socket)
criAddr := ic.Properties[ctdplugins.PropertyGRPCAddress]
if cp := config.Config.CRIKeychainConfig.ImageServicePath; cp != "" {
criAddr = cp
}
if criAddr == "" {
return nil, errors.New("backend CRI service address is not specified")
}
connectCRI := func() (runtime.ImageServiceClient, error) {
conn, err := newCRIConn(criAddr)
if err != nil {
return nil, err
}
return runtime.NewImageServiceClient(conn), nil
}
criCreds, criServer := cri.NewCRIKeychain(ctx, connectCRI)
// Create a gRPC server
rpc := grpc.NewServer()
runtime.RegisterImageServiceServer(rpc, criServer)
// Prepare the directory for the socket
if err := os.MkdirAll(filepath.Dir(addr), 0700); err != nil {
return nil, fmt.Errorf("failed to create directory %q: %w", filepath.Dir(addr), err)
}
// Try to remove the socket file to avoid EADDRINUSE
if err := os.RemoveAll(addr); err != nil {
return nil, fmt.Errorf("failed to remove %q: %w", addr, err)
}
// Listen and serve
l, err := net.Listen("unix", addr)
if err != nil {
return nil, fmt.Errorf("error on listen socket %q: %w", addr, err)
}
go func() {
if err := rpc.Serve(l); err != nil {
log.G(ctx).WithError(err).Warnf("error on serving via socket %q", addr)
}
}()
credsFuncs = append(credsFuncs, criCreds)
}
// TODO(ktock): print warn if old configuration is specified.
// TODO(ktock): should we respect old configuration?
return service.NewStargzSnapshotterService(ctx, root, &config.Config,
service.WithCustomRegistryHosts(resolver.RegistryHostsFromCRIConfig(ctx, config.Registry, credsFuncs...)))
},
})
}
func newCRIConn(criAddr string) (*grpc.ClientConn, error) {
// TODO: make gRPC options configurable from config.toml
backoffConfig := backoff.DefaultConfig
backoffConfig.MaxDelay = 3 * time.Second
connParams := grpc.ConnectParams{
Backoff: backoffConfig,
}
gopts := []grpc.DialOption{
grpc.WithTransportCredentials(insecure.NewCredentials()),
grpc.WithConnectParams(connParams),
grpc.WithContextDialer(dialer.ContextDialer),
grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(defaults.DefaultMaxRecvMsgSize)),
grpc.WithDefaultCallOptions(grpc.MaxCallSendMsgSize(defaults.DefaultMaxSendMsgSize)),
}
return grpc.NewClient(dialer.DialAddress(criAddr), gopts...)
}

View File

@ -0,0 +1,349 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package resolver
// =====
// This is CRI-plugin-compatible registry hosts configuration.
// Some functions are ported from https://github.com/containerd/containerd/blob/v1.5.2/pkg/cri as noted on each one.
// TODO: import them from CRI package once we drop support for containerd v1.5.x (cri v1alpha2)
// =====
import (
"context"
"crypto/tls"
"crypto/x509"
"encoding/base64"
"errors"
"fmt"
"net"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"github.com/containerd/containerd/v2/core/remotes/docker"
dconfig "github.com/containerd/containerd/v2/core/remotes/docker/config"
"github.com/containerd/containerd/v2/pkg/reference"
"github.com/containerd/errdefs"
"github.com/containerd/stargz-snapshotter/fs/source"
rhttp "github.com/hashicorp/go-retryablehttp"
runtime "k8s.io/cri-api/pkg/apis/runtime/v1"
)
// Registry holds the configured registry settings.
type Registry struct {
// ConfigPath is a path to the root directory containing registry-specific
// configurations.
// If ConfigPath is set, the rest of the registry specific options are ignored.
ConfigPath string `toml:"config_path" json:"configPath"`
// Mirrors are namespace to mirror mapping for all namespaces.
// This option will not be used when ConfigPath is provided.
// DEPRECATED: Use ConfigPath instead. Remove in containerd 1.7.
Mirrors map[string]Mirror `toml:"mirrors" json:"mirrors"`
// Configs are configs for each registry.
// The key is the domain name or IP of the registry.
// This option will be fully deprecated for ConfigPath in the future.
Configs map[string]RegistryConfig `toml:"configs" json:"configs"`
}
// Mirror contains the config related to the registry mirror
type Mirror struct {
// Endpoints are endpoints for a namespace. CRI plugin will try the endpoints
// one by one until a working one is found. The endpoint must be a valid url
// with host specified.
// The scheme, host and path from the endpoint URL will be used.
Endpoints []string `toml:"endpoint" json:"endpoint"`
}
// RegistryConfig contains configuration used to communicate with the registry.
type RegistryConfig struct {
// Auth contains information to authenticate to the registry.
Auth *AuthConfig `toml:"auth" json:"auth"`
// TLS is a pair of CA/Cert/Key which then are used when creating the transport
// that communicates with the registry.
// This field will not be used when ConfigPath is provided.
// DEPRECATED: Use ConfigPath instead. Remove in containerd 1.7.
TLS *TLSConfig `toml:"tls" json:"tls"`
}
// AuthConfig contains the config related to authentication to a specific registry
type AuthConfig struct {
// Username is the username to login the registry.
Username string `toml:"username" json:"username"`
// Password is the password to login the registry.
Password string `toml:"password" json:"password"`
// Auth is a base64 encoded string from the concatenation of the username,
// a colon, and the password.
Auth string `toml:"auth" json:"auth"`
// IdentityToken is used to authenticate the user and get
// an access token for the registry.
IdentityToken string `toml:"identitytoken" json:"identitytoken"`
}
// TLSConfig contains the CA/Cert/Key used for a registry
type TLSConfig struct {
InsecureSkipVerify bool `toml:"insecure_skip_verify" json:"insecure_skip_verify"`
CAFile string `toml:"ca_file" json:"caFile"`
CertFile string `toml:"cert_file" json:"certFile"`
KeyFile string `toml:"key_file" json:"keyFile"`
}
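// exampleCRIRegistryConfig is an illustrative sketch (added for documentation;
// not used by the plugin) showing how the structures above map onto a
// CRI-style registry configuration: one mirror for docker.io and basic auth
// plus a custom CA for a hypothetical private registry. All host names, paths,
// and credentials are placeholders.
func exampleCRIRegistryConfig() Registry {
	return Registry{
		Mirrors: map[string]Mirror{
			"docker.io": {Endpoints: []string{"https://mirror.example.com"}},
		},
		Configs: map[string]RegistryConfig{
			"registry.example.com": {
				Auth: &AuthConfig{Username: "user", Password: "pass"},
				TLS:  &TLSConfig{CAFile: "/etc/ssl/certs/registry-ca.pem"},
			},
		},
	}
}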
// RegistryHostsFromCRIConfig creates RegistryHosts (a set of registry configuration) from CRI-plugin-compatible config.
// NOTE: ported from https://github.com/containerd/containerd/blob/v1.5.2/pkg/cri/server/image_pull.go#L332-L405
func RegistryHostsFromCRIConfig(ctx context.Context, config Registry, credsFuncs ...Credential) source.RegistryHosts {
paths := filepath.SplitList(config.ConfigPath)
if len(paths) > 0 {
return func(ref reference.Spec) ([]docker.RegistryHost, error) {
hostOptions := dconfig.HostOptions{}
hostOptions.Credentials = multiCredsFuncs(ref, append(credsFuncs, func(host string, ref reference.Spec) (string, string, error) {
config := config.Configs[host]
if config.Auth != nil {
return ParseAuth(toRuntimeAuthConfig(*config.Auth), host)
}
return "", "", nil
})...)
hostOptions.HostDir = hostDirFromRoots(paths)
return dconfig.ConfigureHosts(ctx, hostOptions)(ref.Hostname())
}
}
return func(ref reference.Spec) ([]docker.RegistryHost, error) {
host := ref.Hostname()
var registries []docker.RegistryHost
endpoints, err := registryEndpoints(config, host)
if err != nil {
return nil, fmt.Errorf("get registry endpoints: %w", err)
}
for _, e := range endpoints {
u, err := url.Parse(e)
if err != nil {
return nil, fmt.Errorf("parse registry endpoint %q from mirrors: %w", e, err)
}
var (
rclient = rhttp.NewClient()
config = config.Configs[u.Host]
)
rclient.Logger = nil // disable logging every request
if config.TLS != nil {
if tr, ok := rclient.HTTPClient.Transport.(*http.Transport); ok {
tr.TLSClientConfig, err = getTLSConfig(*config.TLS)
if err != nil {
return nil, fmt.Errorf("get TLSConfig for registry %q: %w", e, err)
}
} else {
return nil, errors.New("TLS config cannot be applied; Client.Transport is not *http.Transport")
}
}
client := rclient.StandardClient()
authorizer := docker.NewDockerAuthorizer(
docker.WithAuthClient(client),
docker.WithAuthCreds(multiCredsFuncs(ref, credsFuncs...)))
if u.Path == "" {
u.Path = "/v2"
}
registries = append(registries, docker.RegistryHost{
Client: client,
Authorizer: authorizer,
Host: u.Host,
Scheme: u.Scheme,
Path: u.Path,
Capabilities: docker.HostCapabilityResolve | docker.HostCapabilityPull,
})
}
return registries, nil
}
}
// NOTE: Ported from https://github.com/containerd/containerd/blob/v1.5.2/pkg/cri/server/image_pull.go#L316-L330
func hostDirFromRoots(roots []string) func(string) (string, error) {
rootfn := make([]func(string) (string, error), len(roots))
for i := range roots {
rootfn[i] = dconfig.HostDirFromRoot(roots[i])
}
return func(host string) (dir string, err error) {
for _, fn := range rootfn {
dir, err = fn(host)
if (err != nil && !errdefs.IsNotFound(err)) || (dir != "") {
break
}
}
return
}
}
// toRuntimeAuthConfig converts cri plugin auth config to runtime auth config.
// NOTE: Ported from https://github.com/containerd/containerd/blob/v1.5.2/pkg/cri/server/helpers.go#L295-L303
func toRuntimeAuthConfig(a AuthConfig) *runtime.AuthConfig {
return &runtime.AuthConfig{
Username: a.Username,
Password: a.Password,
Auth: a.Auth,
IdentityToken: a.IdentityToken,
}
}
// getTLSConfig returns a TLSConfig configured with a CA/Cert/Key specified by registryTLSConfig
// NOTE: Ported from https://github.com/containerd/containerd/blob/v1.5.2/pkg/cri/server/image_pull.go#L316-L330
func getTLSConfig(registryTLSConfig TLSConfig) (*tls.Config, error) {
var (
tlsConfig = &tls.Config{}
cert tls.Certificate
err error
)
if registryTLSConfig.CertFile != "" && registryTLSConfig.KeyFile == "" {
return nil, fmt.Errorf("cert file %q was specified, but no corresponding key file was specified", registryTLSConfig.CertFile)
}
if registryTLSConfig.CertFile == "" && registryTLSConfig.KeyFile != "" {
return nil, fmt.Errorf("key file %q was specified, but no corresponding cert file was specified", registryTLSConfig.KeyFile)
}
if registryTLSConfig.CertFile != "" && registryTLSConfig.KeyFile != "" {
cert, err = tls.LoadX509KeyPair(registryTLSConfig.CertFile, registryTLSConfig.KeyFile)
if err != nil {
return nil, fmt.Errorf("failed to load cert file: %w", err)
}
if len(cert.Certificate) != 0 {
tlsConfig.Certificates = []tls.Certificate{cert}
}
tlsConfig.BuildNameToCertificate() // nolint:staticcheck
}
if registryTLSConfig.CAFile != "" {
caCertPool, err := x509.SystemCertPool()
if err != nil {
return nil, fmt.Errorf("failed to get system cert pool: %w", err)
}
caCert, err := os.ReadFile(registryTLSConfig.CAFile)
if err != nil {
return nil, fmt.Errorf("failed to load CA file: %w", err)
}
caCertPool.AppendCertsFromPEM(caCert)
tlsConfig.RootCAs = caCertPool
}
tlsConfig.InsecureSkipVerify = registryTLSConfig.InsecureSkipVerify
return tlsConfig, nil
}
// defaultScheme returns the default scheme for a registry host.
// NOTE: Ported from https://github.com/containerd/containerd/blob/v1.5.2/pkg/cri/server/image_pull.go#L316-L330
func defaultScheme(host string) string {
if h, _, err := net.SplitHostPort(host); err == nil {
host = h
}
if host == "localhost" || host == "127.0.0.1" || host == "::1" {
return "http"
}
return "https"
}
// addDefaultScheme returns the endpoint with default scheme
// NOTE: Ported from https://github.com/containerd/containerd/blob/v1.5.2/pkg/cri/server/image_pull.go#L316-L330
func addDefaultScheme(endpoint string) (string, error) {
if strings.Contains(endpoint, "://") {
return endpoint, nil
}
ue := "dummy://" + endpoint
u, err := url.Parse(ue)
if err != nil {
return "", err
}
return fmt.Sprintf("%s://%s", defaultScheme(u.Host), endpoint), nil
}
// registryEndpoints returns endpoints for a given host.
// It adds default registry endpoint if it does not exist in the passed-in endpoint list.
// It also supports wildcard host matching with `*`.
// NOTE: Ported from https://github.com/containerd/containerd/blob/v1.5.2/pkg/cri/server/image_pull.go#L431-L464
func registryEndpoints(config Registry, host string) ([]string, error) {
var endpoints []string
_, ok := config.Mirrors[host]
if ok {
endpoints = config.Mirrors[host].Endpoints
} else {
endpoints = config.Mirrors["*"].Endpoints
}
defaultHost, err := docker.DefaultHost(host)
if err != nil {
return nil, fmt.Errorf("get default host: %w", err)
}
for i := range endpoints {
en, err := addDefaultScheme(endpoints[i])
if err != nil {
return nil, fmt.Errorf("parse endpoint url: %w", err)
}
endpoints[i] = en
}
for _, e := range endpoints {
u, err := url.Parse(e)
if err != nil {
return nil, fmt.Errorf("parse endpoint url: %w", err)
}
if u.Host == host {
// Do not add default if the endpoint already exists.
return endpoints, nil
}
}
return append(endpoints, defaultScheme(defaultHost)+"://"+defaultHost), nil
}
// ParseAuth parses AuthConfig and returns username and password/secret required by containerd.
// NOTE: Ported from https://github.com/containerd/containerd/blob/v1.5.2/pkg/cri/server/image_pull.go#L176-L214
func ParseAuth(auth *runtime.AuthConfig, host string) (string, string, error) {
if auth == nil {
return "", "", nil
}
if auth.ServerAddress != "" {
// Do not return the auth info when server address doesn't match.
u, err := url.Parse(auth.ServerAddress)
if err != nil {
return "", "", fmt.Errorf("parse server address: %w", err)
}
if host != u.Host {
return "", "", nil
}
}
if auth.Username != "" {
return auth.Username, auth.Password, nil
}
if auth.IdentityToken != "" {
return "", auth.IdentityToken, nil
}
if auth.Auth != "" {
decLen := base64.StdEncoding.DecodedLen(len(auth.Auth))
decoded := make([]byte, decLen)
_, err := base64.StdEncoding.Decode(decoded, []byte(auth.Auth))
if err != nil {
return "", "", err
}
fields := strings.SplitN(string(decoded), ":", 2)
if len(fields) != 2 {
return "", "", fmt.Errorf("invalid decoded auth: %q", decoded)
}
user, passwd := fields[0], fields[1]
return user, strings.Trim(passwd, "\x00"), nil
}
// TODO(random-liu): Support RegistryToken.
// An empty auth config is valid for anonymous registry
return "", "", nil
}
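// exampleParseAuth is an illustrative sketch (added for documentation; not
// used by the plugin) showing how ParseAuth resolves credentials from the
// different AuthConfig fields. The host and credentials are placeholders.
func exampleParseAuth() {
	// Username/Password take precedence when set.
	user, secret, _ := ParseAuth(&runtime.AuthConfig{Username: "user", Password: "pass"}, "registry.example.com")
	fmt.Println(user, secret) // "user pass"

	// Auth is a base64-encoded "username:password" pair.
	enc := base64.StdEncoding.EncodeToString([]byte("user:pass"))
	user, secret, _ = ParseAuth(&runtime.AuthConfig{Auth: enc}, "registry.example.com")
	fmt.Println(user, secret) // "user pass"

	// A ServerAddress that does not match the host yields empty credentials.
	user, secret, _ = ParseAuth(&runtime.AuthConfig{Username: "user", Password: "pass", ServerAddress: "https://other.example.com"}, "registry.example.com")
	fmt.Println(user, secret) // ""
}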

View File

@ -0,0 +1,149 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package resolver
import (
"fmt"
"net/http"
"time"
"github.com/containerd/containerd/v2/core/remotes/docker"
"github.com/containerd/containerd/v2/pkg/reference"
"github.com/containerd/stargz-snapshotter/fs/source"
rhttp "github.com/hashicorp/go-retryablehttp"
)
const defaultRequestTimeoutSec = 30
// Config is the configuration for resolving registries.
type Config struct {
Host map[string]HostConfig `toml:"host"`
}
type HostConfig struct {
Mirrors []MirrorConfig `toml:"mirrors"`
}
type MirrorConfig struct {
// Host is the hostname of the host.
Host string `toml:"host"`
// Insecure, if true, means the http scheme is used instead of https.
Insecure bool `toml:"insecure"`
// RequestTimeoutSec is timeout seconds of each request to the registry.
// RequestTimeoutSec == 0 indicates the default timeout (defaultRequestTimeoutSec).
// RequestTimeoutSec < 0 indicates no timeout.
RequestTimeoutSec int `toml:"request_timeout_sec"`
// Header holds additional headers to send to the server
Header map[string]interface{} `toml:"header"`
}
type Credential func(string, reference.Spec) (string, string, error)
// RegistryHostsFromConfig creates RegistryHosts (a set of registry configuration) from Config.
func RegistryHostsFromConfig(cfg Config, credsFuncs ...Credential) source.RegistryHosts {
return func(ref reference.Spec) (hosts []docker.RegistryHost, _ error) {
host := ref.Hostname()
for _, h := range append(cfg.Host[host].Mirrors, MirrorConfig{
Host: host,
}) {
client := rhttp.NewClient()
client.Logger = nil // disable logging every request
if h.RequestTimeoutSec >= 0 {
if h.RequestTimeoutSec == 0 {
client.HTTPClient.Timeout = defaultRequestTimeoutSec * time.Second
} else {
client.HTTPClient.Timeout = time.Duration(h.RequestTimeoutSec) * time.Second
}
} // h.RequestTimeoutSec < 0 means "no timeout"
tr := client.StandardClient()
var header http.Header
var err error
if h.Header != nil {
header = http.Header{}
for key, ty := range h.Header {
switch value := ty.(type) {
case string:
header[key] = []string{value}
case []interface{}:
header[key], err = makeStringSlice(value, nil)
if err != nil {
return nil, err
}
default:
return nil, fmt.Errorf("invalid type %v for header %q", ty, key)
}
}
}
config := docker.RegistryHost{
Client: tr,
Host: h.Host,
Scheme: "https",
Path: "/v2",
Capabilities: docker.HostCapabilityPull | docker.HostCapabilityResolve,
Authorizer: docker.NewDockerAuthorizer(
docker.WithAuthClient(tr),
docker.WithAuthCreds(multiCredsFuncs(ref, credsFuncs...))),
Header: header,
}
if localhost, _ := docker.MatchLocalhost(config.Host); localhost || h.Insecure {
config.Scheme = "http"
}
if config.Host == "docker.io" {
config.Host = "registry-1.docker.io"
}
hosts = append(hosts, config)
}
return
}
}
func multiCredsFuncs(ref reference.Spec, credsFuncs ...Credential) func(string) (string, string, error) {
return func(host string) (string, string, error) {
for _, f := range credsFuncs {
if username, secret, err := f(host, ref); err != nil {
return "", "", err
} else if !(username == "" && secret == "") {
return username, secret, nil
}
}
return "", "", nil
}
}
// makeStringSlice is a helper func to convert from []interface{} to []string.
// Additionally an optional cb func may be passed to perform string mapping.
// NOTE: Ported from https://github.com/containerd/containerd/blob/v1.6.9/remotes/docker/config/hosts.go#L516-L533
func makeStringSlice(slice []interface{}, cb func(string) string) ([]string, error) {
out := make([]string, len(slice))
for i, value := range slice {
str, ok := value.(string)
if !ok {
return nil, fmt.Errorf("unable to cast %v to string", value)
}
if cb != nil {
out[i] = cb(str)
} else {
out[i] = str
}
}
return out, nil
}
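// exampleResolverHosts is an illustrative sketch (added for documentation;
// not part of the upstream API) showing how a mirror configuration and a
// static credential function are combined into a RegistryHosts. Host names
// and credentials are made-up placeholders.
func exampleResolverHosts() source.RegistryHosts {
	cfg := Config{
		Host: map[string]HostConfig{
			"registry.example.com": {
				Mirrors: []MirrorConfig{
					{Host: "mirror.example.com", RequestTimeoutSec: 10},
				},
			},
		},
	}
	creds := func(host string, _ reference.Spec) (string, string, error) {
		if host == "registry.example.com" {
			return "user", "pass", nil
		}
		return "", "", nil
	}
	return RegistryHostsFromConfig(cfg, creds)
}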

View File

@ -0,0 +1,145 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package service
import (
"context"
"path/filepath"
"github.com/containerd/containerd/v2/core/snapshots"
"github.com/containerd/containerd/v2/pkg/reference"
"github.com/containerd/containerd/v2/plugins/snapshots/overlay/overlayutils"
"github.com/containerd/log"
stargzfs "github.com/containerd/stargz-snapshotter/fs"
"github.com/containerd/stargz-snapshotter/fs/layer"
"github.com/containerd/stargz-snapshotter/fs/source"
"github.com/containerd/stargz-snapshotter/metadata"
esgzexternaltoc "github.com/containerd/stargz-snapshotter/nativeconverter/estargz/externaltoc"
"github.com/containerd/stargz-snapshotter/service/resolver"
snbase "github.com/containerd/stargz-snapshotter/snapshot"
"github.com/hashicorp/go-multierror"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)
type Option func(*options)
type options struct {
credsFuncs []resolver.Credential
registryHosts source.RegistryHosts
fsOpts []stargzfs.Option
}
// WithCredsFuncs specifies credsFuncs to be used for connecting to the registries.
func WithCredsFuncs(creds ...resolver.Credential) Option {
return func(o *options) {
o.credsFuncs = append(o.credsFuncs, creds...)
}
}
// WithCustomRegistryHosts specifies registry hosts to use instead of the ones built from the resolver config.
func WithCustomRegistryHosts(hosts source.RegistryHosts) Option {
return func(o *options) {
o.registryHosts = hosts
}
}
// WithFilesystemOptions allows passing filesystem-related configuration.
func WithFilesystemOptions(opts ...stargzfs.Option) Option {
return func(o *options) {
o.fsOpts = opts
}
}
// NewStargzSnapshotterService returns a stargz snapshotter.
func NewStargzSnapshotterService(ctx context.Context, root string, config *Config, opts ...Option) (snapshots.Snapshotter, error) {
var sOpts options
for _, o := range opts {
o(&sOpts)
}
hosts := sOpts.registryHosts
if hosts == nil {
// Use RegistryHosts based on ResolverConfig and keychain
hosts = resolver.RegistryHostsFromConfig(resolver.Config(config.ResolverConfig), sOpts.credsFuncs...)
}
userxattr, err := overlayutils.NeedsUserXAttr(snapshotterRoot(root))
if err != nil {
log.G(ctx).WithError(err).Warnf("cannot detect whether \"userxattr\" option needs to be used, assuming to be %v", userxattr)
}
opq := layer.OverlayOpaqueTrusted
if userxattr {
opq = layer.OverlayOpaqueUser
}
// Configure filesystem and snapshotter
fsOpts := append(sOpts.fsOpts, stargzfs.WithGetSources(sources(
sourceFromCRILabels(hosts), // provides source info based on CRI labels
source.FromDefaultLabels(hosts), // provides source info based on default labels
)),
stargzfs.WithOverlayOpaqueType(opq),
stargzfs.WithAdditionalDecompressors(func(ctx context.Context, hosts source.RegistryHosts, refspec reference.Spec, desc ocispec.Descriptor) []metadata.Decompressor {
return []metadata.Decompressor{esgzexternaltoc.NewRemoteDecompressor(ctx, hosts, refspec, desc)}
}),
)
fs, err := stargzfs.NewFilesystem(fsRoot(root), config.Config, fsOpts...)
if err != nil {
log.G(ctx).WithError(err).Fatalf("failed to configure filesystem")
}
var snapshotter snapshots.Snapshotter
snOpts := []snbase.Opt{snbase.AsynchronousRemove}
if config.SnapshotterConfig.AllowInvalidMountsOnRestart {
snOpts = append(snOpts, snbase.AllowInvalidMountsOnRestart)
}
snapshotter, err = snbase.NewSnapshotter(ctx, snapshotterRoot(root), fs, snOpts...)
if err != nil {
log.G(ctx).WithError(err).Fatalf("failed to create new snapshotter")
}
return snapshotter, err
}
func snapshotterRoot(root string) string {
return filepath.Join(root, "snapshotter")
}
func fsRoot(root string) string {
return filepath.Join(root, "stargz")
}
func sources(ps ...source.GetSources) source.GetSources {
return func(labels map[string]string) (source []source.Source, allErr error) {
for _, p := range ps {
src, err := p(labels)
if err == nil {
return src, nil
}
allErr = multierror.Append(allErr, err)
}
return
}
}
// Supported returns nil when the remote snapshotter is functional on the system with the root directory.
// Supported is not called during plugin initialization, but exposed for downstream projects which use
// this snapshotter as a library.
func Supported(root string) error {
// Remote snapshotter is implemented based on overlayfs snapshotter.
return overlayutils.Supported(snapshotterRoot(root))
}
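// exampleNewService is an illustrative sketch (added for documentation; not
// used by the plugin) of how a downstream project might embed this
// snapshotter as a library: verify the host supports it, then construct the
// service. The root path is a placeholder; config is the service Config
// defined in this package.
func exampleNewService(ctx context.Context, config *Config) (snapshots.Snapshotter, error) {
	root := "/var/lib/example-stargz" // placeholder root directory
	if err := Supported(root); err != nil {
		return nil, err
	}
	return NewStargzSnapshotterService(ctx, root, config)
}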

View File

@ -0,0 +1,765 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package snapshot
import (
"context"
"fmt"
"os"
"path/filepath"
"strings"
"syscall"
"github.com/containerd/containerd/v2/core/mount"
"github.com/containerd/containerd/v2/core/snapshots"
"github.com/containerd/containerd/v2/core/snapshots/storage"
"github.com/containerd/containerd/v2/plugins/snapshots/overlay/overlayutils"
"github.com/containerd/continuity/fs"
"github.com/containerd/errdefs"
"github.com/containerd/log"
"github.com/moby/sys/mountinfo"
"golang.org/x/sync/errgroup"
)
const (
targetSnapshotLabel = "containerd.io/snapshot.ref"
remoteLabel = "containerd.io/snapshot/remote"
remoteLabelVal = "remote snapshot"
// remoteSnapshotLogKey is a key for log line, which indicates whether
// `Prepare` method successfully prepared targeting remote snapshot or not, as
// defined in the following:
// - "true" : indicates the snapshot has been successfully prepared as a
// remote snapshot
// - "false" : indicates the snapshot failed to be prepared as a remote
// snapshot
// - null : undetermined
remoteSnapshotLogKey = "remote-snapshot-prepared"
prepareSucceeded = "true"
prepareFailed = "false"
)
// FileSystem is a backing filesystem abstraction.
//
// Mount() tries to mount a remote snapshot to the specified mount point
// directory. If it succeeds, the mountpoint directory will be treated as a layer
// snapshot. If Mount() fails, the mountpoint directory MUST be cleaned up.
// Check() is called to check the connectivity of the existing layer snapshot
// every time the layer is used by containerd.
// Unmount() is called to unmount a remote snapshot from the specified mount point
// directory.
type FileSystem interface {
Mount(ctx context.Context, mountpoint string, labels map[string]string) error
Check(ctx context.Context, mountpoint string, labels map[string]string) error
Unmount(ctx context.Context, mountpoint string) error
}
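// nopFileSystem is an illustrative sketch (added for documentation; not used
// by this snapshotter) of a minimal FileSystem implementation: Mount always
// fails, so every Prepare falls back to a normal local overlayfs snapshot,
// while Check and Unmount are no-ops. A real implementation, such as the
// stargz filesystem, mounts the remote layer at mountpoint and verifies
// registry connectivity in Check.
type nopFileSystem struct{}

func (nopFileSystem) Mount(ctx context.Context, mountpoint string, labels map[string]string) error {
	return fmt.Errorf("remote snapshots not supported by nopFileSystem")
}

func (nopFileSystem) Check(ctx context.Context, mountpoint string, labels map[string]string) error {
	return nil
}

func (nopFileSystem) Unmount(ctx context.Context, mountpoint string) error {
	return nil
}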
// SnapshotterConfig is used to configure the remote snapshotter instance
type SnapshotterConfig struct {
asyncRemove bool
noRestore bool
allowInvalidMountsOnRestart bool
}
// Opt is an option to configure the remote snapshotter
type Opt func(config *SnapshotterConfig) error
// AsynchronousRemove defers removal of filesystem content until
// the Cleanup method is called. Removals will make the snapshot
// referred to by the key unavailable and make the key immediately
// available for re-use.
func AsynchronousRemove(config *SnapshotterConfig) error {
config.asyncRemove = true
return nil
}
func NoRestore(config *SnapshotterConfig) error {
config.noRestore = true
return nil
}
func AllowInvalidMountsOnRestart(config *SnapshotterConfig) error {
config.allowInvalidMountsOnRestart = true
return nil
}
type snapshotter struct {
root string
ms *storage.MetaStore
asyncRemove bool
// fs is a filesystem that this snapshotter recognizes.
fs FileSystem
userxattr bool // whether to enable "userxattr" mount option
noRestore bool
allowInvalidMountsOnRestart bool
}
// NewSnapshotter returns a Snapshotter which can use unpacked remote layers
// as snapshots. This is implemented based on the overlayfs snapshotter, so
// diffs are stored under the provided root and a metadata file is stored under
// the root, the same as the overlayfs snapshotter.
func NewSnapshotter(ctx context.Context, root string, targetFs FileSystem, opts ...Opt) (snapshots.Snapshotter, error) {
if targetFs == nil {
return nil, fmt.Errorf("Specify filesystem to use")
}
var config SnapshotterConfig
for _, opt := range opts {
if err := opt(&config); err != nil {
return nil, err
}
}
if err := os.MkdirAll(root, 0700); err != nil {
return nil, err
}
supportsDType, err := fs.SupportsDType(root)
if err != nil {
return nil, err
}
if !supportsDType {
return nil, fmt.Errorf("%s does not support d_type. If the backing filesystem is xfs, please reformat with ftype=1 to enable d_type support", root)
}
ms, err := storage.NewMetaStore(filepath.Join(root, "metadata.db"))
if err != nil {
return nil, err
}
if err := os.Mkdir(filepath.Join(root, "snapshots"), 0700); err != nil && !os.IsExist(err) {
return nil, err
}
userxattr, err := overlayutils.NeedsUserXAttr(root)
if err != nil {
log.G(ctx).WithError(err).Warnf("cannot detect whether \"userxattr\" option needs to be used, assuming to be %v", userxattr)
}
o := &snapshotter{
root: root,
ms: ms,
asyncRemove: config.asyncRemove,
fs: targetFs,
userxattr: userxattr,
noRestore: config.noRestore,
allowInvalidMountsOnRestart: config.allowInvalidMountsOnRestart,
}
if err := o.restoreRemoteSnapshot(ctx); err != nil {
return nil, fmt.Errorf("failed to restore remote snapshot: %w", err)
}
return o, nil
}
// Stat returns the info for an active or committed snapshot by name or
// key.
//
// Should be used for parent resolution, existence checks and to discern
// the kind of snapshot.
func (o *snapshotter) Stat(ctx context.Context, key string) (snapshots.Info, error) {
ctx, t, err := o.ms.TransactionContext(ctx, false)
if err != nil {
return snapshots.Info{}, err
}
defer t.Rollback()
_, info, _, err := storage.GetInfo(ctx, key)
if err != nil {
return snapshots.Info{}, err
}
return info, nil
}
func (o *snapshotter) Update(ctx context.Context, info snapshots.Info, fieldpaths ...string) (snapshots.Info, error) {
ctx, t, err := o.ms.TransactionContext(ctx, true)
if err != nil {
return snapshots.Info{}, err
}
info, err = storage.UpdateInfo(ctx, info, fieldpaths...)
if err != nil {
t.Rollback()
return snapshots.Info{}, err
}
if err := t.Commit(); err != nil {
return snapshots.Info{}, err
}
return info, nil
}
// Usage returns the resources taken by the snapshot identified by key.
//
// For active snapshots, this will scan the usage of the overlay "diff" (aka
// "upper") directory and may take some time.
// For remote snapshots, no scan is performed; the number of inodes and the
// sizes are reported as zero.
//
// For committed snapshots, the value is returned from the metadata database.
func (o *snapshotter) Usage(ctx context.Context, key string) (snapshots.Usage, error) {
ctx, t, err := o.ms.TransactionContext(ctx, false)
if err != nil {
return snapshots.Usage{}, err
}
id, info, usage, err := storage.GetInfo(ctx, key)
t.Rollback() // transaction no longer needed at this point.
if err != nil {
return snapshots.Usage{}, err
}
upperPath := o.upperPath(id)
if info.Kind == snapshots.KindActive {
du, err := fs.DiskUsage(ctx, upperPath)
if err != nil {
// TODO(stevvooe): Consider not reporting an error in this case.
return snapshots.Usage{}, err
}
usage = snapshots.Usage(du)
}
return usage, nil
}
func (o *snapshotter) Prepare(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
s, err := o.createSnapshot(ctx, snapshots.KindActive, key, parent, opts)
if err != nil {
return nil, err
}
// Try to prepare the remote snapshot. If succeeded, we commit the snapshot now
// and return ErrAlreadyExists.
var base snapshots.Info
for _, opt := range opts {
if err := opt(&base); err != nil {
return nil, err
}
}
if target, ok := base.Labels[targetSnapshotLabel]; ok {
// NOTE: If passed labels include a target of the remote snapshot, `Prepare`
// must log whether this method succeeded in preparing that remote snapshot
// or not, using the key `remoteSnapshotLogKey` defined above. This
// log is used by tests in this project.
lCtx := log.WithLogger(ctx, log.G(ctx).WithField("key", key).WithField("parent", parent))
if err := o.prepareRemoteSnapshot(lCtx, key, base.Labels); err != nil {
log.G(lCtx).WithField(remoteSnapshotLogKey, prepareFailed).
WithError(err).Warn("failed to prepare remote snapshot")
} else {
base.Labels[remoteLabel] = remoteLabelVal // Mark this snapshot as remote
err := o.commit(ctx, true, target, key, append(opts, snapshots.WithLabels(base.Labels))...)
if err == nil || errdefs.IsAlreadyExists(err) {
// count also AlreadyExists as "success"
log.G(lCtx).WithField(remoteSnapshotLogKey, prepareSucceeded).Debug("prepared remote snapshot")
return nil, fmt.Errorf("target snapshot %q: %w", target, errdefs.ErrAlreadyExists)
}
log.G(lCtx).WithField(remoteSnapshotLogKey, prepareFailed).
WithError(err).Warn("failed to internally commit remote snapshot")
// Don't fall back here (i.e. prohibit reusing this key) because the FileSystem
// has possibly done some work on this "upper" directory.
return nil, err
}
}
return o.mounts(ctx, s, parent)
}
func (o *snapshotter) View(ctx context.Context, key, parent string, opts ...snapshots.Opt) ([]mount.Mount, error) {
s, err := o.createSnapshot(ctx, snapshots.KindView, key, parent, opts)
if err != nil {
return nil, err
}
return o.mounts(ctx, s, parent)
}
// Mounts returns the mounts for the transaction identified by key. Can be
// called on a read-write or readonly transaction.
//
// This can be used to recover mounts after calling View or Prepare.
func (o *snapshotter) Mounts(ctx context.Context, key string) ([]mount.Mount, error) {
ctx, t, err := o.ms.TransactionContext(ctx, false)
if err != nil {
return nil, err
}
s, err := storage.GetSnapshot(ctx, key)
t.Rollback()
if err != nil {
return nil, fmt.Errorf("failed to get active mount: %w", err)
}
return o.mounts(ctx, s, key)
}
func (o *snapshotter) Commit(ctx context.Context, name, key string, opts ...snapshots.Opt) error {
return o.commit(ctx, false, name, key, opts...)
}
func (o *snapshotter) commit(ctx context.Context, isRemote bool, name, key string, opts ...snapshots.Opt) error {
ctx, t, err := o.ms.TransactionContext(ctx, true)
if err != nil {
return err
}
rollback := true
defer func() {
if rollback {
if rerr := t.Rollback(); rerr != nil {
log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")
}
}
}()
// grab the existing id
id, _, usage, err := storage.GetInfo(ctx, key)
if err != nil {
return err
}
if !isRemote { // skip disk usage calculation for remote snapshots to allow lazy preparation of nodes
du, err := fs.DiskUsage(ctx, o.upperPath(id))
if err != nil {
return err
}
usage = snapshots.Usage(du)
}
if _, err = storage.CommitActive(ctx, key, name, usage, opts...); err != nil {
return fmt.Errorf("failed to commit snapshot: %w", err)
}
rollback = false
return t.Commit()
}
// Remove abandons the snapshot identified by key. The snapshot will
// immediately become unavailable and unrecoverable. Disk space will
// be freed up on the next call to `Cleanup`.
func (o *snapshotter) Remove(ctx context.Context, key string) (err error) {
ctx, t, err := o.ms.TransactionContext(ctx, true)
if err != nil {
return err
}
defer func() {
if err != nil {
if rerr := t.Rollback(); rerr != nil {
log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")
}
}
}()
_, _, err = storage.Remove(ctx, key)
if err != nil {
return fmt.Errorf("failed to remove: %w", err)
}
if !o.asyncRemove {
var removals []string
const cleanupCommitted = false
removals, err = o.getCleanupDirectories(ctx, t, cleanupCommitted)
if err != nil {
return fmt.Errorf("unable to get directories for removal: %w", err)
}
// Remove directories after the transaction is closed; failures must not
// return an error since the transaction is committed and the removed
// key is no longer available.
defer func() {
if err == nil {
for _, dir := range removals {
if err := o.cleanupSnapshotDirectory(ctx, dir); err != nil {
log.G(ctx).WithError(err).WithField("path", dir).Warn("failed to remove directory")
}
}
}
}()
}
return t.Commit()
}
// Walk the snapshots.
func (o *snapshotter) Walk(ctx context.Context, fn snapshots.WalkFunc, fs ...string) error {
ctx, t, err := o.ms.TransactionContext(ctx, false)
if err != nil {
return err
}
defer t.Rollback()
return storage.WalkInfo(ctx, fn, fs...)
}
// Cleanup cleans up disk resources from removed or abandoned snapshots
func (o *snapshotter) Cleanup(ctx context.Context) error {
const cleanupCommitted = false
return o.cleanup(ctx, cleanupCommitted)
}
func (o *snapshotter) cleanup(ctx context.Context, cleanupCommitted bool) error {
cleanup, err := o.cleanupDirectories(ctx, cleanupCommitted)
if err != nil {
return err
}
log.G(ctx).Debugf("cleanup: dirs=%v", cleanup)
for _, dir := range cleanup {
if err := o.cleanupSnapshotDirectory(ctx, dir); err != nil {
log.G(ctx).WithError(err).WithField("path", dir).Warn("failed to remove directory")
}
}
return nil
}
func (o *snapshotter) cleanupDirectories(ctx context.Context, cleanupCommitted bool) ([]string, error) {
// Get a write transaction to ensure no other write transaction can be entered
// while the cleanup is scanning.
ctx, t, err := o.ms.TransactionContext(ctx, true)
if err != nil {
return nil, err
}
defer t.Rollback()
return o.getCleanupDirectories(ctx, t, cleanupCommitted)
}
func (o *snapshotter) getCleanupDirectories(ctx context.Context, t storage.Transactor, cleanupCommitted bool) ([]string, error) {
ids, err := storage.IDMap(ctx)
if err != nil {
return nil, err
}
snapshotDir := filepath.Join(o.root, "snapshots")
fd, err := os.Open(snapshotDir)
if err != nil {
return nil, err
}
defer fd.Close()
dirs, err := fd.Readdirnames(0)
if err != nil {
return nil, err
}
cleanup := []string{}
for _, d := range dirs {
if !cleanupCommitted {
if _, ok := ids[d]; ok {
continue
}
}
cleanup = append(cleanup, filepath.Join(snapshotDir, d))
}
return cleanup, nil
}
func (o *snapshotter) cleanupSnapshotDirectory(ctx context.Context, dir string) error {
// On a remote snapshot, the layer is mounted on the "fs" directory.
// We use Filesystem's Unmount API so that it can do necessary finalization
// before/after the unmount.
mp := filepath.Join(dir, "fs")
if err := o.fs.Unmount(ctx, mp); err != nil {
log.G(ctx).WithError(err).WithField("dir", mp).Debug("failed to unmount")
}
if err := os.RemoveAll(dir); err != nil {
return fmt.Errorf("failed to remove directory %q: %w", dir, err)
}
return nil
}
func (o *snapshotter) createSnapshot(ctx context.Context, kind snapshots.Kind, key, parent string, opts []snapshots.Opt) (_ storage.Snapshot, err error) {
ctx, t, err := o.ms.TransactionContext(ctx, true)
if err != nil {
return storage.Snapshot{}, err
}
var td, path string
defer func() {
if err != nil {
if td != "" {
if err1 := o.cleanupSnapshotDirectory(ctx, td); err1 != nil {
log.G(ctx).WithError(err1).Warn("failed to cleanup temp snapshot directory")
}
}
if path != "" {
if err1 := o.cleanupSnapshotDirectory(ctx, path); err1 != nil {
log.G(ctx).WithError(err1).WithField("path", path).Error("failed to reclaim snapshot directory, directory may need removal")
err = fmt.Errorf("failed to remove path: %v: %w", err1, err)
}
}
}
}()
snapshotDir := filepath.Join(o.root, "snapshots")
td, err = o.prepareDirectory(ctx, snapshotDir, kind)
if err != nil {
if rerr := t.Rollback(); rerr != nil {
log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")
}
return storage.Snapshot{}, fmt.Errorf("failed to create prepare snapshot dir: %w", err)
}
rollback := true
defer func() {
if rollback {
if rerr := t.Rollback(); rerr != nil {
log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")
}
}
}()
s, err := storage.CreateSnapshot(ctx, kind, key, parent, opts...)
if err != nil {
return storage.Snapshot{}, fmt.Errorf("failed to create snapshot: %w", err)
}
if len(s.ParentIDs) > 0 {
st, err := os.Stat(o.upperPath(s.ParentIDs[0]))
if err != nil {
return storage.Snapshot{}, fmt.Errorf("failed to stat parent: %w", err)
}
stat := st.Sys().(*syscall.Stat_t)
if err := os.Lchown(filepath.Join(td, "fs"), int(stat.Uid), int(stat.Gid)); err != nil {
if rerr := t.Rollback(); rerr != nil {
log.G(ctx).WithError(rerr).Warn("failed to rollback transaction")
}
return storage.Snapshot{}, fmt.Errorf("failed to chown: %w", err)
}
}
path = filepath.Join(snapshotDir, s.ID)
if err = os.Rename(td, path); err != nil {
return storage.Snapshot{}, fmt.Errorf("failed to rename: %w", err)
}
td = ""
rollback = false
if err = t.Commit(); err != nil {
return storage.Snapshot{}, fmt.Errorf("commit failed: %w", err)
}
return s, nil
}
func (o *snapshotter) prepareDirectory(ctx context.Context, snapshotDir string, kind snapshots.Kind) (string, error) {
td, err := os.MkdirTemp(snapshotDir, "new-")
if err != nil {
return "", fmt.Errorf("failed to create temp dir: %w", err)
}
if err := os.Mkdir(filepath.Join(td, "fs"), 0755); err != nil {
return td, err
}
if kind == snapshots.KindActive {
if err := os.Mkdir(filepath.Join(td, "work"), 0711); err != nil {
return td, err
}
}
return td, nil
}
func (o *snapshotter) mounts(ctx context.Context, s storage.Snapshot, checkKey string) ([]mount.Mount, error) {
// Make sure that all layers lower than the target layer are available
if checkKey != "" && !o.checkAvailability(ctx, checkKey) {
return nil, fmt.Errorf("layer %q unavailable: %w", s.ID, errdefs.ErrUnavailable)
}
if len(s.ParentIDs) == 0 {
// if we only have one layer/no parents then just return a bind mount as overlay
// will not work
roFlag := "rw"
if s.Kind == snapshots.KindView {
roFlag = "ro"
}
return []mount.Mount{
{
Source: o.upperPath(s.ID),
Type: "bind",
Options: []string{
roFlag,
"rbind",
},
},
}, nil
}
var options []string
if s.Kind == snapshots.KindActive {
options = append(options,
fmt.Sprintf("workdir=%s", o.workPath(s.ID)),
fmt.Sprintf("upperdir=%s", o.upperPath(s.ID)),
)
} else if len(s.ParentIDs) == 1 {
return []mount.Mount{
{
Source: o.upperPath(s.ParentIDs[0]),
Type: "bind",
Options: []string{
"ro",
"rbind",
},
},
}, nil
}
parentPaths := make([]string, len(s.ParentIDs))
for i := range s.ParentIDs {
parentPaths[i] = o.upperPath(s.ParentIDs[i])
}
options = append(options, fmt.Sprintf("lowerdir=%s", strings.Join(parentPaths, ":")))
if o.userxattr {
options = append(options, "userxattr")
}
return []mount.Mount{
{
Type: "overlay",
Source: "overlay",
Options: options,
},
}, nil
}
func (o *snapshotter) upperPath(id string) string {
return filepath.Join(o.root, "snapshots", id, "fs")
}
func (o *snapshotter) workPath(id string) string {
return filepath.Join(o.root, "snapshots", id, "work")
}
// Close closes the snapshotter
func (o *snapshotter) Close() error {
// unmount all mounts including Committed
const cleanupCommitted = true
ctx := context.Background()
if err := o.cleanup(ctx, cleanupCommitted); err != nil {
log.G(ctx).WithError(err).Warn("failed to cleanup")
}
return o.ms.Close()
}
// prepareRemoteSnapshot tries to prepare the snapshot as a remote snapshot
// using filesystems registered in this snapshotter.
func (o *snapshotter) prepareRemoteSnapshot(ctx context.Context, key string, labels map[string]string) error {
ctx, t, err := o.ms.TransactionContext(ctx, false)
if err != nil {
return err
}
defer t.Rollback()
id, _, _, err := storage.GetInfo(ctx, key)
if err != nil {
return err
}
mountpoint := o.upperPath(id)
log.G(ctx).Infof("preparing filesystem mount at mountpoint=%v", mountpoint)
return o.fs.Mount(ctx, mountpoint, labels)
}
// checkAvailability checks availability of the specified layer and all lower
// layers using filesystem's checking functionality.
func (o *snapshotter) checkAvailability(ctx context.Context, key string) bool {
log.G(ctx).WithField("key", key).Debug("checking layer availability")
ctx, t, err := o.ms.TransactionContext(ctx, false)
if err != nil {
log.G(ctx).WithError(err).Warn("failed to get transaction")
return false
}
defer t.Rollback()
eg, egCtx := errgroup.WithContext(ctx)
for cKey := key; cKey != ""; {
id, info, _, err := storage.GetInfo(ctx, cKey)
if err != nil {
log.G(ctx).WithError(err).Warnf("failed to get info of %q", cKey)
return false
}
mp := o.upperPath(id)
lCtx := log.WithLogger(ctx, log.G(ctx).WithField("mount-point", mp))
if _, ok := info.Labels[remoteLabel]; ok {
eg.Go(func() error {
log.G(lCtx).Debug("checking mount point")
if err := o.fs.Check(egCtx, mp, info.Labels); err != nil {
log.G(lCtx).WithError(err).Warn("layer is unavailable")
return err
}
return nil
})
} else {
log.G(lCtx).Debug("layer is normal snapshot(overlayfs)")
}
cKey = info.Parent
}
if err := eg.Wait(); err != nil {
return false
}
return true
}
func (o *snapshotter) restoreRemoteSnapshot(ctx context.Context) error {
mounts, err := mountinfo.GetMounts(nil)
if err != nil {
return err
}
for _, m := range mounts {
if strings.HasPrefix(m.Mountpoint, filepath.Join(o.root, "snapshots")) {
if err := syscall.Unmount(m.Mountpoint, syscall.MNT_FORCE); err != nil {
return fmt.Errorf("failed to unmount %s: %w", m.Mountpoint, err)
}
}
}
if o.noRestore {
return nil
}
var task []snapshots.Info
if err := o.Walk(ctx, func(ctx context.Context, info snapshots.Info) error {
if _, ok := info.Labels[remoteLabel]; ok {
task = append(task, info)
}
return nil
}); err != nil && !errdefs.IsNotFound(err) {
return err
}
for _, info := range task {
if err := o.prepareRemoteSnapshot(ctx, info.Name, info.Labels); err != nil {
if o.allowInvalidMountsOnRestart {
log.G(ctx).WithError(err).Warnf("failed to restore remote snapshot %s; remove this snapshot manually", info.Name)
// This snapshot mount is invalid, but we allow it.
// NOTE: snapshotter.Mount() will fail to return the mountpoint of these invalid snapshots so
// containerd cannot use them anymore. User needs to manually remove the snapshots from
// containerd's metadata store using ctr (e.g. `ctr snapshot rm`).
continue
}
return fmt.Errorf("failed to prepare remote snapshot: %s: %w", info.Name, err)
}
}
return nil
}

View File

@ -0,0 +1,155 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package task
import (
"context"
"sync"
"sync/atomic"
"time"
"golang.org/x/sync/semaphore"
)
// NewBackgroundTaskManager provides a task manager. You can specify the
// concurrency of background tasks. When running a background task, this will be
// forced to wait until no prioritized task is running for some period. You can
// specify the period through the argument of this function, too.
func NewBackgroundTaskManager(concurrency int64, period time.Duration) *BackgroundTaskManager {
return &BackgroundTaskManager{
backgroundSem: semaphore.NewWeighted(concurrency),
prioritizedTaskSilencePeriod: period,
prioritizedTaskStartNotify: make(chan struct{}),
prioritizedTaskDoneCond: sync.NewCond(&sync.Mutex{}),
}
}
// BackgroundTaskManager is a task manager which manages prioritized tasks and
// background tasks execution. Background tasks are less important than
// prioritized tasks. You can prevent these background tasks from using compute
// resources (CPU, network, etc.) while more important (prioritized) tasks are
// running.
//
// When you run a prioritized task and don't want background tasks to use
// resources, tell this manager by calling the DoPrioritizedTask method. The
// DonePrioritizedTask method must be called at the end of the prioritized
// task's execution.
//
// For running a background task, you can use InvokeBackgroundTask method. The
// background task must be able to be cancelled via context.Context argument.
// The task is forced to wait until no prioritized task is running for some
// period. You can specify the period when making this manager instance. The
// limited number of background tasks run simultaneously and you can specify the
// concurrency when making this manager instance too. If a prioritized task
// starts during the execution of background tasks, all background tasks running
// will be cancelled via context. These cancelled tasks will be executed again
// later, same as other background tasks (when no prioritized task is running
// for some period).
type BackgroundTaskManager struct {
prioritizedTasks int64
backgroundSem *semaphore.Weighted
prioritizedTaskSilencePeriod time.Duration
prioritizedTaskStartNotify chan struct{}
prioritizedTaskStartNotifyMu sync.Mutex
prioritizedTaskDoneCond *sync.Cond
}
// DoPrioritizedTask tells the manager that we are running a prioritized task
// and don't want background tasks to disturb resources (CPU, network, etc.)
func (ts *BackgroundTaskManager) DoPrioritizedTask() {
// Notify the prioritized task execution to background tasks.
ts.prioritizedTaskStartNotifyMu.Lock()
atomic.AddInt64(&ts.prioritizedTasks, 1)
close(ts.prioritizedTaskStartNotify)
ts.prioritizedTaskStartNotify = make(chan struct{})
ts.prioritizedTaskStartNotifyMu.Unlock()
}
// DonePrioritizedTask tells the manager that we've done a prioritized task
// and don't want background tasks to disturb resources (CPU, network, etc.)
func (ts *BackgroundTaskManager) DonePrioritizedTask() {
go func() {
// Notify the task completion after `ts.prioritizedTaskSilencePeriod`
// so that background tasks aren't invoked immediately.
time.Sleep(ts.prioritizedTaskSilencePeriod)
atomic.AddInt64(&ts.prioritizedTasks, -1)
ts.prioritizedTaskDoneCond.L.Lock()
ts.prioritizedTaskDoneCond.Broadcast()
ts.prioritizedTaskDoneCond.L.Unlock()
}()
}
// InvokeBackgroundTask invokes a background task. The task is started only when
// no prioritized tasks are running. A prioritized task's execution stops the
// execution of all background tasks. The background task must be cancellable
// via its context.Context argument and must be able to be restarted.
func (ts *BackgroundTaskManager) InvokeBackgroundTask(do func(context.Context), timeout time.Duration) {
for {
// Wait until all prioritized tasks are done
for {
if atomic.LoadInt64(&ts.prioritizedTasks) <= 0 {
break
}
// waits until a prioritized task is done
ts.prioritizedTaskDoneCond.L.Lock()
if atomic.LoadInt64(&ts.prioritizedTasks) > 0 {
ts.prioritizedTaskDoneCond.Wait()
}
ts.prioritizedTaskDoneCond.L.Unlock()
}
// A limited number of background tasks can run at once.
// If prioritized tasks are running, cancel this task.
if func() bool {
ts.backgroundSem.Acquire(context.Background(), 1)
defer ts.backgroundSem.Release(1)
// Get notified of prioritized task executions.
ts.prioritizedTaskStartNotifyMu.Lock()
ch := ts.prioritizedTaskStartNotify
tasks := atomic.LoadInt64(&ts.prioritizedTasks)
ts.prioritizedTaskStartNotifyMu.Unlock()
if tasks > 0 {
return false
}
// Invoke the background task. If some prioritized tasks are added during its
// execution, cancel it and try it again later.
var (
done = make(chan struct{})
ctx, cancel = context.WithTimeout(context.Background(), timeout)
)
defer cancel()
go func() {
do(ctx)
close(done)
}()
// Wait until the background task is done or canceled.
select {
case <-ch: // some prioritized tasks started; retry it later
cancel()
return false
case <-done: // All tasks completed
}
return true
}() {
break
}
}
}
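// exampleTaskManagerUsage is an illustrative sketch (added for documentation;
// not used by the snapshotter) of the intended call pattern: prioritized work
// (e.g. serving an image pull) is wrapped in DoPrioritizedTask and
// DonePrioritizedTask, while background work is submitted through
// InvokeBackgroundTask so it pauses whenever prioritized work is in flight.
// The durations and the background task body are placeholders.
func exampleTaskManagerUsage() {
	tm := NewBackgroundTaskManager(2, 500*time.Millisecond)

	// Background work: cancelled and retried later if prioritized work starts.
	go tm.InvokeBackgroundTask(func(ctx context.Context) {
		// placeholder: do cancellable background work until ctx is done
		<-ctx.Done()
	}, 10*time.Second)

	// Prioritized work: background tasks are paused while this runs.
	tm.DoPrioritizedTask()
	defer tm.DonePrioritizedTask()
	// ... serve the prioritized request here ...
}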

View File

@ -0,0 +1,140 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cacheutil
import (
"sync"
"github.com/golang/groupcache/lru"
)
// LRUCache is a "groupcache/lru"-like cache. The difference is that "groupcache/lru" immediately
// finalizes the evicted contents using the OnEvicted callback, but our version strictly tracks the
// reference counts of contents and calls OnEvicted when nobody refers to the evicted contents.
type LRUCache struct {
cache *lru.Cache
mu sync.Mutex
// OnEvicted optionally specifies a callback function to be
// executed when an entry is purged from the cache.
OnEvicted func(key string, value interface{})
}
// NewLRUCache creates a new LRU cache.
func NewLRUCache(maxEntries int) *LRUCache {
inner := lru.New(maxEntries)
inner.OnEvicted = func(key lru.Key, value interface{}) {
// Decrease the ref count incremented in Add().
// When nobody refers to this value, this value will be finalized via refCounter.
value.(*refCounter).finalize()
}
return &LRUCache{
cache: inner,
}
}
// Get retrieves the specified object from the cache and increments the reference counter of the
// target content. Client must call `done` callback to decrease the reference count when the value
// will no longer be used.
func (c *LRUCache) Get(key string) (value interface{}, done func(), ok bool) {
c.mu.Lock()
defer c.mu.Unlock()
o, ok := c.cache.Get(key)
if !ok {
return nil, nil, false
}
rc := o.(*refCounter)
rc.inc()
return rc.v, c.decreaseOnceFunc(rc), true
}
// Add adds object to the cache and returns the cached contents with incrementing the reference count.
// If the specified content already exists in the cache, this sets `added` to false and returns
// "already cached" content (i.e. doesn't replace the content with the new one). Client must call
// `done` callback to decrease the counter when the value will no longer be used.
func (c *LRUCache) Add(key string, value interface{}) (cachedValue interface{}, done func(), added bool) {
c.mu.Lock()
defer c.mu.Unlock()
if o, ok := c.cache.Get(key); ok {
rc := o.(*refCounter)
rc.inc()
return rc.v, c.decreaseOnceFunc(rc), false
}
rc := &refCounter{
key: key,
v: value,
onEvicted: c.OnEvicted,
}
rc.initialize() // Keep this object having at least 1 ref count (will be decreased in OnEviction)
rc.inc() // The client references this object (will be decreased on "done")
c.cache.Add(key, rc)
return rc.v, c.decreaseOnceFunc(rc), true
}
// Remove removes the specified contents from the cache. OnEvicted callback will be called when
// nobody refers to the removed content.
func (c *LRUCache) Remove(key string) {
c.mu.Lock()
defer c.mu.Unlock()
c.cache.Remove(key)
}
func (c *LRUCache) decreaseOnceFunc(rc *refCounter) func() {
var once sync.Once
return func() {
c.mu.Lock()
defer c.mu.Unlock()
once.Do(func() { rc.dec() })
}
}
type refCounter struct {
onEvicted func(key string, value interface{})
key string
v interface{}
refCounts int64
mu sync.Mutex
initializeOnce sync.Once
finalizeOnce sync.Once
}
func (r *refCounter) inc() {
r.mu.Lock()
defer r.mu.Unlock()
r.refCounts++
}
func (r *refCounter) dec() {
r.mu.Lock()
defer r.mu.Unlock()
r.refCounts--
if r.refCounts <= 0 && r.onEvicted != nil {
// nobody refers to this object anymore
r.onEvicted(r.key, r.v)
}
}
func (r *refCounter) initialize() {
r.initializeOnce.Do(func() { r.inc() })
}
func (r *refCounter) finalize() {
r.finalizeOnce.Do(func() { r.dec() })
}
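// exampleLRUCacheUsage is an illustrative sketch (added for documentation;
// not used by the snapshotter) of the reference-counting contract: every
// Get/Add must be paired with the returned done() callback, and OnEvicted
// fires only after the entry has been evicted from the LRU *and* all
// outstanding references are released. Keys and values are placeholders.
func exampleLRUCacheUsage() {
	c := NewLRUCache(1)
	c.OnEvicted = func(key string, value interface{}) {
		// finalize the value here: close files, free buffers, etc.
	}

	v, done, _ := c.Add("layer-a", "contents of layer a") // placeholder value
	_ = v
	c.Add("layer-b", "contents of layer b") // evicts "layer-a" from the LRU
	// "layer-a" is still referenced, so OnEvicted has not fired yet.
	done() // last reference released; OnEvicted("layer-a", ...) fires now
}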

View File

@ -0,0 +1,115 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cacheutil
import (
"sync"
"time"
)
// TTLCache is a TTL-based cache with reference counters.
// Each element is deleted as soon as its configured TTL expires.
type TTLCache struct {
m map[string]*refCounterWithTimer
mu sync.Mutex
ttl time.Duration
// OnEvicted optionally specifies a callback function to be
// executed when an entry is purged from the cache.
OnEvicted func(key string, value interface{})
}
// NewTTLCache creates a new ttl-based cache.
func NewTTLCache(ttl time.Duration) *TTLCache {
return &TTLCache{
m: make(map[string]*refCounterWithTimer),
ttl: ttl,
}
}
// Get retrieves the specified object from the cache and increments the reference counter of the
// target content. Client must call `done` callback to decrease the reference count when the value
// will no longer be used.
func (c *TTLCache) Get(key string) (value interface{}, done func(), ok bool) {
c.mu.Lock()
defer c.mu.Unlock()
rc, ok := c.m[key]
if !ok {
return nil, nil, false
}
rc.inc()
return rc.v, c.decreaseOnceFunc(rc), true
}
// Add adds object to the cache and returns the cached contents with incrementing the reference count.
// If the specified content already exists in the cache, this sets `added` to false and returns
// "already cached" content (i.e. doesn't replace the content with the new one). Client must call
// `done` callback to decrease the counter when the value will no longer be used.
func (c *TTLCache) Add(key string, value interface{}) (cachedValue interface{}, done func(), added bool) {
c.mu.Lock()
defer c.mu.Unlock()
if rc, ok := c.m[key]; ok {
rc.inc()
return rc.v, c.decreaseOnceFunc(rc), false
}
rc := &refCounterWithTimer{
refCounter: &refCounter{
key: key,
v: value,
onEvicted: c.OnEvicted,
},
}
rc.initialize() // Keep this object having at least 1 ref count (will be decreased in OnEviction)
rc.inc() // The client references this object (will be decreased on "done")
rc.t = time.AfterFunc(c.ttl, func() {
c.mu.Lock()
defer c.mu.Unlock()
c.evictLocked(key)
})
c.m[key] = rc
return rc.v, c.decreaseOnceFunc(rc), true
}
// Remove removes the specified contents from the cache. OnEvicted callback will be called when
// nobody refers to the removed content.
func (c *TTLCache) Remove(key string) {
c.mu.Lock()
defer c.mu.Unlock()
c.evictLocked(key)
}
func (c *TTLCache) evictLocked(key string) {
if rc, ok := c.m[key]; ok {
delete(c.m, key)
rc.t.Stop() // stop the timer so that it does not try to evict this content again
rc.finalize()
}
}
func (c *TTLCache) decreaseOnceFunc(rc *refCounterWithTimer) func() {
var once sync.Once
return func() {
c.mu.Lock()
defer c.mu.Unlock()
once.Do(func() { rc.dec() })
}
}
type refCounterWithTimer struct {
*refCounter
t *time.Timer
}
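// exampleTTLCacheUsage is an illustrative sketch (added for documentation;
// not used by the snapshotter): an entry added to a TTLCache is removed from
// the map once its TTL expires, but OnEvicted only fires after the client has
// also released its reference via done(). Key, value, and durations are
// placeholders.
func exampleTTLCacheUsage() {
	c := NewTTLCache(100 * time.Millisecond)
	c.OnEvicted = func(key string, value interface{}) {
		// finalize the value here
	}

	_, done, _ := c.Add("blob", []byte("cached data")) // placeholder content
	time.Sleep(200 * time.Millisecond)                 // TTL expires; entry leaves the map
	done()                                             // last reference released; OnEvicted fires
}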

View File

@ -0,0 +1,220 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package containerdutil
import (
"context"
"encoding/json"
"fmt"
"io"
"sort"
"time"
"github.com/containerd/containerd/v2/core/content"
"github.com/containerd/containerd/v2/core/images"
"github.com/containerd/containerd/v2/core/remotes"
"github.com/containerd/errdefs"
"github.com/containerd/platforms"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)
func ManifestDesc(ctx context.Context, provider content.Provider, image ocispec.Descriptor, platform platforms.MatchComparer) (ocispec.Descriptor, error) {
var (
limit = 1
m []ocispec.Descriptor
wasIndex bool
)
if err := images.Walk(ctx, images.HandlerFunc(func(ctx context.Context, desc ocispec.Descriptor) ([]ocispec.Descriptor, error) {
switch desc.MediaType {
case images.MediaTypeDockerSchema2Manifest, ocispec.MediaTypeImageManifest:
p, err := content.ReadBlob(ctx, provider, desc)
if err != nil {
return nil, err
}
if err := ValidateMediaType(p, desc.MediaType); err != nil {
return nil, err
}
var manifest ocispec.Manifest
if err := json.Unmarshal(p, &manifest); err != nil {
return nil, err
}
if desc.Digest != image.Digest {
if desc.Platform != nil && !platform.Match(*desc.Platform) {
return nil, nil
}
if desc.Platform == nil {
p, err := content.ReadBlob(ctx, provider, manifest.Config)
if err != nil {
return nil, err
}
var image ocispec.Image
if err := json.Unmarshal(p, &image); err != nil {
return nil, err
}
if !platform.Match(platforms.Normalize(ocispec.Platform{OS: image.OS, Architecture: image.Architecture})) {
return nil, nil
}
}
}
m = append(m, desc)
return nil, nil
case images.MediaTypeDockerSchema2ManifestList, ocispec.MediaTypeImageIndex:
p, err := content.ReadBlob(ctx, provider, desc)
if err != nil {
return nil, err
}
if err := ValidateMediaType(p, desc.MediaType); err != nil {
return nil, err
}
var idx ocispec.Index
if err := json.Unmarshal(p, &idx); err != nil {
return nil, err
}
var descs []ocispec.Descriptor
for _, d := range idx.Manifests {
if d.Platform == nil || platform.Match(*d.Platform) {
descs = append(descs, d)
}
}
sort.SliceStable(descs, func(i, j int) bool {
if descs[i].Platform == nil {
return false
}
if descs[j].Platform == nil {
return true
}
return platform.Less(*descs[i].Platform, *descs[j].Platform)
})
wasIndex = true
if len(descs) > limit {
return descs[:limit], nil
}
return descs, nil
}
return nil, fmt.Errorf("unexpected media type %v for %v: %w", desc.MediaType, desc.Digest, errdefs.ErrNotFound)
}), image); err != nil {
return ocispec.Descriptor{}, err
}
if len(m) == 0 {
err := fmt.Errorf("manifest %v: %w", image.Digest, errdefs.ErrNotFound)
if wasIndex {
err = fmt.Errorf("no match for platform in manifest %v: %w", image.Digest, errdefs.ErrNotFound)
}
return ocispec.Descriptor{}, err
}
return m[0], nil
}
// Forked from github.com/containerd/containerd/v2/core/images/image.go
// commit: a776a27af54a803657d002e7574a4425b3949f56
// unknownDocument represents a manifest, manifest list, or index that has not
// yet been validated.
type unknownDocument struct {
MediaType string `json:"mediaType,omitempty"`
Config json.RawMessage `json:"config,omitempty"`
Layers json.RawMessage `json:"layers,omitempty"`
Manifests json.RawMessage `json:"manifests,omitempty"`
FSLayers json.RawMessage `json:"fsLayers,omitempty"` // schema 1
}
// ValidateMediaType returns an error if the byte slice is invalid JSON or if
// the media type identifies the blob as one format but it contains elements of
// another format.
func ValidateMediaType(b []byte, mt string) error {
var doc unknownDocument
if err := json.Unmarshal(b, &doc); err != nil {
return err
}
if len(doc.FSLayers) != 0 {
return fmt.Errorf("media-type: schema 1 not supported")
}
switch mt {
case images.MediaTypeDockerSchema2Manifest, ocispec.MediaTypeImageManifest:
if len(doc.Manifests) != 0 ||
doc.MediaType == images.MediaTypeDockerSchema2ManifestList ||
doc.MediaType == ocispec.MediaTypeImageIndex {
return fmt.Errorf("media-type: expected manifest but found index (%s)", mt)
}
case images.MediaTypeDockerSchema2ManifestList, ocispec.MediaTypeImageIndex:
if len(doc.Config) != 0 || len(doc.Layers) != 0 ||
doc.MediaType == images.MediaTypeDockerSchema2Manifest ||
doc.MediaType == ocispec.MediaTypeImageManifest {
return fmt.Errorf("media-type: expected index but found manifest (%s)", mt)
}
}
return nil
}
// Fetch manifest of the specified platform
func FetchManifestPlatform(ctx context.Context, fetcher remotes.Fetcher, desc ocispec.Descriptor, platform ocispec.Platform) (ocispec.Manifest, error) {
ctx, cancel := context.WithTimeout(ctx, time.Minute)
defer cancel()
r, err := fetcher.Fetch(ctx, desc)
if err != nil {
return ocispec.Manifest{}, err
}
defer r.Close()
var manifest ocispec.Manifest
switch desc.MediaType {
case images.MediaTypeDockerSchema2Manifest, ocispec.MediaTypeImageManifest:
p, err := io.ReadAll(r)
if err != nil {
return ocispec.Manifest{}, err
}
if err := ValidateMediaType(p, desc.MediaType); err != nil {
return ocispec.Manifest{}, err
}
if err := json.Unmarshal(p, &manifest); err != nil {
return ocispec.Manifest{}, err
}
return manifest, nil
case images.MediaTypeDockerSchema2ManifestList, ocispec.MediaTypeImageIndex:
var index ocispec.Index
p, err := io.ReadAll(r)
if err != nil {
return ocispec.Manifest{}, err
}
if err := ValidateMediaType(p, desc.MediaType); err != nil {
return ocispec.Manifest{}, err
}
if err = json.Unmarshal(p, &index); err != nil {
return ocispec.Manifest{}, err
}
var target ocispec.Descriptor
found := false
for _, m := range index.Manifests {
p := platforms.DefaultSpec()
if m.Platform != nil {
p = *m.Platform
}
if !platforms.NewMatcher(platform).Match(p) {
continue
}
target = m
found = true
break
}
if !found {
return ocispec.Manifest{}, fmt.Errorf("no manifest found for platform")
}
return FetchManifestPlatform(ctx, fetcher, target, platform)
}
return ocispec.Manifest{}, fmt.Errorf("unknown mediatype %q", desc.MediaType)
}
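
As a rough illustration of ValidateMediaType above, the sketch below feeds it an index payload that claims to be a manifest and expects an error; the import path for this containerdutil package is an assumption.

package main

import (
	"fmt"

	ocispec "github.com/opencontainers/image-spec/specs-go/v1"

	"example.com/pkg/containerdutil" // illustrative path for the package above
)

func main() {
	// An index body declared as a manifest should be rejected.
	blob := []byte(`{"mediaType":"application/vnd.oci.image.index.v1+json","manifests":[]}`)
	if err := containerdutil.ValidateMediaType(blob, ocispec.MediaTypeImageManifest); err != nil {
		fmt.Println("rejected:", err)
	}
}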

View File

@ -0,0 +1,38 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ioutils
import "sync"
type CountWriter struct {
n int64
mu sync.Mutex
}
func (c *CountWriter) Write(p []byte) (n int, err error) {
c.mu.Lock()
c.n += int64(len(p))
c.mu.Unlock()
return len(p), nil
}
func (c *CountWriter) Size() (n int64) {
c.mu.Lock()
n = c.n
c.mu.Unlock()
return
}
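
A small sketch of CountWriter above used to measure how many bytes flow through an io.Copy; the ioutils import path is illustrative.

package main

import (
	"fmt"
	"io"
	"strings"

	"example.com/pkg/ioutils" // illustrative path for the package above
)

func main() {
	cw := &ioutils.CountWriter{}
	// Tee the stream through the counter while discarding the data itself.
	if _, err := io.Copy(io.MultiWriter(io.Discard, cw), strings.NewReader("hello world")); err != nil {
		panic(err)
	}
	fmt.Println(cw.Size()) // 11
}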

View File

@ -0,0 +1,62 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package namedmutex provides NamedMutex, which wraps sync.Mutex
// and provides namespaced mutexes.
package namedmutex
import (
"sync"
)
// NamedMutex wraps sync.Mutex and provides namespaced mutexes.
type NamedMutex struct {
muMap map[string]*sync.Mutex
refMap map[string]int
mu sync.Mutex
}
// Lock locks the mutex of the given name
func (nl *NamedMutex) Lock(name string) {
nl.mu.Lock()
if nl.muMap == nil {
nl.muMap = make(map[string]*sync.Mutex)
}
if nl.refMap == nil {
nl.refMap = make(map[string]int)
}
if _, ok := nl.muMap[name]; !ok {
nl.muMap[name] = &sync.Mutex{}
}
mu := nl.muMap[name]
nl.refMap[name]++
nl.mu.Unlock()
mu.Lock()
}
// Unlock unlocks the mutex of the given name
func (nl *NamedMutex) Unlock(name string) {
nl.mu.Lock()
mu := nl.muMap[name]
nl.refMap[name]--
if nl.refMap[name] <= 0 {
delete(nl.muMap, name)
delete(nl.refMap, name)
}
nl.mu.Unlock()
mu.Unlock()
}
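
A short sketch of the per-name locking above; only goroutines sharing a key serialize, while different keys proceed concurrently. The namedmutex import path is illustrative.

package main

import (
	"fmt"
	"sync"

	"example.com/pkg/namedmutex" // illustrative path for the package above
)

func main() {
	var nm namedmutex.NamedMutex // zero value is ready to use; maps are created lazily
	var wg sync.WaitGroup
	for _, key := range []string{"layer-a", "layer-b", "layer-a"} {
		wg.Add(1)
		go func(key string) {
			defer wg.Done()
			nm.Lock(key) // serializes only work on the same key
			defer nm.Unlock(key)
			fmt.Println("working on", key)
		}(key)
	}
	wg.Wait()
}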

View File

@ -0,0 +1,80 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package testutil
import (
"bytes"
"io"
"github.com/containerd/stargz-snapshotter/estargz"
esgzexternaltoc "github.com/containerd/stargz-snapshotter/estargz/externaltoc"
"github.com/containerd/stargz-snapshotter/estargz/zstdchunked"
"github.com/klauspost/compress/zstd"
)
type Compression interface {
estargz.Compressor
estargz.Decompressor
// DecompressTOC decompresses the passed blob and returns a reader of TOC JSON.
// This is needed so that it can be used from the metadata package.
DecompressTOC(io.Reader) (tocJSON io.ReadCloser, err error)
}
type CompressionFactory func() Compression
type zstdCompression struct {
*zstdchunked.Compressor
*zstdchunked.Decompressor
}
func ZstdCompressionWithLevel(compressionLevel zstd.EncoderLevel) CompressionFactory {
return func() Compression {
return &zstdCompression{&zstdchunked.Compressor{CompressionLevel: compressionLevel}, &zstdchunked.Decompressor{}}
}
}
type gzipCompression struct {
*estargz.GzipCompressor
*estargz.GzipDecompressor
}
func GzipCompressionWithLevel(compressionLevel int) CompressionFactory {
return func() Compression {
return gzipCompression{estargz.NewGzipCompressorWithLevel(compressionLevel), &estargz.GzipDecompressor{}}
}
}
type externalTOCGzipCompression struct {
*esgzexternaltoc.GzipCompressor
*esgzexternaltoc.GzipDecompressor
}
func ExternalTOCGzipCompressionWithLevel(compressionLevel int) CompressionFactory {
return func() Compression {
compressor := esgzexternaltoc.NewGzipCompressorWithLevel(compressionLevel)
decompressor := esgzexternaltoc.NewGzipDecompressor(func() ([]byte, error) {
buf := new(bytes.Buffer)
if _, err := compressor.WriteTOCTo(buf); err != nil {
return nil, err
}
return buf.Bytes(), nil
})
return &externalTOCGzipCompression{compressor, decompressor}
}
}
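
A brief sketch of picking one of the compression factories above in a test; each factory returns a value that satisfies both estargz.Compressor and estargz.Decompressor. The testutil import path is illustrative.

package main

import (
	"fmt"

	"github.com/klauspost/compress/zstd"

	"example.com/pkg/testutil" // illustrative path for the package above
)

func main() {
	factories := map[string]testutil.CompressionFactory{
		"gzip":        testutil.GzipCompressionWithLevel(1),
		"zstdchunked": testutil.ZstdCompressionWithLevel(zstd.SpeedDefault),
	}
	for name, newCompression := range factories {
		c := newCompression()
		fmt.Printf("%s -> %T\n", name, c)
	}
}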

View File

@ -0,0 +1,80 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package testutil
import (
"compress/gzip"
"context"
"fmt"
"io"
"net/http"
"os"
"github.com/containerd/containerd/v2/core/content"
"github.com/containerd/containerd/v2/core/images/archive"
"github.com/containerd/containerd/v2/plugins/content/local"
"github.com/opencontainers/go-digest"
ocispec "github.com/opencontainers/image-spec/specs-go/v1"
)
const (
// HelloArchiveURL points to an OCI archive of `hello-world`.
// Exported from `docker.io/library/hello-world@sha256:1a523af650137b8accdaed439c17d684df61ee4d74feac151b5b337bd29e7eec` .
// See https://github.com/AkihiroSuda/test-oci-archives/releases/tag/v20210101
HelloArchiveURL = "https://github.com/AkihiroSuda/test-oci-archives/releases/download/v20210101/hello-world.tar.gz"
// HelloArchiveDigest is the digest of the archive.
HelloArchiveDigest = "sha256:5aa022621c4de0e941ab2a30d4569c403e156b4ba2de2ec32e382ae8679f40e1"
)
// EnsureHello creates a temporary content store and imports the `hello-world` image from HelloArchiveURL into it.
func EnsureHello(ctx context.Context) (*ocispec.Descriptor, content.Store, error) {
// Pulling an image without the daemon is a mess, so we use an OCI archive here.
resp, err := http.Get(HelloArchiveURL)
if err != nil {
return nil, nil, err
}
defer resp.Body.Close()
sha256Digester := digest.SHA256.Digester()
sha256Hasher := sha256Digester.Hash()
tr := io.TeeReader(resp.Body, sha256Hasher)
gzReader, err := gzip.NewReader(tr)
if err != nil {
return nil, nil, err
}
tempDir, err := os.MkdirTemp("", "test-estargz")
if err != nil {
return nil, nil, err
}
cs, err := local.NewStore(tempDir)
if err != nil {
return nil, nil, err
}
desc, err := archive.ImportIndex(ctx, cs, gzReader)
if err != nil {
return nil, nil, err
}
resp.Body.Close()
if d := sha256Digester.Digest().String(); d != HelloArchiveDigest {
err = fmt.Errorf("expected digest of %q to be %q, got %q", HelloArchiveURL, HelloArchiveDigest, d)
return nil, nil, err
}
return &desc, cs, nil
}
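
A sketch of how a test could call EnsureHello above to obtain a content store pre-populated with the hello-world image; the testutil import path is illustrative and error handling is kept minimal.

package main

import (
	"context"
	"fmt"
	"log"

	"example.com/pkg/testutil" // illustrative path for the package above
)

func main() {
	desc, cs, err := testutil.EnsureHello(context.Background())
	if err != nil {
		log.Fatal(err)
	}
	_ = cs // content.Store holding the imported image, ready for further assertions
	fmt.Println("imported index:", desc.MediaType, desc.Digest)
}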

View File

@ -0,0 +1,74 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package testutil
import (
"bytes"
"io"
"github.com/containerd/stargz-snapshotter/estargz"
digest "github.com/opencontainers/go-digest"
)
type buildEStargzOptions struct {
estargzOptions []estargz.Option
buildTarOptions []BuildTarOption
}
type BuildEStargzOption func(o *buildEStargzOptions) error
// WithEStargzOptions specifies options for the estargz library
func WithEStargzOptions(eo ...estargz.Option) BuildEStargzOption {
return func(o *buildEStargzOptions) error {
o.estargzOptions = append(o.estargzOptions, eo...)
return nil
}
}
// WithBuildTarOptions specifies the options for tar creation
func WithBuildTarOptions(to ...BuildTarOption) BuildEStargzOption {
return func(o *buildEStargzOptions) error {
o.buildTarOptions = append(o.buildTarOptions, to...)
return nil
}
}
func BuildEStargz(ents []TarEntry, opts ...BuildEStargzOption) (*io.SectionReader, digest.Digest, error) {
var beOpts buildEStargzOptions
for _, o := range opts {
o(&beOpts)
}
tarBuf := new(bytes.Buffer)
if _, err := io.Copy(tarBuf, BuildTar(ents, beOpts.buildTarOptions...)); err != nil {
return nil, "", err
}
tarData := tarBuf.Bytes()
rc, err := estargz.Build(
io.NewSectionReader(bytes.NewReader(tarData), 0, int64(len(tarData))),
beOpts.estargzOptions...)
if err != nil {
return nil, "", err
}
defer rc.Close()
vsb := new(bytes.Buffer)
if _, err := io.Copy(vsb, rc); err != nil {
return nil, "", err
}
vsbb := vsb.Bytes()
return io.NewSectionReader(bytes.NewReader(vsbb), 0, int64(len(vsbb))), rc.TOCDigest(), nil
}
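
A sketch combining BuildEStargz above with the tar-entry helpers defined in the next file (Dir, File) to produce an eStargz blob and its TOC digest; the testutil import path is illustrative.

package main

import (
	"fmt"
	"log"

	"example.com/pkg/testutil" // illustrative path for the package above
)

func main() {
	entries := []testutil.TarEntry{
		testutil.Dir("usr/"),
		testutil.File("usr/hello.txt", "hello"),
	}
	sr, tocDigest, err := testutil.BuildEStargz(entries,
		testutil.WithBuildTarOptions(testutil.WithPrefix("./")))
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("blob size:", sr.Size(), "TOC digest:", tocDigest)
}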

View File

@ -0,0 +1,310 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package testutil
// This utility helps test code generate sample tar blobs.
import (
"archive/tar"
"fmt"
"io"
"os"
"strings"
"time"
)
// TarEntry is a single entry of a tar archive.
type TarEntry interface {
AppendTar(tw *tar.Writer, opts BuildTarOptions) error
}
// BuildTarOptions is a set of options used when building a tar blob.
type BuildTarOptions struct {
// Prefix is the prefix string to be added to each file name (e.g. "./", "/", etc.)
Prefix string
}
// BuildTarOption is an option used when building a tar blob.
type BuildTarOption func(o *BuildTarOptions)
// WithPrefix is an option to add a prefix string to each file name (e.g. "./", "/", etc.)
func WithPrefix(prefix string) BuildTarOption {
return func(o *BuildTarOptions) {
o.Prefix = prefix
}
}
// BuildTar builds a tar blob
func BuildTar(ents []TarEntry, opts ...BuildTarOption) io.Reader {
var bo BuildTarOptions
for _, o := range opts {
o(&bo)
}
pr, pw := io.Pipe()
go func() {
tw := tar.NewWriter(pw)
for _, ent := range ents {
if err := ent.AppendTar(tw, bo); err != nil {
pw.CloseWithError(err)
return
}
}
if err := tw.Close(); err != nil {
pw.CloseWithError(err)
return
}
pw.Close()
}()
return pr
}
type tarEntryFunc func(*tar.Writer, BuildTarOptions) error
func (f tarEntryFunc) AppendTar(tw *tar.Writer, opts BuildTarOptions) error { return f(tw, opts) }
// DirectoryBuildTarOption is an option for a directory entry.
type DirectoryBuildTarOption func(o *dirOpts)
type dirOpts struct {
uid int
gid int
xattrs map[string]string
mode *os.FileMode
modTime time.Time
}
// WithDirModTime specifies the modtime of the dir.
func WithDirModTime(modTime time.Time) DirectoryBuildTarOption {
return func(o *dirOpts) {
o.modTime = modTime
}
}
// WithDirOwner specifies the owner of the directory.
func WithDirOwner(uid, gid int) DirectoryBuildTarOption {
return func(o *dirOpts) {
o.uid = uid
o.gid = gid
}
}
// WithDirXattrs specifies the extended attributes of the directory.
func WithDirXattrs(xattrs map[string]string) DirectoryBuildTarOption {
return func(o *dirOpts) {
o.xattrs = xattrs
}
}
// WithDirMode specifies the mode of the directory.
func WithDirMode(mode os.FileMode) DirectoryBuildTarOption {
return func(o *dirOpts) {
o.mode = &mode
}
}
// Dir is a directory entry
func Dir(name string, opts ...DirectoryBuildTarOption) TarEntry {
return tarEntryFunc(func(tw *tar.Writer, buildOpts BuildTarOptions) error {
var dOpts dirOpts
for _, o := range opts {
o(&dOpts)
}
if !strings.HasSuffix(name, "/") {
panic(fmt.Sprintf("missing trailing slash in dir %q ", name))
}
var mode int64 = 0755
if dOpts.mode != nil {
mode = permAndExtraMode2TarMode(*dOpts.mode)
}
return tw.WriteHeader(&tar.Header{
Typeflag: tar.TypeDir,
Name: buildOpts.Prefix + name,
Mode: mode,
ModTime: dOpts.modTime,
Xattrs: dOpts.xattrs,
Uid: dOpts.uid,
Gid: dOpts.gid,
})
})
}
// FileBuildTarOption is an option for a file entry.
type FileBuildTarOption func(o *fileOpts)
type fileOpts struct {
uid int
gid int
xattrs map[string]string
mode *os.FileMode
modTime time.Time
}
// WithFileOwner specifies the owner of the file.
func WithFileOwner(uid, gid int) FileBuildTarOption {
return func(o *fileOpts) {
o.uid = uid
o.gid = gid
}
}
// WithFileXattrs specifies the extended attributes of the file.
func WithFileXattrs(xattrs map[string]string) FileBuildTarOption {
return func(o *fileOpts) {
o.xattrs = xattrs
}
}
// WithFileModTime specifies the modtime of the file.
func WithFileModTime(modTime time.Time) FileBuildTarOption {
return func(o *fileOpts) {
o.modTime = modTime
}
}
// WithFileMode specifies the mode of the file.
func WithFileMode(mode os.FileMode) FileBuildTarOption {
return func(o *fileOpts) {
o.mode = &mode
}
}
// File is a regular file entry
func File(name, contents string, opts ...FileBuildTarOption) TarEntry {
return tarEntryFunc(func(tw *tar.Writer, buildOpts BuildTarOptions) error {
var fOpts fileOpts
for _, o := range opts {
o(&fOpts)
}
if strings.HasSuffix(name, "/") {
return fmt.Errorf("bogus trailing slash in file %q", name)
}
var mode int64 = 0644
if fOpts.mode != nil {
mode = permAndExtraMode2TarMode(*fOpts.mode)
}
if err := tw.WriteHeader(&tar.Header{
Typeflag: tar.TypeReg,
Name: buildOpts.Prefix + name,
Mode: mode,
ModTime: fOpts.modTime,
Xattrs: fOpts.xattrs,
Size: int64(len(contents)),
Uid: fOpts.uid,
Gid: fOpts.gid,
}); err != nil {
return err
}
_, err := io.WriteString(tw, contents)
return err
})
}
// Symlink is a symlink entry
func Symlink(name, target string) TarEntry {
return tarEntryFunc(func(tw *tar.Writer, buildOpts BuildTarOptions) error {
return tw.WriteHeader(&tar.Header{
Typeflag: tar.TypeSymlink,
Name: buildOpts.Prefix + name,
Linkname: target,
Mode: 0644,
})
})
}
// Link is a hard-link entry
func Link(name, linkname string) TarEntry {
now := time.Now()
return tarEntryFunc(func(w *tar.Writer, buildOpts BuildTarOptions) error {
return w.WriteHeader(&tar.Header{
Typeflag: tar.TypeLink,
Name: buildOpts.Prefix + name,
Linkname: linkname,
ModTime: now,
AccessTime: now,
ChangeTime: now,
})
})
}
// Chardev is a character device entry
func Chardev(name string, major, minor int64) TarEntry {
now := time.Now()
return tarEntryFunc(func(w *tar.Writer, buildOpts BuildTarOptions) error {
return w.WriteHeader(&tar.Header{
Typeflag: tar.TypeChar,
Name: buildOpts.Prefix + name,
Devmajor: major,
Devminor: minor,
ModTime: now,
AccessTime: now,
ChangeTime: now,
})
})
}
// Blockdev is a block device entry
func Blockdev(name string, major, minor int64) TarEntry {
now := time.Now()
return tarEntryFunc(func(w *tar.Writer, buildOpts BuildTarOptions) error {
return w.WriteHeader(&tar.Header{
Typeflag: tar.TypeBlock,
Name: buildOpts.Prefix + name,
Devmajor: major,
Devminor: minor,
ModTime: now,
AccessTime: now,
ChangeTime: now,
})
})
}
// Fifo is a fifo entry
func Fifo(name string) TarEntry {
now := time.Now()
return tarEntryFunc(func(w *tar.Writer, buildOpts BuildTarOptions) error {
return w.WriteHeader(&tar.Header{
Typeflag: tar.TypeFifo,
Name: buildOpts.Prefix + name,
ModTime: now,
AccessTime: now,
ChangeTime: now,
})
})
}
// setuid, setgid, and sticky bits for archive/tar
// https://github.com/golang/go/blob/release-branch.go1.13/src/archive/tar/common.go#L607-L609
const (
cISUID = 04000 // Set uid
cISGID = 02000 // Set gid
cISVTX = 01000 // Save text (sticky bit)
)
func permAndExtraMode2TarMode(fm os.FileMode) (tm int64) {
tm = int64(fm & os.ModePerm)
if fm&os.ModeSetuid != 0 {
tm |= cISUID
}
if fm&os.ModeSetgid != 0 {
tm |= cISGID
}
if fm&os.ModeSticky != 0 {
tm |= cISVTX
}
return
}
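
A short sketch of BuildTar above with a few entry helpers; the returned reader streams a tar archive produced by a goroutine behind an io.Pipe. The testutil import path is illustrative.

package main

import (
	"fmt"
	"io"
	"log"

	"example.com/pkg/testutil" // illustrative path for the package above
)

func main() {
	r := testutil.BuildTar([]testutil.TarEntry{
		testutil.Dir("etc/", testutil.WithDirMode(0o755)),
		testutil.File("etc/hostname", "sample\n", testutil.WithFileMode(0o644)),
		testutil.Symlink("etc/host", "hostname"),
	}, testutil.WithPrefix("./"))

	// Drain the stream; a real test would hand it to a tar.Reader or estargz.Build.
	n, err := io.Copy(io.Discard, r)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("tar bytes:", n)
}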

View File

@ -0,0 +1,31 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package testutil
import (
"crypto/rand"
"testing"
)
// RandomBytes returns the specified number of random bytes
func RandomBytes(t *testing.T, n int) []byte {
b := make([]byte, n)
if _, err := rand.Read(b); err != nil {
t.Fatalf("failed rand.Read: %v", err)
}
return b
}
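
A tiny sketch of RandomBytes above inside a test; the testutil import path is illustrative.

package sample_test

import (
	"testing"

	"example.com/pkg/testutil" // illustrative path for the package above
)

func TestRandomPayload(t *testing.T) {
	payload := testutil.RandomBytes(t, 64) // fails the test if crypto/rand errors
	if len(payload) != 64 {
		t.Fatalf("unexpected payload length: %d", len(payload))
	}
}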

910
vendor/github.com/docker/cli/AUTHORS generated vendored Normal file
View File

@ -0,0 +1,910 @@
# File @generated by scripts/docs/generate-authors.sh. DO NOT EDIT.
# This file lists all contributors to the repository.
# See scripts/docs/generate-authors.sh to make modifications.
A. Lester Buck III <github-reg@nbolt.com>
Aanand Prasad <aanand.prasad@gmail.com>
Aaron L. Xu <liker.xu@foxmail.com>
Aaron Lehmann <alehmann@netflix.com>
Aaron.L.Xu <likexu@harmonycloud.cn>
Abdur Rehman <abdur_rehman@mentor.com>
Abhinandan Prativadi <abhi@docker.com>
Abin Shahab <ashahab@altiscale.com>
Abreto FU <public@abreto.email>
Ace Tang <aceapril@126.com>
Addam Hardy <addam.hardy@gmail.com>
Adolfo Ochagavía <aochagavia92@gmail.com>
Adrian Plata <adrian.plata@docker.com>
Adrien Duermael <adrien@duermael.com>
Adrien Folie <folie.adrien@gmail.com>
Adyanth Hosavalike <ahosavalike@ucsd.edu>
Ahmet Alp Balkan <ahmetb@microsoft.com>
Aidan Feldman <aidan.feldman@gmail.com>
Aidan Hobson Sayers <aidanhs@cantab.net>
AJ Bowen <aj@soulshake.net>
Akhil Mohan <akhil.mohan@mayadata.io>
Akihiro Suda <akihiro.suda.cz@hco.ntt.co.jp>
Akim Demaille <akim.demaille@docker.com>
Alan Thompson <cloojure@gmail.com>
Alano Terblanche <alano.terblanche@docker.com>
Albert Callarisa <shark234@gmail.com>
Alberto Roura <mail@albertoroura.com>
Albin Kerouanton <albinker@gmail.com>
Aleksa Sarai <asarai@suse.de>
Aleksander Piotrowski <apiotrowski312@gmail.com>
Alessandro Boch <aboch@tetrationanalytics.com>
Alex Couture-Beil <alex@earthly.dev>
Alex Mavrogiannis <alex.mavrogiannis@docker.com>
Alex Mayer <amayer5125@gmail.com>
Alexander Boyd <alex@opengroove.org>
Alexander Chneerov <achneerov@gmail.com>
Alexander Larsson <alexl@redhat.com>
Alexander Morozov <lk4d4math@gmail.com>
Alexander Ryabov <i@sepa.spb.ru>
Alexandre González <agonzalezro@gmail.com>
Alexey Igrychev <alexey.igrychev@flant.com>
Alexis Couvreur <alexiscouvreur.pro@gmail.com>
Alfred Landrum <alfred.landrum@docker.com>
Ali Rostami <rostami.ali@gmail.com>
Alicia Lauerman <alicia@eta.im>
Allen Sun <allensun.shl@alibaba-inc.com>
Alvin Deng <alvin.q.deng@utexas.edu>
Amen Belayneh <amenbelayneh@gmail.com>
Amey Shrivastava <72866602+AmeyShrivastava@users.noreply.github.com>
Amir Goldstein <amir73il@aquasec.com>
Amit Krishnan <amit.krishnan@oracle.com>
Amit Shukla <amit.shukla@docker.com>
Amy Lindburg <amy.lindburg@docker.com>
Anca Iordache <anca.iordache@docker.com>
Anda Xu <anda.xu@docker.com>
Andrea Luzzardi <aluzzardi@gmail.com>
Andreas Köhler <andi5.py@gmx.net>
Andres G. Aragoneses <knocte@gmail.com>
Andres Leon Rangel <aleon1220@gmail.com>
Andrew France <andrew@avito.co.uk>
Andrew Hsu <andrewhsu@docker.com>
Andrew Macpherson <hopscotch23@gmail.com>
Andrew McDonnell <bugs@andrewmcdonnell.net>
Andrew Po <absourd.noise@gmail.com>
Andrew-Zipperer <atzipperer@gmail.com>
Andrey Petrov <andrey.petrov@shazow.net>
Andrii Berehuliak <berkusandrew@gmail.com>
André Martins <aanm90@gmail.com>
Andy Goldstein <agoldste@redhat.com>
Andy Rothfusz <github@developersupport.net>
Anil Madhavapeddy <anil@recoil.org>
Ankush Agarwal <ankushagarwal11@gmail.com>
Anne Henmi <anne.henmi@docker.com>
Anton Polonskiy <anton.polonskiy@gmail.com>
Antonio Murdaca <antonio.murdaca@gmail.com>
Antonis Kalipetis <akalipetis@gmail.com>
Anusha Ragunathan <anusha.ragunathan@docker.com>
Ao Li <la9249@163.com>
Arash Deshmeh <adeshmeh@ca.ibm.com>
Arko Dasgupta <arko@tetrate.io>
Arnaud Porterie <icecrime@gmail.com>
Arnaud Rebillout <elboulangero@gmail.com>
Arthur Peka <arthur.peka@outlook.com>
Ashly Mathew <ashly.mathew@sap.com>
Ashwini Oruganti <ashwini.oruganti@gmail.com>
Aslam Ahemad <aslamahemad@gmail.com>
Azat Khuyiyakhmetov <shadow_uz@mail.ru>
Bardia Keyoumarsi <bkeyouma@ucsc.edu>
Barnaby Gray <barnaby@pickle.me.uk>
Bastiaan Bakker <bbakker@xebia.com>
BastianHofmann <bastianhofmann@me.com>
Ben Bodenmiller <bbodenmiller@gmail.com>
Ben Bonnefoy <frenchben@docker.com>
Ben Creasy <ben@bencreasy.com>
Ben Firshman <ben@firshman.co.uk>
Benjamin Boudreau <boudreau.benjamin@gmail.com>
Benjamin Böhmke <benjamin@boehmke.net>
Benjamin Nater <me@bn4t.me>
Benoit Sigoure <tsunanet@gmail.com>
Bhumika Bayani <bhumikabayani@gmail.com>
Bill Wang <ozbillwang@gmail.com>
Bin Liu <liubin0329@gmail.com>
Bingshen Wang <bingshen.wbs@alibaba-inc.com>
Bishal Das <bishalhnj127@gmail.com>
Bjorn Neergaard <bjorn.neergaard@docker.com>
Boaz Shuster <ripcurld.github@gmail.com>
Boban Acimovic <boban.acimovic@gmail.com>
Bogdan Anton <contact@bogdananton.ro>
Boris Pruessmann <boris@pruessmann.org>
Brad Baker <brad@brad.fi>
Bradley Cicenas <bradley.cicenas@gmail.com>
Brandon Mitchell <git@bmitch.net>
Brandon Philips <brandon.philips@coreos.com>
Brent Salisbury <brent.salisbury@docker.com>
Bret Fisher <bret@bretfisher.com>
Brian (bex) Exelbierd <bexelbie@redhat.com>
Brian Goff <cpuguy83@gmail.com>
Brian Tracy <brian.tracy33@gmail.com>
Brian Wieder <brian@4wieders.com>
Bruno Sousa <bruno.sousa@docker.com>
Bryan Bess <squarejaw@bsbess.com>
Bryan Boreham <bjboreham@gmail.com>
Bryan Murphy <bmurphy1976@gmail.com>
bryfry <bryon.fryer@gmail.com>
Calvin Liu <flycalvin@qq.com>
Cameron Spear <cameronspear@gmail.com>
Cao Weiwei <cao.weiwei30@zte.com.cn>
Carlo Mion <mion00@gmail.com>
Carlos Alexandro Becker <caarlos0@gmail.com>
Carlos de Paula <me@carlosedp.com>
Casey Korver <casey@korver.dev>
Ce Gao <ce.gao@outlook.com>
Cedric Davies <cedricda@microsoft.com>
Cezar Sa Espinola <cezarsa@gmail.com>
Chad Faragher <wyckster@hotmail.com>
Chao Wang <wangchao.fnst@cn.fujitsu.com>
Charles Chan <charleswhchan@users.noreply.github.com>
Charles Law <claw@conduce.com>
Charles Smith <charles.smith@docker.com>
Charlie Drage <charlie@charliedrage.com>
Charlotte Mach <charlotte.mach@fs.lmu.de>
ChaYoung You <yousbe@gmail.com>
Chee Hau Lim <cheehau.lim@mobimeo.com>
Chen Chuanliang <chen.chuanliang@zte.com.cn>
Chen Hanxiao <chenhanxiao@cn.fujitsu.com>
Chen Mingjie <chenmingjie0828@163.com>
Chen Qiu <cheney-90@hotmail.com>
Chris Chinchilla <chris@chrischinchilla.com>
Chris Couzens <ccouzens@gmail.com>
Chris Gavin <chris@chrisgavin.me>
Chris Gibson <chris@chrisg.io>
Chris McKinnel <chrismckinnel@gmail.com>
Chris Snow <chsnow123@gmail.com>
Chris Vermilion <christopher.vermilion@gmail.com>
Chris Weyl <cweyl@alumni.drew.edu>
Christian Persson <saser@live.se>
Christian Stefanescu <st.chris@gmail.com>
Christophe Robin <crobin@nekoo.com>
Christophe Vidal <kriss@krizalys.com>
Christopher Biscardi <biscarch@sketcht.com>
Christopher Crone <christopher.crone@docker.com>
Christopher Jones <tophj@linux.vnet.ibm.com>
Christopher Petito <47751006+krissetto@users.noreply.github.com>
Christopher Petito <chrisjpetito@gmail.com>
Christopher Svensson <stoffus@stoffus.com>
Christy Norman <christy@linux.vnet.ibm.com>
Chun Chen <ramichen@tencent.com>
Clinton Kitson <clintonskitson@gmail.com>
Coenraad Loubser <coenraad@wish.org.za>
Colin Hebert <hebert.colin@gmail.com>
Collin Guarino <collin.guarino@gmail.com>
Colm Hally <colmhally@gmail.com>
Comical Derskeal <27731088+derskeal@users.noreply.github.com>
Conner Crosby <conner@cavcrosby.tech>
Corey Farrell <git@cfware.com>
Corey Quon <corey.quon@docker.com>
Cory Bennet <cbennett@netflix.com>
Cory Snider <csnider@mirantis.com>
Craig Osterhout <craig.osterhout@docker.com>
Craig Wilhite <crwilhit@microsoft.com>
Cristian Staretu <cristian.staretu@gmail.com>
Daehyeok Mun <daehyeok@gmail.com>
Dafydd Crosby <dtcrsby@gmail.com>
Daisuke Ito <itodaisuke00@gmail.com>
dalanlan <dalanlan925@gmail.com>
Damien Nadé <github@livna.org>
Dan Cotora <dan@bluevision.ro>
Danial Gharib <danial.mail.gh@gmail.com>
Daniel Artine <daniel.artine@ufrj.br>
Daniel Cassidy <mail@danielcassidy.me.uk>
Daniel Dao <dqminh@cloudflare.com>
Daniel Farrell <dfarrell@redhat.com>
Daniel Gasienica <daniel@gasienica.ch>
Daniel Goosen <daniel.goosen@surveysampling.com>
Daniel Helfand <dhelfand@redhat.com>
Daniel Hiltgen <daniel.hiltgen@docker.com>
Daniel J Walsh <dwalsh@redhat.com>
Daniel Nephin <dnephin@docker.com>
Daniel Norberg <dano@spotify.com>
Daniel Watkins <daniel@daniel-watkins.co.uk>
Daniel Zhang <jmzwcn@gmail.com>
Daniil Nikolenko <qoo2p5@gmail.com>
Danny Berger <dpb587@gmail.com>
Darren Shepherd <darren.s.shepherd@gmail.com>
Darren Stahl <darst@microsoft.com>
Dattatraya Kumbhar <dattatraya.kumbhar@gslab.com>
Dave Goodchild <buddhamagnet@gmail.com>
Dave Henderson <dhenderson@gmail.com>
Dave Tucker <dt@docker.com>
David Alvarez <david.alvarez@flyeralarm.com>
David Beitey <david@davidjb.com>
David Calavera <david.calavera@gmail.com>
David Cramer <davcrame@cisco.com>
David Dooling <dooling@gmail.com>
David Gageot <david@gageot.net>
David Karlsson <david.karlsson@docker.com>
David le Blanc <systemmonkey42@users.noreply.github.com>
David Lechner <david@lechnology.com>
David Scott <dave@recoil.org>
David Sheets <dsheets@docker.com>
David Williamson <david.williamson@docker.com>
David Xia <dxia@spotify.com>
David Young <yangboh@cn.ibm.com>
Deng Guangxing <dengguangxing@huawei.com>
Denis Defreyne <denis@soundcloud.com>
Denis Gladkikh <denis@gladkikh.email>
Denis Ollier <larchunix@users.noreply.github.com>
Dennis Docter <dennis@d23.nl>
dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Derek McGowan <derek@mcg.dev>
Des Preston <despreston@gmail.com>
Deshi Xiao <dxiao@redhat.com>
Dharmit Shah <shahdharmit@gmail.com>
Dhawal Yogesh Bhanushali <dbhanushali@vmware.com>
Dieter Reuter <dieter.reuter@me.com>
Dima Stopel <dima@twistlock.com>
Dimitry Andric <d.andric@activevideo.com>
Ding Fei <dingfei@stars.org.cn>
Diogo Monica <diogo@docker.com>
Djordje Lukic <djordje.lukic@docker.com>
Dmitriy Fishman <fishman.code@gmail.com>
Dmitry Gusev <dmitry.gusev@gmail.com>
Dmitry Smirnov <onlyjob@member.fsf.org>
Dmitry V. Krivenok <krivenok.dmitry@gmail.com>
Dominik Braun <dominik.braun@nbsp.de>
Don Kjer <don.kjer@gmail.com>
Dong Chen <dongluo.chen@docker.com>
DongGeon Lee <secmatth1996@gmail.com>
Doug Davis <dug@us.ibm.com>
Drew Erny <derny@mirantis.com>
Ed Costello <epc@epcostello.com>
Ed Morley <501702+edmorley@users.noreply.github.com>
Elango Sivanandam <elango.siva@docker.com>
Eli Uriegas <eli.uriegas@docker.com>
Eli Uriegas <seemethere101@gmail.com>
Elias Faxö <elias.faxo@tre.se>
Elliot Luo <956941328@qq.com>
Eric Bode <eric.bode@foundries.io>
Eric Curtin <ericcurtin17@gmail.com>
Eric Engestrom <eric@engestrom.ch>
Eric G. Noriega <enoriega@vizuri.com>
Eric Rosenberg <ehaydenr@gmail.com>
Eric Sage <eric.david.sage@gmail.com>
Eric-Olivier Lamey <eo@lamey.me>
Erica Windisch <erica@windisch.us>
Erik Hollensbe <github@hollensbe.org>
Erik Humphrey <erik.humphrey@carleton.ca>
Erik St. Martin <alakriti@gmail.com>
Essam A. Hassan <es.hassan187@gmail.com>
Ethan Haynes <ethanhaynes@alumni.harvard.edu>
Euan Kemp <euank@euank.com>
Eugene Yakubovich <eugene.yakubovich@coreos.com>
Evan Allrich <evan@unguku.com>
Evan Hazlett <ejhazlett@gmail.com>
Evan Krall <krall@yelp.com>
Evan Lezar <elezar@nvidia.com>
Evelyn Xu <evelynhsu21@gmail.com>
Everett Toews <everett.toews@rackspace.com>
Fabio Falci <fabiofalci@gmail.com>
Fabrizio Soppelsa <fsoppelsa@mirantis.com>
Felix Geyer <debfx@fobos.de>
Felix Hupfeld <felix@quobyte.com>
Felix Rabe <felix@rabe.io>
fezzik1620 <fezzik1620@users.noreply.github.com>
Filip Jareš <filipjares@gmail.com>
Flavio Crisciani <flavio.crisciani@docker.com>
Florian Klein <florian.klein@free.fr>
Forest Johnson <fjohnson@peoplenetonline.com>
Foysal Iqbal <foysal.iqbal.fb@gmail.com>
François Scala <francois.scala@swiss-as.com>
Fred Lifton <fred.lifton@docker.com>
Frederic Hemberger <mail@frederic-hemberger.de>
Frederick F. Kautz IV <fkautz@redhat.com>
Frederik Nordahl Jul Sabroe <frederikns@gmail.com>
Frieder Bluemle <frieder.bluemle@gmail.com>
Gabriel Gore <gabgore@cisco.com>
Gabriel Nicolas Avellaneda <avellaneda.gabriel@gmail.com>
Gabriela Georgieva <gabriela.georgieva@docker.com>
Gaetan de Villele <gdevillele@gmail.com>
Gang Qiao <qiaohai8866@gmail.com>
Gary Schaetz <gary@schaetzkc.com>
Genki Takiuchi <genki@s21g.com>
George MacRorie <gmacr31@gmail.com>
George Margaritis <gmargaritis@protonmail.com>
George Xie <georgexsh@gmail.com>
Gianluca Borello <g.borello@gmail.com>
Gildas Cuisinier <gildas.cuisinier@gcuisinier.net>
Gio d'Amelio <giodamelio@gmail.com>
Gleb Stsenov <gleb.stsenov@gmail.com>
Goksu Toprak <goksu.toprak@docker.com>
Gou Rao <gou@portworx.com>
Govind Rai <raigovind93@gmail.com>
Grace Choi <grace.54109@gmail.com>
Graeme Wiebe <graeme.wiebe@gmail.com>
Grant Reaber <grant.reaber@gmail.com>
Greg Pflaum <gpflaum@users.noreply.github.com>
Gsealy <jiaojingwei1001@hotmail.com>
Guilhem Lettron <guilhem+github@lettron.fr>
Guillaume J. Charmes <guillaume.charmes@docker.com>
Guillaume Le Floch <glfloch@gmail.com>
Guillaume Tardif <guillaume.tardif@gmail.com>
gwx296173 <gaojing3@huawei.com>
Günther Jungbluth <gunther@gameslabs.net>
Hakan Özler <hakan.ozler@kodcu.com>
Hao Zhang <21521210@zju.edu.cn>
Harald Albers <github@albersweb.de>
Harold Cooper <hrldcpr@gmail.com>
Harry Zhang <harryz@hyper.sh>
He Simei <hesimei@zju.edu.cn>
Hector S <hfsam88@gmail.com>
Helen Xie <chenjg@harmonycloud.cn>
Henning Sprang <henning.sprang@gmail.com>
Henry N <henrynmail-github@yahoo.de>
Hernan Garcia <hernandanielg@gmail.com>
Hongbin Lu <hongbin034@gmail.com>
Hu Keping <hukeping@huawei.com>
Huayi Zhang <irachex@gmail.com>
Hugo Chastel <Hugo-C@users.noreply.github.com>
Hugo Gabriel Eyherabide <hugogabriel.eyherabide@gmail.com>
huqun <huqun@zju.edu.cn>
Huu Nguyen <huu@prismskylabs.com>
Hyzhou Zhy <hyzhou.zhy@alibaba-inc.com>
Iain Samuel McLean Elder <iain@isme.es>
Ian Campbell <ian.campbell@docker.com>
Ian Philpot <ian.philpot@microsoft.com>
Ignacio Capurro <icapurrofagian@gmail.com>
Ilya Dmitrichenko <errordeveloper@gmail.com>
Ilya Khlopotov <ilya.khlopotov@gmail.com>
Ilya Sotkov <ilya@sotkov.com>
Ioan Eugen Stan <eu@ieugen.ro>
Isabel Jimenez <contact.isabeljimenez@gmail.com>
Ivan Grcic <igrcic@gmail.com>
Ivan Grund <ivan.grund@gmail.com>
Ivan Markin <sw@nogoegst.net>
Jacob Atzen <jacob@jacobatzen.dk>
Jacob Tomlinson <jacob@tom.linson.uk>
Jacopo Rigoli <rigoli.jacopo@gmail.com>
Jaivish Kothari <janonymous.codevulture@gmail.com>
Jake Lambert <jake.lambert@volusion.com>
Jake Sanders <jsand@google.com>
Jake Stokes <contactjake@developerjake.com>
Jakub Panek <me@panekj.dev>
James Nesbitt <james.nesbitt@wunderkraut.com>
James Turnbull <james@lovedthanlost.net>
Jamie Hannaford <jamie@limetree.org>
Jan Koprowski <jan.koprowski@gmail.com>
Jan Pazdziora <jpazdziora@redhat.com>
Jan-Jaap Driessen <janjaapdriessen@gmail.com>
Jana Radhakrishnan <mrjana@docker.com>
Jared Hocutt <jaredh@netapp.com>
Jasmine Hegman <jasmine@jhegman.com>
Jason Hall <jason@chainguard.dev>
Jason Heiss <jheiss@aput.net>
Jason Plum <jplum@devonit.com>
Jay Kamat <github@jgkamat.33mail.com>
Jean Lecordier <jeanlecordier@hotmail.fr>
Jean Rouge <rougej+github@gmail.com>
Jean-Christophe Sirot <jean-christophe.sirot@docker.com>
Jean-Pierre Huynh <jean-pierre.huynh@ounet.fr>
Jeff Lindsay <progrium@gmail.com>
Jeff Nickoloff <jeff.nickoloff@gmail.com>
Jeff Silberman <jsilberm@gmail.com>
Jennings Zhang <jenni_zh@protonmail.com>
Jeremy Chambers <jeremy@thehipbot.com>
Jeremy Unruh <jeremybunruh@gmail.com>
Jeremy Yallop <yallop@docker.com>
Jeroen Franse <jeroenfranse@gmail.com>
Jesse Adametz <jesseadametz@gmail.com>
Jessica Frazelle <jess@oxide.computer>
Jezeniel Zapanta <jpzapanta22@gmail.com>
Jian Zhang <zhangjian.fnst@cn.fujitsu.com>
Jie Luo <luo612@zju.edu.cn>
Jilles Oldenbeuving <ojilles@gmail.com>
Jim Chen <njucjc@gmail.com>
Jim Galasyn <jim.galasyn@docker.com>
Jim Lin <b04705003@ntu.edu.tw>
Jimmy Leger <jimmy.leger@gmail.com>
Jimmy Song <rootsongjc@gmail.com>
jimmyxian <jimmyxian2004@yahoo.com.cn>
Jintao Zhang <zhangjintao9020@gmail.com>
Joao Fernandes <joao.fernandes@docker.com>
Joe Abbey <joe.abbey@gmail.com>
Joe Doliner <jdoliner@pachyderm.io>
Joe Gordon <joe.gordon0@gmail.com>
Joel Handwell <joelhandwell@gmail.com>
Joey Geiger <jgeiger@gmail.com>
Joffrey F <joffrey@docker.com>
Johan Euphrosine <proppy@google.com>
Johannes 'fish' Ziemke <github@freigeist.org>
John Feminella <jxf@jxf.me>
John Harris <john@johnharris.io>
John Howard <github@lowenna.com>
John Howard <howardjohn@google.com>
John Laswell <john.n.laswell@gmail.com>
John Maguire <jmaguire@duosecurity.com>
John Mulhausen <john@docker.com>
John Starks <jostarks@microsoft.com>
John Stephens <johnstep@docker.com>
John Tims <john.k.tims@gmail.com>
John V. Martinez <jvmatl@gmail.com>
John Willis <john.willis@docker.com>
Jon Johnson <jonjohnson@google.com>
Jon Zeolla <zeolla@gmail.com>
Jonatas Baldin <jonatas.baldin@gmail.com>
Jonathan A. Sternberg <jonathansternberg@gmail.com>
Jonathan Boulle <jonathanboulle@gmail.com>
Jonathan Lee <jonjohn1232009@gmail.com>
Jonathan Lomas <jonathan@floatinglomas.ca>
Jonathan McCrohan <jmccrohan@gmail.com>
Jonathan Warriss-Simmons <misterws@diogenes.ws>
Jonh Wendell <jonh.wendell@redhat.com>
Jordan Jennings <jjn2009@gmail.com>
Jorge Vallecillo <jorgevallecilloc@gmail.com>
Jose J. Escobar <53836904+jescobar-docker@users.noreply.github.com>
Joseph Kern <jkern@semafour.net>
Josh Bodah <jb3689@yahoo.com>
Josh Chorlton <jchorlton@gmail.com>
Josh Hawn <josh.hawn@docker.com>
Josh Horwitz <horwitz@addthis.com>
Josh Soref <jsoref@gmail.com>
Julian <gitea+julian@ic.thejulian.uk>
Julien Barbier <write0@gmail.com>
Julien Kassar <github@kassisol.com>
Julien Maitrehenry <julien.maitrehenry@me.com>
Justas Brazauskas <brazauskasjustas@gmail.com>
Justin Chadwell <me@jedevc.com>
Justin Cormack <justin.cormack@docker.com>
Justin Simonelis <justin.p.simonelis@gmail.com>
Justyn Temme <justyntemme@gmail.com>
Jyrki Puttonen <jyrkiput@gmail.com>
Jérémie Drouet <jeremie.drouet@gmail.com>
Jérôme Petazzoni <jerome.petazzoni@docker.com>
Jörg Thalheim <joerg@higgsboson.tk>
Kai Blin <kai@samba.org>
Kai Qiang Wu (Kennan) <wkq5325@gmail.com>
Kara Alexandra <kalexandra@us.ibm.com>
Kareem Khazem <karkhaz@karkhaz.com>
Karthik Nayak <Karthik.188@gmail.com>
Kat Samperi <kat.samperi@gmail.com>
Kathryn Spiers <kathryn@spiers.me>
Katie McLaughlin <katie@glasnt.com>
Ke Xu <leonhartx.k@gmail.com>
Kei Ohmura <ohmura.kei@gmail.com>
Keith Hudgins <greenman@greenman.org>
Kelton Bassingthwaite <KeltonBassingthwaite@gmail.com>
Ken Cochrane <kencochrane@gmail.com>
Ken ICHIKAWA <ichikawa.ken@jp.fujitsu.com>
Kenfe-Mickaël Laventure <mickael.laventure@gmail.com>
Kevin Alvarez <github@crazymax.dev>
Kevin Burke <kev@inburke.com>
Kevin Feyrer <kevin.feyrer@btinternet.com>
Kevin Kern <kaiwentan@harmonycloud.cn>
Kevin Kirsche <Kev.Kirsche+GitHub@gmail.com>
Kevin Meredith <kevin.m.meredith@gmail.com>
Kevin Richardson <kevin@kevinrichardson.co>
Kevin Woblick <mail@kovah.de>
khaled souf <khaled.souf@gmail.com>
Kim Eik <kim@heldig.org>
Kir Kolyshkin <kolyshkin@gmail.com>
Kirill A. Korinsky <kirill@korins.ky>
Kotaro Yoshimatsu <kotaro.yoshimatsu@gmail.com>
Krasi Georgiev <krasi@vip-consult.solutions>
Kris-Mikael Krister <krismikael@protonmail.com>
Kun Zhang <zkazure@gmail.com>
Kunal Kushwaha <kushwaha_kunal_v7@lab.ntt.co.jp>
Kyle Mitofsky <Kylemit@gmail.com>
Lachlan Cooper <lachlancooper@gmail.com>
Lai Jiangshan <jiangshanlai@gmail.com>
Lars Kellogg-Stedman <lars@redhat.com>
Laura Brehm <laurabrehm@hey.com>
Laura Frank <ljfrank@gmail.com>
Laurent Erignoux <lerignoux@gmail.com>
Lee Gaines <eightlimbed@gmail.com>
Lei Jitang <leijitang@huawei.com>
Lennie <github@consolejunkie.net>
Leo Gallucci <elgalu3@gmail.com>
Leonid Skorospelov <leosko94@gmail.com>
Lewis Daly <lewisdaly@me.com>
Li Fu Bang <lifubang@acmcoder.com>
Li Yi <denverdino@gmail.com>
Li Yi <weiyuan.yl@alibaba-inc.com>
Liang-Chi Hsieh <viirya@gmail.com>
Lihua Tang <lhtang@alauda.io>
Lily Guo <lily.guo@docker.com>
Lin Lu <doraalin@163.com>
Linus Heckemann <lheckemann@twig-world.com>
Liping Xue <lipingxue@gmail.com>
Liron Levin <liron@twistlock.com>
liwenqi <vikilwq@zju.edu.cn>
lixiaobing10051267 <li.xiaobing1@zte.com.cn>
Lloyd Dewolf <foolswisdom@gmail.com>
Lorenzo Fontana <lo@linux.com>
Louis Opter <kalessin@kalessin.fr>
Luca Favatella <luca.favatella@erlang-solutions.com>
Luca Marturana <lucamarturana@gmail.com>
Lucas Chan <lucas-github@lucaschan.com>
Luis Henrique Mulinari <luis.mulinari@gmail.com>
Luka Hartwig <mail@lukahartwig.de>
Lukas Heeren <lukas-heeren@hotmail.com>
Lukasz Zajaczkowski <Lukasz.Zajaczkowski@ts.fujitsu.com>
Lydell Manganti <LydellManganti@users.noreply.github.com>
Lénaïc Huard <lhuard@amadeus.com>
Ma Shimiao <mashimiao.fnst@cn.fujitsu.com>
Mabin <bin.ma@huawei.com>
Maciej Kalisz <maciej.d.kalisz@gmail.com>
Madhav Puri <madhav.puri@gmail.com>
Madhu Venugopal <madhu@socketplane.io>
Madhur Batra <madhurbatra097@gmail.com>
Malte Janduda <mail@janduda.net>
Manjunath A Kumatagi <mkumatag@in.ibm.com>
Mansi Nahar <mmn4185@rit.edu>
mapk0y <mapk0y@gmail.com>
Marc Bihlmaier <marc.bihlmaier@reddoxx.com>
Marc Cornellà <hello@mcornella.com>
Marco Mariani <marco.mariani@alterway.fr>
Marco Spiess <marco.spiess@hotmail.de>
Marco Vedovati <mvedovati@suse.com>
Marcus Martins <marcus@docker.com>
Marianna Tessel <mtesselh@gmail.com>
Marius Ileana <marius.ileana@gmail.com>
Marius Meschter <marius@meschter.me>
Marius Sturm <marius@graylog.com>
Mark Oates <fl0yd@me.com>
Marsh Macy <marsma@microsoft.com>
Martin Mosegaard Amdisen <martin.amdisen@praqma.com>
Mary Anthony <mary.anthony@docker.com>
Mason Fish <mason.fish@docker.com>
Mason Malone <mason.malone@gmail.com>
Mateusz Major <apkd@users.noreply.github.com>
Mathias Duedahl <64321057+Lussebullen@users.noreply.github.com>
Mathieu Champlon <mathieu.champlon@docker.com>
Mathieu Rollet <matletix@gmail.com>
Matt Gucci <matt9ucci@gmail.com>
Matt Robenolt <matt@ydekproductions.com>
Matteo Orefice <matteo.orefice@bites4bits.software>
Matthew Heon <mheon@redhat.com>
Matthieu Hauglustaine <matt.hauglustaine@gmail.com>
Mauro Porras P <mauroporrasp@gmail.com>
Max Shytikov <mshytikov@gmail.com>
Max-Julian Pogner <max-julian@pogner.at>
Maxime Petazzoni <max@signalfuse.com>
Maximillian Fan Xavier <maximillianfx@gmail.com>
Mei ChunTao <mei.chuntao@zte.com.cn>
Melroy van den Berg <melroy@melroy.org>
Metal <2466052+tedhexaflow@users.noreply.github.com>
Micah Zoltu <micah@newrelic.com>
Michael A. Smith <michael@smith-li.com>
Michael Bridgen <mikeb@squaremobius.net>
Michael Crosby <crosbymichael@gmail.com>
Michael Friis <friism@gmail.com>
Michael Irwin <mikesir87@gmail.com>
Michael Käufl <docker@c.michael-kaeufl.de>
Michael Prokop <github@michael-prokop.at>
Michael Scharf <github@scharf.gr>
Michael Spetsiotis <michael_spets@hotmail.com>
Michael Steinert <mike.steinert@gmail.com>
Michael West <mwest@mdsol.com>
Michal Minář <miminar@redhat.com>
Michał Czeraszkiewicz <czerasz@gmail.com>
Miguel Angel Alvarez Cabrerizo <doncicuto@gmail.com>
Mihai Borobocea <MihaiBorob@gmail.com>
Mihuleacc Sergiu <mihuleac.sergiu@gmail.com>
Mike Brown <brownwm@us.ibm.com>
Mike Casas <mkcsas0@gmail.com>
Mike Dalton <mikedalton@github.com>
Mike Danese <mikedanese@google.com>
Mike Dillon <mike@embody.org>
Mike Goelzer <mike.goelzer@docker.com>
Mike MacCana <mike.maccana@gmail.com>
mikelinjie <294893458@qq.com>
Mikhail Vasin <vasin@cloud-tv.ru>
Milind Chawre <milindchawre@gmail.com>
Mindaugas Rukas <momomg@gmail.com>
Miroslav Gula <miroslav.gula@naytrolabs.com>
Misty Stanley-Jones <misty@docker.com>
Mohammad Banikazemi <mb@us.ibm.com>
Mohammed Aaqib Ansari <maaquib@gmail.com>
Mohini Anne Dsouza <mohini3917@gmail.com>
Moorthy RS <rsmoorthy@gmail.com>
Morgan Bauer <mbauer@us.ibm.com>
Morten Hekkvang <morten.hekkvang@sbab.se>
Morten Linderud <morten@linderud.pw>
Moysés Borges <moysesb@gmail.com>
Mozi <29089388+pzhlkj6612@users.noreply.github.com>
Mrunal Patel <mrunalp@gmail.com>
muicoder <muicoder@gmail.com>
Murukesh Mohanan <murukesh.mohanan@gmail.com>
Muthukumar R <muthur@gmail.com>
Máximo Cuadros <mcuadros@gmail.com>
Mårten Cassel <marten.cassel@gmail.com>
Nace Oroz <orkica@gmail.com>
Nahum Shalman <nshalman@omniti.com>
Nalin Dahyabhai <nalin@redhat.com>
Nao YONASHIRO <owan.orisano@gmail.com>
Nassim 'Nass' Eddequiouaq <eddequiouaq.nassim@gmail.com>
Natalie Parker <nparker@omnifone.com>
Nate Brennand <nate.brennand@clever.com>
Nathan Hsieh <hsieh.nathan@gmail.com>
Nathan LeClaire <nathan.leclaire@docker.com>
Nathan McCauley <nathan.mccauley@docker.com>
Neil Peterson <neilpeterson@outlook.com>
Nick Adcock <nick.adcock@docker.com>
Nick Santos <nick.santos@docker.com>
Nick Sieger <nick@nicksieger.com>
Nico Stapelbroek <nstapelbroek@gmail.com>
Nicola Kabar <nicolaka@gmail.com>
Nicolas Borboën <ponsfrilus@gmail.com>
Nicolas De Loof <nicolas.deloof@gmail.com>
Nikhil Chawla <chawlanikhil24@gmail.com>
Nikolas Garofil <nikolas.garofil@uantwerpen.be>
Nikolay Milovanov <nmil@itransformers.net>
Nir Soffer <nsoffer@redhat.com>
Nishant Totla <nishanttotla@gmail.com>
NIWA Hideyuki <niwa.niwa@nifty.ne.jp>
Noah Treuhaft <noah.treuhaft@docker.com>
O.S. Tezer <ostezer@gmail.com>
Oded Arbel <oded@geek.co.il>
Odin Ugedal <odin@ugedal.com>
ohmystack <jun.jiang02@ele.me>
OKA Naoya <git@okanaoya.com>
Oliver Pomeroy <oppomeroy@gmail.com>
Olle Jonsson <olle.jonsson@gmail.com>
Olli Janatuinen <olli.janatuinen@gmail.com>
Oscar Wieman <oscrx@icloud.com>
Otto Kekäläinen <otto@seravo.fi>
Ovidio Mallo <ovidio.mallo@gmail.com>
Pascal Borreli <pascal@borreli.com>
Patrick Böänziger <patrick.baenziger@bsi-software.com>
Patrick Daigle <114765035+pdaig@users.noreply.github.com>
Patrick Hemmer <patrick.hemmer@gmail.com>
Patrick Lang <plang@microsoft.com>
Paul <paul9869@gmail.com>
Paul Kehrer <paul.l.kehrer@gmail.com>
Paul Lietar <paul@lietar.net>
Paul Mulders <justinkb@gmail.com>
Paul Seyfert <pseyfert.mathphys@gmail.com>
Paul Weaver <pauweave@cisco.com>
Pavel Pospisil <pospispa@gmail.com>
Paweł Gronowski <pawel.gronowski@docker.com>
Paweł Pokrywka <pepawel@users.noreply.github.com>
Paweł Szczekutowicz <pszczekutowicz@gmail.com>
Peeyush Gupta <gpeeyush@linux.vnet.ibm.com>
Per Lundberg <perlun@gmail.com>
Peter Dave Hello <hsu@peterdavehello.org>
Peter Edge <peter.edge@gmail.com>
Peter Hsu <shhsu@microsoft.com>
Peter Jaffe <pjaffe@nevo.com>
Peter Kehl <peter.kehl@gmail.com>
Peter Nagy <xificurC@gmail.com>
Peter Salvatore <peter@psftw.com>
Peter Waller <p@pwaller.net>
Phil Estes <estesp@gmail.com>
Philip Alexander Etling <paetling@gmail.com>
Philipp Gillé <philipp.gille@gmail.com>
Philipp Schmied <pschmied@schutzwerk.com>
Phong Tran <tran.pho@northeastern.edu>
pidster <pid@pidster.com>
Pieter E Smit <diepes@github.com>
pixelistik <pixelistik@users.noreply.github.com>
Pratik Karki <prertik@outlook.com>
Prayag Verma <prayag.verma@gmail.com>
Preston Cowley <preston.cowley@sony.com>
Pure White <daniel48@126.com>
Qiang Huang <h.huangqiang@huawei.com>
Qinglan Peng <qinglanpeng@zju.edu.cn>
QQ喵 <gqqnb2005@gmail.com>
qudongfang <qudongfang@gmail.com>
Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Rahul Kadyan <hi@znck.me>
Rahul Zoldyck <rahulzoldyck@gmail.com>
Ravi Shekhar Jethani <rsjethani@gmail.com>
Ray Tsang <rayt@google.com>
Reficul <xuzhenglun@gmail.com>
Remy Suen <remy.suen@gmail.com>
Renaud Gaubert <rgaubert@nvidia.com>
Ricardo N Feliciano <FelicianoTech@gmail.com>
Rich Moyse <rich@moyse.us>
Richard Chen Zheng <58443436+rchenzheng@users.noreply.github.com>
Richard Mathie <richard.mathie@amey.co.uk>
Richard Scothern <richard.scothern@gmail.com>
Rick Wieman <git@rickw.nl>
Ritesh H Shukla <sritesh@vmware.com>
Riyaz Faizullabhoy <riyaz.faizullabhoy@docker.com>
Rob Gulewich <rgulewich@netflix.com>
Rob Murray <rob.murray@docker.com>
Robert Wallis <smilingrob@gmail.com>
Robin Naundorf <r.naundorf@fh-muenster.de>
Robin Speekenbrink <robin@kingsquare.nl>
Roch Feuillade <roch.feuillade@pandobac.com>
Rodolfo Ortiz <rodolfo.ortiz@definityfirst.com>
Rogelio Canedo <rcanedo@mappy.priv>
Rohan Verma <hello@rohanverma.net>
Roland Kammerer <roland.kammerer@linbit.com>
Roman Dudin <katrmr@gmail.com>
Rory Hunter <roryhunter2@gmail.com>
Ross Boucher <rboucher@gmail.com>
Rubens Figueiredo <r.figueiredo.52@gmail.com>
Rui Cao <ruicao@alauda.io>
Rui JingAn <quiterace@gmail.com>
Ryan Belgrave <rmb1993@gmail.com>
Ryan Detzel <ryan.detzel@gmail.com>
Ryan Stelly <ryan.stelly@live.com>
Ryan Wilson-Perkin <ryanwilsonperkin@gmail.com>
Ryan Zhang <ryan.zhang@docker.com>
Sainath Grandhi <sainath.grandhi@intel.com>
Sakeven Jiang <jc5930@sina.cn>
Sally O'Malley <somalley@redhat.com>
Sam Neirinck <sam@samneirinck.com>
Sam Thibault <sam.thibault@docker.com>
Samarth Shah <samashah@microsoft.com>
Sambuddha Basu <sambuddhabasu1@gmail.com>
Sami Tabet <salph.tabet@gmail.com>
Samuel Cochran <sj26@sj26.com>
Samuel Karp <skarp@amazon.com>
Sandro Jäckel <sandro.jaeckel@gmail.com>
Santhosh Manohar <santhosh@docker.com>
Sargun Dhillon <sargun@netflix.com>
Saswat Bhattacharya <sas.saswat@gmail.com>
Saurabh Kumar <saurabhkumar0184@gmail.com>
Scott Brenner <scott@scottbrenner.me>
Scott Collier <emailscottcollier@gmail.com>
Sean Christopherson <sean.j.christopherson@intel.com>
Sean Rodman <srodman7689@gmail.com>
Sebastiaan van Stijn <github@gone.nl>
Sergey Tryuber <Sergeant007@users.noreply.github.com>
Serhat Gülçiçek <serhat25@gmail.com>
Sevki Hasirci <s@sevki.org>
Shaun Kaasten <shaunk@gmail.com>
Sheng Yang <sheng@yasker.org>
Shijiang Wei <mountkin@gmail.com>
Shishir Mahajan <shishir.mahajan@redhat.com>
Shoubhik Bose <sbose78@gmail.com>
Shukui Yang <yangshukui@huawei.com>
Sian Lerk Lau <kiawin@gmail.com>
Sidhartha Mani <sidharthamn@gmail.com>
sidharthamani <sid@rancher.com>
Silvin Lubecki <silvin.lubecki@docker.com>
Simei He <hesimei@zju.edu.cn>
Simon Ferquel <simon.ferquel@docker.com>
Simon Heimberg <simon.heimberg@heimberg-ea.ch>
Sindhu S <sindhus@live.in>
Slava Semushin <semushin@redhat.com>
Solomon Hykes <solomon@docker.com>
Song Gao <song@gao.io>
Spencer Brown <spencer@spencerbrown.org>
Spring Lee <xi.shuai@outlook.com>
squeegels <lmscrewy@gmail.com>
Srini Brahmaroutu <srbrahma@us.ibm.com>
Stefan S. <tronicum@user.github.com>
Stefan Scherer <stefan.scherer@docker.com>
Stefan Weil <sw@weilnetz.de>
Stephane Jeandeaux <stephane.jeandeaux@gmail.com>
Stephen Day <stevvooe@gmail.com>
Stephen Rust <srust@blockbridge.com>
Steve Durrheimer <s.durrheimer@gmail.com>
Steve Richards <steve.richards@docker.com>
Steven Burgess <steven.a.burgess@hotmail.com>
Stoica-Marcu Floris-Andrei <floris.sm@gmail.com>
Subhajit Ghosh <isubuz.g@gmail.com>
Sun Jianbo <wonderflow.sun@gmail.com>
Sune Keller <absukl@almbrand.dk>
Sungwon Han <sungwon.han@navercorp.com>
Sunny Gogoi <indiasuny000@gmail.com>
Sven Dowideit <SvenDowideit@home.org.au>
Sylvain Baubeau <sbaubeau@redhat.com>
Sébastien HOUZÉ <cto@verylastroom.com>
T K Sourabh <sourabhtk37@gmail.com>
TAGOMORI Satoshi <tagomoris@gmail.com>
taiji-tech <csuhqg@foxmail.com>
Takeshi Koenuma <t.koenuma2@gmail.com>
Takuya Noguchi <takninnovationresearch@gmail.com>
Taylor Jones <monitorjbl@gmail.com>
Teiva Harsanyi <t.harsanyi@thebeat.co>
Tejaswini Duggaraju <naduggar@microsoft.com>
Tengfei Wang <tfwang@alauda.io>
Teppei Fukuda <knqyf263@gmail.com>
Thatcher Peskens <thatcher@docker.com>
Thibault Coupin <thibault.coupin@gmail.com>
Thomas Gazagnaire <thomas@gazagnaire.org>
Thomas Krzero <thomas.kovatchitch@gmail.com>
Thomas Leonard <thomas.leonard@docker.com>
Thomas Léveil <thomasleveil@gmail.com>
Thomas Riccardi <thomas@deepomatic.com>
Thomas Swift <tgs242@gmail.com>
Tianon Gravi <admwiggin@gmail.com>
Tianyi Wang <capkurmagati@gmail.com>
Tibor Vass <teabee89@gmail.com>
Tim Dettrick <t.dettrick@uq.edu.au>
Tim Hockin <thockin@google.com>
Tim Sampson <tim@sampson.fi>
Tim Smith <timbot@google.com>
Tim Waugh <twaugh@redhat.com>
Tim Welsh <timothy.welsh@docker.com>
Tim Wraight <tim.wraight@tangentlabs.co.uk>
timfeirg <kkcocogogo@gmail.com>
Timothy Hobbs <timothyhobbs@seznam.cz>
Tobias Bradtke <webwurst@gmail.com>
Tobias Gesellchen <tobias@gesellix.de>
Todd Whiteman <todd.whiteman@joyent.com>
Tom Denham <tom@tomdee.co.uk>
Tom Fotherby <tom+github@peopleperhour.com>
Tom Klingenberg <tklingenberg@lastflood.net>
Tom Milligan <code@tommilligan.net>
Tom X. Tobin <tomxtobin@tomxtobin.com>
Tomas Bäckman <larstomas@gmail.com>
Tomas Tomecek <ttomecek@redhat.com>
Tomasz Kopczynski <tomek@kopczynski.net.pl>
Tomáš Hrčka <thrcka@redhat.com>
Tony Abboud <tdabboud@hotmail.com>
Tõnis Tiigi <tonistiigi@gmail.com>
Trapier Marshall <trapier.marshall@docker.com>
Travis Cline <travis.cline@gmail.com>
Tristan Carel <tristan@cogniteev.com>
Tycho Andersen <tycho@docker.com>
Tycho Andersen <tycho@tycho.ws>
uhayate <uhayate.gong@daocloud.io>
Ulrich Bareth <ulrich.bareth@gmail.com>
Ulysses Souza <ulysses.souza@docker.com>
Umesh Yadav <umesh4257@gmail.com>
Vaclav Struhar <struharv@gmail.com>
Valentin Lorentz <progval+git@progval.net>
Vardan Pogosian <vardan.pogosyan@gmail.com>
Venkateswara Reddy Bukkasamudram <bukkasamudram@outlook.com>
Veres Lajos <vlajos@gmail.com>
Victor Vieux <victor.vieux@docker.com>
Victoria Bialas <victoria.bialas@docker.com>
Viktor Stanchev <me@viktorstanchev.com>
Ville Skyttä <ville.skytta@iki.fi>
Vimal Raghubir <vraghubir0418@gmail.com>
Vincent Batts <vbatts@redhat.com>
Vincent Bernat <Vincent.Bernat@exoscale.ch>
Vincent Demeester <vincent.demeester@docker.com>
Vincent Woo <me@vincentwoo.com>
Vishnu Kannan <vishnuk@google.com>
Vivek Goyal <vgoyal@redhat.com>
Wang Jie <wangjie5@chinaskycloud.com>
Wang Lei <wanglei@tenxcloud.com>
Wang Long <long.wanglong@huawei.com>
Wang Ping <present.wp@icloud.com>
Wang Xing <hzwangxing@corp.netease.com>
Wang Yuexiao <wang.yuexiao@zte.com.cn>
Wang Yumu <37442693@qq.com>
Wataru Ishida <ishida.wataru@lab.ntt.co.jp>
Wayne Song <wsong@docker.com>
Wen Cheng Ma <wenchma@cn.ibm.com>
Wenzhi Liang <wenzhi.liang@gmail.com>
Wes Morgan <cap10morgan@gmail.com>
Wewang Xiaorenfine <wang.xiaoren@zte.com.cn>
William Henry <whenry@redhat.com>
Xianglin Gao <xlgao@zju.edu.cn>
Xiaodong Liu <liuxiaodong@loongson.cn>
Xiaodong Zhang <a4012017@sina.com>
Xiaoxi He <xxhe@alauda.io>
Xinbo Weng <xihuanbo_0521@zju.edu.cn>
Xuecong Liao <satorulogic@gmail.com>
Yan Feng <yanfeng2@huawei.com>
Yanqiang Miao <miao.yanqiang@zte.com.cn>
Yassine Tijani <yasstij11@gmail.com>
Yi EungJun <eungjun.yi@navercorp.com>
Ying Li <ying.li@docker.com>
Yong Tang <yong.tang.github@outlook.com>
Yosef Fertel <yfertel@gmail.com>
Yu Peng <yu.peng36@zte.com.cn>
Yuan Sun <sunyuan3@huawei.com>
Yucheng Wu <wyc123wyc@gmail.com>
Yue Zhang <zy675793960@yeah.net>
Yunxiang Huang <hyxqshk@vip.qq.com>
Zachary Romero <zacromero3@gmail.com>
Zander Mackie <zmackie@gmail.com>
zebrilee <zebrilee@gmail.com>
Zeel B Patel <patel_zeel@iitgn.ac.in>
Zhang Kun <zkazure@gmail.com>
Zhang Wei <zhangwei555@huawei.com>
Zhang Wentao <zhangwentao234@huawei.com>
ZhangHang <stevezhang2014@gmail.com>
zhenghenghuo <zhenghenghuo@zju.edu.cn>
Zhiwei Liang <zliang@akamai.com>
Zhou Hao <zhouhao@cn.fujitsu.com>
Zhoulin Xie <zhoulin.xie@daocloud.io>
Zhu Guihua <zhugh.fnst@cn.fujitsu.com>
Zhuo Zhi <h.dwwwwww@gmail.com>
Álex González <agonzalezro@gmail.com>
Álvaro Lázaro <alvaro.lazaro.g@gmail.com>
Átila Camurça Alves <camurca.home@gmail.com>
Александр Менщиков <__Singleton__@hackerdom.ru>
徐俊杰 <paco.xu@daocloud.io>

vendor/github.com/docker/cli/LICENSE generated vendored Normal file
@ -0,0 +1,191 @@
Apache License
Version 2.0, January 2004
https://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Copyright 2013-2017 Docker, Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
https://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

vendor/github.com/docker/cli/NOTICE generated vendored Normal file
@ -0,0 +1,19 @@
Docker
Copyright 2012-2017 Docker, Inc.
This product includes software developed at Docker, Inc. (https://www.docker.com).
This product contains software (https://github.com/creack/pty) developed
by Keith Rarick, licensed under the MIT License.
The following is courtesy of our legal counsel:
Use and transfer of Docker may be subject to certain restrictions by the
United States and other governments.
It is your responsibility to ensure that your use and/or transfer does not
violate applicable laws.
For more information, see https://www.bis.doc.gov
See also https://www.apache.org/dev/crypto.html and/or seek legal counsel.

vendor/github.com/docker/cli/cli/config/config.go generated vendored Normal file
@ -0,0 +1,172 @@
package config
import (
"fmt"
"io"
"os"
"os/user"
"path/filepath"
"runtime"
"strings"
"sync"
"github.com/docker/cli/cli/config/configfile"
"github.com/docker/cli/cli/config/credentials"
"github.com/docker/cli/cli/config/types"
"github.com/pkg/errors"
)
const (
// EnvOverrideConfigDir is the name of the environment variable that can be
// used to override the location of the client configuration files (~/.docker).
//
// It takes priority over the default, but can be overridden by the "--config"
// command line option.
EnvOverrideConfigDir = "DOCKER_CONFIG"
// ConfigFileName is the name of the client configuration file inside the
// config-directory.
ConfigFileName = "config.json"
configFileDir = ".docker"
contextsDir = "contexts"
)
var (
initConfigDir = new(sync.Once)
configDir string
)
// resetConfigDir is used in testing to reset the "configDir" package variable
// and its sync.Once to force re-lookup between tests.
func resetConfigDir() {
configDir = ""
initConfigDir = new(sync.Once)
}
// getHomeDir returns the home directory of the current user with the help of
// environment variables depending on the target operating system.
// Returned path should be used with "path/filepath" to form new paths.
//
// On non-Windows platforms, it falls back to nss lookups, if the home
// directory cannot be obtained from environment-variables.
//
// If linking statically with cgo enabled against glibc, ensure the
// osusergo build tag is used.
//
// If needing to do nss lookups, do not disable cgo or set osusergo.
//
// getHomeDir is a copy of [pkg/homedir.Get] to prevent adding docker/docker
// as dependency for consumers that only need to read the config-file.
//
// [pkg/homedir.Get]: https://pkg.go.dev/github.com/docker/docker@v26.1.4+incompatible/pkg/homedir#Get
func getHomeDir() string {
home, _ := os.UserHomeDir()
if home == "" && runtime.GOOS != "windows" {
if u, err := user.Current(); err == nil {
return u.HomeDir
}
}
return home
}
// Dir returns the directory the configuration file is stored in
func Dir() string {
initConfigDir.Do(func() {
configDir = os.Getenv(EnvOverrideConfigDir)
if configDir == "" {
configDir = filepath.Join(getHomeDir(), configFileDir)
}
})
return configDir
}
// ContextStoreDir returns the directory the docker contexts are stored in
func ContextStoreDir() string {
return filepath.Join(Dir(), contextsDir)
}
// SetDir sets the directory the configuration file is stored in
func SetDir(dir string) {
// trigger the sync.Once to synchronise with Dir()
initConfigDir.Do(func() {})
configDir = filepath.Clean(dir)
}
// Path returns the path to a file relative to the config dir
func Path(p ...string) (string, error) {
path := filepath.Join(append([]string{Dir()}, p...)...)
if !strings.HasPrefix(path, Dir()+string(filepath.Separator)) {
return "", errors.Errorf("path %q is outside of root config directory %q", path, Dir())
}
return path, nil
}
// LoadFromReader is a convenience function that creates a ConfigFile object from
// a reader. It returns an error if configData is malformed.
func LoadFromReader(configData io.Reader) (*configfile.ConfigFile, error) {
configFile := configfile.ConfigFile{
AuthConfigs: make(map[string]types.AuthConfig),
}
err := configFile.LoadFromReader(configData)
return &configFile, err
}
// Load reads the configuration file ([ConfigFileName]) from the given directory.
// If no directory is given, it uses the default [Dir]. A [*configfile.ConfigFile]
// is returned containing the contents of the configuration file, or a default
// struct if no configfile exists in the given location.
//
// Load returns an error if a configuration file exists in the given location,
// but cannot be read, or is malformed. Consumers must handle errors to prevent
// overwriting an existing configuration file.
func Load(configDir string) (*configfile.ConfigFile, error) {
if configDir == "" {
configDir = Dir()
}
return load(configDir)
}
func load(configDir string) (*configfile.ConfigFile, error) {
filename := filepath.Join(configDir, ConfigFileName)
configFile := configfile.New(filename)
file, err := os.Open(filename)
if err != nil {
if os.IsNotExist(err) {
// It is OK for no configuration file to be present, in which
// case we return a default struct.
return configFile, nil
}
// Any other error happening when failing to read the file must be returned.
return configFile, errors.Wrap(err, "loading config file")
}
defer file.Close()
err = configFile.LoadFromReader(file)
if err != nil {
err = errors.Wrapf(err, "loading config file: %s: ", filename)
}
return configFile, err
}
// LoadDefaultConfigFile attempts to load the default config file and returns
// a reference to the ConfigFile struct. If none is found or when failing to load
// the configuration file, it initializes a default ConfigFile struct. If no
// credentials-store is set in the configuration file, it attempts to discover
// the default store to use for the current platform.
//
// Important: LoadDefaultConfigFile prints a warning to stderr when failing to
// load the configuration file, but otherwise ignores errors. Consumers should
// consider using [Load] (and [credentials.DetectDefaultStore]) to detect errors
// when updating the configuration file, to prevent discarding a (malformed)
// configuration file.
func LoadDefaultConfigFile(stderr io.Writer) *configfile.ConfigFile {
configFile, err := load(Dir())
if err != nil {
// FIXME(thaJeztah): we should not proceed here to prevent overwriting existing (but malformed) config files; see https://github.com/docker/cli/issues/5075
_, _ = fmt.Fprintln(stderr, "WARNING: Error", err)
}
if !configFile.ContainsAuth() {
configFile.CredentialsStore = credentials.DetectDefaultStore(configFile.CredentialsStore)
}
return configFile
}

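The config.go file above is the entry point most consumers use. As a minimal, editor-added sketch (not part of this commit), the following program loads the configuration from the default directory and resolves credentials for a registry; "registry.example.com" is a placeholder hostname:

package main

import (
	"fmt"
	"os"

	"github.com/docker/cli/cli/config"
)

func main() {
	// Load ~/.docker/config.json (or $DOCKER_CONFIG/config.json); an empty
	// argument means "use the default directory".
	cfg, err := config.Load("")
	if err != nil {
		fmt.Fprintln(os.Stderr, "error loading config:", err)
		os.Exit(1)
	}

	// Resolve credentials for a registry host via the configured store.
	// "registry.example.com" is a placeholder.
	auth, err := cfg.GetAuthConfig("registry.example.com")
	if err != nil {
		fmt.Fprintln(os.Stderr, "error resolving credentials:", err)
		os.Exit(1)
	}
	fmt.Println("username:", auth.Username)
}
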
@ -0,0 +1,354 @@
package configfile
import (
"encoding/base64"
"encoding/json"
"io"
"os"
"path/filepath"
"strings"
"github.com/docker/cli/cli/config/credentials"
"github.com/docker/cli/cli/config/types"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
)
// ConfigFile ~/.docker/config.json file info
type ConfigFile struct {
AuthConfigs map[string]types.AuthConfig `json:"auths"`
HTTPHeaders map[string]string `json:"HttpHeaders,omitempty"`
PsFormat string `json:"psFormat,omitempty"`
ImagesFormat string `json:"imagesFormat,omitempty"`
NetworksFormat string `json:"networksFormat,omitempty"`
PluginsFormat string `json:"pluginsFormat,omitempty"`
VolumesFormat string `json:"volumesFormat,omitempty"`
StatsFormat string `json:"statsFormat,omitempty"`
DetachKeys string `json:"detachKeys,omitempty"`
CredentialsStore string `json:"credsStore,omitempty"`
CredentialHelpers map[string]string `json:"credHelpers,omitempty"`
Filename string `json:"-"` // Note: for internal use only
ServiceInspectFormat string `json:"serviceInspectFormat,omitempty"`
ServicesFormat string `json:"servicesFormat,omitempty"`
TasksFormat string `json:"tasksFormat,omitempty"`
SecretFormat string `json:"secretFormat,omitempty"`
ConfigFormat string `json:"configFormat,omitempty"`
NodesFormat string `json:"nodesFormat,omitempty"`
PruneFilters []string `json:"pruneFilters,omitempty"`
Proxies map[string]ProxyConfig `json:"proxies,omitempty"`
Experimental string `json:"experimental,omitempty"`
CurrentContext string `json:"currentContext,omitempty"`
CLIPluginsExtraDirs []string `json:"cliPluginsExtraDirs,omitempty"`
Plugins map[string]map[string]string `json:"plugins,omitempty"`
Aliases map[string]string `json:"aliases,omitempty"`
Features map[string]string `json:"features,omitempty"`
}
// ProxyConfig contains proxy configuration settings
type ProxyConfig struct {
HTTPProxy string `json:"httpProxy,omitempty"`
HTTPSProxy string `json:"httpsProxy,omitempty"`
NoProxy string `json:"noProxy,omitempty"`
FTPProxy string `json:"ftpProxy,omitempty"`
AllProxy string `json:"allProxy,omitempty"`
}
// New initializes an empty configuration file for the given filename 'fn'
func New(fn string) *ConfigFile {
return &ConfigFile{
AuthConfigs: make(map[string]types.AuthConfig),
HTTPHeaders: make(map[string]string),
Filename: fn,
Plugins: make(map[string]map[string]string),
Aliases: make(map[string]string),
}
}
// LoadFromReader reads the configuration data given and sets up the auth config
// information with given directory and populates the receiver object
func (configFile *ConfigFile) LoadFromReader(configData io.Reader) error {
if err := json.NewDecoder(configData).Decode(configFile); err != nil && !errors.Is(err, io.EOF) {
return err
}
var err error
for addr, ac := range configFile.AuthConfigs {
if ac.Auth != "" {
ac.Username, ac.Password, err = decodeAuth(ac.Auth)
if err != nil {
return err
}
}
ac.Auth = ""
ac.ServerAddress = addr
configFile.AuthConfigs[addr] = ac
}
return nil
}
// ContainsAuth returns whether there is authentication configured
// in this file or not.
func (configFile *ConfigFile) ContainsAuth() bool {
return configFile.CredentialsStore != "" ||
len(configFile.CredentialHelpers) > 0 ||
len(configFile.AuthConfigs) > 0
}
// GetAuthConfigs returns the mapping of repo to auth configuration
func (configFile *ConfigFile) GetAuthConfigs() map[string]types.AuthConfig {
if configFile.AuthConfigs == nil {
configFile.AuthConfigs = make(map[string]types.AuthConfig)
}
return configFile.AuthConfigs
}
// SaveToWriter encodes and writes out all the authorization information to
// the given writer
func (configFile *ConfigFile) SaveToWriter(writer io.Writer) error {
// Encode sensitive data into a new/temp struct
tmpAuthConfigs := make(map[string]types.AuthConfig, len(configFile.AuthConfigs))
for k, authConfig := range configFile.AuthConfigs {
authCopy := authConfig
// encode and save the authstring, while blanking out the original fields
authCopy.Auth = encodeAuth(&authCopy)
authCopy.Username = ""
authCopy.Password = ""
authCopy.ServerAddress = ""
tmpAuthConfigs[k] = authCopy
}
saveAuthConfigs := configFile.AuthConfigs
configFile.AuthConfigs = tmpAuthConfigs
defer func() { configFile.AuthConfigs = saveAuthConfigs }()
// User-Agent header is automatically set, and should not be stored in the configuration
for v := range configFile.HTTPHeaders {
if strings.EqualFold(v, "User-Agent") {
delete(configFile.HTTPHeaders, v)
}
}
data, err := json.MarshalIndent(configFile, "", "\t")
if err != nil {
return err
}
_, err = writer.Write(data)
return err
}
// Save encodes and writes out all the authorization information
func (configFile *ConfigFile) Save() (retErr error) {
if configFile.Filename == "" {
return errors.Errorf("Can't save config with empty filename")
}
dir := filepath.Dir(configFile.Filename)
if err := os.MkdirAll(dir, 0o700); err != nil {
return err
}
temp, err := os.CreateTemp(dir, filepath.Base(configFile.Filename))
if err != nil {
return err
}
defer func() {
temp.Close()
if retErr != nil {
if err := os.Remove(temp.Name()); err != nil {
logrus.WithError(err).WithField("file", temp.Name()).Debug("Error cleaning up temp file")
}
}
}()
err = configFile.SaveToWriter(temp)
if err != nil {
return err
}
if err := temp.Close(); err != nil {
return errors.Wrap(err, "error closing temp file")
}
// Handle situation where the configfile is a symlink
cfgFile := configFile.Filename
if f, err := os.Readlink(cfgFile); err == nil {
cfgFile = f
}
// Try copying the current config file (if any) ownership and permissions
copyFilePermissions(cfgFile, temp.Name())
return os.Rename(temp.Name(), cfgFile)
}
// ParseProxyConfig computes proxy configuration by retrieving the config for the provided host and
// then checking this against any environment variables provided to the container
func (configFile *ConfigFile) ParseProxyConfig(host string, runOpts map[string]*string) map[string]*string {
var cfgKey string
if _, ok := configFile.Proxies[host]; !ok {
cfgKey = "default"
} else {
cfgKey = host
}
config := configFile.Proxies[cfgKey]
permitted := map[string]*string{
"HTTP_PROXY": &config.HTTPProxy,
"HTTPS_PROXY": &config.HTTPSProxy,
"NO_PROXY": &config.NoProxy,
"FTP_PROXY": &config.FTPProxy,
"ALL_PROXY": &config.AllProxy,
}
m := runOpts
if m == nil {
m = make(map[string]*string)
}
for k := range permitted {
if *permitted[k] == "" {
continue
}
if _, ok := m[k]; !ok {
m[k] = permitted[k]
}
if _, ok := m[strings.ToLower(k)]; !ok {
m[strings.ToLower(k)] = permitted[k]
}
}
return m
}
// encodeAuth creates a base64 encoded string containing authorization information
func encodeAuth(authConfig *types.AuthConfig) string {
if authConfig.Username == "" && authConfig.Password == "" {
return ""
}
authStr := authConfig.Username + ":" + authConfig.Password
msg := []byte(authStr)
encoded := make([]byte, base64.StdEncoding.EncodedLen(len(msg)))
base64.StdEncoding.Encode(encoded, msg)
return string(encoded)
}
// decodeAuth decodes a base64 encoded string and returns username and password
func decodeAuth(authStr string) (string, string, error) {
if authStr == "" {
return "", "", nil
}
decLen := base64.StdEncoding.DecodedLen(len(authStr))
decoded := make([]byte, decLen)
authByte := []byte(authStr)
n, err := base64.StdEncoding.Decode(decoded, authByte)
if err != nil {
return "", "", err
}
if n > decLen {
return "", "", errors.Errorf("Something went wrong decoding auth config")
}
userName, password, ok := strings.Cut(string(decoded), ":")
if !ok || userName == "" {
return "", "", errors.Errorf("Invalid auth configuration file")
}
return userName, strings.Trim(password, "\x00"), nil
}
// GetCredentialsStore returns a new credentials store from the settings in the
// configuration file
func (configFile *ConfigFile) GetCredentialsStore(registryHostname string) credentials.Store {
if helper := getConfiguredCredentialStore(configFile, registryHostname); helper != "" {
return newNativeStore(configFile, helper)
}
return credentials.NewFileStore(configFile)
}
// var for unit testing.
var newNativeStore = func(configFile *ConfigFile, helperSuffix string) credentials.Store {
return credentials.NewNativeStore(configFile, helperSuffix)
}
// GetAuthConfig for a repository from the credential store
func (configFile *ConfigFile) GetAuthConfig(registryHostname string) (types.AuthConfig, error) {
return configFile.GetCredentialsStore(registryHostname).Get(registryHostname)
}
// getConfiguredCredentialStore returns the credential helper configured for the
// given registry, the default credsStore, or the empty string if neither are
// configured.
func getConfiguredCredentialStore(c *ConfigFile, registryHostname string) string {
if c.CredentialHelpers != nil && registryHostname != "" {
if helper, exists := c.CredentialHelpers[registryHostname]; exists {
return helper
}
}
return c.CredentialsStore
}
// GetAllCredentials returns all of the credentials stored in all of the
// configured credential stores.
func (configFile *ConfigFile) GetAllCredentials() (map[string]types.AuthConfig, error) {
auths := make(map[string]types.AuthConfig)
addAll := func(from map[string]types.AuthConfig) {
for reg, ac := range from {
auths[reg] = ac
}
}
defaultStore := configFile.GetCredentialsStore("")
newAuths, err := defaultStore.GetAll()
if err != nil {
return nil, err
}
addAll(newAuths)
// Auth configs from a registry-specific helper should override those from the default store.
for registryHostname := range configFile.CredentialHelpers {
newAuth, err := configFile.GetAuthConfig(registryHostname)
if err != nil {
// TODO(thaJeztah): use context-logger, so that this output can be suppressed (in tests).
logrus.WithError(err).Warnf("Failed to get credentials for registry: %s", registryHostname)
continue
}
auths[registryHostname] = newAuth
}
return auths, nil
}
// GetFilename returns the file name that this config file is based on.
func (configFile *ConfigFile) GetFilename() string {
return configFile.Filename
}
// PluginConfig retrieves the requested option for the given plugin.
func (configFile *ConfigFile) PluginConfig(pluginname, option string) (string, bool) {
if configFile.Plugins == nil {
return "", false
}
pluginConfig, ok := configFile.Plugins[pluginname]
if !ok {
return "", false
}
value, ok := pluginConfig[option]
return value, ok
}
// SetPluginConfig sets the option to the given value for the given
// plugin. Passing a value of "" will remove the option. If removing
// the final config item for a given plugin then also cleans up the
// overall plugin entry.
func (configFile *ConfigFile) SetPluginConfig(pluginname, option, value string) {
if configFile.Plugins == nil {
configFile.Plugins = make(map[string]map[string]string)
}
pluginConfig, ok := configFile.Plugins[pluginname]
if !ok {
pluginConfig = make(map[string]string)
configFile.Plugins[pluginname] = pluginConfig
}
if value != "" {
pluginConfig[option] = value
} else {
delete(pluginConfig, option)
}
if len(pluginConfig) == 0 {
delete(configFile.Plugins, pluginname)
}
}

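To make the auth handling in this file concrete, here is a small editor-added sketch (not part of the vendored code) that feeds a config document through LoadFromReader; the registry name is invented and "dXNlcjpzM2NyM3Q=" is simply base64 for "user:s3cr3t":

package main

import (
	"fmt"
	"strings"

	"github.com/docker/cli/cli/config"
)

func main() {
	// "dXNlcjpzM2NyM3Q=" is base64 for "user:s3cr3t"; the registry is made up.
	raw := `{"auths": {"registry.example.com": {"auth": "dXNlcjpzM2NyM3Q="}}}`

	cfg, err := config.LoadFromReader(strings.NewReader(raw))
	if err != nil {
		panic(err)
	}

	// LoadFromReader decodes the "auth" field into Username and Password.
	ac := cfg.AuthConfigs["registry.example.com"]
	fmt.Println(ac.Username, ac.Password) // user s3cr3t
}
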
@ -0,0 +1,35 @@
//go:build !windows
package configfile
import (
"os"
"syscall"
)
// copyFilePermissions copies file ownership and permissions from "src" to "dst",
// ignoring any error during the process.
func copyFilePermissions(src, dst string) {
var (
mode os.FileMode = 0o600
uid, gid int
)
fi, err := os.Stat(src)
if err != nil {
return
}
if fi.Mode().IsRegular() {
mode = fi.Mode()
}
if err := os.Chmod(dst, mode); err != nil {
return
}
uid = int(fi.Sys().(*syscall.Stat_t).Uid)
gid = int(fi.Sys().(*syscall.Stat_t).Gid)
if uid > 0 && gid > 0 {
_ = os.Chown(dst, uid, gid)
}
}

@ -0,0 +1,5 @@
package configfile
func copyFilePermissions(src, dst string) {
// TODO implement for Windows
}

@ -0,0 +1,17 @@
package credentials
import (
"github.com/docker/cli/cli/config/types"
)
// Store is the interface that any credentials store must implement.
type Store interface {
// Erase removes credentials from the store for a given server.
Erase(serverAddress string) error
// Get retrieves credentials from the store for a given server.
Get(serverAddress string) (types.AuthConfig, error)
// GetAll retrieves all the credentials from the store.
GetAll() (map[string]types.AuthConfig, error)
// Store saves credentials in the store.
Store(authConfig types.AuthConfig) error
}

@ -0,0 +1,22 @@
package credentials
import "os/exec"
// DetectDefaultStore return the default credentials store for the platform if
// no user-defined store is passed, and the store executable is available.
func DetectDefaultStore(store string) string {
if store != "" {
// use user-defined
return store
}
platformDefault := defaultCredentialsStore()
if platformDefault == "" {
return ""
}
if _, err := exec.LookPath(remoteCredentialsPrefix + platformDefault); err != nil {
return ""
}
return platformDefault
}

@ -0,0 +1,5 @@
package credentials
func defaultCredentialsStore() string {
return "osxkeychain"
}

@ -0,0 +1,13 @@
package credentials
import (
"os/exec"
)
func defaultCredentialsStore() string {
if _, err := exec.LookPath("pass"); err == nil {
return "pass"
}
return "secretservice"
}

@ -0,0 +1,7 @@
//go:build !windows && !darwin && !linux
package credentials
func defaultCredentialsStore() string {
return ""
}

@ -0,0 +1,5 @@
package credentials
func defaultCredentialsStore() string {
return "wincred"
}

@ -0,0 +1,86 @@
package credentials
import (
"net"
"net/url"
"strings"
"github.com/docker/cli/cli/config/types"
)
type store interface {
Save() error
GetAuthConfigs() map[string]types.AuthConfig
GetFilename() string
}
// fileStore implements a credentials store using
// the docker configuration file to keep the credentials in plain text.
type fileStore struct {
file store
}
// NewFileStore creates a new file credentials store.
func NewFileStore(file store) Store {
return &fileStore{file: file}
}
// Erase removes the given credentials from the file store.
func (c *fileStore) Erase(serverAddress string) error {
delete(c.file.GetAuthConfigs(), serverAddress)
return c.file.Save()
}
// Get retrieves credentials for a specific server from the file store.
func (c *fileStore) Get(serverAddress string) (types.AuthConfig, error) {
authConfig, ok := c.file.GetAuthConfigs()[serverAddress]
if !ok {
// Maybe they have a legacy config file, we will iterate the keys converting
// them to the new format and testing
for r, ac := range c.file.GetAuthConfigs() {
if serverAddress == ConvertToHostname(r) {
return ac, nil
}
}
authConfig = types.AuthConfig{}
}
return authConfig, nil
}
func (c *fileStore) GetAll() (map[string]types.AuthConfig, error) {
return c.file.GetAuthConfigs(), nil
}
// Store saves the given credentials in the file store.
func (c *fileStore) Store(authConfig types.AuthConfig) error {
authConfigs := c.file.GetAuthConfigs()
authConfigs[authConfig.ServerAddress] = authConfig
return c.file.Save()
}
func (c *fileStore) GetFilename() string {
return c.file.GetFilename()
}
func (c *fileStore) IsFileStore() bool {
return true
}
// ConvertToHostname converts a registry URL which has http|https prepended
// to just a hostname.
// Copied from github.com/docker/docker/registry.ConvertToHostname to reduce dependencies.
func ConvertToHostname(maybeURL string) string {
stripped := maybeURL
if strings.Contains(stripped, "://") {
u, err := url.Parse(stripped)
if err == nil && u.Hostname() != "" {
if u.Port() == "" {
return u.Hostname()
}
return net.JoinHostPort(u.Hostname(), u.Port())
}
}
hostName, _, _ := strings.Cut(stripped, "/")
return hostName
}

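ConvertToHostname is easy to misread, so a short illustrative sketch (assumed inputs, not part of this commit) of its behavior:

package main

import (
	"fmt"

	"github.com/docker/cli/cli/config/credentials"
)

func main() {
	// Scheme and path are stripped; an explicit port is preserved.
	fmt.Println(credentials.ConvertToHostname("https://registry.example.com/v2/"))      // registry.example.com
	fmt.Println(credentials.ConvertToHostname("https://registry.example.com:5000/v2/")) // registry.example.com:5000
	fmt.Println(credentials.ConvertToHostname("registry.example.com/some/repo"))        // registry.example.com
}
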
@ -0,0 +1,147 @@
package credentials
import (
"github.com/docker/cli/cli/config/types"
"github.com/docker/docker-credential-helpers/client"
"github.com/docker/docker-credential-helpers/credentials"
)
const (
remoteCredentialsPrefix = "docker-credential-" //nolint:gosec // ignore G101: Potential hardcoded credentials
tokenUsername = "<token>"
)
// nativeStore implements a credentials store
// using native keychain to keep credentials secure.
// It piggybacks into a file store to keep users' emails.
type nativeStore struct {
programFunc client.ProgramFunc
fileStore Store
}
// NewNativeStore creates a new native store that
// uses a remote helper program to manage credentials.
func NewNativeStore(file store, helperSuffix string) Store {
name := remoteCredentialsPrefix + helperSuffix
return &nativeStore{
programFunc: client.NewShellProgramFunc(name),
fileStore: NewFileStore(file),
}
}
// Erase removes the given credentials from the native store.
func (c *nativeStore) Erase(serverAddress string) error {
if err := client.Erase(c.programFunc, serverAddress); err != nil {
return err
}
// Fallback to plain text store to remove email
return c.fileStore.Erase(serverAddress)
}
// Get retrieves credentials for a specific server from the native store.
func (c *nativeStore) Get(serverAddress string) (types.AuthConfig, error) {
// load user email if it exist or an empty auth config.
auth, _ := c.fileStore.Get(serverAddress)
creds, err := c.getCredentialsFromStore(serverAddress)
if err != nil {
return auth, err
}
auth.Username = creds.Username
auth.IdentityToken = creds.IdentityToken
auth.Password = creds.Password
auth.ServerAddress = creds.ServerAddress
return auth, nil
}
// GetAll retrieves all the credentials from the native store.
func (c *nativeStore) GetAll() (map[string]types.AuthConfig, error) {
auths, err := c.listCredentialsInStore()
if err != nil {
return nil, err
}
// Emails are only stored in the file store.
// This call can be safely eliminated when emails are removed.
fileConfigs, _ := c.fileStore.GetAll()
authConfigs := make(map[string]types.AuthConfig)
for registry := range auths {
creds, err := c.getCredentialsFromStore(registry)
if err != nil {
return nil, err
}
ac := fileConfigs[registry] // might contain Email
ac.Username = creds.Username
ac.Password = creds.Password
ac.IdentityToken = creds.IdentityToken
if ac.ServerAddress == "" {
ac.ServerAddress = creds.ServerAddress
}
authConfigs[registry] = ac
}
return authConfigs, nil
}
// Store saves the given credentials in the file store.
func (c *nativeStore) Store(authConfig types.AuthConfig) error {
if err := c.storeCredentialsInStore(authConfig); err != nil {
return err
}
authConfig.Username = ""
authConfig.Password = ""
authConfig.IdentityToken = ""
// Fallback to old credential in plain text to save only the email
return c.fileStore.Store(authConfig)
}
// storeCredentialsInStore executes the command to store the credentials in the native store.
func (c *nativeStore) storeCredentialsInStore(config types.AuthConfig) error {
creds := &credentials.Credentials{
ServerURL: config.ServerAddress,
Username: config.Username,
Secret: config.Password,
}
if config.IdentityToken != "" {
creds.Username = tokenUsername
creds.Secret = config.IdentityToken
}
return client.Store(c.programFunc, creds)
}
// getCredentialsFromStore executes the command to get the credentials from the native store.
func (c *nativeStore) getCredentialsFromStore(serverAddress string) (types.AuthConfig, error) {
var ret types.AuthConfig
creds, err := client.Get(c.programFunc, serverAddress)
if err != nil {
if credentials.IsErrCredentialsNotFound(err) {
// do not return an error if the credentials are not
// in the keychain. Let docker ask for new credentials.
return ret, nil
}
return ret, err
}
if creds.Username == tokenUsername {
ret.IdentityToken = creds.Secret
} else {
ret.Password = creds.Secret
ret.Username = creds.Username
}
ret.ServerAddress = serverAddress
return ret, nil
}
// listCredentialsInStore returns a listing of stored credentials as a map of
// URL -> username.
func (c *nativeStore) listCredentialsInStore() (map[string]string, error) {
return client.List(c.programFunc)
}

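A hedged sketch of how a per-registry credential helper is selected (the "ecr-login" suffix and registry name are made up; the docker CLI would then exec a binary named docker-credential-ecr-login for this host):

package main

import (
	"fmt"

	"github.com/docker/cli/cli/config/configfile"
)

func main() {
	cfg := configfile.New("config.json")

	// Map a registry to a helper suffix; the resulting native store shells
	// out to "docker-credential-ecr-login". Both names are illustrative.
	cfg.CredentialHelpers = map[string]string{
		"registry.example.com": "ecr-login",
	}

	store := cfg.GetCredentialsStore("registry.example.com")
	fmt.Printf("store type: %T\n", store)
}
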
@ -0,0 +1,22 @@
package types
// AuthConfig contains authorization information for connecting to a Registry
type AuthConfig struct {
Username string `json:"username,omitempty"`
Password string `json:"password,omitempty"`
Auth string `json:"auth,omitempty"`
// Email is an optional value associated with the username.
// This field is deprecated and will be removed in a later
// version of docker.
Email string `json:"email,omitempty"`
ServerAddress string `json:"serveraddress,omitempty"`
// IdentityToken is used to authenticate the user and get
// an access token for the registry.
IdentityToken string `json:"identitytoken,omitempty"`
// RegistryToken is a bearer token to be sent to a registry
RegistryToken string `json:"registrytoken,omitempty"`
}

@ -0,0 +1,20 @@
Copyright (c) 2016 David Calavera
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

@ -0,0 +1,121 @@
package client
import (
"bytes"
"encoding/json"
"fmt"
"strings"
"github.com/docker/docker-credential-helpers/credentials"
)
// isValidCredsMessage checks if 'msg' contains invalid credentials error message.
// It returns whether the logs are free of invalid credentials errors and the error if it isn't.
// error values can be errCredentialsMissingServerURL or errCredentialsMissingUsername.
func isValidCredsMessage(msg string) error {
if credentials.IsCredentialsMissingServerURLMessage(msg) {
return credentials.NewErrCredentialsMissingServerURL()
}
if credentials.IsCredentialsMissingUsernameMessage(msg) {
return credentials.NewErrCredentialsMissingUsername()
}
return nil
}
// Store uses an external program to save credentials.
func Store(program ProgramFunc, creds *credentials.Credentials) error {
cmd := program("store")
buffer := new(bytes.Buffer)
if err := json.NewEncoder(buffer).Encode(creds); err != nil {
return err
}
cmd.Input(buffer)
out, err := cmd.Output()
if err != nil {
t := strings.TrimSpace(string(out))
if isValidErr := isValidCredsMessage(t); isValidErr != nil {
err = isValidErr
}
return fmt.Errorf("error storing credentials - err: %v, out: `%s`", err, t)
}
return nil
}
// Get executes an external program to get the credentials from a native store.
func Get(program ProgramFunc, serverURL string) (*credentials.Credentials, error) {
cmd := program("get")
cmd.Input(strings.NewReader(serverURL))
out, err := cmd.Output()
if err != nil {
t := strings.TrimSpace(string(out))
if credentials.IsErrCredentialsNotFoundMessage(t) {
return nil, credentials.NewErrCredentialsNotFound()
}
if isValidErr := isValidCredsMessage(t); isValidErr != nil {
err = isValidErr
}
return nil, fmt.Errorf("error getting credentials - err: %v, out: `%s`", err, t)
}
resp := &credentials.Credentials{
ServerURL: serverURL,
}
if err := json.NewDecoder(bytes.NewReader(out)).Decode(resp); err != nil {
return nil, err
}
return resp, nil
}
// Erase executes a program to remove the server credentials from the native store.
func Erase(program ProgramFunc, serverURL string) error {
cmd := program("erase")
cmd.Input(strings.NewReader(serverURL))
out, err := cmd.Output()
if err != nil {
t := strings.TrimSpace(string(out))
if isValidErr := isValidCredsMessage(t); isValidErr != nil {
err = isValidErr
}
return fmt.Errorf("error erasing credentials - err: %v, out: `%s`", err, t)
}
return nil
}
// List executes a program to list server credentials in the native store.
func List(program ProgramFunc) (map[string]string, error) {
cmd := program("list")
cmd.Input(strings.NewReader("unused"))
out, err := cmd.Output()
if err != nil {
t := strings.TrimSpace(string(out))
if isValidErr := isValidCredsMessage(t); isValidErr != nil {
err = isValidErr
}
return nil, fmt.Errorf("error listing credentials - err: %v, out: `%s`", err, t)
}
var resp map[string]string
if err = json.NewDecoder(bytes.NewReader(out)).Decode(&resp); err != nil {
return nil, err
}
return resp, nil
}

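For reference, a minimal editor-added sketch of driving an external helper through this client package; it assumes a docker-credential-pass binary is on PATH and uses a placeholder server URL:

package main

import (
	"fmt"

	"github.com/docker/docker-credential-helpers/client"
	"github.com/docker/docker-credential-helpers/credentials"
)

func main() {
	// Assumes "docker-credential-pass" exists on PATH; the URL is a placeholder.
	p := client.NewShellProgramFunc("docker-credential-pass")

	err := client.Store(p, &credentials.Credentials{
		ServerURL: "https://registry.example.com",
		Username:  "user",
		Secret:    "s3cr3t",
	})
	if err != nil {
		fmt.Println("store failed:", err)
		return
	}

	creds, err := client.Get(p, "https://registry.example.com")
	if err != nil {
		fmt.Println("get failed:", err)
		return
	}
	fmt.Println(creds.Username, creds.Secret)
}
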
@ -0,0 +1,57 @@
package client
import (
"fmt"
"io"
"os"
exec "golang.org/x/sys/execabs"
)
// Program is an interface to execute external programs.
type Program interface {
Output() ([]byte, error)
Input(in io.Reader)
}
// ProgramFunc is a type of function that initializes programs based on arguments.
type ProgramFunc func(args ...string) Program
// NewShellProgramFunc creates programs that are executed in a Shell.
func NewShellProgramFunc(name string) ProgramFunc {
return NewShellProgramFuncWithEnv(name, nil)
}
// NewShellProgramFuncWithEnv creates programs that are executed in a Shell with environment variables
func NewShellProgramFuncWithEnv(name string, env *map[string]string) ProgramFunc {
return func(args ...string) Program {
return &Shell{cmd: createProgramCmdRedirectErr(name, args, env)}
}
}
func createProgramCmdRedirectErr(commandName string, args []string, env *map[string]string) *exec.Cmd {
programCmd := exec.Command(commandName, args...)
programCmd.Env = os.Environ()
if env != nil {
for k, v := range *env {
programCmd.Env = append(programCmd.Env, fmt.Sprintf("%s=%s", k, v))
}
}
programCmd.Stderr = os.Stderr
return programCmd
}
// Shell invokes shell commands to talk with a remote credentials helper.
type Shell struct {
cmd *exec.Cmd
}
// Output returns responses from the remote credentials helper.
func (s *Shell) Output() ([]byte, error) {
return s.cmd.Output()
}
// Input sets the input to send to a remote credentials helper.
func (s *Shell) Input(in io.Reader) {
s.cmd.Stdin = in
}

@ -0,0 +1,186 @@
package credentials
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"io"
"os"
"strings"
)
// Credentials holds the information shared between docker and the credentials store.
type Credentials struct {
ServerURL string
Username string
Secret string
}
// isValid checks the integrity of Credentials object such that no credentials lack
// a server URL or a username.
// It returns whether the credentials are valid and the error if it isn't.
// error values can be errCredentialsMissingServerURL or errCredentialsMissingUsername
func (c *Credentials) isValid() (bool, error) {
if len(c.ServerURL) == 0 {
return false, NewErrCredentialsMissingServerURL()
}
if len(c.Username) == 0 {
return false, NewErrCredentialsMissingUsername()
}
return true, nil
}
// CredsLabel holds the way Docker credentials should be labeled as such in credentials stores that allow labelling.
// That label allows to filter out non-Docker credentials too at lookup/search in macOS keychain,
// Windows credentials manager and Linux libsecret. Default value is "Docker Credentials"
var CredsLabel = "Docker Credentials"
// SetCredsLabel is a simple setter for CredsLabel
func SetCredsLabel(label string) {
CredsLabel = label
}
// Serve initializes the credentials helper and parses the action argument.
// This function is designed to be called from a command line interface.
// It uses os.Args[1] as the key for the action.
// It uses os.Stdin as input and os.Stdout as output.
// This function terminates the program with os.Exit(1) if there is an error.
func Serve(helper Helper) {
var err error
if len(os.Args) != 2 {
err = fmt.Errorf("Usage: %s <store|get|erase|list|version>", os.Args[0])
}
if err == nil {
err = HandleCommand(helper, os.Args[1], os.Stdin, os.Stdout)
}
if err != nil {
fmt.Fprintf(os.Stdout, "%v\n", err)
os.Exit(1)
}
}
// HandleCommand uses a helper and a key to run a credential action.
func HandleCommand(helper Helper, key string, in io.Reader, out io.Writer) error {
switch key {
case "store":
return Store(helper, in)
case "get":
return Get(helper, in, out)
case "erase":
return Erase(helper, in)
case "list":
return List(helper, out)
case "version":
return PrintVersion(out)
}
return fmt.Errorf("Unknown credential action `%s`", key)
}
// Store uses a helper and an input reader to save credentials.
// The reader must contain the JSON serialization of a Credentials struct.
func Store(helper Helper, reader io.Reader) error {
scanner := bufio.NewScanner(reader)
buffer := new(bytes.Buffer)
for scanner.Scan() {
buffer.Write(scanner.Bytes())
}
if err := scanner.Err(); err != nil && err != io.EOF {
return err
}
var creds Credentials
if err := json.NewDecoder(buffer).Decode(&creds); err != nil {
return err
}
if ok, err := creds.isValid(); !ok {
return err
}
return helper.Add(&creds)
}
// Get retrieves the credentials for a given server url.
// The reader must contain the server URL to search.
// The writer is used to write the JSON serialization of the credentials.
func Get(helper Helper, reader io.Reader, writer io.Writer) error {
scanner := bufio.NewScanner(reader)
buffer := new(bytes.Buffer)
for scanner.Scan() {
buffer.Write(scanner.Bytes())
}
if err := scanner.Err(); err != nil && err != io.EOF {
return err
}
serverURL := strings.TrimSpace(buffer.String())
if len(serverURL) == 0 {
return NewErrCredentialsMissingServerURL()
}
username, secret, err := helper.Get(serverURL)
if err != nil {
return err
}
resp := Credentials{
ServerURL: serverURL,
Username: username,
Secret: secret,
}
buffer.Reset()
if err := json.NewEncoder(buffer).Encode(resp); err != nil {
return err
}
fmt.Fprint(writer, buffer.String())
return nil
}
// Erase removes credentials from the store.
// The reader must contain the server URL to remove.
func Erase(helper Helper, reader io.Reader) error {
scanner := bufio.NewScanner(reader)
buffer := new(bytes.Buffer)
for scanner.Scan() {
buffer.Write(scanner.Bytes())
}
if err := scanner.Err(); err != nil && err != io.EOF {
return err
}
serverURL := strings.TrimSpace(buffer.String())
if len(serverURL) == 0 {
return NewErrCredentialsMissingServerURL()
}
return helper.Delete(serverURL)
}
// List returns all the serverURLs of keys in
// the OS store as a list of strings
func List(helper Helper, writer io.Writer) error {
accts, err := helper.List()
if err != nil {
return err
}
return json.NewEncoder(writer).Encode(accts)
}
// PrintVersion outputs the current version.
func PrintVersion(writer io.Writer) error {
fmt.Fprintf(writer, "%s (%s) %s\n", Name, Package, Version)
return nil
}

@ -0,0 +1,102 @@
package credentials
const (
// ErrCredentialsNotFound standardizes the not found error, so every helper returns
// the same message and docker can handle it properly.
errCredentialsNotFoundMessage = "credentials not found in native keychain"
// ErrCredentialsMissingServerURL and ErrCredentialsMissingUsername standardize
// invalid credentials or credentials management operations
errCredentialsMissingServerURLMessage = "no credentials server URL"
errCredentialsMissingUsernameMessage = "no credentials username"
)
// errCredentialsNotFound represents an error
// raised when credentials are not in the store.
type errCredentialsNotFound struct{}
// Error returns the standard error message
// for when the credentials are not in the store.
func (errCredentialsNotFound) Error() string {
return errCredentialsNotFoundMessage
}
// NewErrCredentialsNotFound creates a new error
// for when the credentials are not in the store.
func NewErrCredentialsNotFound() error {
return errCredentialsNotFound{}
}
// IsErrCredentialsNotFound returns true if the error
// was caused by not having a set of credentials in a store.
func IsErrCredentialsNotFound(err error) bool {
_, ok := err.(errCredentialsNotFound)
return ok
}
// IsErrCredentialsNotFoundMessage returns true if the error
// was caused by not having a set of credentials in a store.
//
// This function helps to check messages returned by an
// external program via its standard output.
func IsErrCredentialsNotFoundMessage(err string) bool {
return err == errCredentialsNotFoundMessage
}
// errCredentialsMissingServerURL represents an error raised
// when the credentials object has no server URL or when no
// server URL is provided to a credentials operation requiring
// one.
type errCredentialsMissingServerURL struct{}
func (errCredentialsMissingServerURL) Error() string {
return errCredentialsMissingServerURLMessage
}
// errCredentialsMissingUsername represents an error raised
// when the credentials object has no username or when no
// username is provided to a credentials operation requiring
// one.
type errCredentialsMissingUsername struct{}
func (errCredentialsMissingUsername) Error() string {
return errCredentialsMissingUsernameMessage
}
// NewErrCredentialsMissingServerURL creates a new error for
// errCredentialsMissingServerURL.
func NewErrCredentialsMissingServerURL() error {
return errCredentialsMissingServerURL{}
}
// NewErrCredentialsMissingUsername creates a new error for
// errCredentialsMissingUsername.
func NewErrCredentialsMissingUsername() error {
return errCredentialsMissingUsername{}
}
// IsCredentialsMissingServerURL returns true if the error
// was an errCredentialsMissingServerURL.
func IsCredentialsMissingServerURL(err error) bool {
_, ok := err.(errCredentialsMissingServerURL)
return ok
}
// IsCredentialsMissingServerURLMessage checks for an
// errCredentialsMissingServerURL in the error message.
func IsCredentialsMissingServerURLMessage(err string) bool {
return err == errCredentialsMissingServerURLMessage
}
// IsCredentialsMissingUsername returns true if the error
// was an errCredentialsMissingUsername.
func IsCredentialsMissingUsername(err error) bool {
_, ok := err.(errCredentialsMissingUsername)
return ok
}
// IsCredentialsMissingUsernameMessage checks for an
// errCredentialsMissingUsername in the error message.
func IsCredentialsMissingUsernameMessage(err string) bool {
return err == errCredentialsMissingUsernameMessage
}

@ -0,0 +1,14 @@
package credentials
// Helper is the interface a credentials store helper must implement.
type Helper interface {
// Add appends credentials to the store.
Add(*Credentials) error
// Delete removes credentials from the store.
Delete(serverURL string) error
// Get retrieves credentials from the store.
// It returns username and secret as strings.
Get(serverURL string) (string, string, error)
// List returns the stored serverURLs and their associated usernames.
List() (map[string]string, error)
}

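Putting the Helper interface and Serve together, a toy in-memory helper might look like the sketch below (editor-added illustration; a real helper would persist to a keychain or secret service rather than a map):

package main

import (
	"github.com/docker/docker-credential-helpers/credentials"
)

// memoryHelper is a toy in-memory store used only to illustrate the interface.
type memoryHelper struct {
	creds map[string]*credentials.Credentials
}

func (m *memoryHelper) Add(c *credentials.Credentials) error {
	m.creds[c.ServerURL] = c
	return nil
}

func (m *memoryHelper) Delete(serverURL string) error {
	delete(m.creds, serverURL)
	return nil
}

func (m *memoryHelper) Get(serverURL string) (string, string, error) {
	if c, ok := m.creds[serverURL]; ok {
		return c.Username, c.Secret, nil
	}
	return "", "", credentials.NewErrCredentialsNotFound()
}

func (m *memoryHelper) List() (map[string]string, error) {
	out := make(map[string]string, len(m.creds))
	for url, c := range m.creds {
		out[url] = c.Username
	}
	return out, nil
}

func main() {
	// Serve reads the action from os.Args[1] and the payload from stdin,
	// which is how the docker CLI invokes docker-credential-* binaries.
	credentials.Serve(&memoryHelper{creds: map[string]*credentials.Credentials{}})
}
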
@ -0,0 +1,16 @@
package credentials
var (
// Name is filled at linking time
Name = ""
// Package is filled at linking time
Package = "github.com/docker/docker-credential-helpers"
// Version holds the complete version number. Filled in at linking time.
Version = "v0.0.0+unknown"
// Revision is filled with the VCS (e.g. git) revision being used to build
// the program at linking time.
Revision = ""
)

vendor/github.com/go-openapi/jsonpointer/.editorconfig generated vendored Normal file
@ -0,0 +1,26 @@
# top-most EditorConfig file
root = true
# Unix-style newlines with a newline ending every file
[*]
end_of_line = lf
insert_final_newline = true
indent_style = space
indent_size = 2
trim_trailing_whitespace = true
# Set default charset
[*.{js,py,go,scala,rb,java,html,css,less,sass,md}]
charset = utf-8
# Tab indentation (no size specified)
[*.go]
indent_style = tab
[*.md]
trim_trailing_whitespace = false
# Matches the exact files either package.json or .travis.yml
[{package.json,.travis.yml}]
indent_style = space
indent_size = 2

vendor/github.com/go-openapi/jsonpointer/.gitignore generated vendored Normal file
@ -0,0 +1 @@
secrets.yml

@ -0,0 +1,74 @@
# Contributor Covenant Code of Conduct
## Our Pledge
In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, gender identity and expression, level of experience,
nationality, personal appearance, race, religion, or sexual identity and
orientation.
## Our Standards
Examples of behavior that contributes to creating a positive environment
include:
* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members
Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or
advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Our Responsibilities
Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.
Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.
## Scope
This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an appointed
representative at an online or offline event. Representation of a project may be
further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at ivan+abuse@flanders.co.nz. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.
Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at [http://contributor-covenant.org/version/1/4][version]
[homepage]: http://contributor-covenant.org
[version]: http://contributor-covenant.org/version/1/4/

vendor/github.com/go-openapi/jsonpointer/LICENSE generated vendored Normal file
@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

15
vendor/github.com/go-openapi/jsonpointer/README.md generated vendored Normal file
View File

@ -0,0 +1,15 @@
# gojsonpointer [![Build Status](https://travis-ci.org/go-openapi/jsonpointer.svg?branch=master)](https://travis-ci.org/go-openapi/jsonpointer) [![codecov](https://codecov.io/gh/go-openapi/jsonpointer/branch/master/graph/badge.svg)](https://codecov.io/gh/go-openapi/jsonpointer) [![Slack Status](https://slackin.goswagger.io/badge.svg)](https://slackin.goswagger.io)
[![license](http://img.shields.io/badge/license-Apache%20v2-orange.svg)](https://raw.githubusercontent.com/go-openapi/jsonpointer/master/LICENSE) [![GoDoc](https://godoc.org/github.com/go-openapi/jsonpointer?status.svg)](http://godoc.org/github.com/go-openapi/jsonpointer)
An implementation of JSON Pointer for the Go language
## Status
Completed YES
Tested YES
## References
http://tools.ietf.org/html/draft-ietf-appsawg-json-pointer-07
### Note
Section 4 (Evaluation) of the reference above, starting with 'If the currently referenced value is a JSON array, the reference token MUST contain either...', is not implemented.
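A minimal usage sketch, based on the New and Get API defined in pointer.go below; the document value and the pointer string here are illustrative only, not taken from the package:

package main

import (
	"fmt"

	"github.com/go-openapi/jsonpointer"
)

func main() {
	// A document decoded from JSON into generic Go values.
	doc := map[string]interface{}{
		"servers": []interface{}{
			map[string]interface{}{"url": "https://example.com"},
		},
	}

	// "/servers/0/url" walks the "servers" key, slice index 0, then the "url" key.
	ptr, err := jsonpointer.New("/servers/0/url")
	if err != nil {
		panic(err)
	}

	value, kind, err := ptr.Get(doc)
	if err != nil {
		panic(err)
	}
	fmt.Println(value, kind) // prints: https://example.com string
}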

390
vendor/github.com/go-openapi/jsonpointer/pointer.go generated vendored Normal file
View File

@ -0,0 +1,390 @@
// Copyright 2013 sigu-399 ( https://github.com/sigu-399 )
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// author sigu-399
// author-github https://github.com/sigu-399
// author-mail sigu.399@gmail.com
//
// repository-name jsonpointer
// repository-desc An implementation of JSON Pointer - Go language
//
// description Main and unique file.
//
// created 25-02-2013
package jsonpointer
import (
"errors"
"fmt"
"reflect"
"strconv"
"strings"
"github.com/go-openapi/swag"
)
const (
emptyPointer = ``
pointerSeparator = `/`
invalidStart = `JSON pointer must be empty or start with a "` + pointerSeparator
)
var jsonPointableType = reflect.TypeOf(new(JSONPointable)).Elem()
var jsonSetableType = reflect.TypeOf(new(JSONSetable)).Elem()
// JSONPointable is an interface for structs to implement when they need to customize the
// json pointer process
type JSONPointable interface {
JSONLookup(string) (interface{}, error)
}
// JSONSetable is an interface for structs to implement when they need to customize the
// json pointer process
type JSONSetable interface {
JSONSet(string, interface{}) error
}
// New creates a new json pointer for the given string
func New(jsonPointerString string) (Pointer, error) {
var p Pointer
err := p.parse(jsonPointerString)
return p, err
}
// Pointer is the json pointer representation
type Pointer struct {
referenceTokens []string
}
// "Constructor", parses the given string JSON pointer
func (p *Pointer) parse(jsonPointerString string) error {
var err error
if jsonPointerString != emptyPointer {
if !strings.HasPrefix(jsonPointerString, pointerSeparator) {
err = errors.New(invalidStart)
} else {
referenceTokens := strings.Split(jsonPointerString, pointerSeparator)
for _, referenceToken := range referenceTokens[1:] {
p.referenceTokens = append(p.referenceTokens, referenceToken)
}
}
}
return err
}
// Get uses the pointer to retrieve a value from a JSON document
func (p *Pointer) Get(document interface{}) (interface{}, reflect.Kind, error) {
return p.get(document, swag.DefaultJSONNameProvider)
}
// Set uses the pointer to set a value in a JSON document
func (p *Pointer) Set(document interface{}, value interface{}) (interface{}, error) {
return document, p.set(document, value, swag.DefaultJSONNameProvider)
}
// GetForToken gets a value for a json pointer token 1 level deep
func GetForToken(document interface{}, decodedToken string) (interface{}, reflect.Kind, error) {
return getSingleImpl(document, decodedToken, swag.DefaultJSONNameProvider)
}
// SetForToken sets a value for a json pointer token 1 level deep
func SetForToken(document interface{}, decodedToken string, value interface{}) (interface{}, error) {
return document, setSingleImpl(document, value, decodedToken, swag.DefaultJSONNameProvider)
}
func getSingleImpl(node interface{}, decodedToken string, nameProvider *swag.NameProvider) (interface{}, reflect.Kind, error) {
rValue := reflect.Indirect(reflect.ValueOf(node))
kind := rValue.Kind()
if rValue.Type().Implements(jsonPointableType) {
r, err := node.(JSONPointable).JSONLookup(decodedToken)
if err != nil {
return nil, kind, err
}
return r, kind, nil
}
switch kind {
case reflect.Struct:
nm, ok := nameProvider.GetGoNameForType(rValue.Type(), decodedToken)
if !ok {
return nil, kind, fmt.Errorf("object has no field %q", decodedToken)
}
fld := rValue.FieldByName(nm)
return fld.Interface(), kind, nil
case reflect.Map:
kv := reflect.ValueOf(decodedToken)
mv := rValue.MapIndex(kv)
if mv.IsValid() {
return mv.Interface(), kind, nil
}
return nil, kind, fmt.Errorf("object has no key %q", decodedToken)
case reflect.Slice:
tokenIndex, err := strconv.Atoi(decodedToken)
if err != nil {
return nil, kind, err
}
sLength := rValue.Len()
if tokenIndex < 0 || tokenIndex >= sLength {
return nil, kind, fmt.Errorf("index out of bounds array[0,%d] index '%d'", sLength-1, tokenIndex)
}
elem := rValue.Index(tokenIndex)
return elem.Interface(), kind, nil
default:
return nil, kind, fmt.Errorf("invalid token reference %q", decodedToken)
}
}
func setSingleImpl(node, data interface{}, decodedToken string, nameProvider *swag.NameProvider) error {
rValue := reflect.Indirect(reflect.ValueOf(node))
if ns, ok := node.(JSONSetable); ok { // pointer impl
return ns.JSONSet(decodedToken, data)
}
if rValue.Type().Implements(jsonSetableType) {
return node.(JSONSetable).JSONSet(decodedToken, data)
}
switch rValue.Kind() {
case reflect.Struct:
nm, ok := nameProvider.GetGoNameForType(rValue.Type(), decodedToken)
if !ok {
return fmt.Errorf("object has no field %q", decodedToken)
}
fld := rValue.FieldByName(nm)
if fld.IsValid() {
fld.Set(reflect.ValueOf(data))
}
return nil
case reflect.Map:
kv := reflect.ValueOf(decodedToken)
rValue.SetMapIndex(kv, reflect.ValueOf(data))
return nil
case reflect.Slice:
tokenIndex, err := strconv.Atoi(decodedToken)
if err != nil {
return err
}
sLength := rValue.Len()
if tokenIndex < 0 || tokenIndex >= sLength {
return fmt.Errorf("index out of bounds array[0,%d] index '%d'", sLength, tokenIndex)
}
elem := rValue.Index(tokenIndex)
if !elem.CanSet() {
return fmt.Errorf("can't set slice index %s to %v", decodedToken, data)
}
elem.Set(reflect.ValueOf(data))
return nil
default:
return fmt.Errorf("invalid token reference %q", decodedToken)
}
}
func (p *Pointer) get(node interface{}, nameProvider *swag.NameProvider) (interface{}, reflect.Kind, error) {
if nameProvider == nil {
nameProvider = swag.DefaultJSONNameProvider
}
kind := reflect.Invalid
// Full document when empty
if len(p.referenceTokens) == 0 {
return node, kind, nil
}
for _, token := range p.referenceTokens {
decodedToken := Unescape(token)
r, knd, err := getSingleImpl(node, decodedToken, nameProvider)
if err != nil {
return nil, knd, err
}
node, kind = r, knd
}
rValue := reflect.ValueOf(node)
kind = rValue.Kind()
return node, kind, nil
}
func (p *Pointer) set(node, data interface{}, nameProvider *swag.NameProvider) error {
knd := reflect.ValueOf(node).Kind()
if knd != reflect.Ptr && knd != reflect.Struct && knd != reflect.Map && knd != reflect.Slice && knd != reflect.Array {
return fmt.Errorf("only structs, pointers, maps and slices are supported for setting values")
}
if nameProvider == nil {
nameProvider = swag.DefaultJSONNameProvider
}
// Full document when empty
if len(p.referenceTokens) == 0 {
return nil
}
lastI := len(p.referenceTokens) - 1
for i, token := range p.referenceTokens {
isLastToken := i == lastI
decodedToken := Unescape(token)
if isLastToken {
return setSingleImpl(node, data, decodedToken, nameProvider)
}
rValue := reflect.Indirect(reflect.ValueOf(node))
kind := rValue.Kind()
if rValue.Type().Implements(jsonPointableType) {
r, err := node.(JSONPointable).JSONLookup(decodedToken)
if err != nil {
return err
}
fld := reflect.ValueOf(r)
if fld.CanAddr() && fld.Kind() != reflect.Interface && fld.Kind() != reflect.Map && fld.Kind() != reflect.Slice && fld.Kind() != reflect.Ptr {
node = fld.Addr().Interface()
continue
}
node = r
continue
}
switch kind {
case reflect.Struct:
nm, ok := nameProvider.GetGoNameForType(rValue.Type(), decodedToken)
if !ok {
return fmt.Errorf("object has no field %q", decodedToken)
}
fld := rValue.FieldByName(nm)
if fld.CanAddr() && fld.Kind() != reflect.Interface && fld.Kind() != reflect.Map && fld.Kind() != reflect.Slice && fld.Kind() != reflect.Ptr {
node = fld.Addr().Interface()
continue
}
node = fld.Interface()
case reflect.Map:
kv := reflect.ValueOf(decodedToken)
mv := rValue.MapIndex(kv)
if !mv.IsValid() {
return fmt.Errorf("object has no key %q", decodedToken)
}
if mv.CanAddr() && mv.Kind() != reflect.Interface && mv.Kind() != reflect.Map && mv.Kind() != reflect.Slice && mv.Kind() != reflect.Ptr {
node = mv.Addr().Interface()
continue
}
node = mv.Interface()
case reflect.Slice:
tokenIndex, err := strconv.Atoi(decodedToken)
if err != nil {
return err
}
sLength := rValue.Len()
if tokenIndex < 0 || tokenIndex >= sLength {
return fmt.Errorf("index out of bounds array[0,%d] index '%d'", sLength, tokenIndex)
}
elem := rValue.Index(tokenIndex)
if elem.CanAddr() && elem.Kind() != reflect.Interface && elem.Kind() != reflect.Map && elem.Kind() != reflect.Slice && elem.Kind() != reflect.Ptr {
node = elem.Addr().Interface()
continue
}
node = elem.Interface()
default:
return fmt.Errorf("invalid token reference %q", decodedToken)
}
}
return nil
}
// DecodedTokens returns the decoded tokens
func (p *Pointer) DecodedTokens() []string {
result := make([]string, 0, len(p.referenceTokens))
for _, t := range p.referenceTokens {
result = append(result, Unescape(t))
}
return result
}
// IsEmpty returns true if this is an empty json pointer
// this indicates that it points to the root document
func (p *Pointer) IsEmpty() bool {
return len(p.referenceTokens) == 0
}
// String returns the string representation of the json pointer
func (p *Pointer) String() string {
if len(p.referenceTokens) == 0 {
return emptyPointer
}
pointerString := pointerSeparator + strings.Join(p.referenceTokens, pointerSeparator)
return pointerString
}
// Specific JSON pointer encoding here
// ~0 => ~
// ~1 => /
// ... and vice versa
const (
encRefTok0 = `~0`
encRefTok1 = `~1`
decRefTok0 = `~`
decRefTok1 = `/`
)
// Unescape unescapes a json pointer reference token string to the original representation
func Unescape(token string) string {
step1 := strings.Replace(token, encRefTok1, decRefTok1, -1)
step2 := strings.Replace(step1, encRefTok0, decRefTok0, -1)
return step2
}
// Escape escapes a pointer reference token string
func Escape(token string) string {
step1 := strings.Replace(token, decRefTok0, encRefTok0, -1)
step2 := strings.Replace(step1, decRefTok1, encRefTok1, -1)
return step2
}
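The ~0/~1 rules above mean a reference token can itself contain "/" or "~". A minimal round-trip sketch using the Escape and Unescape functions defined in this file (the token value is illustrative):

package main

import (
	"fmt"

	"github.com/go-openapi/jsonpointer"
)

func main() {
	// "~" is encoded first (as ~0), then "/" (as ~1), so the two escapes never collide.
	escaped := jsonpointer.Escape("a/b~c")
	fmt.Println(escaped) // prints: a~1b~0c

	// Unescape applies the substitutions in the opposite order, restoring the token.
	fmt.Println(jsonpointer.Unescape(escaped)) // prints: a/b~c
}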

Some files were not shown because too many files have changed in this diff.