From 9f90d8a9b4c66e0514191f2ad7ec873d38b0790c Mon Sep 17 00:00:00 2001 From: Derek McGowan Date: Thu, 1 Jun 2017 14:11:44 -0700 Subject: [PATCH 1/3] Add schema1 conversion on pull closes #851 Signed-off-by: Derek McGowan --- client.go | 39 ++- cmd/dist/fetch.go | 11 +- images/mediatypes.go | 2 + remotes/docker/fetcher.go | 1 + remotes/docker/schema1/converter.go | 391 ++++++++++++++++++++++++++++ remotes/handlers.go | 4 +- 6 files changed, 440 insertions(+), 8 deletions(-) create mode 100644 remotes/docker/schema1/converter.go diff --git a/client.go b/client.go index f017337a5..06fd3d571 100644 --- a/client.go +++ b/client.go @@ -21,6 +21,7 @@ import ( "github.com/containerd/containerd/images" "github.com/containerd/containerd/remotes" "github.com/containerd/containerd/remotes/docker" + "github.com/containerd/containerd/remotes/docker/schema1" contentservice "github.com/containerd/containerd/services/content" "github.com/containerd/containerd/services/diff" diffservice "github.com/containerd/containerd/services/diff" @@ -234,6 +235,11 @@ type RemoteContext struct { // These handlers always get called before any operation specific // handlers. BaseHandlers []images.Handler + + // ConvertSchema1 is whether to convert Docker registry schema 1 + // manifests. If this option is false then any image which resolves + // to schema 1 will return an error since schema 1 is not supported. + ConvertSchema1 bool } func defaultRemoteContext() *RemoteContext { @@ -252,6 +258,14 @@ func WithPullUnpack(client *Client, c *RemoteContext) error { return nil } +// WithSchema1Conversion is used to convert Docker registry schema 1 +// manifests to oci manifests on pull. Without this option schema 1 +// manifests will return a not supported error. +func WithSchema1Conversion(client *Client, c *RemoteContext) error { + c.ConvertSchema1 = true + return nil +} + // WithResolver specifies the resolver to use. 
func WithResolver(resolver remotes.Resolver) RemoteOpts { return func(client *Client, c *RemoteContext) error { @@ -286,13 +300,30 @@ func (c *Client) Pull(ctx context.Context, ref string, opts ...RemoteOpts) (Imag return nil, err } - handlers := append(pullCtx.BaseHandlers, - remotes.FetchHandler(store, fetcher), - images.ChildrenHandler(store), + var ( + schema1Converter *schema1.Converter + handler images.Handler ) - if err := images.Dispatch(ctx, images.Handlers(handlers...), desc); err != nil { + if desc.MediaType == images.MediaTypeDockerSchema1Manifest && pullCtx.ConvertSchema1 { + schema1Converter = schema1.NewConverter(store, fetcher) + handler = images.Handlers(append(pullCtx.BaseHandlers, schema1Converter)...) + } else { + handler = images.Handlers(append(pullCtx.BaseHandlers, + remotes.FetchHandler(store, fetcher), + images.ChildrenHandler(store))..., + ) + } + + if err := images.Dispatch(ctx, handler, desc); err != nil { return nil, err } + if schema1Converter != nil { + desc, err = schema1Converter.Convert(ctx) + if err != nil { + return nil, err + } + } + is := c.ImageService() if err := is.Put(ctx, name, desc); err != nil { return nil, err diff --git a/cmd/dist/fetch.go b/cmd/dist/fetch.go index 2d2abaa05..e541e575c 100644 --- a/cmd/dist/fetch.go +++ b/cmd/dist/fetch.go @@ -74,13 +74,15 @@ func fetch(ctx context.Context, ref string, clicontext *cli.Context) (containerd }() h := images.HandlerFunc(func(ctx context.Context, desc ocispec.Descriptor) ([]ocispec.Descriptor, error) { - ongoing.add(desc) + if desc.MediaType != images.MediaTypeDockerSchema1Manifest { + ongoing.add(desc) + } return nil, nil }) log.G(pctx).WithField("image", ref).Debug("fetching") - img, err := client.Pull(pctx, ref, containerd.WithResolver(resolver), containerd.WithImageHandler(h)) + img, err := client.Pull(pctx, ref, containerd.WithResolver(resolver), containerd.WithImageHandler(h), containerd.WithSchema1Conversion) stopProgress() if err != nil { return nil, err @@ -268,7 
+270,10 @@ func display(w io.Writer, statuses []statusInfo, start time.Time) { total += status.Offset switch status.Status { case "downloading", "uploading": - bar := progress.Bar(float64(status.Offset) / float64(status.Total)) + var bar progress.Bar + if status.Total > 0.0 { + bar = progress.Bar(float64(status.Offset) / float64(status.Total)) + } fmt.Fprintf(w, "%s:\t%s\t%40r\t%8.8s/%s\t\n", status.Ref, status.Status, diff --git a/images/mediatypes.go b/images/mediatypes.go index 7b75b8aff..676af4a3e 100644 --- a/images/mediatypes.go +++ b/images/mediatypes.go @@ -16,4 +16,6 @@ const ( MediaTypeContainerd1Resource = "application/vnd.containerd.container.resource.tar" MediaTypeContainerd1RW = "application/vnd.containerd.container.rw.tar" MediaTypeContainerd1CheckpointConfig = "application/vnd.containerd.container.checkpoint.config.v1+json" + // Legacy Docker schema1 manifest + MediaTypeDockerSchema1Manifest = "application/vnd.docker.distribution.manifest.v1+prettyjws" ) diff --git a/remotes/docker/fetcher.go b/remotes/docker/fetcher.go index ad118fd1d..24c2a6a54 100644 --- a/remotes/docker/fetcher.go +++ b/remotes/docker/fetcher.go @@ -67,6 +67,7 @@ func getV2URLPaths(desc ocispec.Descriptor) ([]string, error) { switch desc.MediaType { case images.MediaTypeDockerSchema2Manifest, images.MediaTypeDockerSchema2ManifestList, + images.MediaTypeDockerSchema1Manifest, ocispec.MediaTypeImageManifest, ocispec.MediaTypeImageIndex: urls = append(urls, path.Join("manifests", desc.Digest.String())) } diff --git a/remotes/docker/schema1/converter.go b/remotes/docker/schema1/converter.go new file mode 100644 index 000000000..097520c32 --- /dev/null +++ b/remotes/docker/schema1/converter.go @@ -0,0 +1,391 @@ +package schema1 + +import ( + "bytes" + "compress/gzip" + "context" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "io/ioutil" + "strings" + "sync" + "time" + + "golang.org/x/sync/errgroup" + + "github.com/containerd/containerd/content" + 
"github.com/containerd/containerd/images" + "github.com/containerd/containerd/log" + "github.com/containerd/containerd/remotes" + digest "github.com/opencontainers/go-digest" + specs "github.com/opencontainers/image-spec/specs-go" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" +) + +var ( + mediaTypeManifest = "application/vnd.docker.distribution.manifest.v1+json" +) + +// Converter converts schema1 manifests to schema2 on fetch +type Converter struct { + contentStore content.Store + fetcher remotes.Fetcher + + pulledManifest *manifest + + mu sync.Mutex + blobMap map[digest.Digest]digest.Digest +} + +func NewConverter(contentStore content.Store, fetcher remotes.Fetcher) *Converter { + return &Converter{ + contentStore: contentStore, + fetcher: fetcher, + blobMap: map[digest.Digest]digest.Digest{}, + } +} + +func (c *Converter) Handle(ctx context.Context, desc ocispec.Descriptor) ([]ocispec.Descriptor, error) { + switch desc.MediaType { + case images.MediaTypeDockerSchema1Manifest: + if err := c.fetchManifest(ctx, desc); err != nil { + return nil, err + } + + m := c.pulledManifest + if len(m.FSLayers) != len(m.History) { + return nil, errors.New("invalid schema 1 manifest, history and layer mismatch") + } + descs := make([]ocispec.Descriptor, 0, len(c.pulledManifest.FSLayers)) + + for i := range m.FSLayers { + var h v1History + if err := json.Unmarshal([]byte(m.History[i].V1Compatibility), &h); err != nil { + return nil, err + } + if !h.ThrowAway { + descs = append(descs, ocispec.Descriptor{ + MediaType: images.MediaTypeDockerSchema2LayerGzip, + Digest: c.pulledManifest.FSLayers[i].BlobSum, + }) + } + } + // Reverse + for i := 0; i <= len(descs)/2; i++ { + j := len(descs) - i - 1 + if i != j { + descs[i], descs[j] = descs[j], descs[i] + } + } + return descs, nil + case images.MediaTypeDockerSchema2LayerGzip: + if c.pulledManifest == nil { + return nil, errors.New("manifest required for schema 1 blob pull") + } + return nil, 
c.fetchBlob(ctx, desc) + default: + return nil, fmt.Errorf("%v not support for schema 1 manifests", desc.MediaType) + } +} + +func (c *Converter) Convert(ctx context.Context) (ocispec.Descriptor, error) { + if c.pulledManifest == nil { + return ocispec.Descriptor{}, errors.New("missing schema 1 manifest for conversion") + } + + img, err := convertSchema1Manifest(c.pulledManifest, c.blobMap) + if err != nil { + return ocispec.Descriptor{}, errors.Wrap(err, "schema 1 conversion failed") + } + b, err := json.Marshal(img) + if err != nil { + return ocispec.Descriptor{}, errors.Wrap(err, "failed to marshal image") + } + + config := ocispec.Descriptor{ + MediaType: ocispec.MediaTypeImageConfig, + Digest: digest.Canonical.FromBytes(b), + Size: int64(len(b)), + } + + ref := remotes.MakeRefKey(ctx, config) + if err := content.WriteBlob(ctx, c.contentStore, ref, bytes.NewReader(b), config.Size, config.Digest); err != nil { + return ocispec.Descriptor{}, errors.Wrap(err, "failed to write config") + } + + layers := make([]ocispec.Descriptor, 0) + for _, layer := range c.pulledManifest.FSLayers { + // TODO: Use rootfs mapping! + info, err := c.contentStore.Info(ctx, layer.BlobSum) + if err != nil { + if content.IsNotFound(err) { + continue + } + return ocispec.Descriptor{}, errors.Wrap(err, "failed to get blob info") + } + + layers = append([]ocispec.Descriptor{{ + MediaType: ocispec.MediaTypeImageLayerGzip, + Digest: layer.BlobSum, + Size: info.Size, + }}, layers...) 
+ } + + manifest := ocispec.Manifest{ + Versioned: specs.Versioned{ + SchemaVersion: 2, + }, + Config: config, + Layers: layers, + } + + b, err = json.Marshal(manifest) + if err != nil { + return ocispec.Descriptor{}, errors.Wrap(err, "failed to marshal image") + } + + desc := ocispec.Descriptor{ + MediaType: ocispec.MediaTypeImageManifest, + Digest: digest.Canonical.FromBytes(b), + Size: int64(len(b)), + } + + ref = remotes.MakeRefKey(ctx, desc) + if err := content.WriteBlob(ctx, c.contentStore, ref, bytes.NewReader(b), desc.Size, desc.Digest); err != nil { + return ocispec.Descriptor{}, errors.Wrap(err, "failed to write config") + } + + return desc, nil +} + +func (c *Converter) fetchManifest(ctx context.Context, desc ocispec.Descriptor) error { + log.G(ctx).Debug("fetch schema 1") + + rc, err := c.fetcher.Fetch(ctx, desc) + if err != nil { + return err + } + + b, err := ioutil.ReadAll(rc) + rc.Close() + if err != nil { + return err + } + + b, err = stripSignature(b) + if err != nil { + return err + } + + var m manifest + if err := json.Unmarshal(b, &m); err != nil { + return err + } + c.pulledManifest = &m + + return nil +} + +func (c *Converter) fetchBlob(ctx context.Context, desc ocispec.Descriptor) error { + log.G(ctx).Debug("fetch blob") + + ref := remotes.MakeRefKey(ctx, desc) + + var diffID digest.Digest + + cw, err := c.contentStore.Writer(ctx, ref, desc.Size, desc.Digest) + if err != nil { + if !content.IsExists(err) { + return err + } + + // TODO: Check if blob -> diff id mapping already exists + + r, err := c.contentStore.Reader(ctx, desc.Digest) + if err != nil { + return err + } + defer r.Close() + + gr, err := gzip.NewReader(r) + defer gr.Close() + + diffID, err = digest.Canonical.FromReader(gr) + if err != nil { + return err + } + } else { + defer cw.Close() + + rc, err := c.fetcher.Fetch(ctx, desc) + if err != nil { + return err + } + defer rc.Close() + + eg, _ := errgroup.WithContext(ctx) + pr, pw := io.Pipe() + + eg.Go(func() error { + gr, err 
:= gzip.NewReader(pr) + defer gr.Close() + + diffID, err = digest.Canonical.FromReader(gr) + pr.CloseWithError(err) + return err + }) + + eg.Go(func() error { + defer pw.Close() + return content.Copy(cw, io.TeeReader(rc, pw), desc.Size, desc.Digest) + }) + + if err := eg.Wait(); err != nil { + return err + } + } + + c.mu.Lock() + c.blobMap[desc.Digest] = diffID + c.mu.Unlock() + + return nil +} + +type fsLayer struct { + BlobSum digest.Digest `json:"blobSum"` +} + +type history struct { + V1Compatibility string `json:"v1Compatibility"` +} + +type manifest struct { + FSLayers []fsLayer `json:"fsLayers"` + History []history `json:"history"` +} + +type v1History struct { + Author string `json:"author,omitempty"` + Created time.Time `json:"created"` + Comment string `json:"comment,omitempty"` + ThrowAway bool `json:"throwaway,omitempty"` + ContainerConfig struct { + Cmd []string `json:"Cmd,omitempty"` + } `json:"container_config,omitempty"` +} + +func convertSchema1Manifest(m *manifest, blobs map[digest.Digest]digest.Digest) (*ocispec.Image, error) { + if len(m.History) == 0 { + return nil, errors.New("no history") + } + + var img ocispec.Image + if err := json.Unmarshal([]byte(m.History[0].V1Compatibility), &img); err != nil { + return nil, errors.Wrap(err, "failed to unmarshal image from schema 1 history") + } + + diffIDs := make([]digest.Digest, 0, len(m.History)) + img.History = make([]ocispec.History, len(m.History)) + for i := range m.History { + var h v1History + if err := json.Unmarshal([]byte(m.History[i].V1Compatibility), &h); err != nil { + return nil, errors.Wrap(err, "failed to unmarshal history") + } + + img.History[len(img.History)-i-1] = ocispec.History{ + Author: h.Author, + Comment: h.Comment, + Created: &h.Created, + CreatedBy: strings.Join(h.ContainerConfig.Cmd, " "), + EmptyLayer: h.ThrowAway, + } + + if !h.ThrowAway { + diffID, ok := blobs[m.FSLayers[i].BlobSum] + if !ok { + return nil, errors.Errorf("no diff id for blob %s", 
m.FSLayers[i].BlobSum.String()) + } + + diffIDs = append(diffIDs, diffID) + } + + } + + for i := 0; i <= len(diffIDs)/2; i++ { + j := len(diffIDs) - i - 1 + if i != j { + diffIDs[i], diffIDs[j] = diffIDs[j], diffIDs[i] + } + } + + img.RootFS = ocispec.RootFS{ + Type: "layers", + DiffIDs: diffIDs, + } + + return &img, nil +} + +type signature struct { + Signatures []jsParsedSignature `json:"signatures"` +} + +type jsParsedSignature struct { + Protected string `json:"protected"` +} + +type protectedBlock struct { + Length int `json:"formatLength"` + Tail string `json:"formatTail"` +} + +// joseBase64UrlDecode decodes the given string using the standard base64 url +// decoder but first adds the appropriate number of trailing '=' characters in +// accordance with the jose specification. +// http://tools.ietf.org/html/draft-ietf-jose-json-web-signature-31#section-2 +func joseBase64UrlDecode(s string) ([]byte, error) { + switch len(s) % 4 { + case 0: + case 2: + s += "==" + case 3: + s += "=" + default: + return nil, errors.New("illegal base64url string") + } + return base64.URLEncoding.DecodeString(s) +} + +func stripSignature(b []byte) ([]byte, error) { + var sig signature + if err := json.Unmarshal(b, &sig); err != nil { + return nil, err + } + if len(sig.Signatures) == 0 { + return nil, errors.New("no signatures") + } + pb, err := joseBase64UrlDecode(sig.Signatures[0].Protected) + if err != nil { + return nil, errors.Wrapf(err, "could not decode %s", sig.Signatures[0].Protected) + } + + var protected protectedBlock + if err := json.Unmarshal(pb, &protected); err != nil { + return nil, err + } + + if protected.Length > len(b) { + return nil, errors.New("invalid protected length block") + } + + tail, err := joseBase64UrlDecode(protected.Tail) + if err != nil { + return nil, errors.Wrap(err, "invalid tail base 64 value") + } + + return append(b[:protected.Length], tail...), nil +} diff --git a/remotes/handlers.go b/remotes/handlers.go index 2fe2a1c7f..a5d9baf73 100644 
--- a/remotes/handlers.go +++ b/remotes/handlers.go @@ -25,7 +25,7 @@ func MakeRefKey(ctx context.Context, desc ocispec.Descriptor) string { return "manifest-" + desc.Digest.String() case images.MediaTypeDockerSchema2Layer, images.MediaTypeDockerSchema2LayerGzip: return "layer-" + desc.Digest.String() - case "application/vnd.docker.container.image.v1+json": + case images.MediaTypeDockerSchema2Config, ocispec.MediaTypeImageConfig: return "config-" + desc.Digest.String() default: log.G(ctx).Warnf("reference for unknown type: %s", desc.MediaType) @@ -47,6 +47,8 @@ func FetchHandler(ingester content.Ingester, fetcher Fetcher) images.HandlerFunc switch desc.MediaType { case images.MediaTypeDockerSchema2ManifestList, ocispec.MediaTypeImageIndex: return nil, fmt.Errorf("%v not yet supported", desc.MediaType) + case images.MediaTypeDockerSchema1Manifest: + return nil, fmt.Errorf("%v not supported", desc.MediaType) default: err := fetch(ctx, ingester, fetcher, desc) return nil, err From 3a226ef17d52317a688df0baf3e497fdcbcf3e46 Mon Sep 17 00:00:00 2001 From: Derek McGowan Date: Wed, 7 Jun 2017 14:41:45 -0700 Subject: [PATCH 2/3] Update logic to determine if layer is empty Handle reliance on the size field when the throwaway field is not used. 
Signed-off-by: Derek McGowan --- remotes/docker/schema1/converter.go | 131 +++++++++++++--------------- 1 file changed, 63 insertions(+), 68 deletions(-) diff --git a/remotes/docker/schema1/converter.go b/remotes/docker/schema1/converter.go index 097520c32..cc38bf5e7 100644 --- a/remotes/docker/schema1/converter.go +++ b/remotes/docker/schema1/converter.go @@ -35,6 +35,7 @@ type Converter struct { fetcher remotes.Fetcher pulledManifest *manifest + layers []ocispec.Descriptor mu sync.Mutex blobMap map[digest.Digest]digest.Digest @@ -66,21 +67,17 @@ func (c *Converter) Handle(ctx context.Context, desc ocispec.Descriptor) ([]ocis if err := json.Unmarshal([]byte(m.History[i].V1Compatibility), &h); err != nil { return nil, err } - if !h.ThrowAway { - descs = append(descs, ocispec.Descriptor{ - MediaType: images.MediaTypeDockerSchema2LayerGzip, - Digest: c.pulledManifest.FSLayers[i].BlobSum, - }) + if !h.EmptyLayer() { + descs = append([]ocispec.Descriptor{ + { + MediaType: images.MediaTypeDockerSchema2LayerGzip, + Digest: c.pulledManifest.FSLayers[i].BlobSum, + }, + }, descs...) 
} } - // Reverse - for i := 0; i <= len(descs)/2; i++ { - j := len(descs) - i - 1 - if i != j { - descs[i], descs[j] = descs[j], descs[i] - } - } - return descs, nil + c.layers = descs + return c.layers, nil case images.MediaTypeDockerSchema2LayerGzip: if c.pulledManifest == nil { return nil, errors.New("manifest required for schema 1 blob pull") @@ -95,11 +92,45 @@ func (c *Converter) Convert(ctx context.Context) (ocispec.Descriptor, error) { if c.pulledManifest == nil { return ocispec.Descriptor{}, errors.New("missing schema 1 manifest for conversion") } + if len(c.pulledManifest.History) == 0 { + return ocispec.Descriptor{}, errors.New("no history") + } + if len(c.layers) == 0 { + return ocispec.Descriptor{}, errors.New("schema 1 manifest has no usable layers") + } - img, err := convertSchema1Manifest(c.pulledManifest, c.blobMap) + var img ocispec.Image + if err := json.Unmarshal([]byte(c.pulledManifest.History[0].V1Compatibility), &img); err != nil { + return ocispec.Descriptor{}, errors.Wrap(err, "failed to unmarshal image from schema 1 history") + } + + history, err := schema1ManifestHistory(c.pulledManifest) if err != nil { return ocispec.Descriptor{}, errors.Wrap(err, "schema 1 conversion failed") } + img.History = history + + diffIDs := make([]digest.Digest, len(c.layers)) + for i, layer := range c.layers { + info, err := c.contentStore.Info(ctx, layer.Digest) + if err != nil { + return ocispec.Descriptor{}, errors.Wrap(err, "failed to get blob info") + } + + // Fill in size since not given by schema 1 manifest + c.layers[i].Size = info.Size + + diffID, ok := c.blobMap[layer.Digest] + if !ok { + return ocispec.Descriptor{}, errors.New("missing diff id") + } + diffIDs[i] = diffID + } + img.RootFS = ocispec.RootFS{ + Type: "layers", + DiffIDs: diffIDs, + } + b, err := json.Marshal(img) if err != nil { return ocispec.Descriptor{}, errors.Wrap(err, "failed to marshal image") @@ -116,30 +147,12 @@ func (c *Converter) Convert(ctx context.Context) 
(ocispec.Descriptor, error) { return ocispec.Descriptor{}, errors.Wrap(err, "failed to write config") } - layers := make([]ocispec.Descriptor, 0) - for _, layer := range c.pulledManifest.FSLayers { - // TODO: Use rootfs mapping! - info, err := c.contentStore.Info(ctx, layer.BlobSum) - if err != nil { - if content.IsNotFound(err) { - continue - } - return ocispec.Descriptor{}, errors.Wrap(err, "failed to get blob info") - } - - layers = append([]ocispec.Descriptor{{ - MediaType: ocispec.MediaTypeImageLayerGzip, - Digest: layer.BlobSum, - Size: info.Size, - }}, layers...) - } - manifest := ocispec.Manifest{ Versioned: specs.Versioned{ SchemaVersion: 2, }, Config: config, - Layers: layers, + Layers: c.layers, } b, err = json.Marshal(manifest) @@ -272,62 +285,44 @@ type v1History struct { Author string `json:"author,omitempty"` Created time.Time `json:"created"` Comment string `json:"comment,omitempty"` - ThrowAway bool `json:"throwaway,omitempty"` + ThrowAway *bool `json:"throwaway,omitempty"` + Size *int `json:"Size,omitempty"` // used before ThrowAway field ContainerConfig struct { Cmd []string `json:"Cmd,omitempty"` } `json:"container_config,omitempty"` } -func convertSchema1Manifest(m *manifest, blobs map[digest.Digest]digest.Digest) (*ocispec.Image, error) { - if len(m.History) == 0 { - return nil, errors.New("no history") +func (h *v1History) EmptyLayer() bool { + if h.ThrowAway != nil { + return !(*h.ThrowAway) + } + if h.Size != nil { + return *h.Size == 0 } - var img ocispec.Image - if err := json.Unmarshal([]byte(m.History[0].V1Compatibility), &img); err != nil { - return nil, errors.Wrap(err, "failed to unmarshal image from schema 1 history") - } + // If no size is given or `ThrowAway` specified, the image is empty + return true +} - diffIDs := make([]digest.Digest, 0, len(m.History)) - img.History = make([]ocispec.History, len(m.History)) +func schema1ManifestHistory(m *manifest) ([]ocispec.History, error) { + history := make([]ocispec.History, 
len(m.History)) for i := range m.History { var h v1History if err := json.Unmarshal([]byte(m.History[i].V1Compatibility), &h); err != nil { return nil, errors.Wrap(err, "failed to unmarshal history") } - img.History[len(img.History)-i-1] = ocispec.History{ + empty := h.EmptyLayer() + history[len(history)-i-1] = ocispec.History{ Author: h.Author, Comment: h.Comment, Created: &h.Created, CreatedBy: strings.Join(h.ContainerConfig.Cmd, " "), - EmptyLayer: h.ThrowAway, - } - - if !h.ThrowAway { - diffID, ok := blobs[m.FSLayers[i].BlobSum] - if !ok { - return nil, errors.Errorf("no diff id for blob %s", m.FSLayers[i].BlobSum.String()) - } - - diffIDs = append(diffIDs, diffID) - } - - } - - for i := 0; i <= len(diffIDs)/2; i++ { - j := len(diffIDs) - i - 1 - if i != j { - diffIDs[i], diffIDs[j] = diffIDs[j], diffIDs[i] + EmptyLayer: empty, } } - img.RootFS = ocispec.RootFS{ - Type: "layers", - DiffIDs: diffIDs, - } - - return &img, nil + return history, nil } type signature struct { From 8ed1e24ae925b5c6d8195858ee89dddb0507d65f Mon Sep 17 00:00:00 2001 From: Derek McGowan Date: Wed, 14 Jun 2017 11:53:52 -0700 Subject: [PATCH 3/3] Add blob state calculation to handle empty ambiguity The size and throwaway fields in the history can both be omitted, making the emptiness of a layer ambiguous. In these cases download and check whether the content is empty. 
Signed-off-by: Derek McGowan --- remotes/docker/schema1/converter.go | 217 ++++++++++++++++++---------- 1 file changed, 140 insertions(+), 77 deletions(-) diff --git a/remotes/docker/schema1/converter.go b/remotes/docker/schema1/converter.go index cc38bf5e7..cc36c647c 100644 --- a/remotes/docker/schema1/converter.go +++ b/remotes/docker/schema1/converter.go @@ -29,23 +29,29 @@ var ( mediaTypeManifest = "application/vnd.docker.distribution.manifest.v1+json" ) +type blobState struct { + diffID digest.Digest + empty bool +} + // Converter converts schema1 manifests to schema2 on fetch type Converter struct { contentStore content.Store fetcher remotes.Fetcher pulledManifest *manifest - layers []ocispec.Descriptor - mu sync.Mutex - blobMap map[digest.Digest]digest.Digest + mu sync.Mutex + blobMap map[digest.Digest]blobState + layerBlobs map[digest.Digest]ocispec.Descriptor } func NewConverter(contentStore content.Store, fetcher remotes.Fetcher) *Converter { return &Converter{ contentStore: contentStore, fetcher: fetcher, - blobMap: map[digest.Digest]digest.Digest{}, + blobMap: map[digest.Digest]blobState{}, + layerBlobs: map[digest.Digest]ocispec.Descriptor{}, } } @@ -63,21 +69,27 @@ func (c *Converter) Handle(ctx context.Context, desc ocispec.Descriptor) ([]ocis descs := make([]ocispec.Descriptor, 0, len(c.pulledManifest.FSLayers)) for i := range m.FSLayers { - var h v1History - if err := json.Unmarshal([]byte(m.History[i].V1Compatibility), &h); err != nil { - return nil, err - } - if !h.EmptyLayer() { - descs = append([]ocispec.Descriptor{ - { - MediaType: images.MediaTypeDockerSchema2LayerGzip, - Digest: c.pulledManifest.FSLayers[i].BlobSum, - }, - }, descs...) 
+ if _, ok := c.blobMap[c.pulledManifest.FSLayers[i].BlobSum]; !ok { + empty, err := isEmptyLayer([]byte(m.History[i].V1Compatibility)) + if err != nil { + return nil, err + } + + // Do not attempt to download a known empty blob + if !empty { + descs = append([]ocispec.Descriptor{ + { + MediaType: images.MediaTypeDockerSchema2LayerGzip, + Digest: c.pulledManifest.FSLayers[i].BlobSum, + }, + }, descs...) + } + c.blobMap[c.pulledManifest.FSLayers[i].BlobSum] = blobState{ + empty: empty, + } } } - c.layers = descs - return c.layers, nil + return descs, nil case images.MediaTypeDockerSchema2LayerGzip: if c.pulledManifest == nil { return nil, errors.New("manifest required for schema 1 blob pull") @@ -89,14 +101,9 @@ func (c *Converter) Handle(ctx context.Context, desc ocispec.Descriptor) ([]ocis } func (c *Converter) Convert(ctx context.Context) (ocispec.Descriptor, error) { - if c.pulledManifest == nil { - return ocispec.Descriptor{}, errors.New("missing schema 1 manifest for conversion") - } - if len(c.pulledManifest.History) == 0 { - return ocispec.Descriptor{}, errors.New("no history") - } - if len(c.layers) == 0 { - return ocispec.Descriptor{}, errors.New("schema 1 manifest has no usable layers") + history, diffIDs, err := c.schema1ManifestHistory() + if err != nil { + return ocispec.Descriptor{}, errors.Wrap(err, "schema 1 conversion failed") } var img ocispec.Image @@ -104,28 +111,7 @@ func (c *Converter) Convert(ctx context.Context) (ocispec.Descriptor, error) { return ocispec.Descriptor{}, errors.Wrap(err, "failed to unmarshal image from schema 1 history") } - history, err := schema1ManifestHistory(c.pulledManifest) - if err != nil { - return ocispec.Descriptor{}, errors.Wrap(err, "schema 1 conversion failed") - } img.History = history - - diffIDs := make([]digest.Digest, len(c.layers)) - for i, layer := range c.layers { - info, err := c.contentStore.Info(ctx, layer.Digest) - if err != nil { - return ocispec.Descriptor{}, errors.Wrap(err, "failed to get blob 
info") - } - - // Fill in size since not given by schema 1 manifest - c.layers[i].Size = info.Size - - diffID, ok := c.blobMap[layer.Digest] - if !ok { - return ocispec.Descriptor{}, errors.New("missing diff id") - } - diffIDs[i] = diffID - } img.RootFS = ocispec.RootFS{ Type: "layers", DiffIDs: diffIDs, @@ -147,12 +133,17 @@ func (c *Converter) Convert(ctx context.Context) (ocispec.Descriptor, error) { return ocispec.Descriptor{}, errors.Wrap(err, "failed to write config") } + layers := make([]ocispec.Descriptor, len(diffIDs)) + for i, diffID := range diffIDs { + layers[i] = c.layerBlobs[diffID] + } + manifest := ocispec.Manifest{ Versioned: specs.Versioned{ SchemaVersion: 2, }, Config: config, - Layers: c.layers, + Layers: layers, } b, err = json.Marshal(manifest) @@ -207,7 +198,7 @@ func (c *Converter) fetchBlob(ctx context.Context, desc ocispec.Descriptor) erro ref := remotes.MakeRefKey(ctx, desc) - var diffID digest.Digest + calc := newBlobStateCalculator() cw, err := c.contentStore.Writer(ctx, ref, desc.Size, desc.Digest) if err != nil { @@ -216,6 +207,7 @@ func (c *Converter) fetchBlob(ctx context.Context, desc ocispec.Descriptor) erro } // TODO: Check if blob -> diff id mapping already exists + // TODO: Check if blob empty label exists r, err := c.contentStore.Reader(ctx, desc.Digest) if err != nil { @@ -226,7 +218,7 @@ func (c *Converter) fetchBlob(ctx context.Context, desc ocispec.Descriptor) erro gr, err := gzip.NewReader(r) defer gr.Close() - diffID, err = digest.Canonical.FromReader(gr) + _, err = io.Copy(calc, gr) if err != nil { return err } @@ -246,7 +238,7 @@ func (c *Converter) fetchBlob(ctx context.Context, desc ocispec.Descriptor) erro gr, err := gzip.NewReader(pr) defer gr.Close() - diffID, err = digest.Canonical.FromReader(gr) + _, err = io.Copy(calc, gr) pr.CloseWithError(err) return err }) @@ -259,14 +251,65 @@ func (c *Converter) fetchBlob(ctx context.Context, desc ocispec.Descriptor) erro if err := eg.Wait(); err != nil { return err } + + 
// TODO: Label blob } + if desc.Size == 0 { + info, err := c.contentStore.Info(ctx, desc.Digest) + if err != nil { + return errors.Wrap(err, "failed to get blob info") + } + desc.Size = info.Size + } + + state := calc.State() + c.mu.Lock() - c.blobMap[desc.Digest] = diffID + c.blobMap[desc.Digest] = state + c.layerBlobs[state.diffID] = desc c.mu.Unlock() return nil } +func (c *Converter) schema1ManifestHistory() ([]ocispec.History, []digest.Digest, error) { + if c.pulledManifest == nil { + return nil, nil, errors.New("missing schema 1 manifest for conversion") + } + m := *c.pulledManifest + + if len(m.History) == 0 { + return nil, nil, errors.New("no history") + } + + history := make([]ocispec.History, len(m.History)) + diffIDs := []digest.Digest{} + for i := range m.History { + var h v1History + if err := json.Unmarshal([]byte(m.History[i].V1Compatibility), &h); err != nil { + return nil, nil, errors.Wrap(err, "failed to unmarshal history") + } + + blobSum := m.FSLayers[i].BlobSum + + state := c.blobMap[blobSum] + + history[len(history)-i-1] = ocispec.History{ + Author: h.Author, + Comment: h.Comment, + Created: &h.Created, + CreatedBy: strings.Join(h.ContainerConfig.Cmd, " "), + EmptyLayer: state.empty, + } + + if !state.empty { + diffIDs = append([]digest.Digest{state.diffID}, diffIDs...) + + } + } + + return history, diffIDs, nil +} type fsLayer struct { BlobSum digest.Digest `json:"blobSum"` @@ -292,37 +335,26 @@ type v1History struct { } `json:"container_config,omitempty"` } -func (h *v1History) EmptyLayer() bool { +// isEmptyLayer returns whether the v1 compatibility history describes an +// empty layer. A return value of true indicates the layer is empty, +// however false does not indicate non-empty. 
+func isEmptyLayer(compatHistory []byte) (bool, error) { + var h v1History + if err := json.Unmarshal(compatHistory, &h); err != nil { + return false, err + } + if h.ThrowAway != nil { - return !(*h.ThrowAway) + return *h.ThrowAway, nil } if h.Size != nil { - return *h.Size == 0 + return *h.Size == 0, nil } - // If no size is given or `ThrowAway` specified, the image is empty - return true -} - -func schema1ManifestHistory(m *manifest) ([]ocispec.History, error) { - history := make([]ocispec.History, len(m.History)) - for i := range m.History { - var h v1History - if err := json.Unmarshal([]byte(m.History[i].V1Compatibility), &h); err != nil { - return nil, errors.Wrap(err, "failed to unmarshal history") - } - - empty := h.EmptyLayer() - history[len(history)-i-1] = ocispec.History{ - Author: h.Author, - Comment: h.Comment, - Created: &h.Created, - CreatedBy: strings.Join(h.ContainerConfig.Cmd, " "), - EmptyLayer: empty, - } - } - - return history, nil + // If no `Size` or `throwaway` field is given, then + // it cannot be determined whether the layer is empty + // from the history, return false + return false, nil } type signature struct { @@ -384,3 +416,34 @@ func stripSignature(b []byte) ([]byte, error) { return append(b[:protected.Length], tail...), nil } + +type blobStateCalculator struct { + empty bool + digester digest.Digester +} + +func newBlobStateCalculator() *blobStateCalculator { + return &blobStateCalculator{ + empty: true, + digester: digest.Canonical.Digester(), + } +} + +func (c *blobStateCalculator) Write(p []byte) (int, error) { + if c.empty { + for _, b := range p { + if b != 0x00 { + c.empty = false + break + } + } + } + return c.digester.Hash().Write(p) +} + +func (c *blobStateCalculator) State() blobState { + return blobState{ + empty: c.empty, + diffID: c.digester.Digest(), + } +}