From 444e4220c277f0fdf946f4facab91ecaaf77e0f1 Mon Sep 17 00:00:00 2001 From: Darren Stahl Date: Mon, 27 Nov 2017 14:04:47 -0800 Subject: [PATCH 1/3] Revendor hcsshim and go-tar Signed-off-by: Darren Stahl --- vendor.conf | 4 +- .../github.com/Microsoft/hcsshim/container.go | 6 + vendor/github.com/Microsoft/hcsshim/errors.go | 22 + .../Microsoft/hcsshim/hnsendpoint.go | 23 +- .../Microsoft/hcsshim/hnsnetwork.go | 3 +- .../github.com/Microsoft/hcsshim/hnspolicy.go | 25 +- .../github.com/Microsoft/hcsshim/interface.go | 11 +- vendor/github.com/Microsoft/hcsshim/legacy.go | 19 +- .../github.com/Microsoft/hcsshim/legacy18.go | 7 + .../github.com/Microsoft/hcsshim/legacy19.go | 7 + vendor/github.com/dmcgowan/go-tar/common.go | 706 +++++++++++++-- vendor/github.com/dmcgowan/go-tar/format.go | 170 +++- vendor/github.com/dmcgowan/go-tar/reader.go | 842 +++++++++-------- .../github.com/dmcgowan/go-tar/sparse_unix.go | 77 ++ .../dmcgowan/go-tar/sparse_windows.go | 129 +++ .../go-tar/{stat_atim.go => stat_actime1.go} | 0 .../{stat_atimespec.go => stat_actime2.go} | 0 .../github.com/dmcgowan/go-tar/stat_unix.go | 72 +- vendor/github.com/dmcgowan/go-tar/strconv.go | 126 ++- vendor/github.com/dmcgowan/go-tar/writer.go | 845 ++++++++++++------ 20 files changed, 2211 insertions(+), 883 deletions(-) create mode 100644 vendor/github.com/Microsoft/hcsshim/legacy18.go create mode 100644 vendor/github.com/Microsoft/hcsshim/legacy19.go create mode 100644 vendor/github.com/dmcgowan/go-tar/sparse_unix.go create mode 100644 vendor/github.com/dmcgowan/go-tar/sparse_windows.go rename vendor/github.com/dmcgowan/go-tar/{stat_atim.go => stat_actime1.go} (100%) rename vendor/github.com/dmcgowan/go-tar/{stat_atimespec.go => stat_actime2.go} (100%) diff --git a/vendor.conf b/vendor.conf index 29f3c8828..0316ebb83 100644 --- a/vendor.conf +++ b/vendor.conf @@ -35,10 +35,10 @@ golang.org/x/sync 450f422ab23cf9881c94e2db30cac0eb1b7cf80c github.com/BurntSushi/toml v0.2.0-21-g9906417 
github.com/grpc-ecosystem/go-grpc-prometheus 6b7015e65d366bf3f19b2b2a000a831940f0f7e0 github.com/Microsoft/go-winio v0.4.4 -github.com/Microsoft/hcsshim v0.6.3 +github.com/Microsoft/hcsshim v0.6.7 github.com/Microsoft/opengcs v0.3.2 github.com/boltdb/bolt e9cf4fae01b5a8ff89d0ec6b32f0d9c9f79aefdd google.golang.org/genproto d80a6e20e776b0b17a324d0ba1ab50a39c8e8944 golang.org/x/text 19e51611da83d6be54ddafce4a4af510cb3e9ea4 -github.com/dmcgowan/go-tar 2e2c51242e8993c50445dab7c03c8e7febddd0cf +github.com/dmcgowan/go-tar go1.10 github.com/stevvooe/ttrpc bdb2ab7a8169e485e39421e666e15a505e575fd2 diff --git a/vendor/github.com/Microsoft/hcsshim/container.go b/vendor/github.com/Microsoft/hcsshim/container.go index b924d39f4..3354f70ef 100644 --- a/vendor/github.com/Microsoft/hcsshim/container.go +++ b/vendor/github.com/Microsoft/hcsshim/container.go @@ -201,12 +201,18 @@ func createContainerWithJSON(id string, c *ContainerConfig, additionalJSON strin if createError == nil || IsPending(createError) { if err := container.registerCallback(); err != nil { + // Terminate the container if it still exists. We're okay to ignore a failure here. + container.Terminate() return nil, makeContainerError(container, operation, "", err) } } err = processAsyncHcsResult(createError, resultp, container.callbackNumber, hcsNotificationSystemCreateCompleted, &defaultTimeout) if err != nil { + if err == ErrTimeout { + // Terminate the container if it still exists. We're okay to ignore a failure here. 
+ container.Terminate() + } return nil, makeContainerError(container, operation, configuration, err) } diff --git a/vendor/github.com/Microsoft/hcsshim/errors.go b/vendor/github.com/Microsoft/hcsshim/errors.go index d2f9cc8bd..c0c6cac87 100644 --- a/vendor/github.com/Microsoft/hcsshim/errors.go +++ b/vendor/github.com/Microsoft/hcsshim/errors.go @@ -72,6 +72,22 @@ var ( ErrPlatformNotSupported = errors.New("unsupported platform request") ) +type EndpointNotFoundError struct { + EndpointName string +} + +func (e EndpointNotFoundError) Error() string { + return fmt.Sprintf("Endpoint %s not found", e.EndpointName) +} + +type NetworkNotFoundError struct { + NetworkName string +} + +func (e NetworkNotFoundError) Error() string { + return fmt.Sprintf("Network %s not found", e.NetworkName) +} + // ProcessError is an error encountered in HCS during an operation on a Process object type ProcessError struct { Process *process @@ -174,6 +190,12 @@ func makeProcessError(process *process, operation string, extraInfo string, err // will currently return true when the error is ErrElementNotFound or ErrProcNotFound. 
func IsNotExist(err error) bool { err = getInnerError(err) + if _, ok := err.(EndpointNotFoundError); ok { + return true + } + if _, ok := err.(NetworkNotFoundError); ok { + return true + } return err == ErrComputeSystemDoesNotExist || err == ErrElementNotFound || err == ErrProcNotFound diff --git a/vendor/github.com/Microsoft/hcsshim/hnsendpoint.go b/vendor/github.com/Microsoft/hcsshim/hnsendpoint.go index 92afc0c24..7e516f8a2 100644 --- a/vendor/github.com/Microsoft/hcsshim/hnsendpoint.go +++ b/vendor/github.com/Microsoft/hcsshim/hnsendpoint.go @@ -2,7 +2,6 @@ package hcsshim import ( "encoding/json" - "fmt" "net" "github.com/sirupsen/logrus" @@ -135,7 +134,7 @@ func GetHNSEndpointByName(endpointName string) (*HNSEndpoint, error) { return &hnsEndpoint, nil } } - return nil, fmt.Errorf("Endpoint %v not found", endpointName) + return nil, EndpointNotFoundError{EndpointName: endpointName} } // Create Endpoint by sending EndpointRequest to HNS. TODO: Create a separate HNS interface to place all these methods @@ -192,18 +191,24 @@ func (endpoint *HNSEndpoint) ContainerHotDetach(containerID string) error { return modifyNetworkEndpoint(containerID, endpoint.Id, Remove) } -// ApplyACLPolicy applies Acl Policy on the Endpoint -func (endpoint *HNSEndpoint) ApplyACLPolicy(policy *ACLPolicy) error { +// ApplyACLPolicy applies a set of ACL Policies on the Endpoint +func (endpoint *HNSEndpoint) ApplyACLPolicy(policies ...*ACLPolicy) error { operation := "ApplyACLPolicy" title := "HCSShim::HNSEndpoint::" + operation logrus.Debugf(title+" id=%s", endpoint.Id) - jsonString, err := json.Marshal(policy) - if err != nil { - return err + for _, policy := range policies { + if policy == nil { + continue + } + jsonString, err := json.Marshal(policy) + if err != nil { + return err + } + endpoint.Policies = append(endpoint.Policies, jsonString) } - endpoint.Policies[0] = jsonString - _, err = endpoint.Update() + + _, err := endpoint.Update() return err } diff --git 
a/vendor/github.com/Microsoft/hcsshim/hnsnetwork.go b/vendor/github.com/Microsoft/hcsshim/hnsnetwork.go index 3345bfa3f..04c1b5919 100644 --- a/vendor/github.com/Microsoft/hcsshim/hnsnetwork.go +++ b/vendor/github.com/Microsoft/hcsshim/hnsnetwork.go @@ -2,7 +2,6 @@ package hcsshim import ( "encoding/json" - "fmt" "net" "github.com/sirupsen/logrus" @@ -90,7 +89,7 @@ func GetHNSNetworkByName(networkName string) (*HNSNetwork, error) { return &hnsnetwork, nil } } - return nil, fmt.Errorf("Network %v not found", networkName) + return nil, NetworkNotFoundError{NetworkName: networkName} } // Create Network by sending NetworkRequest to HNS. diff --git a/vendor/github.com/Microsoft/hcsshim/hnspolicy.go b/vendor/github.com/Microsoft/hcsshim/hnspolicy.go index ecfbf0eda..65b8e93d9 100644 --- a/vendor/github.com/Microsoft/hcsshim/hnspolicy.go +++ b/vendor/github.com/Microsoft/hcsshim/hnspolicy.go @@ -75,19 +75,18 @@ const ( ) type ACLPolicy struct { - Type PolicyType `json:"Type"` - Protocol uint16 - InternalPort uint16 - Action ActionType - Direction DirectionType - LocalAddress string - RemoteAddress string - LocalPort uint16 - RemotePort uint16 - RuleType RuleType `json:"RuleType,omitempty"` - - Priority uint16 - ServiceName string + Type PolicyType `json:"Type"` + Protocol uint16 + InternalPort uint16 + Action ActionType + Direction DirectionType + LocalAddresses string + RemoteAddresses string + LocalPort uint16 + RemotePort uint16 + RuleType RuleType `json:"RuleType,omitempty"` + Priority uint16 + ServiceName string } type Policy struct { diff --git a/vendor/github.com/Microsoft/hcsshim/interface.go b/vendor/github.com/Microsoft/hcsshim/interface.go index 9fc7852e4..e21f30025 100644 --- a/vendor/github.com/Microsoft/hcsshim/interface.go +++ b/vendor/github.com/Microsoft/hcsshim/interface.go @@ -30,11 +30,12 @@ type Layer struct { } type MappedDir struct { - HostPath string - ContainerPath string - ReadOnly bool - BandwidthMaximum uint64 - IOPSMaximum uint64 + HostPath 
string + ContainerPath string + ReadOnly bool + BandwidthMaximum uint64 + IOPSMaximum uint64 + CreateInUtilityVM bool } type MappedPipe struct { diff --git a/vendor/github.com/Microsoft/hcsshim/legacy.go b/vendor/github.com/Microsoft/hcsshim/legacy.go index c7f6073ac..a0a97d7c7 100644 --- a/vendor/github.com/Microsoft/hcsshim/legacy.go +++ b/vendor/github.com/Microsoft/hcsshim/legacy.go @@ -472,15 +472,21 @@ func cloneTree(srcPath, destPath string, mutatedFiles map[string]bool) error { } destFilePath := filepath.Join(destPath, relPath) + fileAttributes := info.Sys().(*syscall.Win32FileAttributeData).FileAttributes // Directories, reparse points, and files that will be mutated during // utility VM import must be copied. All other files can be hard linked. - isReparsePoint := info.Sys().(*syscall.Win32FileAttributeData).FileAttributes&syscall.FILE_ATTRIBUTE_REPARSE_POINT != 0 - if info.IsDir() || isReparsePoint || mutatedFiles[relPath] { - fi, err := copyFileWithMetadata(srcFilePath, destFilePath, info.IsDir()) + isReparsePoint := fileAttributes&syscall.FILE_ATTRIBUTE_REPARSE_POINT != 0 + // In go1.9, FileInfo.IsDir() returns false if the directory is also a symlink. + // See: https://github.com/golang/go/commit/1989921aef60c83e6f9127a8448fb5ede10e9acc + // Fixes the problem by checking syscall.FILE_ATTRIBUTE_DIRECTORY directly + isDir := fileAttributes&syscall.FILE_ATTRIBUTE_DIRECTORY != 0 + + if isDir || isReparsePoint || mutatedFiles[relPath] { + fi, err := copyFileWithMetadata(srcFilePath, destFilePath, isDir) if err != nil { return err } - if info.IsDir() && !isReparsePoint { + if isDir && !isReparsePoint { di = append(di, dirInfo{path: destFilePath, fileInfo: *fi}) } } else { @@ -490,8 +496,9 @@ func cloneTree(srcPath, destPath string, mutatedFiles map[string]bool) error { } } - // Don't recurse on reparse points. - if info.IsDir() && isReparsePoint { + // Don't recurse on reparse points in go1.8 and older. Filepath.Walk + // handles this in go1.9 and newer. 
+ if isDir && isReparsePoint && shouldSkipDirectoryReparse { return filepath.SkipDir } diff --git a/vendor/github.com/Microsoft/hcsshim/legacy18.go b/vendor/github.com/Microsoft/hcsshim/legacy18.go new file mode 100644 index 000000000..578552f91 --- /dev/null +++ b/vendor/github.com/Microsoft/hcsshim/legacy18.go @@ -0,0 +1,7 @@ +// +build !go1.9 + +package hcsshim + +// Due to a bug in go1.8 and before, directory reparse points need to be skipped +// during filepath.Walk. This is fixed in go1.9 +var shouldSkipDirectoryReparse = true diff --git a/vendor/github.com/Microsoft/hcsshim/legacy19.go b/vendor/github.com/Microsoft/hcsshim/legacy19.go new file mode 100644 index 000000000..6aa1dc058 --- /dev/null +++ b/vendor/github.com/Microsoft/hcsshim/legacy19.go @@ -0,0 +1,7 @@ +// +build go1.9 + +package hcsshim + +// Due to a bug in go1.8 and before, directory reparse points need to be skipped +// during filepath.Walk. This is fixed in go1.9 +var shouldSkipDirectoryReparse = false diff --git a/vendor/github.com/dmcgowan/go-tar/common.go b/vendor/github.com/dmcgowan/go-tar/common.go index d2ae66d55..e3609536c 100644 --- a/vendor/github.com/dmcgowan/go-tar/common.go +++ b/vendor/github.com/dmcgowan/go-tar/common.go @@ -3,20 +3,23 @@ // license that can be found in the LICENSE file. // Package tar implements access to tar archives. -// It aims to cover most of the variations, including those produced -// by GNU and BSD tars. // -// References: -// http://www.freebsd.org/cgi/man.cgi?query=tar&sektion=5 -// http://www.gnu.org/software/tar/manual/html_node/Standard.html -// http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html +// Tape archives (tar) are a file format for storing a sequence of files that +// can be read and written in a streaming manner. +// This package aims to cover most variations of the format, +// including those produced by GNU and BSD tar tools. 
package tar import ( "errors" "fmt" + "io" + "math" "os" "path" + "reflect" + "strconv" + "strings" "time" ) @@ -24,42 +27,569 @@ import ( // architectures. If a large value is encountered when decoding, the result // stored in Header will be the truncated version. -// Header type flags. -const ( - TypeReg = '0' // regular file - TypeRegA = '\x00' // regular file - TypeLink = '1' // hard link - TypeSymlink = '2' // symbolic link - TypeChar = '3' // character device node - TypeBlock = '4' // block device node - TypeDir = '5' // directory - TypeFifo = '6' // fifo node - TypeCont = '7' // reserved - TypeXHeader = 'x' // extended header - TypeXGlobalHeader = 'g' // global extended header - TypeGNULongName = 'L' // Next file has a long name - TypeGNULongLink = 'K' // Next file symlinks to a file w/ a long name - TypeGNUSparse = 'S' // sparse file +var ( + ErrHeader = errors.New("tar: invalid tar header") + ErrWriteTooLong = errors.New("tar: write too long") + ErrFieldTooLong = errors.New("tar: header field too long") + ErrWriteAfterClose = errors.New("tar: write after close") + errMissData = errors.New("tar: sparse file references non-existent data") + errUnrefData = errors.New("tar: sparse file contains unreferenced data") + errWriteHole = errors.New("tar: write non-NUL byte in sparse hole") ) +type headerError []string + +func (he headerError) Error() string { + const prefix = "tar: cannot encode header" + var ss []string + for _, s := range he { + if s != "" { + ss = append(ss, s) + } + } + if len(ss) == 0 { + return prefix + } + return fmt.Sprintf("%s: %v", prefix, strings.Join(ss, "; and ")) +} + +// Type flags for Header.Typeflag. +const ( + // Type '0' indicates a regular file. + TypeReg = '0' + TypeRegA = '\x00' // For legacy support; use TypeReg instead + + // Type '1' to '6' are header-only flags and may not have a data body. 
+ TypeLink = '1' // Hard link + TypeSymlink = '2' // Symbolic link + TypeChar = '3' // Character device node + TypeBlock = '4' // Block device node + TypeDir = '5' // Directory + TypeFifo = '6' // FIFO node + + // Type '7' is reserved. + TypeCont = '7' + + // Type 'x' is used by the PAX format to store key-value records that + // are only relevant to the next file. + // This package transparently handles these types. + TypeXHeader = 'x' + + // Type 'g' is used by the PAX format to store key-value records that + // are relevant to all subsequent files. + // This package only supports parsing and composing such headers, + // but does not currently support persisting the global state across files. + TypeXGlobalHeader = 'g' + + // Type 'S' indicates a sparse file in the GNU format. + // Header.SparseHoles should be populated when using this type. + TypeGNUSparse = 'S' + + // Types 'L' and 'K' are used by the GNU format for a meta file + // used to store the path or link name for the next file. + // This package transparently handles these types. + TypeGNULongName = 'L' + TypeGNULongLink = 'K' +) + +// Keywords for PAX extended header records. +const ( + paxNone = "" // Indicates that no PAX key is suitable + paxPath = "path" + paxLinkpath = "linkpath" + paxSize = "size" + paxUid = "uid" + paxGid = "gid" + paxUname = "uname" + paxGname = "gname" + paxMtime = "mtime" + paxAtime = "atime" + paxCtime = "ctime" // Removed from later revision of PAX spec, but was valid + paxCharset = "charset" // Currently unused + paxComment = "comment" // Currently unused + + paxSchilyXattr = "SCHILY.xattr." + + // Keywords for GNU sparse files in a PAX extended header. + paxGNUSparse = "GNU.sparse." 
+ paxGNUSparseNumBlocks = "GNU.sparse.numblocks" + paxGNUSparseOffset = "GNU.sparse.offset" + paxGNUSparseNumBytes = "GNU.sparse.numbytes" + paxGNUSparseMap = "GNU.sparse.map" + paxGNUSparseName = "GNU.sparse.name" + paxGNUSparseMajor = "GNU.sparse.major" + paxGNUSparseMinor = "GNU.sparse.minor" + paxGNUSparseSize = "GNU.sparse.size" + paxGNUSparseRealSize = "GNU.sparse.realsize" +) + +// basicKeys is a set of the PAX keys for which we have built-in support. +// This does not contain "charset" or "comment", which are both PAX-specific, +// so adding them as first-class features of Header is unlikely. +// Users can use the PAXRecords field to set it themselves. +var basicKeys = map[string]bool{ + paxPath: true, paxLinkpath: true, paxSize: true, paxUid: true, paxGid: true, + paxUname: true, paxGname: true, paxMtime: true, paxAtime: true, paxCtime: true, +} + // A Header represents a single header in a tar archive. // Some fields may not be populated. +// +// For forward compatibility, users that retrieve a Header from Reader.Next, +// mutate it in some ways, and then pass it back to Writer.WriteHeader +// should do so by creating a new Header and copying the fields +// that they are interested in preserving. 
type Header struct { - Name string // name of header file entry - Mode int64 // permission and mode bits - Uid int // user id of owner - Gid int // group id of owner - Size int64 // length in bytes - ModTime time.Time // modified time - Typeflag byte // type of header entry - Linkname string // target name of link - Uname string // user name of owner - Gname string // group name of owner - Devmajor int64 // major number of character or block device - Devminor int64 // minor number of character or block device - AccessTime time.Time // access time - ChangeTime time.Time // status change time - Xattrs map[string]string + Typeflag byte // Type of header entry (should be TypeReg for most files) + + Name string // Name of file entry + Linkname string // Target name of link (valid for TypeLink or TypeSymlink) + + Size int64 // Logical file size in bytes + Mode int64 // Permission and mode bits + Uid int // User ID of owner + Gid int // Group ID of owner + Uname string // User name of owner + Gname string // Group name of owner + + // If the Format is unspecified, then Writer.WriteHeader rounds ModTime + // to the nearest second and ignores the AccessTime and ChangeTime fields. + // + // To use AccessTime or ChangeTime, specify the Format as PAX or GNU. + // To use sub-second resolution, specify the Format as PAX. + ModTime time.Time // Modification time + AccessTime time.Time // Access time (requires either PAX or GNU support) + ChangeTime time.Time // Change time (requires either PAX or GNU support) + + Devmajor int64 // Major device number (valid for TypeChar or TypeBlock) + Devminor int64 // Minor device number (valid for TypeChar or TypeBlock) + + // SparseHoles represents a sequence of holes in a sparse file. + // + // A file is sparse if len(SparseHoles) > 0 or Typeflag is TypeGNUSparse. + // If TypeGNUSparse is set, then the format is GNU, otherwise + // the format is PAX (by using GNU-specific PAX records). 
+ // + // A sparse file consists of fragments of data, intermixed with holes + // (described by this field). A hole is semantically a block of NUL-bytes, + // but does not actually exist within the tar file. + // The holes must be sorted in ascending order, + // not overlap with each other, and not extend past the specified Size. + SparseHoles []SparseEntry + + // Xattrs stores extended attributes as PAX records under the + // "SCHILY.xattr." namespace. + // + // The following are semantically equivalent: + // h.Xattrs[key] = value + // h.PAXRecords["SCHILY.xattr."+key] = value + // + // When Writer.WriteHeader is called, the contents of Xattrs will take + // precedence over those in PAXRecords. + // + // Deprecated: Use PAXRecords instead. + Xattrs map[string]string + + // PAXRecords is a map of PAX extended header records. + // + // User-defined records should have keys of the following form: + // VENDOR.keyword + // Where VENDOR is some namespace in all uppercase, and keyword may + // not contain the '=' character (e.g., "GOLANG.pkg.version"). + // The key and value should be non-empty UTF-8 strings. + // + // When Writer.WriteHeader is called, PAX records derived from the + // the other fields in Header take precedence over PAXRecords. + PAXRecords map[string]string + + // Format specifies the format of the tar header. + // + // This is set by Reader.Next as a best-effort guess at the format. + // Since the Reader liberally reads some non-compliant files, + // it is possible for this to be FormatUnknown. + // + // If the format is unspecified when Writer.WriteHeader is called, + // then it uses the first format (in the order of USTAR, PAX, GNU) + // capable of encoding this Header (see Format). + Format Format +} + +// SparseEntry represents a Length-sized fragment at Offset in the file. 
+type SparseEntry struct{ Offset, Length int64 } + +func (s SparseEntry) endOffset() int64 { return s.Offset + s.Length } + +// A sparse file can be represented as either a sparseDatas or a sparseHoles. +// As long as the total size is known, they are equivalent and one can be +// converted to the other form and back. The various tar formats with sparse +// file support represent sparse files in the sparseDatas form. That is, they +// specify the fragments in the file that has data, and treat everything else as +// having zero bytes. As such, the encoding and decoding logic in this package +// deals with sparseDatas. +// +// However, the external API uses sparseHoles instead of sparseDatas because the +// zero value of sparseHoles logically represents a normal file (i.e., there are +// no holes in it). On the other hand, the zero value of sparseDatas implies +// that the file has no data in it, which is rather odd. +// +// As an example, if the underlying raw file contains the 10-byte data: +// var compactFile = "abcdefgh" +// +// And the sparse map has the following entries: +// var spd sparseDatas = []sparseEntry{ +// {Offset: 2, Length: 5}, // Data fragment for 2..6 +// {Offset: 18, Length: 3}, // Data fragment for 18..20 +// } +// var sph sparseHoles = []SparseEntry{ +// {Offset: 0, Length: 2}, // Hole fragment for 0..1 +// {Offset: 7, Length: 11}, // Hole fragment for 7..17 +// {Offset: 21, Length: 4}, // Hole fragment for 21..24 +// } +// +// Then the content of the resulting sparse file with a Header.Size of 25 is: +// var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 +type ( + sparseDatas []SparseEntry + sparseHoles []SparseEntry +) + +// validateSparseEntries reports whether sp is a valid sparse map. +// It does not matter whether sp represents data fragments or hole fragments. +func validateSparseEntries(sp []SparseEntry, size int64) bool { + // Validate all sparse entries. 
These are the same checks as performed by + // the BSD tar utility. + if size < 0 { + return false + } + var pre SparseEntry + for _, cur := range sp { + switch { + case cur.Offset < 0 || cur.Length < 0: + return false // Negative values are never okay + case cur.Offset > math.MaxInt64-cur.Length: + return false // Integer overflow with large length + case cur.endOffset() > size: + return false // Region extends beyond the actual size + case pre.endOffset() > cur.Offset: + return false // Regions cannot overlap and must be in order + } + pre = cur + } + return true +} + +// alignSparseEntries mutates src and returns dst where each fragment's +// starting offset is aligned up to the nearest block edge, and each +// ending offset is aligned down to the nearest block edge. +// +// Even though the Go tar Reader and the BSD tar utility can handle entries +// with arbitrary offsets and lengths, the GNU tar utility can only handle +// offsets and lengths that are multiples of blockSize. +func alignSparseEntries(src []SparseEntry, size int64) []SparseEntry { + dst := src[:0] + for _, s := range src { + pos, end := s.Offset, s.endOffset() + pos += blockPadding(+pos) // Round-up to nearest blockSize + if end != size { + end -= blockPadding(-end) // Round-down to nearest blockSize + } + if pos < end { + dst = append(dst, SparseEntry{Offset: pos, Length: end - pos}) + } + } + return dst +} + +// invertSparseEntries converts a sparse map from one form to the other. +// If the input is sparseHoles, then it will output sparseDatas and vice-versa. +// The input must have been already validated. 
+// +// This function mutates src and returns a normalized map where: +// * adjacent fragments are coalesced together +// * only the last fragment may be empty +// * the endOffset of the last fragment is the total size +func invertSparseEntries(src []SparseEntry, size int64) []SparseEntry { + dst := src[:0] + var pre SparseEntry + for _, cur := range src { + if cur.Length == 0 { + continue // Skip empty fragments + } + pre.Length = cur.Offset - pre.Offset + if pre.Length > 0 { + dst = append(dst, pre) // Only add non-empty fragments + } + pre.Offset = cur.endOffset() + } + pre.Length = size - pre.Offset // Possibly the only empty fragment + return append(dst, pre) +} + +// fileState tracks the number of logical (includes sparse holes) and physical +// (actual in tar archive) bytes remaining for the current file. +// +// Invariant: LogicalRemaining >= PhysicalRemaining +type fileState interface { + LogicalRemaining() int64 + PhysicalRemaining() int64 +} + +// allowedFormats determines which formats can be used. +// The value returned is the logical OR of multiple possible formats. +// If the value is FormatUnknown, then the input Header cannot be encoded +// and an error is returned explaining why. +// +// As a by-product of checking the fields, this function returns paxHdrs, which +// contain all fields that could not be directly encoded. +// A value receiver ensures that this method does not mutate the source Header. +func (h Header) allowedFormats() (format Format, paxHdrs map[string]string, err error) { + format = FormatUSTAR | FormatPAX | FormatGNU + paxHdrs = make(map[string]string) + + var whyNoUSTAR, whyNoPAX, whyNoGNU string + var preferPAX bool // Prefer PAX over USTAR + verifyString := func(s string, size int, name, paxKey string) { + // NUL-terminator is optional for path and linkpath. + // Technically, it is required for uname and gname, + // but neither GNU nor BSD tar checks for it. 
+ tooLong := len(s) > size + allowLongGNU := paxKey == paxPath || paxKey == paxLinkpath + if hasNUL(s) || (tooLong && !allowLongGNU) { + whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%q", name, s) + format.mustNotBe(FormatGNU) + } + if !isASCII(s) || tooLong { + canSplitUSTAR := paxKey == paxPath + if _, _, ok := splitUSTARPath(s); !canSplitUSTAR || !ok { + whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%q", name, s) + format.mustNotBe(FormatUSTAR) + } + if paxKey == paxNone { + whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%q", name, s) + format.mustNotBe(FormatPAX) + } else { + paxHdrs[paxKey] = s + } + } + if v, ok := h.PAXRecords[paxKey]; ok && v == s { + paxHdrs[paxKey] = v + } + } + verifyNumeric := func(n int64, size int, name, paxKey string) { + if !fitsInBase256(size, n) { + whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%d", name, n) + format.mustNotBe(FormatGNU) + } + if !fitsInOctal(size, n) { + whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%d", name, n) + format.mustNotBe(FormatUSTAR) + if paxKey == paxNone { + whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%d", name, n) + format.mustNotBe(FormatPAX) + } else { + paxHdrs[paxKey] = strconv.FormatInt(n, 10) + } + } + if v, ok := h.PAXRecords[paxKey]; ok && v == strconv.FormatInt(n, 10) { + paxHdrs[paxKey] = v + } + } + verifyTime := func(ts time.Time, size int, name, paxKey string) { + if ts.IsZero() { + return // Always okay + } + if !fitsInBase256(size, ts.Unix()) { + whyNoGNU = fmt.Sprintf("GNU cannot encode %s=%v", name, ts) + format.mustNotBe(FormatGNU) + } + isMtime := paxKey == paxMtime + fitsOctal := fitsInOctal(size, ts.Unix()) + if (isMtime && !fitsOctal) || !isMtime { + whyNoUSTAR = fmt.Sprintf("USTAR cannot encode %s=%v", name, ts) + format.mustNotBe(FormatUSTAR) + } + needsNano := ts.Nanosecond() != 0 + if !isMtime || !fitsOctal || needsNano { + preferPAX = true // USTAR may truncate sub-second measurements + if paxKey == paxNone { + whyNoPAX = fmt.Sprintf("PAX cannot encode %s=%v", name, ts) 
+ format.mustNotBe(FormatPAX) + } else { + paxHdrs[paxKey] = formatPAXTime(ts) + } + } + if v, ok := h.PAXRecords[paxKey]; ok && v == formatPAXTime(ts) { + paxHdrs[paxKey] = v + } + } + + // Check basic fields. + var blk block + v7 := blk.V7() + ustar := blk.USTAR() + gnu := blk.GNU() + verifyString(h.Name, len(v7.Name()), "Name", paxPath) + verifyString(h.Linkname, len(v7.LinkName()), "Linkname", paxLinkpath) + verifyString(h.Uname, len(ustar.UserName()), "Uname", paxUname) + verifyString(h.Gname, len(ustar.GroupName()), "Gname", paxGname) + verifyNumeric(h.Mode, len(v7.Mode()), "Mode", paxNone) + verifyNumeric(int64(h.Uid), len(v7.UID()), "Uid", paxUid) + verifyNumeric(int64(h.Gid), len(v7.GID()), "Gid", paxGid) + verifyNumeric(h.Size, len(v7.Size()), "Size", paxSize) + verifyNumeric(h.Devmajor, len(ustar.DevMajor()), "Devmajor", paxNone) + verifyNumeric(h.Devminor, len(ustar.DevMinor()), "Devminor", paxNone) + verifyTime(h.ModTime, len(v7.ModTime()), "ModTime", paxMtime) + verifyTime(h.AccessTime, len(gnu.AccessTime()), "AccessTime", paxAtime) + verifyTime(h.ChangeTime, len(gnu.ChangeTime()), "ChangeTime", paxCtime) + + // Check for header-only types. + var whyOnlyPAX, whyOnlyGNU string + switch h.Typeflag { + case TypeReg, TypeChar, TypeBlock, TypeFifo, TypeGNUSparse: + // Exclude TypeLink and TypeSymlink, since they may reference directories. 
+ if strings.HasSuffix(h.Name, "/") { + return FormatUnknown, nil, headerError{"filename may not have trailing slash"} + } + case TypeXHeader, TypeGNULongName, TypeGNULongLink: + return FormatUnknown, nil, headerError{"cannot manually encode TypeXHeader, TypeGNULongName, or TypeGNULongLink headers"} + case TypeXGlobalHeader: + if !reflect.DeepEqual(h, Header{Typeflag: h.Typeflag, Xattrs: h.Xattrs, PAXRecords: h.PAXRecords, Format: h.Format}) { + return FormatUnknown, nil, headerError{"only PAXRecords may be set for TypeXGlobalHeader"} + } + whyOnlyPAX = "only PAX supports TypeXGlobalHeader" + format.mayOnlyBe(FormatPAX) + } + if !isHeaderOnlyType(h.Typeflag) && h.Size < 0 { + return FormatUnknown, nil, headerError{"negative size on header-only type"} + } + + // Check PAX records. + if len(h.Xattrs) > 0 { + for k, v := range h.Xattrs { + paxHdrs[paxSchilyXattr+k] = v + } + whyOnlyPAX = "only PAX supports Xattrs" + format.mayOnlyBe(FormatPAX) + } + if len(h.PAXRecords) > 0 { + for k, v := range h.PAXRecords { + switch _, exists := paxHdrs[k]; { + case exists: + continue // Do not overwrite existing records + case h.Typeflag == TypeXGlobalHeader: + paxHdrs[k] = v // Copy all records + case !basicKeys[k] && !strings.HasPrefix(k, paxGNUSparse): + paxHdrs[k] = v // Ignore local records that may conflict + } + } + whyOnlyPAX = "only PAX supports PAXRecords" + format.mayOnlyBe(FormatPAX) + } + for k, v := range paxHdrs { + if !validPAXRecord(k, v) { + return FormatUnknown, nil, headerError{fmt.Sprintf("invalid PAX record: %q", k+" = "+v)} + } + } + + // Check sparse files. 
+ if len(h.SparseHoles) > 0 || h.Typeflag == TypeGNUSparse { + if isHeaderOnlyType(h.Typeflag) { + return FormatUnknown, nil, headerError{"header-only type cannot be sparse"} + } + if !validateSparseEntries(h.SparseHoles, h.Size) { + return FormatUnknown, nil, headerError{"invalid sparse holes"} + } + if h.Typeflag == TypeGNUSparse { + whyOnlyGNU = "only GNU supports TypeGNUSparse" + format.mayOnlyBe(FormatGNU) + } else { + whyNoGNU = "GNU supports sparse files only with TypeGNUSparse" + format.mustNotBe(FormatGNU) + } + whyNoUSTAR = "USTAR does not support sparse files" + format.mustNotBe(FormatUSTAR) + } + + // Check desired format. + if wantFormat := h.Format; wantFormat != FormatUnknown { + if wantFormat.has(FormatPAX) && !preferPAX { + wantFormat.mayBe(FormatUSTAR) // PAX implies USTAR allowed too + } + format.mayOnlyBe(wantFormat) // Set union of formats allowed and format wanted + } + if format == FormatUnknown { + switch h.Format { + case FormatUSTAR: + err = headerError{"Format specifies USTAR", whyNoUSTAR, whyOnlyPAX, whyOnlyGNU} + case FormatPAX: + err = headerError{"Format specifies PAX", whyNoPAX, whyOnlyGNU} + case FormatGNU: + err = headerError{"Format specifies GNU", whyNoGNU, whyOnlyPAX} + default: + err = headerError{whyNoUSTAR, whyNoPAX, whyNoGNU, whyOnlyPAX, whyOnlyGNU} + } + } + return format, paxHdrs, err +} + +var sysSparseDetect func(f *os.File) (sparseHoles, error) +var sysSparsePunch func(f *os.File, sph sparseHoles) error + +// DetectSparseHoles searches for holes within f to populate SparseHoles +// on supported operating systems and filesystems. +// The file offset is cleared to zero. +// +// When packing a sparse file, DetectSparseHoles should be called prior to +// serializing the header to the archive with Writer.WriteHeader. 
+func (h *Header) DetectSparseHoles(f *os.File) (err error) { + defer func() { + if _, serr := f.Seek(0, io.SeekStart); err == nil { + err = serr + } + }() + + h.SparseHoles = nil + if sysSparseDetect != nil { + sph, err := sysSparseDetect(f) + h.SparseHoles = sph + return err + } + return nil +} + +// PunchSparseHoles destroys the contents of f, and prepares a sparse file +// (on supported operating systems and filesystems) +// with holes punched according to SparseHoles. +// The file offset is cleared to zero. +// +// When extracting a sparse file, PunchSparseHoles should be called prior to +// populating the content of a file with Reader.WriteTo. +func (h *Header) PunchSparseHoles(f *os.File) (err error) { + defer func() { + if _, serr := f.Seek(0, io.SeekStart); err == nil { + err = serr + } + }() + + if err := f.Truncate(0); err != nil { + return err + } + + var size int64 + if len(h.SparseHoles) > 0 { + size = h.SparseHoles[len(h.SparseHoles)-1].endOffset() + } + if !validateSparseEntries(h.SparseHoles, size) { + return errors.New("tar: invalid sparse holes") + } + + if size == 0 { + return nil // For non-sparse files, do nothing (other than Truncate) + } + if sysSparsePunch != nil { + return sysSparsePunch(f, h.SparseHoles) + } + return f.Truncate(size) } // FileInfo returns an os.FileInfo for the Header. @@ -92,63 +622,43 @@ func (fi headerFileInfo) Mode() (mode os.FileMode) { // Set setuid, setgid and sticky bits. if fi.h.Mode&c_ISUID != 0 { - // setuid mode |= os.ModeSetuid } if fi.h.Mode&c_ISGID != 0 { - // setgid mode |= os.ModeSetgid } if fi.h.Mode&c_ISVTX != 0 { - // sticky mode |= os.ModeSticky } - // Set file mode bits. - // clear perm, setuid, setgid and sticky bits. - m := os.FileMode(fi.h.Mode) &^ 07777 - if m == c_ISDIR { - // directory + // Set file mode bits; clear perm, setuid, setgid, and sticky bits. 
+ switch m := os.FileMode(fi.h.Mode) &^ 07777; m { + case c_ISDIR: mode |= os.ModeDir - } - if m == c_ISFIFO { - // named pipe (FIFO) + case c_ISFIFO: mode |= os.ModeNamedPipe - } - if m == c_ISLNK { - // symbolic link + case c_ISLNK: mode |= os.ModeSymlink - } - if m == c_ISBLK { - // device file + case c_ISBLK: mode |= os.ModeDevice - } - if m == c_ISCHR { - // Unix character device + case c_ISCHR: mode |= os.ModeDevice mode |= os.ModeCharDevice - } - if m == c_ISSOCK { - // Unix domain socket + case c_ISSOCK: mode |= os.ModeSocket } switch fi.h.Typeflag { case TypeSymlink: - // symbolic link mode |= os.ModeSymlink case TypeChar: - // character device node mode |= os.ModeDevice mode |= os.ModeCharDevice case TypeBlock: - // block device node mode |= os.ModeDevice case TypeDir: - // directory mode |= os.ModeDir case TypeFifo: - // fifo node mode |= os.ModeNamedPipe } @@ -158,11 +668,15 @@ func (fi headerFileInfo) Mode() (mode os.FileMode) { // sysStat, if non-nil, populates h from system-dependent fields of fi. var sysStat func(fi os.FileInfo, h *Header) error -// Mode constants from the tar spec. const ( - c_ISUID = 04000 // Set uid - c_ISGID = 02000 // Set gid - c_ISVTX = 01000 // Save text (sticky bit) + // Mode constants from the USTAR spec: + // See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 + c_ISUID = 04000 // Set uid + c_ISGID = 02000 // Set gid + c_ISVTX = 01000 // Save text (sticky bit) + + // Common Unix mode constants; these are not defined in any common tar standard. + // Header.FileInfo understands these, but FileInfoHeader will never produce these. c_ISDIR = 040000 // Directory c_ISFIFO = 010000 // FIFO c_ISREG = 0100000 // Regular file @@ -172,30 +686,16 @@ const ( c_ISSOCK = 0140000 // Socket ) -// Keywords for the PAX Extended Header -const ( - paxAtime = "atime" - paxCharset = "charset" - paxComment = "comment" - paxCtime = "ctime" // please note that ctime is not a valid pax header. 
- paxGid = "gid" - paxGname = "gname" - paxLinkpath = "linkpath" - paxMtime = "mtime" - paxPath = "path" - paxSize = "size" - paxUid = "uid" - paxUname = "uname" - paxXattr = "SCHILY.xattr." - paxNone = "" -) - // FileInfoHeader creates a partially-populated Header from fi. // If fi describes a symlink, FileInfoHeader records link as the link target. // If fi describes a directory, a slash is appended to the name. -// Because os.FileInfo's Name method returns only the base name of -// the file it describes, it may be necessary to modify the Name field -// of the returned header to provide the full path name of the file. +// +// Since os.FileInfo's Name method only returns the base name of +// the file it describes, it may be necessary to modify Header.Name +// to provide the full path name of the file. +// +// This function does not populate Header.SparseHoles; +// for sparse file support, additionally call Header.DetectSparseHoles. func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { if fi == nil { return nil, errors.New("tar: FileInfo is nil") @@ -208,32 +708,26 @@ func FileInfoHeader(fi os.FileInfo, link string) (*Header, error) { } switch { case fm.IsRegular(): - h.Mode |= c_ISREG h.Typeflag = TypeReg h.Size = fi.Size() case fi.IsDir(): h.Typeflag = TypeDir - h.Mode |= c_ISDIR h.Name += "/" case fm&os.ModeSymlink != 0: h.Typeflag = TypeSymlink - h.Mode |= c_ISLNK h.Linkname = link case fm&os.ModeDevice != 0: if fm&os.ModeCharDevice != 0 { - h.Mode |= c_ISCHR h.Typeflag = TypeChar } else { - h.Mode |= c_ISBLK h.Typeflag = TypeBlock } case fm&os.ModeNamedPipe != 0: h.Typeflag = TypeFifo - h.Mode |= c_ISFIFO case fm&os.ModeSocket != 0: - h.Mode |= c_ISSOCK + return nil, fmt.Errorf("tar: sockets not supported") default: - return nil, fmt.Errorf("archive/tar: unknown file mode %v", fm) + return nil, fmt.Errorf("tar: unknown file mode %v", fm) } if fm&os.ModeSetuid != 0 { h.Mode |= c_ISUID @@ -267,6 +761,15 @@ func FileInfoHeader(fi os.FileInfo, link 
string) (*Header, error) { h.Size = 0 h.Linkname = sys.Linkname } + if sys.SparseHoles != nil { + h.SparseHoles = append([]SparseEntry{}, sys.SparseHoles...) + } + if sys.PAXRecords != nil { + h.PAXRecords = make(map[string]string) + for k, v := range sys.PAXRecords { + h.PAXRecords[k] = v + } + } } if sysStat != nil { return h, sysStat(fi, h) @@ -284,3 +787,10 @@ func isHeaderOnlyType(flag byte) bool { return false } } + +func min(a, b int64) int64 { + if a < b { + return a + } + return b +} diff --git a/vendor/github.com/dmcgowan/go-tar/format.go b/vendor/github.com/dmcgowan/go-tar/format.go index c2c9910d0..cf1289534 100644 --- a/vendor/github.com/dmcgowan/go-tar/format.go +++ b/vendor/github.com/dmcgowan/go-tar/format.go @@ -4,38 +4,131 @@ package tar +import "strings" + +// Format represents the tar archive format. +// +// The original tar format was introduced in Unix V7. +// Since then, there have been multiple competing formats attempting to +// standardize or extend the V7 format to overcome its limitations. +// The most common formats are the USTAR, PAX, and GNU formats, +// each with their own advantages and limitations. 
+// +// The following table captures the capabilities of each format: +// +// | USTAR | PAX | GNU +// ------------------+--------+-----------+---------- +// Name | 256B | unlimited | unlimited +// Linkname | 100B | unlimited | unlimited +// Size | uint33 | unlimited | uint89 +// Mode | uint21 | uint21 | uint57 +// Uid/Gid | uint21 | unlimited | uint57 +// Uname/Gname | 32B | unlimited | 32B +// ModTime | uint33 | unlimited | int89 +// AccessTime | n/a | unlimited | int89 +// ChangeTime | n/a | unlimited | int89 +// Devmajor/Devminor | uint21 | uint21 | uint57 +// ------------------+--------+-----------+---------- +// string encoding | ASCII | UTF-8 | binary +// sub-second times | no | yes | no +// sparse files | no | yes | yes +// +// The table's upper portion shows the Header fields, where each format reports +// the maximum number of bytes allowed for each string field and +// the integer type used to store each numeric field +// (where timestamps are stored as the number of seconds since the Unix epoch). +// +// The table's lower portion shows specialized features of each format, +// such as supported string encodings, support for sub-second timestamps, +// or support for sparse files. +type Format int + // Constants to identify various tar formats. const ( - // The format is unknown. - formatUnknown = (1 << iota) / 2 // Sequence of 0, 1, 2, 4, 8, etc... + // Deliberately hide the meaning of constants from public API. + _ Format = (1 << iota) / 4 // Sequence of 0, 0, 1, 2, 4, 8, etc... + + // FormatUnknown indicates that the format is unknown. + FormatUnknown // The format of the original Unix V7 tar tool prior to standardization. formatV7 - // The old and new GNU formats, which are incompatible with USTAR. - // This does cover the old GNU sparse extension. - // This does not cover the GNU sparse extensions using PAX headers, - // versions 0.0, 0.1, and 1.0; these fall under the PAX format. 
- formatGNU + // FormatUSTAR represents the USTAR header format defined in POSIX.1-1988. + // + // While this format is compatible with most tar readers, + // the format has several limitations making it unsuitable for some usages. + // Most notably, it cannot support sparse files, files larger than 8GiB, + // filenames larger than 256 characters, and non-ASCII filenames. + // + // Reference: + // http://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_06 + FormatUSTAR + + // FormatPAX represents the PAX header format defined in POSIX.1-2001. + // + // PAX extends USTAR by writing a special file with Typeflag TypeXHeader + // preceding the original header. This file contains a set of key-value + // records, which are used to overcome USTAR's shortcomings, in addition to + // providing the ability to have sub-second resolution for timestamps. + // + // Some newer formats add their own extensions to PAX by defining their + // own keys and assigning certain semantic meaning to the associated values. + // For example, sparse file support in PAX is implemented using keys + // defined by the GNU manual (e.g., "GNU.sparse.map"). + // + // Reference: + // http://pubs.opengroup.org/onlinepubs/009695399/utilities/pax.html + FormatPAX + + // FormatGNU represents the GNU header format. + // + // The GNU header format is older than the USTAR and PAX standards and + // is not compatible with them. The GNU format supports + // arbitrary file sizes, filenames of arbitrary encoding and length, + // sparse files, and other features. + // + // It is recommended that PAX be chosen over GNU unless the target + // application can only parse GNU formatted archives. + // + // Reference: + // http://www.gnu.org/software/tar/manual/html_node/Standard.html + FormatGNU // Schily's tar format, which is incompatible with USTAR. // This does not cover STAR extensions to the PAX format; these fall under // the PAX format. 
formatSTAR - // USTAR is the former standardization of tar defined in POSIX.1-1988. - // This is incompatible with the GNU and STAR formats. - formatUSTAR - - // PAX is the latest standardization of tar defined in POSIX.1-2001. - // This is an extension of USTAR and is "backwards compatible" with it. - // - // Some newer formats add their own extensions to PAX, such as GNU sparse - // files and SCHILY extended attributes. Since they are backwards compatible - // with PAX, they will be labelled as "PAX". - formatPAX + formatMax ) +func (f Format) has(f2 Format) bool { return f&f2 != 0 } +func (f *Format) mayBe(f2 Format) { *f |= f2 } +func (f *Format) mayOnlyBe(f2 Format) { *f &= f2 } +func (f *Format) mustNotBe(f2 Format) { *f &^= f2 } + +var formatNames = map[Format]string{ + formatV7: "V7", FormatUSTAR: "USTAR", FormatPAX: "PAX", FormatGNU: "GNU", formatSTAR: "STAR", +} + +func (f Format) String() string { + var ss []string + for f2 := Format(1); f2 < formatMax; f2 <<= 1 { + if f.has(f2) { + ss = append(ss, formatNames[f2]) + } + } + switch len(ss) { + case 0: + return "" + case 1: + return ss[0] + default: + return "(" + strings.Join(ss, " | ") + ")" + } +} + // Magics used to identify various formats. const ( magicGNU, versionGNU = "ustar ", " \x00" @@ -50,6 +143,12 @@ const ( prefixSize = 155 // Max length of the prefix field in USTAR format ) +// blockPadding computes the number of bytes needed to pad offset up to the +// nearest block edge where 0 <= n < blockSize. +func blockPadding(offset int64) (n int64) { + return -offset & (blockSize - 1) +} + var zeroBlock block type block [blockSize]byte @@ -63,14 +162,14 @@ func (b *block) Sparse() sparseArray { return (sparseArray)(b[:]) } // GetFormat checks that the block is a valid tar header based on the checksum. // It then attempts to guess the specific format based on magic values. -// If the checksum fails, then formatUnknown is returned. 
-func (b *block) GetFormat() (format int) { +// If the checksum fails, then FormatUnknown is returned. +func (b *block) GetFormat() Format { // Verify checksum. var p parser value := p.parseOctal(b.V7().Chksum()) chksum1, chksum2 := b.ComputeChecksum() if p.err != nil || (value != chksum1 && value != chksum2) { - return formatUnknown + return FormatUnknown } // Guess the magic values. @@ -81,9 +180,9 @@ func (b *block) GetFormat() (format int) { case magic == magicUSTAR && trailer == trailerSTAR: return formatSTAR case magic == magicUSTAR: - return formatUSTAR + return FormatUSTAR | FormatPAX case magic == magicGNU && version == versionGNU: - return formatGNU + return FormatGNU default: return formatV7 } @@ -91,19 +190,19 @@ func (b *block) GetFormat() (format int) { // SetFormat writes the magic values necessary for specified format // and then updates the checksum accordingly. -func (b *block) SetFormat(format int) { +func (b *block) SetFormat(format Format) { // Set the magic values. - switch format { - case formatV7: + switch { + case format.has(formatV7): // Do nothing. - case formatGNU: + case format.has(FormatGNU): copy(b.GNU().Magic(), magicGNU) copy(b.GNU().Version(), versionGNU) - case formatSTAR: + case format.has(formatSTAR): copy(b.STAR().Magic(), magicUSTAR) copy(b.STAR().Version(), versionUSTAR) copy(b.STAR().Trailer(), trailerSTAR) - case formatUSTAR, formatPAX: + case format.has(FormatUSTAR | FormatPAX): copy(b.USTAR().Magic(), magicUSTAR) copy(b.USTAR().Version(), versionUSTAR) default: @@ -134,6 +233,11 @@ func (b *block) ComputeChecksum() (unsigned, signed int64) { return unsigned, signed } +// Reset clears the block with all zeros. 
+func (b *block) Reset() { + *b = block{} +} + type headerV7 [blockSize]byte func (h *headerV7) Name() []byte { return h[000:][:100] } @@ -187,11 +291,11 @@ func (h *headerUSTAR) Prefix() []byte { return h[345:][:155] } type sparseArray []byte -func (s sparseArray) Entry(i int) sparseNode { return (sparseNode)(s[i*24:]) } +func (s sparseArray) Entry(i int) sparseElem { return (sparseElem)(s[i*24:]) } func (s sparseArray) IsExtended() []byte { return s[24*s.MaxEntries():][:1] } func (s sparseArray) MaxEntries() int { return len(s) / 24 } -type sparseNode []byte +type sparseElem []byte -func (s sparseNode) Offset() []byte { return s[00:][:12] } -func (s sparseNode) NumBytes() []byte { return s[12:][:12] } +func (s sparseElem) Offset() []byte { return s[00:][:12] } +func (s sparseElem) Length() []byte { return s[12:][:12] } diff --git a/vendor/github.com/dmcgowan/go-tar/reader.go b/vendor/github.com/dmcgowan/go-tar/reader.go index a6142c6b8..1d0673020 100644 --- a/vendor/github.com/dmcgowan/go-tar/reader.go +++ b/vendor/github.com/dmcgowan/go-tar/reader.go @@ -4,33 +4,23 @@ package tar -// TODO(dsymonds): -// - pax extensions - import ( "bytes" - "errors" "io" "io/ioutil" - "math" "strconv" "strings" "time" ) -var ( - ErrHeader = errors.New("archive/tar: invalid tar header") -) - -// A Reader provides sequential access to the contents of a tar archive. -// A tar archive consists of a sequence of files. -// The Next method advances to the next file in the archive (including the first), -// and then it can be treated as an io.Reader to access the file's data. +// Reader provides sequential access to the contents of a tar archive. +// Reader.Next advances to the next file in the archive (including the first), +// and then Reader can be treated as an io.Reader to access the file's data. 
type Reader struct { r io.Reader - pad int64 // amount of padding (ignored) after current file entry - curr numBytesReader // reader for current file entry - blk block // buffer to use as temporary local storage + pad int64 // Amount of padding (ignored) after current file entry + curr fileReader // Reader for current file entry + blk block // Buffer to use as temporary local storage // err is a persistent error. // It is only the responsibility of every exported method of Reader to @@ -38,68 +28,21 @@ type Reader struct { err error } -// A numBytesReader is an io.Reader with a numBytes method, returning the number -// of bytes remaining in the underlying encoded data. -type numBytesReader interface { +type fileReader interface { io.Reader - numBytes() int64 -} + fileState -// A regFileReader is a numBytesReader for reading file data from a tar archive. -type regFileReader struct { - r io.Reader // underlying reader - nb int64 // number of unread bytes for current file entry + WriteTo(io.Writer) (int64, error) } -// A sparseFileReader is a numBytesReader for reading sparse file data from a -// tar archive. -type sparseFileReader struct { - rfr numBytesReader // Reads the sparse-encoded file data - sp []sparseEntry // The sparse map for the file - pos int64 // Keeps track of file position - total int64 // Total size of the file -} - -// A sparseEntry holds a single entry in a sparse file's sparse map. -// -// Sparse files are represented using a series of sparseEntrys. -// Despite the name, a sparseEntry represents an actual data fragment that -// references data found in the underlying archive stream. All regions not -// covered by a sparseEntry are logically filled with zeros. 
-// -// For example, if the underlying raw file contains the 10-byte data: -// var compactData = "abcdefgh" -// -// And the sparse map has the following entries: -// var sp = []sparseEntry{ -// {offset: 2, numBytes: 5} // Data fragment for [2..7] -// {offset: 18, numBytes: 3} // Data fragment for [18..21] -// } -// -// Then the content of the resulting sparse file with a "real" size of 25 is: -// var sparseData = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4 -type sparseEntry struct { - offset int64 // Starting position of the fragment - numBytes int64 // Length of the fragment -} - -// Keywords for GNU sparse files in a PAX extended header -const ( - paxGNUSparseNumBlocks = "GNU.sparse.numblocks" - paxGNUSparseOffset = "GNU.sparse.offset" - paxGNUSparseNumBytes = "GNU.sparse.numbytes" - paxGNUSparseMap = "GNU.sparse.map" - paxGNUSparseName = "GNU.sparse.name" - paxGNUSparseMajor = "GNU.sparse.major" - paxGNUSparseMinor = "GNU.sparse.minor" - paxGNUSparseSize = "GNU.sparse.size" - paxGNUSparseRealSize = "GNU.sparse.realsize" -) - // NewReader creates a new Reader reading from r. -func NewReader(r io.Reader) *Reader { return &Reader{r: r} } +func NewReader(r io.Reader) *Reader { + return &Reader{r: r, curr: ®FileReader{r, 0}} +} // Next advances to the next entry in the tar archive. +// The Header.Size determines how many bytes can be read for the next file. +// Any remaining data in the current file is automatically discarded. // // io.EOF is returned at the end of the input. func (tr *Reader) Next() (*Header, error) { @@ -112,18 +55,26 @@ func (tr *Reader) Next() (*Header, error) { } func (tr *Reader) next() (*Header, error) { - var extHdrs map[string]string + var paxHdrs map[string]string + var gnuLongName, gnuLongLink string // Externally, Next iterates through the tar archive as if it is a series of // files. Internally, the tar format often uses fake "files" to add meta // data that describes the next file. 
These meta data "files" should not // normally be visible to the outside. As such, this loop iterates through // one or more "header files" until it finds a "normal file". + format := FormatUSTAR | FormatPAX | FormatGNU loop: for { - if err := tr.skipUnread(); err != nil { + // Discard the remainder of the file and any padding. + if err := discard(tr.r, tr.curr.PhysicalRemaining()); err != nil { return nil, err } + if _, err := tryReadFull(tr.r, tr.blk[:tr.pad]); err != nil { + return nil, err + } + tr.pad = 0 + hdr, rawHdr, err := tr.readHeader() if err != nil { return nil, err @@ -131,43 +82,57 @@ loop: if err := tr.handleRegularFile(hdr); err != nil { return nil, err } + format.mayOnlyBe(hdr.Format) // Check for PAX/GNU special headers and files. switch hdr.Typeflag { - case TypeXHeader: - extHdrs, err = parsePAX(tr) + case TypeXHeader, TypeXGlobalHeader: + format.mayOnlyBe(FormatPAX) + paxHdrs, err = parsePAX(tr) if err != nil { return nil, err } + if hdr.Typeflag == TypeXGlobalHeader { + mergePAX(hdr, paxHdrs) + return &Header{ + Typeflag: hdr.Typeflag, + Xattrs: hdr.Xattrs, + PAXRecords: hdr.PAXRecords, + Format: format, + }, nil + } continue loop // This is a meta header affecting the next header case TypeGNULongName, TypeGNULongLink: + format.mayOnlyBe(FormatGNU) realname, err := ioutil.ReadAll(tr) if err != nil { return nil, err } - // Convert GNU extensions to use PAX headers. - if extHdrs == nil { - extHdrs = make(map[string]string) - } var p parser switch hdr.Typeflag { case TypeGNULongName: - extHdrs[paxPath] = p.parseString(realname) + gnuLongName = p.parseString(realname) case TypeGNULongLink: - extHdrs[paxLinkpath] = p.parseString(realname) - } - if p.err != nil { - return nil, p.err + gnuLongLink = p.parseString(realname) } continue loop // This is a meta header affecting the next header default: // The old GNU sparse format is handled here since it is technically // just a regular file with additional attributes. 
- if err := mergePAX(hdr, extHdrs); err != nil { + if err := mergePAX(hdr, paxHdrs); err != nil { return nil, err } + if gnuLongName != "" { + hdr.Name = gnuLongName + } + if gnuLongLink != "" { + hdr.Linkname = gnuLongLink + } + if hdr.Typeflag == TypeRegA && strings.HasSuffix(hdr.Name, "/") { + hdr.Typeflag = TypeDir // Legacy archives use trailing slash for directories + } // The extended headers may have updated the size. // Thus, setup the regFileReader again after merging PAX headers. @@ -177,9 +142,15 @@ loop: // Sparse formats rely on being able to read from the logical data // section; there must be a preceding call to handleRegularFile. - if err := tr.handleSparseFile(hdr, rawHdr, extHdrs); err != nil { + if err := tr.handleSparseFile(hdr, rawHdr); err != nil { return nil, err } + + // Set the final guess at the format. + if format.has(FormatUSTAR) && format.has(FormatPAX) { + format.mayOnlyBe(FormatUSTAR) + } + hdr.Format = format return hdr, nil // This is a file, so stop } } @@ -197,105 +168,87 @@ func (tr *Reader) handleRegularFile(hdr *Header) error { return ErrHeader } - tr.pad = -nb & (blockSize - 1) // blockSize is a power of two + tr.pad = blockPadding(nb) tr.curr = ®FileReader{r: tr.r, nb: nb} return nil } // handleSparseFile checks if the current file is a sparse format of any type // and sets the curr reader appropriately. -func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block, extHdrs map[string]string) error { - var sp []sparseEntry +func (tr *Reader) handleSparseFile(hdr *Header, rawHdr *block) error { + var spd sparseDatas var err error if hdr.Typeflag == TypeGNUSparse { - sp, err = tr.readOldGNUSparseMap(hdr, rawHdr) - if err != nil { - return err - } + spd, err = tr.readOldGNUSparseMap(hdr, rawHdr) } else { - sp, err = tr.checkForGNUSparsePAXHeaders(hdr, extHdrs) - if err != nil { - return err - } + spd, err = tr.readGNUSparsePAXHeaders(hdr) } // If sp is non-nil, then this is a sparse file. 
- // Note that it is possible for len(sp) to be zero. - if sp != nil { - tr.curr, err = newSparseFileReader(tr.curr, sp, hdr.Size) + // Note that it is possible for len(sp) == 0. + if err == nil && spd != nil { + if isHeaderOnlyType(hdr.Typeflag) || !validateSparseEntries(spd, hdr.Size) { + return ErrHeader + } + sph := invertSparseEntries(spd, hdr.Size) + tr.curr = &sparseFileReader{tr.curr, sph, 0} + hdr.SparseHoles = append([]SparseEntry{}, sph...) } return err } -// checkForGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. If they are found, then -// this function reads the sparse map and returns it. Unknown sparse formats are ignored, causing the file to -// be treated as a regular file. -func (tr *Reader) checkForGNUSparsePAXHeaders(hdr *Header, headers map[string]string) ([]sparseEntry, error) { - var sparseFormat string - - // Check for sparse format indicators - major, majorOk := headers[paxGNUSparseMajor] - minor, minorOk := headers[paxGNUSparseMinor] - sparseName, sparseNameOk := headers[paxGNUSparseName] - _, sparseMapOk := headers[paxGNUSparseMap] - sparseSize, sparseSizeOk := headers[paxGNUSparseSize] - sparseRealSize, sparseRealSizeOk := headers[paxGNUSparseRealSize] - - // Identify which, if any, sparse format applies from which PAX headers are set - if majorOk && minorOk { - sparseFormat = major + "." + minor - } else if sparseNameOk && sparseMapOk { - sparseFormat = "0.1" - } else if sparseSizeOk { - sparseFormat = "0.0" - } else { - // Not a PAX format GNU sparse file. - return nil, nil +// readGNUSparsePAXHeaders checks the PAX headers for GNU sparse headers. +// If they are found, then this function reads the sparse map and returns it. +// This assumes that 0.0 headers have already been converted to 0.1 headers +// by the the PAX header parsing logic. +func (tr *Reader) readGNUSparsePAXHeaders(hdr *Header) (sparseDatas, error) { + // Identify the version of GNU headers. 
+ var is1x0 bool + major, minor := hdr.PAXRecords[paxGNUSparseMajor], hdr.PAXRecords[paxGNUSparseMinor] + switch { + case major == "0" && (minor == "0" || minor == "1"): + is1x0 = false + case major == "1" && minor == "0": + is1x0 = true + case major != "" || minor != "": + return nil, nil // Unknown GNU sparse PAX version + case hdr.PAXRecords[paxGNUSparseMap] != "": + is1x0 = false // 0.0 and 0.1 did not have explicit version records, so guess + default: + return nil, nil // Not a PAX format GNU sparse file. } + hdr.Format.mayOnlyBe(FormatPAX) - // Check for unknown sparse format - if sparseFormat != "0.0" && sparseFormat != "0.1" && sparseFormat != "1.0" { - return nil, nil + // Update hdr from GNU sparse PAX headers. + if name := hdr.PAXRecords[paxGNUSparseName]; name != "" { + hdr.Name = name } - - // Update hdr from GNU sparse PAX headers - if sparseNameOk { - hdr.Name = sparseName + size := hdr.PAXRecords[paxGNUSparseSize] + if size == "" { + size = hdr.PAXRecords[paxGNUSparseRealSize] } - if sparseSizeOk { - realSize, err := strconv.ParseInt(sparseSize, 10, 64) + if size != "" { + n, err := strconv.ParseInt(size, 10, 64) if err != nil { return nil, ErrHeader } - hdr.Size = realSize - } else if sparseRealSizeOk { - realSize, err := strconv.ParseInt(sparseRealSize, 10, 64) - if err != nil { - return nil, ErrHeader - } - hdr.Size = realSize + hdr.Size = n } - // Set up the sparse map, according to the particular sparse format in use - var sp []sparseEntry - var err error - switch sparseFormat { - case "0.0", "0.1": - sp, err = readGNUSparseMap0x1(headers) - case "1.0": - sp, err = readGNUSparseMap1x0(tr.curr) + // Read the sparse map according to the appropriate format. + if is1x0 { + return readGNUSparseMap1x0(tr.curr) } - return sp, err + return readGNUSparseMap0x1(hdr.PAXRecords) } -// mergePAX merges well known headers according to PAX standard. 
-// In general headers with the same name as those found -// in the header struct overwrite those found in the header -// struct with higher precision or longer values. Esp. useful -// for name and linkname fields. -func mergePAX(hdr *Header, headers map[string]string) (err error) { - var id64 int64 - for k, v := range headers { +// mergePAX merges paxHdrs into hdr for all relevant fields of Header. +func mergePAX(hdr *Header, paxHdrs map[string]string) (err error) { + for k, v := range paxHdrs { + if v == "" { + continue // Keep the original USTAR value + } + var id64 int64 switch k { case paxPath: hdr.Name = v @@ -320,17 +273,18 @@ func mergePAX(hdr *Header, headers map[string]string) (err error) { case paxSize: hdr.Size, err = strconv.ParseInt(v, 10, 64) default: - if strings.HasPrefix(k, paxXattr) { + if strings.HasPrefix(k, paxSchilyXattr) { if hdr.Xattrs == nil { hdr.Xattrs = make(map[string]string) } - hdr.Xattrs[k[len(paxXattr):]] = v + hdr.Xattrs[k[len(paxSchilyXattr):]] = v } } if err != nil { return ErrHeader } } + hdr.PAXRecords = paxHdrs return nil } @@ -348,7 +302,7 @@ func parsePAX(r io.Reader) (map[string]string, error) { // headers since 0.0 headers were not PAX compliant. var sparseMap []string - extHdrs := make(map[string]string) + paxHdrs := make(map[string]string) for len(sbuf) > 0 { key, value, residual, err := parsePAXRecord(sbuf) if err != nil { @@ -366,58 +320,13 @@ func parsePAX(r io.Reader) (map[string]string, error) { } sparseMap = append(sparseMap, value) default: - // According to PAX specification, a value is stored only if it is - // non-empty. Otherwise, the key is deleted. 
- if len(value) > 0 { - extHdrs[key] = value - } else { - delete(extHdrs, key) - } + paxHdrs[key] = value } } if len(sparseMap) > 0 { - extHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",") + paxHdrs[paxGNUSparseMap] = strings.Join(sparseMap, ",") } - return extHdrs, nil -} - -// skipUnread skips any unread bytes in the existing file entry, as well as any -// alignment padding. It returns io.ErrUnexpectedEOF if any io.EOF is -// encountered in the data portion; it is okay to hit io.EOF in the padding. -// -// Note that this function still works properly even when sparse files are being -// used since numBytes returns the bytes remaining in the underlying io.Reader. -func (tr *Reader) skipUnread() error { - dataSkip := tr.numBytes() // Number of data bytes to skip - totalSkip := dataSkip + tr.pad // Total number of bytes to skip - tr.curr, tr.pad = nil, 0 - - // If possible, Seek to the last byte before the end of the data section. - // Do this because Seek is often lazy about reporting errors; this will mask - // the fact that the tar stream may be truncated. We can rely on the - // io.CopyN done shortly afterwards to trigger any IO errors. - var seekSkipped int64 // Number of bytes skipped via Seek - if sr, ok := tr.r.(io.Seeker); ok && dataSkip > 1 { - // Not all io.Seeker can actually Seek. For example, os.Stdin implements - // io.Seeker, but calling Seek always returns an error and performs - // no action. Thus, we try an innocent seek to the current position - // to see if Seek is really supported. - pos1, err := sr.Seek(0, io.SeekCurrent) - if err == nil { - // Seek seems supported, so perform the real Seek. 
- pos2, err := sr.Seek(dataSkip-1, io.SeekCurrent) - if err != nil { - return err - } - seekSkipped = pos2 - pos1 - } - } - - copySkipped, err := io.CopyN(ioutil.Discard, tr.r, totalSkip-seekSkipped) - if err == io.EOF && seekSkipped+copySkipped < dataSkip { - err = io.ErrUnexpectedEOF - } - return err + return paxHdrs, nil } // readHeader reads the next block header and assumes that the underlying reader @@ -445,7 +354,7 @@ func (tr *Reader) readHeader() (*Header, *block, error) { // Verify the header matches a known format. format := tr.blk.GetFormat() - if format == formatUnknown { + if format == FormatUnknown { return nil, nil, ErrHeader } @@ -454,37 +363,86 @@ func (tr *Reader) readHeader() (*Header, *block, error) { // Unpack the V7 header. v7 := tr.blk.V7() + hdr.Typeflag = v7.TypeFlag()[0] hdr.Name = p.parseString(v7.Name()) + hdr.Linkname = p.parseString(v7.LinkName()) + hdr.Size = p.parseNumeric(v7.Size()) hdr.Mode = p.parseNumeric(v7.Mode()) hdr.Uid = int(p.parseNumeric(v7.UID())) hdr.Gid = int(p.parseNumeric(v7.GID())) - hdr.Size = p.parseNumeric(v7.Size()) hdr.ModTime = time.Unix(p.parseNumeric(v7.ModTime()), 0) - hdr.Typeflag = v7.TypeFlag()[0] - hdr.Linkname = p.parseString(v7.LinkName()) // Unpack format specific fields. if format > formatV7 { ustar := tr.blk.USTAR() hdr.Uname = p.parseString(ustar.UserName()) hdr.Gname = p.parseString(ustar.GroupName()) - if hdr.Typeflag == TypeChar || hdr.Typeflag == TypeBlock { - hdr.Devmajor = p.parseNumeric(ustar.DevMajor()) - hdr.Devminor = p.parseNumeric(ustar.DevMinor()) - } + hdr.Devmajor = p.parseNumeric(ustar.DevMajor()) + hdr.Devminor = p.parseNumeric(ustar.DevMinor()) var prefix string - switch format { - case formatUSTAR, formatGNU: - // TODO(dsnet): Do not use the prefix field for the GNU format! 
- // See golang.org/issues/12594 + switch { + case format.has(FormatUSTAR | FormatPAX): + hdr.Format = format ustar := tr.blk.USTAR() prefix = p.parseString(ustar.Prefix()) - case formatSTAR: + + // For Format detection, check if block is properly formatted since + // the parser is more liberal than what USTAR actually permits. + notASCII := func(r rune) bool { return r >= 0x80 } + if bytes.IndexFunc(tr.blk[:], notASCII) >= 0 { + hdr.Format = FormatUnknown // Non-ASCII characters in block. + } + nul := func(b []byte) bool { return int(b[len(b)-1]) == 0 } + if !(nul(v7.Size()) && nul(v7.Mode()) && nul(v7.UID()) && nul(v7.GID()) && + nul(v7.ModTime()) && nul(ustar.DevMajor()) && nul(ustar.DevMinor())) { + hdr.Format = FormatUnknown // Numeric fields must end in NUL + } + case format.has(formatSTAR): star := tr.blk.STAR() prefix = p.parseString(star.Prefix()) hdr.AccessTime = time.Unix(p.parseNumeric(star.AccessTime()), 0) hdr.ChangeTime = time.Unix(p.parseNumeric(star.ChangeTime()), 0) + case format.has(FormatGNU): + hdr.Format = format + var p2 parser + gnu := tr.blk.GNU() + if b := gnu.AccessTime(); b[0] != 0 { + hdr.AccessTime = time.Unix(p2.parseNumeric(b), 0) + } + if b := gnu.ChangeTime(); b[0] != 0 { + hdr.ChangeTime = time.Unix(p2.parseNumeric(b), 0) + } + + // Prior to Go1.8, the Writer had a bug where it would output + // an invalid tar file in certain rare situations because the logic + // incorrectly believed that the old GNU format had a prefix field. + // This is wrong and leads to an output file that mangles the + // atime and ctime fields, which are often left unused. + // + // In order to continue reading tar files created by former, buggy + // versions of Go, we skeptically parse the atime and ctime fields. + // If we are unable to parse them and the prefix field looks like + // an ASCII string, then we fallback on the pre-Go1.8 behavior + // of treating these fields as the USTAR prefix field. 
+ // + // Note that this will not use the fallback logic for all possible + // files generated by a pre-Go1.8 toolchain. If the generated file + // happened to have a prefix field that parses as valid + // atime and ctime fields (e.g., when they are valid octal strings), + // then it is impossible to distinguish between an valid GNU file + // and an invalid pre-Go1.8 file. + // + // See https://golang.org/issues/12594 + // See https://golang.org/issues/21005 + if p2.err != nil { + hdr.AccessTime, hdr.ChangeTime = time.Time{}, time.Time{} + ustar := tr.blk.USTAR() + if s := p.parseString(ustar.Prefix()); isASCII(s) { + prefix = s + } + hdr.Format = FormatUnknown // Buggy file is not GNU + } } if len(prefix) > 0 { hdr.Name = prefix + "/" + hdr.Name @@ -501,21 +459,22 @@ func (tr *Reader) readHeader() (*Header, *block, error) { // The Header.Size does not reflect the size of any extended headers used. // Thus, this function will read from the raw io.Reader to fetch extra headers. // This method mutates blk in the process. -func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, error) { +func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) (sparseDatas, error) { // Make sure that the input format is GNU. // Unfortunately, the STAR format also has a sparse header format that uses // the same type flag but has a completely different layout. - if blk.GetFormat() != formatGNU { + if blk.GetFormat() != FormatGNU { return nil, ErrHeader } + hdr.Format.mayOnlyBe(FormatGNU) var p parser hdr.Size = p.parseNumeric(blk.GNU().RealSize()) if p.err != nil { return nil, p.err } - var s sparseArray = blk.GNU().Sparse() - var sp = make([]sparseEntry, 0, s.MaxEntries()) + s := blk.GNU().Sparse() + spd := make(sparseDatas, 0, s.MaxEntries()) for { for i := 0; i < s.MaxEntries(); i++ { // This termination condition is identical to GNU and BSD tar. 
@@ -523,25 +482,22 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, e break // Don't return, need to process extended headers (even if empty) } offset := p.parseNumeric(s.Entry(i).Offset()) - numBytes := p.parseNumeric(s.Entry(i).NumBytes()) + length := p.parseNumeric(s.Entry(i).Length()) if p.err != nil { return nil, p.err } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + spd = append(spd, SparseEntry{Offset: offset, Length: length}) } if s.IsExtended()[0] > 0 { // There are more entries. Read an extension header and parse its entries. - if _, err := io.ReadFull(tr.r, blk[:]); err != nil { - if err == io.EOF { - err = io.ErrUnexpectedEOF - } + if _, err := mustReadFull(tr.r, blk[:]); err != nil { return nil, err } s = blk.Sparse() continue } - return sp, nil // Done + return spd, nil // Done } } @@ -549,28 +505,27 @@ func (tr *Reader) readOldGNUSparseMap(hdr *Header, blk *block) ([]sparseEntry, e // version 1.0. The format of the sparse map consists of a series of // newline-terminated numeric fields. The first field is the number of entries // and is always present. Following this are the entries, consisting of two -// fields (offset, numBytes). This function must stop reading at the end +// fields (offset, length). This function must stop reading at the end // boundary of the block containing the last newline. // // Note that the GNU manual says that numeric values should be encoded in octal // format. However, the GNU tar utility itself outputs these values in decimal. // As such, this library treats values as being encoded in decimal. 
-func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { - var cntNewline int64 - var buf bytes.Buffer - var blk = make([]byte, blockSize) +func readGNUSparseMap1x0(r io.Reader) (sparseDatas, error) { + var ( + cntNewline int64 + buf bytes.Buffer + blk block + ) - // feedTokens copies data in numBlock chunks from r into buf until there are + // feedTokens copies data in blocks from r into buf until there are // at least cnt newlines in buf. It will not read more blocks than needed. - var feedTokens = func(cnt int64) error { - for cntNewline < cnt { - if _, err := io.ReadFull(r, blk); err != nil { - if err == io.EOF { - err = io.ErrUnexpectedEOF - } + feedTokens := func(n int64) error { + for cntNewline < n { + if _, err := mustReadFull(r, blk[:]); err != nil { return err } - buf.Write(blk) + buf.Write(blk[:]) for _, c := range blk { if c == '\n' { cntNewline++ @@ -582,10 +537,10 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { // nextToken gets the next token delimited by a newline. This assumes that // at least one newline exists in the buffer. - var nextToken = func() string { + nextToken := func() string { cntNewline-- tok, _ := buf.ReadString('\n') - return tok[:len(tok)-1] // Cut off newline + return strings.TrimRight(tok, "\n") } // Parse for the number of entries. 
@@ -604,80 +559,67 @@ func readGNUSparseMap1x0(r io.Reader) ([]sparseEntry, error) { if err := feedTokens(2 * numEntries); err != nil { return nil, err } - sp := make([]sparseEntry, 0, numEntries) + spd := make(sparseDatas, 0, numEntries) for i := int64(0); i < numEntries; i++ { - offset, err := strconv.ParseInt(nextToken(), 10, 64) - if err != nil { + offset, err1 := strconv.ParseInt(nextToken(), 10, 64) + length, err2 := strconv.ParseInt(nextToken(), 10, 64) + if err1 != nil || err2 != nil { return nil, ErrHeader } - numBytes, err := strconv.ParseInt(nextToken(), 10, 64) - if err != nil { - return nil, ErrHeader - } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + spd = append(spd, SparseEntry{Offset: offset, Length: length}) } - return sp, nil + return spd, nil } // readGNUSparseMap0x1 reads the sparse map as stored in GNU's PAX sparse format // version 0.1. The sparse map is stored in the PAX headers. -func readGNUSparseMap0x1(extHdrs map[string]string) ([]sparseEntry, error) { +func readGNUSparseMap0x1(paxHdrs map[string]string) (sparseDatas, error) { // Get number of entries. // Use integer overflow resistant math to check this. - numEntriesStr := extHdrs[paxGNUSparseNumBlocks] + numEntriesStr := paxHdrs[paxGNUSparseNumBlocks] numEntries, err := strconv.ParseInt(numEntriesStr, 10, 0) // Intentionally parse as native int if err != nil || numEntries < 0 || int(2*numEntries) < int(numEntries) { return nil, ErrHeader } // There should be two numbers in sparseMap for each entry. - sparseMap := strings.Split(extHdrs[paxGNUSparseMap], ",") + sparseMap := strings.Split(paxHdrs[paxGNUSparseMap], ",") + if len(sparseMap) == 1 && sparseMap[0] == "" { + sparseMap = sparseMap[:0] + } if int64(len(sparseMap)) != 2*numEntries { return nil, ErrHeader } // Loop through the entries in the sparse map. // numEntries is trusted now. 
- sp := make([]sparseEntry, 0, numEntries) - for i := int64(0); i < numEntries; i++ { - offset, err := strconv.ParseInt(sparseMap[2*i], 10, 64) - if err != nil { + spd := make(sparseDatas, 0, numEntries) + for len(sparseMap) >= 2 { + offset, err1 := strconv.ParseInt(sparseMap[0], 10, 64) + length, err2 := strconv.ParseInt(sparseMap[1], 10, 64) + if err1 != nil || err2 != nil { return nil, ErrHeader } - numBytes, err := strconv.ParseInt(sparseMap[2*i+1], 10, 64) - if err != nil { - return nil, ErrHeader - } - sp = append(sp, sparseEntry{offset: offset, numBytes: numBytes}) + spd = append(spd, SparseEntry{Offset: offset, Length: length}) + sparseMap = sparseMap[2:] } - return sp, nil + return spd, nil } -// numBytes returns the number of bytes left to read in the current file's entry -// in the tar archive, or 0 if there is no current file. -func (tr *Reader) numBytes() int64 { - if tr.curr == nil { - // No current file, so no bytes - return 0 - } - return tr.curr.numBytes() -} - -// Read reads from the current entry in the tar archive. -// It returns 0, io.EOF when it reaches the end of that entry, -// until Next is called to advance to the next entry. +// Read reads from the current file in the tar archive. +// It returns (0, io.EOF) when it reaches the end of that file, +// until Next is called to advance to the next file. // -// Calling Read on special types like TypeLink, TypeSymLink, TypeChar, -// TypeBlock, TypeDir, and TypeFifo returns 0, io.EOF regardless of what +// If the current file is sparse, then the regions marked as a hole +// are read back as NUL-bytes. +// +// Calling Read on special types like TypeLink, TypeSymlink, TypeChar, +// TypeBlock, TypeDir, and TypeFifo returns (0, io.EOF) regardless of what // the Header.Size claims. 
func (tr *Reader) Read(b []byte) (int, error) { if tr.err != nil { return 0, tr.err } - if tr.curr == nil { - return 0, io.EOF - } - n, err := tr.curr.Read(b) if err != nil && err != io.EOF { tr.err = err @@ -685,116 +627,226 @@ func (tr *Reader) Read(b []byte) (int, error) { return n, err } -func (rfr *regFileReader) Read(b []byte) (n int, err error) { - if rfr.nb == 0 { - // file consumed - return 0, io.EOF +// WriteTo writes the content of the current file to w. +// The bytes written matches the number of remaining bytes in the current file. +// +// If the current file is sparse and w is an io.WriteSeeker, +// then WriteTo uses Seek to skip past holes defined in Header.SparseHoles, +// assuming that skipped regions are filled with NULs. +// This always writes the last byte to ensure w is the right size. +func (tr *Reader) WriteTo(w io.Writer) (int64, error) { + if tr.err != nil { + return 0, tr.err } - if int64(len(b)) > rfr.nb { - b = b[0:rfr.nb] - } - n, err = rfr.r.Read(b) - rfr.nb -= int64(n) - - if err == io.EOF && rfr.nb > 0 { - err = io.ErrUnexpectedEOF - } - return -} - -// numBytes returns the number of bytes left to read in the file's data in the tar archive. -func (rfr *regFileReader) numBytes() int64 { - return rfr.nb -} - -// newSparseFileReader creates a new sparseFileReader, but validates all of the -// sparse entries before doing so. -func newSparseFileReader(rfr numBytesReader, sp []sparseEntry, total int64) (*sparseFileReader, error) { - if total < 0 { - return nil, ErrHeader // Total size cannot be negative - } - - // Validate all sparse entries. These are the same checks as performed by - // the BSD tar utility. 
- for i, s := range sp { - switch { - case s.offset < 0 || s.numBytes < 0: - return nil, ErrHeader // Negative values are never okay - case s.offset > math.MaxInt64-s.numBytes: - return nil, ErrHeader // Integer overflow with large length - case s.offset+s.numBytes > total: - return nil, ErrHeader // Region extends beyond the "real" size - case i > 0 && sp[i-1].offset+sp[i-1].numBytes > s.offset: - return nil, ErrHeader // Regions can't overlap and must be in order - } - } - return &sparseFileReader{rfr: rfr, sp: sp, total: total}, nil -} - -// readHole reads a sparse hole ending at endOffset. -func (sfr *sparseFileReader) readHole(b []byte, endOffset int64) int { - n64 := endOffset - sfr.pos - if n64 > int64(len(b)) { - n64 = int64(len(b)) - } - n := int(n64) - for i := 0; i < n; i++ { - b[i] = 0 - } - sfr.pos += n64 - return n -} - -// Read reads the sparse file data in expanded form. -func (sfr *sparseFileReader) Read(b []byte) (n int, err error) { - // Skip past all empty fragments. - for len(sfr.sp) > 0 && sfr.sp[0].numBytes == 0 { - sfr.sp = sfr.sp[1:] - } - - // If there are no more fragments, then it is possible that there - // is one last sparse hole. - if len(sfr.sp) == 0 { - // This behavior matches the BSD tar utility. - // However, GNU tar stops returning data even if sfr.total is unmet. - if sfr.pos < sfr.total { - return sfr.readHole(b, sfr.total), nil - } - return 0, io.EOF - } - - // In front of a data fragment, so read a hole. - if sfr.pos < sfr.sp[0].offset { - return sfr.readHole(b, sfr.sp[0].offset), nil - } - - // In a data fragment, so read from it. - // This math is overflow free since we verify that offset and numBytes can - // be safely added when creating the sparseFileReader. 
- endPos := sfr.sp[0].offset + sfr.sp[0].numBytes // End offset of fragment - bytesLeft := endPos - sfr.pos // Bytes left in fragment - if int64(len(b)) > bytesLeft { - b = b[:bytesLeft] - } - - n, err = sfr.rfr.Read(b) - sfr.pos += int64(n) - if err == io.EOF { - if sfr.pos < endPos { - err = io.ErrUnexpectedEOF // There was supposed to be more data - } else if sfr.pos < sfr.total { - err = nil // There is still an implicit sparse hole at the end - } - } - - if sfr.pos == endPos { - sfr.sp = sfr.sp[1:] // We are done with this fragment, so pop it + n, err := tr.curr.WriteTo(w) + if err != nil { + tr.err = err } return n, err } -// numBytes returns the number of bytes left to read in the sparse file's -// sparse-encoded data in the tar archive. -func (sfr *sparseFileReader) numBytes() int64 { - return sfr.rfr.numBytes() +// regFileReader is a fileReader for reading data from a regular file entry. +type regFileReader struct { + r io.Reader // Underlying Reader + nb int64 // Number of remaining bytes to read +} + +func (fr *regFileReader) Read(b []byte) (n int, err error) { + if int64(len(b)) > fr.nb { + b = b[:fr.nb] + } + if len(b) > 0 { + n, err = fr.r.Read(b) + fr.nb -= int64(n) + } + switch { + case err == io.EOF && fr.nb > 0: + return n, io.ErrUnexpectedEOF + case err == nil && fr.nb == 0: + return n, io.EOF + default: + return n, err + } +} + +func (fr *regFileReader) WriteTo(w io.Writer) (int64, error) { + return io.Copy(w, struct{ io.Reader }{fr}) +} + +func (fr regFileReader) LogicalRemaining() int64 { + return fr.nb +} + +func (fr regFileReader) PhysicalRemaining() int64 { + return fr.nb +} + +// sparseFileReader is a fileReader for reading data from a sparse file entry. 
+type sparseFileReader struct { + fr fileReader // Underlying fileReader + sp sparseHoles // Normalized list of sparse holes + pos int64 // Current position in sparse file +} + +func (sr *sparseFileReader) Read(b []byte) (n int, err error) { + finished := int64(len(b)) >= sr.LogicalRemaining() + if finished { + b = b[:sr.LogicalRemaining()] + } + + b0 := b + endPos := sr.pos + int64(len(b)) + for endPos > sr.pos && err == nil { + var nf int // Bytes read in fragment + holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset() + if sr.pos < holeStart { // In a data fragment + bf := b[:min(int64(len(b)), holeStart-sr.pos)] + nf, err = tryReadFull(sr.fr, bf) + } else { // In a hole fragment + bf := b[:min(int64(len(b)), holeEnd-sr.pos)] + nf, err = tryReadFull(zeroReader{}, bf) + } + b = b[nf:] + sr.pos += int64(nf) + if sr.pos >= holeEnd && len(sr.sp) > 1 { + sr.sp = sr.sp[1:] // Ensure last fragment always remains + } + } + + n = len(b0) - len(b) + switch { + case err == io.EOF: + return n, errMissData // Less data in dense file than sparse file + case err != nil: + return n, err + case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0: + return n, errUnrefData // More data in dense file than sparse file + case finished: + return n, io.EOF + default: + return n, nil + } +} + +func (sr *sparseFileReader) WriteTo(w io.Writer) (n int64, err error) { + ws, ok := w.(io.WriteSeeker) + if ok { + if _, err := ws.Seek(0, io.SeekCurrent); err != nil { + ok = false // Not all io.Seeker can really seek + } + } + if !ok { + return io.Copy(w, struct{ io.Reader }{sr}) + } + + var writeLastByte bool + pos0 := sr.pos + for sr.LogicalRemaining() > 0 && !writeLastByte && err == nil { + var nf int64 // Size of fragment + holeStart, holeEnd := sr.sp[0].Offset, sr.sp[0].endOffset() + if sr.pos < holeStart { // In a data fragment + nf = holeStart - sr.pos + nf, err = io.CopyN(ws, sr.fr, nf) + } else { // In a hole fragment + nf = holeEnd - sr.pos + if sr.PhysicalRemaining() == 0 { 
+ writeLastByte = true + nf-- + } + _, err = ws.Seek(nf, io.SeekCurrent) + } + sr.pos += nf + if sr.pos >= holeEnd && len(sr.sp) > 1 { + sr.sp = sr.sp[1:] // Ensure last fragment always remains + } + } + + // If the last fragment is a hole, then seek to 1-byte before EOF, and + // write a single byte to ensure the file is the right size. + if writeLastByte && err == nil { + _, err = ws.Write([]byte{0}) + sr.pos++ + } + + n = sr.pos - pos0 + switch { + case err == io.EOF: + return n, errMissData // Less data in dense file than sparse file + case err != nil: + return n, err + case sr.LogicalRemaining() == 0 && sr.PhysicalRemaining() > 0: + return n, errUnrefData // More data in dense file than sparse file + default: + return n, nil + } +} + +func (sr sparseFileReader) LogicalRemaining() int64 { + return sr.sp[len(sr.sp)-1].endOffset() - sr.pos +} +func (sr sparseFileReader) PhysicalRemaining() int64 { + return sr.fr.PhysicalRemaining() +} + +type zeroReader struct{} + +func (zeroReader) Read(b []byte) (int, error) { + for i := range b { + b[i] = 0 + } + return len(b), nil +} + +// mustReadFull is like io.ReadFull except it returns +// io.ErrUnexpectedEOF when io.EOF is hit before len(b) bytes are read. +func mustReadFull(r io.Reader, b []byte) (int, error) { + n, err := tryReadFull(r, b) + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + return n, err +} + +// tryReadFull is like io.ReadFull except it returns +// io.EOF when it is hit before len(b) bytes are read. +func tryReadFull(r io.Reader, b []byte) (n int, err error) { + for len(b) > n && err == nil { + var nn int + nn, err = r.Read(b[n:]) + n += nn + } + if len(b) == n && err == io.EOF { + err = nil + } + return n, err +} + +// discard skips n bytes in r, reporting an error if unable to do so. +func discard(r io.Reader, n int64) error { + // If possible, Seek to the last byte before the end of the data section. 
+ // Do this because Seek is often lazy about reporting errors; this will mask + // the fact that the stream may be truncated. We can rely on the + // io.CopyN done shortly afterwards to trigger any IO errors. + var seekSkipped int64 // Number of bytes skipped via Seek + if sr, ok := r.(io.Seeker); ok && n > 1 { + // Not all io.Seeker can actually Seek. For example, os.Stdin implements + // io.Seeker, but calling Seek always returns an error and performs + // no action. Thus, we try an innocent seek to the current position + // to see if Seek is really supported. + pos1, err := sr.Seek(0, io.SeekCurrent) + if pos1 >= 0 && err == nil { + // Seek seems supported, so perform the real Seek. + pos2, err := sr.Seek(n-1, io.SeekCurrent) + if pos2 < 0 || err != nil { + return err + } + seekSkipped = pos2 - pos1 + } + } + + copySkipped, err := io.CopyN(ioutil.Discard, r, n-seekSkipped) + if err == io.EOF && seekSkipped+copySkipped < n { + err = io.ErrUnexpectedEOF + } + return err } diff --git a/vendor/github.com/dmcgowan/go-tar/sparse_unix.go b/vendor/github.com/dmcgowan/go-tar/sparse_unix.go new file mode 100644 index 000000000..c623c1ee4 --- /dev/null +++ b/vendor/github.com/dmcgowan/go-tar/sparse_unix.go @@ -0,0 +1,77 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build linux darwin dragonfly freebsd openbsd netbsd solaris + +package tar + +import ( + "io" + "os" + "runtime" + "syscall" +) + +func init() { + sysSparseDetect = sparseDetectUnix +} + +func sparseDetectUnix(f *os.File) (sph sparseHoles, err error) { + // SEEK_DATA and SEEK_HOLE originated from Solaris and support for it + // has been added to most of the other major Unix systems. + var seekData, seekHole = 3, 4 // SEEK_DATA/SEEK_HOLE from unistd.h + + if runtime.GOOS == "darwin" { + // Darwin has the constants swapped, compared to all other UNIX. 
+ seekData, seekHole = 4, 3 + } + + // Check for seekData/seekHole support. + // Different OS and FS may differ in the exact errno that is returned when + // there is no support. Rather than special-casing every possible errno + // representing "not supported", just assume that a non-nil error means + // that seekData/seekHole is not supported. + if _, err := f.Seek(0, seekHole); err != nil { + return nil, nil + } + + // Populate the SparseHoles. + var last, pos int64 = -1, 0 + for { + // Get the location of the next hole section. + if pos, err = fseek(f, pos, seekHole); pos == last || err != nil { + return sph, err + } + offset := pos + last = pos + + // Get the location of the next data section. + if pos, err = fseek(f, pos, seekData); pos == last || err != nil { + return sph, err + } + length := pos - offset + last = pos + + if length > 0 { + sph = append(sph, SparseEntry{offset, length}) + } + } +} + +func fseek(f *os.File, pos int64, whence int) (int64, error) { + pos, err := f.Seek(pos, whence) + if errno(err) == syscall.ENXIO { + // SEEK_DATA returns ENXIO when past the last data fragment, + // which makes determining the size of the last hole difficult. + pos, err = f.Seek(0, io.SeekEnd) + } + return pos, err +} + +func errno(err error) error { + if perr, ok := err.(*os.PathError); ok { + return perr.Err + } + return err +} diff --git a/vendor/github.com/dmcgowan/go-tar/sparse_windows.go b/vendor/github.com/dmcgowan/go-tar/sparse_windows.go new file mode 100644 index 000000000..05bf1a90b --- /dev/null +++ b/vendor/github.com/dmcgowan/go-tar/sparse_windows.go @@ -0,0 +1,129 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +// +build windows + +package tar + +import ( + "os" + "syscall" + "unsafe" +) + +var errInvalidFunc = syscall.Errno(1) // ERROR_INVALID_FUNCTION from WinError.h + +func init() { + sysSparseDetect = sparseDetectWindows + sysSparsePunch = sparsePunchWindows +} + +func sparseDetectWindows(f *os.File) (sph sparseHoles, err error) { + const queryAllocRanges = 0x000940CF // FSCTL_QUERY_ALLOCATED_RANGES from WinIoCtl.h + type allocRangeBuffer struct{ offset, length int64 } // FILE_ALLOCATED_RANGE_BUFFER from WinIoCtl.h + + s, err := f.Stat() + if err != nil { + return nil, err + } + + queryRange := allocRangeBuffer{0, s.Size()} + allocRanges := make([]allocRangeBuffer, 64) + + // Repeatedly query for ranges until the input buffer is large enough. + var bytesReturned uint32 + for { + err := syscall.DeviceIoControl( + syscall.Handle(f.Fd()), queryAllocRanges, + (*byte)(unsafe.Pointer(&queryRange)), uint32(unsafe.Sizeof(queryRange)), + (*byte)(unsafe.Pointer(&allocRanges[0])), uint32(len(allocRanges)*int(unsafe.Sizeof(allocRanges[0]))), + &bytesReturned, nil, + ) + if err == syscall.ERROR_MORE_DATA { + allocRanges = make([]allocRangeBuffer, 2*len(allocRanges)) + continue + } + if err == errInvalidFunc { + return nil, nil // Sparse file not supported on this FS + } + if err != nil { + return nil, err + } + break + } + n := bytesReturned / uint32(unsafe.Sizeof(allocRanges[0])) + allocRanges = append(allocRanges[:n], allocRangeBuffer{s.Size(), 0}) + + // Invert the data fragments into hole fragments. 
+ var pos int64 + for _, r := range allocRanges { + if r.offset > pos { + sph = append(sph, SparseEntry{pos, r.offset - pos}) + } + pos = r.offset + r.length + } + return sph, nil +} + +func sparsePunchWindows(f *os.File, sph sparseHoles) error { + const setSparse = 0x000900C4 // FSCTL_SET_SPARSE from WinIoCtl.h + const setZeroData = 0x000980C8 // FSCTL_SET_ZERO_DATA from WinIoCtl.h + type zeroDataInfo struct{ start, end int64 } // FILE_ZERO_DATA_INFORMATION from WinIoCtl.h + + // Set the file as being sparse. + var bytesReturned uint32 + devErr := syscall.DeviceIoControl( + syscall.Handle(f.Fd()), setSparse, + nil, 0, nil, 0, + &bytesReturned, nil, + ) + if devErr != nil && devErr != errInvalidFunc { + return devErr + } + + // Set the file to the right size. + var size int64 + if len(sph) > 0 { + size = sph[len(sph)-1].endOffset() + } + if err := f.Truncate(size); err != nil { + return err + } + if devErr == errInvalidFunc { + // Sparse file not supported on this FS. + // Call sparsePunchManual since SetEndOfFile does not guarantee that + // the extended space is filled with zeros. + return sparsePunchManual(f, sph) + } + + // Punch holes for all relevant fragments. + for _, s := range sph { + zdi := zeroDataInfo{s.Offset, s.endOffset()} + err := syscall.DeviceIoControl( + syscall.Handle(f.Fd()), setZeroData, + (*byte)(unsafe.Pointer(&zdi)), uint32(unsafe.Sizeof(zdi)), + nil, 0, + &bytesReturned, nil, + ) + if err != nil { + return err + } + } + return nil +} + +// sparsePunchManual writes zeros into each hole. 
+func sparsePunchManual(f *os.File, sph sparseHoles) error { + const chunkSize = 32 << 10 + zbuf := make([]byte, chunkSize) + for _, s := range sph { + for pos := s.Offset; pos < s.endOffset(); pos += chunkSize { + n := min(chunkSize, s.endOffset()-pos) + if _, err := f.WriteAt(zbuf[:n], pos); err != nil { + return err + } + } + } + return nil +} diff --git a/vendor/github.com/dmcgowan/go-tar/stat_atim.go b/vendor/github.com/dmcgowan/go-tar/stat_actime1.go similarity index 100% rename from vendor/github.com/dmcgowan/go-tar/stat_atim.go rename to vendor/github.com/dmcgowan/go-tar/stat_actime1.go diff --git a/vendor/github.com/dmcgowan/go-tar/stat_atimespec.go b/vendor/github.com/dmcgowan/go-tar/stat_actime2.go similarity index 100% rename from vendor/github.com/dmcgowan/go-tar/stat_atimespec.go rename to vendor/github.com/dmcgowan/go-tar/stat_actime2.go diff --git a/vendor/github.com/dmcgowan/go-tar/stat_unix.go b/vendor/github.com/dmcgowan/go-tar/stat_unix.go index cb843db4c..868105f33 100644 --- a/vendor/github.com/dmcgowan/go-tar/stat_unix.go +++ b/vendor/github.com/dmcgowan/go-tar/stat_unix.go @@ -8,6 +8,10 @@ package tar import ( "os" + "os/user" + "runtime" + "strconv" + "sync" "syscall" ) @@ -15,6 +19,10 @@ func init() { sysStat = statUnix } +// userMap and groupMap caches UID and GID lookups for performance reasons. +// The downside is that renaming uname or gname by the OS never takes effect. +var userMap, groupMap sync.Map // map[int]string + func statUnix(fi os.FileInfo, h *Header) error { sys, ok := fi.Sys().(*syscall.Stat_t) if !ok { @@ -22,11 +30,67 @@ func statUnix(fi os.FileInfo, h *Header) error { } h.Uid = int(sys.Uid) h.Gid = int(sys.Gid) - // TODO(bradfitz): populate username & group. os/user - // doesn't cache LookupId lookups, and lacks group - // lookup functions. + + // Best effort at populating Uname and Gname. + // The os/user functions may fail for any number of reasons + // (not implemented on that platform, cgo not enabled, etc). 
+ if u, ok := userMap.Load(h.Uid); ok { + h.Uname = u.(string) + } else if u, err := user.LookupId(strconv.Itoa(h.Uid)); err == nil { + h.Uname = u.Username + userMap.Store(h.Uid, h.Uname) + } + if g, ok := groupMap.Load(h.Gid); ok { + h.Gname = g.(string) + } else if g, err := user.LookupGroupId(strconv.Itoa(h.Gid)); err == nil { + h.Gname = g.Name + groupMap.Store(h.Gid, h.Gname) + } + h.AccessTime = statAtime(sys) h.ChangeTime = statCtime(sys) - // TODO(bradfitz): major/minor device numbers? + + // Best effort at populating Devmajor and Devminor. + if h.Typeflag == TypeChar || h.Typeflag == TypeBlock { + dev := uint64(sys.Rdev) // May be int32 or uint32 + switch runtime.GOOS { + case "linux": + // Copied from golang.org/x/sys/unix/dev_linux.go. + major := uint32((dev & 0x00000000000fff00) >> 8) + major |= uint32((dev & 0xfffff00000000000) >> 32) + minor := uint32((dev & 0x00000000000000ff) >> 0) + minor |= uint32((dev & 0x00000ffffff00000) >> 12) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "darwin": + // Copied from golang.org/x/sys/unix/dev_darwin.go. + major := uint32((dev >> 24) & 0xff) + minor := uint32(dev & 0xffffff) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "dragonfly": + // Copied from golang.org/x/sys/unix/dev_dragonfly.go. + major := uint32((dev >> 8) & 0xff) + minor := uint32(dev & 0xffff00ff) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "freebsd": + // Copied from golang.org/x/sys/unix/dev_freebsd.go. + major := uint32((dev >> 8) & 0xff) + minor := uint32(dev & 0xffff00ff) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "netbsd": + // Copied from golang.org/x/sys/unix/dev_netbsd.go. + major := uint32((dev & 0x000fff00) >> 8) + minor := uint32((dev & 0x000000ff) >> 0) + minor |= uint32((dev & 0xfff00000) >> 12) + h.Devmajor, h.Devminor = int64(major), int64(minor) + case "openbsd": + // Copied from golang.org/x/sys/unix/dev_openbsd.go. 
+ major := uint32((dev & 0x0000ff00) >> 8) + minor := uint32((dev & 0x000000ff) >> 0) + minor |= uint32((dev & 0xffff0000) >> 8) + h.Devmajor, h.Devminor = int64(major), int64(minor) + default: + // TODO: Implement solaris (see https://golang.org/issue/8106) + } + } return nil } diff --git a/vendor/github.com/dmcgowan/go-tar/strconv.go b/vendor/github.com/dmcgowan/go-tar/strconv.go index bb5b51c02..8bbd65cd1 100644 --- a/vendor/github.com/dmcgowan/go-tar/strconv.go +++ b/vendor/github.com/dmcgowan/go-tar/strconv.go @@ -12,26 +12,34 @@ import ( "time" ) +// hasNUL reports whether the NUL character exists within s. +func hasNUL(s string) bool { + return strings.IndexByte(s, 0) >= 0 +} + +// isASCII reports whether the input is an ASCII C-style string. func isASCII(s string) bool { for _, c := range s { - if c >= 0x80 { + if c >= 0x80 || c == 0x00 { return false } } return true } +// toASCII converts the input to an ASCII C-style string. +// This a best effort conversion, so invalid characters are dropped. func toASCII(s string) string { if isASCII(s) { return s } - var buf bytes.Buffer + b := make([]byte, 0, len(s)) for _, c := range s { - if c < 0x80 { - buf.WriteByte(byte(c)) + if c < 0x80 && c != 0x00 { + b = append(b, byte(c)) } } - return buf.String() + return string(b) } type parser struct { @@ -45,23 +53,28 @@ type formatter struct { // parseString parses bytes as a NUL-terminated C-style string. // If a NUL byte is not found then the whole slice is returned as a string. func (*parser) parseString(b []byte) string { - n := 0 - for n < len(b) && b[n] != 0 { - n++ + if i := bytes.IndexByte(b, 0); i >= 0 { + return string(b[:i]) } - return string(b[0:n]) + return string(b) } -// Write s into b, terminating it with a NUL if there is room. +// formatString copies s into b, NUL-terminating if possible. 
func (f *formatter) formatString(b []byte, s string) { if len(s) > len(b) { f.err = ErrFieldTooLong - return } - ascii := toASCII(s) - copy(b, ascii) - if len(ascii) < len(b) { - b[len(ascii)] = 0 + copy(b, s) + if len(s) < len(b) { + b[len(s)] = 0 + } + + // Some buggy readers treat regular files with a trailing slash + // in the V7 path field as a directory even though the full path + // recorded elsewhere (e.g., via PAX record) contains no trailing slash. + if len(s) > len(b) && b[len(b)-1] == '/' { + n := len(strings.TrimRight(s[:len(b)], "/")) + b[n] = 0 // Replace trailing slash with NUL terminator } } @@ -73,7 +86,7 @@ func (f *formatter) formatString(b []byte, s string) { // that the first byte can only be either 0x80 or 0xff. Thus, the first byte is // equivalent to the sign bit in two's complement form. func fitsInBase256(n int, x int64) bool { - var binBits = uint(n-1) * 8 + binBits := uint(n-1) * 8 return n >= 9 || (x >= -1<= 0; i-- { b[i] = byte(x) @@ -155,6 +174,11 @@ func (p *parser) parseOctal(b []byte) int64 { } func (f *formatter) formatOctal(b []byte, x int64) { + if !fitsInOctal(len(b), x) { + x = 0 // Last resort, just write zero + f.err = ErrFieldTooLong + } + s := strconv.FormatInt(x, 8) // Add leading zeros, but leave room for a NUL. if n := len(b) - len(s) - 1; n > 0 { @@ -163,6 +187,13 @@ func (f *formatter) formatOctal(b []byte, x int64) { f.formatString(b, s) } +// fitsInOctal reports whether the integer x fits in a field n-bytes long +// using octal encoding with the appropriate NUL terminator. 
+func fitsInOctal(n int, x int64) bool { + octBits := uint(n-1) * 3 + return x >= 0 && (n >= 22 || x < 1<= 0 { + return false + } + switch k { + case paxPath, paxLinkpath, paxUname, paxGname: + return !hasNUL(v) + default: + return !hasNUL(k) } - return record } diff --git a/vendor/github.com/dmcgowan/go-tar/writer.go b/vendor/github.com/dmcgowan/go-tar/writer.go index 596fb8b9e..2eed61934 100644 --- a/vendor/github.com/dmcgowan/go-tar/writer.go +++ b/vendor/github.com/dmcgowan/go-tar/writer.go @@ -4,12 +4,8 @@ package tar -// TODO(dsymonds): -// - catch more errors (no first header, etc.) - import ( "bytes" - "errors" "fmt" "io" "path" @@ -19,234 +15,365 @@ import ( "time" ) -var ( - ErrWriteTooLong = errors.New("archive/tar: write too long") - ErrFieldTooLong = errors.New("archive/tar: header field too long") - ErrWriteAfterClose = errors.New("archive/tar: write after close") - errInvalidHeader = errors.New("archive/tar: header field too long or contains invalid values") -) - -// A Writer provides sequential writing of a tar archive in POSIX.1 format. -// A tar archive consists of a sequence of files. -// Call WriteHeader to begin a new file, and then call Write to supply that file's data, -// writing at most hdr.Size bytes in total. +// Writer provides sequential writing of a tar archive. +// Write.WriteHeader begins a new file with the provided Header, +// and then Writer can be treated as an io.Writer to supply that file's data. 
type Writer struct { - w io.Writer - err error - nb int64 // number of unwritten bytes for current file entry - pad int64 // amount of padding to write after current file entry - closed bool - usedBinary bool // whether the binary numeric field extension was used - preferPax bool // use PAX header instead of binary numeric header - hdrBuff block // buffer to use in writeHeader when writing a regular header - paxHdrBuff block // buffer to use in writeHeader when writing a PAX header + w io.Writer + pad int64 // Amount of padding to write after current file entry + curr fileWriter // Writer for current file entry + hdr Header // Shallow copy of Header that is safe for mutations + blk block // Buffer to use as temporary local storage + + // err is a persistent error. + // It is only the responsibility of every exported method of Writer to + // ensure that this error is sticky. + err error } // NewWriter creates a new Writer writing to w. -func NewWriter(w io.Writer) *Writer { return &Writer{w: w} } +func NewWriter(w io.Writer) *Writer { + return &Writer{w: w, curr: &regFileWriter{w, 0}} +} -// Flush finishes writing the current file (optional). +type fileWriter interface { + io.Writer + fileState + + ReadFrom(io.Reader) (int64, error) +} + +// Flush finishes writing the current file's block padding. +// The current file must be fully written before Flush can be called. +// +// Deprecated: This is unnecessary as the next call to WriteHeader or Close +// will implicitly flush out the file's padding. func (tw *Writer) Flush() error { - if tw.nb > 0 { - tw.err = fmt.Errorf("archive/tar: missed writing %d bytes", tw.nb) - return tw.err - } - - n := tw.nb + tw.pad - for n > 0 && tw.err == nil { - nr := n - if nr > blockSize { - nr = blockSize - } - var nw int - nw, tw.err = tw.w.Write(zeroBlock[0:nr]) - n -= int64(nw) - } - tw.nb = 0 - tw.pad = 0 - return tw.err -} - -var ( - minTime = time.Unix(0, 0) - // There is room for 11 octal digits (33 bits) of mtime. 
- maxTime = minTime.Add((1<<33 - 1) * time.Second) -) - -// WriteHeader writes hdr and prepares to accept the file's contents. -// WriteHeader calls Flush if it is not the first header. -// Calling after a Close will return ErrWriteAfterClose. -func (tw *Writer) WriteHeader(hdr *Header) error { - return tw.writeHeader(hdr, true) -} - -// WriteHeader writes hdr and prepares to accept the file's contents. -// WriteHeader calls Flush if it is not the first header. -// Calling after a Close will return ErrWriteAfterClose. -// As this method is called internally by writePax header to allow it to -// suppress writing the pax header. -func (tw *Writer) writeHeader(hdr *Header, allowPax bool) error { - if tw.closed { - return ErrWriteAfterClose - } - if tw.err == nil { - tw.Flush() - } if tw.err != nil { return tw.err } - - // a map to hold pax header records, if any are needed - paxHeaders := make(map[string]string) - - // TODO(dsnet): we might want to use PAX headers for - // subsecond time resolution, but for now let's just capture - // too long fields or non ascii characters - - // We need to select which scratch buffer to use carefully, - // since this method is called recursively to write PAX headers. - // If allowPax is true, this is the non-recursive call, and we will use hdrBuff. - // If allowPax is false, we are being called by writePAXHeader, and hdrBuff is - // already being used by the non-recursive call, so we must use paxHdrBuff. - header := &tw.hdrBuff - if !allowPax { - header = &tw.paxHdrBuff + if nb := tw.curr.LogicalRemaining(); nb > 0 { + return fmt.Errorf("tar: missed writing %d bytes", nb) } - copy(header[:], zeroBlock[:]) - - // Wrappers around formatter that automatically sets paxHeaders if the - // argument extends beyond the capacity of the input byte slice. 
- var f formatter - var formatString = func(b []byte, s string, paxKeyword string) { - needsPaxHeader := paxKeyword != paxNone && len(s) > len(b) || !isASCII(s) - if needsPaxHeader { - paxHeaders[paxKeyword] = s - return - } - f.formatString(b, s) - } - var formatNumeric = func(b []byte, x int64, paxKeyword string) { - // Try octal first. - s := strconv.FormatInt(x, 8) - if len(s) < len(b) { - f.formatOctal(b, x) - return - } - - // If it is too long for octal, and PAX is preferred, use a PAX header. - if paxKeyword != paxNone && tw.preferPax { - f.formatOctal(b, 0) - s := strconv.FormatInt(x, 10) - paxHeaders[paxKeyword] = s - return - } - - tw.usedBinary = true - f.formatNumeric(b, x) - } - - // Handle out of range ModTime carefully. - var modTime int64 - if !hdr.ModTime.Before(minTime) && !hdr.ModTime.After(maxTime) { - modTime = hdr.ModTime.Unix() - } - - v7 := header.V7() - formatString(v7.Name(), hdr.Name, paxPath) - // TODO(dsnet): The GNU format permits the mode field to be encoded in - // base-256 format. Thus, we can use formatNumeric instead of formatOctal. - f.formatOctal(v7.Mode(), hdr.Mode) - formatNumeric(v7.UID(), int64(hdr.Uid), paxUid) - formatNumeric(v7.GID(), int64(hdr.Gid), paxGid) - formatNumeric(v7.Size(), hdr.Size, paxSize) - // TODO(dsnet): Consider using PAX for finer time granularity. - formatNumeric(v7.ModTime(), modTime, paxNone) - v7.TypeFlag()[0] = hdr.Typeflag - formatString(v7.LinkName(), hdr.Linkname, paxLinkpath) - - ustar := header.USTAR() - formatString(ustar.UserName(), hdr.Uname, paxUname) - formatString(ustar.GroupName(), hdr.Gname, paxGname) - formatNumeric(ustar.DevMajor(), hdr.Devmajor, paxNone) - formatNumeric(ustar.DevMinor(), hdr.Devminor, paxNone) - - // TODO(dsnet): The logic surrounding the prefix field is broken when trying - // to encode the header as GNU format. 
The challenge with the current logic - // is that we are unsure what format we are using at any given moment until - // we have processed *all* of the fields. The problem is that by the time - // all fields have been processed, some work has already been done to handle - // each field under the assumption that it is for one given format or - // another. In some situations, this causes the Writer to be confused and - // encode a prefix field when the format being used is GNU. Thus, producing - // an invalid tar file. - // - // As a short-term fix, we disable the logic to use the prefix field, which - // will force the badly generated GNU files to become encoded as being - // the PAX format. - // - // As an alternative fix, we could hard-code preferPax to be true. However, - // this is problematic for the following reasons: - // * The preferPax functionality is not tested at all. - // * This can result in headers that try to use both the GNU and PAX - // features at the same time, which is also wrong. - // - // The proper fix for this is to use a two-pass method: - // * The first pass simply determines what set of formats can possibly - // encode the given header. - // * The second pass actually encodes the header as that given format - // without worrying about violating the format. - // - // See the following: - // https://golang.org/issue/12594 - // https://golang.org/issue/17630 - // https://golang.org/issue/9683 - const usePrefix = false - - // try to use a ustar header when only the name is too long - _, paxPathUsed := paxHeaders[paxPath] - if usePrefix && !tw.preferPax && len(paxHeaders) == 1 && paxPathUsed { - prefix, suffix, ok := splitUSTARPath(hdr.Name) - if ok { - // Since we can encode in USTAR format, disable PAX header. 
- delete(paxHeaders, paxPath) - - // Update the path fields - formatString(v7.Name(), suffix, paxNone) - formatString(ustar.Prefix(), prefix, paxNone) - } - } - - if tw.usedBinary { - header.SetFormat(formatGNU) - } else { - header.SetFormat(formatUSTAR) - } - - // Check if there were any formatting errors. - if f.err != nil { - tw.err = f.err + if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil { return tw.err } + tw.pad = 0 + return nil +} - if allowPax { - for k, v := range hdr.Xattrs { - paxHeaders[paxXattr+k] = v +// WriteHeader writes hdr and prepares to accept the file's contents. +// The Header.Size determines how many bytes can be written for the next file. +// If the current file is not fully written, then this returns an error. +// This implicitly flushes any padding necessary before writing the header. +func (tw *Writer) WriteHeader(hdr *Header) error { + if err := tw.Flush(); err != nil { + return err + } + tw.hdr = *hdr // Shallow copy of Header + + // Round ModTime and ignore AccessTime and ChangeTime unless + // the format is explicitly chosen. + // This ensures nominal usage of WriteHeader (without specifying the format) + // does not always result in the PAX format being chosen, which + // causes a 1KiB increase to every header. + if tw.hdr.Format == FormatUnknown { + tw.hdr.ModTime = tw.hdr.ModTime.Round(time.Second) + tw.hdr.AccessTime = time.Time{} + tw.hdr.ChangeTime = time.Time{} + } + + allowedFormats, paxHdrs, err := tw.hdr.allowedFormats() + switch { + case allowedFormats.has(FormatUSTAR): + tw.err = tw.writeUSTARHeader(&tw.hdr) + return tw.err + case allowedFormats.has(FormatPAX): + tw.err = tw.writePAXHeader(&tw.hdr, paxHdrs) + return tw.err + case allowedFormats.has(FormatGNU): + tw.err = tw.writeGNUHeader(&tw.hdr) + return tw.err + default: + return err // Non-fatal error + } +} + +func (tw *Writer) writeUSTARHeader(hdr *Header) error { + // Check if we can use USTAR prefix/suffix splitting. 
+ var namePrefix string + if prefix, suffix, ok := splitUSTARPath(hdr.Name); ok { + namePrefix, hdr.Name = prefix, suffix + } + + // Pack the main header. + var f formatter + blk := tw.templateV7Plus(hdr, f.formatString, f.formatOctal) + f.formatString(blk.USTAR().Prefix(), namePrefix) + blk.SetFormat(FormatUSTAR) + if f.err != nil { + return f.err // Should never happen since header is validated + } + return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag) +} + +func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error { + realName, realSize := hdr.Name, hdr.Size + + // Handle sparse files. + var spd sparseDatas + var spb []byte + if len(hdr.SparseHoles) > 0 { + sph := append([]SparseEntry{}, hdr.SparseHoles...) // Copy sparse map + sph = alignSparseEntries(sph, hdr.Size) + spd = invertSparseEntries(sph, hdr.Size) + + // Format the sparse map. + hdr.Size = 0 // Replace with encoded size + spb = append(strconv.AppendInt(spb, int64(len(spd)), 10), '\n') + for _, s := range spd { + hdr.Size += s.Length + spb = append(strconv.AppendInt(spb, s.Offset, 10), '\n') + spb = append(strconv.AppendInt(spb, s.Length, 10), '\n') + } + pad := blockPadding(int64(len(spb))) + spb = append(spb, zeroBlock[:pad]...) + hdr.Size += int64(len(spb)) // Accounts for encoded sparse map + + // Add and modify appropriate PAX records. + dir, file := path.Split(realName) + hdr.Name = path.Join(dir, "GNUSparseFile.0", file) + paxHdrs[paxGNUSparseMajor] = "1" + paxHdrs[paxGNUSparseMinor] = "0" + paxHdrs[paxGNUSparseName] = realName + paxHdrs[paxGNUSparseRealSize] = strconv.FormatInt(realSize, 10) + paxHdrs[paxSize] = strconv.FormatInt(hdr.Size, 10) + delete(paxHdrs, paxPath) // Recorded by paxGNUSparseName + } + + // Write PAX records to the output. + isGlobal := hdr.Typeflag == TypeXGlobalHeader + if len(paxHdrs) > 0 || isGlobal { + // Sort keys for deterministic ordering. 
+ var keys []string + for k := range paxHdrs { + keys = append(keys, k) + } + sort.Strings(keys) + + // Write each record to a buffer. + var buf bytes.Buffer + for _, k := range keys { + rec, err := formatPAXRecord(k, paxHdrs[k]) + if err != nil { + return err + } + buf.WriteString(rec) + } + + // Write the extended header file. + var name string + var flag byte + if isGlobal { + name = "GlobalHead.0.0" + flag = TypeXGlobalHeader + } else { + dir, file := path.Split(realName) + name = path.Join(dir, "PaxHeaders.0", file) + flag = TypeXHeader + } + data := buf.String() + if err := tw.writeRawFile(name, data, flag, FormatPAX); err != nil || isGlobal { + return err // Global headers return here } } - if len(paxHeaders) > 0 { - if !allowPax { - return errInvalidHeader + // Pack the main header. + var f formatter // Ignore errors since they are expected + fmtStr := func(b []byte, s string) { f.formatString(b, toASCII(s)) } + blk := tw.templateV7Plus(hdr, fmtStr, f.formatOctal) + blk.SetFormat(FormatPAX) + if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil { + return err + } + + // Write the sparse map and setup the sparse writer if necessary. + if len(spd) > 0 { + // Use tw.curr since the sparse map is accounted for in hdr.Size. + if _, err := tw.curr.Write(spb); err != nil { + return err } - if err := tw.writePAXHeader(hdr, paxHeaders); err != nil { + tw.curr = &sparseFileWriter{tw.curr, spd, 0} + } + return nil +} + +func (tw *Writer) writeGNUHeader(hdr *Header) error { + // Use long-link files if Name or Linkname exceeds the field size. 
+ const longName = "././@LongLink" + if len(hdr.Name) > nameSize { + data := hdr.Name + "\x00" + if err := tw.writeRawFile(longName, data, TypeGNULongName, FormatGNU); err != nil { + return err + } + } + if len(hdr.Linkname) > nameSize { + data := hdr.Linkname + "\x00" + if err := tw.writeRawFile(longName, data, TypeGNULongLink, FormatGNU); err != nil { return err } } - tw.nb = hdr.Size - tw.pad = (blockSize - (tw.nb % blockSize)) % blockSize - _, tw.err = tw.w.Write(header[:]) - return tw.err + // Pack the main header. + var f formatter // Ignore errors since they are expected + var spd sparseDatas + var spb []byte + blk := tw.templateV7Plus(hdr, f.formatString, f.formatNumeric) + if !hdr.AccessTime.IsZero() { + f.formatNumeric(blk.GNU().AccessTime(), hdr.AccessTime.Unix()) + } + if !hdr.ChangeTime.IsZero() { + f.formatNumeric(blk.GNU().ChangeTime(), hdr.ChangeTime.Unix()) + } + if hdr.Typeflag == TypeGNUSparse { + sph := append([]SparseEntry{}, hdr.SparseHoles...) // Copy sparse map + sph = alignSparseEntries(sph, hdr.Size) + spd = invertSparseEntries(sph, hdr.Size) + + // Format the sparse map. + formatSPD := func(sp sparseDatas, sa sparseArray) sparseDatas { + for i := 0; len(sp) > 0 && i < sa.MaxEntries(); i++ { + f.formatNumeric(sa.Entry(i).Offset(), sp[0].Offset) + f.formatNumeric(sa.Entry(i).Length(), sp[0].Length) + sp = sp[1:] + } + if len(sp) > 0 { + sa.IsExtended()[0] = 1 + } + return sp + } + sp2 := formatSPD(spd, blk.GNU().Sparse()) + for len(sp2) > 0 { + var spHdr block + sp2 = formatSPD(sp2, spHdr.Sparse()) + spb = append(spb, spHdr[:]...) + } + + // Update size fields in the header block. 
+ realSize := hdr.Size + hdr.Size = 0 // Encoded size; does not account for encoded sparse map + for _, s := range spd { + hdr.Size += s.Length + } + copy(blk.V7().Size(), zeroBlock[:]) // Reset field + f.formatNumeric(blk.V7().Size(), hdr.Size) + f.formatNumeric(blk.GNU().RealSize(), realSize) + } + blk.SetFormat(FormatGNU) + if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil { + return err + } + + // Write the extended sparse map and setup the sparse writer if necessary. + if len(spd) > 0 { + // Use tw.w since the sparse map is not accounted for in hdr.Size. + if _, err := tw.w.Write(spb); err != nil { + return err + } + tw.curr = &sparseFileWriter{tw.curr, spd, 0} + } + return nil +} + +type ( + stringFormatter func([]byte, string) + numberFormatter func([]byte, int64) +) + +// templateV7Plus fills out the V7 fields of a block using values from hdr. +// It also fills out fields (uname, gname, devmajor, devminor) that are +// shared in the USTAR, PAX, and GNU formats using the provided formatters. +// +// The block returned is only valid until the next call to +// templateV7Plus or writeRawFile. +func (tw *Writer) templateV7Plus(hdr *Header, fmtStr stringFormatter, fmtNum numberFormatter) *block { + tw.blk.Reset() + + modTime := hdr.ModTime + if modTime.IsZero() { + modTime = time.Unix(0, 0) + } + + v7 := tw.blk.V7() + v7.TypeFlag()[0] = hdr.Typeflag + fmtStr(v7.Name(), hdr.Name) + fmtStr(v7.LinkName(), hdr.Linkname) + fmtNum(v7.Mode(), hdr.Mode) + fmtNum(v7.UID(), int64(hdr.Uid)) + fmtNum(v7.GID(), int64(hdr.Gid)) + fmtNum(v7.Size(), hdr.Size) + fmtNum(v7.ModTime(), modTime.Unix()) + + ustar := tw.blk.USTAR() + fmtStr(ustar.UserName(), hdr.Uname) + fmtStr(ustar.GroupName(), hdr.Gname) + fmtNum(ustar.DevMajor(), hdr.Devmajor) + fmtNum(ustar.DevMinor(), hdr.Devminor) + + return &tw.blk +} + +// writeRawFile writes a minimal file with the given name and flag type. +// It uses format to encode the header format and will write data as the body. 
+// It uses default values for all of the other fields (as BSD and GNU tar does). +func (tw *Writer) writeRawFile(name, data string, flag byte, format Format) error { + tw.blk.Reset() + + // Best effort for the filename. + name = toASCII(name) + if len(name) > nameSize { + name = name[:nameSize] + } + name = strings.TrimRight(name, "/") + + var f formatter + v7 := tw.blk.V7() + v7.TypeFlag()[0] = flag + f.formatString(v7.Name(), name) + f.formatOctal(v7.Mode(), 0) + f.formatOctal(v7.UID(), 0) + f.formatOctal(v7.GID(), 0) + f.formatOctal(v7.Size(), int64(len(data))) // Must be < 8GiB + f.formatOctal(v7.ModTime(), 0) + tw.blk.SetFormat(format) + if f.err != nil { + return f.err // Only occurs if size condition is violated + } + + // Write the header and data. + if err := tw.writeRawHeader(&tw.blk, int64(len(data)), flag); err != nil { + return err + } + _, err := io.WriteString(tw, data) + return err +} + +// writeRawHeader writes the value of blk, regardless of its value. +// It sets up the Writer such that it can accept a file of the given size. +// If the flag is a special header-only flag, then the size is treated as zero. +func (tw *Writer) writeRawHeader(blk *block, size int64, flag byte) error { + if err := tw.Flush(); err != nil { + return err + } + if _, err := tw.w.Write(blk[:]); err != nil { + return err + } + if isHeaderOnlyType(flag) { + size = 0 + } + tw.curr = &regFileWriter{tw.w, size} + tw.pad = blockPadding(size) + return nil } // splitUSTARPath splits a path according to USTAR prefix and suffix rules. @@ -270,95 +397,233 @@ func splitUSTARPath(name string) (prefix, suffix string, ok bool) { return name[:i], name[i+1:], true } -// writePaxHeader writes an extended pax header to the -// archive. -func (tw *Writer) writePAXHeader(hdr *Header, paxHeaders map[string]string) error { - // Prepare extended header - ext := new(Header) - ext.Typeflag = TypeXHeader - // Setting ModTime is required for reader parsing to - // succeed, and seems harmless enough. 
- ext.ModTime = hdr.ModTime - // The spec asks that we namespace our pseudo files - // with the current pid. However, this results in differing outputs - // for identical inputs. As such, the constant 0 is now used instead. - // golang.org/issue/12358 - dir, file := path.Split(hdr.Name) - fullName := path.Join(dir, "PaxHeaders.0", file) - - ascii := toASCII(fullName) - if len(ascii) > nameSize { - ascii = ascii[:nameSize] - } - ext.Name = ascii - // Construct the body - var buf bytes.Buffer - - // Keys are sorted before writing to body to allow deterministic output. - keys := make([]string, 0, len(paxHeaders)) - for k := range paxHeaders { - keys = append(keys, k) - } - sort.Strings(keys) - - for _, k := range keys { - fmt.Fprint(&buf, formatPAXRecord(k, paxHeaders[k])) - } - - ext.Size = int64(len(buf.Bytes())) - if err := tw.writeHeader(ext, false); err != nil { - return err - } - if _, err := tw.Write(buf.Bytes()); err != nil { - return err - } - if err := tw.Flush(); err != nil { - return err - } - return nil -} - -// Write writes to the current entry in the tar archive. +// Write writes to the current file in the tar archive. // Write returns the error ErrWriteTooLong if more than -// hdr.Size bytes are written after WriteHeader. -func (tw *Writer) Write(b []byte) (n int, err error) { - if tw.closed { - err = ErrWriteAfterClose - return +// Header.Size bytes are written after WriteHeader. +// +// If the current file is sparse, then the regions marked as a hole +// must be written as NUL-bytes. +// +// Calling Write on special types like TypeLink, TypeSymlink, TypeChar, +// TypeBlock, TypeDir, and TypeFifo returns (0, ErrWriteTooLong) regardless +// of what the Header.Size claims. 
+func (tw *Writer) Write(b []byte) (int, error) { + if tw.err != nil { + return 0, tw.err } - overwrite := false - if int64(len(b)) > tw.nb { - b = b[0:tw.nb] - overwrite = true + n, err := tw.curr.Write(b) + if err != nil && err != ErrWriteTooLong { + tw.err = err } - n, err = tw.w.Write(b) - tw.nb -= int64(n) - if err == nil && overwrite { - err = ErrWriteTooLong - return - } - tw.err = err - return + return n, err } -// Close closes the tar archive, flushing any unwritten -// data to the underlying writer. -func (tw *Writer) Close() error { - if tw.err != nil || tw.closed { - return tw.err +// ReadFrom populates the content of the current file by reading from r. +// The bytes read must match the number of remaining bytes in the current file. +// +// If the current file is sparse and r is an io.ReadSeeker, +// then ReadFrom uses Seek to skip past holes defined in Header.SparseHoles, +// assuming that skipped regions are all NULs. +// This always reads the last byte to ensure r is the right size. +func (tw *Writer) ReadFrom(r io.Reader) (int64, error) { + if tw.err != nil { + return 0, tw.err + } + n, err := tw.curr.ReadFrom(r) + if err != nil && err != ErrWriteTooLong { + tw.err = err + } + return n, err +} + +// Close closes the tar archive by flushing the padding, and writing the footer. +// If the current file (from a prior call to WriteHeader) is not fully written, +// then this returns an error. +func (tw *Writer) Close() error { + if tw.err == ErrWriteAfterClose { + return nil } - tw.Flush() - tw.closed = true if tw.err != nil { return tw.err } - // trailer: two zero blocks - for i := 0; i < 2; i++ { - _, tw.err = tw.w.Write(zeroBlock[:]) - if tw.err != nil { - break + // Trailer: two zero blocks. + err := tw.Flush() + for i := 0; i < 2 && err == nil; i++ { + _, err = tw.w.Write(zeroBlock[:]) + } + + // Ensure all future actions are invalid. 
+ tw.err = ErrWriteAfterClose + return err // Report IO errors +} + +// regFileWriter is a fileWriter for writing data to a regular file entry. +type regFileWriter struct { + w io.Writer // Underlying Writer + nb int64 // Number of remaining bytes to write +} + +func (fw *regFileWriter) Write(b []byte) (n int, err error) { + overwrite := int64(len(b)) > fw.nb + if overwrite { + b = b[:fw.nb] + } + if len(b) > 0 { + n, err = fw.w.Write(b) + fw.nb -= int64(n) + } + switch { + case err != nil: + return n, err + case overwrite: + return n, ErrWriteTooLong + default: + return n, nil + } +} + +func (fw *regFileWriter) ReadFrom(r io.Reader) (int64, error) { + return io.Copy(struct{ io.Writer }{fw}, r) +} + +func (fw regFileWriter) LogicalRemaining() int64 { + return fw.nb +} +func (fw regFileWriter) PhysicalRemaining() int64 { + return fw.nb +} + +// sparseFileWriter is a fileWriter for writing data to a sparse file entry. +type sparseFileWriter struct { + fw fileWriter // Underlying fileWriter + sp sparseDatas // Normalized list of data fragments + pos int64 // Current position in sparse file +} + +func (sw *sparseFileWriter) Write(b []byte) (n int, err error) { + overwrite := int64(len(b)) > sw.LogicalRemaining() + if overwrite { + b = b[:sw.LogicalRemaining()] + } + + b0 := b + endPos := sw.pos + int64(len(b)) + for endPos > sw.pos && err == nil { + var nf int // Bytes written in fragment + dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() + if sw.pos < dataStart { // In a hole fragment + bf := b[:min(int64(len(b)), dataStart-sw.pos)] + nf, err = zeroWriter{}.Write(bf) + } else { // In a data fragment + bf := b[:min(int64(len(b)), dataEnd-sw.pos)] + nf, err = sw.fw.Write(bf) + } + b = b[nf:] + sw.pos += int64(nf) + if sw.pos >= dataEnd && len(sw.sp) > 1 { + sw.sp = sw.sp[1:] // Ensure last fragment always remains } } - return tw.err + + n = len(b0) - len(b) + switch { + case err == ErrWriteTooLong: + return n, errMissData // Not possible; implies bug in 
validation logic + case err != nil: + return n, err + case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0: + return n, errUnrefData // Not possible; implies bug in validation logic + case overwrite: + return n, ErrWriteTooLong + default: + return n, nil + } +} + +func (sw *sparseFileWriter) ReadFrom(r io.Reader) (n int64, err error) { + rs, ok := r.(io.ReadSeeker) + if ok { + if _, err := rs.Seek(0, io.SeekCurrent); err != nil { + ok = false // Not all io.Seeker can really seek + } + } + if !ok { + return io.Copy(struct{ io.Writer }{sw}, r) + } + + var readLastByte bool + pos0 := sw.pos + for sw.LogicalRemaining() > 0 && !readLastByte && err == nil { + var nf int64 // Size of fragment + dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() + if sw.pos < dataStart { // In a hole fragment + nf = dataStart - sw.pos + if sw.PhysicalRemaining() == 0 { + readLastByte = true + nf-- + } + _, err = rs.Seek(nf, io.SeekCurrent) + } else { // In a data fragment + nf = dataEnd - sw.pos + nf, err = io.CopyN(sw.fw, rs, nf) + } + sw.pos += nf + if sw.pos >= dataEnd && len(sw.sp) > 1 { + sw.sp = sw.sp[1:] // Ensure last fragment always remains + } + } + + // If the last fragment is a hole, then seek to 1-byte before EOF, and + // read a single byte to ensure the file is the right size. 
+ if readLastByte && err == nil { + _, err = mustReadFull(rs, []byte{0}) + sw.pos++ + } + + n = sw.pos - pos0 + switch { + case err == io.EOF: + return n, io.ErrUnexpectedEOF + case err == ErrWriteTooLong: + return n, errMissData // Not possible; implies bug in validation logic + case err != nil: + return n, err + case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0: + return n, errUnrefData // Not possible; implies bug in validation logic + default: + return n, ensureEOF(rs) + } +} + +func (sw sparseFileWriter) LogicalRemaining() int64 { + return sw.sp[len(sw.sp)-1].endOffset() - sw.pos +} +func (sw sparseFileWriter) PhysicalRemaining() int64 { + return sw.fw.PhysicalRemaining() +} + +// zeroWriter may only be written with NULs, otherwise it returns errWriteHole. +type zeroWriter struct{} + +func (zeroWriter) Write(b []byte) (int, error) { + for i, c := range b { + if c != 0 { + return i, errWriteHole + } + } + return len(b), nil +} + +// ensureEOF checks whether r is at EOF, reporting ErrWriteTooLong if not so. 
+func ensureEOF(r io.Reader) error { + n, err := tryReadFull(r, []byte{0}) + switch { + case n > 0: + return ErrWriteTooLong + case err == io.EOF: + return nil + default: + return err + } } From 16d00870eff7efee2692a32bd6c5aaa56aac5e9a Mon Sep 17 00:00:00 2001 From: Derek McGowan Date: Tue, 28 Nov 2017 16:18:15 -0800 Subject: [PATCH 2/3] Add test for prefix header issue Signed-off-by: Derek McGowan --- archive/issues_test.go | 47 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 archive/issues_test.go diff --git a/archive/issues_test.go b/archive/issues_test.go new file mode 100644 index 000000000..65e9f2b6c --- /dev/null +++ b/archive/issues_test.go @@ -0,0 +1,47 @@ +package archive + +import ( + "bytes" + "context" + "io/ioutil" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/containerd/containerd/archive/compression" + "github.com/containerd/containerd/testutil" +) + +// TestPrefixHeaderReadable tests that files that could be created with the +// version of this package that was built with <=go17 are still readable. 
+func TestPrefixHeaderReadable(t *testing.T) { + testutil.RequiresRoot(t) + + // https://gist.github.com/stevvooe/e2a790ad4e97425896206c0816e1a882#file-out-go + var testFile = []byte("\x1f\x8b\x08\x08\x44\x21\x68\x59\x00\x03\x74\x2e\x74\x61\x72\x00\x4b\xcb\xcf\x67\xa0\x35\x30\x80\x00\x86\x06\x10\x47\x01\xc1\x37\x40\x00\x54\xb6\xb1\xa1\xa9\x99\x09\x48\x25\x1d\x40\x69\x71\x49\x62\x91\x02\xe5\x76\xa1\x79\x84\x21\x91\xd6\x80\x72\xaf\x8f\x82\x51\x30\x0a\x46\x36\x00\x00\xf0\x1c\x1e\x95\x00\x06\x00\x00") + + tmpDir, err := ioutil.TempDir("", "prefix-test") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpDir) + + r, err := compression.DecompressStream(bytes.NewReader(testFile)) + if err != nil { + t.Fatal(err) + } + defer r.Close() + _, err = Apply(context.Background(), tmpDir, r) + if err != nil { + t.Fatal(err) + } + + baseName := "foo" + pth := strings.Repeat("a", 100-len(baseName)) + "/" + baseName + + _, err = os.Lstat(filepath.Join(tmpDir, pth)) + if err != nil { + t.Fatal(err) + } +} From bc9cb25012ce528c807e7188fd3d5fcef2ce6dba Mon Sep 17 00:00:00 2001 From: Derek McGowan Date: Tue, 28 Nov 2017 17:03:14 -0800 Subject: [PATCH 3/3] Update tar to use PAXRecords instead of Xattrs Signed-off-by: Derek McGowan --- archive/tar.go | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/archive/tar.go b/archive/tar.go index 0550f218c..cbd8340da 100644 --- a/archive/tar.go +++ b/archive/tar.go @@ -82,6 +82,8 @@ const ( // whiteoutOpaqueDir file means directory has been made opaque - meaning // readdir calls to this directory do not follow to lower layers. whiteoutOpaqueDir = whiteoutMetaPrefix + ".opq" + + paxSchilyXattr = "SCHILY.xattrs." ) // Apply applies a tar stream of an OCI style diff tar. 
@@ -388,9 +390,10 @@ func (cw *changeWriter) HandleChange(k fs.ChangeKind, p string, f os.FileInfo, e if capability, err := getxattr(source, "security.capability"); err != nil { return errors.Wrap(err, "failed to get capabilities xattr") } else if capability != nil { - hdr.Xattrs = map[string]string{ - "security.capability": string(capability), + if hdr.PAXRecords == nil { + hdr.PAXRecords = map[string]string{} } + hdr.PAXRecords[paxSchilyXattr+"security.capability"] = string(capability) } if err := cw.tw.WriteHeader(hdr); err != nil { @@ -509,13 +512,16 @@ func createTarFile(ctx context.Context, path, extractDir string, hdr *tar.Header } } - for key, value := range hdr.Xattrs { - if err := setxattr(path, key, value); err != nil { - if errors.Cause(err) == syscall.ENOTSUP { - log.G(ctx).WithError(err).Warnf("ignored xattr %s in archive", key) - continue + for key, value := range hdr.PAXRecords { + if strings.HasPrefix(key, paxSchilyXattr) { + key = key[len(paxSchilyXattr):] + if err := setxattr(path, key, value); err != nil { + if errors.Cause(err) == syscall.ENOTSUP { + log.G(ctx).WithError(err).Warnf("ignored xattr %s in archive", key) + continue + } + return err } - return err } }