Move content store implementation

Move the filesystem content store implementation to a
subpackage of content.

Signed-off-by: Derek McGowan <derek@mcgstyle.net>
This commit is contained in:
Derek McGowan
2017-07-12 15:25:13 -07:00
parent 9b53b8b68d
commit 442365248b
10 changed files with 53 additions and 42 deletions

38
content/fs/locks.go Normal file
View File

@@ -0,0 +1,38 @@
package fs
import (
"sync"
"github.com/containerd/containerd/errdefs"
"github.com/pkg/errors"
)
// Handles locking references
// TODO: use boltdb for lock status
var (
// locks lets us lock in process
locks = map[string]struct{}{}
locksMu sync.Mutex
)
func tryLock(ref string) error {
locksMu.Lock()
defer locksMu.Unlock()
if _, ok := locks[ref]; ok {
return errors.Wrapf(errdefs.ErrUnavailable, "ref %s locked", ref)
}
locks[ref] = struct{}{}
return nil
}
func unlock(ref string) {
locksMu.Lock()
defer locksMu.Unlock()
if _, ok := locks[ref]; ok {
delete(locks, ref)
}
}

26
content/fs/readerat.go Normal file
View File

@@ -0,0 +1,26 @@
package fs
import (
"io"
"os"
)
// readerat implements io.ReaderAt in a completely stateless manner by opening
// the referenced file for each call to ReadAt.
type readerAt struct {
f string
}
func (ra readerAt) ReadAt(p []byte, offset int64) (int, error) {
fp, err := os.Open(ra.f)
if err != nil {
return 0, err
}
defer fp.Close()
if _, err := fp.Seek(offset, io.SeekStart); err != nil {
return 0, err
}
return fp.Read(p)
}

401
content/fs/store.go Normal file
View File

@@ -0,0 +1,401 @@
package fs
import (
"context"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"sync"
"time"
"github.com/containerd/containerd/content"
"github.com/containerd/containerd/errdefs"
"github.com/containerd/containerd/filters"
"github.com/containerd/containerd/log"
digest "github.com/opencontainers/go-digest"
"github.com/pkg/errors"
)
var (
bufPool = sync.Pool{
New: func() interface{} {
return make([]byte, 1<<20)
},
}
)
// Store is digest-keyed store for content. All data written into the store is
// stored under a verifiable digest.
//
// Store can generally support multi-reader, single-writer ingest of data,
// including resumable ingest.
type store struct {
root string
}
func NewStore(root string) (content.Store, error) {
if err := os.MkdirAll(filepath.Join(root, "ingest"), 0777); err != nil && !os.IsExist(err) {
return nil, err
}
return &store{
root: root,
}, nil
}
func (s *store) Info(ctx context.Context, dgst digest.Digest) (content.Info, error) {
p := s.blobPath(dgst)
fi, err := os.Stat(p)
if err != nil {
if os.IsNotExist(err) {
err = errors.Wrapf(errdefs.ErrNotFound, "content %v", dgst)
}
return content.Info{}, err
}
return s.info(dgst, fi), nil
}
func (s *store) info(dgst digest.Digest, fi os.FileInfo) content.Info {
return content.Info{
Digest: dgst,
Size: fi.Size(),
CreatedAt: fi.ModTime(),
UpdatedAt: fi.ModTime(),
}
}
// Reader returns an io.ReadCloser for the blob.
func (s *store) Reader(ctx context.Context, dgst digest.Digest) (io.ReadCloser, error) {
fp, err := os.Open(s.blobPath(dgst))
if err != nil {
if os.IsNotExist(err) {
err = errors.Wrapf(errdefs.ErrNotFound, "content %v", dgst)
}
return nil, err
}
return fp, nil
}
// ReaderAt returns an io.ReaderAt for the blob.
func (s *store) ReaderAt(ctx context.Context, dgst digest.Digest) (io.ReaderAt, error) {
return readerAt{f: s.blobPath(dgst)}, nil
}
// Delete removes a blob by its digest.
//
// While this is safe to do concurrently, safe exist-removal logic must hold
// some global lock on the store.
func (cs *store) Delete(ctx context.Context, dgst digest.Digest) error {
if err := os.RemoveAll(cs.blobPath(dgst)); err != nil {
if !os.IsNotExist(err) {
return err
}
return errors.Wrapf(errdefs.ErrNotFound, "content %v", dgst)
}
return nil
}
func (cs *store) Update(ctx context.Context, info content.Info, fieldpaths ...string) (content.Info, error) {
// TODO: Support persisting and updating mutable content data
return content.Info{}, errors.Wrapf(errdefs.ErrFailedPrecondition, "update not supported on immutable content store")
}
func (cs *store) Walk(ctx context.Context, fn content.WalkFunc, filters ...string) error {
// TODO: Support filters
root := filepath.Join(cs.root, "blobs")
var alg digest.Algorithm
return filepath.Walk(root, func(path string, fi os.FileInfo, err error) error {
if err != nil {
return err
}
if !fi.IsDir() && !alg.Available() {
return nil
}
// TODO(stevvooe): There are few more cases with subdirs that should be
// handled in case the layout gets corrupted. This isn't strict enough
// an may spew bad data.
if path == root {
return nil
}
if filepath.Dir(path) == root {
alg = digest.Algorithm(filepath.Base(path))
if !alg.Available() {
alg = ""
return filepath.SkipDir
}
// descending into a hash directory
return nil
}
dgst := digest.NewDigestFromHex(alg.String(), filepath.Base(path))
if err := dgst.Validate(); err != nil {
// log error but don't report
log.L.WithError(err).WithField("path", path).Error("invalid digest for blob path")
// if we see this, it could mean some sort of corruption of the
// store or extra paths not expected previously.
}
return fn(cs.info(dgst, fi))
})
}
func (s *store) Status(ctx context.Context, ref string) (content.Status, error) {
return s.status(s.ingestRoot(ref))
}
func (s *store) ListStatuses(ctx context.Context, fs ...string) ([]content.Status, error) {
fp, err := os.Open(filepath.Join(s.root, "ingest"))
if err != nil {
return nil, err
}
defer fp.Close()
fis, err := fp.Readdir(-1)
if err != nil {
return nil, err
}
filter, err := filters.ParseAll(fs...)
if err != nil {
return nil, err
}
var active []content.Status
for _, fi := range fis {
p := filepath.Join(s.root, "ingest", fi.Name())
stat, err := s.status(p)
if err != nil {
if !os.IsNotExist(err) {
return nil, err
}
// TODO(stevvooe): This is a common error if uploads are being
// completed while making this listing. Need to consider taking a
// lock on the whole store to coordinate this aspect.
//
// Another option is to cleanup downloads asynchronously and
// coordinate this method with the cleanup process.
//
// For now, we just skip them, as they really don't exist.
continue
}
if filter.Match(adaptStatus(stat)) {
active = append(active, stat)
}
}
return active, nil
}
// status works like stat above except uses the path to the ingest.
func (s *store) status(ingestPath string) (content.Status, error) {
dp := filepath.Join(ingestPath, "data")
fi, err := os.Stat(dp)
if err != nil {
return content.Status{}, err
}
ref, err := readFileString(filepath.Join(ingestPath, "ref"))
if err != nil {
return content.Status{}, err
}
return content.Status{
Ref: ref,
Offset: fi.Size(),
Total: s.total(ingestPath),
UpdatedAt: fi.ModTime(),
StartedAt: getStartTime(fi),
}, nil
}
func adaptStatus(status content.Status) filters.Adaptor {
return filters.AdapterFunc(func(fieldpath []string) (string, bool) {
if len(fieldpath) == 0 {
return "", false
}
switch fieldpath[0] {
case "ref":
return status.Ref, true
}
return "", false
})
}
// total attempts to resolve the total expected size for the write.
func (s *store) total(ingestPath string) int64 {
totalS, err := readFileString(filepath.Join(ingestPath, "total"))
if err != nil {
return 0
}
total, err := strconv.ParseInt(totalS, 10, 64)
if err != nil {
// represents a corrupted file, should probably remove.
return 0
}
return total
}
// Writer begins or resumes the active writer identified by ref. If the writer
// is already in use, an error is returned. Only one writer may be in use per
// ref at a time.
//
// The argument `ref` is used to uniquely identify a long-lived writer transaction.
func (s *store) Writer(ctx context.Context, ref string, total int64, expected digest.Digest) (content.Writer, error) {
// TODO(stevvooe): Need to actually store expected here. We have
// code in the service that shouldn't be dealing with this.
if expected != "" {
p := s.blobPath(expected)
if _, err := os.Stat(p); err == nil {
return nil, errors.Wrapf(errdefs.ErrAlreadyExists, "content %v", expected)
}
}
path, refp, data := s.ingestPaths(ref)
if err := tryLock(ref); err != nil {
return nil, errors.Wrapf(err, "locking %v failed", ref)
}
var (
digester = digest.Canonical.Digester()
offset int64
startedAt time.Time
updatedAt time.Time
)
// ensure that the ingest path has been created.
if err := os.Mkdir(path, 0755); err != nil {
if !os.IsExist(err) {
return nil, err
}
status, err := s.status(path)
if err != nil {
return nil, errors.Wrap(err, "failed reading status of resume write")
}
if ref != status.Ref {
// NOTE(stevvooe): This is fairly catastrophic. Either we have some
// layout corruption or a hash collision for the ref key.
return nil, errors.Wrapf(err, "ref key does not match: %v != %v", ref, status.Ref)
}
if total > 0 && status.Total > 0 && total != status.Total {
return nil, errors.Errorf("provided total differs from status: %v != %v", total, status.Total)
}
// slow slow slow!!, send to goroutine or use resumable hashes
fp, err := os.Open(data)
if err != nil {
return nil, err
}
defer fp.Close()
p := bufPool.Get().([]byte)
defer bufPool.Put(p)
offset, err = io.CopyBuffer(digester.Hash(), fp, p)
if err != nil {
return nil, err
}
updatedAt = status.UpdatedAt
startedAt = status.StartedAt
total = status.Total
} else {
// the ingest is new, we need to setup the target location.
// write the ref to a file for later use
if err := ioutil.WriteFile(refp, []byte(ref), 0666); err != nil {
return nil, err
}
if total > 0 {
if err := ioutil.WriteFile(filepath.Join(path, "total"), []byte(fmt.Sprint(total)), 0666); err != nil {
return nil, err
}
}
startedAt = time.Now()
updatedAt = startedAt
}
fp, err := os.OpenFile(data, os.O_WRONLY|os.O_CREATE, 0666)
if err != nil {
return nil, errors.Wrap(err, "failed to open data file")
}
return &writer{
s: s,
fp: fp,
ref: ref,
path: path,
offset: offset,
total: total,
digester: digester,
startedAt: startedAt,
updatedAt: updatedAt,
}, nil
}
// Abort an active transaction keyed by ref. If the ingest is active, it will
// be cancelled. Any resources associated with the ingest will be cleaned.
func (s *store) Abort(ctx context.Context, ref string) error {
root := s.ingestRoot(ref)
if err := os.RemoveAll(root); err != nil {
if os.IsNotExist(err) {
return errors.Wrapf(errdefs.ErrNotFound, "ingest ref %q", ref)
}
return err
}
return nil
}
func (cs *store) blobPath(dgst digest.Digest) string {
return filepath.Join(cs.root, "blobs", dgst.Algorithm().String(), dgst.Hex())
}
func (s *store) ingestRoot(ref string) string {
dgst := digest.FromString(ref)
return filepath.Join(s.root, "ingest", dgst.Hex())
}
// ingestPaths are returned. The paths are the following:
//
// - root: entire ingest directory
// - ref: name of the starting ref, must be unique
// - data: file where data is written
//
func (s *store) ingestPaths(ref string) (string, string, string) {
var (
fp = s.ingestRoot(ref)
rp = filepath.Join(fp, "ref")
dp = filepath.Join(fp, "data")
)
return fp, rp, dp
}
func readFileString(path string) (string, error) {
p, err := ioutil.ReadFile(path)
return string(p), err
}

15
content/fs/store_linux.go Normal file
View File

@@ -0,0 +1,15 @@
package fs
import (
"os"
"syscall"
"time"
)
func getStartTime(fi os.FileInfo) time.Time {
if st, ok := fi.Sys().(*syscall.Stat_t); ok {
return time.Unix(int64(st.Ctim.Sec), int64(st.Ctim.Nsec))
}
return fi.ModTime()
}

17
content/fs/store_unix.go Normal file
View File

@@ -0,0 +1,17 @@
// +build darwin freebsd
package fs
import (
"os"
"syscall"
"time"
)
func getStartTime(fi os.FileInfo) time.Time {
if st, ok := fi.Sys().(*syscall.Stat_t); ok {
return time.Unix(int64(st.Ctimespec.Sec), int64(st.Ctimespec.Nsec))
}
return fi.ModTime()
}

View File

@@ -0,0 +1,10 @@
package fs
import (
"os"
"time"
)
func getStartTime(fi os.FileInfo) time.Time {
return fi.ModTime()
}

147
content/fs/writer.go Normal file
View File

@@ -0,0 +1,147 @@
package fs
import (
"os"
"path/filepath"
"runtime"
"time"
"github.com/containerd/containerd/content"
"github.com/containerd/containerd/errdefs"
"github.com/opencontainers/go-digest"
"github.com/pkg/errors"
)
// writer represents a write transaction against the blob store.
type writer struct {
s *store
fp *os.File // opened data file
path string // path to writer dir
ref string // ref key
offset int64
total int64
digester digest.Digester
startedAt time.Time
updatedAt time.Time
}
func (w *writer) Status() (content.Status, error) {
return content.Status{
Ref: w.ref,
Offset: w.offset,
Total: w.total,
StartedAt: w.startedAt,
UpdatedAt: w.updatedAt,
}, nil
}
// Digest returns the current digest of the content, up to the current write.
//
// Cannot be called concurrently with `Write`.
func (w *writer) Digest() digest.Digest {
return w.digester.Digest()
}
// Write p to the transaction.
//
// Note that writes are unbuffered to the backing file. When writing, it is
// recommended to wrap in a bufio.Writer or, preferably, use io.CopyBuffer.
func (w *writer) Write(p []byte) (n int, err error) {
n, err = w.fp.Write(p)
w.digester.Hash().Write(p[:n])
w.offset += int64(len(p))
w.updatedAt = time.Now()
return n, err
}
func (w *writer) Commit(size int64, expected digest.Digest) error {
if err := w.fp.Sync(); err != nil {
return errors.Wrap(err, "sync failed")
}
fi, err := w.fp.Stat()
if err != nil {
return errors.Wrap(err, "stat on ingest file failed")
}
// change to readonly, more important for read, but provides _some_
// protection from this point on. We use the existing perms with a mask
// only allowing reads honoring the umask on creation.
//
// This removes write and exec, only allowing read per the creation umask.
//
// NOTE: Windows does not support this operation
if runtime.GOOS != "windows" {
if err := w.fp.Chmod((fi.Mode() & os.ModePerm) &^ 0333); err != nil {
return errors.Wrap(err, "failed to change ingest file permissions")
}
}
if size > 0 && size != fi.Size() {
return errors.Errorf("%q failed size validation: %v != %v", w.ref, fi.Size(), size)
}
if err := w.fp.Close(); err != nil {
return errors.Wrap(err, "failed closing ingest")
}
dgst := w.digester.Digest()
if expected != "" && expected != dgst {
return errors.Errorf("unexpected digest: %v != %v", dgst, expected)
}
var (
ingest = filepath.Join(w.path, "data")
target = w.s.blobPath(dgst)
)
// make sure parent directories of blob exist
if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil {
return err
}
// clean up!!
defer os.RemoveAll(w.path)
if err := os.Rename(ingest, target); err != nil {
if os.IsExist(err) {
// collision with the target file!
return errors.Wrapf(errdefs.ErrAlreadyExists, "content %v", dgst)
}
return err
}
unlock(w.ref)
w.fp = nil
return nil
}
// Close the writer, flushing any unwritten data and leaving the progress in
// tact.
//
// If one needs to resume the transaction, a new writer can be obtained from
// `ContentStore.Resume` using the same key. The write can then be continued
// from it was left off.
//
// To abandon a transaction completely, first call close then `Store.Remove` to
// clean up the associated resources.
func (cw *writer) Close() (err error) {
unlock(cw.ref)
if cw.fp != nil {
cw.fp.Sync()
return cw.fp.Close()
}
return nil
}
func (w *writer) Truncate(size int64) error {
if size != 0 {
return errors.New("Truncate: unsupported size")
}
w.offset = 0
w.digester.Hash().Reset()
return w.fp.Truncate(0)
}