diff --git a/vendor.conf b/vendor.conf index 82e377bff..7a47f7512 100644 --- a/vendor.conf +++ b/vendor.conf @@ -40,7 +40,7 @@ github.com/containerd/ttrpc f02858b1457c5ca3aaec3a0803eb0d59f96e41d6 github.com/syndtr/gocapability db04d3cc01c8b54962a58ec7e491717d06cfcc16 gotest.tools v2.1.0 github.com/google/go-cmp v0.1.0 -go.etcd.io/bbolt v1.3.1-etcd.8 +go.etcd.io/bbolt v1.3.2 # cri dependencies github.com/containerd/cri 4dd6735020f5596dd41738f8c4f5cb07fa804c5e # master diff --git a/vendor/go.etcd.io/bbolt/README.md b/vendor/go.etcd.io/bbolt/README.md index 6546fc01a..e9989efc5 100644 --- a/vendor/go.etcd.io/bbolt/README.md +++ b/vendor/go.etcd.io/bbolt/README.md @@ -929,6 +929,7 @@ Below is a list of public, open source projects that use Bolt: * [ipxed](https://github.com/kelseyhightower/ipxed) - Web interface and api for ipxed. * [Ironsmith](https://github.com/timshannon/ironsmith) - A simple, script-driven continuous integration (build - > test -> release) tool, with no external dependencies * [Kala](https://github.com/ajvb/kala) - Kala is a modern job scheduler optimized to run on a single node. It is persistent, JSON over HTTP API, ISO 8601 duration notation, and dependent jobs. +* [Key Value Access Langusge (KVAL)](https://github.com/kval-access-language) - A proposed grammar for key-value datastores offering a bbolt binding. * [LedisDB](https://github.com/siddontang/ledisdb) - A high performance NoSQL, using Bolt as optional storage. * [lru](https://github.com/crowdriff/lru) - Easy to use Bolt-backed Least-Recently-Used (LRU) read-through cache with chainable remote stores. * [mbuckets](https://github.com/abhigupta912/mbuckets) - A Bolt wrapper that allows easy operations on multi level (nested) buckets. diff --git a/vendor/go.etcd.io/bbolt/db.go b/vendor/go.etcd.io/bbolt/db.go index d91dcf813..962248c99 100644 --- a/vendor/go.etcd.io/bbolt/db.go +++ b/vendor/go.etcd.io/bbolt/db.go @@ -43,6 +43,16 @@ var defaultPageSize = os.Getpagesize() // The time elapsed between consecutive file locking attempts. const flockRetryTimeout = 50 * time.Millisecond +// FreelistType is the type of the freelist backend +type FreelistType string + +const ( + // FreelistArrayType indicates backend freelist type is array + FreelistArrayType = FreelistType("array") + // FreelistMapType indicates backend freelist type is hashmap + FreelistMapType = FreelistType("hashmap") +) + // DB represents a collection of buckets persisted to a file on disk. // All data access is performed through transactions which can be obtained through the DB. // All the functions on DB will return a ErrDatabaseNotOpen if accessed before Open() is called. @@ -70,6 +80,13 @@ type DB struct { // re-sync during recovery. NoFreelistSync bool + // FreelistType sets the backend freelist type. There are two options. Array which is simple but endures + // dramatic performance degradation if database is large and framentation in freelist is common. + // The alternative one is using hashmap, it is faster in almost all circumstances + // but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe. + // The default type is array + FreelistType FreelistType + // When true, skips the truncate call when growing the database. // Setting this to true is only safe on non-ext3/ext4 systems. // Skipping truncation avoids preallocation of hard drive space and @@ -169,6 +186,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { db.NoGrowSync = options.NoGrowSync db.MmapFlags = options.MmapFlags db.NoFreelistSync = options.NoFreelistSync + db.FreelistType = options.FreelistType // Set default values for later DB operations. db.MaxBatchSize = DefaultMaxBatchSize @@ -283,7 +301,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { // concurrent accesses being made to the freelist. func (db *DB) loadFreelist() { db.freelistLoad.Do(func() { - db.freelist = newFreelist() + db.freelist = newFreelist(db.FreelistType) if !db.hasSyncedFreelist() { // Reconstruct free list by scanning the DB. db.freelist.readIDs(db.freepages()) @@ -291,7 +309,7 @@ func (db *DB) loadFreelist() { // Read free list from freelist page. db.freelist.read(db.page(db.meta().freelist)) } - db.stats.FreePageN = len(db.freelist.ids) + db.stats.FreePageN = db.freelist.free_count() }) } @@ -1005,6 +1023,13 @@ type Options struct { // under normal operation, but requires a full database re-sync during recovery. NoFreelistSync bool + // FreelistType sets the backend freelist type. There are two options. Array which is simple but endures + // dramatic performance degradation if database is large and framentation in freelist is common. + // The alternative one is using hashmap, it is faster in almost all circumstances + // but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe. + // The default type is array + FreelistType FreelistType + // Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to // grab a shared lock (UNIX). ReadOnly bool @@ -1034,8 +1059,9 @@ type Options struct { // DefaultOptions represent the options used if nil options are passed into Open(). // No timeout is used which will cause Bolt to wait indefinitely for a lock. var DefaultOptions = &Options{ - Timeout: 0, - NoGrowSync: false, + Timeout: 0, + NoGrowSync: false, + FreelistType: FreelistArrayType, } // Stats represents statistics about the database. diff --git a/vendor/go.etcd.io/bbolt/freelist.go b/vendor/go.etcd.io/bbolt/freelist.go index e4bcb2dcf..93fd85d50 100644 --- a/vendor/go.etcd.io/bbolt/freelist.go +++ b/vendor/go.etcd.io/bbolt/freelist.go @@ -14,22 +14,54 @@ type txPending struct { lastReleaseBegin txid // beginning txid of last matching releaseRange } +// pidSet holds the set of starting pgids which have the same span size +type pidSet map[pgid]struct{} + // freelist represents a list of all pages that are available for allocation. // It also tracks pages that have been freed but are still in use by open transactions. type freelist struct { - ids []pgid // all free and available free page ids. - allocs map[pgid]txid // mapping of txid that allocated a pgid. - pending map[txid]*txPending // mapping of soon-to-be free page ids by tx. - cache map[pgid]bool // fast lookup of all free and pending page ids. + freelistType FreelistType // freelist type + ids []pgid // all free and available free page ids. + allocs map[pgid]txid // mapping of txid that allocated a pgid. + pending map[txid]*txPending // mapping of soon-to-be free page ids by tx. + cache map[pgid]bool // fast lookup of all free and pending page ids. + freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size + forwardMap map[pgid]uint64 // key is start pgid, value is its span size + backwardMap map[pgid]uint64 // key is end pgid, value is its span size + allocate func(txid txid, n int) pgid // the freelist allocate func + free_count func() int // the function which gives you free page number + mergeSpans func(ids pgids) // the mergeSpan func + getFreePageIDs func() []pgid // get free pgids func + readIDs func(pgids []pgid) // readIDs func reads list of pages and init the freelist } // newFreelist returns an empty, initialized freelist. -func newFreelist() *freelist { - return &freelist{ - allocs: make(map[pgid]txid), - pending: make(map[txid]*txPending), - cache: make(map[pgid]bool), +func newFreelist(freelistType FreelistType) *freelist { + f := &freelist{ + freelistType: freelistType, + allocs: make(map[pgid]txid), + pending: make(map[txid]*txPending), + cache: make(map[pgid]bool), + freemaps: make(map[uint64]pidSet), + forwardMap: make(map[pgid]uint64), + backwardMap: make(map[pgid]uint64), } + + if freelistType == FreelistMapType { + f.allocate = f.hashmapAllocate + f.free_count = f.hashmapFreeCount + f.mergeSpans = f.hashmapMergeSpans + f.getFreePageIDs = f.hashmapGetFreePageIDs + f.readIDs = f.hashmapReadIDs + } else { + f.allocate = f.arrayAllocate + f.free_count = f.arrayFreeCount + f.mergeSpans = f.arrayMergeSpans + f.getFreePageIDs = f.arrayGetFreePageIDs + f.readIDs = f.arrayReadIDs + } + + return f } // size returns the size of the page after serialization. @@ -47,8 +79,8 @@ func (f *freelist) count() int { return f.free_count() + f.pending_count() } -// free_count returns count of free pages -func (f *freelist) free_count() int { +// arrayFreeCount returns count of free pages(array version) +func (f *freelist) arrayFreeCount() int { return len(f.ids) } @@ -69,12 +101,12 @@ func (f *freelist) copyall(dst []pgid) { m = append(m, txp.ids...) } sort.Sort(m) - mergepgids(dst, f.ids, m) + mergepgids(dst, f.getFreePageIDs(), m) } -// allocate returns the starting page id of a contiguous list of pages of a given size. +// arrayAllocate returns the starting page id of a contiguous list of pages of a given size. // If a contiguous block cannot be found then 0 is returned. -func (f *freelist) allocate(txid txid, n int) pgid { +func (f *freelist) arrayAllocate(txid txid, n int) pgid { if len(f.ids) == 0 { return 0 } @@ -160,8 +192,7 @@ func (f *freelist) release(txid txid) { delete(f.pending, tid) } } - sort.Sort(m) - f.ids = pgids(f.ids).merge(m) + f.mergeSpans(m) } // releaseRange moves pending pages allocated within an extent [begin,end] to the free list. @@ -194,8 +225,7 @@ func (f *freelist) releaseRange(begin, end txid) { delete(f.pending, tid) } } - sort.Sort(m) - f.ids = pgids(f.ids).merge(m) + f.mergeSpans(m) } // rollback removes the pages from a given pending tx. @@ -222,8 +252,7 @@ func (f *freelist) rollback(txid txid) { } // Remove pages from pending list and mark as free if allocated by txid. delete(f.pending, txid) - sort.Sort(m) - f.ids = pgids(f.ids).merge(m) + f.mergeSpans(m) } // freed returns whether a given page is in the free list. @@ -249,21 +278,25 @@ func (f *freelist) read(p *page) { f.ids = nil } else { ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx : idx+count] - f.ids = make([]pgid, len(ids)) - copy(f.ids, ids) + // copy the ids, so we don't modify on the freelist page directly + idsCopy := make([]pgid, count) + copy(idsCopy, ids) // Make sure they're sorted. - sort.Sort(pgids(f.ids)) - } + sort.Sort(pgids(idsCopy)) - // Rebuild the page cache. + f.readIDs(idsCopy) + } +} + +// arrayReadIDs initializes the freelist from a given list of ids. +func (f *freelist) arrayReadIDs(ids []pgid) { + f.ids = ids f.reindex() } -// read initializes the freelist from a given list of ids. -func (f *freelist) readIDs(ids []pgid) { - f.ids = ids - f.reindex() +func (f *freelist) arrayGetFreePageIDs() []pgid { + return f.ids } // write writes the page ids onto a freelist page. All free and pending ids are @@ -307,22 +340,20 @@ func (f *freelist) reload(p *page) { // Check each page in the freelist and build a new available freelist // with any pages not in the pending lists. var a []pgid - for _, id := range f.ids { + for _, id := range f.getFreePageIDs() { if !pcache[id] { a = append(a, id) } } - f.ids = a - // Once the available list is rebuilt then rebuild the free cache so that - // it includes the available and pending free pages. - f.reindex() + f.readIDs(a) } // reindex rebuilds the free cache based on available and pending free lists. func (f *freelist) reindex() { - f.cache = make(map[pgid]bool, len(f.ids)) - for _, id := range f.ids { + ids := f.getFreePageIDs() + f.cache = make(map[pgid]bool, len(ids)) + for _, id := range ids { f.cache[id] = true } for _, txp := range f.pending { @@ -331,3 +362,9 @@ func (f *freelist) reindex() { } } } + +// arrayMergeSpans try to merge list of pages(represented by pgids) with existing spans but using array +func (f *freelist) arrayMergeSpans(ids pgids) { + sort.Sort(ids) + f.ids = pgids(f.ids).merge(ids) +} diff --git a/vendor/go.etcd.io/bbolt/freelist_hmap.go b/vendor/go.etcd.io/bbolt/freelist_hmap.go new file mode 100644 index 000000000..6a03a6c3c --- /dev/null +++ b/vendor/go.etcd.io/bbolt/freelist_hmap.go @@ -0,0 +1,178 @@ +package bbolt + +import "sort" + +// hashmapFreeCount returns count of free pages(hashmap version) +func (f *freelist) hashmapFreeCount() int { + // use the forwardmap to get the total count + count := 0 + for _, size := range f.forwardMap { + count += int(size) + } + return count +} + +// hashmapAllocate serves the same purpose as arrayAllocate, but use hashmap as backend +func (f *freelist) hashmapAllocate(txid txid, n int) pgid { + if n == 0 { + return 0 + } + + // if we have a exact size match just return short path + if bm, ok := f.freemaps[uint64(n)]; ok { + for pid := range bm { + // remove the span + f.delSpan(pid, uint64(n)) + + f.allocs[pid] = txid + + for i := pgid(0); i < pgid(n); i++ { + delete(f.cache, pid+pgid(i)) + } + return pid + } + } + + // lookup the map to find larger span + for size, bm := range f.freemaps { + if size < uint64(n) { + continue + } + + for pid := range bm { + // remove the initial + f.delSpan(pid, uint64(size)) + + f.allocs[pid] = txid + + remain := size - uint64(n) + + // add remain span + f.addSpan(pid+pgid(n), remain) + + for i := pgid(0); i < pgid(n); i++ { + delete(f.cache, pid+pgid(i)) + } + return pid + } + } + + return 0 +} + +// hashmapReadIDs reads pgids as input an initial the freelist(hashmap version) +func (f *freelist) hashmapReadIDs(pgids []pgid) { + f.init(pgids) + + // Rebuild the page cache. + f.reindex() +} + +// hashmapGetFreePageIDs returns the sorted free page ids +func (f *freelist) hashmapGetFreePageIDs() []pgid { + count := f.free_count() + if count == 0 { + return nil + } + + m := make([]pgid, 0, count) + for start, size := range f.forwardMap { + for i := 0; i < int(size); i++ { + m = append(m, start+pgid(i)) + } + } + sort.Sort(pgids(m)) + + return m +} + +// hashmapMergeSpans try to merge list of pages(represented by pgids) with existing spans +func (f *freelist) hashmapMergeSpans(ids pgids) { + for _, id := range ids { + // try to see if we can merge and update + f.mergeWithExistingSpan(id) + } +} + +// mergeWithExistingSpan merges pid to the existing free spans, try to merge it backward and forward +func (f *freelist) mergeWithExistingSpan(pid pgid) { + prev := pid - 1 + next := pid + 1 + + preSize, mergeWithPrev := f.backwardMap[prev] + nextSize, mergeWithNext := f.forwardMap[next] + newStart := pid + newSize := uint64(1) + + if mergeWithPrev { + //merge with previous span + start := prev + 1 - pgid(preSize) + f.delSpan(start, preSize) + + newStart -= pgid(preSize) + newSize += preSize + } + + if mergeWithNext { + // merge with next span + f.delSpan(next, nextSize) + newSize += nextSize + } + + f.addSpan(newStart, newSize) +} + +func (f *freelist) addSpan(start pgid, size uint64) { + f.backwardMap[start-1+pgid(size)] = size + f.forwardMap[start] = size + if _, ok := f.freemaps[size]; !ok { + f.freemaps[size] = make(map[pgid]struct{}) + } + + f.freemaps[size][start] = struct{}{} +} + +func (f *freelist) delSpan(start pgid, size uint64) { + delete(f.forwardMap, start) + delete(f.backwardMap, start+pgid(size-1)) + delete(f.freemaps[size], start) + if len(f.freemaps[size]) == 0 { + delete(f.freemaps, size) + } +} + +// initial from pgids using when use hashmap version +// pgids must be sorted +func (f *freelist) init(pgids []pgid) { + if len(pgids) == 0 { + return + } + + size := uint64(1) + start := pgids[0] + + if !sort.SliceIsSorted([]pgid(pgids), func(i, j int) bool { return pgids[i] < pgids[j] }) { + panic("pgids not sorted") + } + + f.freemaps = make(map[uint64]pidSet) + f.forwardMap = make(map[pgid]uint64) + f.backwardMap = make(map[pgid]uint64) + + for i := 1; i < len(pgids); i++ { + // continuous page + if pgids[i] == pgids[i-1]+1 { + size++ + } else { + f.addSpan(start, size) + + size = 1 + start = pgids[i] + } + } + + // init the tail + if size != 0 && start != 0 { + f.addSpan(start, size) + } +}