Update etcd client to 3.2.24 for latest release

Signed-off-by: Timothy St. Clair <timothysc@gmail.com>
Author: Timothy St. Clair
Date: 2018-08-31 13:57:37 -05:00
parent 674401ace1
commit 0bb21f647f
54 changed files with 1472 additions and 613 deletions

View File

@@ -54,6 +54,10 @@ type Backend interface {
     Hash(ignores map[IgnoreKey]struct{}) (uint32, error)
     // Size returns the current size of the backend.
     Size() int64
+    // SizeInUse returns the current size of the backend logically in use.
+    // Since the backend can manage free space in a non-byte unit such as
+    // number of pages, the returned value can be not exactly accurate in bytes.
+    SizeInUse() int64
     Defrag() error
     ForceCommit()
     Close() error
@@ -74,6 +78,10 @@ type backend struct {
     // size is the number of bytes in the backend
     size int64
+    // sizeInUse is the number of bytes actually used in the backend
+    sizeInUse int64
+
     // commits counts number of commits since start
     commits int64
@@ -244,6 +252,10 @@ func (b *backend) Size() int64 {
     return atomic.LoadInt64(&b.size)
 }
 
+func (b *backend) SizeInUse() int64 {
+    return atomic.LoadInt64(&b.sizeInUse)
+}
+
 func (b *backend) run() {
     defer close(b.donec)
     t := time.NewTimer(b.batchInterval)
@@ -272,18 +284,12 @@ func (b *backend) Commits() int64 {
 }
 
 func (b *backend) Defrag() error {
-    err := b.defrag()
-    if err != nil {
-        return err
-    }
-
-    // commit to update metadata like db.size
-    b.batchTx.Commit()
-
-    return nil
+    return b.defrag()
 }
 
 func (b *backend) defrag() error {
+    now := time.Now()
+
     // TODO: make this non-blocking?
     // lock batchTx to ensure nobody is using previous tx, and then
     // close previous ongoing tx.
@@ -341,7 +347,14 @@ func (b *backend) defrag() error {
     b.readTx.buf.reset()
     b.readTx.tx = b.unsafeBegin(false)
-    atomic.StoreInt64(&b.size, b.readTx.tx.Size())
+
+    size := b.readTx.tx.Size()
+    db := b.db
+    atomic.StoreInt64(&b.size, size)
+    atomic.StoreInt64(&b.sizeInUse, size-(int64(db.Stats().FreePageN)*int64(db.Info().PageSize)))
+
+    took := time.Since(now)
+    defragDurations.Observe(took.Seconds())
     return nil
 }
@@ -370,10 +383,10 @@ func defragdb(odb, tmpdb *bolt.DB, limit int) error {
         }
         tmpb, berr := tmptx.CreateBucketIfNotExists(next)
-        tmpb.FillPercent = 0.9 // for seq write in for each
         if berr != nil {
             return berr
         }
+        tmpb.FillPercent = 0.9 // for seq write in for each
 
         b.ForEach(func(k, v []byte) error {
             count++
@@ -402,7 +415,12 @@ func (b *backend) begin(write bool) *bolt.Tx {
     b.mu.RLock()
     tx := b.unsafeBegin(write)
     b.mu.RUnlock()
-    atomic.StoreInt64(&b.size, tx.Size())
+
+    size := tx.Size()
+    db := tx.DB()
+    atomic.StoreInt64(&b.size, size)
+    atomic.StoreInt64(&b.sizeInUse, size-(int64(db.Stats().FreePageN)*int64(db.Info().PageSize)))
+
     return tx
 }
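
The in-use figure added above comes straight from boltdb's own bookkeeping: the physical file size seen by a transaction, minus the pages sitting on the freelist (allocated but reclaimable). A standalone sketch of that calculation; the import path and the demo file name are assumptions, not part of this commit:

package main

import (
    "fmt"
    "log"

    "github.com/boltdb/bolt" // assumed import path; etcd vendors its own copy
)

func main() {
    db, err := bolt.Open("demo.db", 0600, nil)
    if err != nil {
        log.Fatal(err)
    }
    defer db.Close()

    var size int64
    if err := db.View(func(tx *bolt.Tx) error {
        size = tx.Size() // physical file size as seen by this tx
        return nil
    }); err != nil {
        log.Fatal(err)
    }

    // freelist pages are allocated but reclaimable; the remainder is "in use"
    free := int64(db.Stats().FreePageN) * int64(db.Info().PageSize)
    fmt.Printf("total=%d bytes, in use=%d bytes\n", size, size-free)
}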

View File

@@ -141,15 +141,15 @@ func unsafeForEach(tx *bolt.Tx, bucket []byte, visitor func(k, v []byte) error)
 // Commit commits a previous tx and begins a new writable one.
 func (t *batchTx) Commit() {
     t.Lock()
-    defer t.Unlock()
     t.commit(false)
+    t.Unlock()
 }
 
 // CommitAndStop commits the previous tx and does not create a new one.
 func (t *batchTx) CommitAndStop() {
     t.Lock()
-    defer t.Unlock()
     t.commit(true)
+    t.Unlock()
 }
 
 func (t *batchTx) Unlock() {
@@ -163,21 +163,15 @@ func (t *batchTx) commit(stop bool) {
     // commit the last tx
     if t.tx != nil {
         if t.pending == 0 && !stop {
-            t.backend.mu.RLock()
-            defer t.backend.mu.RUnlock()
-
-            // t.tx.DB()==nil if 'CommitAndStop' calls 'batchTx.commit(true)',
-            // which initializes *bolt.Tx.db and *bolt.Tx.meta as nil; panics t.tx.Size().
-            // Server must make sure 'batchTx.commit(false)' does not follow
-            // 'batchTx.commit(true)' (e.g. stopping backend, and inflight Hash call).
-            atomic.StoreInt64(&t.backend.size, t.tx.Size())
             return
         }
 
         start := time.Now()
+        // gofail: var beforeCommit struct{}
         err := t.tx.Commit()
+        // gofail: var afterCommit struct{}
         commitDurations.Observe(time.Since(start).Seconds())
         atomic.AddInt64(&t.backend.commits, 1)
@@ -222,21 +216,21 @@ func (t *batchTxBuffered) Unlock() {
 func (t *batchTxBuffered) Commit() {
     t.Lock()
-    defer t.Unlock()
     t.commit(false)
+    t.Unlock()
 }
 
 func (t *batchTxBuffered) CommitAndStop() {
     t.Lock()
-    defer t.Unlock()
     t.commit(true)
+    t.Unlock()
 }
 
 func (t *batchTxBuffered) commit(stop bool) {
     // all read txs must be closed to acquire boltdb commit rwlock
     t.backend.readTx.mu.Lock()
-    defer t.backend.readTx.mu.Unlock()
     t.unsafeCommit(stop)
+    t.backend.readTx.mu.Unlock()
 }
 
 func (t *batchTxBuffered) unsafeCommit(stop bool) {
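
Both commit paths above trade `defer t.Unlock()` for an explicit call after t.commit(...). That ordering matters because these types override Unlock (see func (t *batchTx) Unlock() above), so releasing the lock can itself perform work. A toy of the overridden-Unlock pattern, with invented names:

package main

import (
    "fmt"
    "sync"
)

type batch struct {
    sync.Mutex
    pending, limit int
}

// Unlock flushes pending work when the limit is hit, then releases the lock.
func (b *batch) Unlock() {
    if b.pending >= b.limit {
        fmt.Println("flush", b.pending, "ops")
        b.pending = 0
    }
    b.Mutex.Unlock()
}

func main() {
    b := &batch{limit: 2}
    for i := 0; i < 3; i++ {
        b.Lock()
        b.pending++
        b.Unlock() // resolves to the custom Unlock, not sync.Mutex.Unlock
    }
}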

View File

@@ -22,7 +22,22 @@ var (
         Subsystem: "disk",
         Name:      "backend_commit_duration_seconds",
         Help:      "The latency distributions of commit called by backend.",
-        Buckets:   prometheus.ExponentialBuckets(0.001, 2, 14),
+
+        // lowest bucket start of upper bound 0.001 sec (1 ms) with factor 2
+        // highest bucket start of 0.001 sec * 2^13 == 8.192 sec
+        Buckets: prometheus.ExponentialBuckets(0.001, 2, 14),
     })
+
+    defragDurations = prometheus.NewHistogram(prometheus.HistogramOpts{
+        Namespace: "etcd",
+        Subsystem: "disk",
+        Name:      "backend_defrag_duration_seconds",
+        Help:      "The latency distribution of backend defragmentation.",
+
+        // 100 MB usually takes 1 sec, so start with 10 MB of 100 ms
+        // lowest bucket start of upper bound 0.1 sec (100 ms) with factor 2
+        // highest bucket start of 0.1 sec * 2^12 == 409.6 sec
+        Buckets: prometheus.ExponentialBuckets(.1, 2, 13),
+    })
+
     snapshotDurations = prometheus.NewHistogram(prometheus.HistogramOpts{
@@ -30,12 +45,15 @@ var (
         Subsystem: "disk",
         Name:      "backend_snapshot_duration_seconds",
         Help:      "The latency distribution of backend snapshots.",
-        // 10 ms -> 655 seconds
+
+        // lowest bucket start of upper bound 0.01 sec (10 ms) with factor 2
+        // highest bucket start of 0.01 sec * 2^16 == 655.36 sec
         Buckets: prometheus.ExponentialBuckets(.01, 2, 17),
     })
 )
 
 func init() {
     prometheus.MustRegister(commitDurations)
+    prometheus.MustRegister(defragDurations)
     prometheus.MustRegister(snapshotDurations)
 }
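
The comments added in these hunks spell out the bucket arithmetic: prometheus.ExponentialBuckets(start, factor, count) yields count upper bounds start, start*factor, ..., start*factor^(count-1). A self-contained check of the figures quoted above:

package main

import "fmt"

// expBuckets mirrors the semantics of prometheus.ExponentialBuckets.
func expBuckets(start, factor float64, count int) []float64 {
    buckets := make([]float64, count)
    for i := range buckets {
        buckets[i] = start
        start *= factor
    }
    return buckets
}

func main() {
    commit := expBuckets(0.001, 2, 14)
    fmt.Println(commit[0], commit[13]) // 0.001 ... 8.192, per the commit-histogram comment
    defrag := expBuckets(0.1, 2, 13)
    fmt.Println(defrag[12]) // 409.6, per the defrag-histogram comment
}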

View File

@@ -150,8 +150,12 @@ func (s *store) compactBarrier(ctx context.Context, ch chan struct{}) {
 }
 
 func (s *store) Hash() (hash uint32, revision int64, err error) {
+    start := time.Now()
+
     s.b.ForceCommit()
     h, err := s.b.Hash(DefaultIgnores)
+
+    hashDurations.Observe(time.Since(start).Seconds())
     return h, s.currentRev, err
 }
@@ -245,10 +249,14 @@ func (s *store) Restore(b backend.Backend) error {
 }
 
 func (s *store) restore() error {
-    reportDbTotalSizeInBytesMu.Lock()
     b := s.b
+    reportDbTotalSizeInBytesMu.Lock()
     reportDbTotalSizeInBytes = func() float64 { return float64(b.Size()) }
     reportDbTotalSizeInBytesMu.Unlock()
+    reportDbTotalSizeInUseInBytesMu.Lock()
+    reportDbTotalSizeInUseInBytes = func() float64 { return float64(b.SizeInUse()) }
+    reportDbTotalSizeInUseInBytesMu.Unlock()
 
     min, max := newRevBytes(), newRevBytes()
     revToBytes(revision{main: 1}, min)
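
restore() hands sizes to the metrics package through mutex-guarded function variables rather than writing to the gauges directly, and the new in-use reporting just wires up a second closure the same way. A minimal sketch of that pattern, with invented names:

package main

import (
    "fmt"
    "sync"
)

var (
    mu        sync.RWMutex
    sizeInUse = func() float64 { return 0 } // placeholder until a backend exists
)

// report is what a GaugeFunc would call on every scrape.
func report() float64 {
    mu.RLock()
    defer mu.RUnlock()
    return sizeInUse()
}

type backend struct{ inUse int64 }

func (b *backend) SizeInUse() int64 { return b.inUse }

func main() {
    fmt.Println(report()) // 0: nothing wired up yet

    b := &backend{inUse: 4096}
    mu.Lock()
    sizeInUse = func() float64 { return float64(b.SizeInUse()) }
    mu.Unlock()

    fmt.Println(report()) // 4096
}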

View File

@@ -131,11 +131,23 @@ var (
         Buckets: prometheus.ExponentialBuckets(100, 2, 14),
     })
 
-    dbTotalSize = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
+    dbTotalSizeDebugging = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
         Namespace: "etcd_debugging",
         Subsystem: "mvcc",
         Name:      "db_total_size_in_bytes",
-        Help:      "Total size of the underlying database in bytes.",
+        Help:      "Total size of the underlying database physically allocated in bytes. Use etcd_mvcc_db_total_size_in_bytes",
+    },
+        func() float64 {
+            reportDbTotalSizeInBytesMu.RLock()
+            defer reportDbTotalSizeInBytesMu.RUnlock()
+            return reportDbTotalSizeInBytes()
+        },
+    )
+    dbTotalSize = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
+        Namespace: "etcd",
+        Subsystem: "mvcc",
+        Name:      "db_total_size_in_bytes",
+        Help:      "Total size of the underlying database physically allocated in bytes.",
     },
         func() float64 {
             reportDbTotalSizeInBytesMu.RLock()
@@ -145,7 +157,35 @@ var (
     )
     // overridden by mvcc initialization
     reportDbTotalSizeInBytesMu sync.RWMutex
-    reportDbTotalSizeInBytes func() float64 = func() float64 { return 0 }
+    reportDbTotalSizeInBytes = func() float64 { return 0 }
+
+    dbTotalSizeInUse = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
+        Namespace: "etcd",
+        Subsystem: "mvcc",
+        Name:      "db_total_size_in_use_in_bytes",
+        Help:      "Total size of the underlying database logically in use in bytes.",
+    },
+        func() float64 {
+            reportDbTotalSizeInUseInBytesMu.RLock()
+            defer reportDbTotalSizeInUseInBytesMu.RUnlock()
+            return reportDbTotalSizeInUseInBytes()
+        },
+    )
+    // overridden by mvcc initialization
+    reportDbTotalSizeInUseInBytesMu sync.RWMutex
+    reportDbTotalSizeInUseInBytes func() float64 = func() float64 { return 0 }
+
+    hashDurations = prometheus.NewHistogram(prometheus.HistogramOpts{
+        Namespace: "etcd",
+        Subsystem: "mvcc",
+        Name:      "hash_duration_seconds",
+        Help:      "The latency distribution of storage hash operation.",
+
+        // 100 MB usually takes 100 ms, so start with 10 MB of 10 ms
+        // lowest bucket start of upper bound 0.01 sec (10 ms) with factor 2
+        // highest bucket start of 0.01 sec * 2^14 == 163.84 sec
+        Buckets: prometheus.ExponentialBuckets(.01, 2, 15),
+    })
 )
 
 func init() {
@@ -162,7 +202,10 @@ func init() {
     prometheus.MustRegister(indexCompactionPauseDurations)
     prometheus.MustRegister(dbCompactionPauseDurations)
     prometheus.MustRegister(dbCompactionTotalDurations)
+    prometheus.MustRegister(dbTotalSizeDebugging)
     prometheus.MustRegister(dbTotalSize)
+    prometheus.MustRegister(dbTotalSizeInUse)
+    prometheus.MustRegister(hashDurations)
 }
 
 // ReportEventReceived reports that an event is received.
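
The effect of the metrics hunks: the old gauge stays registered under the etcd_debugging namespace with a Help string pointing at its replacement, while an identical reader is exported under the stable etcd namespace, so dashboards keyed to either name keep working. A sketch of the resulting pair, assuming only the prometheus client API already used in this file:

package main

import (
    "fmt"

    "github.com/prometheus/client_golang/prometheus"
)

func main() {
    read := func() float64 { return 42 } // stand-in for reportDbTotalSizeInBytes

    deprecated := prometheus.NewGaugeFunc(prometheus.GaugeOpts{
        Namespace: "etcd_debugging",
        Subsystem: "mvcc",
        Name:      "db_total_size_in_bytes",
        Help:      "Deprecated location. Use etcd_mvcc_db_total_size_in_bytes",
    }, read)
    stable := prometheus.NewGaugeFunc(prometheus.GaugeOpts{
        Namespace: "etcd",
        Subsystem: "mvcc",
        Name:      "db_total_size_in_bytes",
        Help:      "Total size of the underlying database physically allocated in bytes.",
    }, read)

    // full metric names become <namespace>_<subsystem>_<name>
    prometheus.MustRegister(deprecated, stable)
    fmt.Println("both etcd_debugging_mvcc_... and etcd_mvcc_... are now scrapeable")
}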

View File

@@ -188,7 +188,8 @@ func (s *watchableStore) Restore(b backend.Backend) error {
     }
 
     for wa := range s.synced.watchers {
-        s.unsynced.watchers.add(wa)
+        wa.restore = true
+        s.unsynced.add(wa)
     }
     s.synced = newWatcherGroup()
     return nil
@@ -479,6 +480,14 @@ type watcher struct {
     // compacted is set when the watcher is removed because of compaction
     compacted bool
 
+    // restore is true when the watcher is being restored from leader snapshot
+    // which means that this watcher has just been moved from "synced" to "unsynced"
+    // watcher group, possibly with a future revision when it was first added
+    // to the synced watcher
+    // "unsynced" watcher revision must always be <= current revision,
+    // except when the watcher were to be moved from "synced" watcher group
+    restore bool
+
     // minRev is the minimum revision update the watcher will accept
     minRev int64
     id     WatchID
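
Restore now tags each previously synced watcher with restore = true as it moves it into the unsynced group, which is what licenses the relaxed invariant described in the new struct comment. A toy model of that move, with invented names:

package main

import "fmt"

type watcher struct {
    minRev  int64
    restore bool
}

type group map[*watcher]struct{}

func (g group) add(w *watcher) { g[w] = struct{}{} }

func main() {
    synced, unsynced := group{}, group{}
    synced.add(&watcher{minRev: 100}) // may hold a future revision

    // mirrors Restore above: flag each watcher, then move it
    for w := range synced {
        w.restore = true
        unsynced.add(w)
    }
    synced = group{} // fresh, empty synced group

    for w := range unsynced {
        fmt.Printf("moved to unsynced: minRev=%d restore=%v\n", w.minRev, w.restore)
    }
}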

View File

@@ -15,6 +15,7 @@
 package mvcc
 
 import (
+    "fmt"
     "math"
 
     "github.com/coreos/etcd/mvcc/mvccpb"
@@ -238,7 +239,15 @@ func (wg *watcherGroup) chooseAll(curRev, compactRev int64) int64 {
     minRev := int64(math.MaxInt64)
     for w := range wg.watchers {
         if w.minRev > curRev {
-            panic("watcher current revision should not exceed current revision")
+            // after network partition, possibly choosing future revision watcher from restore operation
+            // with watch key "proxy-namespace__lostleader" and revision "math.MaxInt64 - 2"
+            // do not panic when such watcher had been moved from "synced" watcher during restore operation
+            if !w.restore {
+                panic(fmt.Errorf("watcher minimum revision %d should not exceed current revision %d", w.minRev, curRev))
+            }
+
+            // mark 'restore' done, since it's chosen
+            w.restore = false
         }
         if w.minRev < compactRev {
             select {
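
With the flag in place, chooseAll tolerates one future-revision sighting per restored watcher, clearing restore once the watcher is chosen, and otherwise keeps the panic, which now reports the offending revisions via fmt.Errorf. A toy version of that check:

package main

import (
    "fmt"
    "math"
)

type watcher struct {
    minRev  int64
    restore bool
}

func checkMinRevs(curRev int64, ws []*watcher) {
    for _, w := range ws {
        if w.minRev > curRev {
            if !w.restore {
                panic(fmt.Errorf("watcher minimum revision %d should not exceed current revision %d", w.minRev, curRev))
            }
            w.restore = false // tolerated once; the flag has done its job
        }
    }
}

func main() {
    ws := []*watcher{
        {minRev: 5},
        {minRev: math.MaxInt64 - 2, restore: true}, // e.g. a proxy "lostleader" sentinel moved in by restore
    }
    checkMinRevs(10, ws)
    fmt.Println("no panic: the restored future-revision watcher was tolerated")
}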