Update godeps for etcd 3.0.4

This commit is contained in:
Timothy St. Clair
2016-07-22 13:54:40 -05:00
parent 456c43c22d
commit 5f008faa8b
457 changed files with 25492 additions and 10481 deletions

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -27,6 +27,8 @@ import (
"github.com/coreos/etcd/wal/walpb"
)
const minSectorSize = 512
type decoder struct {
mu sync.Mutex
brs []*bufio.Reader
@@ -73,7 +75,9 @@ func (d *decoder) decodeRecord(rec *walpb.Record) error {
return err
}
data := make([]byte, l)
recBytes, padBytes := decodeFrameSize(l)
data := make([]byte, recBytes+padBytes)
if _, err = io.ReadFull(d.brs[0], data); err != nil {
// ReadFull returns io.EOF only if no bytes were read
// the decoder should treat this as an ErrUnexpectedEOF instead.
@@ -82,7 +86,10 @@ func (d *decoder) decodeRecord(rec *walpb.Record) error {
}
return err
}
if err := rec.Unmarshal(data); err != nil {
if err := rec.Unmarshal(data[:recBytes]); err != nil {
if d.isTornEntry(data) {
return io.ErrUnexpectedEOF
}
return err
}
@@ -90,14 +97,65 @@ func (d *decoder) decodeRecord(rec *walpb.Record) error {
if rec.Type != crcType {
d.crc.Write(rec.Data)
if err := rec.Validate(d.crc.Sum32()); err != nil {
if d.isTornEntry(data) {
return io.ErrUnexpectedEOF
}
return err
}
}
// record decoded as valid; point last valid offset to end of record
d.lastValidOff += l + 8
d.lastValidOff += recBytes + padBytes + 8
return nil
}
// decodeFrameSize unpacks a frame length field into the number of record
// bytes and the number of padding bytes that follow the record.
//
// The low 56 bits of lenField hold the record size. When the most significant
// bit is set (lenField is negative), the low 3 bits of the most significant
// byte hold the padding size; otherwise the padding is 0.
func decodeFrameSize(lenField int64) (recBytes int64, padBytes int64) {
	// mask selecting the low 56 bits, i.e. everything below the top byte
	const sizeMask = uint64(1)<<56 - 1

	raw := uint64(lenField)
	recBytes = int64(raw & sizeMask)

	// a clear MSb (non-negative length) means the frame carries no padding
	if lenField >= 0 {
		return recBytes, 0
	}
	return recBytes, int64((raw >> 56) & 0x7)
}
// isTornEntry reports whether data looks like the result of a torn write:
// the record was only partially persisted, leaving at least one
// sector-aligned piece of it entirely zero.
//
// The check is only attempted when the decoder has exactly one reader left
// in d.brs; in every other case it returns false.
func (d *decoder) isTornEntry(data []byte) bool {
	if len(d.brs) != 1 {
		return false
	}

	// file offset of the first data byte; the extra 8 presumably accounts for
	// the length field that precedes the record data
	sectorOff := d.lastValidOff + 8

	// walk data one sector-bounded piece at a time
	for pos := 0; pos < len(data); {
		// bytes left until the next sector boundary, capped at the end of data
		n := int(minSectorSize - (sectorOff % minSectorSize))
		if remaining := len(data) - pos; n > remaining {
			n = remaining
		}

		// a piece consisting solely of zero bytes marks a torn write
		allZero := true
		for _, b := range data[pos : pos+n] {
			if b != 0 {
				allZero = false
				break
			}
		}
		if allZero {
			return true
		}

		sectorOff += int64(n)
		pos += n
	}
	return false
}
// updateCRC replaces the decoder's running CRC (d.crc) with a new one built
// by crc.New from prevCrc and the crcTable declared elsewhere in the package.
func (d *decoder) updateCRC(prevCrc uint32) {
d.crc = crc.New(prevCrc, crcTable)
}

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -34,6 +34,13 @@ When a user has finished using a WAL it must be closed:
w.Close()
Each WAL file is a stream of WAL records. A WAL record is a length field and a wal record
protobuf. The record protobuf contains a CRC, a type, and a data payload. The length field is a
64-bit packed structure holding the length of the remaining logical record data in its lower
56 bits and its physical padding in the lowest three bits of the most significant byte (whose top bit is set when padding is present). Each
record is 8-byte aligned so that the length field is never torn. The CRC contains the CRC32
value of all record protobufs preceding the current record.
WAL files are placed inside of the directory in the following format:
$seq-$index.wal
@@ -41,7 +48,7 @@ The first WAL file to be created will be 0000000000000000-0000000000000000.wal
indicating an initial sequence of 0 and an initial raft index of 0. The first
entry written to WAL MUST have raft index 0.
WAL will cuts its current wal files if its size exceeds 8MB. This will increment an internal
WAL will cut its current tail wal file if its size exceeds 64MB. This will increment an internal
sequence number and cause a new file to be created. If the last raft index saved
was 0x20 and this is the first time cut has been called on this WAL then the sequence will
increment from 0x0 to 0x1. The new file will be: 0000000000000001-0000000000000021.wal.

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -68,22 +68,38 @@ func (e *encoder) encode(rec *walpb.Record) error {
}
data = e.buf[:n]
}
if err = writeInt64(e.bw, int64(len(data)), e.uint64buf); err != nil {
lenField, padBytes := encodeFrameSize(len(data))
if err = writeUint64(e.bw, lenField, e.uint64buf); err != nil {
return err
}
if padBytes != 0 {
data = append(data, make([]byte, padBytes)...)
}
_, err = e.bw.Write(data)
return err
}
// encodeFrameSize builds the frame length field for a record of dataBytes
// bytes and reports how many padding bytes must follow the record.
//
// Records are padded to a multiple of 8 bytes so that the length field is
// never split by a torn write. When padding is needed, the most significant
// byte of lenField is set to 0x80|padBytes; otherwise lenField is simply
// dataBytes.
func encodeFrameSize(dataBytes int) (lenField uint64, padBytes int) {
	const alignment = 8

	rem := dataBytes % alignment
	if rem == 0 {
		// already aligned: plain length, no padding marker
		return uint64(dataBytes), 0
	}

	padBytes = (alignment - rem) % alignment
	// flag bit plus pad count go into the top byte of the length field
	lenField = uint64(dataBytes) | uint64(0x80|padBytes)<<56
	return lenField, padBytes
}
// flush calls Flush on the encoder's writer e.bw while holding e.mu and
// returns whatever error Flush reports.
func (e *encoder) flush() error {
e.mu.Lock()
defer e.mu.Unlock()
return e.bw.Flush()
}
func writeInt64(w io.Writer, n int64, buf []byte) error {
func writeUint64(w io.Writer, n uint64, buf []byte) error {
// http://golang.org/src/encoding/binary/binary.go
binary.LittleEndian.PutUint64(buf, uint64(n))
binary.LittleEndian.PutUint64(buf, n)
_, err := w.Write(buf)
return err
}

View File

@@ -1,4 +1,4 @@
// Copyright 2016 CoreOS, Inc.
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -48,7 +48,8 @@ func newFilePipeline(dir string, fileSize int64) *filePipeline {
return fp
}
// Open returns a fresh file for writing
// Open returns a fresh file for writing. Rename the file before calling
// Open again or there will be file collisions.
func (fp *filePipeline) Open() (f *fileutil.LockedFile, err error) {
select {
case f = <-fp.filec:
@@ -63,8 +64,9 @@ func (fp *filePipeline) Close() error {
}
func (fp *filePipeline) alloc() (f *fileutil.LockedFile, err error) {
fpath := path.Join(fp.dir, fmt.Sprintf("%d.tmp", fp.count))
if f, err = fileutil.LockFile(fpath, os.O_CREATE|os.O_WRONLY, 0600); err != nil {
// count % 2 so this file isn't the same as the one last published
fpath := path.Join(fp.dir, fmt.Sprintf("%d.tmp", fp.count%2))
if f, err = fileutil.LockFile(fpath, os.O_CREATE|os.O_WRONLY, fileutil.PrivateFileMode); err != nil {
return nil, err
}
if err = fileutil.Preallocate(f.File, fp.size, true); err != nil {

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -19,20 +19,13 @@ import "github.com/prometheus/client_golang/prometheus"
var (
syncDurations = prometheus.NewHistogram(prometheus.HistogramOpts{
Namespace: "etcd",
Subsystem: "wal",
Name: "fsync_durations_seconds",
Subsystem: "disk",
Name: "wal_fsync_duration_seconds",
Help: "The latency distributions of fsync called by wal.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 14),
})
lastIndexSaved = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "wal",
Name: "last_index_saved",
Help: "The index of the last entry saved by wal.",
})
)
func init() {
prometheus.MustRegister(syncDurations)
prometheus.MustRegister(lastIndexSaved)
}

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -32,15 +32,13 @@ func Repair(dirpath string) bool {
}
defer f.Close()
n := 0
rec := &walpb.Record{}
decoder := newDecoder(f)
for {
lastOffset := decoder.lastOffset()
err := decoder.decode(rec)
switch err {
case nil:
n += 8 + rec.Size()
// update crc of the decoder when necessary
switch rec.Type {
case crcType:
@@ -74,11 +72,11 @@ func Repair(dirpath string) bool {
return false
}
if err = f.Truncate(int64(n)); err != nil {
if err = f.Truncate(int64(lastOffset)); err != nil {
plog.Errorf("could not repair %v, failed to truncate file", f.Name())
return false
}
if err = f.Sync(); err != nil {
if err = fileutil.Fsync(f.File); err != nil {
plog.Errorf("could not repair %v, failed to sync file", f.Name())
return false
}
@@ -91,15 +89,11 @@ func Repair(dirpath string) bool {
}
// openLast opens the last wal file for read and write.
func openLast(dirpath string) (*os.File, error) {
names, err := fileutil.ReadDir(dirpath)
func openLast(dirpath string) (*fileutil.LockedFile, error) {
names, err := readWalNames(dirpath)
if err != nil {
return nil, err
}
names = checkWalNames(names)
if len(names) == 0 {
return nil, ErrFileNotFound
}
last := path.Join(dirpath, names[len(names)-1])
return os.OpenFile(last, os.O_RDWR, 0)
return fileutil.LockFile(last, os.O_RDWR, fileutil.PrivateFileMode)
}

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -67,12 +67,26 @@ func isValidSeq(names []string) bool {
}
return true
}
// readWalNames reads the entries of dirpath via fileutil.ReadDir, filters
// them through checkWalNames, and returns the names that remain.
// It returns the ReadDir error unchanged, or ErrFileNotFound when no name
// survives the filter.
func readWalNames(dirpath string) ([]string, error) {
	entries, err := fileutil.ReadDir(dirpath)
	if err != nil {
		return nil, err
	}
	if valid := checkWalNames(entries); len(valid) > 0 {
		return valid, nil
	}
	return nil, ErrFileNotFound
}
func checkWalNames(names []string) []string {
wnames := make([]string, 0)
for _, name := range names {
if _, _, err := parseWalName(name); err != nil {
plog.Warningf("ignored file %v in wal", name)
// don't complain about left over tmp files
if !strings.HasSuffix(name, ".tmp") {
plog.Warningf("ignored file %v in wal", name)
}
continue
}
wnames = append(wnames, name)

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -15,13 +15,13 @@
package wal
import (
"bytes"
"errors"
"fmt"
"hash/crc32"
"io"
"os"
"path"
"reflect"
"sync"
"time"
@@ -41,12 +41,13 @@ const (
crcType
snapshotType
// the owner can make/remove files inside the directory
privateDirMode = 0700
// the expected size of each wal segment file.
// the actual size might be bigger than it.
segmentSizeBytes = 64 * 1000 * 1000 // 64MB
// warnSyncDuration is the amount of time allotted to an fsync before
// logging a warning
warnSyncDuration = time.Second
)
var (
@@ -89,12 +90,19 @@ func Create(dirpath string, metadata []byte) (*WAL, error) {
return nil, os.ErrExist
}
if err := os.MkdirAll(dirpath, privateDirMode); err != nil {
// keep temporary wal directory so WAL initialization appears atomic
tmpdirpath := path.Clean(dirpath) + ".tmp"
if fileutil.Exist(tmpdirpath) {
if err := os.RemoveAll(tmpdirpath); err != nil {
return nil, err
}
}
if err := fileutil.CreateDirAll(tmpdirpath); err != nil {
return nil, err
}
p := path.Join(dirpath, walName(0, 0))
f, err := fileutil.LockFile(p, os.O_WRONLY|os.O_CREATE, 0600)
p := path.Join(tmpdirpath, walName(0, 0))
f, err := fileutil.LockFile(p, os.O_WRONLY|os.O_CREATE, fileutil.PrivateFileMode)
if err != nil {
return nil, err
}
@@ -109,7 +117,6 @@ func Create(dirpath string, metadata []byte) (*WAL, error) {
dir: dirpath,
metadata: metadata,
encoder: newEncoder(f, 0),
fp: newFilePipeline(dirpath, segmentSizeBytes),
}
w.locks = append(w.locks, f)
if err := w.saveCrc(0); err != nil {
@@ -121,7 +128,23 @@ func Create(dirpath string, metadata []byte) (*WAL, error) {
if err := w.SaveSnapshot(walpb.Snapshot{}); err != nil {
return nil, err
}
return w, nil
// rename of directory with locked files doesn't work on windows; close
// the WAL to release the locks so the directory can be renamed
w.Close()
if err := os.Rename(tmpdirpath, dirpath); err != nil {
return nil, err
}
// reopen and relock
newWAL, oerr := Open(dirpath, walpb.Snapshot{})
if oerr != nil {
return nil, oerr
}
if _, _, _, err := newWAL.ReadAll(); err != nil {
newWAL.Close()
return nil, err
}
return newWAL, nil
}
// Open opens the WAL at the given snap.
@@ -141,14 +164,10 @@ func OpenForRead(dirpath string, snap walpb.Snapshot) (*WAL, error) {
}
func openAtIndex(dirpath string, snap walpb.Snapshot, write bool) (*WAL, error) {
names, err := fileutil.ReadDir(dirpath)
names, err := readWalNames(dirpath)
if err != nil {
return nil, err
}
names = checkWalNames(names)
if len(names) == 0 {
return nil, ErrFileNotFound
}
nameIndex, ok := searchIndex(names, snap.Index)
if !ok || !isValidSeq(names[nameIndex:]) {
@@ -162,7 +181,7 @@ func openAtIndex(dirpath string, snap walpb.Snapshot, write bool) (*WAL, error)
for _, name := range names[nameIndex:] {
p := path.Join(dirpath, name)
if write {
l, err := fileutil.TryLockFile(p, os.O_RDWR, 0600)
l, err := fileutil.TryLockFile(p, os.O_RDWR, fileutil.PrivateFileMode)
if err != nil {
closeAll(rcs...)
return nil, err
@@ -170,7 +189,7 @@ func openAtIndex(dirpath string, snap walpb.Snapshot, write bool) (*WAL, error)
ls = append(ls, l)
rcs = append(rcs, l)
} else {
rf, err := os.OpenFile(p, os.O_RDONLY, 0600)
rf, err := os.OpenFile(p, os.O_RDONLY, fileutil.PrivateFileMode)
if err != nil {
closeAll(rcs...)
return nil, err
@@ -196,17 +215,10 @@ func openAtIndex(dirpath string, snap walpb.Snapshot, write bool) (*WAL, error)
// write reuses the file descriptors from read; don't close so
// WAL can append without dropping the file lock
w.readClose = nil
if _, _, err := parseWalName(path.Base(w.tail().Name())); err != nil {
closer()
return nil, err
}
// don't resize file for preallocation in case tail is corrupted
if err := fileutil.Preallocate(w.tail().File, segmentSizeBytes, false); err != nil {
closer()
plog.Errorf("failed to allocate space when creating new wal file (%v)", err)
return nil, err
}
w.fp = newFilePipeline(w.dir, segmentSizeBytes)
}
@@ -242,7 +254,7 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.
case stateType:
state = mustUnmarshalState(rec.Data)
case metadataType:
if metadata != nil && !reflect.DeepEqual(metadata, rec.Data) {
if metadata != nil && !bytes.Equal(metadata, rec.Data) {
state.Reset()
return nil, state, nil, ErrMetadataConflict
}
@@ -307,7 +319,6 @@ func (w *WAL) ReadAll() (metadata []byte, state raftpb.HardState, ents []raftpb.
// create encoder (chain crc with the decoder), enable appending
_, err = w.tail().Seek(w.decoder.lastOffset(), os.SEEK_SET)
w.encoder = newEncoder(w.tail(), w.decoder.lastCRC())
lastIndexSaved.Set(float64(w.enti))
}
w.decoder = nil
@@ -366,7 +377,7 @@ func (w *WAL) cut() error {
}
newTail.Close()
if newTail, err = fileutil.LockFile(fpath, os.O_WRONLY, 0600); err != nil {
if newTail, err = fileutil.LockFile(fpath, os.O_WRONLY, fileutil.PrivateFileMode); err != nil {
return err
}
if _, err = newTail.Seek(off, os.SEEK_SET); err != nil {
@@ -390,7 +401,13 @@ func (w *WAL) sync() error {
}
start := time.Now()
err := fileutil.Fdatasync(w.tail().File)
syncDurations.Observe(float64(time.Since(start)) / float64(time.Second))
duration := time.Since(start)
if duration > warnSyncDuration {
plog.Warningf("sync duration of %v, expected less than %v", duration, warnSyncDuration)
}
syncDurations.Observe(duration.Seconds())
return err
}
@@ -471,7 +488,6 @@ func (w *WAL) saveEntry(e *raftpb.Entry) error {
return err
}
w.enti = e.Index
lastIndexSaved.Set(float64(w.enti))
return nil
}
@@ -534,7 +550,6 @@ func (w *WAL) SaveSnapshot(e walpb.Snapshot) error {
if w.enti < e.Index {
w.enti = e.Index
}
lastIndexSaved.Set(float64(w.enti))
return w.sync()
}
@@ -567,10 +582,7 @@ func mustSync(st, prevst raftpb.HardState, entsnum int) bool {
// currentTerm
// votedFor
// log entries[]
if entsnum != 0 || st.Vote != prevst.Vote || st.Term != prevst.Term {
return true
}
return false
return entsnum != 0 || st.Vote != prevst.Vote || st.Term != prevst.Term
}
func closeAll(rcs ...io.ReadCloser) error {

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

View File

@@ -17,7 +17,7 @@ package walpb
import (
"fmt"
proto "github.com/gogo/protobuf/proto"
proto "github.com/golang/protobuf/proto"
math "math"
)
@@ -29,6 +29,10 @@ var _ = proto.Marshal
var _ = fmt.Errorf
var _ = math.Inf
// This is a compile-time assertion to ensure that this generated file
// is compatible with the proto package it is being compiled against.
const _ = proto.ProtoPackageIsVersion1
type Record struct {
Type int64 `protobuf:"varint,1,opt,name=type" json:"type"`
Crc uint32 `protobuf:"varint,2,opt,name=crc" json:"crc"`
@@ -36,9 +40,10 @@ type Record struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *Record) Reset() { *m = Record{} }
func (m *Record) String() string { return proto.CompactTextString(m) }
func (*Record) ProtoMessage() {}
func (m *Record) Reset() { *m = Record{} }
func (m *Record) String() string { return proto.CompactTextString(m) }
func (*Record) ProtoMessage() {}
func (*Record) Descriptor() ([]byte, []int) { return fileDescriptorRecord, []int{0} }
type Snapshot struct {
Index uint64 `protobuf:"varint,1,opt,name=index" json:"index"`
@@ -46,9 +51,10 @@ type Snapshot struct {
XXX_unrecognized []byte `json:"-"`
}
func (m *Snapshot) Reset() { *m = Snapshot{} }
func (m *Snapshot) String() string { return proto.CompactTextString(m) }
func (*Snapshot) ProtoMessage() {}
func (m *Snapshot) Reset() { *m = Snapshot{} }
func (m *Snapshot) String() string { return proto.CompactTextString(m) }
func (*Snapshot) ProtoMessage() {}
func (*Snapshot) Descriptor() ([]byte, []int) { return fileDescriptorRecord, []int{1} }
func init() {
proto.RegisterType((*Record)(nil), "walpb.Record")
@@ -493,3 +499,19 @@ var (
ErrInvalidLengthRecord = fmt.Errorf("proto: negative length found during unmarshaling")
ErrIntOverflowRecord = fmt.Errorf("proto: integer overflow")
)
var fileDescriptorRecord = []byte{
// 186 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0xe2, 0xe2, 0x29, 0x4a, 0x4d, 0xce,
0x2f, 0x4a, 0xd1, 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, 0x62, 0x2d, 0x4f, 0xcc, 0x29, 0x48, 0x92,
0x12, 0x49, 0xcf, 0x4f, 0xcf, 0x07, 0x8b, 0xe8, 0x83, 0x58, 0x10, 0x49, 0x25, 0x3f, 0x2e, 0xb6,
0x20, 0xb0, 0x62, 0x21, 0x09, 0x2e, 0x96, 0x92, 0xca, 0x82, 0x54, 0x09, 0x46, 0x05, 0x46, 0x0d,
0x66, 0x27, 0x96, 0x13, 0xf7, 0xe4, 0x19, 0x82, 0xc0, 0x22, 0x42, 0x62, 0x5c, 0xcc, 0xc9, 0x45,
0xc9, 0x12, 0x4c, 0x0a, 0x8c, 0x1a, 0xbc, 0x50, 0x09, 0x90, 0x80, 0x90, 0x10, 0x17, 0x4b, 0x4a,
0x62, 0x49, 0xa2, 0x04, 0xb3, 0x02, 0xa3, 0x06, 0x4f, 0x10, 0x98, 0xad, 0xe4, 0xc0, 0xc5, 0x11,
0x9c, 0x97, 0x58, 0x50, 0x9c, 0x91, 0x5f, 0x22, 0x24, 0xc5, 0xc5, 0x9a, 0x99, 0x97, 0x92, 0x5a,
0x01, 0x36, 0x92, 0x05, 0xaa, 0x13, 0x22, 0x04, 0xb6, 0x2d, 0xb5, 0x28, 0x17, 0x6c, 0x28, 0x0b,
0xdc, 0xb6, 0xd4, 0xa2, 0x5c, 0x27, 0x91, 0x13, 0x0f, 0xe5, 0x18, 0x4e, 0x3c, 0x92, 0x63, 0xbc,
0xf0, 0x48, 0x8e, 0xf1, 0xc1, 0x23, 0x39, 0xc6, 0x19, 0x8f, 0xe5, 0x18, 0x00, 0x01, 0x00, 0x00,
0xff, 0xff, 0x7f, 0x5e, 0x5c, 0x46, 0xd3, 0x00, 0x00, 0x00,
}