Move deps from _workspace/ to vendor/
godep restore
pushd $GOPATH/src/github.com/appc/spec
git co master
popd
go get go4.org/errorutil
rm -rf Godeps
godep save ./...
git add vendor
git add -f $(git ls-files --other vendor/)
git co -- Godeps/LICENSES Godeps/.license_file_state Godeps/OWNERS
vendor/github.com/hashicorp/raft/.gitignore (new file, 23 lines, generated, vendored)
@@ -0,0 +1,23 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so

# Folders
_obj
_test

# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out

*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*

_testmain.go

*.exe
*.test
vendor/github.com/hashicorp/raft/.travis.yml (new file, 14 lines, generated, vendored)
@@ -0,0 +1,14 @@
language: go

go:
  - 1.2
  - tip

install: make deps
script:
  - make integ

notifications:
  flowdock:
    secure: fZrcf9rlh2IrQrlch1sHkn3YI7SKvjGnAl/zyV5D6NROe1Bbr6d3QRMuCXWWdhJHzjKmXk5rIzbqJhUc0PNF7YjxGNKSzqWMQ56KcvN1k8DzlqxpqkcA3Jbs6fXCWo2fssRtZ7hj/wOP1f5n6cc7kzHDt9dgaYJ6nO2fqNPJiTc=
vendor/github.com/hashicorp/raft/LICENSE (new file, 354 lines, generated, vendored)
@@ -0,0 +1,354 @@
Mozilla Public License, version 2.0

1. Definitions

1.1. “Contributor”

     means each individual or legal entity that creates, contributes to the
     creation of, or owns Covered Software.

1.2. “Contributor Version”

     means the combination of the Contributions of others (if any) used by a
     Contributor and that particular Contributor’s Contribution.

1.3. “Contribution”

     means Covered Software of a particular Contributor.

1.4. “Covered Software”

     means Source Code Form to which the initial Contributor has attached the
     notice in Exhibit A, the Executable Form of such Source Code Form, and
     Modifications of such Source Code Form, in each case including portions
     thereof.

1.5. “Incompatible With Secondary Licenses”
     means

     a. that the initial Contributor has attached the notice described in
        Exhibit B to the Covered Software; or

     b. that the Covered Software was made available under the terms of version
        1.1 or earlier of the License, but not also under the terms of a
        Secondary License.

1.6. “Executable Form”

     means any form of the work other than Source Code Form.

1.7. “Larger Work”

     means a work that combines Covered Software with other material, in a separate
     file or files, that is not Covered Software.

1.8. “License”

     means this document.

1.9. “Licensable”

     means having the right to grant, to the maximum extent possible, whether at the
     time of the initial grant or subsequently, any and all of the rights conveyed by
     this License.

1.10. “Modifications”

     means any of the following:

     a. any file in Source Code Form that results from an addition to, deletion
        from, or modification of the contents of Covered Software; or

     b. any new file in Source Code Form that contains any Covered Software.

1.11. “Patent Claims” of a Contributor

     means any patent claim(s), including without limitation, method, process,
     and apparatus claims, in any patent Licensable by such Contributor that
     would be infringed, but for the grant of the License, by the making,
     using, selling, offering for sale, having made, import, or transfer of
     either its Contributions or its Contributor Version.

1.12. “Secondary License”

     means either the GNU General Public License, Version 2.0, the GNU Lesser
     General Public License, Version 2.1, the GNU Affero General Public
     License, Version 3.0, or any later versions of those licenses.

1.13. “Source Code Form”

     means the form of the work preferred for making modifications.

1.14. “You” (or “Your”)

     means an individual or a legal entity exercising rights under this
     License. For legal entities, “You” includes any entity that controls, is
     controlled by, or is under common control with You. For purposes of this
     definition, “control” means (a) the power, direct or indirect, to cause
     the direction or management of such entity, whether by contract or
     otherwise, or (b) ownership of more than fifty percent (50%) of the
     outstanding shares or beneficial ownership of such entity.


2. License Grants and Conditions

2.1. Grants

     Each Contributor hereby grants You a world-wide, royalty-free,
     non-exclusive license:

     a. under intellectual property rights (other than patent or trademark)
        Licensable by such Contributor to use, reproduce, make available,
        modify, display, perform, distribute, and otherwise exploit its
        Contributions, either on an unmodified basis, with Modifications, or as
        part of a Larger Work; and

     b. under Patent Claims of such Contributor to make, use, sell, offer for
        sale, have made, import, and otherwise transfer either its Contributions
        or its Contributor Version.

2.2. Effective Date

     The licenses granted in Section 2.1 with respect to any Contribution become
     effective for each Contribution on the date the Contributor first distributes
     such Contribution.

2.3. Limitations on Grant Scope

     The licenses granted in this Section 2 are the only rights granted under this
     License. No additional rights or licenses will be implied from the distribution
     or licensing of Covered Software under this License. Notwithstanding Section
     2.1(b) above, no patent license is granted by a Contributor:

     a. for any code that a Contributor has removed from Covered Software; or

     b. for infringements caused by: (i) Your and any other third party’s
        modifications of Covered Software, or (ii) the combination of its
        Contributions with other software (except as part of its Contributor
        Version); or

     c. under Patent Claims infringed by Covered Software in the absence of its
        Contributions.

     This License does not grant any rights in the trademarks, service marks, or
     logos of any Contributor (except as may be necessary to comply with the
     notice requirements in Section 3.4).

2.4. Subsequent Licenses

     No Contributor makes additional grants as a result of Your choice to
     distribute the Covered Software under a subsequent version of this License
     (see Section 10.2) or under the terms of a Secondary License (if permitted
     under the terms of Section 3.3).

2.5. Representation

     Each Contributor represents that the Contributor believes its Contributions
     are its original creation(s) or it has sufficient rights to grant the
     rights to its Contributions conveyed by this License.

2.6. Fair Use

     This License is not intended to limit any rights You have under applicable
     copyright doctrines of fair use, fair dealing, or other equivalents.

2.7. Conditions

     Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in
     Section 2.1.


3. Responsibilities

3.1. Distribution of Source Form

     All distribution of Covered Software in Source Code Form, including any
     Modifications that You create or to which You contribute, must be under the
     terms of this License. You must inform recipients that the Source Code Form
     of the Covered Software is governed by the terms of this License, and how
     they can obtain a copy of this License. You may not attempt to alter or
     restrict the recipients’ rights in the Source Code Form.

3.2. Distribution of Executable Form

     If You distribute Covered Software in Executable Form then:

     a. such Covered Software must also be made available in Source Code Form,
        as described in Section 3.1, and You must inform recipients of the
        Executable Form how they can obtain a copy of such Source Code Form by
        reasonable means in a timely manner, at a charge no more than the cost
        of distribution to the recipient; and

     b. You may distribute such Executable Form under the terms of this License,
        or sublicense it under different terms, provided that the license for
        the Executable Form does not attempt to limit or alter the recipients’
        rights in the Source Code Form under this License.

3.3. Distribution of a Larger Work

     You may create and distribute a Larger Work under terms of Your choice,
     provided that You also comply with the requirements of this License for the
     Covered Software. If the Larger Work is a combination of Covered Software
     with a work governed by one or more Secondary Licenses, and the Covered
     Software is not Incompatible With Secondary Licenses, this License permits
     You to additionally distribute such Covered Software under the terms of
     such Secondary License(s), so that the recipient of the Larger Work may, at
     their option, further distribute the Covered Software under the terms of
     either this License or such Secondary License(s).

3.4. Notices

     You may not remove or alter the substance of any license notices (including
     copyright notices, patent notices, disclaimers of warranty, or limitations
     of liability) contained within the Source Code Form of the Covered
     Software, except that You may alter any license notices to the extent
     required to remedy known factual inaccuracies.

3.5. Application of Additional Terms

     You may choose to offer, and to charge a fee for, warranty, support,
     indemnity or liability obligations to one or more recipients of Covered
     Software. However, You may do so only on Your own behalf, and not on behalf
     of any Contributor. You must make it absolutely clear that any such
     warranty, support, indemnity, or liability obligation is offered by You
     alone, and You hereby agree to indemnify every Contributor for any
     liability incurred by such Contributor as a result of warranty, support,
     indemnity or liability terms You offer. You may include additional
     disclaimers of warranty and limitations of liability specific to any
     jurisdiction.

4. Inability to Comply Due to Statute or Regulation

     If it is impossible for You to comply with any of the terms of this License
     with respect to some or all of the Covered Software due to statute, judicial
     order, or regulation then You must: (a) comply with the terms of this License
     to the maximum extent possible; and (b) describe the limitations and the code
     they affect. Such description must be placed in a text file included with all
     distributions of the Covered Software under this License. Except to the
     extent prohibited by statute or regulation, such description must be
     sufficiently detailed for a recipient of ordinary skill to be able to
     understand it.

5. Termination

5.1. The rights granted under this License will terminate automatically if You
     fail to comply with any of its terms. However, if You become compliant,
     then the rights granted under this License from a particular Contributor
     are reinstated (a) provisionally, unless and until such Contributor
     explicitly and finally terminates Your grants, and (b) on an ongoing basis,
     if such Contributor fails to notify You of the non-compliance by some
     reasonable means prior to 60 days after You have come back into compliance.
     Moreover, Your grants from a particular Contributor are reinstated on an
     ongoing basis if such Contributor notifies You of the non-compliance by
     some reasonable means, this is the first time You have received notice of
     non-compliance with this License from such Contributor, and You become
     compliant prior to 30 days after Your receipt of the notice.

5.2. If You initiate litigation against any entity by asserting a patent
     infringement claim (excluding declaratory judgment actions, counter-claims,
     and cross-claims) alleging that a Contributor Version directly or
     indirectly infringes any patent, then the rights granted to You by any and
     all Contributors for the Covered Software under Section 2.1 of this License
     shall terminate.

5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user
     license agreements (excluding distributors and resellers) which have been
     validly granted by You or Your distributors under this License prior to
     termination shall survive termination.

6. Disclaimer of Warranty

     Covered Software is provided under this License on an “as is” basis, without
     warranty of any kind, either expressed, implied, or statutory, including,
     without limitation, warranties that the Covered Software is free of defects,
     merchantable, fit for a particular purpose or non-infringing. The entire
     risk as to the quality and performance of the Covered Software is with You.
     Should any Covered Software prove defective in any respect, You (not any
     Contributor) assume the cost of any necessary servicing, repair, or
     correction. This disclaimer of warranty constitutes an essential part of this
     License. No use of any Covered Software is authorized under this License
     except under this disclaimer.

7. Limitation of Liability

     Under no circumstances and under no legal theory, whether tort (including
     negligence), contract, or otherwise, shall any Contributor, or anyone who
     distributes Covered Software as permitted above, be liable to You for any
     direct, indirect, special, incidental, or consequential damages of any
     character including, without limitation, damages for lost profits, loss of
     goodwill, work stoppage, computer failure or malfunction, or any and all
     other commercial damages or losses, even if such party shall have been
     informed of the possibility of such damages. This limitation of liability
     shall not apply to liability for death or personal injury resulting from such
     party’s negligence to the extent applicable law prohibits such limitation.
     Some jurisdictions do not allow the exclusion or limitation of incidental or
     consequential damages, so this exclusion and limitation may not apply to You.

8. Litigation

     Any litigation relating to this License may be brought only in the courts of
     a jurisdiction where the defendant maintains its principal place of business
     and such litigation shall be governed by laws of that jurisdiction, without
     reference to its conflict-of-law provisions. Nothing in this Section shall
     prevent a party’s ability to bring cross-claims or counter-claims.

9. Miscellaneous

     This License represents the complete agreement concerning the subject matter
     hereof. If any provision of this License is held to be unenforceable, such
     provision shall be reformed only to the extent necessary to make it
     enforceable. Any law or regulation which provides that the language of a
     contract shall be construed against the drafter shall not be used to construe
     this License against a Contributor.


10. Versions of the License

10.1. New Versions

     Mozilla Foundation is the license steward. Except as provided in Section
     10.3, no one other than the license steward has the right to modify or
     publish new versions of this License. Each version will be given a
     distinguishing version number.

10.2. Effect of New Versions

     You may distribute the Covered Software under the terms of the version of
     the License under which You originally received the Covered Software, or
     under the terms of any subsequent version published by the license
     steward.

10.3. Modified Versions

     If you create software not governed by this License, and you want to
     create a new license for such software, you may create and use a modified
     version of this License if you rename the license and remove any
     references to the name of the license steward (except to note that such
     modified license differs from this License).

10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses
     If You choose to distribute Source Code Form that is Incompatible With
     Secondary Licenses under the terms of this version of the License, the
     notice described in Exhibit B of this License must be attached.

Exhibit A - Source Code Form License Notice

     This Source Code Form is subject to the
     terms of the Mozilla Public License, v.
     2.0. If a copy of the MPL was not
     distributed with this file, You can
     obtain one at
     http://mozilla.org/MPL/2.0/.

If it is not possible or desirable to put the notice in a particular file, then
You may include the notice in a location (such as a LICENSE file in a relevant
directory) where a recipient would be likely to look for such a notice.

You may add additional accurate notices of copyright ownership.

Exhibit B - “Incompatible With Secondary Licenses” Notice

     This Source Code Form is “Incompatible
     With Secondary Licenses”, as defined by
     the Mozilla Public License, v. 2.0.
vendor/github.com/hashicorp/raft/Makefile (new file, 17 lines, generated, vendored)
@@ -0,0 +1,17 @@
DEPS = $(go list -f '{{range .TestImports}}{{.}} {{end}}' ./...)

test:
	go test -timeout=5s ./...

integ: test
	INTEG_TESTS=yes go test -timeout=3s -run=Integ ./...

deps:
	go get -d -v ./...
	echo $(DEPS) | xargs -n1 go get -d

cov:
	INTEG_TESTS=yes gocov test github.com/hashicorp/raft | gocov-html > /tmp/coverage.html
	open /tmp/coverage.html

.PHONY: test cov integ deps
vendor/github.com/hashicorp/raft/README.md (new file, 89 lines, generated, vendored)
@@ -0,0 +1,89 @@
raft [Build Status](https://travis-ci.org/hashicorp/raft)
====

raft is a [Go](http://www.golang.org) library that manages a replicated
log and can be used with an FSM to manage replicated state machines. It
is a library for providing [consensus](http://en.wikipedia.org/wiki/Consensus_(computer_science)).

The use cases for such a library are far-reaching as replicated state
machines are a key component of many distributed systems. They enable
building Consistent, Partition Tolerant (CP) systems, with limited
fault tolerance as well.

## Building

If you wish to build raft you'll need Go version 1.2+ installed.

Please check your installation with:

```
go version
```

## Documentation

For complete documentation, see the associated [Godoc](http://godoc.org/github.com/hashicorp/raft).

To prevent complications with cgo, the primary backend `MDBStore` is in a separate repository,
called [raft-mdb](http://github.com/hashicorp/raft-mdb). That is the recommended implementation
for the `LogStore` and `StableStore`.

A pure Go backend using [BoltDB](https://github.com/boltdb/bolt) is also available, called
[raft-boltdb](https://github.com/hashicorp/raft-boltdb). It can also be used as a `LogStore`
and `StableStore`.

## Protocol

raft is based on ["Raft: In Search of an Understandable Consensus Algorithm"](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf)

A high level overview of the Raft protocol is described below, but for details please read the full
[Raft paper](https://ramcloud.stanford.edu/wiki/download/attachments/11370504/raft.pdf)
followed by the raft source. Any questions about the raft protocol should be sent to the
[raft-dev mailing list](https://groups.google.com/forum/#!forum/raft-dev).

### Protocol Description

Raft nodes are always in one of three states: follower, candidate or leader. All
nodes initially start out as a follower. In this state, nodes can accept log entries
from a leader and cast votes. If no entries are received for some time, nodes
self-promote to the candidate state. In the candidate state nodes request votes from
their peers. If a candidate receives a quorum of votes, then it is promoted to a leader.
The leader must accept new log entries and replicate to all the other followers.
In addition, if stale reads are not acceptable, all queries must also be performed on
the leader.

Once a cluster has a leader, it is able to accept new log entries. A client can
request that a leader append a new log entry, which is an opaque binary blob to
Raft. The leader then writes the entry to durable storage and attempts to replicate
to a quorum of followers. Once the log entry is considered *committed*, it can be
*applied* to a finite state machine. The finite state machine is application specific,
and is implemented using an interface.

An obvious question relates to the unbounded nature of a replicated log. Raft provides
a mechanism by which the current state is snapshotted, and the log is compacted. Because
of the FSM abstraction, restoring the state of the FSM must result in the same state
as a replay of old logs. This allows Raft to capture the FSM state at a point in time,
and then remove all the logs that were used to reach that state. This is performed automatically
without user intervention, and prevents unbounded disk usage as well as minimizing
time spent replaying logs.

Lastly, there is the issue of updating the peer set when new servers are joining
or existing servers are leaving. As long as a quorum of nodes is available, this
is not an issue as Raft provides mechanisms to dynamically update the peer set.
If a quorum of nodes is unavailable, then this becomes a very challenging issue.
For example, suppose there are only 2 peers, A and B. The quorum size is also
2, meaning both nodes must agree to commit a log entry. If either A or B fails,
it is now impossible to reach quorum. This means the cluster is unable to add,
or remove a node, or commit any additional log entries. This results in *unavailability*.
At this point, manual intervention would be required to remove either A or B,
and to restart the remaining node in bootstrap mode.

A Raft cluster of 3 nodes can tolerate a single node failure, while a cluster
of 5 can tolerate 2 node failures. The recommended configuration is to either
run 3 or 5 raft servers. This maximizes availability without
greatly sacrificing performance.

In terms of performance, Raft is comparable to Paxos. Assuming stable leadership,
committing a log entry requires a single round trip to half of the cluster.
Thus performance is bound by disk I/O and network latency.
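The quorum arithmetic in the last two paragraphs (3 nodes tolerate 1 failure, 5 tolerate 2) is easy to check mechanically. Below is a small, self-contained Go sketch, an illustration only and not part of the library:

```go
package main

import "fmt"

// quorum returns the number of votes needed to commit in a cluster of n peers.
func quorum(n int) int { return n/2 + 1 }

// tolerable returns how many node failures a cluster of n peers can survive
// while still being able to reach quorum.
func tolerable(n int) int { return n - quorum(n) }

func main() {
	for _, n := range []int{2, 3, 5} {
		fmt.Printf("peers=%d quorum=%d tolerates=%d failure(s)\n",
			n, quorum(n), tolerable(n))
	}
	// peers=2 quorum=2 tolerates=0 failure(s)
	// peers=3 quorum=2 tolerates=1 failure(s)
	// peers=5 quorum=3 tolerates=2 failure(s)
}
```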
vendor/github.com/hashicorp/raft/bench/bench.go (new file, 171 lines, generated, vendored)
@@ -0,0 +1,171 @@
package raftbench

// raftbench provides common benchmarking functions which can be used by
// anything which implements the raft.LogStore and raft.StableStore interfaces.
// All functions accept these interfaces and perform benchmarking. This
// makes comparing backend performance easier by sharing the tests.

import (
	"github.com/hashicorp/raft"
	"testing"
)

func FirstIndex(b *testing.B, store raft.LogStore) {
	// Create some fake data
	var logs []*raft.Log
	for i := 1; i < 10; i++ {
		logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
	}
	if err := store.StoreLogs(logs); err != nil {
		b.Fatalf("err: %s", err)
	}
	b.ResetTimer()

	// Run FirstIndex a number of times
	for n := 0; n < b.N; n++ {
		store.FirstIndex()
	}
}

func LastIndex(b *testing.B, store raft.LogStore) {
	// Create some fake data
	var logs []*raft.Log
	for i := 1; i < 10; i++ {
		logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
	}
	if err := store.StoreLogs(logs); err != nil {
		b.Fatalf("err: %s", err)
	}
	b.ResetTimer()

	// Run LastIndex a number of times
	for n := 0; n < b.N; n++ {
		store.LastIndex()
	}
}

func GetLog(b *testing.B, store raft.LogStore) {
	// Create some fake data
	var logs []*raft.Log
	for i := 1; i < 10; i++ {
		logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
	}
	if err := store.StoreLogs(logs); err != nil {
		b.Fatalf("err: %s", err)
	}
	b.ResetTimer()

	// Run GetLog a number of times
	for n := 0; n < b.N; n++ {
		if err := store.GetLog(5, new(raft.Log)); err != nil {
			b.Fatalf("err: %s", err)
		}
	}
}

func StoreLog(b *testing.B, store raft.LogStore) {
	// Run StoreLog a number of times
	for n := 0; n < b.N; n++ {
		log := &raft.Log{Index: uint64(n), Data: []byte("data")}
		if err := store.StoreLog(log); err != nil {
			b.Fatalf("err: %s", err)
		}
	}
}

func StoreLogs(b *testing.B, store raft.LogStore) {
	// Run StoreLogs a number of times. We want to set multiple logs each
	// run, so we create 3 logs with incrementing indexes for each iteration.
	for n := 0; n < b.N; n++ {
		b.StopTimer()
		offset := 3 * (n + 1)
		logs := []*raft.Log{
			&raft.Log{Index: uint64(offset - 2), Data: []byte("data")},
			&raft.Log{Index: uint64(offset - 1), Data: []byte("data")},
			&raft.Log{Index: uint64(offset), Data: []byte("data")},
		}
		b.StartTimer()

		if err := store.StoreLogs(logs); err != nil {
			b.Fatalf("err: %s", err)
		}
	}
}

func DeleteRange(b *testing.B, store raft.LogStore) {
	// Create some fake data. In this case, we create 3 new log entries for each
	// test case, and separate them by index in multiples of 10. This allows
	// some room so that we can test deleting ranges with "extra" logs to
	// ensure we stop going to the database once our max index is hit.
	var logs []*raft.Log
	for n := 0; n < b.N; n++ {
		offset := 10 * n
		for i := offset; i < offset+3; i++ {
			logs = append(logs, &raft.Log{Index: uint64(i), Data: []byte("data")})
		}
	}
	if err := store.StoreLogs(logs); err != nil {
		b.Fatalf("err: %s", err)
	}
	b.ResetTimer()

	// Delete a range of the data
	for n := 0; n < b.N; n++ {
		offset := 10 * n
		if err := store.DeleteRange(uint64(offset), uint64(offset+9)); err != nil {
			b.Fatalf("err: %s", err)
		}
	}
}

func Set(b *testing.B, store raft.StableStore) {
	// Run Set a number of times
	for n := 0; n < b.N; n++ {
		if err := store.Set([]byte{byte(n)}, []byte("val")); err != nil {
			b.Fatalf("err: %s", err)
		}
	}
}

func Get(b *testing.B, store raft.StableStore) {
	// Create some fake data
	for i := 1; i < 10; i++ {
		if err := store.Set([]byte{byte(i)}, []byte("val")); err != nil {
			b.Fatalf("err: %s", err)
		}
	}
	b.ResetTimer()

	// Run Get a number of times
	for n := 0; n < b.N; n++ {
		if _, err := store.Get([]byte{0x05}); err != nil {
			b.Fatalf("err: %s", err)
		}
	}
}

func SetUint64(b *testing.B, store raft.StableStore) {
	// Run SetUint64 a number of times
	for n := 0; n < b.N; n++ {
		if err := store.SetUint64([]byte{byte(n)}, uint64(n)); err != nil {
			b.Fatalf("err: %s", err)
		}
	}
}

func GetUint64(b *testing.B, store raft.StableStore) {
	// Create some fake data
	for i := 0; i < 10; i++ {
		if err := store.SetUint64([]byte{byte(i)}, uint64(i)); err != nil {
			b.Fatalf("err: %s", err)
		}
	}
	b.ResetTimer()

	// Run GetUint64 a number of times
	for n := 0; n < b.N; n++ {
		if _, err := store.Get([]byte{0x05}); err != nil {
			b.Fatalf("err: %s", err)
		}
	}
}
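These helpers are designed to be driven from a backend's own benchmark functions. A minimal sketch of that wiring, assuming the library's in-memory store (`raft.NewInmemStore`, which implements both `LogStore` and `StableStore`) is available to stand in for a real backend:

```go
package mystore

import (
	"testing"

	"github.com/hashicorp/raft"
	raftbench "github.com/hashicorp/raft/bench"
)

// BenchmarkFirstIndex drives the shared FirstIndex helper; a real backend
// would pass its own LogStore implementation instead of the in-memory one.
func BenchmarkFirstIndex(b *testing.B) {
	raftbench.FirstIndex(b, raft.NewInmemStore())
}

// BenchmarkSet exercises the StableStore side the same way.
func BenchmarkSet(b *testing.B) {
	raftbench.Set(b, raft.NewInmemStore())
}
```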
vendor/github.com/hashicorp/raft/commands.go (new file, 84 lines, generated, vendored)
@@ -0,0 +1,84 @@
package raft

// AppendEntriesRequest is the command used to append entries to the
// replicated log.
type AppendEntriesRequest struct {
	// Provide the current term and leader
	Term   uint64
	Leader []byte

	// Provide the previous entries for integrity checking
	PrevLogEntry uint64
	PrevLogTerm  uint64

	// New entries to commit
	Entries []*Log

	// Commit index on the leader
	LeaderCommitIndex uint64
}

// AppendEntriesResponse is the response returned from an
// AppendEntriesRequest.
type AppendEntriesResponse struct {
	// Newer term if leader is out of date
	Term uint64

	// Last Log is a hint to help accelerate rebuilding slow nodes
	LastLog uint64

	// We may not succeed if we have a conflicting entry
	Success bool

	// There are scenarios where this request didn't succeed
	// but there's no need to wait/back-off the next attempt.
	NoRetryBackoff bool
}

// RequestVoteRequest is the command used by a candidate to ask a Raft peer
// for a vote in an election.
type RequestVoteRequest struct {
	// Provide the term and our id
	Term      uint64
	Candidate []byte

	// Used to ensure safety
	LastLogIndex uint64
	LastLogTerm  uint64
}

// RequestVoteResponse is the response returned from a RequestVoteRequest.
type RequestVoteResponse struct {
	// Newer term if leader is out of date
	Term uint64

	// Return the peers, so that a node can shutdown on removal
	Peers []byte

	// Is the vote granted
	Granted bool
}

// InstallSnapshotRequest is the command sent to a Raft peer to bootstrap its
// log (and state machine) from a snapshot on another peer.
type InstallSnapshotRequest struct {
	Term   uint64
	Leader []byte

	// These are the last index/term included in the snapshot
	LastLogIndex uint64
	LastLogTerm  uint64

	// Peer Set in the snapshot
	Peers []byte

	// Size of the snapshot
	Size int64
}

// InstallSnapshotResponse is the response returned from an
// InstallSnapshotRequest.
type InstallSnapshotResponse struct {
	Term    uint64
	Success bool
}
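As an illustration of how these messages fit the protocol, a leader heartbeat is just an `AppendEntriesRequest` with an empty `Entries` slice: it carries the leader's term and commit index so followers can advance. A sketch constructing one (all field values are made up):

```go
package main

import (
	"fmt"

	"github.com/hashicorp/raft"
)

func main() {
	// A heartbeat carries no new entries; PrevLogEntry/PrevLogTerm still
	// identify the entry preceding the (empty) batch for consistency checks.
	hb := raft.AppendEntriesRequest{
		Term:              5,
		Leader:            []byte("10.0.0.1:7000"),
		PrevLogEntry:      42,
		PrevLogTerm:       5,
		Entries:           nil,
		LeaderCommitIndex: 42,
	}
	fmt.Printf("heartbeat: %+v\n", hb)
}
```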
vendor/github.com/hashicorp/raft/config.go (new file, 134 lines, generated, vendored)
@@ -0,0 +1,134 @@
package raft

import (
	"fmt"
	"io"
	"log"
	"time"
)

// Config provides any necessary configuration to
// the Raft server
type Config struct {
	// Time in follower state without a leader before we attempt an election.
	HeartbeatTimeout time.Duration

	// Time in candidate state without a leader before we attempt an election.
	ElectionTimeout time.Duration

	// Time without an Apply() operation before we heartbeat to ensure
	// a timely commit. Due to random staggering, may be delayed as much as
	// 2x this value.
	CommitTimeout time.Duration

	// MaxAppendEntries controls the maximum number of append entries
	// to send at once. We want to strike a balance between efficiency
	// and avoiding waste if the follower is going to reject because of
	// an inconsistent log.
	MaxAppendEntries int

	// If we are a member of a cluster, and RemovePeer is invoked for the
	// local node, then we forget all peers and transition into the follower state.
	// If ShutdownOnRemove is set, we additionally shutdown Raft. Otherwise,
	// we can become a leader of a cluster containing only this node.
	ShutdownOnRemove bool

	// DisableBootstrapAfterElect is used to turn off EnableSingleNode
	// after the node is elected. This is used to prevent self-election
	// if the node is removed from the Raft cluster via RemovePeer. Setting
	// it to false will keep the bootstrap mode, allowing the node to self-elect
	// and potentially bootstrap a separate cluster.
	DisableBootstrapAfterElect bool

	// TrailingLogs controls how many logs we leave after a snapshot. This is
	// used so that we can quickly replay logs on a follower instead of being
	// forced to send an entire snapshot.
	TrailingLogs uint64

	// SnapshotInterval controls how often we check if we should perform a snapshot.
	// We randomly stagger between this value and 2x this value to avoid the entire
	// cluster from performing a snapshot at once.
	SnapshotInterval time.Duration

	// SnapshotThreshold controls how many outstanding logs there must be before
	// we perform a snapshot. This is to prevent excessive snapshots when we can
	// just replay a small set of logs.
	SnapshotThreshold uint64

	// EnableSingleNode allows for a single node mode of operation. This
	// is false by default, which prevents a lone node from electing itself
	// leader.
	EnableSingleNode bool

	// LeaderLeaseTimeout is used to control how long the "lease" lasts
	// for being the leader without being able to contact a quorum
	// of nodes. If we reach this interval without contact, we will
	// step down as leader.
	LeaderLeaseTimeout time.Duration

	// StartAsLeader forces Raft to start in the leader state. This should
	// never be used except for testing purposes, as it can cause a split-brain.
	StartAsLeader bool

	// NotifyCh is used to provide a channel that will be notified of leadership
	// changes. Raft will block writing to this channel, so it should either be
	// buffered or aggressively consumed.
	NotifyCh chan<- bool

	// LogOutput is used as a sink for logs, unless Logger is specified.
	// Defaults to os.Stderr.
	LogOutput io.Writer

	// Logger is a user-provided logger. If nil, a logger writing to LogOutput
	// is used.
	Logger *log.Logger
}

// DefaultConfig returns a Config with usable defaults.
func DefaultConfig() *Config {
	return &Config{
		HeartbeatTimeout:           1000 * time.Millisecond,
		ElectionTimeout:            1000 * time.Millisecond,
		CommitTimeout:              50 * time.Millisecond,
		MaxAppendEntries:           64,
		ShutdownOnRemove:           true,
		DisableBootstrapAfterElect: true,
		TrailingLogs:               10240,
		SnapshotInterval:           120 * time.Second,
		SnapshotThreshold:          8192,
		EnableSingleNode:           false,
		LeaderLeaseTimeout:         500 * time.Millisecond,
	}
}

// ValidateConfig is used to validate a sane configuration
func ValidateConfig(config *Config) error {
	if config.HeartbeatTimeout < 5*time.Millisecond {
		return fmt.Errorf("Heartbeat timeout is too low")
	}
	if config.ElectionTimeout < 5*time.Millisecond {
		return fmt.Errorf("Election timeout is too low")
	}
	if config.CommitTimeout < time.Millisecond {
		return fmt.Errorf("Commit timeout is too low")
	}
	if config.MaxAppendEntries <= 0 {
		return fmt.Errorf("MaxAppendEntries must be positive")
	}
	if config.MaxAppendEntries > 1024 {
		return fmt.Errorf("MaxAppendEntries is too large")
	}
	if config.SnapshotInterval < 5*time.Millisecond {
		return fmt.Errorf("Snapshot interval is too low")
	}
	if config.LeaderLeaseTimeout < 5*time.Millisecond {
		return fmt.Errorf("Leader lease timeout is too low")
	}
	if config.LeaderLeaseTimeout > config.HeartbeatTimeout {
		return fmt.Errorf("Leader lease timeout cannot be larger than heartbeat timeout")
	}
	if config.ElectionTimeout < config.HeartbeatTimeout {
		return fmt.Errorf("Election timeout must be equal or greater than Heartbeat Timeout")
	}
	return nil
}
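A short sketch of the intended usage: start from `DefaultConfig`, override fields, and run `ValidateConfig` to enforce the invariants checked above (e.g. `LeaderLeaseTimeout <= HeartbeatTimeout <= ElectionTimeout`). The tightened timer values are illustrative only:

```go
package main

import (
	"fmt"
	"time"

	"github.com/hashicorp/raft"
)

func main() {
	// Start from the library defaults and tighten the timers for a
	// low-latency LAN; values here are just an example.
	conf := raft.DefaultConfig()
	conf.HeartbeatTimeout = 200 * time.Millisecond
	conf.ElectionTimeout = 200 * time.Millisecond
	conf.LeaderLeaseTimeout = 100 * time.Millisecond
	conf.CommitTimeout = 10 * time.Millisecond

	if err := raft.ValidateConfig(conf); err != nil {
		fmt.Println("invalid config:", err)
		return
	}
	fmt.Println("config OK")
}
```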
vendor/github.com/hashicorp/raft/discard_snapshot.go (new file, 48 lines, generated, vendored)
@@ -0,0 +1,48 @@
package raft

import (
	"fmt"
	"io"
)

// DiscardSnapshotStore is used to successfully snapshot while
// always discarding the snapshot. This is useful for when the
// log should be truncated but no snapshot should be retained.
// This should never be used for production use, and is only
// suitable for testing.
type DiscardSnapshotStore struct{}

type DiscardSnapshotSink struct{}

// NewDiscardSnapshotStore is used to create a new DiscardSnapshotStore.
func NewDiscardSnapshotStore() *DiscardSnapshotStore {
	return &DiscardSnapshotStore{}
}

func (d *DiscardSnapshotStore) Create(index, term uint64, peers []byte) (SnapshotSink, error) {
	return &DiscardSnapshotSink{}, nil
}

func (d *DiscardSnapshotStore) List() ([]*SnapshotMeta, error) {
	return nil, nil
}

func (d *DiscardSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
	return nil, nil, fmt.Errorf("open is not supported")
}

func (d *DiscardSnapshotSink) Write(b []byte) (int, error) {
	return len(b), nil
}

func (d *DiscardSnapshotSink) Close() error {
	return nil
}

func (d *DiscardSnapshotSink) ID() string {
	return "discard"
}

func (d *DiscardSnapshotSink) Cancel() error {
	return nil
}
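Usage is trivial by design; a small sketch showing that writes are accepted but nothing is retained:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/raft"
)

func main() {
	// Create a store whose snapshots succeed and are then thrown away.
	store := raft.NewDiscardSnapshotStore()

	sink, _ := store.Create(10, 3, nil) // index, term, peer set
	sink.Write([]byte("state that will not be retained"))
	sink.Close() // reports success, but nothing was persisted

	metas, _ := store.List()
	fmt.Println("snapshots retained:", len(metas)) // always 0
}
```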
vendor/github.com/hashicorp/raft/file_snapshot.go (new file, 470 lines, generated, vendored)
@@ -0,0 +1,470 @@
package raft

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"hash"
	"hash/crc64"
	"io"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"time"
)

const (
	testPath      = "permTest"
	snapPath      = "snapshots"
	metaFilePath  = "meta.json"
	stateFilePath = "state.bin"
	tmpSuffix     = ".tmp"
)

// FileSnapshotStore implements the SnapshotStore interface and allows
// snapshots to be made on the local disk.
type FileSnapshotStore struct {
	path   string
	retain int
	logger *log.Logger
}

type snapMetaSlice []*fileSnapshotMeta

// FileSnapshotSink implements SnapshotSink with a file.
type FileSnapshotSink struct {
	store  *FileSnapshotStore
	logger *log.Logger
	dir    string
	meta   fileSnapshotMeta

	stateFile *os.File
	stateHash hash.Hash64
	buffered  *bufio.Writer

	closed bool
}

// fileSnapshotMeta is stored on disk. We also put a CRC
// on disk so that we can verify the snapshot.
type fileSnapshotMeta struct {
	SnapshotMeta
	CRC []byte
}

// bufferedFile is returned when we open a snapshot. This way
// reads are buffered and the file still gets closed.
type bufferedFile struct {
	bh *bufio.Reader
	fh *os.File
}

func (b *bufferedFile) Read(p []byte) (n int, err error) {
	return b.bh.Read(p)
}

func (b *bufferedFile) Close() error {
	return b.fh.Close()
}

// NewFileSnapshotStoreWithLogger creates a new FileSnapshotStore based
// on a base directory. The `retain` parameter controls how many
// snapshots are retained. Must be at least 1.
func NewFileSnapshotStoreWithLogger(base string, retain int, logger *log.Logger) (*FileSnapshotStore, error) {
	if retain < 1 {
		return nil, fmt.Errorf("must retain at least one snapshot")
	}
	if logger == nil {
		logger = log.New(os.Stderr, "", log.LstdFlags)
	}

	// Ensure our path exists
	path := filepath.Join(base, snapPath)
	if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
		return nil, fmt.Errorf("snapshot path not accessible: %v", err)
	}

	// Setup the store
	store := &FileSnapshotStore{
		path:   path,
		retain: retain,
		logger: logger,
	}

	// Do a permissions test
	if err := store.testPermissions(); err != nil {
		return nil, fmt.Errorf("permissions test failed: %v", err)
	}
	return store, nil
}

// NewFileSnapshotStore creates a new FileSnapshotStore based
// on a base directory. The `retain` parameter controls how many
// snapshots are retained. Must be at least 1.
func NewFileSnapshotStore(base string, retain int, logOutput io.Writer) (*FileSnapshotStore, error) {
	if logOutput == nil {
		logOutput = os.Stderr
	}
	return NewFileSnapshotStoreWithLogger(base, retain, log.New(logOutput, "", log.LstdFlags))
}

// testPermissions tries to touch a file in our path to see if it works.
func (f *FileSnapshotStore) testPermissions() error {
	path := filepath.Join(f.path, testPath)
	fh, err := os.Create(path)
	if err != nil {
		return err
	}
	fh.Close()
	os.Remove(path)
	return nil
}

// snapshotName generates a name for the snapshot.
func snapshotName(term, index uint64) string {
	now := time.Now()
	msec := now.UnixNano() / int64(time.Millisecond)
	return fmt.Sprintf("%d-%d-%d", term, index, msec)
}

// Create is used to start a new snapshot
func (f *FileSnapshotStore) Create(index, term uint64, peers []byte) (SnapshotSink, error) {
	// Create a new path
	name := snapshotName(term, index)
	path := filepath.Join(f.path, name+tmpSuffix)
	f.logger.Printf("[INFO] snapshot: Creating new snapshot at %s", path)

	// Make the directory
	if err := os.MkdirAll(path, 0755); err != nil {
		f.logger.Printf("[ERR] snapshot: Failed to make snapshot directory: %v", err)
		return nil, err
	}

	// Create the sink
	sink := &FileSnapshotSink{
		store:  f,
		logger: f.logger,
		dir:    path,
		meta: fileSnapshotMeta{
			SnapshotMeta: SnapshotMeta{
				ID:    name,
				Index: index,
				Term:  term,
				Peers: peers,
			},
			CRC: nil,
		},
	}

	// Write out the meta data
	if err := sink.writeMeta(); err != nil {
		f.logger.Printf("[ERR] snapshot: Failed to write metadata: %v", err)
		return nil, err
	}

	// Open the state file
	statePath := filepath.Join(path, stateFilePath)
	fh, err := os.Create(statePath)
	if err != nil {
		f.logger.Printf("[ERR] snapshot: Failed to create state file: %v", err)
		return nil, err
	}
	sink.stateFile = fh

	// Create a CRC64 hash
	sink.stateHash = crc64.New(crc64.MakeTable(crc64.ECMA))

	// Wrap both the hash and file in a MultiWriter with buffering
	multi := io.MultiWriter(sink.stateFile, sink.stateHash)
	sink.buffered = bufio.NewWriter(multi)

	// Done
	return sink, nil
}

// List returns available snapshots in the store.
func (f *FileSnapshotStore) List() ([]*SnapshotMeta, error) {
	// Get the eligible snapshots
	snapshots, err := f.getSnapshots()
	if err != nil {
		f.logger.Printf("[ERR] snapshot: Failed to get snapshots: %v", err)
		return nil, err
	}

	var snapMeta []*SnapshotMeta
	for _, meta := range snapshots {
		snapMeta = append(snapMeta, &meta.SnapshotMeta)
		if len(snapMeta) == f.retain {
			break
		}
	}
	return snapMeta, nil
}

// getSnapshots returns all the known snapshots.
func (f *FileSnapshotStore) getSnapshots() ([]*fileSnapshotMeta, error) {
	// Get the eligible snapshots
	snapshots, err := ioutil.ReadDir(f.path)
	if err != nil {
		f.logger.Printf("[ERR] snapshot: Failed to scan snapshot dir: %v", err)
		return nil, err
	}

	// Populate the metadata
	var snapMeta []*fileSnapshotMeta
	for _, snap := range snapshots {
		// Ignore any files
		if !snap.IsDir() {
			continue
		}

		// Ignore any temporary snapshots
		dirName := snap.Name()
		if strings.HasSuffix(dirName, tmpSuffix) {
			f.logger.Printf("[WARN] snapshot: Found temporary snapshot: %v", dirName)
			continue
		}

		// Try to read the meta data
		meta, err := f.readMeta(dirName)
		if err != nil {
			f.logger.Printf("[WARN] snapshot: Failed to read metadata for %v: %v", dirName, err)
			continue
		}

		// Append, but only return up to the retain count
		snapMeta = append(snapMeta, meta)
	}

	// Sort the snapshot, reverse so we get new -> old
	sort.Sort(sort.Reverse(snapMetaSlice(snapMeta)))

	return snapMeta, nil
}

// readMeta is used to read the meta data for a given named backup
func (f *FileSnapshotStore) readMeta(name string) (*fileSnapshotMeta, error) {
	// Open the meta file
	metaPath := filepath.Join(f.path, name, metaFilePath)
	fh, err := os.Open(metaPath)
	if err != nil {
		return nil, err
	}
	defer fh.Close()

	// Buffer the file IO
	buffered := bufio.NewReader(fh)

	// Read in the JSON
	meta := &fileSnapshotMeta{}
	dec := json.NewDecoder(buffered)
	if err := dec.Decode(meta); err != nil {
		return nil, err
	}
	return meta, nil
}

// Open takes a snapshot ID and returns a ReadCloser for that snapshot.
func (f *FileSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
	// Get the metadata
	meta, err := f.readMeta(id)
	if err != nil {
		f.logger.Printf("[ERR] snapshot: Failed to get meta data to open snapshot: %v", err)
		return nil, nil, err
	}

	// Open the state file
	statePath := filepath.Join(f.path, id, stateFilePath)
	fh, err := os.Open(statePath)
	if err != nil {
		f.logger.Printf("[ERR] snapshot: Failed to open state file: %v", err)
		return nil, nil, err
	}

	// Create a CRC64 hash
	stateHash := crc64.New(crc64.MakeTable(crc64.ECMA))

	// Compute the hash
	_, err = io.Copy(stateHash, fh)
	if err != nil {
		f.logger.Printf("[ERR] snapshot: Failed to read state file: %v", err)
		fh.Close()
		return nil, nil, err
	}

	// Verify the hash
	computed := stateHash.Sum(nil)
	if bytes.Compare(meta.CRC, computed) != 0 {
		f.logger.Printf("[ERR] snapshot: CRC checksum failed (stored: %v computed: %v)",
			meta.CRC, computed)
		fh.Close()
		return nil, nil, fmt.Errorf("CRC mismatch")
	}

	// Seek to the start
	if _, err := fh.Seek(0, 0); err != nil {
		f.logger.Printf("[ERR] snapshot: State file seek failed: %v", err)
		fh.Close()
		return nil, nil, err
	}

	// Return a buffered file
	buffered := &bufferedFile{
		bh: bufio.NewReader(fh),
		fh: fh,
	}

	return &meta.SnapshotMeta, buffered, nil
}

// ReapSnapshots reaps any snapshots beyond the retain count.
func (f *FileSnapshotStore) ReapSnapshots() error {
	snapshots, err := f.getSnapshots()
	if err != nil {
		f.logger.Printf("[ERR] snapshot: Failed to get snapshots: %v", err)
		return err
	}

	for i := f.retain; i < len(snapshots); i++ {
		path := filepath.Join(f.path, snapshots[i].ID)
		f.logger.Printf("[INFO] snapshot: reaping snapshot %v", path)
		if err := os.RemoveAll(path); err != nil {
			f.logger.Printf("[ERR] snapshot: Failed to reap snapshot %v: %v", path, err)
			return err
		}
	}
	return nil
}

// ID returns the ID of the snapshot, can be used with Open()
// after the snapshot is finalized.
func (s *FileSnapshotSink) ID() string {
	return s.meta.ID
}

// Write is used to append to the state file. We write to the
// buffered IO object to reduce the amount of context switches.
func (s *FileSnapshotSink) Write(b []byte) (int, error) {
	return s.buffered.Write(b)
}

// Close is used to indicate a successful end.
func (s *FileSnapshotSink) Close() error {
	// Make sure close is idempotent
	if s.closed {
		return nil
	}
	s.closed = true

	// Close the open handles
	if err := s.finalize(); err != nil {
		s.logger.Printf("[ERR] snapshot: Failed to finalize snapshot: %v", err)
		return err
	}

	// Write out the meta data
	if err := s.writeMeta(); err != nil {
		s.logger.Printf("[ERR] snapshot: Failed to write metadata: %v", err)
		return err
	}

	// Move the directory into place
	newPath := strings.TrimSuffix(s.dir, tmpSuffix)
	if err := os.Rename(s.dir, newPath); err != nil {
		s.logger.Printf("[ERR] snapshot: Failed to move snapshot into place: %v", err)
		return err
	}

	// Reap any old snapshots
	s.store.ReapSnapshots()
	return nil
}

// Cancel is used to indicate an unsuccessful end.
func (s *FileSnapshotSink) Cancel() error {
	// Make sure close is idempotent
	if s.closed {
		return nil
	}
	s.closed = true

	// Close the open handles
	if err := s.finalize(); err != nil {
		s.logger.Printf("[ERR] snapshot: Failed to finalize snapshot: %v", err)
		return err
	}

	// Attempt to remove all artifacts
	return os.RemoveAll(s.dir)
}

// finalize is used to close all of our resources.
func (s *FileSnapshotSink) finalize() error {
	// Flush any remaining data
	if err := s.buffered.Flush(); err != nil {
		return err
	}

	// Get the file size
	stat, statErr := s.stateFile.Stat()

	// Close the file
	if err := s.stateFile.Close(); err != nil {
		return err
	}

	// Set the file size, check after we close
	if statErr != nil {
		return statErr
	}
	s.meta.Size = stat.Size()

	// Set the CRC
	s.meta.CRC = s.stateHash.Sum(nil)
	return nil
}

// writeMeta is used to write out the metadata we have.
func (s *FileSnapshotSink) writeMeta() error {
	// Open the meta file
	metaPath := filepath.Join(s.dir, metaFilePath)
	fh, err := os.Create(metaPath)
	if err != nil {
		return err
	}
	defer fh.Close()

	// Buffer the file IO
	buffered := bufio.NewWriter(fh)
	defer buffered.Flush()

	// Write out as JSON
	enc := json.NewEncoder(buffered)
	if err := enc.Encode(&s.meta); err != nil {
		return err
	}
	return nil
}

// Implement the sort interface for []*fileSnapshotMeta.
func (s snapMetaSlice) Len() int {
	return len(s)
}

func (s snapMetaSlice) Less(i, j int) bool {
	if s[i].Term != s[j].Term {
		return s[i].Term < s[j].Term
	}
	if s[i].Index != s[j].Index {
		return s[i].Index < s[j].Index
	}
	return s[i].ID < s[j].ID
}

func (s snapMetaSlice) Swap(i, j int) {
	s[i], s[j] = s[j], s[i]
}
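A minimal end-to-end sketch of the store (the `raft-data` directory and the snapshot index/term are made up for illustration):

```go
package main

import (
	"fmt"
	"os"

	"github.com/hashicorp/raft"
)

func main() {
	// Keep the two most recent snapshots under ./raft-data/snapshots.
	store, err := raft.NewFileSnapshotStore("raft-data", 2, os.Stderr)
	if err != nil {
		fmt.Println("store:", err)
		return
	}

	// Write a snapshot: Close finalizes meta.json/state.bin, renames the
	// .tmp directory into place, and reaps snapshots beyond the retain count.
	sink, err := store.Create(100, 7, nil) // index, term, peer set
	if err != nil {
		fmt.Println("create:", err)
		return
	}
	sink.Write([]byte("serialized FSM state"))
	sink.Close()

	// List newest-first, up to the retain count.
	metas, _ := store.List()
	for _, m := range metas {
		fmt.Println(m.ID, m.Index, m.Term, m.Size)
	}
}
```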
vendor/github.com/hashicorp/raft/fsm.go (new file, 37 lines, generated, vendored)
@@ -0,0 +1,37 @@
package raft

import (
	"io"
)

// FSM provides an interface that can be implemented by
// clients to make use of the replicated log.
type FSM interface {
	// Apply log is invoked once a log entry is committed.
	Apply(*Log) interface{}

	// Snapshot is used to support log compaction. This call should
	// return an FSMSnapshot which can be used to save a point-in-time
	// snapshot of the FSM. Apply and Snapshot are not called in multiple
	// threads, but Apply will be called concurrently with Persist. This means
	// the FSM should be implemented in a fashion that allows for concurrent
	// updates while a snapshot is happening.
	Snapshot() (FSMSnapshot, error)

	// Restore is used to restore an FSM from a snapshot. It is not called
	// concurrently with any other command. The FSM must discard all previous
	// state.
	Restore(io.ReadCloser) error
}

// FSMSnapshot is returned by an FSM in response to a Snapshot.
// It must be safe to invoke FSMSnapshot methods with concurrent
// calls to Apply.
type FSMSnapshot interface {
	// Persist should dump all necessary state to the WriteCloser 'sink',
	// and call sink.Close() when finished or call sink.Cancel() on error.
	Persist(sink SnapshotSink) error

	// Release is invoked when we are finished with the snapshot.
	Release()
}
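For illustration, a minimal FSM implementing this interface: a counter keyed by the applied log data, with a mutex and a state copy so `Apply` can run concurrently with `Persist` as the contract requires. This is a sketch, not part of the library:

```go
package main

import (
	"encoding/json"
	"fmt"
	"io"
	"sync"

	"github.com/hashicorp/raft"
)

// counterFSM counts how many times each key has been applied.
type counterFSM struct {
	mu     sync.Mutex
	counts map[string]int
}

func (f *counterFSM) Apply(l *raft.Log) interface{} {
	f.mu.Lock()
	defer f.mu.Unlock()
	if f.counts == nil {
		f.counts = make(map[string]int)
	}
	f.counts[string(l.Data)]++
	return f.counts[string(l.Data)]
}

func (f *counterFSM) Snapshot() (raft.FSMSnapshot, error) {
	f.mu.Lock()
	defer f.mu.Unlock()
	// Copy the state so later Applies don't mutate the snapshot.
	copied := make(map[string]int, len(f.counts))
	for k, v := range f.counts {
		copied[k] = v
	}
	return &counterSnapshot{counts: copied}, nil
}

func (f *counterFSM) Restore(rc io.ReadCloser) error {
	defer rc.Close()
	f.mu.Lock()
	defer f.mu.Unlock()
	f.counts = make(map[string]int)
	return json.NewDecoder(rc).Decode(&f.counts)
}

type counterSnapshot struct {
	counts map[string]int
}

func (s *counterSnapshot) Persist(sink raft.SnapshotSink) error {
	// Dump the copied state to the sink; Cancel on error, Close on success.
	if err := json.NewEncoder(sink).Encode(s.counts); err != nil {
		sink.Cancel()
		return err
	}
	return sink.Close()
}

func (s *counterSnapshot) Release() {}

func main() {
	f := &counterFSM{}
	fmt.Println(f.Apply(&raft.Log{Data: []byte("x")})) // 1
}
```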
vendor/github.com/hashicorp/raft/future.go (new file, 182 lines, generated, vendored)
@@ -0,0 +1,182 @@
package raft

import (
	"sync"
	"time"
)

// Future is used to represent an action that may occur in the future.
type Future interface {
	Error() error
}

// ApplyFuture is used for Apply() and can return the FSM response.
type ApplyFuture interface {
	Future
	Response() interface{}
	Index() uint64
}

// errorFuture is used to return a static error.
type errorFuture struct {
	err error
}

func (e errorFuture) Error() error {
	return e.err
}

func (e errorFuture) Response() interface{} {
	return nil
}

func (e errorFuture) Index() uint64 {
	return 0
}

// deferError can be embedded to allow a future
// to provide an error in the future.
type deferError struct {
	err       error
	errCh     chan error
	responded bool
}

func (d *deferError) init() {
	d.errCh = make(chan error, 1)
}

func (d *deferError) Error() error {
	if d.err != nil {
		return d.err
	}
	if d.errCh == nil {
		panic("waiting for response on nil channel")
	}
	d.err = <-d.errCh
	return d.err
}

func (d *deferError) respond(err error) {
	if d.errCh == nil {
		return
	}
	if d.responded {
		return
	}
	d.errCh <- err
	close(d.errCh)
	d.responded = true
}

// logFuture is used to apply a log entry and wait until
// the log is considered committed.
type logFuture struct {
	deferError
	log      Log
	policy   quorumPolicy
	response interface{}
	dispatch time.Time
}

func (l *logFuture) Response() interface{} {
	return l.response
}

func (l *logFuture) Index() uint64 {
	return l.log.Index
}

type peerFuture struct {
	deferError
	peers []string
}

type shutdownFuture struct {
	raft *Raft
}

func (s *shutdownFuture) Error() error {
	for s.raft.getRoutines() > 0 {
		time.Sleep(5 * time.Millisecond)
	}
	return nil
}

// snapshotFuture is used for waiting on a snapshot to complete.
type snapshotFuture struct {
	deferError
}

// reqSnapshotFuture is used for requesting a snapshot start.
// It is only used internally.
type reqSnapshotFuture struct {
	deferError

	// snapshot details provided by the FSM runner before responding
	index    uint64
	term     uint64
	peers    []string
	snapshot FSMSnapshot
}

// restoreFuture is used for requesting an FSM to perform a
// snapshot restore. Used internally only.
type restoreFuture struct {
	deferError
	ID string
}

// verifyFuture is used to verify the current node is still
// the leader. This is to prevent a stale read.
type verifyFuture struct {
	deferError
	notifyCh   chan *verifyFuture
	quorumSize int
	votes      int
	voteLock   sync.Mutex
}

// vote is used to respond to a verifyFuture.
// This may block when responding on the notifyCh.
func (v *verifyFuture) vote(leader bool) {
	v.voteLock.Lock()
	defer v.voteLock.Unlock()

	// Guard against having notified already
	if v.notifyCh == nil {
		return
	}

	if leader {
		v.votes++
		if v.votes >= v.quorumSize {
			v.notifyCh <- v
			v.notifyCh = nil
		}
	} else {
		v.notifyCh <- v
		v.notifyCh = nil
	}
}

// appendFuture is used for waiting on a pipelined append
// entries RPC.
type appendFuture struct {
	deferError
	start time.Time
	args  *AppendEntriesRequest
	resp  *AppendEntriesResponse
}

func (a *appendFuture) Start() time.Time {
	return a.start
}

func (a *appendFuture) Request() *AppendEntriesRequest {
	return a.args
}

func (a *appendFuture) Response() *AppendEntriesResponse {
	return a.resp
}
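A quick sketch of how these futures are consumed: Error() blocks until some other goroutine calls respond() exactly once. The helper name is hypothetical; it relies only on the unexported API defined above:

// exerciseDeferError shows the blocking hand-off behind every future type.
func exerciseDeferError() error {
	f := &logFuture{}
	f.init()
	go func() {
		// e.g. the main raft loop answering once the entry commits
		f.respond(nil)
	}()
	return f.Error() // blocks until respond() fires
}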
213
vendor/github.com/hashicorp/raft/inflight.go
generated
vendored
Normal file
@@ -0,0 +1,213 @@
package raft

import (
	"container/list"
	"sync"
)

// QuorumPolicy allows individual logFutures to have different
// commitment rules while still using the inflight mechanism.
type quorumPolicy interface {
	// Checks if a commit from a given peer is enough to
	// satisfy the commitment rules
	Commit() bool

	// Checks if a commit is committed
	IsCommitted() bool
}

// MajorityQuorum is used by Apply transactions and requires
// a simple majority of nodes.
type majorityQuorum struct {
	count       int
	votesNeeded int
}

func newMajorityQuorum(clusterSize int) *majorityQuorum {
	votesNeeded := (clusterSize / 2) + 1
	return &majorityQuorum{count: 0, votesNeeded: votesNeeded}
}

func (m *majorityQuorum) Commit() bool {
	m.count++
	return m.count >= m.votesNeeded
}

func (m *majorityQuorum) IsCommitted() bool {
	return m.count >= m.votesNeeded
}

// Inflight is used to track operations that are still in-flight.
type inflight struct {
	sync.Mutex
	committed  *list.List
	commitCh   chan struct{}
	minCommit  uint64
	maxCommit  uint64
	operations map[uint64]*logFuture
	stopCh     chan struct{}
}

// NewInflight returns an inflight struct that notifies
// the provided channel when logs are finished committing.
func newInflight(commitCh chan struct{}) *inflight {
	return &inflight{
		committed:  list.New(),
		commitCh:   commitCh,
		minCommit:  0,
		maxCommit:  0,
		operations: make(map[uint64]*logFuture),
		stopCh:     make(chan struct{}),
	}
}

// Start is used to mark a logFuture as being inflight. It
// also commits the entry, as it is assumed the leader is
// starting.
func (i *inflight) Start(l *logFuture) {
	i.Lock()
	defer i.Unlock()
	i.start(l)
}

// StartAll is used to mark a list of logFuture's as being
// inflight. It also commits each entry as the leader is
// assumed to be starting.
func (i *inflight) StartAll(logs []*logFuture) {
	i.Lock()
	defer i.Unlock()
	for _, l := range logs {
		i.start(l)
	}
}

// start is used to mark a single entry as inflight,
// must be invoked with the lock held.
func (i *inflight) start(l *logFuture) {
	idx := l.log.Index
	i.operations[idx] = l

	if idx > i.maxCommit {
		i.maxCommit = idx
	}
	if i.minCommit == 0 {
		i.minCommit = idx
	}
	i.commit(idx)
}

// Cancel is used to cancel all in-flight operations.
// This is done when the leader steps down, and all futures
// are sent the given error.
func (i *inflight) Cancel(err error) {
	// Close the channel first to unblock any pending commits
	close(i.stopCh)

	// Lock after close to avoid deadlock
	i.Lock()
	defer i.Unlock()

	// Respond to all inflight operations
	for _, op := range i.operations {
		op.respond(err)
	}

	// Clear all the committed but not processed
	for e := i.committed.Front(); e != nil; e = e.Next() {
		e.Value.(*logFuture).respond(err)
	}

	// Clear the map
	i.operations = make(map[uint64]*logFuture)

	// Clear the list of committed
	i.committed = list.New()

	// Close the commitCh
	close(i.commitCh)

	// Reset indexes
	i.minCommit = 0
	i.maxCommit = 0
}

// Committed returns all the committed operations in order.
func (i *inflight) Committed() (l *list.List) {
	i.Lock()
	l, i.committed = i.committed, list.New()
	i.Unlock()
	return l
}

// Commit is used by leader replication routines to indicate that
// a follower was finished committing a log to disk.
func (i *inflight) Commit(index uint64) {
	i.Lock()
	defer i.Unlock()
	i.commit(index)
}

// CommitRange is used to commit a range of indexes inclusively.
// It is optimized to avoid commits for indexes that are not tracked.
func (i *inflight) CommitRange(minIndex, maxIndex uint64) {
	i.Lock()
	defer i.Unlock()

	// Update the minimum index
	minIndex = max(i.minCommit, minIndex)

	// Commit each index
	for idx := minIndex; idx <= maxIndex; idx++ {
		i.commit(idx)
	}
}

// commit is used to commit a single index. Must be called with the lock held.
func (i *inflight) commit(index uint64) {
	op, ok := i.operations[index]
	if !ok {
		// Ignore if not in the map, as it may be committed already
		return
	}

	// Check if we've satisfied the commit
	if !op.policy.Commit() {
		return
	}

	// Cannot commit if this is not the minimum inflight. This can happen
	// if the quorum size changes, meaning a previous commit requires a larger
	// quorum than this commit. We MUST block until the previous log is committed,
	// otherwise logs will be applied out of order.
	if index != i.minCommit {
		return
	}

NOTIFY:
	// Add the operation to the committed list
	i.committed.PushBack(op)

	// Stop tracking since it is committed
	delete(i.operations, index)

	// Update the indexes
	if index == i.maxCommit {
		i.minCommit = 0
		i.maxCommit = 0

	} else {
		i.minCommit++
	}

	// Check if the next in-flight operation is ready
	if i.minCommit != 0 {
		op = i.operations[i.minCommit]
		if op.policy.IsCommitted() {
			index = i.minCommit
			goto NOTIFY
		}
	}

	// Async notify of ready operations
	asyncNotifyCh(i.commitCh)
}
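A quick illustration of the quorum arithmetic above (standalone sketch mirroring newMajorityQuorum rather than calling the unexported type; assumes fmt is imported). Note that Start commits the leader's own entry, so the leader's vote counts toward the majority:

func majoritySizes() {
	for _, clusterSize := range []int{1, 2, 3, 4, 5} {
		votesNeeded := (clusterSize / 2) + 1
		// e.g. a 5-node cluster commits an entry after 3 acks
		// (the leader counts itself via inflight.Start).
		fmt.Printf("cluster=%d majority=%d\n", clusterSize, votesNeeded)
	}
}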
116
vendor/github.com/hashicorp/raft/inmem_store.go
generated
vendored
Normal file
@@ -0,0 +1,116 @@
package raft

import (
	"sync"
)

// InmemStore implements the LogStore and StableStore interfaces.
// It should NOT EVER be used for production. It is used only for
// unit tests. Use the MDBStore implementation instead.
type InmemStore struct {
	l         sync.RWMutex
	lowIndex  uint64
	highIndex uint64
	logs      map[uint64]*Log
	kv        map[string][]byte
	kvInt     map[string]uint64
}

// NewInmemStore returns a new in-memory backend. Do not ever
// use for production. Only for testing.
func NewInmemStore() *InmemStore {
	i := &InmemStore{
		logs:  make(map[uint64]*Log),
		kv:    make(map[string][]byte),
		kvInt: make(map[string]uint64),
	}
	return i
}

// FirstIndex implements the LogStore interface.
func (i *InmemStore) FirstIndex() (uint64, error) {
	i.l.RLock()
	defer i.l.RUnlock()
	return i.lowIndex, nil
}

// LastIndex implements the LogStore interface.
func (i *InmemStore) LastIndex() (uint64, error) {
	i.l.RLock()
	defer i.l.RUnlock()
	return i.highIndex, nil
}

// GetLog implements the LogStore interface.
func (i *InmemStore) GetLog(index uint64, log *Log) error {
	i.l.RLock()
	defer i.l.RUnlock()
	l, ok := i.logs[index]
	if !ok {
		return ErrLogNotFound
	}
	*log = *l
	return nil
}

// StoreLog implements the LogStore interface.
func (i *InmemStore) StoreLog(log *Log) error {
	return i.StoreLogs([]*Log{log})
}

// StoreLogs implements the LogStore interface.
func (i *InmemStore) StoreLogs(logs []*Log) error {
	i.l.Lock()
	defer i.l.Unlock()
	for _, l := range logs {
		i.logs[l.Index] = l
		if i.lowIndex == 0 {
			i.lowIndex = l.Index
		}
		if l.Index > i.highIndex {
			i.highIndex = l.Index
		}
	}
	return nil
}

// DeleteRange implements the LogStore interface.
func (i *InmemStore) DeleteRange(min, max uint64) error {
	i.l.Lock()
	defer i.l.Unlock()
	for j := min; j <= max; j++ {
		delete(i.logs, j)
	}
	i.lowIndex = max + 1
	return nil
}

// Set implements the StableStore interface.
func (i *InmemStore) Set(key []byte, val []byte) error {
	i.l.Lock()
	defer i.l.Unlock()
	i.kv[string(key)] = val
	return nil
}

// Get implements the StableStore interface.
func (i *InmemStore) Get(key []byte) ([]byte, error) {
	i.l.RLock()
	defer i.l.RUnlock()
	return i.kv[string(key)], nil
}

// SetUint64 implements the StableStore interface.
func (i *InmemStore) SetUint64(key []byte, val uint64) error {
	i.l.Lock()
	defer i.l.Unlock()
	i.kvInt[string(key)] = val
	return nil
}

// GetUint64 implements the StableStore interface.
func (i *InmemStore) GetUint64(key []byte) (uint64, error) {
	i.l.RLock()
	defer i.l.RUnlock()
	return i.kvInt[string(key)], nil
}
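Illustrative test-style usage of the store above (a sketch, not part of the vendored file; the helper name is hypothetical and it uses only APIs defined in this package):

func exerciseInmemStore() error {
	store := NewInmemStore()

	// Append two entries, then read one back.
	logs := []*Log{
		{Index: 1, Term: 1, Type: LogCommand, Data: []byte("a")},
		{Index: 2, Term: 1, Type: LogCommand, Data: []byte("b")},
	}
	if err := store.StoreLogs(logs); err != nil {
		return err
	}
	var out Log
	if err := store.GetLog(2, &out); err != nil {
		return err // would be ErrLogNotFound for an untracked index
	}

	// Stable-store side: arbitrary []byte and uint64 values.
	if err := store.SetUint64([]byte("currentTerm"), 1); err != nil {
		return err
	}
	return store.Set([]byte("votedFor"), []byte("node-a"))
}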
315
vendor/github.com/hashicorp/raft/inmem_transport.go
generated
vendored
Normal file
@@ -0,0 +1,315 @@
package raft

import (
	"fmt"
	"io"
	"sync"
	"time"
)

// NewInmemAddr returns a new in-memory addr with
// a randomly generated UUID as the ID.
func NewInmemAddr() string {
	return generateUUID()
}

// inmemPipeline is used to pipeline requests for the in-mem transport.
type inmemPipeline struct {
	trans    *InmemTransport
	peer     *InmemTransport
	peerAddr string

	doneCh       chan AppendFuture
	inprogressCh chan *inmemPipelineInflight

	shutdown     bool
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex
}

type inmemPipelineInflight struct {
	future *appendFuture
	respCh <-chan RPCResponse
}

// InmemTransport implements the Transport interface, to allow Raft to be
// tested in-memory without going over a network.
type InmemTransport struct {
	sync.RWMutex
	consumerCh chan RPC
	localAddr  string
	peers      map[string]*InmemTransport
	pipelines  []*inmemPipeline
	timeout    time.Duration
}

// NewInmemTransport is used to initialize a new transport
// and generates a random local address.
func NewInmemTransport() (string, *InmemTransport) {
	addr := NewInmemAddr()
	trans := &InmemTransport{
		consumerCh: make(chan RPC, 16),
		localAddr:  addr,
		peers:      make(map[string]*InmemTransport),
		timeout:    50 * time.Millisecond,
	}
	return addr, trans
}

// SetHeartbeatHandler is used to set optional fast-path for
// heartbeats, not supported for this transport.
func (i *InmemTransport) SetHeartbeatHandler(cb func(RPC)) {
}

// Consumer implements the Transport interface.
func (i *InmemTransport) Consumer() <-chan RPC {
	return i.consumerCh
}

// LocalAddr implements the Transport interface.
func (i *InmemTransport) LocalAddr() string {
	return i.localAddr
}

// AppendEntriesPipeline returns an interface that can be used to pipeline
// AppendEntries requests.
func (i *InmemTransport) AppendEntriesPipeline(target string) (AppendPipeline, error) {
	i.RLock()
	peer, ok := i.peers[target]
	i.RUnlock()
	if !ok {
		return nil, fmt.Errorf("failed to connect to peer: %v", target)
	}
	pipeline := newInmemPipeline(i, peer, target)
	i.Lock()
	i.pipelines = append(i.pipelines, pipeline)
	i.Unlock()
	return pipeline, nil
}

// AppendEntries implements the Transport interface.
func (i *InmemTransport) AppendEntries(target string, args *AppendEntriesRequest, resp *AppendEntriesResponse) error {
	rpcResp, err := i.makeRPC(target, args, nil, i.timeout)
	if err != nil {
		return err
	}

	// Copy the result back
	out := rpcResp.Response.(*AppendEntriesResponse)
	*resp = *out
	return nil
}

// RequestVote implements the Transport interface.
func (i *InmemTransport) RequestVote(target string, args *RequestVoteRequest, resp *RequestVoteResponse) error {
	rpcResp, err := i.makeRPC(target, args, nil, i.timeout)
	if err != nil {
		return err
	}

	// Copy the result back
	out := rpcResp.Response.(*RequestVoteResponse)
	*resp = *out
	return nil
}

// InstallSnapshot implements the Transport interface.
func (i *InmemTransport) InstallSnapshot(target string, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error {
	rpcResp, err := i.makeRPC(target, args, data, 10*i.timeout)
	if err != nil {
		return err
	}

	// Copy the result back
	out := rpcResp.Response.(*InstallSnapshotResponse)
	*resp = *out
	return nil
}

func (i *InmemTransport) makeRPC(target string, args interface{}, r io.Reader, timeout time.Duration) (rpcResp RPCResponse, err error) {
	i.RLock()
	peer, ok := i.peers[target]
	i.RUnlock()

	if !ok {
		err = fmt.Errorf("failed to connect to peer: %v", target)
		return
	}

	// Send the RPC over
	respCh := make(chan RPCResponse)
	peer.consumerCh <- RPC{
		Command:  args,
		Reader:   r,
		RespChan: respCh,
	}

	// Wait for a response
	select {
	case rpcResp = <-respCh:
		if rpcResp.Error != nil {
			err = rpcResp.Error
		}
	case <-time.After(timeout):
		err = fmt.Errorf("command timed out")
	}
	return
}

// EncodePeer implements the Transport interface. It uses the UUID as the
// address directly.
func (i *InmemTransport) EncodePeer(p string) []byte {
	return []byte(p)
}

// DecodePeer implements the Transport interface. It wraps the UUID in an
// InmemAddr.
func (i *InmemTransport) DecodePeer(buf []byte) string {
	return string(buf)
}

// Connect is used to connect this transport to another transport for
// a given peer name. This allows for local routing.
func (i *InmemTransport) Connect(peer string, trans *InmemTransport) {
	i.Lock()
	defer i.Unlock()
	i.peers[peer] = trans
}

// Disconnect is used to remove the ability to route to a given peer.
func (i *InmemTransport) Disconnect(peer string) {
	i.Lock()
	defer i.Unlock()
	delete(i.peers, peer)

	// Disconnect any pipelines
	n := len(i.pipelines)
	for idx := 0; idx < n; idx++ {
		if i.pipelines[idx].peerAddr == peer {
			i.pipelines[idx].Close()
			i.pipelines[idx], i.pipelines[n-1] = i.pipelines[n-1], nil
			idx--
			n--
		}
	}
	i.pipelines = i.pipelines[:n]
}

// DisconnectAll is used to remove all routes to peers.
func (i *InmemTransport) DisconnectAll() {
	i.Lock()
	defer i.Unlock()
	i.peers = make(map[string]*InmemTransport)

	// Handle pipelines
	for _, pipeline := range i.pipelines {
		pipeline.Close()
	}
	i.pipelines = nil
}

func newInmemPipeline(trans *InmemTransport, peer *InmemTransport, addr string) *inmemPipeline {
	i := &inmemPipeline{
		trans:        trans,
		peer:         peer,
		peerAddr:     addr,
		doneCh:       make(chan AppendFuture, 16),
		inprogressCh: make(chan *inmemPipelineInflight, 16),
		shutdownCh:   make(chan struct{}),
	}
	go i.decodeResponses()
	return i
}

func (i *inmemPipeline) decodeResponses() {
	timeout := i.trans.timeout
	for {
		select {
		case inp := <-i.inprogressCh:
			var timeoutCh <-chan time.Time
			if timeout > 0 {
				timeoutCh = time.After(timeout)
			}

			select {
			case rpcResp := <-inp.respCh:
				// Copy the result back
				*inp.future.resp = *rpcResp.Response.(*AppendEntriesResponse)
				inp.future.respond(rpcResp.Error)

				select {
				case i.doneCh <- inp.future:
				case <-i.shutdownCh:
					return
				}

			case <-timeoutCh:
				inp.future.respond(fmt.Errorf("command timed out"))
				select {
				case i.doneCh <- inp.future:
				case <-i.shutdownCh:
					return
				}

			case <-i.shutdownCh:
				return
			}
		case <-i.shutdownCh:
			return
		}
	}
}

func (i *inmemPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) {
	// Create a new future
	future := &appendFuture{
		start: time.Now(),
		args:  args,
		resp:  resp,
	}
	future.init()

	// Handle a timeout
	var timeout <-chan time.Time
	if i.trans.timeout > 0 {
		timeout = time.After(i.trans.timeout)
	}

	// Send the RPC over
	respCh := make(chan RPCResponse, 1)
	rpc := RPC{
		Command:  args,
		RespChan: respCh,
	}
	select {
	case i.peer.consumerCh <- rpc:
	case <-timeout:
		return nil, fmt.Errorf("command enqueue timeout")
	case <-i.shutdownCh:
		return nil, ErrPipelineShutdown
	}

	// Send to be decoded
	select {
	case i.inprogressCh <- &inmemPipelineInflight{future, respCh}:
		return future, nil
	case <-i.shutdownCh:
		return nil, ErrPipelineShutdown
	}
}

func (i *inmemPipeline) Consumer() <-chan AppendFuture {
	return i.doneCh
}

func (i *inmemPipeline) Close() error {
	i.shutdownLock.Lock()
	defer i.shutdownLock.Unlock()
	if i.shutdown {
		return nil
	}

	i.shutdown = true
	close(i.shutdownCh)
	return nil
}
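A sketch of wiring two in-memory transports together in a test. The helper name is hypothetical, and RPC.Respond is defined elsewhere in this package (not in the hunks shown), so treat that call as an assumption:

func exerciseInmemTransport() error {
	addr1, t1 := NewInmemTransport()
	addr2, t2 := NewInmemTransport()
	t1.Connect(addr2, t2) // t1 can now route RPCs to t2
	t2.Connect(addr1, t1)

	// t2 must consume and respond within t1's 50ms timeout.
	go func() {
		rpc := <-t2.Consumer()
		rpc.Respond(&RequestVoteResponse{}, nil)
	}()

	var resp RequestVoteResponse
	return t1.RequestVote(addr2, &RequestVoteRequest{}, &resp)
}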
60
vendor/github.com/hashicorp/raft/log.go
generated
vendored
Normal file
@@ -0,0 +1,60 @@
package raft

// LogType describes various types of log entries.
type LogType uint8

const (
	// LogCommand is applied to a user FSM.
	LogCommand LogType = iota

	// LogNoop is used to assert leadership.
	LogNoop

	// LogAddPeer is used to add a new peer.
	LogAddPeer

	// LogRemovePeer is used to remove an existing peer.
	LogRemovePeer

	// LogBarrier is used to ensure all preceding operations have been
	// applied to the FSM. It is similar to LogNoop, but instead of returning
	// once committed, it only returns once the FSM manager acks it. Otherwise
	// it is possible there are operations committed but not yet applied to
	// the FSM.
	LogBarrier
)

// Log entries are replicated to all members of the Raft cluster
// and form the heart of the replicated state machine.
type Log struct {
	Index uint64
	Term  uint64
	Type  LogType
	Data  []byte

	// peer is not exported since it is not transmitted, only used
	// internally to construct the Data field.
	peer string
}

// LogStore is used to provide an interface for storing
// and retrieving logs in a durable fashion.
type LogStore interface {
	// Returns the first index written. 0 for no entries.
	FirstIndex() (uint64, error)

	// Returns the last index written. 0 for no entries.
	LastIndex() (uint64, error)

	// Gets a log entry at a given index.
	GetLog(index uint64, log *Log) error

	// Stores a log entry.
	StoreLog(log *Log) error

	// Stores multiple log entries.
	StoreLogs(logs []*Log) error

	// Deletes a range of log entries. The range is inclusive.
	DeleteRange(min, max uint64) error
}
79
vendor/github.com/hashicorp/raft/log_cache.go
generated
vendored
Normal file
@@ -0,0 +1,79 @@
package raft

import (
	"fmt"
	"sync"
)

// LogCache wraps any LogStore implementation to provide an
// in-memory ring buffer. This is used to cache access to
// the recently written entries. For implementations that do not
// cache themselves, this can provide a substantial boost by
// avoiding disk I/O on recent entries.
type LogCache struct {
	store LogStore

	cache []*Log
	l     sync.RWMutex
}

// NewLogCache is used to create a new LogCache with the
// given capacity and backend store.
func NewLogCache(capacity int, store LogStore) (*LogCache, error) {
	if capacity <= 0 {
		return nil, fmt.Errorf("capacity must be positive")
	}
	c := &LogCache{
		store: store,
		cache: make([]*Log, capacity),
	}
	return c, nil
}

func (c *LogCache) GetLog(idx uint64, log *Log) error {
	// Check the buffer for an entry
	c.l.RLock()
	cached := c.cache[idx%uint64(len(c.cache))]
	c.l.RUnlock()

	// Check if entry is valid
	if cached != nil && cached.Index == idx {
		*log = *cached
		return nil
	}

	// Forward request on cache miss
	return c.store.GetLog(idx, log)
}

func (c *LogCache) StoreLog(log *Log) error {
	return c.StoreLogs([]*Log{log})
}

func (c *LogCache) StoreLogs(logs []*Log) error {
	// Insert the logs into the ring buffer
	c.l.Lock()
	for _, l := range logs {
		c.cache[l.Index%uint64(len(c.cache))] = l
	}
	c.l.Unlock()

	return c.store.StoreLogs(logs)
}

func (c *LogCache) FirstIndex() (uint64, error) {
	return c.store.FirstIndex()
}

func (c *LogCache) LastIndex() (uint64, error) {
	return c.store.LastIndex()
}

func (c *LogCache) DeleteRange(min, max uint64) error {
	// Invalidate the cache on deletes
	c.l.Lock()
	c.cache = make([]*Log, len(c.cache))
	c.l.Unlock()

	return c.store.DeleteRange(min, max)
}
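A sketch of fronting the in-memory store with the ring-buffer cache above (hypothetical helper; uses only APIs from this package):

func exerciseLogCache() error {
	base := NewInmemStore()
	cached, err := NewLogCache(512, base) // capacity must be positive
	if err != nil {
		return err
	}
	if err := cached.StoreLog(&Log{Index: 1, Term: 1}); err != nil {
		return err
	}
	var l Log
	// Index 1 is served from cache slot 1 % 512; a miss or an evicted
	// slot falls through to base.GetLog.
	return cached.GetLog(1, &l)
}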
622
vendor/github.com/hashicorp/raft/net_transport.go
generated
vendored
Normal file
@@ -0,0 +1,622 @@
package raft

import (
	"bufio"
	"errors"
	"fmt"
	"io"
	"log"
	"net"
	"os"
	"sync"
	"time"

	"github.com/hashicorp/go-msgpack/codec"
)

const (
	rpcAppendEntries uint8 = iota
	rpcRequestVote
	rpcInstallSnapshot

	// DefaultTimeoutScale is the default TimeoutScale in a NetworkTransport.
	DefaultTimeoutScale = 256 * 1024 // 256KB

	// rpcMaxPipeline controls the maximum number of outstanding
	// AppendEntries RPC calls.
	rpcMaxPipeline = 128
)

var (
	// ErrTransportShutdown is returned when operations on a transport are
	// invoked after it's been terminated.
	ErrTransportShutdown = errors.New("transport shutdown")

	// ErrPipelineShutdown is returned when the pipeline is closed.
	ErrPipelineShutdown = errors.New("append pipeline closed")
)

/*

NetworkTransport provides a network based transport that can be
used to communicate with Raft on remote machines. It requires
an underlying stream layer to provide a stream abstraction, which can
be simple TCP, TLS, etc.

This transport is very simple and lightweight. Each RPC request is
framed by sending a byte that indicates the message type, followed
by the MsgPack encoded request.

The response is an error string followed by the response object,
both are encoded using MsgPack.

InstallSnapshot is special, in that after the RPC request we stream
the entire state. That socket is not re-used as the connection state
is not known if there is an error.

*/
type NetworkTransport struct {
	connPool     map[string][]*netConn
	connPoolLock sync.Mutex

	consumeCh chan RPC

	heartbeatFn     func(RPC)
	heartbeatFnLock sync.Mutex

	logger *log.Logger

	maxPool int

	shutdown     bool
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex

	stream StreamLayer

	timeout      time.Duration
	TimeoutScale int
}

// StreamLayer is used with the NetworkTransport to provide
// the low level stream abstraction.
type StreamLayer interface {
	net.Listener

	// Dial is used to create a new outgoing connection
	Dial(address string, timeout time.Duration) (net.Conn, error)
}

type netConn struct {
	target string
	conn   net.Conn
	r      *bufio.Reader
	w      *bufio.Writer
	dec    *codec.Decoder
	enc    *codec.Encoder
}

func (n *netConn) Release() error {
	return n.conn.Close()
}

type netPipeline struct {
	conn  *netConn
	trans *NetworkTransport

	doneCh       chan AppendFuture
	inprogressCh chan *appendFuture

	shutdown     bool
	shutdownCh   chan struct{}
	shutdownLock sync.Mutex
}

// NewNetworkTransport creates a new network transport with the given dialer
// and listener. The maxPool controls how many connections we will pool. The
// timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply
// the timeout by (SnapshotSize / TimeoutScale).
func NewNetworkTransport(
	stream StreamLayer,
	maxPool int,
	timeout time.Duration,
	logOutput io.Writer,
) *NetworkTransport {
	if logOutput == nil {
		logOutput = os.Stderr
	}
	return NewNetworkTransportWithLogger(stream, maxPool, timeout, log.New(logOutput, "", log.LstdFlags))
}

// NewNetworkTransportWithLogger creates a new network transport with the given dialer
// and listener. The maxPool controls how many connections we will pool. The
// timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply
// the timeout by (SnapshotSize / TimeoutScale).
func NewNetworkTransportWithLogger(
	stream StreamLayer,
	maxPool int,
	timeout time.Duration,
	logger *log.Logger,
) *NetworkTransport {
	if logger == nil {
		logger = log.New(os.Stderr, "", log.LstdFlags)
	}
	trans := &NetworkTransport{
		connPool:     make(map[string][]*netConn),
		consumeCh:    make(chan RPC),
		logger:       logger,
		maxPool:      maxPool,
		shutdownCh:   make(chan struct{}),
		stream:       stream,
		timeout:      timeout,
		TimeoutScale: DefaultTimeoutScale,
	}
	go trans.listen()
	return trans
}

// SetHeartbeatHandler is used to setup a heartbeat handler
// as a fast-path. This is to avoid head-of-line blocking from
// disk IO.
func (n *NetworkTransport) SetHeartbeatHandler(cb func(rpc RPC)) {
	n.heartbeatFnLock.Lock()
	defer n.heartbeatFnLock.Unlock()
	n.heartbeatFn = cb
}

// Close is used to stop the network transport.
func (n *NetworkTransport) Close() error {
	n.shutdownLock.Lock()
	defer n.shutdownLock.Unlock()

	if !n.shutdown {
		close(n.shutdownCh)
		n.stream.Close()
		n.shutdown = true
	}
	return nil
}

// Consumer implements the Transport interface.
func (n *NetworkTransport) Consumer() <-chan RPC {
	return n.consumeCh
}

// LocalAddr implements the Transport interface.
func (n *NetworkTransport) LocalAddr() string {
	return n.stream.Addr().String()
}

// IsShutdown is used to check if the transport is shutdown.
func (n *NetworkTransport) IsShutdown() bool {
	select {
	case <-n.shutdownCh:
		return true
	default:
		return false
	}
}

// getPooledConn is used to grab a pooled connection.
func (n *NetworkTransport) getPooledConn(target string) *netConn {
	n.connPoolLock.Lock()
	defer n.connPoolLock.Unlock()

	conns, ok := n.connPool[target]
	if !ok || len(conns) == 0 {
		return nil
	}

	var conn *netConn
	num := len(conns)
	conn, conns[num-1] = conns[num-1], nil
	n.connPool[target] = conns[:num-1]
	return conn
}

// getConn is used to get a connection from the pool.
func (n *NetworkTransport) getConn(target string) (*netConn, error) {
	// Check for a pooled conn
	if conn := n.getPooledConn(target); conn != nil {
		return conn, nil
	}

	// Dial a new connection
	conn, err := n.stream.Dial(target, n.timeout)
	if err != nil {
		return nil, err
	}

	// Wrap the conn
	netConn := &netConn{
		target: target,
		conn:   conn,
		r:      bufio.NewReader(conn),
		w:      bufio.NewWriter(conn),
	}

	// Setup encoder/decoders
	netConn.dec = codec.NewDecoder(netConn.r, &codec.MsgpackHandle{})
	netConn.enc = codec.NewEncoder(netConn.w, &codec.MsgpackHandle{})

	// Done
	return netConn, nil
}

// returnConn returns a connection back to the pool.
func (n *NetworkTransport) returnConn(conn *netConn) {
	n.connPoolLock.Lock()
	defer n.connPoolLock.Unlock()

	key := conn.target
	conns, _ := n.connPool[key]

	if !n.IsShutdown() && len(conns) < n.maxPool {
		n.connPool[key] = append(conns, conn)
	} else {
		conn.Release()
	}
}

// AppendEntriesPipeline returns an interface that can be used to pipeline
// AppendEntries requests.
func (n *NetworkTransport) AppendEntriesPipeline(target string) (AppendPipeline, error) {
	// Get a connection
	conn, err := n.getConn(target)
	if err != nil {
		return nil, err
	}

	// Create the pipeline
	return newNetPipeline(n, conn), nil
}

// AppendEntries implements the Transport interface.
func (n *NetworkTransport) AppendEntries(target string, args *AppendEntriesRequest, resp *AppendEntriesResponse) error {
	return n.genericRPC(target, rpcAppendEntries, args, resp)
}

// RequestVote implements the Transport interface.
func (n *NetworkTransport) RequestVote(target string, args *RequestVoteRequest, resp *RequestVoteResponse) error {
	return n.genericRPC(target, rpcRequestVote, args, resp)
}

// genericRPC handles a simple request/response RPC.
func (n *NetworkTransport) genericRPC(target string, rpcType uint8, args interface{}, resp interface{}) error {
	// Get a conn
	conn, err := n.getConn(target)
	if err != nil {
		return err
	}

	// Set a deadline
	if n.timeout > 0 {
		conn.conn.SetDeadline(time.Now().Add(n.timeout))
	}

	// Send the RPC
	if err := sendRPC(conn, rpcType, args); err != nil {
		return err
	}

	// Decode the response
	canReturn, err := decodeResponse(conn, resp)
	if canReturn {
		n.returnConn(conn)
	}
	return err
}

// InstallSnapshot implements the Transport interface.
func (n *NetworkTransport) InstallSnapshot(target string, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error {
	// Get a conn, always close for InstallSnapshot
	conn, err := n.getConn(target)
	if err != nil {
		return err
	}
	defer conn.Release()

	// Set a deadline, scaled by request size
	if n.timeout > 0 {
		timeout := n.timeout * time.Duration(args.Size/int64(n.TimeoutScale))
		if timeout < n.timeout {
			timeout = n.timeout
		}
		conn.conn.SetDeadline(time.Now().Add(timeout))
	}

	// Send the RPC
	if err := sendRPC(conn, rpcInstallSnapshot, args); err != nil {
		return err
	}

	// Stream the state
	if _, err := io.Copy(conn.w, data); err != nil {
		return err
	}

	// Flush
	if err := conn.w.Flush(); err != nil {
		return err
	}

	// Decode the response, do not return conn
	_, err = decodeResponse(conn, resp)
	return err
}

// EncodePeer implements the Transport interface.
func (n *NetworkTransport) EncodePeer(p string) []byte {
	return []byte(p)
}

// DecodePeer implements the Transport interface.
func (n *NetworkTransport) DecodePeer(buf []byte) string {
	return string(buf)
}

// listen is used to handle incoming connections.
func (n *NetworkTransport) listen() {
	for {
		// Accept incoming connections
		conn, err := n.stream.Accept()
		if err != nil {
			if n.IsShutdown() {
				return
			}
			n.logger.Printf("[ERR] raft-net: Failed to accept connection: %v", err)
			continue
		}
		n.logger.Printf("[DEBUG] raft-net: %v accepted connection from: %v", n.LocalAddr(), conn.RemoteAddr())

		// Handle the connection in dedicated routine
		go n.handleConn(conn)
	}
}

// handleConn is used to handle an inbound connection for its lifespan.
func (n *NetworkTransport) handleConn(conn net.Conn) {
	defer conn.Close()
	r := bufio.NewReader(conn)
	w := bufio.NewWriter(conn)
	dec := codec.NewDecoder(r, &codec.MsgpackHandle{})
	enc := codec.NewEncoder(w, &codec.MsgpackHandle{})

	for {
		if err := n.handleCommand(r, dec, enc); err != nil {
			if err != io.EOF {
				n.logger.Printf("[ERR] raft-net: Failed to decode incoming command: %v", err)
			}
			return
		}
		if err := w.Flush(); err != nil {
			n.logger.Printf("[ERR] raft-net: Failed to flush response: %v", err)
			return
		}
	}
}

// handleCommand is used to decode and dispatch a single command.
func (n *NetworkTransport) handleCommand(r *bufio.Reader, dec *codec.Decoder, enc *codec.Encoder) error {
	// Get the rpc type
	rpcType, err := r.ReadByte()
	if err != nil {
		return err
	}

	// Create the RPC object
	respCh := make(chan RPCResponse, 1)
	rpc := RPC{
		RespChan: respCh,
	}

	// Decode the command
	isHeartbeat := false
	switch rpcType {
	case rpcAppendEntries:
		var req AppendEntriesRequest
		if err := dec.Decode(&req); err != nil {
			return err
		}
		rpc.Command = &req

		// Check if this is a heartbeat
		if req.Term != 0 && req.Leader != nil &&
			req.PrevLogEntry == 0 && req.PrevLogTerm == 0 &&
			len(req.Entries) == 0 && req.LeaderCommitIndex == 0 {
			isHeartbeat = true
		}

	case rpcRequestVote:
		var req RequestVoteRequest
		if err := dec.Decode(&req); err != nil {
			return err
		}
		rpc.Command = &req

	case rpcInstallSnapshot:
		var req InstallSnapshotRequest
		if err := dec.Decode(&req); err != nil {
			return err
		}
		rpc.Command = &req
		rpc.Reader = io.LimitReader(r, req.Size)

	default:
		return fmt.Errorf("unknown rpc type %d", rpcType)
	}

	// Check for heartbeat fast-path
	if isHeartbeat {
		n.heartbeatFnLock.Lock()
		fn := n.heartbeatFn
		n.heartbeatFnLock.Unlock()
		if fn != nil {
			fn(rpc)
			goto RESP
		}
	}

	// Dispatch the RPC
	select {
	case n.consumeCh <- rpc:
	case <-n.shutdownCh:
		return ErrTransportShutdown
	}

	// Wait for response
RESP:
	select {
	case resp := <-respCh:
		// Send the error first
		respErr := ""
		if resp.Error != nil {
			respErr = resp.Error.Error()
		}
		if err := enc.Encode(respErr); err != nil {
			return err
		}

		// Send the response
		if err := enc.Encode(resp.Response); err != nil {
			return err
		}
	case <-n.shutdownCh:
		return ErrTransportShutdown
	}
	return nil
}

// decodeResponse is used to decode an RPC response and reports whether
// the connection can be reused.
func decodeResponse(conn *netConn, resp interface{}) (bool, error) {
	// Decode the error if any
	var rpcError string
	if err := conn.dec.Decode(&rpcError); err != nil {
		conn.Release()
		return false, err
	}

	// Decode the response
	if err := conn.dec.Decode(resp); err != nil {
		conn.Release()
		return false, err
	}

	// Format an error if any
	if rpcError != "" {
		return true, fmt.Errorf(rpcError)
	}
	return true, nil
}

// sendRPC is used to encode and send the RPC.
func sendRPC(conn *netConn, rpcType uint8, args interface{}) error {
	// Write the request type
	if err := conn.w.WriteByte(rpcType); err != nil {
		conn.Release()
		return err
	}

	// Send the request
	if err := conn.enc.Encode(args); err != nil {
		conn.Release()
		return err
	}

	// Flush
	if err := conn.w.Flush(); err != nil {
		conn.Release()
		return err
	}
	return nil
}

// newNetPipeline is used to construct a netPipeline from a given
// transport and connection.
func newNetPipeline(trans *NetworkTransport, conn *netConn) *netPipeline {
	n := &netPipeline{
		conn:         conn,
		trans:        trans,
		doneCh:       make(chan AppendFuture, rpcMaxPipeline),
		inprogressCh: make(chan *appendFuture, rpcMaxPipeline),
		shutdownCh:   make(chan struct{}),
	}
	go n.decodeResponses()
	return n
}

// decodeResponses is a long running routine that decodes the responses
// sent on the connection.
func (n *netPipeline) decodeResponses() {
	timeout := n.trans.timeout
	for {
		select {
		case future := <-n.inprogressCh:
			if timeout > 0 {
				n.conn.conn.SetReadDeadline(time.Now().Add(timeout))
			}

			_, err := decodeResponse(n.conn, future.resp)
			future.respond(err)
			select {
			case n.doneCh <- future:
			case <-n.shutdownCh:
				return
			}
		case <-n.shutdownCh:
			return
		}
	}
}

// AppendEntries is used to pipeline a new append entries request.
func (n *netPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) {
	// Create a new future
	future := &appendFuture{
		start: time.Now(),
		args:  args,
		resp:  resp,
	}
	future.init()

	// Add a send timeout
	if timeout := n.trans.timeout; timeout > 0 {
		n.conn.conn.SetWriteDeadline(time.Now().Add(timeout))
	}

	// Send the RPC
	if err := sendRPC(n.conn, rpcAppendEntries, future.args); err != nil {
		return nil, err
	}

	// Hand-off for decoding, this can also cause back-pressure
	// to prevent too many inflight requests
	select {
	case n.inprogressCh <- future:
		return future, nil
	case <-n.shutdownCh:
		return nil, ErrPipelineShutdown
	}
}

// Consumer returns a channel that can be used to consume complete futures.
func (n *netPipeline) Consumer() <-chan AppendFuture {
	return n.doneCh
}

// Close is used to shut down the pipeline connection.
func (n *netPipeline) Close() error {
	n.shutdownLock.Lock()
	defer n.shutdownLock.Unlock()
	if n.shutdown {
		return nil
	}

	// Release the connection
	n.conn.Release()

	n.shutdown = true
	close(n.shutdownCh)
	return nil
}
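A sketch of standing this transport up over plain TCP. NewTCPTransport lives in tcp_transport.go, which is part of this vendored package but not among the hunks shown, so treat its exact signature as an assumption:

func exerciseNetTransport() error {
	// bindAddr, pool size, and timeout values are illustrative.
	trans, err := NewTCPTransport("127.0.0.1:0", nil, 3, 10*time.Second, os.Stderr)
	if err != nil {
		return err
	}
	defer trans.Close()

	// On the wire, each request is one rpcType byte followed by the
	// msgpack-encoded args; each response is a msgpack error string and
	// then the msgpack response struct (see sendRPC and decodeResponse).
	return nil
}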
122
vendor/github.com/hashicorp/raft/peer.go
generated
vendored
Normal file
@@ -0,0 +1,122 @@
package raft

import (
	"bytes"
	"encoding/json"
	"io/ioutil"
	"os"
	"path/filepath"
	"sync"
)

const (
	jsonPeerPath = "peers.json"
)

// PeerStore provides an interface for persistent storage and
// retrieval of peers. We use a separate interface from StableStore
// since the peers may need to be edited by a human operator. For example,
// in a two node cluster, the failure of either node requires human intervention
// since consensus is impossible.
type PeerStore interface {
	// Peers returns the list of known peers.
	Peers() ([]string, error)

	// SetPeers sets the list of known peers. This is invoked when a peer is
	// added or removed.
	SetPeers([]string) error
}

// StaticPeers is used to provide a static list of peers.
type StaticPeers struct {
	StaticPeers []string
	l           sync.Mutex
}

// Peers implements the PeerStore interface.
func (s *StaticPeers) Peers() ([]string, error) {
	s.l.Lock()
	peers := s.StaticPeers
	s.l.Unlock()
	return peers, nil
}

// SetPeers implements the PeerStore interface.
func (s *StaticPeers) SetPeers(p []string) error {
	s.l.Lock()
	s.StaticPeers = p
	s.l.Unlock()
	return nil
}

// JSONPeers is used to provide peer persistence on disk in the form
// of a JSON file. This allows human operators to manipulate the file.
type JSONPeers struct {
	l     sync.Mutex
	path  string
	trans Transport
}

// NewJSONPeers creates a new JSONPeers store. Requires a transport
// to handle the serialization of network addresses.
func NewJSONPeers(base string, trans Transport) *JSONPeers {
	path := filepath.Join(base, jsonPeerPath)
	store := &JSONPeers{
		path:  path,
		trans: trans,
	}
	return store
}

// Peers implements the PeerStore interface.
func (j *JSONPeers) Peers() ([]string, error) {
	j.l.Lock()
	defer j.l.Unlock()

	// Read the file
	buf, err := ioutil.ReadFile(j.path)
	if err != nil && !os.IsNotExist(err) {
		return nil, err
	}

	// Check for no peers
	if len(buf) == 0 {
		return nil, nil
	}

	// Decode the peers
	var peerSet []string
	dec := json.NewDecoder(bytes.NewReader(buf))
	if err := dec.Decode(&peerSet); err != nil {
		return nil, err
	}

	// Deserialize each peer
	var peers []string
	for _, p := range peerSet {
		peers = append(peers, j.trans.DecodePeer([]byte(p)))
	}
	return peers, nil
}

// SetPeers implements the PeerStore interface.
func (j *JSONPeers) SetPeers(peers []string) error {
	j.l.Lock()
	defer j.l.Unlock()

	// Encode each peer
	var peerSet []string
	for _, p := range peers {
		peerSet = append(peerSet, string(j.trans.EncodePeer(p)))
	}

	// Convert to JSON
	var buf bytes.Buffer
	enc := json.NewEncoder(&buf)
	if err := enc.Encode(peerSet); err != nil {
		return err
	}

	// Write out as JSON
	return ioutil.WriteFile(j.path, buf.Bytes(), 0755)
}
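A sketch of persisting peers so an operator can hand-edit <base>/peers.json. The helper name and directory path are hypothetical; trans can be any Transport (its EncodePeer/DecodePeer handle address serialization):

func exerciseJSONPeers(trans Transport) error {
	store := NewJSONPeers("/var/lib/myapp/raft", trans)
	if err := store.SetPeers([]string{"10.0.0.1:7000", "10.0.0.2:7000"}); err != nil {
		return err
	}
	peers, err := store.Peers() // re-reads and decodes peers.json
	if err != nil {
		return err
	}
	_ = peers
	return nil
}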
1887
vendor/github.com/hashicorp/raft/raft.go
generated
vendored
Normal file
File diff suppressed because it is too large
517
vendor/github.com/hashicorp/raft/replication.go
generated
vendored
Normal file
@@ -0,0 +1,517 @@
package raft

import (
	"errors"
	"fmt"
	"sync"
	"time"

	"github.com/armon/go-metrics"
)

const (
	maxFailureScale = 12
	failureWait     = 10 * time.Millisecond
)

var (
	// ErrLogNotFound indicates a given log entry is not available.
	ErrLogNotFound = errors.New("log not found")

	// ErrPipelineReplicationNotSupported can be returned by the transport to
	// signal that pipeline replication is not supported in general, and that
	// no error message should be produced.
	ErrPipelineReplicationNotSupported = errors.New("pipeline replication not supported")
)

type followerReplication struct {
	peer     string
	inflight *inflight

	stopCh    chan uint64
	triggerCh chan struct{}

	currentTerm uint64
	matchIndex  uint64
	nextIndex   uint64

	lastContact     time.Time
	lastContactLock sync.RWMutex

	failures uint64

	notifyCh   chan struct{}
	notify     []*verifyFuture
	notifyLock sync.Mutex

	// stepDown is used to indicate to the leader that we
	// should step down based on information from a follower.
	stepDown chan struct{}

	// allowPipeline is used to control whether it seems like
	// pipeline replication should be enabled.
	allowPipeline bool
}

// notifyAll is used to notify all the waiting verify futures
// if the follower believes we are still the leader.
func (s *followerReplication) notifyAll(leader bool) {
	// Clear the waiting notifies minimizing lock time
	s.notifyLock.Lock()
	n := s.notify
	s.notify = nil
	s.notifyLock.Unlock()

	// Submit our votes
	for _, v := range n {
		v.vote(leader)
	}
}

// LastContact returns the time of last contact.
func (s *followerReplication) LastContact() time.Time {
	s.lastContactLock.RLock()
	last := s.lastContact
	s.lastContactLock.RUnlock()
	return last
}

// setLastContact sets the last contact to the current time.
func (s *followerReplication) setLastContact() {
	s.lastContactLock.Lock()
	s.lastContact = time.Now()
	s.lastContactLock.Unlock()
}

// replicate is a long running routine that is used to manage
// the process of replicating logs to our followers.
func (r *Raft) replicate(s *followerReplication) {
	// Start an async heartbeating routine
	stopHeartbeat := make(chan struct{})
	defer close(stopHeartbeat)
	r.goFunc(func() { r.heartbeat(s, stopHeartbeat) })

RPC:
	shouldStop := false
	for !shouldStop {
		select {
		case maxIndex := <-s.stopCh:
			// Make a best effort to replicate up to this index
			if maxIndex > 0 {
				r.replicateTo(s, maxIndex)
			}
			return
		case <-s.triggerCh:
			shouldStop = r.replicateTo(s, r.getLastLogIndex())
		case <-randomTimeout(r.conf.CommitTimeout):
			shouldStop = r.replicateTo(s, r.getLastLogIndex())
		}

		// If things look healthy, switch to pipeline mode
		if !shouldStop && s.allowPipeline {
			goto PIPELINE
		}
	}
	return

PIPELINE:
	// Disable until re-enabled
	s.allowPipeline = false

	// Replicates using a pipeline for high performance. This method
	// is not able to gracefully recover from errors, and so we fall back
	// to standard mode on failure.
	if err := r.pipelineReplicate(s); err != nil {
		if err != ErrPipelineReplicationNotSupported {
			r.logger.Printf("[ERR] raft: Failed to start pipeline replication to %s: %s", s.peer, err)
		}
	}
	goto RPC
}

// replicateTo is used to replicate the logs up to a given last index.
// If the follower log is behind, we take care to bring them up to date.
func (r *Raft) replicateTo(s *followerReplication, lastIndex uint64) (shouldStop bool) {
	// Create the base request
	var req AppendEntriesRequest
	var resp AppendEntriesResponse
	var start time.Time
START:
	// Prevent an excessive retry rate on errors
	if s.failures > 0 {
		select {
		case <-time.After(backoff(failureWait, s.failures, maxFailureScale)):
		case <-r.shutdownCh:
		}
	}

	// Setup the request
	if err := r.setupAppendEntries(s, &req, s.nextIndex, lastIndex); err == ErrLogNotFound {
		goto SEND_SNAP
	} else if err != nil {
		return
	}

	// Make the RPC call
	start = time.Now()
	if err := r.trans.AppendEntries(s.peer, &req, &resp); err != nil {
		r.logger.Printf("[ERR] raft: Failed to AppendEntries to %v: %v", s.peer, err)
		s.failures++
		return
	}
	appendStats(s.peer, start, float32(len(req.Entries)))

	// Check for a newer term, stop running
	if resp.Term > req.Term {
		r.handleStaleTerm(s)
		return true
	}

	// Update the last contact
	s.setLastContact()

	// Update s based on success
	if resp.Success {
		// Update our replication state
		updateLastAppended(s, &req)

		// Clear any failures, allow pipelining
		s.failures = 0
		s.allowPipeline = true
	} else {
		s.nextIndex = max(min(s.nextIndex-1, resp.LastLog+1), 1)
		s.matchIndex = s.nextIndex - 1
		if resp.NoRetryBackoff {
			s.failures = 0
		} else {
			s.failures++
		}
		r.logger.Printf("[WARN] raft: AppendEntries to %v rejected, sending older logs (next: %d)", s.peer, s.nextIndex)
	}

CHECK_MORE:
	// Check if there are more logs to replicate
	if s.nextIndex <= lastIndex {
		goto START
	}
	return

	// SEND_SNAP is used when we fail to get a log, usually because the follower
	// is too far behind, and we must ship a snapshot down instead
SEND_SNAP:
	if stop, err := r.sendLatestSnapshot(s); stop {
		return true
	} else if err != nil {
		r.logger.Printf("[ERR] raft: Failed to send snapshot to %v: %v", s.peer, err)
		return
	}

	// Check if there is more to replicate
	goto CHECK_MORE
}

// sendLatestSnapshot is used to send the latest snapshot we have
// down to our follower.
func (r *Raft) sendLatestSnapshot(s *followerReplication) (bool, error) {
	// Get the snapshots
	snapshots, err := r.snapshots.List()
	if err != nil {
		r.logger.Printf("[ERR] raft: Failed to list snapshots: %v", err)
		return false, err
	}

	// Check we have at least a single snapshot
	if len(snapshots) == 0 {
		return false, fmt.Errorf("no snapshots found")
	}

	// Open the most recent snapshot
	snapID := snapshots[0].ID
	meta, snapshot, err := r.snapshots.Open(snapID)
	if err != nil {
		r.logger.Printf("[ERR] raft: Failed to open snapshot %v: %v", snapID, err)
		return false, err
	}
	defer snapshot.Close()

	// Setup the request
	req := InstallSnapshotRequest{
		Term:         s.currentTerm,
		Leader:       r.trans.EncodePeer(r.localAddr),
		LastLogIndex: meta.Index,
		LastLogTerm:  meta.Term,
		Peers:        meta.Peers,
		Size:         meta.Size,
	}

	// Make the call
	start := time.Now()
	var resp InstallSnapshotResponse
	if err := r.trans.InstallSnapshot(s.peer, &req, &resp, snapshot); err != nil {
		r.logger.Printf("[ERR] raft: Failed to install snapshot %v: %v", snapID, err)
		s.failures++
		return false, err
	}
	metrics.MeasureSince([]string{"raft", "replication", "installSnapshot", s.peer}, start)

	// Check for a newer term, stop running
	if resp.Term > req.Term {
		r.handleStaleTerm(s)
		return true, nil
	}

	// Update the last contact
	s.setLastContact()

	// Check for success
	if resp.Success {
		// Mark any inflight logs as committed
		s.inflight.CommitRange(s.matchIndex+1, meta.Index)

		// Update the indexes
		s.matchIndex = meta.Index
		s.nextIndex = s.matchIndex + 1

		// Clear any failures
		s.failures = 0

		// Notify we are still leader
		s.notifyAll(true)
	} else {
		s.failures++
		r.logger.Printf("[WARN] raft: InstallSnapshot to %v rejected", s.peer)
	}
	return false, nil
}

// heartbeat is used to periodically invoke AppendEntries on a peer
// to ensure they don't time out. This is done async of replicate(),
// since that routine could potentially be blocked on disk IO.
func (r *Raft) heartbeat(s *followerReplication, stopCh chan struct{}) {
	var failures uint64
	req := AppendEntriesRequest{
		Term:   s.currentTerm,
		Leader: r.trans.EncodePeer(r.localAddr),
	}
	var resp AppendEntriesResponse
	for {
		// Wait for the next heartbeat interval or forced notify
		select {
		case <-s.notifyCh:
		case <-randomTimeout(r.conf.HeartbeatTimeout / 10):
		case <-stopCh:
			return
		}
start := time.Now()
|
||||
if err := r.trans.AppendEntries(s.peer, &req, &resp); err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to heartbeat to %v: %v", s.peer, err)
|
||||
failures++
|
||||
select {
|
||||
case <-time.After(backoff(failureWait, failures, maxFailureScale)):
|
||||
case <-stopCh:
|
||||
}
|
||||
} else {
|
||||
s.setLastContact()
|
||||
failures = 0
|
||||
metrics.MeasureSince([]string{"raft", "replication", "heartbeat", s.peer}, start)
|
||||
s.notifyAll(resp.Success)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// pipelineReplicate is used when we have synchronized our state with the follower,
|
||||
// and want to switch to a higher performance pipeline mode of replication.
|
||||
// We only pipeline AppendEntries commands, and if we ever hit an error, we fall
|
||||
// back to the standard replication which can handle more complex situations.
|
||||
func (r *Raft) pipelineReplicate(s *followerReplication) error {
|
||||
// Create a new pipeline
|
||||
pipeline, err := r.trans.AppendEntriesPipeline(s.peer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer pipeline.Close()
|
||||
|
||||
// Log start and stop of pipeline
|
||||
r.logger.Printf("[INFO] raft: pipelining replication to peer %v", s.peer)
|
||||
defer r.logger.Printf("[INFO] raft: aborting pipeline replication to peer %v", s.peer)
|
||||
|
||||
// Create a shutdown and finish channel
|
||||
stopCh := make(chan struct{})
|
||||
finishCh := make(chan struct{})
|
||||
|
||||
// Start a dedicated decoder
|
||||
r.goFunc(func() { r.pipelineDecode(s, pipeline, stopCh, finishCh) })
|
||||
|
||||
// Start pipeline sends at the last good nextIndex
|
||||
nextIndex := s.nextIndex
|
||||
|
||||
shouldStop := false
|
||||
SEND:
|
||||
for !shouldStop {
|
||||
select {
|
||||
case <-finishCh:
|
||||
break SEND
|
||||
case maxIndex := <-s.stopCh:
|
||||
if maxIndex > 0 {
|
||||
r.pipelineSend(s, pipeline, &nextIndex, maxIndex)
|
||||
}
|
||||
break SEND
|
||||
case <-s.triggerCh:
|
||||
shouldStop = r.pipelineSend(s, pipeline, &nextIndex, r.getLastLogIndex())
|
||||
case <-randomTimeout(r.conf.CommitTimeout):
|
||||
shouldStop = r.pipelineSend(s, pipeline, &nextIndex, r.getLastLogIndex())
|
||||
}
|
||||
}
|
||||
|
||||
// Stop our decoder, and wait for it to finish
|
||||
close(stopCh)
|
||||
select {
|
||||
case <-finishCh:
|
||||
case <-r.shutdownCh:
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// pipelineSend is used to send data over a pipeline.
|
||||
func (r *Raft) pipelineSend(s *followerReplication, p AppendPipeline, nextIdx *uint64, lastIndex uint64) (shouldStop bool) {
|
||||
// Create a new append request
|
||||
req := new(AppendEntriesRequest)
|
||||
if err := r.setupAppendEntries(s, req, *nextIdx, lastIndex); err != nil {
|
||||
return true
|
||||
}
|
||||
|
||||
// Pipeline the append entries
|
||||
if _, err := p.AppendEntries(req, new(AppendEntriesResponse)); err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to pipeline AppendEntries to %v: %v", s.peer, err)
|
||||
return true
|
||||
}
|
||||
|
||||
// Increase the next send log to avoid re-sending old logs
|
||||
if n := len(req.Entries); n > 0 {
|
||||
last := req.Entries[n-1]
|
||||
*nextIdx = last.Index + 1
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// pipelineDecode is used to decode the responses of pipelined requests.
|
||||
func (r *Raft) pipelineDecode(s *followerReplication, p AppendPipeline, stopCh, finishCh chan struct{}) {
|
||||
defer close(finishCh)
|
||||
respCh := p.Consumer()
|
||||
for {
|
||||
select {
|
||||
case ready := <-respCh:
|
||||
req, resp := ready.Request(), ready.Response()
|
||||
appendStats(s.peer, ready.Start(), float32(len(req.Entries)))
|
||||
|
||||
// Check for a newer term, stop running
|
||||
if resp.Term > req.Term {
|
||||
r.handleStaleTerm(s)
|
||||
return
|
||||
}
|
||||
|
||||
// Update the last contact
|
||||
s.setLastContact()
|
||||
|
||||
// Abort pipeline if not successful
|
||||
if !resp.Success {
|
||||
return
|
||||
}
|
||||
|
||||
// Update our replication state
|
||||
updateLastAppended(s, req)
|
||||
case <-stopCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// setupAppendEntries is used to setup an append entries request.
|
||||
func (r *Raft) setupAppendEntries(s *followerReplication, req *AppendEntriesRequest, nextIndex, lastIndex uint64) error {
|
||||
req.Term = s.currentTerm
|
||||
req.Leader = r.trans.EncodePeer(r.localAddr)
|
||||
req.LeaderCommitIndex = r.getCommitIndex()
|
||||
if err := r.setPreviousLog(req, nextIndex); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := r.setNewLogs(req, nextIndex, lastIndex); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setPreviousLog is used to setup the PrevLogEntry and PrevLogTerm for an
|
||||
// AppendEntriesRequest given the next index to replicate.
|
||||
func (r *Raft) setPreviousLog(req *AppendEntriesRequest, nextIndex uint64) error {
|
||||
// Guard for the first index, since there is no 0 log entry
|
||||
// Guard against the previous index being a snapshot as well
|
||||
if nextIndex == 1 {
|
||||
req.PrevLogEntry = 0
|
||||
req.PrevLogTerm = 0
|
||||
|
||||
} else if (nextIndex - 1) == r.getLastSnapshotIndex() {
|
||||
req.PrevLogEntry = r.getLastSnapshotIndex()
|
||||
req.PrevLogTerm = r.getLastSnapshotTerm()
|
||||
|
||||
} else {
|
||||
var l Log
|
||||
if err := r.logs.GetLog(nextIndex-1, &l); err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to get log at index %d: %v",
|
||||
nextIndex-1, err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Set the previous index and term (0 if nextIndex is 1)
|
||||
req.PrevLogEntry = l.Index
|
||||
req.PrevLogTerm = l.Term
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setNewLogs is used to setup the logs which should be appended for a request.
|
||||
func (r *Raft) setNewLogs(req *AppendEntriesRequest, nextIndex, lastIndex uint64) error {
|
||||
// Append up to MaxAppendEntries or up to the lastIndex
|
||||
req.Entries = make([]*Log, 0, r.conf.MaxAppendEntries)
|
||||
maxIndex := min(nextIndex+uint64(r.conf.MaxAppendEntries)-1, lastIndex)
|
||||
for i := nextIndex; i <= maxIndex; i++ {
|
||||
oldLog := new(Log)
|
||||
if err := r.logs.GetLog(i, oldLog); err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to get log at index %d: %v", i, err)
|
||||
return err
|
||||
}
|
||||
req.Entries = append(req.Entries, oldLog)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// appendStats is used to emit stats about an AppendEntries invocation.
|
||||
func appendStats(peer string, start time.Time, logs float32) {
|
||||
metrics.MeasureSince([]string{"raft", "replication", "appendEntries", "rpc", peer}, start)
|
||||
metrics.IncrCounter([]string{"raft", "replication", "appendEntries", "logs", peer}, logs)
|
||||
}
|
||||
|
||||
// handleStaleTerm is used when a follower indicates that we have a stale term.
|
||||
func (r *Raft) handleStaleTerm(s *followerReplication) {
|
||||
r.logger.Printf("[ERR] raft: peer %v has newer term, stopping replication", s.peer)
|
||||
s.notifyAll(false) // No longer leader
|
||||
asyncNotifyCh(s.stepDown)
|
||||
}
|
||||
|
||||
// updateLastAppended is used to update follower replication state after a successful
|
||||
// AppendEntries RPC.
|
||||
func updateLastAppended(s *followerReplication, req *AppendEntriesRequest) {
|
||||
// Mark any inflight logs as committed
|
||||
if logs := req.Entries; len(logs) > 0 {
|
||||
first := logs[0]
|
||||
last := logs[len(logs)-1]
|
||||
s.inflight.CommitRange(first.Index, last.Index)
|
||||
|
||||
// Update the indexes
|
||||
s.matchIndex = last.Index
|
||||
s.nextIndex = last.Index + 1
|
||||
}
|
||||
|
||||
// Notify still leader
|
||||
s.notifyAll(true)
|
||||
}
|
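
// A hedged worked example of the rejection path in replicateTo above
// (exampleNextIndexBacktrack and its values are illustrative, not part of
// the vendored file): if a follower rejects with LastLog = 4 while the
// leader's nextIndex is 10, the leader jumps straight to just past the
// follower's log instead of probing back one entry at a time.
func exampleNextIndexBacktrack() uint64 {
    nextIndex, lastLog := uint64(10), uint64(4)
    // min(9, 5) = 5, and the outer max keeps nextIndex from reaching 0.
    return max(min(nextIndex-1, lastLog+1), 1) // 5
}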
40
vendor/github.com/hashicorp/raft/snapshot.go
generated
vendored
Normal file
@@ -0,0 +1,40 @@
package raft

import (
    "io"
)

// SnapshotMeta is for metadata of a snapshot.
type SnapshotMeta struct {
    ID    string // ID is opaque to the store, and is used for opening
    Index uint64
    Term  uint64
    Peers []byte
    Size  int64
}

// SnapshotStore interface is used to allow for flexible implementations
// of snapshot storage and retrieval. For example, a client could implement
// a shared state store such as S3, allowing new nodes to restore snapshots
// without streaming from the leader.
type SnapshotStore interface {
    // Create is used to begin a snapshot at a given index and term,
    // with the current peer set already encoded.
    Create(index, term uint64, peers []byte) (SnapshotSink, error)

    // List is used to list the available snapshots in the store.
    // It should return them in descending order, with the highest index first.
    List() ([]*SnapshotMeta, error)

    // Open takes a snapshot ID and provides a ReadCloser. Once Close is
    // called, it is assumed the snapshot is no longer needed.
    Open(id string) (*SnapshotMeta, io.ReadCloser, error)
}

// SnapshotSink is returned by StartSnapshot. The FSM will Write state
// to the sink and call Close on completion. On error, Cancel will be invoked.
type SnapshotSink interface {
    io.WriteCloser
    ID() string
    Cancel() error
}
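
// A hedged sketch of the SnapshotSink contract above: discardSnapshotSink is
// a hypothetical sink (not part of this package) that throws away all state
// written to it, which can be useful when exercising an FSM's snapshot path
// in tests without touching disk.
type discardSnapshotSink struct {
    id string
}

// Write accepts and drops the snapshot bytes.
func (d *discardSnapshotSink) Write(p []byte) (int, error) { return len(p), nil }

// Close marks a successful snapshot; there is nothing to flush.
func (d *discardSnapshotSink) Close() error { return nil }

// ID returns the opaque snapshot identifier.
func (d *discardSnapshotSink) ID() string { return d.id }

// Cancel marks a failed snapshot; there is nothing to clean up.
func (d *discardSnapshotSink) Cancel() error { return nil }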
15
vendor/github.com/hashicorp/raft/stable.go
generated
vendored
Normal file
@@ -0,0 +1,15 @@
package raft

// StableStore is used to provide stable storage
// of key configurations to ensure safety.
type StableStore interface {
    Set(key []byte, val []byte) error

    // Get returns the value for key, or an empty byte slice if key was not found.
    Get(key []byte) ([]byte, error)

    SetUint64(key []byte, val uint64) error

    // GetUint64 returns the uint64 value for key, or 0 if key was not found.
    GetUint64(key []byte) (uint64, error)
}
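
// A hedged sketch of the StableStore interface above: inmemStableStore is a
// hypothetical in-memory implementation (not part of this package). It is
// not durable and not safe for concurrent use, so it only suits tests.
type inmemStableStore struct {
    kv    map[string][]byte
    kvInt map[string]uint64
}

func newInmemStableStore() *inmemStableStore {
    return &inmemStableStore{
        kv:    make(map[string][]byte),
        kvInt: make(map[string]uint64),
    }
}

func (i *inmemStableStore) Set(key []byte, val []byte) error {
    i.kv[string(key)] = val
    return nil
}

// Get returns a nil (empty) slice when the key is absent, matching the
// interface's documented behavior.
func (i *inmemStableStore) Get(key []byte) ([]byte, error) {
    return i.kv[string(key)], nil
}

func (i *inmemStableStore) SetUint64(key []byte, val uint64) error {
    i.kvInt[string(key)] = val
    return nil
}

// GetUint64 returns 0 when the key is absent, matching the interface docs.
func (i *inmemStableStore) GetUint64(key []byte) (uint64, error) {
    return i.kvInt[string(key)], nil
}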
169
vendor/github.com/hashicorp/raft/state.go
generated
vendored
Normal file
@@ -0,0 +1,169 @@
package raft

import (
    "sync/atomic"
)

// RaftState captures the state of a Raft node: Follower, Candidate, Leader,
// or Shutdown.
type RaftState uint32

const (
    // Follower is the initial state of a Raft node.
    Follower RaftState = iota

    // Candidate is one of the valid states of a Raft node.
    Candidate

    // Leader is one of the valid states of a Raft node.
    Leader

    // Shutdown is the terminal state of a Raft node.
    Shutdown
)

func (s RaftState) String() string {
    switch s {
    case Follower:
        return "Follower"
    case Candidate:
        return "Candidate"
    case Leader:
        return "Leader"
    case Shutdown:
        return "Shutdown"
    default:
        return "Unknown"
    }
}

// raftState is used to maintain various state variables
// and provides an interface to set/get the variables in a
// thread-safe manner.
type raftState struct {
    // The current term, cache of StableStore
    currentTerm uint64

    // Cache the latest log from LogStore
    LastLogIndex uint64
    LastLogTerm  uint64

    // Highest committed log entry
    commitIndex uint64

    // Last applied log to the FSM
    lastApplied uint64

    // Cache the latest snapshot index/term
    lastSnapshotIndex uint64
    lastSnapshotTerm  uint64

    // Tracks the number of live routines
    runningRoutines int32

    // The current state
    state RaftState
}

func (r *raftState) getState() RaftState {
    stateAddr := (*uint32)(&r.state)
    return RaftState(atomic.LoadUint32(stateAddr))
}

func (r *raftState) setState(s RaftState) {
    stateAddr := (*uint32)(&r.state)
    atomic.StoreUint32(stateAddr, uint32(s))
}

func (r *raftState) getCurrentTerm() uint64 {
    return atomic.LoadUint64(&r.currentTerm)
}

func (r *raftState) setCurrentTerm(term uint64) {
    atomic.StoreUint64(&r.currentTerm, term)
}

func (r *raftState) getLastLogIndex() uint64 {
    return atomic.LoadUint64(&r.LastLogIndex)
}

func (r *raftState) setLastLogIndex(index uint64) {
    atomic.StoreUint64(&r.LastLogIndex, index)
}

func (r *raftState) getLastLogTerm() uint64 {
    return atomic.LoadUint64(&r.LastLogTerm)
}

func (r *raftState) setLastLogTerm(term uint64) {
    atomic.StoreUint64(&r.LastLogTerm, term)
}

func (r *raftState) getCommitIndex() uint64 {
    return atomic.LoadUint64(&r.commitIndex)
}

func (r *raftState) setCommitIndex(index uint64) {
    atomic.StoreUint64(&r.commitIndex, index)
}

func (r *raftState) getLastApplied() uint64 {
    return atomic.LoadUint64(&r.lastApplied)
}

func (r *raftState) setLastApplied(index uint64) {
    atomic.StoreUint64(&r.lastApplied, index)
}

func (r *raftState) getLastSnapshotIndex() uint64 {
    return atomic.LoadUint64(&r.lastSnapshotIndex)
}

func (r *raftState) setLastSnapshotIndex(index uint64) {
    atomic.StoreUint64(&r.lastSnapshotIndex, index)
}

func (r *raftState) getLastSnapshotTerm() uint64 {
    return atomic.LoadUint64(&r.lastSnapshotTerm)
}

func (r *raftState) setLastSnapshotTerm(term uint64) {
    atomic.StoreUint64(&r.lastSnapshotTerm, term)
}

func (r *raftState) incrRoutines() {
    atomic.AddInt32(&r.runningRoutines, 1)
}

func (r *raftState) decrRoutines() {
    atomic.AddInt32(&r.runningRoutines, -1)
}

func (r *raftState) getRoutines() int32 {
    return atomic.LoadInt32(&r.runningRoutines)
}

// goFunc starts a goroutine and properly handles the race between a routine
// starting and incrementing, and exiting and decrementing.
func (r *raftState) goFunc(f func()) {
    r.incrRoutines()
    go func() {
        defer r.decrRoutines()
        f()
    }()
}

// getLastIndex returns the last index in stable storage.
// Either from the last log or from the last snapshot.
func (r *raftState) getLastIndex() uint64 {
    return max(r.getLastLogIndex(), r.getLastSnapshotIndex())
}

// getLastEntry returns the last index and term in stable storage.
// Either from the last log or from the last snapshot.
func (r *raftState) getLastEntry() (uint64, uint64) {
    if r.getLastLogIndex() >= r.getLastSnapshotIndex() {
        return r.getLastLogIndex(), r.getLastLogTerm()
    }
    return r.getLastSnapshotIndex(), r.getLastSnapshotTerm()
}
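
// A hedged illustration of the goFunc bookkeeping above (routinesDrained is
// hypothetical, not part of this file): because incrRoutines runs before the
// goroutine is spawned, a caller that observes getRoutines() == 0 knows that
// every routine started via goFunc has already run its deferred
// decrRoutines, with no started-but-uncounted window.
func routinesDrained(r *raftState) bool {
    return r.getRoutines() == 0
}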
105
vendor/github.com/hashicorp/raft/tcp_transport.go
generated
vendored
Normal file
@@ -0,0 +1,105 @@
package raft

import (
    "errors"
    "io"
    "log"
    "net"
    "time"
)

var (
    errNotAdvertisable = errors.New("local bind address is not advertisable")
    errNotTCP          = errors.New("local address is not a TCP address")
)

// TCPStreamLayer implements the StreamLayer interface for plain TCP.
type TCPStreamLayer struct {
    advertise net.Addr
    listener  *net.TCPListener
}

// NewTCPTransport returns a NetworkTransport that is built on top of
// a TCP streaming transport layer.
func NewTCPTransport(
    bindAddr string,
    advertise net.Addr,
    maxPool int,
    timeout time.Duration,
    logOutput io.Writer,
) (*NetworkTransport, error) {
    return newTCPTransport(bindAddr, advertise, maxPool, timeout, func(stream StreamLayer) *NetworkTransport {
        return NewNetworkTransport(stream, maxPool, timeout, logOutput)
    })
}

// NewTCPTransportWithLogger returns a NetworkTransport that is built on top of
// a TCP streaming transport layer, with log output going to the supplied Logger.
func NewTCPTransportWithLogger(
    bindAddr string,
    advertise net.Addr,
    maxPool int,
    timeout time.Duration,
    logger *log.Logger,
) (*NetworkTransport, error) {
    return newTCPTransport(bindAddr, advertise, maxPool, timeout, func(stream StreamLayer) *NetworkTransport {
        return NewNetworkTransportWithLogger(stream, maxPool, timeout, logger)
    })
}

func newTCPTransport(bindAddr string,
    advertise net.Addr,
    maxPool int,
    timeout time.Duration,
    transportCreator func(stream StreamLayer) *NetworkTransport) (*NetworkTransport, error) {
    // Try to bind
    list, err := net.Listen("tcp", bindAddr)
    if err != nil {
        return nil, err
    }

    // Create stream
    stream := &TCPStreamLayer{
        advertise: advertise,
        listener:  list.(*net.TCPListener),
    }

    // Verify that we have a usable advertise address
    addr, ok := stream.Addr().(*net.TCPAddr)
    if !ok {
        list.Close()
        return nil, errNotTCP
    }
    if addr.IP.IsUnspecified() {
        list.Close()
        return nil, errNotAdvertisable
    }

    // Create the network transport
    trans := transportCreator(stream)
    return trans, nil
}

// Dial implements the StreamLayer interface.
func (t *TCPStreamLayer) Dial(address string, timeout time.Duration) (net.Conn, error) {
    return net.DialTimeout("tcp", address, timeout)
}

// Accept implements the net.Listener interface.
func (t *TCPStreamLayer) Accept() (c net.Conn, err error) {
    return t.listener.Accept()
}

// Close implements the net.Listener interface.
func (t *TCPStreamLayer) Close() (err error) {
    return t.listener.Close()
}

// Addr implements the net.Listener interface.
func (t *TCPStreamLayer) Addr() net.Addr {
    // Use an advertise addr if provided
    if t.advertise != nil {
        return t.advertise
    }
    return t.listener.Addr()
}
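
// A hedged usage sketch of NewTCPTransport above (exampleTCPTransport, the
// pool size of 3, the 10-second timeout, and os.Stderr for logging are all
// illustrative assumptions, and "os" would need to be imported): binding to
// an explicit loopback address passes the advertise checks, since
// "127.0.0.1" is not an unspecified IP, while letting the OS pick the port.
func exampleTCPTransport() (*NetworkTransport, error) {
    // A nil advertise address means the listener's own address is used.
    return NewTCPTransport("127.0.0.1:0", nil, 3, 10*time.Second, os.Stderr)
}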
85
vendor/github.com/hashicorp/raft/transport.go
generated
vendored
Normal file
@@ -0,0 +1,85 @@
package raft

import (
    "io"
    "time"
)

// RPCResponse captures both a response and a potential error.
type RPCResponse struct {
    Response interface{}
    Error    error
}

// RPC has a command, and provides a response mechanism.
type RPC struct {
    Command  interface{}
    Reader   io.Reader // Set only for InstallSnapshot
    RespChan chan<- RPCResponse
}

// Respond is used to respond with a response, an error, or both.
func (r *RPC) Respond(resp interface{}, err error) {
    r.RespChan <- RPCResponse{resp, err}
}

// Transport provides an interface for network transports
// to allow Raft to communicate with other nodes.
type Transport interface {
    // Consumer returns a channel that can be used to
    // consume and respond to RPC requests.
    Consumer() <-chan RPC

    // LocalAddr is used to return our local address to distinguish from our peers.
    LocalAddr() string

    // AppendEntriesPipeline returns an interface that can be used to pipeline
    // AppendEntries requests.
    AppendEntriesPipeline(target string) (AppendPipeline, error)

    // AppendEntries sends the appropriate RPC to the target node.
    AppendEntries(target string, args *AppendEntriesRequest, resp *AppendEntriesResponse) error

    // RequestVote sends the appropriate RPC to the target node.
    RequestVote(target string, args *RequestVoteRequest, resp *RequestVoteResponse) error

    // InstallSnapshot is used to push a snapshot down to a follower. The data is read from
    // the ReadCloser and streamed to the client.
    InstallSnapshot(target string, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error

    // EncodePeer is used to serialize a peer name.
    EncodePeer(string) []byte

    // DecodePeer is used to deserialize a peer name.
    DecodePeer([]byte) string

    // SetHeartbeatHandler is used to setup a heartbeat handler
    // as a fast path. This is to avoid head-of-line blocking from
    // disk IO. If a Transport does not support this, it can simply
    // ignore the call, and push the heartbeat onto the Consumer channel.
    SetHeartbeatHandler(cb func(rpc RPC))
}

// AppendPipeline is used for pipelining AppendEntries requests. It is used
// to increase the replication throughput by masking latency and better
// utilizing bandwidth.
type AppendPipeline interface {
    // AppendEntries is used to add another request to the pipeline.
    // The send may block, which is an effective form of back-pressure.
    AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error)

    // Consumer returns a channel that can be used to consume
    // response futures when they are ready.
    Consumer() <-chan AppendFuture

    // Close closes the pipeline and cancels all inflight RPCs.
    Close() error
}

// AppendFuture is used to return information about a pipelined AppendEntries request.
type AppendFuture interface {
    Future
    Start() time.Time
    Request() *AppendEntriesRequest
    Response() *AppendEntriesResponse
}
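
// A hedged sketch of the Consumer contract above (serveRPCs and its handle
// callback are hypothetical, not part of this package): a server loop
// receives RPCs from the transport and must answer each one via Respond so
// the remote caller's round trip completes.
func serveRPCs(trans Transport, handle func(cmd interface{}) (interface{}, error)) {
    // Ranging over the channel exits when the transport closes it.
    for rpc := range trans.Consumer() {
        resp, err := handle(rpc.Command)
        rpc.Respond(resp, err)
    }
}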
200
vendor/github.com/hashicorp/raft/util.go
generated
vendored
Normal file
@@ -0,0 +1,200 @@
package raft

import (
    "bytes"
    crand "crypto/rand"
    "encoding/binary"
    "fmt"
    "math"
    "math/big"
    "math/rand"
    "time"

    "github.com/hashicorp/go-msgpack/codec"
)

func init() {
    // Ensure we use a high-entropy seed for the pseudo-random generator
    rand.Seed(newSeed())
}

// newSeed returns an int64 from a crypto random source;
// it can be used to seed a math/rand source.
func newSeed() int64 {
    r, err := crand.Int(crand.Reader, big.NewInt(math.MaxInt64))
    if err != nil {
        panic(fmt.Errorf("failed to read random bytes: %v", err))
    }
    return r.Int64()
}

// randomTimeout returns a value that is between minVal and 2x minVal.
func randomTimeout(minVal time.Duration) <-chan time.Time {
    if minVal == 0 {
        return nil
    }
    extra := (time.Duration(rand.Int63()) % minVal)
    return time.After(minVal + extra)
}
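
// A hedged illustration of randomTimeout above (exampleRandomTimeout is not
// part of the vendored file): the returned channel fires after a uniformly
// random delay in [minVal, 2*minVal), which is how the commit and heartbeat
// timers elsewhere in this package are jittered to avoid nodes acting in
// lockstep.
func exampleRandomTimeout() {
    // Fires somewhere between 100ms and 200ms.
    <-randomTimeout(100 * time.Millisecond)
}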
// min returns the minimum.
func min(a, b uint64) uint64 {
    if a <= b {
        return a
    }
    return b
}

// max returns the maximum.
func max(a, b uint64) uint64 {
    if a >= b {
        return a
    }
    return b
}

// generateUUID is used to generate a random UUID.
func generateUUID() string {
    buf := make([]byte, 16)
    if _, err := crand.Read(buf); err != nil {
        panic(fmt.Errorf("failed to read random bytes: %v", err))
    }

    return fmt.Sprintf("%08x-%04x-%04x-%04x-%12x",
        buf[0:4],
        buf[4:6],
        buf[6:8],
        buf[8:10],
        buf[10:16])
}

// asyncNotify is used to do an async channel send to
// a list of channels. This will not block.
func asyncNotify(chans []chan struct{}) {
    for _, ch := range chans {
        asyncNotifyCh(ch)
    }
}

// asyncNotifyCh is used to do an async channel send
// to a single channel without blocking.
func asyncNotifyCh(ch chan struct{}) {
    select {
    case ch <- struct{}{}:
    default:
    }
}

// asyncNotifyBool is used to do an async notification
// on a bool channel.
func asyncNotifyBool(ch chan bool, v bool) {
    select {
    case ch <- v:
    default:
    }
}

// ExcludePeer is used to exclude a single peer from a list of peers.
func ExcludePeer(peers []string, peer string) []string {
    otherPeers := make([]string, 0, len(peers))
    for _, p := range peers {
        if p != peer {
            otherPeers = append(otherPeers, p)
        }
    }
    return otherPeers
}

// PeerContained checks if a given peer is contained in a list.
func PeerContained(peers []string, peer string) bool {
    for _, p := range peers {
        if p == peer {
            return true
        }
    }
    return false
}

// AddUniquePeer is used to add a peer to a list of existing
// peers only if it is not already contained.
func AddUniquePeer(peers []string, peer string) []string {
    if PeerContained(peers, peer) {
        return peers
    }
    return append(peers, peer)
}
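
// A hedged worked example of the peer-list helpers above
// (examplePeerHelpers and the addresses are illustrative only):
// AddUniquePeer is idempotent, and ExcludePeer removes every occurrence of
// exactly one address.
func examplePeerHelpers() {
    peers := []string{"10.0.0.1:7000", "10.0.0.2:7000"}
    peers = AddUniquePeer(peers, "10.0.0.2:7000") // unchanged: already present
    peers = AddUniquePeer(peers, "10.0.0.3:7000") // now three peers
    peers = ExcludePeer(peers, "10.0.0.1:7000")   // back to two peers
    _ = PeerContained(peers, "10.0.0.3:7000")     // true
}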
// encodePeers is used to serialize a list of peers.
func encodePeers(peers []string, trans Transport) []byte {
    // Encode each peer
    var encPeers [][]byte
    for _, p := range peers {
        encPeers = append(encPeers, trans.EncodePeer(p))
    }

    // Encode the entire array
    buf, err := encodeMsgPack(encPeers)
    if err != nil {
        panic(fmt.Errorf("failed to encode peers: %v", err))
    }

    return buf.Bytes()
}

// decodePeers is used to deserialize a list of peers.
func decodePeers(buf []byte, trans Transport) []string {
    // Decode the buffer first
    var encPeers [][]byte
    if err := decodeMsgPack(buf, &encPeers); err != nil {
        panic(fmt.Errorf("failed to decode peers: %v", err))
    }

    // Deserialize each peer
    var peers []string
    for _, enc := range encPeers {
        peers = append(peers, trans.DecodePeer(enc))
    }

    return peers
}

// decodeMsgPack reverses the encode operation on a byte slice input.
func decodeMsgPack(buf []byte, out interface{}) error {
    r := bytes.NewBuffer(buf)
    hd := codec.MsgpackHandle{}
    dec := codec.NewDecoder(r, &hd)
    return dec.Decode(out)
}

// encodeMsgPack writes an encoded object to a new bytes buffer.
func encodeMsgPack(in interface{}) (*bytes.Buffer, error) {
    buf := bytes.NewBuffer(nil)
    hd := codec.MsgpackHandle{}
    enc := codec.NewEncoder(buf, &hd)
    err := enc.Encode(in)
    return buf, err
}
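
// A hedged round-trip sketch of the MsgPack helpers above
// (exampleMsgPackRoundTrip and its sample values are not part of the
// vendored file): any value the codec can encode should decode back into an
// equivalent value.
func exampleMsgPackRoundTrip() error {
    in := []string{"node1", "node2"}
    buf, err := encodeMsgPack(in)
    if err != nil {
        return err
    }
    var out []string
    return decodeMsgPack(buf.Bytes(), &out) // out matches in on success
}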
// bytesToUint64 converts bytes to an integer.
func bytesToUint64(b []byte) uint64 {
    return binary.BigEndian.Uint64(b)
}

// uint64ToBytes converts a uint64 to a byte slice.
func uint64ToBytes(u uint64) []byte {
    buf := make([]byte, 8)
    binary.BigEndian.PutUint64(buf, u)
    return buf
}
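
// A hedged sanity check for the two converters above (exampleUint64RoundTrip
// is illustrative only): they are inverses, using a fixed big-endian 8-byte
// encoding.
func exampleUint64RoundTrip() bool {
    return bytesToUint64(uint64ToBytes(42)) == 42
}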
// backoff is used to compute an exponential backoff
// duration. Base time is scaled by the current round,
// up to some maximum scale factor.
func backoff(base time.Duration, round, limit uint64) time.Duration {
    power := min(round, limit)
    for power > 2 {
        base *= 2
        power--
    }
    return base
}
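
// A hedged worked example of backoff above (exampleBackoff and its values
// are illustrative only): with base = 10ms, rounds 1 and 2 return 10ms,
// round 3 returns 20ms, and round 5 doubles three times to 80ms; the limit
// caps how many doublings ever apply.
func exampleBackoff() time.Duration {
    return backoff(10*time.Millisecond, 5, 7) // 80ms
}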