Update godeps for etcd 3.0.4

Timothy St. Clair
2016-07-22 13:54:40 -05:00
parent 456c43c22d
commit 5f008faa8b
457 changed files with 25492 additions and 10481 deletions

View File

@@ -0,0 +1,85 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package api
import (
"sync"
"github.com/coreos/etcd/version"
"github.com/coreos/go-semver/semver"
"github.com/coreos/pkg/capnslog"
)
type Capability string
const (
AuthCapability Capability = "auth"
V3rpcCapability Capability = "v3rpc"
)
var (
plog = capnslog.NewPackageLogger("github.com/coreos/etcd/etcdserver", "api")
// capabilityMaps is a static map of version to capability map.
// the base capabilities are the set of capabilities that 2.0 supports.
capabilityMaps = map[string]map[Capability]bool{
"2.1.0": {AuthCapability: true},
"2.2.0": {AuthCapability: true},
"2.3.0": {AuthCapability: true},
"3.0.0": {AuthCapability: true, V3rpcCapability: true},
}
enableMapMu sync.RWMutex
// enabledMap points to a map in capabilityMaps
enabledMap map[Capability]bool
curVersion *semver.Version
)
func init() {
enabledMap = make(map[Capability]bool)
}
// UpdateCapability updates the enabledMap when the cluster version increases.
func UpdateCapability(v *semver.Version) {
if v == nil {
// if recovered but version was never set by cluster
return
}
enableMapMu.Lock()
if curVersion != nil && !curVersion.LessThan(*v) {
enableMapMu.Unlock()
return
}
curVersion = v
enabledMap = capabilityMaps[curVersion.String()]
enableMapMu.Unlock()
plog.Infof("enabled capabilities for version %s", version.Cluster(v.String()))
}
func IsCapabilityEnabled(c Capability) bool {
enableMapMu.RLock()
defer enableMapMu.RUnlock()
if enabledMap == nil {
return false
}
return enabledMap[c]
}
func EnableCapability(c Capability) {
enableMapMu.Lock()
defer enableMapMu.Unlock()
enabledMap[c] = true
}
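
A minimal usage sketch of the capability gate defined above (not part of this commit; the version literal and print statements are illustrative only):

package main

import (
	"fmt"

	"github.com/coreos/etcd/etcdserver/api"
	"github.com/coreos/go-semver/semver"
)

func main() {
	// Once the cluster-wide version reaches 3.0.0, the v3 RPC surface
	// becomes available; lower versions only enable "auth".
	api.UpdateCapability(semver.New("3.0.0"))
	fmt.Println(api.IsCapabilityEnabled(api.V3rpcCapability)) // true
	fmt.Println(api.IsCapabilityEnabled(api.AuthCapability))  // true
}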

View File

@@ -1,4 +1,4 @@
// Copyright 2016 CoreOS, Inc.
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

vendor/github.com/coreos/etcd/etcdserver/api/doc.go (new file, generated, vendored, 16 lines)
View File

@@ -0,0 +1,16 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package api manages the capabilities and features that are exposed to clients by the etcd cluster.
package api

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -17,75 +17,14 @@ package v2http
import (
"fmt"
"net/http"
"sync"
"time"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api"
"github.com/coreos/etcd/etcdserver/api/v2http/httptypes"
"github.com/coreos/go-semver/semver"
)
type capability string
const (
authCapability capability = "auth"
)
var (
// capabilityMaps is a static map of version to capability map.
// the base capabilities are the set of capabilities that 2.0 supports.
capabilityMaps = map[string]map[capability]bool{
"2.1.0": {authCapability: true},
"2.2.0": {authCapability: true},
"2.3.0": {authCapability: true},
}
enableMapMu sync.Mutex
// enabledMap points to a map in capabilityMaps
enabledMap map[capability]bool
)
// capabilityLoop checks the cluster version every 500ms and updates
// the enabledMap when the cluster version increases.
// capabilityLoop MUST be run in a goroutine before checking capability
// or using capabilityHandler.
func capabilityLoop(s *etcdserver.EtcdServer) {
stopped := s.StopNotify()
var pv *semver.Version
for {
if v := s.ClusterVersion(); v != pv {
if pv == nil {
pv = v
} else if v != nil && pv.LessThan(*v) {
pv = v
}
enableMapMu.Lock()
enabledMap = capabilityMaps[pv.String()]
enableMapMu.Unlock()
plog.Infof("enabled capabilities for version %s", pv)
}
select {
case <-stopped:
return
case <-time.After(500 * time.Millisecond):
}
}
}
func isCapabilityEnabled(c capability) bool {
enableMapMu.Lock()
defer enableMapMu.Unlock()
if enabledMap == nil {
return false
}
return enabledMap[c]
}
func capabilityHandler(c capability, fn func(http.ResponseWriter, *http.Request)) http.HandlerFunc {
func capabilityHandler(c api.Capability, fn func(http.ResponseWriter, *http.Request)) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
if !isCapabilityEnabled(c) {
if !api.IsCapabilityEnabled(c) {
notCapable(w, r, c)
return
}
@@ -93,7 +32,7 @@ func capabilityHandler(c capability, fn func(http.ResponseWriter, *http.Request)
}
}
func notCapable(w http.ResponseWriter, r *http.Request, c capability) {
func notCapable(w http.ResponseWriter, r *http.Request, c api.Capability) {
herr := httptypes.NewHTTPError(http.StatusInternalServerError, fmt.Sprintf("Not capable of accessing %s feature during rolling upgrades.", c))
if err := herr.WriteTo(w); err != nil {
plog.Debugf("error writing HTTPError (%v) to %s", err, r.RemoteAddr)

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -62,16 +62,15 @@ const (
// NewClientHandler generates a muxed http.Handler with the given parameters to serve etcd client requests.
func NewClientHandler(server *etcdserver.EtcdServer, timeout time.Duration) http.Handler {
go capabilityLoop(server)
sec := auth.NewStore(server, timeout)
kh := &keysHandler{
sec: sec,
server: server,
cluster: server.Cluster(),
timer: server,
timeout: timeout,
sec: sec,
server: server,
cluster: server.Cluster(),
timer: server,
timeout: timeout,
clientCertAuthEnabled: server.Cfg.ClientCertAuthEnabled,
}
sh := &statsHandler{
@@ -84,6 +83,7 @@ func NewClientHandler(server *etcdserver.EtcdServer, timeout time.Duration) http
cluster: server.Cluster(),
timeout: timeout,
clock: clockwork.NewRealClock(),
clientCertAuthEnabled: server.Cfg.ClientCertAuthEnabled,
}
dmh := &deprecatedMachinesHandler{
@@ -91,8 +91,9 @@ func NewClientHandler(server *etcdserver.EtcdServer, timeout time.Duration) http
}
sech := &authHandler{
sec: sec,
cluster: server.Cluster(),
sec: sec,
cluster: server.Cluster(),
clientCertAuthEnabled: server.Cfg.ClientCertAuthEnabled,
}
mux := http.NewServeMux()
@@ -133,11 +134,12 @@ func NewClientHandler(server *etcdserver.EtcdServer, timeout time.Duration) http
}
type keysHandler struct {
sec auth.Store
server etcdserver.Server
cluster api.Cluster
timer etcdserver.RaftTimer
timeout time.Duration
sec auth.Store
server etcdserver.Server
cluster api.Cluster
timer etcdserver.RaftTimer
timeout time.Duration
clientCertAuthEnabled bool
}
func (h *keysHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
@@ -157,7 +159,7 @@ func (h *keysHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
return
}
// The path must be valid at this point (we've parsed the request successfully).
if !hasKeyPrefixAccess(h.sec, r, r.URL.Path[len(keysPrefix):], rr.Recursive) {
if !hasKeyPrefixAccess(h.sec, r, r.URL.Path[len(keysPrefix):], rr.Recursive, h.clientCertAuthEnabled) {
writeKeyNoAuth(w)
return
}
@@ -200,18 +202,19 @@ func (h *deprecatedMachinesHandler) ServeHTTP(w http.ResponseWriter, r *http.Req
}
type membersHandler struct {
sec auth.Store
server etcdserver.Server
cluster api.Cluster
timeout time.Duration
clock clockwork.Clock
sec auth.Store
server etcdserver.Server
cluster api.Cluster
timeout time.Duration
clock clockwork.Clock
clientCertAuthEnabled bool
}
func (h *membersHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
if !allowMethod(w, r.Method, "GET", "POST", "DELETE", "PUT") {
return
}
if !hasWriteRootAccess(h.sec, r) {
if !hasWriteRootAccess(h.sec, r, h.clientCertAuthEnabled) {
writeNoAuth(w, r)
return
}
@@ -720,20 +723,19 @@ func trimEventPrefix(ev *store.Event, prefix string) *store.Event {
// Since the *Event may reference one in the store history,
// we must copy it before modifying
e := ev.Clone()
e.Node = trimNodeExternPrefix(e.Node, prefix)
e.PrevNode = trimNodeExternPrefix(e.PrevNode, prefix)
trimNodeExternPrefix(e.Node, prefix)
trimNodeExternPrefix(e.PrevNode, prefix)
return e
}
func trimNodeExternPrefix(n *store.NodeExtern, prefix string) *store.NodeExtern {
func trimNodeExternPrefix(n *store.NodeExtern, prefix string) {
if n == nil {
return nil
return
}
n.Key = strings.TrimPrefix(n.Key, prefix)
for _, nn := range n.Nodes {
nn = trimNodeExternPrefix(nn, prefix)
trimNodeExternPrefix(nn, prefix)
}
return n
}
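
The hunk above changes trimNodeExternPrefix from returning a node to mutating in place; a standalone sketch (with a hypothetical node type standing in for store.NodeExtern) of why this works where the old `nn = trimNodeExternPrefix(nn, prefix)` was a silent no-op: range variables are copies, so reassigning nn never updated n.Nodes, while mutating through the pointer does.

package main

import (
	"fmt"
	"strings"
)

// nodeExtern stands in for store.NodeExtern for illustration.
type nodeExtern struct {
	Key   string
	Nodes []*nodeExtern
}

func trimPrefix(n *nodeExtern, prefix string) {
	if n == nil {
		return
	}
	n.Key = strings.TrimPrefix(n.Key, prefix)
	for _, nn := range n.Nodes {
		trimPrefix(nn, prefix) // nn is a pointer, so the mutation is visible to the caller
	}
}

func main() {
	root := &nodeExtern{Key: "/prefix/a", Nodes: []*nodeExtern{{Key: "/prefix/a/b"}}}
	trimPrefix(root, "/prefix")
	fmt.Println(root.Key, root.Nodes[0].Key) // /a /a/b
}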
func trimErrorPrefix(err error, prefix string) error {

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -26,18 +26,56 @@ import (
)
type authHandler struct {
sec auth.Store
cluster api.Cluster
sec auth.Store
cluster api.Cluster
clientCertAuthEnabled bool
}
func hasWriteRootAccess(sec auth.Store, r *http.Request) bool {
func hasWriteRootAccess(sec auth.Store, r *http.Request, clientCertAuthEnabled bool) bool {
if r.Method == "GET" || r.Method == "HEAD" {
return true
}
return hasRootAccess(sec, r)
return hasRootAccess(sec, r, clientCertAuthEnabled)
}
func hasRootAccess(sec auth.Store, r *http.Request) bool {
func userFromBasicAuth(sec auth.Store, r *http.Request) *auth.User {
username, password, ok := r.BasicAuth()
if !ok {
plog.Warningf("auth: malformed basic auth encoding")
return nil
}
user, err := sec.GetUser(username)
if err != nil {
return nil
}
ok = sec.CheckPassword(user, password)
if !ok {
plog.Warningf("auth: incorrect password for user: %s", username)
return nil
}
return &user
}
func userFromClientCertificate(sec auth.Store, r *http.Request) *auth.User {
if r.TLS == nil {
return nil
}
for _, chains := range r.TLS.VerifiedChains {
for _, chain := range chains {
plog.Debugf("auth: found common name %s.\n", chain.Subject.CommonName)
user, err := sec.GetUser(chain.Subject.CommonName)
if err == nil {
plog.Debugf("auth: authenticated user %s by cert common name.", user.User)
return &user
}
}
}
return nil
}
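
For context (a sketch, not part of the commit): r.TLS.VerifiedChains, which userFromClientCertificate relies on, is only populated when the server's TLS configuration actually verifies client certificates, for example with a config along these lines (function name and caPEM parameter are assumptions):

package main

import (
	"crypto/tls"
	"crypto/x509"
)

// newClientCertTLSConfig builds a server TLS config under which every
// authenticated request carries non-empty VerifiedChains. caPEM is assumed
// to hold the CA certificate(s) that signed the client certificates.
func newClientCertTLSConfig(serverCert tls.Certificate, caPEM []byte) *tls.Config {
	pool := x509.NewCertPool()
	pool.AppendCertsFromPEM(caPEM)
	return &tls.Config{
		Certificates: []tls.Certificate{serverCert},
		ClientCAs:    pool,
		ClientAuth:   tls.RequireAndVerifyClientCert, // populates r.TLS.VerifiedChains
	}
}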
func hasRootAccess(sec auth.Store, r *http.Request, clientCertAuthEnabled bool) bool {
if sec == nil {
// No store means no auth available, e.g., tests.
return true
@@ -45,30 +83,30 @@ func hasRootAccess(sec auth.Store, r *http.Request) bool {
if !sec.AuthEnabled() {
return true
}
username, password, ok := r.BasicAuth()
if !ok {
return false
}
rootUser, err := sec.GetUser(username)
if err != nil {
return false
var rootUser *auth.User
if r.Header.Get("Authorization") == "" && clientCertAuthEnabled {
rootUser = userFromClientCertificate(sec, r)
if rootUser == nil {
return false
}
} else {
rootUser = userFromBasicAuth(sec, r)
if rootUser == nil {
return false
}
}
ok = sec.CheckPassword(rootUser, password)
if !ok {
plog.Warningf("auth: wrong password for user %s", username)
return false
}
for _, role := range rootUser.Roles {
if role == auth.RootRoleName {
return true
}
}
plog.Warningf("auth: user %s does not have the %s role for resource %s.", username, auth.RootRoleName, r.URL.Path)
plog.Warningf("auth: user %s does not have the %s role for resource %s.", rootUser.User, auth.RootRoleName, r.URL.Path)
return false
}
func hasKeyPrefixAccess(sec auth.Store, r *http.Request, key string, recursive bool) bool {
func hasKeyPrefixAccess(sec auth.Store, r *http.Request, key string, recursive, clientCertAuthEnabled bool) bool {
if sec == nil {
// No store means no auth available, e.g., tests.
return true
@@ -76,25 +114,21 @@ func hasKeyPrefixAccess(sec auth.Store, r *http.Request, key string, recursive b
if !sec.AuthEnabled() {
return true
}
if r.Header.Get("Authorization") == "" {
plog.Warningf("auth: no authorization provided, checking guest access")
return hasGuestAccess(sec, r, key)
}
username, password, ok := r.BasicAuth()
if !ok {
plog.Warningf("auth: malformed basic auth encoding")
return false
}
user, err := sec.GetUser(username)
if err != nil {
plog.Warningf("auth: no such user: %s.", username)
return false
}
authAsUser := sec.CheckPassword(user, password)
if !authAsUser {
plog.Warningf("auth: incorrect password for user: %s.", username)
return false
var user *auth.User
if r.Header.Get("Authorization") == "" && clientCertAuthEnabled {
user = userFromClientCertificate(sec, r)
if user == nil {
plog.Warningf("auth: no authorization provided, checking guest access")
return hasGuestAccess(sec, r, key)
}
} else {
user = userFromBasicAuth(sec, r)
if user == nil {
return false
}
}
writeAccess := r.Method != "GET" && r.Method != "HEAD"
for _, roleName := range user.Roles {
role, err := sec.GetRole(roleName)
@@ -109,7 +143,7 @@ func hasKeyPrefixAccess(sec auth.Store, r *http.Request, key string, recursive b
return true
}
}
plog.Warningf("auth: invalid access for user %s on key %s.", username, key)
plog.Warningf("auth: invalid access for user %s on key %s.", user.User, key)
return false
}
@@ -134,18 +168,18 @@ func writeNoAuth(w http.ResponseWriter, r *http.Request) {
}
func handleAuth(mux *http.ServeMux, sh *authHandler) {
mux.HandleFunc(authPrefix+"/roles", capabilityHandler(authCapability, sh.baseRoles))
mux.HandleFunc(authPrefix+"/roles/", capabilityHandler(authCapability, sh.handleRoles))
mux.HandleFunc(authPrefix+"/users", capabilityHandler(authCapability, sh.baseUsers))
mux.HandleFunc(authPrefix+"/users/", capabilityHandler(authCapability, sh.handleUsers))
mux.HandleFunc(authPrefix+"/enable", capabilityHandler(authCapability, sh.enableDisable))
mux.HandleFunc(authPrefix+"/roles", capabilityHandler(api.AuthCapability, sh.baseRoles))
mux.HandleFunc(authPrefix+"/roles/", capabilityHandler(api.AuthCapability, sh.handleRoles))
mux.HandleFunc(authPrefix+"/users", capabilityHandler(api.AuthCapability, sh.baseUsers))
mux.HandleFunc(authPrefix+"/users/", capabilityHandler(api.AuthCapability, sh.handleUsers))
mux.HandleFunc(authPrefix+"/enable", capabilityHandler(api.AuthCapability, sh.enableDisable))
}
func (sh *authHandler) baseRoles(w http.ResponseWriter, r *http.Request) {
if !allowMethod(w, r.Method, "GET") {
return
}
if !hasRootAccess(sh.sec, r) {
if !hasRootAccess(sh.sec, r, sh.clientCertAuthEnabled) {
writeNoAuth(w, r)
return
}
@@ -209,7 +243,7 @@ func (sh *authHandler) forRole(w http.ResponseWriter, r *http.Request, role stri
if !allowMethod(w, r.Method, "GET", "PUT", "DELETE") {
return
}
if !hasRootAccess(sh.sec, r) {
if !hasRootAccess(sh.sec, r, sh.clientCertAuthEnabled) {
writeNoAuth(w, r)
return
}
@@ -285,11 +319,15 @@ type userWithRoles struct {
Roles []auth.Role `json:"roles,omitempty"`
}
type usersCollections struct {
Users []userWithRoles `json:"users"`
}
func (sh *authHandler) baseUsers(w http.ResponseWriter, r *http.Request) {
if !allowMethod(w, r.Method, "GET") {
return
}
if !hasRootAccess(sh.sec, r) {
if !hasRootAccess(sh.sec, r, sh.clientCertAuthEnabled) {
writeNoAuth(w, r)
return
}
@@ -311,9 +349,7 @@ func (sh *authHandler) baseUsers(w http.ResponseWriter, r *http.Request) {
return
}
var usersCollections struct {
Users []userWithRoles `json:"users"`
}
ucs := usersCollections{}
for _, userName := range users {
var user auth.User
user, err = sh.sec.GetUser(userName)
@@ -327,15 +363,14 @@ func (sh *authHandler) baseUsers(w http.ResponseWriter, r *http.Request) {
var role auth.Role
role, err = sh.sec.GetRole(roleName)
if err != nil {
writeError(w, r, err)
return
continue
}
uwr.Roles = append(uwr.Roles, role)
}
usersCollections.Users = append(usersCollections.Users, uwr)
ucs.Users = append(ucs.Users, uwr)
}
err = json.NewEncoder(w).Encode(usersCollections)
err = json.NewEncoder(w).Encode(ucs)
if err != nil {
plog.Warningf("baseUsers error encoding on %s", r.URL)
@@ -364,7 +399,7 @@ func (sh *authHandler) forUser(w http.ResponseWriter, r *http.Request, user stri
if !allowMethod(w, r.Method, "GET", "PUT", "DELETE") {
return
}
if !hasRootAccess(sh.sec, r) {
if !hasRootAccess(sh.sec, r, sh.clientCertAuthEnabled) {
writeNoAuth(w, r)
return
}
@@ -477,7 +512,7 @@ func (sh *authHandler) enableDisable(w http.ResponseWriter, r *http.Request) {
if !allowMethod(w, r.Method, "GET", "PUT", "DELETE") {
return
}
if !hasWriteRootAccess(sh.sec, r) {
if !hasWriteRootAccess(sh.sec, r, sh.clientCertAuthEnabled) {
writeNoAuth(w, r)
return
}

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -48,7 +48,7 @@ var (
prometheus.HistogramOpts{
Namespace: "etcd",
Subsystem: "http",
Name: "successful_duration_second",
Name: "successful_duration_seconds",
Help: "Bucketed histogram of processing time (s) of successfully handled requests (non-watches), by method (GET/PUT etc.).",
Buckets: prometheus.ExponentialBuckets(0.0005, 2, 13),
}, []string{"method"})

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

View File

@@ -1,4 +1,4 @@
// Copyright 2016 Nippon Telegraph and Telephone Corporation.
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -37,13 +37,19 @@ func (as *AuthServer) AuthEnable(ctx context.Context, r *pb.AuthEnableRequest) (
}
func (as *AuthServer) AuthDisable(ctx context.Context, r *pb.AuthDisableRequest) (*pb.AuthDisableResponse, error) {
plog.Info("not implemented yet")
return nil, nil
resp, err := as.authenticator.AuthDisable(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) Authenticate(ctx context.Context, r *pb.AuthenticateRequest) (*pb.AuthenticateResponse, error) {
plog.Info("not implemented yet")
return nil, nil
resp, err := as.authenticator.Authenticate(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) RoleAdd(ctx context.Context, r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error) {
@@ -55,23 +61,43 @@ func (as *AuthServer) RoleAdd(ctx context.Context, r *pb.AuthRoleAddRequest) (*p
}
func (as *AuthServer) RoleDelete(ctx context.Context, r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error) {
plog.Info("not implemented yet")
return nil, nil
resp, err := as.authenticator.RoleDelete(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) RoleGet(ctx context.Context, r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error) {
plog.Info("not implemented yet")
return nil, nil
resp, err := as.authenticator.RoleGet(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) RoleRevoke(ctx context.Context, r *pb.AuthRoleRevokeRequest) (*pb.AuthRoleRevokeResponse, error) {
plog.Info("not implemented yet")
return nil, nil
func (as *AuthServer) RoleList(ctx context.Context, r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error) {
resp, err := as.authenticator.RoleList(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) RoleGrant(ctx context.Context, r *pb.AuthRoleGrantRequest) (*pb.AuthRoleGrantResponse, error) {
plog.Info("not implemented yet")
return nil, nil
func (as *AuthServer) RoleRevokePermission(ctx context.Context, r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error) {
resp, err := as.authenticator.RoleRevokePermission(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) RoleGrantPermission(ctx context.Context, r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error) {
resp, err := as.authenticator.RoleGrantPermission(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserAdd(ctx context.Context, r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error) {
@@ -91,18 +117,35 @@ func (as *AuthServer) UserDelete(ctx context.Context, r *pb.AuthUserDeleteReques
}
func (as *AuthServer) UserGet(ctx context.Context, r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error) {
plog.Info("not implemented yet")
return nil, nil
resp, err := as.authenticator.UserGet(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserGrant(ctx context.Context, r *pb.AuthUserGrantRequest) (*pb.AuthUserGrantResponse, error) {
plog.Info("not implemented yet")
return nil, nil
func (as *AuthServer) UserList(ctx context.Context, r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error) {
resp, err := as.authenticator.UserList(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserRevoke(ctx context.Context, r *pb.AuthUserRevokeRequest) (*pb.AuthUserRevokeResponse, error) {
plog.Info("not implemented yet")
return nil, nil
func (as *AuthServer) UserGrantRole(ctx context.Context, r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error) {
resp, err := as.authenticator.UserGrantRole(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserRevokeRole(ctx context.Context, r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error) {
resp, err := as.authenticator.UserRevokeRole(ctx, r)
if err != nil {
return nil, togRPCError(err)
}
return resp, nil
}
func (as *AuthServer) UserChangePassword(ctx context.Context, r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error) {

View File

@@ -0,0 +1,34 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import "github.com/gogo/protobuf/proto"
type codec struct{}
func (c *codec) Marshal(v interface{}) ([]byte, error) {
b, err := proto.Marshal(v.(proto.Message))
sentBytes.Add(float64(len(b)))
return b, err
}
func (c *codec) Unmarshal(data []byte, v interface{}) error {
receivedBytes.Add(float64(len(data)))
return proto.Unmarshal(data, v.(proto.Message))
}
func (c *codec) String() string {
return "proto"
}

View File

@@ -1,4 +1,4 @@
// Copyright 2016 CoreOS, Inc.
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -19,15 +19,24 @@ import (
"github.com/coreos/etcd/etcdserver"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/pkg/capnslog"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
"google.golang.org/grpc/grpclog"
)
func init() {
grpclog.SetLogger(capnslog.NewPackageLogger("github.com/coreos/etcd/etcdserver", "v3rpc/grpc"))
}
func Server(s *etcdserver.EtcdServer, tls *tls.Config) *grpc.Server {
var opts []grpc.ServerOption
opts = append(opts, grpc.CustomCodec(&codec{}))
if tls != nil {
opts = append(opts, grpc.Creds(credentials.NewTLS(tls)))
}
opts = append(opts, grpc.UnaryInterceptor(newUnaryInterceptor(s)))
opts = append(opts, grpc.StreamInterceptor(newStreamInterceptor(s)))
grpcServer := grpc.NewServer(opts...)
pb.RegisterKVServer(grpcServer, NewQuotaKVServer(s))
@@ -36,5 +45,6 @@ func Server(s *etcdserver.EtcdServer, tls *tls.Config) *grpc.Server {
pb.RegisterClusterServer(grpcServer, NewClusterServer(s))
pb.RegisterAuthServer(grpcServer, NewAuthServer(s))
pb.RegisterMaintenanceServer(grpcServer, NewMaintenanceServer(s))
return grpcServer
}
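
A usage sketch (assumes an initialized *etcdserver.EtcdServer and a net.Listener; not part of the commit). A nil TLS config means plaintext serving, per the nil check above:

package main

import (
	"crypto/tls"
	"net"

	"github.com/coreos/etcd/etcdserver"
	"github.com/coreos/etcd/etcdserver/api/v3rpc"
)

func serveV3(s *etcdserver.EtcdServer, lis net.Listener, tlsCfg *tls.Config) error {
	grpcServer := v3rpc.Server(s, tlsCfg)
	return grpcServer.Serve(lis)
}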

View File

@@ -1,4 +1,4 @@
// Copyright 2016 CoreOS, Inc.
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -40,4 +40,7 @@ func (h *header) fill(rh *pb.ResponseHeader) {
rh.ClusterId = uint64(h.clusterID)
rh.MemberId = uint64(h.memberID)
rh.RaftTerm = h.raftTimer.Term()
if rh.Revision == 0 {
rh.Revision = h.rev()
}
}

View File

@@ -0,0 +1,176 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"strings"
"sync"
"time"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft"
"golang.org/x/net/context"
"google.golang.org/grpc"
"google.golang.org/grpc/metadata"
)
const (
maxNoLeaderCnt = 3
)
type streamsMap struct {
mu sync.Mutex
streams map[grpc.ServerStream]struct{}
}
func newUnaryInterceptor(s *etcdserver.EtcdServer) grpc.UnaryServerInterceptor {
return func(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp interface{}, err error) {
if !api.IsCapabilityEnabled(api.V3rpcCapability) {
return nil, rpctypes.ErrGRPCNotCapable
}
md, ok := metadata.FromContext(ctx)
if ok {
if ks := md[rpctypes.MetadataRequireLeaderKey]; len(ks) > 0 && ks[0] == rpctypes.MetadataHasLeader {
if s.Leader() == types.ID(raft.None) {
return nil, rpctypes.ErrGRPCNoLeader
}
}
}
return metricsUnaryInterceptor(ctx, req, info, handler)
}
}
func newStreamInterceptor(s *etcdserver.EtcdServer) grpc.StreamServerInterceptor {
smap := monitorLeader(s)
return func(srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error {
if !api.IsCapabilityEnabled(api.V3rpcCapability) {
return rpctypes.ErrGRPCNotCapable
}
md, ok := metadata.FromContext(ss.Context())
if ok {
if ks := md[rpctypes.MetadataRequireLeaderKey]; len(ks) > 0 && ks[0] == rpctypes.MetadataHasLeader {
if s.Leader() == types.ID(raft.None) {
return rpctypes.ErrGRPCNoLeader
}
cctx, cancel := context.WithCancel(ss.Context())
ss = serverStreamWithCtx{ctx: cctx, cancel: &cancel, ServerStream: ss}
smap.mu.Lock()
smap.streams[ss] = struct{}{}
smap.mu.Unlock()
defer func() {
smap.mu.Lock()
delete(smap.streams, ss)
smap.mu.Unlock()
cancel()
}()
}
}
return metricsStreamInterceptor(srv, ss, info, handler)
}
}
func metricsUnaryInterceptor(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (resp interface{}, err error) {
service, method := splitMethodName(info.FullMethod)
receivedCounter.WithLabelValues(service, method).Inc()
start := time.Now()
resp, err = handler(ctx, req)
if err != nil {
failedCounter.WithLabelValues(service, method, grpc.Code(err).String()).Inc()
}
handlingDuration.WithLabelValues(service, method).Observe(time.Since(start).Seconds())
return resp, err
}
func metricsStreamInterceptor(srv interface{}, ss grpc.ServerStream, info *grpc.StreamServerInfo, handler grpc.StreamHandler) error {
service, method := splitMethodName(info.FullMethod)
receivedCounter.WithLabelValues(service, method).Inc()
err := handler(srv, ss)
if err != nil {
failedCounter.WithLabelValues(service, method, grpc.Code(err).String()).Inc()
}
return err
}
func splitMethodName(fullMethodName string) (string, string) {
fullMethodName = strings.TrimPrefix(fullMethodName, "/") // remove leading slash
if i := strings.Index(fullMethodName, "/"); i >= 0 {
return fullMethodName[:i], fullMethodName[i+1:]
}
return "unknown", "unknown"
}
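
For example, splitMethodName("/etcdserverpb.KV/Range") yields ("etcdserverpb.KV", "Range"), which become the grpc_service and grpc_method label values on the metrics used by the interceptors above.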
type serverStreamWithCtx struct {
grpc.ServerStream
ctx context.Context
cancel *context.CancelFunc
}
func (ssc serverStreamWithCtx) Context() context.Context { return ssc.ctx }
func monitorLeader(s *etcdserver.EtcdServer) *streamsMap {
smap := &streamsMap{
streams: make(map[grpc.ServerStream]struct{}),
}
go func() {
election := time.Duration(s.Cfg.TickMs) * time.Duration(s.Cfg.ElectionTicks) * time.Millisecond
noLeaderCnt := 0
for {
select {
case <-s.StopNotify():
return
case <-time.After(election):
if s.Leader() == types.ID(raft.None) {
noLeaderCnt++
} else {
noLeaderCnt = 0
}
// We are more conservative on canceling existing streams. Reconnecting streams
// cost much more than just rejecting new requests. So we wait until the member
// cannot find a leader for maxNoLeaderCnt election timeouts to cancel existing streams.
if noLeaderCnt >= maxNoLeaderCnt {
smap.mu.Lock()
for ss := range smap.streams {
if ssWithCtx, ok := ss.(serverStreamWithCtx); ok {
(*ssWithCtx.cancel)()
<-ss.Context().Done()
}
}
smap.streams = make(map[grpc.ServerStream]struct{})
smap.mu.Unlock()
}
}
}
}()
return smap
}
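
A worked example of the cancellation timing above (TickMs=100 and ElectionTicks=10 are assumed defaults; both are configurable):

package main

import (
	"fmt"
	"time"
)

func main() {
	tickMs, electionTicks := uint(100), 10 // assumed config values
	election := time.Duration(tickMs) * time.Duration(electionTicks) * time.Millisecond
	// Streams are only canceled after maxNoLeaderCnt (3) consecutive
	// leaderless checks, i.e. roughly 3 election timeouts.
	fmt.Println(election, 3*election) // 1s 3s
}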

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -125,38 +125,38 @@ func (s *kvServer) Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.Co
func checkRangeRequest(r *pb.RangeRequest) error {
if len(r.Key) == 0 {
return rpctypes.ErrEmptyKey
return rpctypes.ErrGRPCEmptyKey
}
return nil
}
func checkPutRequest(r *pb.PutRequest) error {
if len(r.Key) == 0 {
return rpctypes.ErrEmptyKey
return rpctypes.ErrGRPCEmptyKey
}
return nil
}
func checkDeleteRequest(r *pb.DeleteRangeRequest) error {
if len(r.Key) == 0 {
return rpctypes.ErrEmptyKey
return rpctypes.ErrGRPCEmptyKey
}
return nil
}
func checkTxnRequest(r *pb.TxnRequest) error {
if len(r.Compare) > MaxOpsPerTxn || len(r.Success) > MaxOpsPerTxn || len(r.Failure) > MaxOpsPerTxn {
return rpctypes.ErrTooManyOps
return rpctypes.ErrGRPCTooManyOps
}
for _, c := range r.Compare {
if len(c.Key) == 0 {
return rpctypes.ErrEmptyKey
return rpctypes.ErrGRPCEmptyKey
}
}
for _, u := range r.Success {
if err := checkRequestUnion(u); err != nil {
if err := checkRequestOp(u); err != nil {
return err
}
}
@@ -165,23 +165,19 @@ func checkTxnRequest(r *pb.TxnRequest) error {
}
for _, u := range r.Failure {
if err := checkRequestUnion(u); err != nil {
if err := checkRequestOp(u); err != nil {
return err
}
}
if err := checkRequestDupKeys(r.Failure); err != nil {
return err
}
return nil
return checkRequestDupKeys(r.Failure)
}
// checkRequestDupKeys gives rpctypes.ErrDuplicateKey if the same key is modified twice
func checkRequestDupKeys(reqs []*pb.RequestUnion) error {
// checkRequestDupKeys gives rpctypes.ErrGRPCDuplicateKey if the same key is modified twice
func checkRequestDupKeys(reqs []*pb.RequestOp) error {
// check put overlap
keys := make(map[string]struct{})
for _, requ := range reqs {
tv, ok := requ.Request.(*pb.RequestUnion_RequestPut)
tv, ok := requ.Request.(*pb.RequestOp_RequestPut)
if !ok {
continue
}
@@ -189,11 +185,10 @@ func checkRequestDupKeys(reqs []*pb.RequestUnion) error {
if preq == nil {
continue
}
key := string(preq.Key)
if _, ok := keys[key]; ok {
return rpctypes.ErrDuplicateKey
if _, ok := keys[string(preq.Key)]; ok {
return rpctypes.ErrGRPCDuplicateKey
}
keys[key] = struct{}{}
keys[string(preq.Key)] = struct{}{}
}
// no need to check deletes if no puts; delete overlaps are permitted
@@ -210,7 +205,7 @@ func checkRequestDupKeys(reqs []*pb.RequestUnion) error {
// check put overlap with deletes
for _, requ := range reqs {
tv, ok := requ.Request.(*pb.RequestUnion_RequestDeleteRange)
tv, ok := requ.Request.(*pb.RequestOp_RequestDeleteRange)
if !ok {
continue
}
@@ -218,17 +213,16 @@ func checkRequestDupKeys(reqs []*pb.RequestUnion) error {
if dreq == nil {
continue
}
key := string(dreq.Key)
if dreq.RangeEnd == nil {
if _, found := keys[key]; found {
return rpctypes.ErrDuplicateKey
if _, found := keys[string(dreq.Key)]; found {
return rpctypes.ErrGRPCDuplicateKey
}
} else {
lo := sort.SearchStrings(sortedKeys, key)
lo := sort.SearchStrings(sortedKeys, string(dreq.Key))
hi := sort.SearchStrings(sortedKeys, string(dreq.RangeEnd))
if lo != hi {
// element between lo and hi => overlap
return rpctypes.ErrDuplicateKey
return rpctypes.ErrGRPCDuplicateKey
}
}
}
@@ -236,23 +230,23 @@ func checkRequestDupKeys(reqs []*pb.RequestUnion) error {
return nil
}
func checkRequestUnion(u *pb.RequestUnion) error {
func checkRequestOp(u *pb.RequestOp) error {
// TODO: ensure only one of the fields is set.
switch uv := u.Request.(type) {
case *pb.RequestUnion_RequestRange:
case *pb.RequestOp_RequestRange:
if uv.RequestRange != nil {
return checkRangeRequest(uv.RequestRange)
}
case *pb.RequestUnion_RequestPut:
case *pb.RequestOp_RequestPut:
if uv.RequestPut != nil {
return checkPutRequest(uv.RequestPut)
}
case *pb.RequestUnion_RequestDeleteRange:
case *pb.RequestOp_RequestDeleteRange:
if uv.RequestDeleteRange != nil {
return checkDeleteRequest(uv.RequestDeleteRange)
}
default:
// empty union
// empty op
return nil
}
return nil
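
An illustrative request (not from the commit) that checkRequestDupKeys above rejects with ErrGRPCDuplicateKey: a transaction whose success branch puts the same key twice.

package main

import (
	pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
)

func put(key string) *pb.RequestOp {
	return &pb.RequestOp{Request: &pb.RequestOp_RequestPut{
		RequestPut: &pb.PutRequest{Key: []byte(key), Value: []byte("v")},
	}}
}

func main() {
	// Duplicate puts of the same key ("foo" twice) make the outcome
	// order-dependent, so the server rejects the txn before applying it.
	// Overlapping deletes, by contrast, are permitted.
	_ = &pb.TxnRequest{Success: []*pb.RequestOp{put("foo"), put("foo")}}
}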

View File

@@ -1,4 +1,4 @@
// Copyright 2016 CoreOS, Inc.
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -25,27 +25,33 @@ import (
)
type LeaseServer struct {
le etcdserver.Lessor
hdr header
le etcdserver.Lessor
}
func NewLeaseServer(le etcdserver.Lessor) pb.LeaseServer {
return &LeaseServer{le: le}
func NewLeaseServer(s *etcdserver.EtcdServer) pb.LeaseServer {
return &LeaseServer{le: s, hdr: newHeader(s)}
}
func (ls *LeaseServer) LeaseGrant(ctx context.Context, cr *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
resp, err := ls.le.LeaseGrant(ctx, cr)
if err == lease.ErrLeaseExists {
return nil, rpctypes.ErrLeaseExist
return nil, rpctypes.ErrGRPCLeaseExist
}
if err != nil {
return nil, err
}
ls.hdr.fill(resp.Header)
return resp, err
}
func (ls *LeaseServer) LeaseRevoke(ctx context.Context, rr *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
r, err := ls.le.LeaseRevoke(ctx, rr)
resp, err := ls.le.LeaseRevoke(ctx, rr)
if err != nil {
return nil, rpctypes.ErrLeaseNotFound
return nil, rpctypes.ErrGRPCLeaseNotFound
}
return r, nil
ls.hdr.fill(resp.Header)
return resp, nil
}
func (ls *LeaseServer) LeaseKeepAlive(stream pb.Lease_LeaseKeepAliveServer) error {
@@ -58,16 +64,26 @@ func (ls *LeaseServer) LeaseKeepAlive(stream pb.Lease_LeaseKeepAliveServer) erro
return err
}
// Create the header before we send out the renew request.
// This makes sure that the revision is strictly smaller than or equal to
// the revision when the keepalive happened at the local server (when the
// local server is the leader) or at the remote leader.
// Without this, a lease might be revoked at rev 3 but the client could
// see the keepalive succeed at rev 4.
resp := &pb.LeaseKeepAliveResponse{ID: req.ID, Header: &pb.ResponseHeader{}}
ls.hdr.fill(resp.Header)
ttl, err := ls.le.LeaseRenew(lease.LeaseID(req.ID))
if err == lease.ErrLeaseNotFound {
return rpctypes.ErrLeaseNotFound
err = nil
ttl = 0
}
if err != nil && err != lease.ErrLeaseNotFound {
if err != nil {
return err
}
resp := &pb.LeaseKeepAliveResponse{ID: req.ID, TTL: ttl}
resp.TTL = ttl
err = stream.Send(resp)
if err != nil {
return err
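
A client-side sketch of the ordering guarantee described in the comment above (the endpoint is an illustrative assumption, and this assumes the clientv3 package from the same etcd release):

package main

import (
	"fmt"

	"github.com/coreos/etcd/clientv3"
	"golang.org/x/net/context"
)

func main() {
	cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"127.0.0.1:2379"}})
	if err != nil {
		panic(err)
	}
	defer cli.Close()

	lease, err := cli.Grant(context.TODO(), 5)
	if err != nil {
		panic(err)
	}
	// Because the server fills the response header before renewing, the
	// revision observed here can never be newer than the renewal itself:
	// a lease revoked at rev 3 cannot appear alive at rev 4.
	ka, err := cli.KeepAliveOnce(context.TODO(), lease.ID)
	if err != nil {
		panic(err)
	}
	fmt.Println(ka.TTL)
}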

View File

@@ -1,4 +1,4 @@
// Copyright 2016 CoreOS, Inc.
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -15,13 +15,22 @@
package v3rpc
import (
"crypto/sha256"
"io"
"github.com/coreos/etcd/etcdserver"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/storage/backend"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/version"
"golang.org/x/net/context"
)
type KVGetter interface {
KV() mvcc.ConsistentWatchableKV
}
type BackendGetter interface {
Backend() backend.Backend
}
@@ -30,33 +39,86 @@ type Alarmer interface {
Alarm(ctx context.Context, ar *pb.AlarmRequest) (*pb.AlarmResponse, error)
}
type RaftStatusGetter interface {
Index() uint64
Term() uint64
Leader() types.ID
}
type maintenanceServer struct {
rg RaftStatusGetter
kg KVGetter
bg BackendGetter
a Alarmer
hdr header
}
func NewMaintenanceServer(s *etcdserver.EtcdServer) pb.MaintenanceServer {
return &maintenanceServer{bg: s, a: s, hdr: newHeader(s)}
return &maintenanceServer{rg: s, kg: s, bg: s, a: s, hdr: newHeader(s)}
}
func (ms *maintenanceServer) Defragment(ctx context.Context, sr *pb.DefragmentRequest) (*pb.DefragmentResponse, error) {
plog.Noticef("starting to defragment the storage backend...")
err := ms.bg.Backend().Defrag()
if err != nil {
plog.Errorf("failed to deframent the storage backend (%v)", err)
plog.Errorf("failed to defragment the storage backend (%v)", err)
return nil, err
}
plog.Noticef("finished defragmenting the storage backend")
return &pb.DefragmentResponse{}, nil
}
func (ms *maintenanceServer) Snapshot(sr *pb.SnapshotRequest, srv pb.Maintenance_SnapshotServer) error {
snap := ms.bg.Backend().Snapshot()
pr, pw := io.Pipe()
defer pr.Close()
go func() {
snap.WriteTo(pw)
if err := snap.Close(); err != nil {
plog.Errorf("error closing snapshot (%v)", err)
}
pw.Close()
}()
// send file data
h := sha256.New()
br := int64(0)
buf := make([]byte, 32*1024)
sz := snap.Size()
for br < sz {
n, err := io.ReadFull(pr, buf)
if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
return togRPCError(err)
}
br += int64(n)
resp := &pb.SnapshotResponse{
RemainingBytes: uint64(sz - br),
Blob: buf[:n],
}
if err = srv.Send(resp); err != nil {
return togRPCError(err)
}
h.Write(buf[:n])
}
// send sha
sha := h.Sum(nil)
hresp := &pb.SnapshotResponse{RemainingBytes: 0, Blob: sha}
if err := srv.Send(hresp); err != nil {
return togRPCError(err)
}
return nil
}
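
A consumer sketch (not part of the commit; pb.Maintenance_SnapshotClient is the generated client stream type): concatenating every Blob in the stream yields the snapshot bytes followed by a trailing sha256 of those bytes, which the receiver can verify.

package main

import (
	"bytes"
	"crypto/sha256"
	"fmt"
	"io"

	pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
)

// readSnapshot drains a snapshot stream and verifies the trailing checksum.
func readSnapshot(stream pb.Maintenance_SnapshotClient) ([]byte, error) {
	var buf bytes.Buffer
	for {
		resp, err := stream.Recv()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
		buf.Write(resp.Blob)
	}
	b := buf.Bytes()
	if len(b) < sha256.Size {
		return nil, fmt.Errorf("snapshot stream too short: %d bytes", len(b))
	}
	data, sum := b[:len(b)-sha256.Size], b[len(b)-sha256.Size:]
	if got := sha256.Sum256(data); !bytes.Equal(got[:], sum) {
		return nil, fmt.Errorf("snapshot checksum mismatch")
	}
	return data, nil
}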
func (ms *maintenanceServer) Hash(ctx context.Context, r *pb.HashRequest) (*pb.HashResponse, error) {
h, err := ms.bg.Backend().Hash()
h, rev, err := ms.kg.KV().Hash()
if err != nil {
return nil, togRPCError(err)
}
resp := &pb.HashResponse{Header: &pb.ResponseHeader{Revision: ms.hdr.rev()}, Hash: h}
resp := &pb.HashResponse{Header: &pb.ResponseHeader{Revision: rev}, Hash: h}
ms.hdr.fill(resp.Header)
return resp, nil
}
@@ -66,7 +128,14 @@ func (ms *maintenanceServer) Alarm(ctx context.Context, ar *pb.AlarmRequest) (*p
}
func (ms *maintenanceServer) Status(ctx context.Context, ar *pb.StatusRequest) (*pb.StatusResponse, error) {
resp := &pb.StatusResponse{Header: &pb.ResponseHeader{Revision: ms.hdr.rev()}, Version: version.Version}
resp := &pb.StatusResponse{
Header: &pb.ResponseHeader{Revision: ms.hdr.rev()},
Version: version.Version,
DbSize: ms.bg.Backend().Size(),
Leader: uint64(ms.rg.Leader()),
RaftIndex: ms.rg.Index(),
RaftTerm: ms.rg.Term(),
}
ms.hdr.fill(resp.Header)
return resp, nil
}

View File

@@ -1,4 +1,4 @@
// Copyright 2016 CoreOS, Inc.
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -45,7 +45,7 @@ func NewClusterServer(s *etcdserver.EtcdServer) *ClusterServer {
func (cs *ClusterServer) MemberAdd(ctx context.Context, r *pb.MemberAddRequest) (*pb.MemberAddResponse, error) {
urls, err := types.NewURLs(r.PeerURLs)
if err != nil {
return nil, rpctypes.ErrMemberBadURLs
return nil, rpctypes.ErrGRPCMemberBadURLs
}
now := time.Now()
@@ -53,16 +53,16 @@ func (cs *ClusterServer) MemberAdd(ctx context.Context, r *pb.MemberAddRequest)
err = cs.server.AddMember(ctx, *m)
switch {
case err == membership.ErrIDExists:
return nil, rpctypes.ErrMemberExist
return nil, rpctypes.ErrGRPCMemberExist
case err == membership.ErrPeerURLexists:
return nil, rpctypes.ErrPeerURLExist
return nil, rpctypes.ErrGRPCPeerURLExist
case err != nil:
return nil, grpc.Errorf(codes.Internal, err.Error())
}
return &pb.MemberAddResponse{
Header: cs.header(),
Member: &pb.Member{ID: uint64(m.ID), IsLeader: m.ID == cs.server.Leader(), PeerURLs: m.PeerURLs},
Member: &pb.Member{ID: uint64(m.ID), PeerURLs: m.PeerURLs},
}, nil
}
@@ -72,7 +72,7 @@ func (cs *ClusterServer) MemberRemove(ctx context.Context, r *pb.MemberRemoveReq
case err == membership.ErrIDRemoved:
fallthrough
case err == membership.ErrIDNotFound:
return nil, rpctypes.ErrMemberNotFound
return nil, rpctypes.ErrGRPCMemberNotFound
case err != nil:
return nil, grpc.Errorf(codes.Internal, err.Error())
}
@@ -88,9 +88,9 @@ func (cs *ClusterServer) MemberUpdate(ctx context.Context, r *pb.MemberUpdateReq
err := cs.server.UpdateMember(ctx, m)
switch {
case err == membership.ErrPeerURLexists:
return nil, rpctypes.ErrPeerURLExist
return nil, rpctypes.ErrGRPCPeerURLExist
case err == membership.ErrIDNotFound:
return nil, rpctypes.ErrMemberNotFound
return nil, rpctypes.ErrGRPCMemberNotFound
case err != nil:
return nil, grpc.Errorf(codes.Internal, err.Error())
}
@@ -106,7 +106,6 @@ func (cs *ClusterServer) MemberList(ctx context.Context, r *pb.MemberListRequest
protoMembs[i] = &pb.Member{
Name: membs[i].Name,
ID: uint64(membs[i].ID),
IsLeader: membs[i].ID == cs.server.Leader(),
PeerURLs: membs[i].PeerURLs,
ClientURLs: membs[i].ClientURLs,
}

View File

@@ -0,0 +1,67 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import "github.com/prometheus/client_golang/prometheus"
var (
receivedCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "grpc",
Name: "requests_total",
Help: "Counter of received requests.",
}, []string{"grpc_service", "grpc_method"})
failedCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "grpc",
Name: "requests_failed_total",
Help: "Counter of failed requests.",
}, []string{"grpc_service", "grpc_method", "grpc_code"})
handlingDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: "etcd",
Subsystem: "grpc",
Name: "unary_requests_duration_seconds",
Help: "Bucketed histogram of processing time (s) of handled unary (non-stream) requests.",
Buckets: prometheus.ExponentialBuckets(0.0005, 2, 13),
}, []string{"grpc_service", "grpc_method"})
sentBytes = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "network",
Name: "client_grpc_sent_bytes_total",
Help: "The total number of bytes sent to grpc clients.",
})
receivedBytes = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "network",
Name: "client_grpc_received_bytes_total",
Help: "The total number of bytes received from grpc clients.",
})
)
func init() {
prometheus.MustRegister(receivedCounter)
prometheus.MustRegister(failedCounter)
prometheus.MustRegister(handlingDuration)
prometheus.MustRegister(sentBytes)
prometheus.MustRegister(receivedBytes)
}
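
Once registered, these surface on the member's /metrics endpoint under the etcd namespace, for example (values illustrative): etcd_grpc_requests_total{grpc_service="etcdserverpb.KV",grpc_method="Range"} 42 and etcd_network_client_grpc_sent_bytes_total 1024.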

View File

@@ -1,4 +1,4 @@
// Copyright 2016 CoreOS, Inc.
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -45,7 +45,7 @@ func (qa *quotaAlarmer) check(ctx context.Context, r interface{}) error {
Alarm: pb.AlarmType_NOSPACE,
}
qa.a.Alarm(ctx, req)
return rpctypes.ErrNoSpace
return rpctypes.ErrGRPCNoSpace
}
func NewQuotaKVServer(s *etcdserver.EtcdServer) pb.KVServer {

View File

@@ -0,0 +1,16 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package rpctypes has types and values shared by the etcd server and client for v3 RPC interaction.
package rpctypes

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -20,24 +20,131 @@ import (
)
var (
ErrEmptyKey = grpc.Errorf(codes.InvalidArgument, "etcdserver: key is not provided")
ErrTooManyOps = grpc.Errorf(codes.InvalidArgument, "etcdserver: too many operations in txn request")
ErrDuplicateKey = grpc.Errorf(codes.InvalidArgument, "etcdserver: duplicate key given in txn request")
ErrCompacted = grpc.Errorf(codes.OutOfRange, "etcdserver: storage: required revision has been compacted")
ErrFutureRev = grpc.Errorf(codes.OutOfRange, "etcdserver: storage: required revision is a future revision")
ErrNoSpace = grpc.Errorf(codes.ResourceExhausted, "etcdserver: storage: database space exceeded")
// server-side error
ErrGRPCEmptyKey = grpc.Errorf(codes.InvalidArgument, "etcdserver: key is not provided")
ErrGRPCTooManyOps = grpc.Errorf(codes.InvalidArgument, "etcdserver: too many operations in txn request")
ErrGRPCDuplicateKey = grpc.Errorf(codes.InvalidArgument, "etcdserver: duplicate key given in txn request")
ErrGRPCCompacted = grpc.Errorf(codes.OutOfRange, "etcdserver: mvcc: required revision has been compacted")
ErrGRPCFutureRev = grpc.Errorf(codes.OutOfRange, "etcdserver: mvcc: required revision is a future revision")
ErrGRPCNoSpace = grpc.Errorf(codes.ResourceExhausted, "etcdserver: mvcc: database space exceeded")
ErrLeaseNotFound = grpc.Errorf(codes.NotFound, "etcdserver: requested lease not found")
ErrLeaseExist = grpc.Errorf(codes.FailedPrecondition, "etcdserver: lease already exists")
ErrGRPCLeaseNotFound = grpc.Errorf(codes.NotFound, "etcdserver: requested lease not found")
ErrGRPCLeaseExist = grpc.Errorf(codes.FailedPrecondition, "etcdserver: lease already exists")
ErrMemberExist = grpc.Errorf(codes.FailedPrecondition, "etcdserver: member ID already exist")
ErrPeerURLExist = grpc.Errorf(codes.FailedPrecondition, "etcdserver: Peer URLs already exists")
ErrMemberBadURLs = grpc.Errorf(codes.InvalidArgument, "etcdserver: given member URLs are invalid")
ErrMemberNotFound = grpc.Errorf(codes.NotFound, "etcdserver: member not found")
ErrGRPCMemberExist = grpc.Errorf(codes.FailedPrecondition, "etcdserver: member ID already exist")
ErrGRPCPeerURLExist = grpc.Errorf(codes.FailedPrecondition, "etcdserver: Peer URLs already exists")
ErrGRPCMemberBadURLs = grpc.Errorf(codes.InvalidArgument, "etcdserver: given member URLs are invalid")
ErrGRPCMemberNotFound = grpc.Errorf(codes.NotFound, "etcdserver: member not found")
ErrRequestTooLarge = grpc.Errorf(codes.InvalidArgument, "etcdserver: request is too large")
ErrGRPCRequestTooLarge = grpc.Errorf(codes.InvalidArgument, "etcdserver: request is too large")
ErrUserAlreadyExist = grpc.Errorf(codes.FailedPrecondition, "etcdserver: user name already exists")
ErrUserNotFound = grpc.Errorf(codes.FailedPrecondition, "etcdserver: user name not found")
ErrRoleAlreadyExist = grpc.Errorf(codes.FailedPrecondition, "etcdserver: role name already exists")
ErrGRPCRootUserNotExist = grpc.Errorf(codes.FailedPrecondition, "etcdserver: root user does not exist")
ErrGRPCRootRoleNotExist = grpc.Errorf(codes.FailedPrecondition, "etcdserver: root user does not have root role")
ErrGRPCUserAlreadyExist = grpc.Errorf(codes.FailedPrecondition, "etcdserver: user name already exists")
ErrGRPCUserNotFound = grpc.Errorf(codes.FailedPrecondition, "etcdserver: user name not found")
ErrGRPCRoleAlreadyExist = grpc.Errorf(codes.FailedPrecondition, "etcdserver: role name already exists")
ErrGRPCRoleNotFound = grpc.Errorf(codes.FailedPrecondition, "etcdserver: role name not found")
ErrGRPCAuthFailed = grpc.Errorf(codes.InvalidArgument, "etcdserver: authentication failed, invalid user ID or password")
ErrGRPCPermissionDenied = grpc.Errorf(codes.FailedPrecondition, "etcdserver: permission denied")
ErrGRPCRoleNotGranted = grpc.Errorf(codes.FailedPrecondition, "etcdserver: role is not granted to the user")
ErrGRPCPermissionNotGranted = grpc.Errorf(codes.FailedPrecondition, "etcdserver: permission is not granted to the role")
ErrGRPCNoLeader = grpc.Errorf(codes.Unavailable, "etcdserver: no leader")
ErrGRPCNotCapable = grpc.Errorf(codes.Unavailable, "etcdserver: not capable")
ErrGRPCStopped = grpc.Errorf(codes.Unavailable, "etcdserver: server stopped")
errStringToError = map[string]error{
grpc.ErrorDesc(ErrGRPCEmptyKey): ErrGRPCEmptyKey,
grpc.ErrorDesc(ErrGRPCTooManyOps): ErrGRPCTooManyOps,
grpc.ErrorDesc(ErrGRPCDuplicateKey): ErrGRPCDuplicateKey,
grpc.ErrorDesc(ErrGRPCCompacted): ErrGRPCCompacted,
grpc.ErrorDesc(ErrGRPCFutureRev): ErrGRPCFutureRev,
grpc.ErrorDesc(ErrGRPCNoSpace): ErrGRPCNoSpace,
grpc.ErrorDesc(ErrGRPCLeaseNotFound): ErrGRPCLeaseNotFound,
grpc.ErrorDesc(ErrGRPCLeaseExist): ErrGRPCLeaseExist,
grpc.ErrorDesc(ErrGRPCMemberExist): ErrGRPCMemberExist,
grpc.ErrorDesc(ErrGRPCPeerURLExist): ErrGRPCPeerURLExist,
grpc.ErrorDesc(ErrGRPCMemberBadURLs): ErrGRPCMemberBadURLs,
grpc.ErrorDesc(ErrGRPCMemberNotFound): ErrGRPCMemberNotFound,
grpc.ErrorDesc(ErrGRPCRequestTooLarge): ErrGRPCRequestTooLarge,
grpc.ErrorDesc(ErrGRPCRootUserNotExist): ErrGRPCRootUserNotExist,
grpc.ErrorDesc(ErrGRPCRootRoleNotExist): ErrGRPCRootRoleNotExist,
grpc.ErrorDesc(ErrGRPCUserAlreadyExist): ErrGRPCUserAlreadyExist,
grpc.ErrorDesc(ErrGRPCUserNotFound): ErrGRPCUserNotFound,
grpc.ErrorDesc(ErrGRPCRoleAlreadyExist): ErrGRPCRoleAlreadyExist,
grpc.ErrorDesc(ErrGRPCRoleNotFound): ErrGRPCRoleNotFound,
grpc.ErrorDesc(ErrGRPCAuthFailed): ErrGRPCAuthFailed,
grpc.ErrorDesc(ErrGRPCPermissionDenied): ErrGRPCPermissionDenied,
grpc.ErrorDesc(ErrGRPCRoleNotGranted): ErrGRPCRoleNotGranted,
grpc.ErrorDesc(ErrGRPCPermissionNotGranted): ErrGRPCPermissionNotGranted,
grpc.ErrorDesc(ErrGRPCNoLeader): ErrGRPCNoLeader,
grpc.ErrorDesc(ErrGRPCNotCapable): ErrGRPCNotCapable,
grpc.ErrorDesc(ErrGRPCStopped): ErrGRPCStopped,
}
// client-side error
ErrEmptyKey = Error(ErrGRPCEmptyKey)
ErrTooManyOps = Error(ErrGRPCTooManyOps)
ErrDuplicateKey = Error(ErrGRPCDuplicateKey)
ErrCompacted = Error(ErrGRPCCompacted)
ErrFutureRev = Error(ErrGRPCFutureRev)
ErrNoSpace = Error(ErrGRPCNoSpace)
ErrLeaseNotFound = Error(ErrGRPCLeaseNotFound)
ErrLeaseExist = Error(ErrGRPCLeaseExist)
ErrMemberExist = Error(ErrGRPCMemberExist)
ErrPeerURLExist = Error(ErrGRPCPeerURLExist)
ErrMemberBadURLs = Error(ErrGRPCMemberBadURLs)
ErrMemberNotFound = Error(ErrGRPCMemberNotFound)
ErrRequestTooLarge = Error(ErrGRPCRequestTooLarge)
ErrRootUserNotExist = Error(ErrGRPCRootUserNotExist)
ErrRootRoleNotExist = Error(ErrGRPCRootRoleNotExist)
ErrUserAlreadyExist = Error(ErrGRPCUserAlreadyExist)
ErrUserNotFound = Error(ErrGRPCUserNotFound)
ErrRoleAlreadyExist = Error(ErrGRPCRoleAlreadyExist)
ErrRoleNotFound = Error(ErrGRPCRoleNotFound)
ErrAuthFailed = Error(ErrGRPCAuthFailed)
ErrPermissionDenied = Error(ErrGRPCPermissionDenied)
ErrRoleNotGranted = Error(ErrGRPCRoleNotGranted)
ErrPermissionNotGranted = Error(ErrGRPCPermissionNotGranted)
ErrNoLeader = Error(ErrGRPCNoLeader)
ErrNotCapable = Error(ErrGRPCNotCapable)
ErrStopped = Error(ErrGRPCStopped)
)
// EtcdError defines gRPC server errors.
// (https://github.com/grpc/grpc-go/blob/master/rpc_util.go#L319-L323)
type EtcdError struct {
code codes.Code
desc string
}
// Code returns grpc/codes.Code.
// TODO: define clientv3/codes.Code.
func (e EtcdError) Code() codes.Code {
return e.code
}
func (e EtcdError) Error() string {
return e.desc
}
func Error(err error) error {
if err == nil {
return nil
}
verr, ok := errStringToError[grpc.ErrorDesc(err)]
if !ok { // not gRPC error
return err
}
return EtcdError{code: grpc.Code(verr), desc: grpc.ErrorDesc(verr)}
}
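
A client-side usage sketch (illustration only; classify is a hypothetical helper): Error normalizes a raw gRPC error from any etcd RPC into its typed client-side value, so equality checks work across the wire.

package main

import (
	"fmt"

	"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
)

// classify inspects whatever error a gRPC call returned.
func classify(err error) {
	switch rpctypes.Error(err) {
	case rpctypes.ErrEmptyKey:
		fmt.Println("caller sent an empty key")
	case rpctypes.ErrNoLeader:
		fmt.Println("member has no leader; retry against another endpoint")
	default:
		fmt.Println("unrecognized error:", err)
	}
}

func main() {
	classify(rpctypes.ErrGRPCEmptyKey) // caller sent an empty key
}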

View File

@@ -0,0 +1,20 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package rpctypes
var (
MetadataRequireLeaderKey = "hasleader"
MetadataHasLeader = "true"
)
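
A client-side sketch (illustration; the helper name is an assumption, though clientv3 ships an equivalent WithRequireLeader helper) of attaching this metadata so the interceptors above fail the RPC fast with ErrGRPCNoLeader when the member has no raft leader:

package main

import (
	"golang.org/x/net/context"
	"google.golang.org/grpc/metadata"

	"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
)

// withRequireLeader attaches the metadata checked by the server-side
// unary and stream interceptors.
func withRequireLeader(ctx context.Context) context.Context {
	md := metadata.Pairs(rpctypes.MetadataRequireLeaderKey, rpctypes.MetadataHasLeader)
	return metadata.NewContext(ctx, md)
}

func main() {
	_ = withRequireLeader(context.Background())
}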

View File

@@ -1,4 +1,4 @@
// Copyright 2016 Nippon Telegraph and Telephone Corporation.
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -19,30 +19,45 @@ import (
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/storage"
"github.com/coreos/etcd/mvcc"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
)
func togRPCError(err error) error {
switch err {
case storage.ErrCompacted:
return rpctypes.ErrCompacted
case storage.ErrFutureRev:
return rpctypes.ErrFutureRev
case mvcc.ErrCompacted:
return rpctypes.ErrGRPCCompacted
case mvcc.ErrFutureRev:
return rpctypes.ErrGRPCFutureRev
case lease.ErrLeaseNotFound:
return rpctypes.ErrLeaseNotFound
return rpctypes.ErrGRPCLeaseNotFound
// TODO: handle error from raft and timeout
case etcdserver.ErrRequestTooLarge:
return rpctypes.ErrRequestTooLarge
return rpctypes.ErrGRPCRequestTooLarge
case etcdserver.ErrNoSpace:
return rpctypes.ErrNoSpace
return rpctypes.ErrGRPCNoSpace
case auth.ErrRootUserNotExist:
return rpctypes.ErrGRPCRootUserNotExist
case auth.ErrRootRoleNotExist:
return rpctypes.ErrGRPCRootRoleNotExist
case auth.ErrUserAlreadyExist:
return rpctypes.ErrUserAlreadyExist
return rpctypes.ErrGRPCUserAlreadyExist
case auth.ErrUserNotFound:
return rpctypes.ErrUserNotFound
return rpctypes.ErrGRPCUserNotFound
case auth.ErrRoleAlreadyExist:
return rpctypes.ErrRoleAlreadyExist
return rpctypes.ErrGRPCRoleAlreadyExist
case auth.ErrRoleNotFound:
return rpctypes.ErrGRPCRoleNotFound
case auth.ErrAuthFailed:
return rpctypes.ErrGRPCAuthFailed
case auth.ErrPermissionDenied:
return rpctypes.ErrGRPCPermissionDenied
case auth.ErrRoleNotGranted:
return rpctypes.ErrGRPCRoleNotGranted
case auth.ErrPermissionNotGranted:
return rpctypes.ErrGRPCPermissionNotGranted
default:
return grpc.Errorf(codes.Internal, err.Error())
}

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -19,17 +19,20 @@ import (
"sync"
"time"
"golang.org/x/net/context"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/storage"
"github.com/coreos/etcd/storage/storagepb"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/mvccpb"
)
type watchServer struct {
clusterID int64
memberID int64
raftTimer etcdserver.RaftTimer
watchable storage.Watchable
watchable mvcc.Watchable
}
func NewWatchServer(s *etcdserver.EtcdServer) pb.WatchServer {
@@ -71,7 +74,7 @@ const (
)
// serverWatchStream is an etcd server side stream. It receives requests
// from client side gRPC stream. It receives watch events from storage.WatchStream,
// from client side gRPC stream. It receives watch events from mvcc.WatchStream,
// and creates responses that are forwarded to the gRPC stream.
// It also forwards control messages such as watcher creation and cancellation.
type serverWatchStream struct {
@@ -80,20 +83,23 @@ type serverWatchStream struct {
raftTimer etcdserver.RaftTimer
gRPCStream pb.Watch_WatchServer
watchStream storage.WatchStream
watchStream mvcc.WatchStream
ctrlStream chan *pb.WatchResponse
// mu protects progress, prevKV
mu sync.Mutex
// progress tracks the watchID that stream might need to send
// progress to.
progress map[storage.WatchID]bool
// mu protects progress
mu sync.Mutex
progress map[mvcc.WatchID]bool
// closec indicates the stream is closed.
closec chan struct{}
// wg waits for the send loop to complete
wg sync.WaitGroup
}
func (ws *watchServer) Watch(stream pb.Watch_WatchServer) error {
func (ws *watchServer) Watch(stream pb.Watch_WatchServer) (err error) {
sws := serverWatchStream{
clusterID: ws.clusterID,
memberID: ws.memberID,
@@ -102,16 +108,38 @@ func (ws *watchServer) Watch(stream pb.Watch_WatchServer) error {
watchStream: ws.watchable.NewWatchStream(),
// chan for sending control response like watcher created and canceled.
ctrlStream: make(chan *pb.WatchResponse, ctrlStreamBufLen),
progress: make(map[storage.WatchID]bool),
progress: make(map[mvcc.WatchID]bool),
closec: make(chan struct{}),
}
defer sws.close()
go sws.sendLoop()
return sws.recvLoop()
sws.wg.Add(1)
go func() {
sws.sendLoop()
sws.wg.Done()
}()
errc := make(chan error, 1)
// Ideally recvLoop would also use sws.wg to signal its completion
// but when stream.Context().Done() is closed, the stream's recv
// may continue to block since it uses a different context, leading to
// deadlock when calling sws.close().
go func() { errc <- sws.recvLoop() }()
select {
case err = <-errc:
case <-stream.Context().Done():
err = stream.Context().Err()
// the only server-side cancellation is noleader for now.
if err == context.Canceled {
err = rpctypes.ErrGRPCNoLeader
}
}
sws.close()
return err
}
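The shape of the fix above is a general Go pattern: when a blocking receive cannot be interrupted by the stream context, run it in its own goroutine and select on both the result channel and ctx.Done(). A stripped-down sketch (names are illustrative):
func runUntilDone(ctx context.Context, recv func() error) error {
	errc := make(chan error, 1) // buffered, so the goroutine can always finish
	go func() { errc <- recv() }()
	select {
	case err := <-errc:
		return err
	case <-ctx.Done():
		return ctx.Err() // recv may still be blocked; clean up elsewhere
	}
}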
func (sws *serverWatchStream) recvLoop() error {
defer close(sws.ctrlStream)
for {
req, err := sws.gRPCStream.Recv()
if err == io.EOF {
@@ -143,18 +171,25 @@ func (sws *serverWatchStream) recvLoop() error {
}
id := sws.watchStream.Watch(creq.Key, creq.RangeEnd, rev)
if id != -1 && creq.ProgressNotify {
sws.mu.Lock()
sws.progress[id] = true
sws.mu.Unlock()
}
sws.ctrlStream <- &pb.WatchResponse{
wr := &pb.WatchResponse{
Header: sws.newResponseHeader(wsrev),
WatchId: int64(id),
Created: true,
Canceled: id == -1,
}
select {
case sws.ctrlStream <- wr:
case <-sws.closec:
return nil
}
case *pb.WatchRequest_CancelRequest:
if uv.CancelRequest != nil {
id := uv.CancelRequest.WatchId
err := sws.watchStream.Cancel(storage.WatchID(id))
err := sws.watchStream.Cancel(mvcc.WatchID(id))
if err == nil {
sws.ctrlStream <- &pb.WatchResponse{
Header: sws.newResponseHeader(sws.watchStream.Rev()),
@@ -162,26 +197,40 @@ func (sws *serverWatchStream) recvLoop() error {
Canceled: true,
}
sws.mu.Lock()
delete(sws.progress, storage.WatchID(id))
delete(sws.progress, mvcc.WatchID(id))
sws.mu.Unlock()
}
}
// TODO: do we need to return error back to client?
default:
panic("not implemented")
// we probably should not shut down the entire stream when
// receiving an invalid command.
// so just do nothing instead.
continue
}
}
}
func (sws *serverWatchStream) sendLoop() {
// watch ids that are currently active
ids := make(map[storage.WatchID]struct{})
ids := make(map[mvcc.WatchID]struct{})
// watch responses pending on a watch id creation message
pending := make(map[storage.WatchID][]*pb.WatchResponse)
pending := make(map[mvcc.WatchID][]*pb.WatchResponse)
interval := GetProgressReportInterval()
progressTicker := time.NewTicker(interval)
defer progressTicker.Stop()
defer func() {
progressTicker.Stop()
// drain the chan to clean up pending events
for ws := range sws.watchStream.Chan() {
mvcc.ReportEventReceived(len(ws.Events))
}
for _, wrs := range pending {
for _, ws := range wrs {
mvcc.ReportEventReceived(len(ws.Events))
}
}
}()
for {
select {
@@ -190,11 +239,11 @@ func (sws *serverWatchStream) sendLoop() {
return
}
// TODO: evs is []storagepb.Event type
// either return []*storagepb.Event from storage package
// or define protocol buffer with []storagepb.Event.
// TODO: evs is []mvccpb.Event type
// either return []*mvccpb.Event from the mvcc package
// or define protocol buffer with []mvccpb.Event.
evs := wresp.Events
events := make([]*storagepb.Event, len(evs))
events := make([]*mvccpb.Event, len(evs))
for i := range evs {
events[i] = &evs[i]
}
@@ -213,13 +262,14 @@ func (sws *serverWatchStream) sendLoop() {
continue
}
storage.ReportEventReceived()
mvcc.ReportEventReceived(len(evs))
if err := sws.gRPCStream.Send(wr); err != nil {
return
}
sws.mu.Lock()
if _, ok := sws.progress[wresp.WatchID]; ok {
if len(evs) > 0 && sws.progress[wresp.WatchID] {
// elide next progress update if sent a key update
sws.progress[wresp.WatchID] = false
}
sws.mu.Unlock()
@@ -234,7 +284,7 @@ func (sws *serverWatchStream) sendLoop() {
}
// track id creation
wid := storage.WatchID(c.WatchId)
wid := mvcc.WatchID(c.WatchId)
if c.Canceled {
delete(ids, wid)
continue
@@ -243,7 +293,7 @@ func (sws *serverWatchStream) sendLoop() {
// flush buffered events
ids[wid] = struct{}{}
for _, v := range pending[wid] {
storage.ReportEventReceived()
mvcc.ReportEventReceived(len(v.Events))
if err := sws.gRPCStream.Send(v); err != nil {
return
}
@@ -251,22 +301,16 @@ func (sws *serverWatchStream) sendLoop() {
delete(pending, wid)
}
case <-progressTicker.C:
sws.mu.Lock()
for id, ok := range sws.progress {
if ok {
sws.watchStream.RequestProgress(id)
}
sws.progress[id] = true
}
sws.mu.Unlock()
case <-sws.closec:
// drain the chan to clean up pending events
for range sws.watchStream.Chan() {
storage.ReportEventReceived()
}
for _, wrs := range pending {
for range wrs {
storage.ReportEventReceived()
}
}
return
}
}
}
@@ -274,7 +318,7 @@ func (sws *serverWatchStream) sendLoop() {
func (sws *serverWatchStream) close() {
sws.watchStream.Close()
close(sws.closec)
close(sws.ctrlStream)
sws.wg.Wait()
}
func (sws *serverWatchStream) newResponseHeader(rev int64) *pb.ResponseHeader {

View File

@@ -1,4 +1,4 @@
// Copyright 2016 CoreOS, Inc.
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -18,13 +18,15 @@ import (
"bytes"
"fmt"
"sort"
"time"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/mvccpb"
"github.com/coreos/etcd/pkg/types"
dstorage "github.com/coreos/etcd/storage"
"github.com/coreos/etcd/storage/storagepb"
"github.com/gogo/protobuf/proto"
"golang.org/x/net/context"
)
const (
@@ -32,6 +34,8 @@ const (
// To apply with independent Range, Put, Delete, you can pass noTxn
// to apply functions instead of a valid txn ID.
noTxn = -1
warnApplyDuration = 10 * time.Millisecond
)
type applyResult struct {
@@ -45,54 +49,103 @@ type applyResult struct {
// applierV3 is the interface for processing V3 raft messages
type applierV3 interface {
Apply(r *pb.InternalRaftRequest) *applyResult
Put(txnID int64, p *pb.PutRequest) (*pb.PutResponse, error)
Range(txnID int64, r *pb.RangeRequest) (*pb.RangeResponse, error)
DeleteRange(txnID int64, dr *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error)
Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error)
Compaction(compaction *pb.CompactionRequest) (*pb.CompactionResponse, <-chan struct{}, error)
LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error)
LeaseRevoke(lc *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error)
Alarm(*pb.AlarmRequest) (*pb.AlarmResponse, error)
Authenticate(r *pb.InternalAuthenticateRequest) (*pb.AuthenticateResponse, error)
AuthEnable() (*pb.AuthEnableResponse, error)
AuthDisable() (*pb.AuthDisableResponse, error)
UserAdd(ua *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error)
UserDelete(ua *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error)
UserChangePassword(ua *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error)
UserGrantRole(ua *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error)
UserGet(ua *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error)
UserRevokeRole(ua *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error)
RoleAdd(ua *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error)
RoleGrantPermission(ua *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error)
RoleGet(ua *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error)
RoleRevokePermission(ua *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error)
RoleDelete(ua *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error)
UserList(ua *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error)
RoleList(ua *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error)
}
type applierV3backend struct {
s *EtcdServer
}
func (s *EtcdServer) applyV3Request(r *pb.InternalRaftRequest) *applyResult {
func (s *EtcdServer) newApplierV3() applierV3 {
return newAuthApplierV3(
s.AuthStore(),
newQuotaApplierV3(s, &applierV3backend{s}),
)
}
func (a *applierV3backend) Apply(r *pb.InternalRaftRequest) *applyResult {
ar := &applyResult{}
// call into a.s.applyV3.F instead of a.F so upper appliers can check individual calls
switch {
case r.Range != nil:
ar.resp, ar.err = s.applyV3.Range(noTxn, r.Range)
ar.resp, ar.err = a.s.applyV3.Range(noTxn, r.Range)
case r.Put != nil:
ar.resp, ar.err = s.applyV3.Put(noTxn, r.Put)
ar.resp, ar.err = a.s.applyV3.Put(noTxn, r.Put)
case r.DeleteRange != nil:
ar.resp, ar.err = s.applyV3.DeleteRange(noTxn, r.DeleteRange)
ar.resp, ar.err = a.s.applyV3.DeleteRange(noTxn, r.DeleteRange)
case r.Txn != nil:
ar.resp, ar.err = s.applyV3.Txn(r.Txn)
ar.resp, ar.err = a.s.applyV3.Txn(r.Txn)
case r.Compaction != nil:
ar.resp, ar.physc, ar.err = s.applyV3.Compaction(r.Compaction)
ar.resp, ar.physc, ar.err = a.s.applyV3.Compaction(r.Compaction)
case r.LeaseGrant != nil:
ar.resp, ar.err = s.applyV3.LeaseGrant(r.LeaseGrant)
ar.resp, ar.err = a.s.applyV3.LeaseGrant(r.LeaseGrant)
case r.LeaseRevoke != nil:
ar.resp, ar.err = s.applyV3.LeaseRevoke(r.LeaseRevoke)
ar.resp, ar.err = a.s.applyV3.LeaseRevoke(r.LeaseRevoke)
case r.Alarm != nil:
ar.resp, ar.err = s.applyV3.Alarm(r.Alarm)
ar.resp, ar.err = a.s.applyV3.Alarm(r.Alarm)
case r.Authenticate != nil:
ar.resp, ar.err = a.s.applyV3.Authenticate(r.Authenticate)
case r.AuthEnable != nil:
ar.resp, ar.err = s.applyV3.AuthEnable()
ar.resp, ar.err = a.s.applyV3.AuthEnable()
case r.AuthDisable != nil:
ar.resp, ar.err = a.s.applyV3.AuthDisable()
case r.AuthUserAdd != nil:
ar.resp, ar.err = s.applyV3.UserAdd(r.AuthUserAdd)
ar.resp, ar.err = a.s.applyV3.UserAdd(r.AuthUserAdd)
case r.AuthUserDelete != nil:
ar.resp, ar.err = s.applyV3.UserDelete(r.AuthUserDelete)
ar.resp, ar.err = a.s.applyV3.UserDelete(r.AuthUserDelete)
case r.AuthUserChangePassword != nil:
ar.resp, ar.err = s.applyV3.UserChangePassword(r.AuthUserChangePassword)
ar.resp, ar.err = a.s.applyV3.UserChangePassword(r.AuthUserChangePassword)
case r.AuthUserGrantRole != nil:
ar.resp, ar.err = a.s.applyV3.UserGrantRole(r.AuthUserGrantRole)
case r.AuthUserGet != nil:
ar.resp, ar.err = a.s.applyV3.UserGet(r.AuthUserGet)
case r.AuthUserRevokeRole != nil:
ar.resp, ar.err = a.s.applyV3.UserRevokeRole(r.AuthUserRevokeRole)
case r.AuthRoleAdd != nil:
ar.resp, ar.err = s.applyV3.RoleAdd(r.AuthRoleAdd)
ar.resp, ar.err = a.s.applyV3.RoleAdd(r.AuthRoleAdd)
case r.AuthRoleGrantPermission != nil:
ar.resp, ar.err = a.s.applyV3.RoleGrantPermission(r.AuthRoleGrantPermission)
case r.AuthRoleGet != nil:
ar.resp, ar.err = a.s.applyV3.RoleGet(r.AuthRoleGet)
case r.AuthRoleRevokePermission != nil:
ar.resp, ar.err = a.s.applyV3.RoleRevokePermission(r.AuthRoleRevokePermission)
case r.AuthRoleDelete != nil:
ar.resp, ar.err = a.s.applyV3.RoleDelete(r.AuthRoleDelete)
case r.AuthUserList != nil:
ar.resp, ar.err = a.s.applyV3.UserList(r.AuthUserList)
case r.AuthRoleList != nil:
ar.resp, ar.err = a.s.applyV3.RoleList(r.AuthRoleList)
default:
panic("not implemented")
}
@@ -157,8 +210,7 @@ func (a *applierV3backend) Range(txnID int64, r *pb.RangeRequest) (*pb.RangeResp
resp.Header = &pb.ResponseHeader{}
var (
kvs []storagepb.KeyValue
rev int64
rr *mvcc.RangeResult
err error
)
@@ -176,13 +228,19 @@ func (a *applierV3backend) Range(txnID int64, r *pb.RangeRequest) (*pb.RangeResp
limit = limit + 1
}
ro := mvcc.RangeOptions{
Limit: limit,
Rev: r.Revision,
Count: r.CountOnly,
}
if txnID != noTxn {
kvs, rev, err = a.s.KV().TxnRange(txnID, r.Key, r.RangeEnd, limit, r.Revision)
rr, err = a.s.KV().TxnRange(txnID, r.Key, r.RangeEnd, ro)
if err != nil {
return nil, err
}
} else {
kvs, rev, err = a.s.KV().Range(r.Key, r.RangeEnd, limit, r.Revision)
rr, err = a.s.KV().Range(r.Key, r.RangeEnd, ro)
if err != nil {
return nil, err
}
@@ -192,15 +250,15 @@ func (a *applierV3backend) Range(txnID int64, r *pb.RangeRequest) (*pb.RangeResp
var sorter sort.Interface
switch {
case r.SortTarget == pb.RangeRequest_KEY:
sorter = &kvSortByKey{&kvSort{kvs}}
sorter = &kvSortByKey{&kvSort{rr.KVs}}
case r.SortTarget == pb.RangeRequest_VERSION:
sorter = &kvSortByVersion{&kvSort{kvs}}
sorter = &kvSortByVersion{&kvSort{rr.KVs}}
case r.SortTarget == pb.RangeRequest_CREATE:
sorter = &kvSortByCreate{&kvSort{kvs}}
sorter = &kvSortByCreate{&kvSort{rr.KVs}}
case r.SortTarget == pb.RangeRequest_MOD:
sorter = &kvSortByMod{&kvSort{kvs}}
sorter = &kvSortByMod{&kvSort{rr.KVs}}
case r.SortTarget == pb.RangeRequest_VALUE:
sorter = &kvSortByValue{&kvSort{kvs}}
sorter = &kvSortByValue{&kvSort{rr.KVs}}
}
switch {
case r.SortOrder == pb.RangeRequest_ASCEND:
@@ -210,29 +268,31 @@ func (a *applierV3backend) Range(txnID int64, r *pb.RangeRequest) (*pb.RangeResp
}
}
if r.Limit > 0 && len(kvs) > int(r.Limit) {
kvs = kvs[:r.Limit]
if r.Limit > 0 && len(rr.KVs) > int(r.Limit) {
rr.KVs = rr.KVs[:r.Limit]
resp.More = true
}
resp.Header.Revision = rev
for i := range kvs {
resp.Kvs = append(resp.Kvs, &kvs[i])
resp.Header.Revision = rr.Rev
resp.Count = int64(rr.Count)
for i := range rr.KVs {
if r.KeysOnly {
rr.KVs[i].Value = nil
}
resp.Kvs = append(resp.Kvs, &rr.KVs[i])
}
return resp, nil
}
func (a *applierV3backend) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error) {
var revision int64
ok := true
for _, c := range rt.Compare {
if revision, ok = a.applyCompare(c); !ok {
if _, ok = a.applyCompare(c); !ok {
break
}
}
var reqs []*pb.RequestUnion
var reqs []*pb.RequestOp
if ok {
reqs = rt.Success
} else {
@@ -246,6 +306,8 @@ func (a *applierV3backend) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error) {
return nil, err
}
revision := a.s.KV().Rev()
// When executing the operations of txn, we need to hold the txn lock.
// So the reader will not see any intermediate results.
txnID := a.s.KV().TxnBegin()
@@ -256,12 +318,16 @@ func (a *applierV3backend) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error) {
}
}()
resps := make([]*pb.ResponseUnion, len(reqs))
resps := make([]*pb.ResponseOp, len(reqs))
changedKV := false
for i := range reqs {
if reqs[i].GetRequestRange() == nil {
changedKV = true
}
resps[i] = a.applyUnion(txnID, reqs[i])
}
if len(resps) != 0 {
if changedKV {
revision += 1
}
@@ -277,16 +343,18 @@ func (a *applierV3backend) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error) {
// It returns the revision at which the comparison happens. If the comparison
// succeeds, it returns true; otherwise, it returns false.
func (a *applierV3backend) applyCompare(c *pb.Compare) (int64, bool) {
ckvs, rev, err := a.s.KV().Range(c.Key, nil, 1, 0)
rr, err := a.s.KV().Range(c.Key, nil, mvcc.RangeOptions{})
rev := rr.Rev
if err != nil {
if err == dstorage.ErrTxnIDMismatch {
if err == mvcc.ErrTxnIDMismatch {
panic("unexpected txn ID mismatch error")
}
return rev, false
}
var ckv storagepb.KeyValue
if len(ckvs) != 0 {
ckv = ckvs[0]
var ckv mvccpb.KeyValue
if len(rr.KVs) != 0 {
ckv = rr.KVs[0]
} else {
// Use the zero value of ckv normally. However...
if c.Target == pb.Compare_VALUE {
@@ -341,31 +409,31 @@ func (a *applierV3backend) applyCompare(c *pb.Compare) (int64, bool) {
return rev, true
}
func (a *applierV3backend) applyUnion(txnID int64, union *pb.RequestUnion) *pb.ResponseUnion {
func (a *applierV3backend) applyUnion(txnID int64, union *pb.RequestOp) *pb.ResponseOp {
switch tv := union.Request.(type) {
case *pb.RequestUnion_RequestRange:
case *pb.RequestOp_RequestRange:
if tv.RequestRange != nil {
resp, err := a.Range(txnID, tv.RequestRange)
if err != nil {
panic("unexpected error during txn")
}
return &pb.ResponseUnion{Response: &pb.ResponseUnion_ResponseRange{ResponseRange: resp}}
return &pb.ResponseOp{Response: &pb.ResponseOp_ResponseRange{ResponseRange: resp}}
}
case *pb.RequestUnion_RequestPut:
case *pb.RequestOp_RequestPut:
if tv.RequestPut != nil {
resp, err := a.Put(txnID, tv.RequestPut)
if err != nil {
panic("unexpected error during txn")
}
return &pb.ResponseUnion{Response: &pb.ResponseUnion_ResponsePut{ResponsePut: resp}}
return &pb.ResponseOp{Response: &pb.ResponseOp_ResponsePut{ResponsePut: resp}}
}
case *pb.RequestUnion_RequestDeleteRange:
case *pb.RequestOp_RequestDeleteRange:
if tv.RequestDeleteRange != nil {
resp, err := a.DeleteRange(txnID, tv.RequestDeleteRange)
if err != nil {
panic("unexpected error during txn")
}
return &pb.ResponseUnion{Response: &pb.ResponseUnion_ResponseDeleteRange{ResponseDeleteRange: resp}}
return &pb.ResponseOp{Response: &pb.ResponseOp_ResponseDeleteRange{ResponseDeleteRange: resp}}
}
default:
// empty union
@@ -383,7 +451,8 @@ func (a *applierV3backend) Compaction(compaction *pb.CompactionRequest) (*pb.Com
return nil, ch, err
}
// get the current revision; which key to get is not important.
_, resp.Header.Revision, _ = a.s.KV().Range([]byte("compaction"), nil, 1, 0)
rr, _ := a.s.KV().Range([]byte("compaction"), nil, mvcc.RangeOptions{})
resp.Header.Revision = rr.Rev
return resp, ch, err
}
@@ -393,13 +462,15 @@ func (a *applierV3backend) LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantR
if err == nil {
resp.ID = int64(l.ID)
resp.TTL = l.TTL
resp.Header = &pb.ResponseHeader{Revision: a.s.KV().Rev()}
}
return resp, err
}
func (a *applierV3backend) LeaseRevoke(lc *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
err := a.s.lessor.Revoke(lease.LeaseID(lc.ID))
return &pb.LeaseRevokeResponse{}, err
return &pb.LeaseRevokeResponse{Header: &pb.ResponseHeader{Revision: a.s.KV().Rev()}}, err
}
func (a *applierV3backend) Alarm(ar *pb.AlarmRequest) (*pb.AlarmResponse, error) {
@@ -441,7 +512,7 @@ func (a *applierV3backend) Alarm(ar *pb.AlarmRequest) (*pb.AlarmResponse, error)
switch m.Alarm {
case pb.AlarmType_NOSPACE:
plog.Infof("alarm disarmed %+v", ar)
a.s.applyV3 = newQuotaApplierV3(a.s, &applierV3backend{a.s})
a.s.applyV3 = a.s.newApplierV3()
default:
plog.Errorf("unimplemented alarm deactivation (%+v)", m)
}
@@ -476,10 +547,23 @@ func (a *applierV3Capped) LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantRe
}
func (a *applierV3backend) AuthEnable() (*pb.AuthEnableResponse, error) {
a.s.AuthStore().AuthEnable()
err := a.s.AuthStore().AuthEnable()
if err != nil {
return nil, err
}
return &pb.AuthEnableResponse{}, nil
}
func (a *applierV3backend) AuthDisable() (*pb.AuthDisableResponse, error) {
a.s.AuthStore().AuthDisable()
return &pb.AuthDisableResponse{}, nil
}
func (a *applierV3backend) Authenticate(r *pb.InternalAuthenticateRequest) (*pb.AuthenticateResponse, error) {
ctx := context.WithValue(context.WithValue(context.TODO(), "index", a.s.consistIndex.ConsistentIndex()), "simpleToken", r.SimpleToken)
return a.s.AuthStore().Authenticate(ctx, r.Name, r.Password)
}
func (a *applierV3backend) UserAdd(r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error) {
return a.s.AuthStore().UserAdd(r)
}
@@ -492,10 +576,46 @@ func (a *applierV3backend) UserChangePassword(r *pb.AuthUserChangePasswordReques
return a.s.AuthStore().UserChangePassword(r)
}
func (a *applierV3backend) UserGrantRole(r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error) {
return a.s.AuthStore().UserGrantRole(r)
}
func (a *applierV3backend) UserGet(r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error) {
return a.s.AuthStore().UserGet(r)
}
func (a *applierV3backend) UserRevokeRole(r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error) {
return a.s.AuthStore().UserRevokeRole(r)
}
func (a *applierV3backend) RoleAdd(r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error) {
return a.s.AuthStore().RoleAdd(r)
}
func (a *applierV3backend) RoleGrantPermission(r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error) {
return a.s.AuthStore().RoleGrantPermission(r)
}
func (a *applierV3backend) RoleGet(r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error) {
return a.s.AuthStore().RoleGet(r)
}
func (a *applierV3backend) RoleRevokePermission(r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error) {
return a.s.AuthStore().RoleRevokePermission(r)
}
func (a *applierV3backend) RoleDelete(r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error) {
return a.s.AuthStore().RoleDelete(r)
}
func (a *applierV3backend) UserList(r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error) {
return a.s.AuthStore().UserList(r)
}
func (a *applierV3backend) RoleList(r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error) {
return a.s.AuthStore().RoleList(r)
}
type quotaApplierV3 struct {
applierV3
q Quota
@@ -532,7 +652,7 @@ func (a *quotaApplierV3) LeaseGrant(lc *pb.LeaseGrantRequest) (*pb.LeaseGrantRes
return resp, err
}
type kvSort struct{ kvs []storagepb.KeyValue }
type kvSort struct{ kvs []mvccpb.KeyValue }
func (s *kvSort) Swap(i, j int) {
t := s.kvs[i]
@@ -571,9 +691,9 @@ func (s *kvSortByValue) Less(i, j int) bool {
return bytes.Compare(s.kvs[i].Value, s.kvs[j].Value) < 0
}
func (a *applierV3backend) checkRequestLeases(reqs []*pb.RequestUnion) error {
func (a *applierV3backend) checkRequestLeases(reqs []*pb.RequestOp) error {
for _, requ := range reqs {
tv, ok := requ.Request.(*pb.RequestUnion_RequestPut)
tv, ok := requ.Request.(*pb.RequestOp_RequestPut)
if !ok {
continue
}
@@ -588,9 +708,9 @@ func (a *applierV3backend) checkRequestLeases(reqs []*pb.RequestUnion) error {
return nil
}
func (a *applierV3backend) checkRequestRange(reqs []*pb.RequestUnion) error {
func (a *applierV3backend) checkRequestRange(reqs []*pb.RequestOp) error {
for _, requ := range reqs {
tv, ok := requ.Request.(*pb.RequestUnion_RequestRange)
tv, ok := requ.Request.(*pb.RequestOp_RequestRange)
if !ok {
continue
}
@@ -600,10 +720,10 @@ func (a *applierV3backend) checkRequestRange(reqs []*pb.RequestUnion) error {
}
if greq.Revision > a.s.KV().Rev() {
return dstorage.ErrFutureRev
return mvcc.ErrFutureRev
}
if greq.Revision < a.s.KV().FirstRev() {
return dstorage.ErrCompacted
return mvcc.ErrCompacted
}
}
return nil

163
vendor/github.com/coreos/etcd/etcdserver/apply_auth.go generated vendored Normal file
View File

@@ -0,0 +1,163 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"sync"
"github.com/coreos/etcd/auth"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
)
type authApplierV3 struct {
applierV3
as auth.AuthStore
// mu serializes Apply so that the user field isn't corrupted and so that
// serialized requests don't leak data through TOCTOU errors
mu sync.Mutex
user string
}
func newAuthApplierV3(as auth.AuthStore, base applierV3) *authApplierV3 {
return &authApplierV3{applierV3: base, as: as}
}
func (aa *authApplierV3) Apply(r *pb.InternalRaftRequest) *applyResult {
aa.mu.Lock()
defer aa.mu.Unlock()
if r.Header != nil {
// backward-compatible with pre-3.0 releases, whose InternalRaftRequest
// did not have a header field
aa.user = r.Header.Username
}
if needAdminPermission(r) && !aa.as.IsAdminPermitted(aa.user) {
aa.user = ""
return &applyResult{err: auth.ErrPermissionDenied}
}
ret := aa.applierV3.Apply(r)
aa.user = ""
return ret
}
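A comment-style trace of the race the mutex prevents (hypothetical interleaving, for illustration only):
// Without aa.mu, two concurrent Applies could interleave:
//   goroutine A: aa.user = "alice"   (from A's request header)
//   goroutine B: aa.user = "bob"     (overwrites before A proceeds)
//   goroutine A: Put permission checked against "bob"
// i.e. a permission checked for one user but used for another --
// the TOCTOU leak the comment on aa.mu refers to.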
func (aa *authApplierV3) Put(txnID int64, r *pb.PutRequest) (*pb.PutResponse, error) {
if !aa.as.IsPutPermitted(aa.user, r.Key) {
return nil, auth.ErrPermissionDenied
}
return aa.applierV3.Put(txnID, r)
}
func (aa *authApplierV3) Range(txnID int64, r *pb.RangeRequest) (*pb.RangeResponse, error) {
if !aa.as.IsRangePermitted(aa.user, r.Key, r.RangeEnd) {
return nil, auth.ErrPermissionDenied
}
return aa.applierV3.Range(txnID, r)
}
func (aa *authApplierV3) DeleteRange(txnID int64, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
if !aa.as.IsDeleteRangePermitted(aa.user, r.Key, r.RangeEnd) {
return nil, auth.ErrPermissionDenied
}
return aa.applierV3.DeleteRange(txnID, r)
}
func (aa *authApplierV3) checkTxnReqsPermission(reqs []*pb.RequestOp) bool {
for _, requ := range reqs {
switch tv := requ.Request.(type) {
case *pb.RequestOp_RequestRange:
if tv.RequestRange == nil {
continue
}
if !aa.as.IsRangePermitted(aa.user, tv.RequestRange.Key, tv.RequestRange.RangeEnd) {
return false
}
case *pb.RequestOp_RequestPut:
if tv.RequestPut == nil {
continue
}
if !aa.as.IsPutPermitted(aa.user, tv.RequestPut.Key) {
return false
}
case *pb.RequestOp_RequestDeleteRange:
if tv.RequestDeleteRange == nil {
continue
}
if !aa.as.IsDeleteRangePermitted(aa.user, tv.RequestDeleteRange.Key, tv.RequestDeleteRange.RangeEnd) {
return false
}
}
}
return true
}
func (aa *authApplierV3) Txn(rt *pb.TxnRequest) (*pb.TxnResponse, error) {
for _, c := range rt.Compare {
if !aa.as.IsRangePermitted(aa.user, c.Key, nil) {
return nil, auth.ErrPermissionDenied
}
}
if !aa.checkTxnReqsPermission(rt.Success) {
return nil, auth.ErrPermissionDenied
}
if !aa.checkTxnReqsPermission(rt.Failure) {
return nil, auth.ErrPermissionDenied
}
return aa.applierV3.Txn(rt)
}
func needAdminPermission(r *pb.InternalRaftRequest) bool {
switch {
case r.AuthEnable != nil:
return true
case r.AuthDisable != nil:
return true
case r.AuthUserAdd != nil:
return true
case r.AuthUserDelete != nil:
return true
case r.AuthUserChangePassword != nil:
return true
case r.AuthUserGrantRole != nil:
return true
case r.AuthUserGet != nil:
return true
case r.AuthUserRevokeRole != nil:
return true
case r.AuthRoleAdd != nil:
return true
case r.AuthRoleGrantPermission != nil:
return true
case r.AuthRoleGet != nil:
return true
case r.AuthRoleRevokePermission != nil:
return true
case r.AuthRoleDelete != nil:
return true
case r.AuthUserList != nil:
return true
case r.AuthRoleList != nil:
return true
default:
return false
}
}

140
vendor/github.com/coreos/etcd/etcdserver/apply_v2.go generated vendored Normal file
View File

@@ -0,0 +1,140 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"encoding/json"
"path"
"time"
"github.com/coreos/etcd/etcdserver/api"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/pkg/pbutil"
"github.com/coreos/etcd/store"
"github.com/coreos/go-semver/semver"
)
// ApplierV2 is the interface for processing V2 raft messages
type ApplierV2 interface {
Delete(r *pb.Request) Response
Post(r *pb.Request) Response
Put(r *pb.Request) Response
QGet(r *pb.Request) Response
Sync(r *pb.Request) Response
}
func NewApplierV2(s store.Store, c *membership.RaftCluster) ApplierV2 {
return &applierV2store{store: s, cluster: c}
}
type applierV2store struct {
store store.Store
cluster *membership.RaftCluster
}
func (a *applierV2store) Delete(r *pb.Request) Response {
switch {
case r.PrevIndex > 0 || r.PrevValue != "":
return toResponse(a.store.CompareAndDelete(r.Path, r.PrevValue, r.PrevIndex))
default:
return toResponse(a.store.Delete(r.Path, r.Dir, r.Recursive))
}
}
func (a *applierV2store) Post(r *pb.Request) Response {
return toResponse(a.store.Create(r.Path, r.Dir, r.Val, true, toTTLOptions(r)))
}
func (a *applierV2store) Put(r *pb.Request) Response {
ttlOptions := toTTLOptions(r)
exists, existsSet := pbutil.GetBool(r.PrevExist)
switch {
case existsSet:
if exists {
if r.PrevIndex == 0 && r.PrevValue == "" {
return toResponse(a.store.Update(r.Path, r.Val, ttlOptions))
}
return toResponse(a.store.CompareAndSwap(r.Path, r.PrevValue, r.PrevIndex, r.Val, ttlOptions))
}
return toResponse(a.store.Create(r.Path, r.Dir, r.Val, false, ttlOptions))
case r.PrevIndex > 0 || r.PrevValue != "":
return toResponse(a.store.CompareAndSwap(r.Path, r.PrevValue, r.PrevIndex, r.Val, ttlOptions))
default:
if storeMemberAttributeRegexp.MatchString(r.Path) {
id := membership.MustParseMemberIDFromKey(path.Dir(r.Path))
var attr membership.Attributes
if err := json.Unmarshal([]byte(r.Val), &attr); err != nil {
plog.Panicf("unmarshal %s should never fail: %v", r.Val, err)
}
if a.cluster != nil {
a.cluster.UpdateAttributes(id, attr)
}
// return an empty response since there is no consumer.
return Response{}
}
if r.Path == membership.StoreClusterVersionKey() {
if a.cluster != nil {
a.cluster.SetVersion(semver.Must(semver.NewVersion(r.Val)), api.UpdateCapability)
}
// return an empty response since there is no consumer.
return Response{}
}
return toResponse(a.store.Set(r.Path, r.Dir, r.Val, ttlOptions))
}
}
func (a *applierV2store) QGet(r *pb.Request) Response {
return toResponse(a.store.Get(r.Path, r.Recursive, r.Sorted))
}
func (a *applierV2store) Sync(r *pb.Request) Response {
a.store.DeleteExpiredKeys(time.Unix(0, r.Time))
return Response{}
}
// applyV2Request interprets r as a call to store.X and returns a Response interpreted
// from store.Event
func (s *EtcdServer) applyV2Request(r *pb.Request) Response {
toTTLOptions(r)
switch r.Method {
case "POST":
return s.applyV2.Post(r)
case "PUT":
return s.applyV2.Put(r)
case "DELETE":
return s.applyV2.Delete(r)
case "QGET":
return s.applyV2.QGet(r)
case "SYNC":
return s.applyV2.Sync(r)
default:
// This should never be reached, but just in case:
return Response{err: ErrUnknownMethod}
}
}
func toTTLOptions(r *pb.Request) store.TTLOptionSet {
refresh, _ := pbutil.GetBool(r.Refresh)
ttlOptions := store.TTLOptionSet{Refresh: refresh}
if r.Expiration != 0 {
ttlOptions.ExpireTime = time.Unix(0, r.Expiration)
}
return ttlOptions
}
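A small worked example of the TTL translation above (field values are illustrative; pbutil.Boolp is assumed to be the usual bool-pointer helper from pkg/pbutil):
r := &pb.Request{
	Refresh:    pbutil.Boolp(true),
	Expiration: time.Now().Add(time.Minute).UnixNano(),
}
opts := toTTLOptions(r)
// opts.Refresh == true; opts.ExpireTime is one minute from now.
// With Expiration == 0, opts.ExpireTime stays the zero time.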
func toResponse(ev *store.Event, err error) Response {
return Response{Event: ev, err: err}
}

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -281,13 +281,7 @@ func (s *store) UpdateUser(user User) (User, error) {
return old, err
}
hash, err := s.HashPassword(user.Password)
if err != nil {
return old, err
}
user.Password = hash
newUser, err := old.merge(user)
newUser, err := old.merge(user, s.PasswordStore)
if err != nil {
return old, err
}
@@ -335,11 +329,7 @@ func (s *store) GetRole(name string) (Role, error) {
}
var r Role
err = json.Unmarshal([]byte(*resp.Event.Node.Value), &r)
if err != nil {
return r, err
}
return r, nil
return r, err
}
func (s *store) CreateRole(role Role) error {
@@ -452,29 +442,33 @@ func (s *store) DisableAuth() error {
// is called and returns a new User with these modifications applied. Think of
// all Users as immutable sets of data. Merge allows you to perform the set
// operations (desired grants and revokes) atomically
func (u User) merge(n User) (User, error) {
func (ou User) merge(nu User, s PasswordStore) (User, error) {
var out User
if u.User != n.User {
return out, authErr(http.StatusConflict, "Merging user data with conflicting usernames: %s %s", u.User, n.User)
if ou.User != nu.User {
return out, authErr(http.StatusConflict, "Merging user data with conflicting usernames: %s %s", ou.User, nu.User)
}
out.User = u.User
if n.Password != "" {
out.Password = n.Password
out.User = ou.User
if nu.Password != "" {
hash, err := s.HashPassword(nu.Password)
if err != nil {
return ou, err
}
out.Password = hash
} else {
out.Password = u.Password
out.Password = ou.Password
}
currentRoles := types.NewUnsafeSet(u.Roles...)
for _, g := range n.Grant {
currentRoles := types.NewUnsafeSet(ou.Roles...)
for _, g := range nu.Grant {
if currentRoles.Contains(g) {
plog.Noticef("granting duplicate role %s for user %s", g, n.User)
return User{}, authErr(http.StatusConflict, fmt.Sprintf("Granting duplicate role %s for user %s", g, n.User))
plog.Noticef("granting duplicate role %s for user %s", g, nu.User)
return User{}, authErr(http.StatusConflict, fmt.Sprintf("Granting duplicate role %s for user %s", g, nu.User))
}
currentRoles.Add(g)
}
for _, r := range n.Revoke {
for _, r := range nu.Revoke {
if !currentRoles.Contains(r) {
plog.Noticef("revoking ungranted role %s for user %s", r, n.User)
return User{}, authErr(http.StatusConflict, fmt.Sprintf("Revoking ungranted role %s for user %s", r, n.User))
plog.Noticef("revoking ungranted role %s for user %s", r, nu.User)
return User{}, authErr(http.StatusConflict, fmt.Sprintf("Revoking ungranted role %s for user %s", r, nu.User))
}
currentRoles.Remove(r)
}
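A worked example of the merge semantics above (values are illustrative; s stands for the store's PasswordStore):
old := User{User: "alice", Roles: []string{"root", "ops"}}
upd := User{User: "alice", Grant: []string{"dev"}, Revoke: []string{"ops"}}
merged, err := old.merge(upd, s) // merged.Roles == {"root", "dev"}, err == nil
// Granting an already-held role or revoking an ungranted one fails the
// whole merge with a 409 conflict instead of applying partially.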
@@ -497,10 +491,7 @@ func (r Role) merge(n Role) (Role, error) {
return out, err
}
out.Permissions, err = out.Permissions.Revoke(n.Revoke)
if err != nil {
return out, err
}
return out, nil
return out, err
}
func (r Role) HasKeyAccess(key string, write bool) bool {

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -73,6 +73,7 @@ func getClusterFromRemotePeers(urls []string, timeout time.Duration, logerr bool
continue
}
b, err := ioutil.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
if logerr {
plog.Warningf("could not read the body of cluster response: %v", err)

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -56,6 +56,9 @@ type ServerConfig struct {
StrictReconfigCheck bool
EnablePprof bool
// ClientCertAuthEnabled is true when cert has been signed by the client CA.
ClientCertAuthEnabled bool
}
// VerifyBootstrap sanity-checks the initial config for bootstrap case

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -14,12 +14,20 @@
package etcdserver
import (
"sync/atomic"
)
// consistentIndex represents the offset of an entry in a consistent replica log.
// It implements the storage.ConsistentIndexGetter interface.
// It implements the mvcc.ConsistentIndexGetter interface.
// It is always set to the offset of the current entry before the entry is executed,
// so ConsistentWatchableKV can read the consistent index from it.
type consistentIndex uint64
func (i *consistentIndex) setConsistentIndex(v uint64) { *i = consistentIndex(v) }
func (i *consistentIndex) setConsistentIndex(v uint64) {
atomic.StoreUint64((*uint64)(i), v)
}
func (i *consistentIndex) ConsistentIndex() uint64 { return uint64(*i) }
func (i *consistentIndex) ConsistentIndex() uint64 {
return atomic.LoadUint64((*uint64)(i))
}
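The atomic accessors matter because the apply loop writes this index while the mvcc backend reads it concurrently; with plain assignment that pair is a data race. A minimal concurrent sketch (illustrative only; this passes the race detector, where the old code would not):
var ci consistentIndex
done := make(chan struct{})
go func() { // writer: the apply path records each entry's offset
	for v := uint64(1); v <= 1000; v++ {
		ci.setConsistentIndex(v)
	}
	close(done)
}()
for ci.ConsistentIndex() < 1000 { // reader: the storage backend polls
	// busy-wait for illustration only
}
<-done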

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -30,6 +30,7 @@ var (
ErrNoLeader = errors.New("etcdserver: no leader")
ErrRequestTooLarge = errors.New("etcdserver: request is too large")
ErrNoSpace = errors.New("etcdserver: no space")
ErrInvalidAuthToken = errors.New("etcdserver: invalid auth token")
)
type DiscoveryError struct {

View File

@@ -13,8 +13,10 @@
It has these top-level messages:
Request
Metadata
RequestHeader
InternalRaftRequest
EmptyResponse
InternalAuthenticateRequest
ResponseHeader
RangeRequest
RangeResponse
@@ -22,8 +24,8 @@
PutResponse
DeleteRangeRequest
DeleteRangeResponse
RequestUnion
ResponseUnion
RequestOp
ResponseOp
Compare
TxnRequest
TxnResponse
@@ -31,6 +33,8 @@
CompactionResponse
HashRequest
HashResponse
SnapshotRequest
SnapshotResponse
WatchRequest
WatchCreateRequest
WatchCancelRequest
@@ -64,13 +68,15 @@
AuthUserGetRequest
AuthUserDeleteRequest
AuthUserChangePasswordRequest
AuthUserGrantRequest
AuthUserRevokeRequest
AuthUserGrantRoleRequest
AuthUserRevokeRoleRequest
AuthRoleAddRequest
AuthRoleGetRequest
AuthUserListRequest
AuthRoleListRequest
AuthRoleDeleteRequest
AuthRoleGrantRequest
AuthRoleRevokeRequest
AuthRoleGrantPermissionRequest
AuthRoleRevokePermissionRequest
AuthEnableResponse
AuthDisableResponse
AuthenticateResponse
@@ -78,20 +84,22 @@
AuthUserGetResponse
AuthUserDeleteResponse
AuthUserChangePasswordResponse
AuthUserGrantResponse
AuthUserRevokeResponse
AuthUserGrantRoleResponse
AuthUserRevokeRoleResponse
AuthRoleAddResponse
AuthRoleGetResponse
AuthRoleListResponse
AuthUserListResponse
AuthRoleDeleteResponse
AuthRoleGrantResponse
AuthRoleRevokeResponse
AuthRoleGrantPermissionResponse
AuthRoleRevokePermissionResponse
*/
package etcdserverpb
import (
"fmt"
proto "github.com/gogo/protobuf/proto"
proto "github.com/golang/protobuf/proto"
math "math"
)
@@ -103,40 +111,46 @@ var _ = proto.Marshal
var _ = fmt.Errorf
var _ = math.Inf
// This is a compile-time assertion to ensure that this generated file
// is compatible with the proto package it is being compiled against.
const _ = proto.ProtoPackageIsVersion1
type Request struct {
ID uint64 `protobuf:"varint,1,opt,name=ID" json:"ID"`
Method string `protobuf:"bytes,2,opt,name=Method" json:"Method"`
Path string `protobuf:"bytes,3,opt,name=Path" json:"Path"`
Val string `protobuf:"bytes,4,opt,name=Val" json:"Val"`
Dir bool `protobuf:"varint,5,opt,name=Dir" json:"Dir"`
PrevValue string `protobuf:"bytes,6,opt,name=PrevValue" json:"PrevValue"`
PrevIndex uint64 `protobuf:"varint,7,opt,name=PrevIndex" json:"PrevIndex"`
PrevExist *bool `protobuf:"varint,8,opt,name=PrevExist" json:"PrevExist,omitempty"`
Expiration int64 `protobuf:"varint,9,opt,name=Expiration" json:"Expiration"`
Wait bool `protobuf:"varint,10,opt,name=Wait" json:"Wait"`
Since uint64 `protobuf:"varint,11,opt,name=Since" json:"Since"`
Recursive bool `protobuf:"varint,12,opt,name=Recursive" json:"Recursive"`
Sorted bool `protobuf:"varint,13,opt,name=Sorted" json:"Sorted"`
Quorum bool `protobuf:"varint,14,opt,name=Quorum" json:"Quorum"`
Time int64 `protobuf:"varint,15,opt,name=Time" json:"Time"`
Stream bool `protobuf:"varint,16,opt,name=Stream" json:"Stream"`
Refresh *bool `protobuf:"varint,17,opt,name=Refresh" json:"Refresh,omitempty"`
ID uint64 `protobuf:"varint,1,opt,name=ID,json=iD" json:"ID"`
Method string `protobuf:"bytes,2,opt,name=Method,json=method" json:"Method"`
Path string `protobuf:"bytes,3,opt,name=Path,json=path" json:"Path"`
Val string `protobuf:"bytes,4,opt,name=Val,json=val" json:"Val"`
Dir bool `protobuf:"varint,5,opt,name=Dir,json=dir" json:"Dir"`
PrevValue string `protobuf:"bytes,6,opt,name=PrevValue,json=prevValue" json:"PrevValue"`
PrevIndex uint64 `protobuf:"varint,7,opt,name=PrevIndex,json=prevIndex" json:"PrevIndex"`
PrevExist *bool `protobuf:"varint,8,opt,name=PrevExist,json=prevExist" json:"PrevExist,omitempty"`
Expiration int64 `protobuf:"varint,9,opt,name=Expiration,json=expiration" json:"Expiration"`
Wait bool `protobuf:"varint,10,opt,name=Wait,json=wait" json:"Wait"`
Since uint64 `protobuf:"varint,11,opt,name=Since,json=since" json:"Since"`
Recursive bool `protobuf:"varint,12,opt,name=Recursive,json=recursive" json:"Recursive"`
Sorted bool `protobuf:"varint,13,opt,name=Sorted,json=sorted" json:"Sorted"`
Quorum bool `protobuf:"varint,14,opt,name=Quorum,json=quorum" json:"Quorum"`
Time int64 `protobuf:"varint,15,opt,name=Time,json=time" json:"Time"`
Stream bool `protobuf:"varint,16,opt,name=Stream,json=stream" json:"Stream"`
Refresh *bool `protobuf:"varint,17,opt,name=Refresh,json=refresh" json:"Refresh,omitempty"`
XXX_unrecognized []byte `json:"-"`
}
func (m *Request) Reset() { *m = Request{} }
func (m *Request) String() string { return proto.CompactTextString(m) }
func (*Request) ProtoMessage() {}
func (m *Request) Reset() { *m = Request{} }
func (m *Request) String() string { return proto.CompactTextString(m) }
func (*Request) ProtoMessage() {}
func (*Request) Descriptor() ([]byte, []int) { return fileDescriptorEtcdserver, []int{0} }
type Metadata struct {
NodeID uint64 `protobuf:"varint,1,opt,name=NodeID" json:"NodeID"`
ClusterID uint64 `protobuf:"varint,2,opt,name=ClusterID" json:"ClusterID"`
NodeID uint64 `protobuf:"varint,1,opt,name=NodeID,json=nodeID" json:"NodeID"`
ClusterID uint64 `protobuf:"varint,2,opt,name=ClusterID,json=clusterID" json:"ClusterID"`
XXX_unrecognized []byte `json:"-"`
}
func (m *Metadata) Reset() { *m = Metadata{} }
func (m *Metadata) String() string { return proto.CompactTextString(m) }
func (*Metadata) ProtoMessage() {}
func (m *Metadata) Reset() { *m = Metadata{} }
func (m *Metadata) String() string { return proto.CompactTextString(m) }
func (*Metadata) ProtoMessage() {}
func (*Metadata) Descriptor() ([]byte, []int) { return fileDescriptorEtcdserver, []int{1} }
func init() {
proto.RegisterType((*Request)(nil), "etcdserverpb.Request")
@@ -995,3 +1009,33 @@ var (
ErrInvalidLengthEtcdserver = fmt.Errorf("proto: negative length found during unmarshaling")
ErrIntOverflowEtcdserver = fmt.Errorf("proto: integer overflow")
)
var fileDescriptorEtcdserver = []byte{
// 404 bytes of a gzipped FileDescriptorProto
0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x5c, 0x92, 0x41, 0x6e, 0x13, 0x31,
0x14, 0x86, 0xe3, 0xc4, 0x99, 0x64, 0x4c, 0x81, 0x62, 0x45, 0xe8, 0xa9, 0x42, 0x43, 0x14, 0xb1,
0xc8, 0x0a, 0xee, 0x50, 0xd2, 0x45, 0x24, 0x8a, 0x4a, 0x8a, 0xca, 0xda, 0x64, 0x1e, 0x8d, 0xa5,
0xcc, 0x78, 0x6a, 0xbf, 0x19, 0x72, 0x03, 0xae, 0xc0, 0x91, 0xb2, 0xe4, 0x04, 0x08, 0xc2, 0x45,
0x90, 0x3d, 0x9d, 0x60, 0xba, 0xb3, 0xbe, 0xff, 0xf7, 0xef, 0xdf, 0xf6, 0x13, 0xa7, 0x48, 0xeb,
0xdc, 0xa1, 0x6d, 0xd0, 0xbe, 0xae, 0xac, 0x21, 0x23, 0x4f, 0xfe, 0x91, 0xea, 0xf3, 0xd9, 0xe4,
0xd6, 0xdc, 0x9a, 0x20, 0xbc, 0xf1, 0xab, 0xd6, 0x33, 0xfb, 0xc6, 0xc5, 0x68, 0x85, 0x77, 0x35,
0x3a, 0x92, 0x13, 0xd1, 0x5f, 0x2e, 0x80, 0x4d, 0xd9, 0x9c, 0x9f, 0xf3, 0xfd, 0xcf, 0x97, 0xbd,
0x55, 0x5f, 0x2f, 0xe4, 0x0b, 0x91, 0x5c, 0x22, 0x6d, 0x4c, 0x0e, 0xfd, 0x29, 0x9b, 0xa7, 0xf7,
0x4a, 0x52, 0x04, 0x26, 0x41, 0xf0, 0x2b, 0x45, 0x1b, 0x18, 0x44, 0x1a, 0xaf, 0x14, 0x6d, 0xe4,
0x73, 0x31, 0xb8, 0x51, 0x5b, 0xe0, 0x91, 0x30, 0x68, 0xd4, 0xd6, 0xf3, 0x85, 0xb6, 0x30, 0x9c,
0xb2, 0xf9, 0xb8, 0xe3, 0xb9, 0xb6, 0x72, 0x26, 0xd2, 0x2b, 0x8b, 0xcd, 0x8d, 0xda, 0xd6, 0x08,
0x49, 0xb4, 0x2b, 0xad, 0x3a, 0xdc, 0x79, 0x96, 0x65, 0x8e, 0x3b, 0x18, 0x45, 0x45, 0x83, 0x27,
0xe0, 0xce, 0x73, 0xb1, 0xd3, 0x8e, 0x60, 0x7c, 0x3c, 0x85, 0xb5, 0x9e, 0x80, 0xe5, 0x2b, 0x21,
0x2e, 0x76, 0x95, 0xb6, 0x8a, 0xb4, 0x29, 0x21, 0x9d, 0xb2, 0xf9, 0xe0, 0x3e, 0x48, 0xe0, 0x91,
0xfb, 0xbb, 0x7d, 0x52, 0x9a, 0x40, 0x44, 0x55, 0xf9, 0x57, 0xa5, 0x49, 0x9e, 0x89, 0xe1, 0xb5,
0x2e, 0xd7, 0x08, 0x8f, 0xa2, 0x0e, 0x43, 0xe7, 0x91, 0x3f, 0x7f, 0x85, 0xeb, 0xda, 0x3a, 0xdd,
0x20, 0x9c, 0x44, 0x5b, 0x53, 0xdb, 0x61, 0xff, 0xa6, 0xd7, 0xc6, 0x12, 0xe6, 0xf0, 0x38, 0x32,
0x24, 0x2e, 0x30, 0xaf, 0x7e, 0xa8, 0x8d, 0xad, 0x0b, 0x78, 0x12, 0xab, 0x77, 0x81, 0xf9, 0x56,
0x1f, 0x75, 0x81, 0xf0, 0x34, 0x6a, 0xcd, 0x49, 0x17, 0x6d, 0x2a, 0x59, 0x54, 0x05, 0x9c, 0xfe,
0x97, 0x1a, 0x98, 0xcc, 0xfc, 0x47, 0x7f, 0xb1, 0xe8, 0x36, 0xf0, 0x2c, 0x7a, 0x95, 0x91, 0x6d,
0xe1, 0xec, 0x9d, 0x18, 0x5f, 0x22, 0xa9, 0x5c, 0x91, 0xf2, 0x49, 0xef, 0x4d, 0x8e, 0x0f, 0xa6,
0x21, 0x29, 0x03, 0xf3, 0x37, 0x7c, 0xbb, 0xad, 0x1d, 0xa1, 0x5d, 0x2e, 0xc2, 0x50, 0x1c, 0x7f,
0x61, 0xdd, 0xe1, 0xf3, 0xc9, 0xfe, 0x77, 0xd6, 0xdb, 0x1f, 0x32, 0xf6, 0xe3, 0x90, 0xb1, 0x5f,
0x87, 0x8c, 0x7d, 0xff, 0x93, 0xf5, 0xfe, 0x06, 0x00, 0x00, 0xff, 0xff, 0x80, 0x62, 0xfc, 0x40,
0xa4, 0x02, 0x00, 0x00,
}

File diff suppressed because it is too large

View File

@@ -10,10 +10,18 @@ option (gogoproto.sizer_all) = true;
option (gogoproto.unmarshaler_all) = true;
option (gogoproto.goproto_getters_all) = false;
message RequestHeader {
uint64 ID = 1;
// username is the name associated with the auth token of the gRPC connection
string username = 2;
}
// An InternalRaftRequest is the union of all requests which can be
// sent via raft.
message InternalRaftRequest {
RequestHeader header = 100;
uint64 ID = 1;
Request v2 = 2;
RangeRequest range = 3;
@@ -25,14 +33,40 @@ message InternalRaftRequest {
LeaseGrantRequest lease_grant = 8;
LeaseRevokeRequest lease_revoke = 9;
AuthEnableRequest auth_enable = 10;
AuthUserAddRequest auth_user_add = 11;
AuthUserDeleteRequest auth_user_delete = 12;
AuthUserChangePasswordRequest auth_user_change_password = 13;
AuthRoleAddRequest auth_role_add = 14;
AlarmRequest alarm = 10;
AlarmRequest alarm = 15;
AuthEnableRequest auth_enable = 1000;
AuthDisableRequest auth_disable = 1011;
InternalAuthenticateRequest authenticate = 1012;
AuthUserAddRequest auth_user_add = 1100;
AuthUserDeleteRequest auth_user_delete = 1101;
AuthUserGetRequest auth_user_get = 1102;
AuthUserChangePasswordRequest auth_user_change_password = 1103;
AuthUserGrantRoleRequest auth_user_grant_role = 1104;
AuthUserRevokeRoleRequest auth_user_revoke_role = 1105;
AuthUserListRequest auth_user_list = 1106;
AuthRoleListRequest auth_role_list = 1107;
AuthRoleAddRequest auth_role_add = 1200;
AuthRoleDeleteRequest auth_role_delete = 1201;
AuthRoleGetRequest auth_role_get = 1202;
AuthRoleGrantPermissionRequest auth_role_grant_permission = 1203;
AuthRoleRevokePermissionRequest auth_role_revoke_permission = 1204;
}
message EmptyResponse {
}
// What is the difference between AuthenticateRequest (defined in rpc.proto) and InternalAuthenticateRequest?
// InternalAuthenticateRequest has a member that is filled by etcdserver and shouldn't be user-facing.
// To avoid misuse of that field, we have an internal version of AuthenticateRequest.
message InternalAuthenticateRequest {
string name = 1;
string password = 2;
// simple_token is generated in the API layer (etcdserver/v3_server.go)
string simple_token = 3;
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -2,56 +2,107 @@ syntax = "proto3";
package etcdserverpb;
import "gogoproto/gogo.proto";
import "etcd/storage/storagepb/kv.proto";
import "etcd/mvcc/mvccpb/kv.proto";
import "etcd/auth/authpb/auth.proto";
// for grpc-gateway
import "google/api/annotations.proto";
option (gogoproto.marshaler_all) = true;
option (gogoproto.unmarshaler_all) = true;
service KV {
// Range gets the keys in the range from the store.
rpc Range(RangeRequest) returns (RangeResponse) {}
// Range gets the keys in the range from the key-value store.
rpc Range(RangeRequest) returns (RangeResponse) {
option (google.api.http) = {
post: "/v3alpha/kv/range"
body: "*"
};
}
// Put puts the given key into the store.
// A put request increases the revision of the store,
// Put puts the given key into the key-value store.
// A put request increments the revision of the key-value store
// and generates one event in the event history.
rpc Put(PutRequest) returns (PutResponse) {}
rpc Put(PutRequest) returns (PutResponse) {
option (google.api.http) = {
post: "/v3alpha/kv/put"
body: "*"
};
}
// Delete deletes the given range from the store.
// A delete request increase the revision of the store,
// and generates one event in the event history.
rpc DeleteRange(DeleteRangeRequest) returns (DeleteRangeResponse) {}
// DeleteRange deletes the given range from the key-value store.
// A delete request increments the revision of the key-value store
// and generates a delete event in the event history for every deleted key.
rpc DeleteRange(DeleteRangeRequest) returns (DeleteRangeResponse) {
option (google.api.http) = {
post: "/v3alpha/kv/deleterange"
body: "*"
};
}
// Txn processes all the requests in one transaction.
// A txn request increases the revision of the store,
// and generates events with the same revision in the event history.
// Txn processes multiple requests in a single transaction.
// A txn request increments the revision of the key-value store
// and generates events with the same revision for every completed request.
// It is not allowed to modify the same key several times within one txn.
rpc Txn(TxnRequest) returns (TxnResponse) {}
rpc Txn(TxnRequest) returns (TxnResponse) {
option (google.api.http) = {
post: "/v3alpha/kv/txn"
body: "*"
};
}
// Compact compacts the event history in etcd. User should compact the
// event history periodically, or it will grow infinitely.
rpc Compact(CompactionRequest) returns (CompactionResponse) {}
// Compact compacts the event history in the etcd key-value store. The key-value
// store should be periodically compacted or the event history will continue to grow
// indefinitely.
rpc Compact(CompactionRequest) returns (CompactionResponse) {
option (google.api.http) = {
post: "/v3alpha/kv/compaction"
body: "*"
};
}
}
service Watch {
// Watch watches the events happening or happened. Both input and output
// are stream. One watch rpc can watch for multiple keys or prefixs and
// get a stream of events. The whole events history can be watched unless
// compacted.
rpc Watch(stream WatchRequest) returns (stream WatchResponse) {}
// Watch watches for events happening or that have happened. Both input and output
// are streams; the input stream is for creating and canceling watchers and the output
// stream sends events. One watch RPC can watch on multiple key ranges, streaming events
// for several watches at once. The entire event history can be watched starting from the
// last compaction revision.
rpc Watch(stream WatchRequest) returns (stream WatchResponse) {
option (google.api.http) = {
post: "/v3alpha/watch"
body: "*"
};
}
}
service Lease {
// LeaseGrant creates a lease. A lease has a TTL. The lease will expire if the
// server does not receive a keepAlive within TTL from the lease holder.
// All keys attached to the lease will be expired and deleted if the lease expires.
// The key expiration generates an event in event history.
rpc LeaseGrant(LeaseGrantRequest) returns (LeaseGrantResponse) {}
// LeaseGrant creates a lease which expires if the server does not receive a keepAlive
// within a given time to live period. All keys attached to the lease will be expired and
// deleted if the lease expires. Each expired key generates a delete event in the event history.
rpc LeaseGrant(LeaseGrantRequest) returns (LeaseGrantResponse) {
option (google.api.http) = {
post: "/v3alpha/lease/grant"
body: "*"
};
}
// LeaseRevoke revokes a lease. All the key attached to the lease will be expired and deleted.
rpc LeaseRevoke(LeaseRevokeRequest) returns (LeaseRevokeResponse) {}
// LeaseRevoke revokes a lease. All keys attached to the lease will expire and be deleted.
rpc LeaseRevoke(LeaseRevokeRequest) returns (LeaseRevokeResponse) {
option (google.api.http) = {
post: "/v3alpha/kv/lease/revoke"
body: "*"
};
}
// KeepAlive keeps the lease alive.
rpc LeaseKeepAlive(stream LeaseKeepAliveRequest) returns (stream LeaseKeepAliveResponse) {}
// LeaseKeepAlive keeps the lease alive by streaming keep alive requests from the client
// to the server and streaming keep alive responses from the server to the client.
rpc LeaseKeepAlive(stream LeaseKeepAliveRequest) returns (stream LeaseKeepAliveResponse) {
option (google.api.http) = {
post: "/v3alpha/lease/keepalive"
body: "*"
};
}
// TODO(xiangli) List all existing Leases?
// TODO(xiangli) Get details information (expirations, leased keys, etc.) of a lease?
@@ -59,83 +110,220 @@ service Lease {
service Cluster {
// MemberAdd adds a member into the cluster.
rpc MemberAdd(MemberAddRequest) returns (MemberAddResponse) {}
rpc MemberAdd(MemberAddRequest) returns (MemberAddResponse) {
option (google.api.http) = {
post: "/v3alpha/cluster/member/add"
body: "*"
};
}
// MemberRemove removes an existing member from the cluster.
rpc MemberRemove(MemberRemoveRequest) returns (MemberRemoveResponse) {}
rpc MemberRemove(MemberRemoveRequest) returns (MemberRemoveResponse) {
option (google.api.http) = {
post: "/v3alpha/cluster/member/remove"
body: "*"
};
}
// MemberUpdate updates the member configuration.
rpc MemberUpdate(MemberUpdateRequest) returns (MemberUpdateResponse) {}
rpc MemberUpdate(MemberUpdateRequest) returns (MemberUpdateResponse) {
option (google.api.http) = {
post: "/v3alpha/cluster/member/update"
body: "*"
};
}
// MemberList lists all the members in the cluster.
rpc MemberList(MemberListRequest) returns (MemberListResponse) {}
rpc MemberList(MemberListRequest) returns (MemberListResponse) {
option (google.api.http) = {
post: "/v3alpha/cluster/member/list"
body: "*"
};
}
}
service Maintenance {
// Alarm activates, deactivates, and queries alarms regarding cluster health.
rpc Alarm(AlarmRequest) returns (AlarmResponse) {}
rpc Alarm(AlarmRequest) returns (AlarmResponse) {
option (google.api.http) = {
post: "/v3alpha/maintenance/alarm"
body: "*"
};
}
// Status gets the status of the member.
rpc Status(StatusRequest) returns (StatusResponse) {}
rpc Status(StatusRequest) returns (StatusResponse) {
option (google.api.http) = {
post: "/v3alpha/maintenance/status"
body: "*"
};
}
rpc Defragment(DefragmentRequest) returns (DefragmentResponse) {}
// Defragment defragments a member's backend database to recover storage space.
rpc Defragment(DefragmentRequest) returns (DefragmentResponse) {
option (google.api.http) = {
post: "/v3alpha/maintenance/defragment"
body: "*"
};
}
// Hash returns the hash of the local KV state for consistency checking purposes.
// This is designed for testing; do not use this in production when there
// are ongoing transactions.
rpc Hash(HashRequest) returns (HashResponse) {}
rpc Hash(HashRequest) returns (HashResponse) {
option (google.api.http) = {
post: "/v3alpha/maintenance/hash"
body: "*"
};
}
// Snapshot sends a snapshot of the entire backend from a member over a stream to a client.
rpc Snapshot(SnapshotRequest) returns (stream SnapshotResponse) {
option (google.api.http) = {
post: "/v3alpha/maintenance/snapshot"
body: "*"
};
}
}
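Snapshot is the one streaming Maintenance RPC; the vendored clientv3 is assumed to expose the chunked blob stream as an io.ReadCloser. A sketch under that assumption (the output path is a placeholder):

package main

import (
	"io"
	"log"
	"os"
	"time"

	"github.com/coreos/etcd/clientv3"
	"golang.org/x/net/context"
)

func main() {
	cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"127.0.0.1:2379"}, DialTimeout: 5 * time.Second})
	if err != nil {
		log.Fatal(err)
	}
	defer cli.Close()
	// Snapshot streams the entire backend as SnapshotResponse chunks;
	// the client presents them as a single reader.
	rc, err := cli.Snapshot(context.TODO())
	if err != nil {
		log.Fatal(err)
	}
	defer rc.Close()
	f, err := os.Create("backup.db") // placeholder path
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()
	if _, err := io.Copy(f, rc); err != nil {
		log.Fatal(err)
	}
}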
service Auth {
// AuthEnable enables authentication.
rpc AuthEnable(AuthEnableRequest) returns (AuthEnableResponse) {}
rpc AuthEnable(AuthEnableRequest) returns (AuthEnableResponse) {
option (google.api.http) = {
post: "/v3alpha/auth/enable"
body: "*"
};
}
// AuthDisable disables authentication.
rpc AuthDisable(AuthDisableRequest) returns (AuthDisableResponse) {}
rpc AuthDisable(AuthDisableRequest) returns (AuthDisableResponse) {
option (google.api.http) = {
post: "/v3alpha/auth/disable"
body: "*"
};
}
// Authenticate processes authenticate request.
rpc Authenticate(AuthenticateRequest) returns (AuthenticateResponse) {}
// Authenticate processes an authenticate request.
rpc Authenticate(AuthenticateRequest) returns (AuthenticateResponse) {
option (google.api.http) = {
post: "/v3alpha/auth/authenticate"
body: "*"
};
}
// UserAdd adds a new user.
rpc UserAdd(AuthUserAddRequest) returns (AuthUserAddResponse) {}
rpc UserAdd(AuthUserAddRequest) returns (AuthUserAddResponse) {
option (google.api.http) = {
post: "/v3alpha/auth/user/add"
body: "*"
};
}
// UserGet gets a detailed information of a user or lists entire users.
rpc UserGet(AuthUserGetRequest) returns (AuthUserGetResponse) {}
// UserGet gets detailed user information.
rpc UserGet(AuthUserGetRequest) returns (AuthUserGetResponse) {
option (google.api.http) = {
post: "/v3alpha/auth/user/get"
body: "*"
};
}
// UserList gets a list of all users.
rpc UserList(AuthUserListRequest) returns (AuthUserListResponse) {
option (google.api.http) = {
post: "/v3alpha/auth/user/list"
body: "*"
};
}
// UserDelete deletes a specified user.
rpc UserDelete(AuthUserDeleteRequest) returns (AuthUserDeleteResponse) {}
rpc UserDelete(AuthUserDeleteRequest) returns (AuthUserDeleteResponse) {
option (google.api.http) = {
post: "/v3alpha/auth/user/delete"
body: "*"
};
}
// UserChangePassword changes password of a specified user.
rpc UserChangePassword(AuthUserChangePasswordRequest) returns (AuthUserChangePasswordResponse) {}
// UserChangePassword changes the password of a specified user.
rpc UserChangePassword(AuthUserChangePasswordRequest) returns (AuthUserChangePasswordResponse) {
option (google.api.http) = {
post: "/v3alpha/auth/user/changepw"
body: "*"
};
}
// UserGrant grants a role to a specified user.
rpc UserGrant(AuthUserGrantRequest) returns (AuthUserGrantResponse) {}
rpc UserGrantRole(AuthUserGrantRoleRequest) returns (AuthUserGrantRoleResponse) {
option (google.api.http) = {
post: "/v3alpha/auth/user/grant"
body: "*"
};
}
// UserRevoke revokes a role of specified user.
rpc UserRevoke(AuthUserRevokeRequest) returns (AuthUserRevokeResponse) {}
 // UserRevokeRole revokes a role from a specified user.
rpc UserRevokeRole(AuthUserRevokeRoleRequest) returns (AuthUserRevokeRoleResponse) {
option (google.api.http) = {
post: "/v3alpha/auth/user/revoke"
body: "*"
};
}
// RoleAdd adds a new role.
rpc RoleAdd(AuthRoleAddRequest) returns (AuthRoleAddResponse) {}
rpc RoleAdd(AuthRoleAddRequest) returns (AuthRoleAddResponse) {
option (google.api.http) = {
post: "/v3alpha/auth/role/add"
body: "*"
};
}
// RoleGet gets a detailed information of a role or lists entire roles.
rpc RoleGet(AuthRoleGetRequest) returns (AuthRoleGetResponse) {}
// RoleGet gets detailed role information.
rpc RoleGet(AuthRoleGetRequest) returns (AuthRoleGetResponse) {
option (google.api.http) = {
post: "/v3alpha/auth/role/get"
body: "*"
};
}
 // RoleList gets a list of all roles.
rpc RoleList(AuthRoleListRequest) returns (AuthRoleListResponse) {
option (google.api.http) = {
post: "/v3alpha/auth/role/list"
body: "*"
};
}
// RoleDelete deletes a specified role.
rpc RoleDelete(AuthRoleDeleteRequest) returns (AuthRoleDeleteResponse) {}
rpc RoleDelete(AuthRoleDeleteRequest) returns (AuthRoleDeleteResponse) {
option (google.api.http) = {
post: "/v3alpha/auth/role/delete"
body: "*"
};
}
// RoleGrant grants a permission of a specified key or range to a specified role.
rpc RoleGrant(AuthRoleGrantRequest) returns (AuthRoleGrantResponse) {}
// RoleGrantPermission grants a permission of a specified key or range to a specified role.
rpc RoleGrantPermission(AuthRoleGrantPermissionRequest) returns (AuthRoleGrantPermissionResponse) {
option (google.api.http) = {
post: "/v3alpha/auth/role/grant"
body: "*"
};
}
// RoleRevoke revokes a key or range permission of a specified role.
rpc RoleRevoke(AuthRoleRevokeRequest) returns (AuthRoleRevokeResponse) {}
// RoleRevokePermission revokes a key or range permission of a specified role.
rpc RoleRevokePermission(AuthRoleRevokePermissionRequest) returns (AuthRoleRevokePermissionResponse) {
option (google.api.http) = {
post: "/v3alpha/auth/role/revoke"
body: "*"
};
}
}
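A minimal end-to-end sketch of the Auth RPCs through clientv3; the user, password, role, and key names are placeholders, and the exact client method signatures are assumptions based on this release.

package main

import (
	"log"
	"time"

	"github.com/coreos/etcd/clientv3"
	"golang.org/x/net/context"
)

func main() {
	cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"127.0.0.1:2379"}, DialTimeout: 5 * time.Second})
	if err != nil {
		log.Fatal(err)
	}
	defer cli.Close()
	ctx := context.TODO()
	// RoleAdd + RoleGrantPermission: allow read/write on a single (placeholder) key.
	if _, err = cli.RoleAdd(ctx, "app-role"); err != nil {
		log.Fatal(err)
	}
	if _, err = cli.RoleGrantPermission(ctx, "app-role", "app/config", "",
		clientv3.PermissionType(clientv3.PermReadWrite)); err != nil {
		log.Fatal(err)
	}
	// UserAdd + UserGrantRole, then AuthEnable to turn authentication on.
	if _, err = cli.UserAdd(ctx, "app", "secret"); err != nil {
		log.Fatal(err)
	}
	if _, err = cli.UserGrantRole(ctx, "app", "app-role"); err != nil {
		log.Fatal(err)
	}
	if _, err = cli.AuthEnable(ctx); err != nil {
		log.Fatal(err)
	}
}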
message ResponseHeader {
// cluster_id is the ID of the cluster which sent the response.
uint64 cluster_id = 1;
// member_id is the ID of the member which sent the response.
uint64 member_id = 2;
// revision of the store when the request was applied.
// revision is the key-value store revision when the request was applied.
int64 revision = 3;
// term of raft when the request was applied.
// raft_term is the raft term when the request was applied.
uint64 raft_term = 4;
}
@@ -153,43 +341,60 @@ message RangeRequest {
VALUE = 4;
}
// if the range_end is not given, the request returns the key.
// key is the first key for the range. If range_end is not given, the request only looks up key.
bytes key = 1;
// if the range_end is given, it gets the keys in range [key, range_end)
// if range_end is nonempty, otherwise it returns all keys >= key.
// range_end is the upper bound on the requested range [key, range_end).
// If range_end is '\0', the range is all keys >= key.
 // If range_end is the key plus one (for example, "aa"+1 == "ab", "a\xff"+1 == "b"),
 // then the range request gets all keys prefixed with the given key.
 // If both key and range_end are '\0', then the range request returns all keys.
bytes range_end = 2;
// limit the number of keys returned.
// limit is a limit on the number of keys returned for the request.
int64 limit = 3;
// range over the store at the given revision.
// if revision is less or equal to zero, range over the newest store.
// if the revision has been compacted, ErrCompaction will be returned in
// response.
// revision is the point-in-time of the key-value store to use for the range.
 // If revision is less than or equal to zero, the range is over the newest key-value store.
// If the revision has been compacted, ErrCompacted is returned as a response.
int64 revision = 4;
// sort_order is the requested order for returned the results
// sort_order is the order for returned sorted results.
SortOrder sort_order = 5;
// sort_target is the kv field to use for sorting
// sort_target is the key-value field to use for sorting.
SortTarget sort_target = 6;
// range request is linearizable by default. Linearizable requests has a higher
// latency and lower throughput than serializable request.
// To reduce latency, serializable can be set. If serializable is set, range request
// will be serializable, but not linearizable with other requests.
// Serializable range can be served locally without waiting for other nodes in the cluster.
// serializable sets the range request to use serializable member-local reads.
// Range requests are linearizable by default; linearizable requests have higher
// latency and lower throughput than serializable requests but reflect the current
// consensus of the cluster. For better performance, in exchange for possible stale reads,
// a serializable range request is served locally without needing to reach consensus
// with other nodes in the cluster.
bool serializable = 7;
// keys_only when set returns only the keys and not the values.
bool keys_only = 8;
// count_only when set returns only the count of the keys in the range.
bool count_only = 9;
}
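As a concrete illustration of the range fields above via clientv3 options (endpoint and keys are placeholders; the option names are assumed from this release's client):

package main

import (
	"fmt"
	"log"
	"time"

	"github.com/coreos/etcd/clientv3"
	"golang.org/x/net/context"
)

func main() {
	cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"127.0.0.1:2379"}, DialTimeout: 5 * time.Second})
	if err != nil {
		log.Fatal(err)
	}
	defer cli.Close()
	// Linearizable single-key read (the default): goes through consensus.
	if _, err = cli.Get(context.TODO(), "conf/leader"); err != nil {
		log.Fatal(err)
	}
	// Serializable prefix read: served from the contacted member's local
	// store; lower latency, but possibly stale.
	resp, err := cli.Get(context.TODO(), "conf/",
		clientv3.WithPrefix(),       // sets range_end to the prefix end of "conf/"
		clientv3.WithSerializable(), // sets serializable = true
		clientv3.WithLimit(10))      // sets limit = 10
	if err != nil {
		log.Fatal(err)
	}
	for _, kv := range resp.Kvs {
		fmt.Printf("%s -> %s (mod rev %d)\n", kv.Key, kv.Value, kv.ModRevision)
	}
	fmt.Println("store revision:", resp.Header.Revision)
}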
message RangeResponse {
ResponseHeader header = 1;
repeated storagepb.KeyValue kvs = 2;
// kvs is the list of key-value pairs matched by the range request.
// kvs is empty when count is requested.
repeated mvccpb.KeyValue kvs = 2;
// more indicates if there are more keys to return in the requested range.
bool more = 3;
// count is set to the number of keys within the range when requested.
int64 count = 4;
}
message PutRequest {
// key is the key, in bytes, to put into the key-value store.
bytes key = 1;
// value is the value, in bytes, to associate with the key in the key-value store.
bytes value = 2;
// lease is the lease ID to associate with the key in the key-value store. A lease
// value of 0 indicates no lease.
int64 lease = 3;
}
@@ -198,19 +403,22 @@ message PutResponse {
}
message DeleteRangeRequest {
// if the range_end is not given, the request deletes the key.
// key is the first key to delete in the range.
bytes key = 1;
// if the range_end is given, it deletes the keys in range [key, range_end).
// range_end is the key following the last key to delete for the range [key, range_end).
// If range_end is not given, the range is defined to contain only the key argument.
// If range_end is '\0', the range is all keys greater than or equal to the key argument.
bytes range_end = 2;
}
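The [key, range_end) convention also yields prefix operations: range_end is the key with its last non-0xff byte incremented. A self-contained sketch of that computation (a hypothetical helper mirroring what client libraries are assumed to do when building prefix ranges):

package main

import "fmt"

// prefixEnd computes the range_end that pairs with a key to select every key
// sharing the prefix: increment the last byte below 0xff, drop what follows.
func prefixEnd(prefix []byte) []byte {
	end := make([]byte, len(prefix))
	copy(end, prefix)
	for i := len(end) - 1; i >= 0; i-- {
		if end[i] < 0xff {
			end[i]++
			return end[:i+1]
		}
	}
	// Every byte is 0xff: fall back to '\0', meaning "every key >= prefix".
	return []byte{0}
}

func main() {
	fmt.Printf("%q\n", prefixEnd([]byte("foo")))   // "fop"
	fmt.Printf("%q\n", prefixEnd([]byte("a\xff"))) // "b"
	fmt.Printf("%q\n", prefixEnd([]byte("\xff")))  // "\x00"
}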
message DeleteRangeResponse {
ResponseHeader header = 1;
// Deleted is the number of keys that got deleted.
// deleted is the number of keys deleted by the delete range request.
int64 deleted = 2;
}
message RequestUnion {
message RequestOp {
// request is a union of request types accepted by a transaction.
oneof request {
RangeRequest request_range = 1;
PutRequest request_put = 2;
@@ -218,7 +426,8 @@ message RequestUnion {
}
}
message ResponseUnion {
message ResponseOp {
// response is a union of response types returned by a transaction.
oneof response {
RangeResponse response_range = 1;
PutResponse response_put = 2;
@@ -238,27 +447,24 @@ message Compare {
MOD = 2;
 VALUE = 3;
}
 // result is the logical comparison operation for this comparison.
CompareResult result = 1;
// target is the key-value field to inspect for the comparison.
CompareTarget target = 2;
// key path
// key is the subject key for the comparison operation.
bytes key = 3;
oneof target_union {
// version of the given key
// version is the version of the given key
int64 version = 4;
// create revision of the given key
// create_revision is the creation revision of the given key
int64 create_revision = 5;
// last modified revision of the given key
// mod_revision is the last modified revision of the given key.
int64 mod_revision = 6;
// value of the given key
// value is the value of the given key, in bytes.
bytes value = 7;
}
}
// If the comparisons succeed, then the success requests will be processed in order,
// and the response will contain their respective responses in order.
// If the comparisons fail, then the failure requests will be processed in order,
// and the response will contain their respective responses in order.
// From google paxosdb paper:
// Our implementation hinges around a powerful primitive which we call MultiOp. All other database
// operations except for iteration are implemented as a single call to MultiOp. A MultiOp is applied atomically
@@ -275,26 +481,35 @@ message Compare {
// true.
// 3. A list of database operations called f op. Like t op, but executed if guard evaluates to false.
message TxnRequest {
// compare is a list of predicates representing a conjunction of terms.
// If the comparisons succeed, then the success requests will be processed in order,
// and the response will contain their respective responses in order.
// If the comparisons fail, then the failure requests will be processed in order,
// and the response will contain their respective responses in order.
repeated Compare compare = 1;
repeated RequestUnion success = 2;
repeated RequestUnion failure = 3;
// success is a list of requests which will be applied when compare evaluates to true.
repeated RequestOp success = 2;
// failure is a list of requests which will be applied when compare evaluates to false.
repeated RequestOp failure = 3;
}
message TxnResponse {
ResponseHeader header = 1;
 // succeeded is set to true if the compare evaluated to true, and false otherwise.
bool succeeded = 2;
repeated ResponseUnion responses = 3;
// responses is a list of responses corresponding to the results from applying
// success if succeeded is true or failure if succeeded is false.
repeated ResponseOp responses = 3;
}
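A compare-and-swap sketch showing how Compare, RequestOp, and TxnRequest compose through clientv3 (the key and values are placeholders):

package main

import (
	"fmt"
	"log"
	"time"

	"github.com/coreos/etcd/clientv3"
	"golang.org/x/net/context"
)

func main() {
	cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"127.0.0.1:2379"}, DialTimeout: 5 * time.Second})
	if err != nil {
		log.Fatal(err)
	}
	defer cli.Close()
	// compare: value(config) == "old"; success: put "new"; failure: read it back.
	resp, err := cli.Txn(context.TODO()).
		If(clientv3.Compare(clientv3.Value("config"), "=", "old")).
		Then(clientv3.OpPut("config", "new")).
		Else(clientv3.OpGet("config")).
		Commit()
	if err != nil {
		log.Fatal(err)
	}
	// Succeeded mirrors TxnResponse.succeeded; Responses holds the branch results.
	fmt.Println("compare held:", resp.Succeeded)
}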
// Compaction compacts the kv store upto the given revision (including).
// It removes the old versions of a key. It keeps the newest version of
// the key even if its latest modification revision is smaller than the given
// revision.
// CompactionRequest compacts the key-value store up to a given revision. All superseded keys
// with a revision less than the compaction revision will be removed.
message CompactionRequest {
// revision is the key-value store revision for the compaction operation.
int64 revision = 1;
// physical is set so the RPC will wait until the compaction is physically
// applied to the local database such that compacted entries are totally
// removed from the backing store.
// removed from the backend database.
bool physical = 2;
}
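Issuing a compaction through the generated gRPC stub for this proto (the endpoint and revision are placeholders; grpc.WithInsecure is for brevity only):

package main

import (
	"log"

	pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
	"golang.org/x/net/context"
	"google.golang.org/grpc"
)

func main() {
	conn, err := grpc.Dial("127.0.0.1:2379", grpc.WithInsecure()) // assumed endpoint
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()
	kv := pb.NewKVClient(conn)
	// physical=true blocks until compacted entries are removed from the
	// backend database, per the comment above. Revision 100 is a placeholder.
	if _, err = kv.Compact(context.Background(),
		&pb.CompactionRequest{Revision: 100, Physical: true}); err != nil {
		log.Fatal(err)
	}
}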
@@ -307,10 +522,27 @@ message HashRequest {
message HashResponse {
ResponseHeader header = 1;
// hash is the hash value computed from the responding member's key-value store.
uint32 hash = 2;
}
message SnapshotRequest {
}
message SnapshotResponse {
// header has the current key-value store information. The first header in the snapshot
// stream indicates the point in time of the snapshot.
ResponseHeader header = 1;
// remaining_bytes is the number of blob bytes to be sent after this message
uint64 remaining_bytes = 2;
// blob contains the next chunk of the snapshot in the snapshot stream.
bytes blob = 3;
}
message WatchRequest {
// request_union is a request to either create a new watcher or cancel an existing watcher.
oneof request_union {
WatchCreateRequest create_request = 1;
WatchCancelRequest cancel_request = 2;
@@ -318,65 +550,69 @@ message WatchRequest {
}
message WatchCreateRequest {
// the key to be watched
// key is the key to register for watching.
bytes key = 1;
// if the range_end is given, keys in [key, range_end) are watched
// NOTE: only range_end == prefixEnd(key) is accepted now
// range_end is the end of the range [key, range_end) to watch. If range_end is not given,
// only the key argument is watched. If range_end is equal to '\0', all keys greater than
// or equal to the key argument are watched.
bytes range_end = 2;
// start_revision is an optional revision (including) to watch from. No start_revision is "now".
// start_revision is an optional revision to watch from (inclusive). No start_revision is "now".
int64 start_revision = 3;
// if progress_notify is set, etcd server sends WatchResponse with empty events to the
// created watcher when there are no recent events. It is useful when clients want always to be
// able to recover a disconnected watcher from a recent known revision.
// etcdsever can decide how long it should send a notification based on current load.
// progress_notify is set so that the etcd server will periodically send a WatchResponse with
// no events to the new watcher if there are no recent events. It is useful when clients
// wish to recover a disconnected watcher starting from a recent known revision.
// The etcd server may decide how often it will send notifications based on current load.
bool progress_notify = 4;
}
message WatchCancelRequest {
// watch_id is the watcher id to cancel so that no more events are transmitted.
int64 watch_id = 1;
}
message WatchResponse {
ResponseHeader header = 1;
// watch_id is the ID of the watching the response sent to.
// watch_id is the ID of the watcher that corresponds to the response.
int64 watch_id = 2;
// If the response is for a create watch request, created is set to true.
// Client should record the watch_id and prepare for receiving events for
// that watching from the same stream.
// All events sent to the created watching will attach with the same watch_id.
// created is set to true if the response is for a create watch request.
// The client should record the watch_id and expect to receive events for
// the created watcher from the same stream.
// All events sent to the created watcher will attach with the same watch_id.
bool created = 3;
// If the response is for a cancel watch request, cancel is set to true.
// No further events will be sent to the canceled watching.
// canceled is set to true if the response is for a cancel watch request.
// No further events will be sent to the canceled watcher.
bool canceled = 4;
// CompactRevision is set to the minimum index if a watching tries to watch
// compact_revision is set to the minimum index if a watcher tries to watch
// at a compacted index.
//
// This happens when creating a watching at a compacted revision or the watching cannot
// catch up with the progress of the KV.
// This happens when creating a watcher at a compacted revision or the watcher cannot
// catch up with the progress of the key-value store.
//
// Client should treat the watching as canceled and should not try to create any
// watching with same start_revision again.
// The client should treat the watcher as canceled and should not try to create any
// watcher with the same start_revision again.
int64 compact_revision = 5;
repeated storagepb.Event events = 11;
repeated mvccpb.Event events = 11;
}
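To make the created/canceled/compact_revision flow concrete, a clientv3 watch sketch (the client surface is assumed from this release; the prefix and start revision are placeholders):

package main

import (
	"fmt"
	"log"
	"time"

	"github.com/coreos/etcd/clientv3"
	"golang.org/x/net/context"
)

func main() {
	cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"127.0.0.1:2379"}, DialTimeout: 5 * time.Second})
	if err != nil {
		log.Fatal(err)
	}
	defer cli.Close()
	// Watch the "jobs/" prefix starting at a (placeholder) known revision.
	wch := cli.Watch(context.Background(), "jobs/",
		clientv3.WithPrefix(), clientv3.WithRev(42))
	for wresp := range wch {
		if wresp.CompactRevision != 0 {
			// The start revision was compacted away: recreate the watcher
			// from a newer revision, never the same start_revision.
			log.Fatalf("watch compacted at revision %d", wresp.CompactRevision)
		}
		for _, ev := range wresp.Events {
			fmt.Printf("%s %q -> %q\n", ev.Type, ev.Kv.Key, ev.Kv.Value)
		}
	}
}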
message LeaseGrantRequest {
// advisory ttl in seconds
// TTL is the advisory time-to-live in seconds.
int64 TTL = 1;
// requested ID to create; 0 lets lessor choose
// ID is the requested ID for the lease. If ID is set to 0, the lessor chooses an ID.
int64 ID = 2;
}
message LeaseGrantResponse {
ResponseHeader header = 1;
// ID is the lease ID for the granted lease.
int64 ID = 2;
// server decided ttl in second
// TTL is the server chosen lease time-to-live in seconds.
int64 TTL = 3;
string error = 4;
}
message LeaseRevokeRequest {
// ID is the lease ID to revoke. When the ID is revoked, all associated keys will be deleted.
int64 ID = 1;
}
@@ -385,36 +621,42 @@ message LeaseRevokeResponse {
}
message LeaseKeepAliveRequest {
// ID is the lease ID for the lease to keep alive.
int64 ID = 1;
}
message LeaseKeepAliveResponse {
ResponseHeader header = 1;
// ID is the lease ID from the keep alive request.
int64 ID = 2;
// TTL is the new time-to-live for the lease.
int64 TTL = 3;
}
message Member {
// ID is the member ID for this member.
uint64 ID = 1;
// If the member is not started, name will be an empty string.
// name is the human-readable name of the member. If the member is not started, the name will be an empty string.
string name = 2;
bool IsLeader = 3;
repeated string peerURLs = 4;
// If the member is not started, client_URLs will be an zero length
// string array.
repeated string clientURLs = 5;
// peerURLs is the list of URLs the member exposes to the cluster for communication.
repeated string peerURLs = 3;
// clientURLs is the list of URLs the member exposes to clients for communication. If the member is not started, clientURLs will be empty.
repeated string clientURLs = 4;
}
message MemberAddRequest {
// peerURLs is the list of URLs the added member will use to communicate with the cluster.
repeated string peerURLs = 1;
}
message MemberAddResponse {
ResponseHeader header = 1;
// member is the member information for the added member.
Member member = 2;
}
message MemberRemoveRequest {
// ID is the member ID of the member to remove.
uint64 ID = 1;
}
@@ -423,7 +665,9 @@ message MemberRemoveResponse {
}
message MemberUpdateRequest {
// ID is the member ID of the member to update.
uint64 ID = 1;
// peerURLs is the new list of URLs the member will use to communicate with the cluster.
repeated string peerURLs = 2;
}
@@ -436,11 +680,11 @@ message MemberListRequest {
message MemberListResponse {
ResponseHeader header = 1;
// members is a list of all members associated with the cluster.
repeated Member members = 2;
}
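A brief sketch of the Cluster RPCs from clientv3 (the peer URL is a placeholder; adding a member that never starts will leave the cluster degraded, so treat this as illustrative only):

package main

import (
	"fmt"
	"log"
	"time"

	"github.com/coreos/etcd/clientv3"
	"golang.org/x/net/context"
)

func main() {
	cli, err := clientv3.New(clientv3.Config{Endpoints: []string{"127.0.0.1:2379"}, DialTimeout: 5 * time.Second})
	if err != nil {
		log.Fatal(err)
	}
	defer cli.Close()
	// MemberAdd registers the new member's peer URLs with the cluster.
	if _, err = cli.MemberAdd(context.TODO(), []string{"http://10.0.0.4:2380"}); err != nil {
		log.Fatal(err)
	}
	// MemberList reports every member; a not-yet-started member has an
	// empty name and no client URLs, as described above.
	resp, err := cli.MemberList(context.TODO())
	if err != nil {
		log.Fatal(err)
	}
	for _, m := range resp.Members {
		fmt.Printf("%x name=%q peers=%v clients=%v\n", m.ID, m.Name, m.PeerURLs, m.ClientURLs)
	}
}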
message DefragmentRequest {
}
message DefragmentResponse {
@@ -449,7 +693,7 @@ message DefragmentResponse {
enum AlarmType {
NONE = 0; // default, used to query if any alarm is active
NOSPACE = 1;
NOSPACE = 1; // space quota is exhausted
}
message AlarmRequest {
@@ -458,19 +702,27 @@ message AlarmRequest {
ACTIVATE = 1;
DEACTIVATE = 2;
}
// action is the kind of alarm request to issue. The action
// may GET alarm statuses, ACTIVATE an alarm, or DEACTIVATE a
// raised alarm.
AlarmAction action = 1;
// MemberID is the member raising the alarm request
// memberID is the ID of the member associated with the alarm. If memberID is 0, the
// alarm request covers all members.
uint64 memberID = 2;
// alarm is the type of alarm to consider for this request.
AlarmType alarm = 3;
}
message AlarmMember {
uint64 memberID = 1;
AlarmType alarm = 2;
// memberID is the ID of the member associated with the raised alarm.
uint64 memberID = 1;
// alarm is the type of alarm which has been raised.
AlarmType alarm = 2;
}
message AlarmResponse {
ResponseHeader header = 1;
// alarms is a list of alarms associated with the alarm request.
repeated AlarmMember alarms = 2;
}
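A sketch of querying and disarming alarms through the generated stubs (the endpoint is a placeholder; deactivating NOSPACE only helps once compaction and defragmentation have recovered space):

package main

import (
	"fmt"
	"log"

	pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
	"golang.org/x/net/context"
	"google.golang.org/grpc"
)

func main() {
	conn, err := grpc.Dial("127.0.0.1:2379", grpc.WithInsecure()) // assumed endpoint
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()
	mt := pb.NewMaintenanceClient(conn)
	// action=GET with memberID=0 queries alarms across all members.
	resp, err := mt.Alarm(context.Background(), &pb.AlarmRequest{
		Action: pb.AlarmRequest_GET,
		Alarm:  pb.AlarmType_NONE,
	})
	if err != nil {
		log.Fatal(err)
	}
	for _, a := range resp.Alarms {
		fmt.Printf("member %x: %s\n", a.MemberID, a.Alarm)
		// DEACTIVATE clears the raised alarm on that member.
		if _, err := mt.Alarm(context.Background(), &pb.AlarmRequest{
			Action:   pb.AlarmRequest_DEACTIVATE,
			MemberID: a.MemberID,
			Alarm:    a.Alarm,
		}); err != nil {
			log.Fatal(err)
		}
	}
}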
@@ -479,7 +731,16 @@ message StatusRequest {
message StatusResponse {
ResponseHeader header = 1;
// version is the cluster protocol version used by the responding member.
string version = 2;
// dbSize is the size of the backend database, in bytes, of the responding member.
int64 dbSize = 3;
// leader is the member ID which the responding member believes is the current leader.
uint64 leader = 4;
// raftIndex is the current raft index of the responding member.
uint64 raftIndex = 5;
// raftTerm is the current raft term of the responding member.
uint64 raftTerm = 6;
}
message AuthEnableRequest {
@@ -489,6 +750,8 @@ message AuthDisableRequest {
}
message AuthenticateRequest {
string name = 1;
string password = 2;
}
message AuthUserAddRequest {
@@ -497,37 +760,63 @@ message AuthUserAddRequest {
}
message AuthUserGetRequest {
string name = 1;
}
message AuthUserDeleteRequest {
// name is the name of the user to delete.
string name = 1;
}
message AuthUserChangePasswordRequest {
// name is the name of the user whose password is being changed.
string name = 1;
// password is the new password for the user.
string password = 2;
}
message AuthUserGrantRequest {
message AuthUserGrantRoleRequest {
// user is the name of the user which should be granted a given role.
string user = 1;
// role is the name of the role to grant to the user.
string role = 2;
}
message AuthUserRevokeRequest {
message AuthUserRevokeRoleRequest {
string name = 1;
string role = 2;
}
message AuthRoleAddRequest {
// name is the name of the role to add to the authentication system.
string name = 1;
}
message AuthRoleGetRequest {
string role = 1;
}
message AuthUserListRequest {
}
message AuthRoleListRequest {
}
message AuthRoleDeleteRequest {
string role = 1;
}
message AuthRoleGrantRequest {
message AuthRoleGrantPermissionRequest {
// name is the name of the role which will be granted the permission.
string name = 1;
// perm is the permission to grant to the role.
authpb.Permission perm = 2;
}
message AuthRoleRevokeRequest {
message AuthRoleRevokePermissionRequest {
string role = 1;
string key = 2;
string range_end = 3;
}
message AuthEnableResponse {
@@ -540,6 +829,8 @@ message AuthDisableResponse {
message AuthenticateResponse {
ResponseHeader header = 1;
// token is an authorized token that can be used in succeeding RPCs
string token = 2;
}
message AuthUserAddResponse {
@@ -548,6 +839,8 @@ message AuthUserAddResponse {
message AuthUserGetResponse {
ResponseHeader header = 1;
repeated string roles = 2;
}
message AuthUserDeleteResponse {
@@ -558,11 +851,11 @@ message AuthUserChangePasswordResponse {
ResponseHeader header = 1;
}
message AuthUserGrantResponse {
message AuthUserGrantRoleResponse {
ResponseHeader header = 1;
}
message AuthUserRevokeResponse {
message AuthUserRevokeRoleResponse {
ResponseHeader header = 1;
}
@@ -572,16 +865,30 @@ message AuthRoleAddResponse {
message AuthRoleGetResponse {
ResponseHeader header = 1;
repeated authpb.Permission perm = 2;
}
message AuthRoleListResponse {
ResponseHeader header = 1;
repeated string roles = 2;
}
message AuthUserListResponse {
ResponseHeader header = 1;
repeated string users = 2;
}
message AuthRoleDeleteResponse {
ResponseHeader header = 1;
}
message AuthRoleGrantResponse {
message AuthRoleGrantPermissionResponse {
ResponseHeader header = 1;
}
message AuthRoleRevokeResponse {
message AuthRoleRevokePermissionResponse {
ResponseHeader header = 1;
}


@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -25,11 +25,11 @@ import (
"strings"
"sync"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/pkg/netutil"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft"
"github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/storage/backend"
"github.com/coreos/etcd/store"
"github.com/coreos/etcd/version"
"github.com/coreos/go-semver/semver"
@@ -144,9 +144,7 @@ func (c *RaftCluster) PeerURLs() []string {
defer c.Unlock()
urls := make([]string, 0)
for _, p := range c.members {
for _, addr := range p.PeerURLs {
urls = append(urls, addr)
}
urls = append(urls, p.PeerURLs...)
}
sort.Strings(urls)
return urls
@@ -159,9 +157,7 @@ func (c *RaftCluster) ClientURLs() []string {
defer c.Unlock()
urls := make([]string, 0)
for _, p := range c.members {
for _, url := range p.ClientURLs {
urls = append(urls, url)
}
urls = append(urls, p.ClientURLs...)
}
sort.Strings(urls)
return urls
@@ -199,13 +195,19 @@ func (c *RaftCluster) SetID(id types.ID) { c.id = id }
func (c *RaftCluster) SetStore(st store.Store) { c.store = st }
func (c *RaftCluster) Recover() {
func (c *RaftCluster) SetBackend(be backend.Backend) {
c.be = be
mustCreateBackendBuckets(c.be)
}
func (c *RaftCluster) Recover(onSet func(*semver.Version)) {
c.Lock()
defer c.Unlock()
c.members, c.removed = membersFromStore(c.store)
c.version = clusterVersionFromStore(c.store)
mustDetectDowngrade(c.version)
onSet(c.version)
for _, m := range c.members {
plog.Infof("added member %s %v to cluster %s from store", m.ID, m.PeerURLs, c.id)
@@ -289,6 +291,8 @@ func (c *RaftCluster) AddMember(m *Member) {
}
c.members[m.ID] = m
plog.Infof("added member %s %v to cluster %s", m.ID, m.PeerURLs, c.id)
}
// RemoveMember removes a member from the store.
@@ -305,23 +309,28 @@ func (c *RaftCluster) RemoveMember(id types.ID) {
delete(c.members, id)
c.removed[id] = true
plog.Infof("removed member %s from cluster %s", id, c.id)
}
func (c *RaftCluster) UpdateAttributes(id types.ID, attr Attributes) bool {
func (c *RaftCluster) UpdateAttributes(id types.ID, attr Attributes) {
c.Lock()
defer c.Unlock()
if m, ok := c.members[id]; ok {
m.Attributes = attr
return true
if c.store != nil {
mustUpdateMemberAttrInStore(c.store, m)
}
if c.be != nil {
mustSaveMemberToBackend(c.be, m)
}
return
}
_, ok := c.removed[id]
if ok {
plog.Warningf("skipped updating attributes of removed member %s", id)
} else {
if !ok {
plog.Panicf("error updating attributes of unknown member %s", id)
}
// TODO: update store in this function
return false
plog.Warningf("skipped updating attributes of removed member %s", id)
}
func (c *RaftCluster) UpdateRaftAttributes(id types.ID, raftAttr RaftAttributes) {
@@ -335,6 +344,8 @@ func (c *RaftCluster) UpdateRaftAttributes(id types.ID, raftAttr RaftAttributes)
if c.be != nil {
mustSaveMemberToBackend(c.be, c.members[id])
}
plog.Noticef("updated member %s %v in cluster %s", id, raftAttr.PeerURLs, c.id)
}
func (c *RaftCluster) Version() *semver.Version {
@@ -346,7 +357,7 @@ func (c *RaftCluster) Version() *semver.Version {
return semver.Must(semver.NewVersion(c.version.String()))
}
func (c *RaftCluster) SetVersion(ver *semver.Version) {
func (c *RaftCluster) SetVersion(ver *semver.Version, onSet func(*semver.Version)) {
c.Lock()
defer c.Unlock()
if c.version != nil {
@@ -356,6 +367,13 @@ func (c *RaftCluster) SetVersion(ver *semver.Version) {
}
c.version = ver
mustDetectDowngrade(c.version)
if c.store != nil {
mustSaveClusterVersionToStore(c.store, ver)
}
if c.be != nil {
mustSaveClusterVersionToBackend(c.be, ver)
}
onSet(ver)
}
func (c *RaftCluster) IsReadyToAddNewMember() bool {
@@ -371,7 +389,7 @@ func (c *RaftCluster) IsReadyToAddNewMember() bool {
if nstarted == 1 && nmembers == 2 {
// a case of adding a new node to 1-member cluster for restoring cluster data
// https://github.com/coreos/etcd/blob/master/Documentation/admin_guide.md#restoring-the-cluster
// https://github.com/coreos/etcd/blob/master/Documentation/v2/admin_guide.md#restoring-the-cluster
plog.Debugf("The number of started member is 1. This cluster can accept add member request.")
return true


@@ -1,4 +1,4 @@
// Copyright 2016 CoreOS, Inc.
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.


@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.


@@ -1,4 +1,4 @@
// Copyright 2016 CoreOS, Inc.
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -19,15 +19,15 @@ import (
"fmt"
"path"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/storage/backend"
"github.com/coreos/etcd/store"
"github.com/coreos/go-semver/semver"
)
const (
// TODO: make this private after moving all membership storage logic
// from etcdserver pkg
AttributesSuffix = "attributes"
attributesSuffix = "attributes"
raftAttributesSuffix = "raftAttributes"
// the prefix for storing membership-related information in the store provided by the store pkg.
@@ -37,6 +37,7 @@ const (
var (
membersBucketName = []byte("members")
membersRemovedBuckedName = []byte("members_removed")
clusterBucketName = []byte("cluster")
StoreMembersPrefix = path.Join(storePrefix, "members")
storeRemovedMembersPrefix = path.Join(storePrefix, "removed_members")
@@ -44,7 +45,7 @@ var (
func mustSaveMemberToBackend(be backend.Backend, m *Member) {
mkey := backendMemberKey(m.ID)
mvalue, err := json.Marshal(m.RaftAttributes)
mvalue, err := json.Marshal(m)
if err != nil {
plog.Panicf("marshal raftAttributes should never fail: %v", err)
}
@@ -65,6 +66,15 @@ func mustDeleteMemberFromBackend(be backend.Backend, id types.ID) {
tx.Unlock()
}
func mustSaveClusterVersionToBackend(be backend.Backend, ver *semver.Version) {
ckey := backendClusterVersionKey()
tx := be.BatchTx()
tx.Lock()
defer tx.Unlock()
tx.UnsafePut(clusterBucketName, ckey, []byte(ver.String()))
}
func mustSaveMemberToStore(s store.Store, m *Member) {
b, err := json.Marshal(m.RaftAttributes)
if err != nil {
@@ -96,13 +106,30 @@ func mustUpdateMemberInStore(s store.Store, m *Member) {
}
}
func mustUpdateMemberAttrInStore(s store.Store, m *Member) {
b, err := json.Marshal(m.Attributes)
if err != nil {
plog.Panicf("marshal raftAttributes should never fail: %v", err)
}
p := path.Join(MemberStoreKey(m.ID), attributesSuffix)
if _, err := s.Set(p, false, string(b), store.TTLOptionSet{ExpireTime: store.Permanent}); err != nil {
plog.Panicf("update raftAttributes should never fail: %v", err)
}
}
func mustSaveClusterVersionToStore(s store.Store, ver *semver.Version) {
if _, err := s.Set(StoreClusterVersionKey(), false, ver.String(), store.TTLOptionSet{ExpireTime: store.Permanent}); err != nil {
plog.Panicf("save cluster version should never fail: %v", err)
}
}
// nodeToMember builds member from a key value node.
// the child nodes of the given node MUST be sorted by key.
func nodeToMember(n *store.NodeExtern) (*Member, error) {
m := &Member{ID: MustParseMemberIDFromKey(n.Key)}
attrs := make(map[string][]byte)
raftAttrKey := path.Join(n.Key, raftAttributesSuffix)
attrKey := path.Join(n.Key, AttributesSuffix)
attrKey := path.Join(n.Key, attributesSuffix)
for _, nn := range n.Nodes {
if nn.Key != raftAttrKey && nn.Key != attrKey {
return nil, fmt.Errorf("unknown key %q", nn.Key)
@@ -125,15 +152,32 @@ func nodeToMember(n *store.NodeExtern) (*Member, error) {
}
func backendMemberKey(id types.ID) []byte {
return []byte(path.Join(id.String(), raftAttributesSuffix))
return []byte(id.String())
}
func backendClusterVersionKey() []byte {
return []byte("clusterVersion")
}
func mustCreateBackendBuckets(be backend.Backend) {
tx := be.BatchTx()
tx.Lock()
defer tx.Unlock()
tx.UnsafeCreateBucket(membersBucketName)
tx.UnsafeCreateBucket(membersRemovedBuckedName)
tx.UnsafeCreateBucket(clusterBucketName)
}
func MemberStoreKey(id types.ID) string {
return path.Join(StoreMembersPrefix, id.String())
}
func StoreClusterVersionKey() string {
return path.Join(storePrefix, "version")
}
func MemberAttributesStorePath(id types.ID) string {
return path.Join(MemberStoreKey(id), AttributesSuffix)
return path.Join(MemberStoreKey(id), attributesSuffix)
}
func MustParseMemberIDFromKey(key string) types.ID {


@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -22,42 +22,51 @@ import (
)
var (
// TODO: with label in v3?
proposeDurations = prometheus.NewHistogram(prometheus.HistogramOpts{
hasLeader = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "proposal_durations_seconds",
Help: "The latency distributions of committing proposal.",
Buckets: prometheus.ExponentialBuckets(0.001, 2, 14),
Name: "has_leader",
Help: "Whether or not a leader exists. 1 is existence, 0 is not.",
})
proposePending = prometheus.NewGauge(prometheus.GaugeOpts{
leaderChanges = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "pending_proposal_total",
Help: "The total number of pending proposals.",
Name: "leader_changes_seen_total",
Help: "The number of leader changes seen.",
})
// This is number of proposal failed in client's view.
// The proposal might be later got committed in raft.
proposeFailed = prometheus.NewCounter(prometheus.CounterOpts{
proposalsCommitted = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "proposal_failed_total",
Help: "The total number of failed proposals.",
Name: "proposals_committed_total",
Help: "The total number of consensus proposals committed.",
})
fileDescriptorUsed = prometheus.NewGauge(prometheus.GaugeOpts{
proposalsApplied = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "file_descriptors_used_total",
Help: "The total number of file descriptors used.",
Name: "proposals_applied_total",
Help: "The total number of consensus proposals applied.",
})
proposalsPending = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "proposals_pending",
Help: "The current number of pending proposals to commit.",
})
proposalsFailed = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "proposals_failed_total",
Help: "The total number of failed proposals seen.",
})
)
func init() {
prometheus.MustRegister(proposeDurations)
prometheus.MustRegister(proposePending)
prometheus.MustRegister(proposeFailed)
prometheus.MustRegister(fileDescriptorUsed)
prometheus.MustRegister(hasLeader)
prometheus.MustRegister(leaderChanges)
prometheus.MustRegister(proposalsCommitted)
prometheus.MustRegister(proposalsApplied)
prometheus.MustRegister(proposalsPending)
prometheus.MustRegister(proposalsFailed)
}
func monitorFileDescriptor(done <-chan struct{}) {
@@ -69,7 +78,6 @@ func monitorFileDescriptor(done <-chan struct{}) {
plog.Errorf("cannot monitor file descriptor usage (%v)", err)
return
}
fileDescriptorUsed.Set(float64(used))
limit, err := runtime.FDLimit()
if err != nil {
plog.Errorf("cannot monitor file descriptor usage (%v)", err)


@@ -1,4 +1,4 @@
// Copyright 2016 CoreOS, Inc.
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -16,7 +16,7 @@ package etcdserver
import (
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/storage/backend"
"github.com/coreos/etcd/mvcc/backend"
)
// Quota represents an arbitrary quota against arbitrary requests. Each request
@@ -50,20 +50,20 @@ const (
)
func NewBackendQuota(s *EtcdServer) Quota {
if s.cfg.QuotaBackendBytes < 0 {
if s.Cfg.QuotaBackendBytes < 0 {
// disable quotas if negative
plog.Warningf("disabling backend quota")
return &passthroughQuota{}
}
if s.cfg.QuotaBackendBytes == 0 {
if s.Cfg.QuotaBackendBytes == 0 {
// use default size if no quota size given
return &backendQuota{s, backend.DefaultQuotaBytes}
}
if s.cfg.QuotaBackendBytes > backend.MaxQuotaBytes {
plog.Warningf("backend quota %v exceeds maximum quota %v; using maximum", s.cfg.QuotaBackendBytes, backend.MaxQuotaBytes)
if s.Cfg.QuotaBackendBytes > backend.MaxQuotaBytes {
plog.Warningf("backend quota %v exceeds maximum quota %v; using maximum", s.Cfg.QuotaBackendBytes, backend.MaxQuotaBytes)
return &backendQuota{s, backend.MaxQuotaBytes}
}
return &backendQuota{s, s.cfg.QuotaBackendBytes}
return &backendQuota{s, s.Cfg.QuotaBackendBytes}
}
func (b *backendQuota) Available(v interface{}) bool {
@@ -86,7 +86,7 @@ func (b *backendQuota) Cost(v interface{}) int {
func costPut(r *pb.PutRequest) int { return kvOverhead + len(r.Key) + len(r.Value) }
func costTxnReq(u *pb.RequestUnion) int {
func costTxnReq(u *pb.RequestOp) int {
r := u.GetRequestPut()
if r == nil {
return 0


@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -17,7 +17,6 @@ package etcdserver
import (
"encoding/json"
"expvar"
"os"
"sort"
"sync"
"sync/atomic"
@@ -135,8 +134,8 @@ func (r *raftNode) start(s *EtcdServer) {
r.done = make(chan struct{})
heartbeat := 200 * time.Millisecond
if s.cfg != nil {
heartbeat = time.Duration(s.cfg.TickMs) * time.Millisecond
if s.Cfg != nil {
heartbeat = time.Duration(s.Cfg.TickMs) * time.Millisecond
}
// set up contention detectors for raft heartbeat message.
// expect to send a heartbeat within 2 heartbeat intervals.
@@ -158,7 +157,15 @@ func (r *raftNode) start(s *EtcdServer) {
r.mu.Lock()
r.lt = time.Now()
r.mu.Unlock()
leaderChanges.Inc()
}
if rd.SoftState.Lead == raft.None {
hasLeader.Set(0)
} else {
hasLeader.Set(1)
}
atomic.StoreUint64(&r.lead, rd.SoftState.Lead)
if rd.RaftState == raft.StateLeader {
islead = true
@@ -166,7 +173,7 @@ func (r *raftNode) start(s *EtcdServer) {
// it promotes or demotes instead of modifying server directly.
syncC = r.s.SyncTicker
if r.s.lessor != nil {
r.s.lessor.Promote(r.s.cfg.electionTimeout())
r.s.lessor.Promote(r.s.Cfg.electionTimeout())
}
// TODO: remove the nil checking
// current test utility does not provide the stats
@@ -206,28 +213,40 @@ func (r *raftNode) start(s *EtcdServer) {
// writing to their disks.
// For more details, check raft thesis 10.2.1
if islead {
// gofail: var raftBeforeLeaderSend struct{}
r.s.send(rd.Messages)
}
if !raft.IsEmptySnap(rd.Snapshot) {
if err := r.storage.SaveSnap(rd.Snapshot); err != nil {
plog.Fatalf("raft save snapshot error: %v", err)
}
r.raftStorage.ApplySnapshot(rd.Snapshot)
plog.Infof("raft applied incoming snapshot at index %d", rd.Snapshot.Metadata.Index)
}
// gofail: var raftBeforeSave struct{}
if err := r.storage.Save(rd.HardState, rd.Entries); err != nil {
plog.Fatalf("raft save state and entries error: %v", err)
}
if !raft.IsEmptyHardState(rd.HardState) {
proposalsCommitted.Set(float64(rd.HardState.Commit))
}
// gofail: var raftAfterSave struct{}
if !raft.IsEmptySnap(rd.Snapshot) {
// gofail: var raftBeforeSaveSnap struct{}
if err := r.storage.SaveSnap(rd.Snapshot); err != nil {
plog.Fatalf("raft save snapshot error: %v", err)
}
// gofail: var raftAfterSaveSnap struct{}
r.raftStorage.ApplySnapshot(rd.Snapshot)
plog.Infof("raft applied incoming snapshot at index %d", rd.Snapshot.Metadata.Index)
// gofail: var raftAfterApplySnap struct{}
}
r.raftStorage.Append(rd.Entries)
if !islead {
// gofail: var raftBeforeFollowerSend struct{}
r.s.send(rd.Messages)
}
raftDone <- struct{}{}
r.Advance()
case <-syncC:
r.s.sync(r.s.cfg.ReqTimeout())
r.s.sync(r.s.Cfg.ReqTimeout())
case <-r.stopped:
return
}
@@ -289,9 +308,6 @@ func startNode(cfg *ServerConfig, cl *membership.RaftCluster, ids []types.ID) (i
ClusterID: uint64(cl.ID()),
},
)
if err = os.MkdirAll(cfg.SnapDir(), privateDirMode); err != nil {
plog.Fatalf("create snapshot directory error: %v", err)
}
if w, err = wal.Create(cfg.WALDir(), metadata); err != nil {
plog.Fatalf("create wal error: %v", err)
}
@@ -438,7 +454,7 @@ func getIDs(snap *raftpb.Snapshot, ents []raftpb.Entry) []uint64 {
plog.Panicf("ConfChange Type should be either ConfChangeAddNode or ConfChangeRemoveNode!")
}
}
sids := make(types.Uint64Slice, 0)
sids := make(types.Uint64Slice, 0, len(ids))
for id := range ids {
sids = append(sids, id)
}
@@ -476,7 +492,7 @@ func createConfigChangeEnts(ids []uint64, self uint64, term, index uint64) []raf
if !found {
m := membership.Member{
ID: types.ID(self),
RaftAttributes: membership.RaftAttributes{PeerURLs: []string{"http://localhost:7001", "http://localhost:2380"}},
RaftAttributes: membership.RaftAttributes{PeerURLs: []string{"http://localhost:2380"}},
}
ctx, err := json.Marshal(m)
if err != nil {


@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -31,11 +31,14 @@ import (
"github.com/coreos/etcd/auth"
"github.com/coreos/etcd/compactor"
"github.com/coreos/etcd/discovery"
"github.com/coreos/etcd/etcdserver/api"
"github.com/coreos/etcd/etcdserver/api/v2http/httptypes"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/etcdserver/membership"
"github.com/coreos/etcd/etcdserver/stats"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/mvcc"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/pkg/fileutil"
"github.com/coreos/etcd/pkg/idutil"
"github.com/coreos/etcd/pkg/pbutil"
@@ -47,8 +50,6 @@ import (
"github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/rafthttp"
"github.com/coreos/etcd/snap"
dstorage "github.com/coreos/etcd/storage"
"github.com/coreos/etcd/storage/backend"
"github.com/coreos/etcd/store"
"github.com/coreos/etcd/version"
"github.com/coreos/etcd/wal"
@@ -58,9 +59,6 @@ import (
)
const (
// owner can make/remove files inside the directory
privateDirMode = 0700
DefaultSnapCount = 10000
StoreClusterPrefix = "/0"
@@ -156,11 +154,17 @@ type Server interface {
// EtcdServer is the production implementation of the Server interface
type EtcdServer struct {
// r must be the first element to keep 64-bit alignment for atomic
// r and inflightSnapshots must be the first elements to keep 64-bit alignment for atomic
// access to fields
r raftNode
cfg *ServerConfig
// count the number of inflight snapshots.
// MUST use atomic operation to access this field.
inflightSnapshots int64
Cfg *ServerConfig
readych chan struct{}
r raftNode
snapCount uint64
w wait.Wait
@@ -174,8 +178,10 @@ type EtcdServer struct {
store store.Store
applyV2 ApplierV2
applyV3 applierV3
kv dstorage.ConsistentWatchableKV
kv mvcc.ConsistentWatchableKV
lessor lease.Lessor
bemu sync.Mutex
be backend.Backend
@@ -203,15 +209,18 @@ type EtcdServer struct {
msgSnapC chan raftpb.Message
// count the number of inflight snapshots.
// MUST use atomic operation to access this field.
inflightSnapshots int64
// wg is used to wait for the goroutines that depend on the server state
// to exit when stopping the server.
wg sync.WaitGroup
appliedIndex uint64
}
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
func NewServer(cfg *ServerConfig) (*EtcdServer, error) {
func NewServer(cfg *ServerConfig) (srv *EtcdServer, err error) {
st := store.New(StoreClusterPrefix, StoreKeysPrefix)
var (
w *wal.WAL
n raft.Node
@@ -229,45 +238,63 @@ func NewServer(cfg *ServerConfig) (*EtcdServer, error) {
if err != nil {
return nil, err
}
if err := upgradeDataDir(cfg.DataDir, cfg.Name, dataVer); err != nil {
if err = upgradeDataDir(cfg.DataDir, cfg.Name, dataVer); err != nil {
return nil, err
}
haveWAL := wal.Exist(cfg.WALDir())
if err = fileutil.TouchDirAll(cfg.SnapDir()); err != nil {
plog.Fatalf("create snapshot directory error: %v", err)
}
ss := snap.New(cfg.SnapDir())
bepath := path.Join(cfg.SnapDir(), databaseFilename)
beExist := fileutil.Exist(bepath)
be := backend.NewDefaultBackend(bepath)
defer func() {
if err != nil {
be.Close()
}
}()
prt, err := rafthttp.NewRoundTripper(cfg.PeerTLSInfo, cfg.peerDialTimeout())
if err != nil {
return nil, err
}
var remotes []*membership.Member
var (
remotes []*membership.Member
snapshot *raftpb.Snapshot
)
switch {
case !haveWAL && !cfg.NewCluster:
if err := cfg.VerifyJoinExisting(); err != nil {
if err = cfg.VerifyJoinExisting(); err != nil {
return nil, err
}
cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
if err != nil {
return nil, err
}
existingCluster, err := GetClusterFromRemotePeers(getRemotePeerURLs(cl, cfg.Name), prt)
if err != nil {
return nil, fmt.Errorf("cannot fetch cluster info from peer urls: %v", err)
existingCluster, gerr := GetClusterFromRemotePeers(getRemotePeerURLs(cl, cfg.Name), prt)
if gerr != nil {
return nil, fmt.Errorf("cannot fetch cluster info from peer urls: %v", gerr)
}
if err := membership.ValidateClusterAndAssignIDs(cl, existingCluster); err != nil {
if err = membership.ValidateClusterAndAssignIDs(cl, existingCluster); err != nil {
return nil, fmt.Errorf("error validating peerURLs %s: %v", existingCluster, err)
}
if !isCompatibleWithCluster(cl, cl.MemberByName(cfg.Name).ID, prt) {
return nil, fmt.Errorf("incomptible with current running cluster")
return nil, fmt.Errorf("incompatible with current running cluster")
}
remotes = existingCluster.Members()
cl.SetID(existingCluster.ID())
cl.SetStore(st)
cl.SetBackend(be)
cfg.Print()
id, n, s, w = startNode(cfg, cl, nil)
case !haveWAL && cfg.NewCluster:
if err := cfg.VerifyBootstrap(); err != nil {
if err = cfg.VerifyBootstrap(); err != nil {
return nil, err
}
cl, err = membership.NewClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
@@ -280,12 +307,12 @@ func NewServer(cfg *ServerConfig) (*EtcdServer, error) {
}
if cfg.ShouldDiscover() {
var str string
var err error
str, err = discovery.JoinCluster(cfg.DiscoveryURL, cfg.DiscoveryProxy, m.ID, cfg.InitialPeerURLsMap.String())
if err != nil {
return nil, &DiscoveryError{Op: "join", Err: err}
}
urlsmap, err := types.NewURLsMap(str)
var urlsmap types.URLsMap
urlsmap, err = types.NewURLsMap(str)
if err != nil {
return nil, err
}
@@ -297,28 +324,27 @@ func NewServer(cfg *ServerConfig) (*EtcdServer, error) {
}
}
cl.SetStore(st)
cl.SetBackend(be)
cfg.PrintWithInitial()
id, n, s, w = startNode(cfg, cl, cl.MemberIDs())
case haveWAL:
if err := fileutil.IsDirWriteable(cfg.MemberDir()); err != nil {
if err = fileutil.IsDirWriteable(cfg.MemberDir()); err != nil {
return nil, fmt.Errorf("cannot write to member directory: %v", err)
}
if err := fileutil.IsDirWriteable(cfg.WALDir()); err != nil {
if err = fileutil.IsDirWriteable(cfg.WALDir()); err != nil {
return nil, fmt.Errorf("cannot write to WAL directory: %v", err)
}
if cfg.ShouldDiscover() {
plog.Warningf("discovery token ignored since a cluster has already been initialized. Valid log found at %q", cfg.WALDir())
}
var snapshot *raftpb.Snapshot
var err error
snapshot, err = ss.Load()
if err != nil && err != snap.ErrNoSnapshot {
return nil, err
}
if snapshot != nil {
if err := st.Recovery(snapshot.Data); err != nil {
if err = st.Recovery(snapshot.Data); err != nil {
plog.Panicf("recovered store from snapshot error: %v", err)
}
plog.Infof("recovered store from snapshot at index %d", snapshot.Metadata.Index)
@@ -330,7 +356,12 @@ func NewServer(cfg *ServerConfig) (*EtcdServer, error) {
id, cl, n, s, w = restartAsStandaloneNode(cfg, snapshot)
}
cl.SetStore(st)
cl.Recover()
cl.SetBackend(be)
cl.Recover(api.UpdateCapability)
if cl.Version() != nil && !cl.Version().LessThan(semver.Version{Major: 3}) && !beExist {
os.RemoveAll(bepath)
return nil, fmt.Errorf("database file (%v) of the backend is missing", bepath)
}
default:
return nil, fmt.Errorf("unsupported bootstrap config")
}
@@ -346,8 +377,9 @@ func NewServer(cfg *ServerConfig) (*EtcdServer, error) {
sstats.Initialize()
lstats := stats.NewLeaderStats(id.String())
srv := &EtcdServer{
cfg: cfg,
srv = &EtcdServer{
readych: make(chan struct{}),
Cfg: cfg,
snapCount: cfg.SnapCount,
errorc: make(chan error, 1),
store: st,
@@ -369,9 +401,17 @@ func NewServer(cfg *ServerConfig) (*EtcdServer, error) {
msgSnapC: make(chan raftpb.Message, maxInFlightMsgSnap),
}
srv.be = backend.NewDefaultBackend(path.Join(cfg.SnapDir(), databaseFilename))
srv.applyV2 = &applierV2store{store: srv.store, cluster: srv.cluster}
srv.be = be
srv.lessor = lease.NewLessor(srv.be)
srv.kv = dstorage.New(srv.be, srv.lessor, &srv.consistIndex)
srv.kv = mvcc.New(srv.be, srv.lessor, &srv.consistIndex)
if beExist {
kvindex := srv.kv.ConsistentIndex()
if snapshot != nil && kvindex < snapshot.Metadata.Index {
return nil, fmt.Errorf("database file (%v index %d) does not match with snapshot (index %d).", bepath, kvindex, snapshot.Metadata.Index)
}
}
srv.consistIndex.setConsistentIndex(srv.kv.ConsistentIndex())
srv.authStore = auth.NewAuthStore(srv.be)
if h := cfg.AutoCompactionRetention; h != 0 {
@@ -379,7 +419,7 @@ func NewServer(cfg *ServerConfig) (*EtcdServer, error) {
srv.compactor.Run()
}
if err := srv.restoreAlarms(); err != nil {
if err = srv.restoreAlarms(); err != nil {
return nil, err
}
@@ -396,7 +436,7 @@ func NewServer(cfg *ServerConfig) (*EtcdServer, error) {
LeaderStats: lstats,
ErrorC: srv.errorc,
}
if err := tr.Start(); err != nil {
if err = tr.Start(); err != nil {
return nil, err
}
// add all remotes into transport
@@ -420,7 +460,7 @@ func NewServer(cfg *ServerConfig) (*EtcdServer, error) {
// It also starts a goroutine to publish its server information.
func (s *EtcdServer) Start() {
s.start()
go s.publish(s.cfg.ReqTimeout())
go s.publish(s.Cfg.ReqTimeout())
go s.purgeFile()
go monitorFileDescriptor(s.done)
go s.monitorVersions()
@@ -449,11 +489,11 @@ func (s *EtcdServer) start() {
func (s *EtcdServer) purgeFile() {
var serrc, werrc <-chan error
if s.cfg.MaxSnapFiles > 0 {
serrc = fileutil.PurgeFile(s.cfg.SnapDir(), "snap", s.cfg.MaxSnapFiles, purgeFileInterval, s.done)
if s.Cfg.MaxSnapFiles > 0 {
serrc = fileutil.PurgeFile(s.Cfg.SnapDir(), "snap", s.Cfg.MaxSnapFiles, purgeFileInterval, s.done)
}
if s.cfg.MaxWALFiles > 0 {
werrc = fileutil.PurgeFile(s.cfg.WALDir(), "wal", s.cfg.MaxWALFiles, purgeFileInterval, s.done)
if s.Cfg.MaxWALFiles > 0 {
werrc = fileutil.PurgeFile(s.Cfg.WALDir(), "wal", s.Cfg.MaxWALFiles, purgeFileInterval, s.done)
}
select {
case e := <-werrc:
@@ -516,9 +556,15 @@ func (s *EtcdServer) run() {
}
defer func() {
s.r.stop()
sched.Stop()
// wait for snapshots before closing raft so wal stays open
s.wg.Wait()
// must stop raft after scheduler-- etcdserver can leak rafthttp pipelines
// by adding a peer after raft stops the transport
s.r.stop()
// kv, lessor and backend can be nil if running without v3 enabled
// or running unit tests.
if s.lessor != nil {
@@ -564,7 +610,15 @@ func (s *EtcdServer) run() {
func (s *EtcdServer) applyAll(ep *etcdProgress, apply *apply) {
s.applySnapshot(ep, apply)
st := time.Now()
s.applyEntries(ep, apply)
d := time.Since(st)
entriesNum := len(apply.entries)
if entriesNum != 0 && d > time.Duration(entriesNum)*warnApplyDuration {
plog.Warningf("apply entries took too long [%v for %d entries]", d, len(apply.entries))
plog.Warningf("avoid queries with large range/delete range!")
}
proposalsApplied.Set(float64(ep.appliedi))
// wait for the raft routine to finish the disk writes before triggering a
// snapshot. or applied index might be greater than the last index in raft
// storage, since the raft routine might be slower than apply routine.
@@ -584,6 +638,9 @@ func (s *EtcdServer) applySnapshot(ep *etcdProgress, apply *apply) {
return
}
plog.Infof("applying snapshot at index %d...", ep.snapi)
defer plog.Infof("finished applying incoming snapshot at index %d", ep.snapi)
if apply.snapshot.Metadata.Index <= ep.appliedi {
plog.Panicf("snapshot index [%d] should > appliedi[%d] + 1",
apply.snapshot.Metadata.Index, ep.appliedi)
@@ -594,23 +651,31 @@ func (s *EtcdServer) applySnapshot(ep *etcdProgress, apply *apply) {
plog.Panicf("get database snapshot file path error: %v", err)
}
fn := path.Join(s.cfg.SnapDir(), databaseFilename)
fn := path.Join(s.Cfg.SnapDir(), databaseFilename)
if err := os.Rename(snapfn, fn); err != nil {
plog.Panicf("rename snapshot file error: %v", err)
}
newbe := backend.NewDefaultBackend(fn)
plog.Info("restoring mvcc store...")
if err := s.kv.Restore(newbe); err != nil {
plog.Panicf("restore KV error: %v", err)
}
s.consistIndex.setConsistentIndex(s.kv.ConsistentIndex())
plog.Info("finished restoring mvcc store")
// Closing old backend might block until all the txns
// on the backend are finished.
// We do not want to wait on closing the old backend.
s.bemu.Lock()
oldbe := s.be
go func() {
plog.Info("closing old backend...")
defer plog.Info("finished closing old backend")
if err := oldbe.Close(); err != nil {
plog.Panicf("close backend error: %v", err)
}
@@ -620,35 +685,51 @@ func (s *EtcdServer) applySnapshot(ep *etcdProgress, apply *apply) {
s.bemu.Unlock()
if s.lessor != nil {
plog.Info("recovering lessor...")
s.lessor.Recover(newbe, s.kv)
plog.Info("finished recovering lessor")
}
plog.Info("recovering alarms...")
if err := s.restoreAlarms(); err != nil {
plog.Panicf("restore alarms error: %v", err)
}
plog.Info("finished recovering alarms")
if s.authStore != nil {
plog.Info("recovering auth store...")
s.authStore.Recover(newbe)
plog.Info("finished recovering auth store")
}
plog.Info("recovering store v2...")
if err := s.store.Recovery(apply.snapshot.Data); err != nil {
plog.Panicf("recovery store error: %v", err)
}
s.cluster.Recover()
plog.Info("finished recovering store v2")
s.cluster.SetBackend(s.be)
plog.Info("recovering cluster configuration...")
s.cluster.Recover(api.UpdateCapability)
plog.Info("finished recovering cluster configuration")
plog.Info("removing old peers from network...")
// recover raft transport
s.r.transport.RemoveAllPeers()
plog.Info("finished removing old peers from network")
plog.Info("adding peers from new cluster configuration into network...")
for _, m := range s.cluster.Members() {
if m.ID == s.ID() {
continue
}
s.r.transport.AddPeer(m.ID, m.PeerURLs)
}
plog.Info("finished adding peers from new cluster configuration into network...")
ep.appliedi = apply.snapshot.Metadata.Index
ep.snapi = ep.appliedi
ep.confState = apply.snapshot.Metadata.ConfState
plog.Infof("recovered from incoming snapshot at index %d", ep.snapi)
}
func (s *EtcdServer) applyEntries(ep *etcdProgress, apply *apply) {
@@ -677,15 +758,6 @@ func (s *EtcdServer) triggerSnapshot(ep *etcdProgress) {
return
}
// When sending a snapshot, etcd will pause compaction.
// After receives a snapshot, the slow follower needs to get all the entries right after
// the snapshot sent to catch up. If we do not pause compaction, the log entries right after
// the snapshot sent might already be compacted. It happens when the snapshot takes long time
// to send and save. Pausing compaction avoids triggering a snapshot sending cycle.
if atomic.LoadInt64(&s.inflightSnapshots) != 0 {
return
}
plog.Infof("start to snapshot (applied: %d, lastsnap: %d)", ep.appliedi, ep.snapi)
s.snapshot(ep.appliedi, ep.confState)
ep.snapi = ep.appliedi
@@ -702,6 +774,10 @@ func (s *EtcdServer) Stop() {
<-s.done
}
// ReadyNotify returns a channel that will be closed when the server
// is ready to serve client requests
func (s *EtcdServer) ReadyNotify() <-chan struct{} { return s.readych }
func (s *EtcdServer) stopWithDelay(d time.Duration, err error) {
select {
case <-time.After(d):
@@ -717,70 +793,6 @@ func (s *EtcdServer) stopWithDelay(d time.Duration, err error) {
// when the server is stopped.
func (s *EtcdServer) StopNotify() <-chan struct{} { return s.done }
// Do interprets r and performs an operation on s.store according to r.Method
// and other fields. If r.Method is "POST", "PUT", "DELETE", or a "GET" with
// Quorum == true, r will be sent through consensus before performing its
// respective operation. Do will block until an action is performed or there is
// an error.
func (s *EtcdServer) Do(ctx context.Context, r pb.Request) (Response, error) {
r.ID = s.reqIDGen.Next()
if r.Method == "GET" && r.Quorum {
r.Method = "QGET"
}
switch r.Method {
case "POST", "PUT", "DELETE", "QGET":
data, err := r.Marshal()
if err != nil {
return Response{}, err
}
ch := s.w.Register(r.ID)
// TODO: benchmark the cost of time.Now()
// might be sampling?
start := time.Now()
s.r.Propose(ctx, data)
proposePending.Inc()
defer proposePending.Dec()
select {
case x := <-ch:
proposeDurations.Observe(float64(time.Since(start)) / float64(time.Second))
resp := x.(Response)
return resp, resp.err
case <-ctx.Done():
proposeFailed.Inc()
s.w.Trigger(r.ID, nil) // GC wait
return Response{}, s.parseProposeCtxErr(ctx.Err(), start)
case <-s.done:
return Response{}, ErrStopped
}
case "GET":
switch {
case r.Wait:
wc, err := s.store.Watch(r.Path, r.Recursive, r.Stream, r.Since)
if err != nil {
return Response{}, err
}
return Response{Watcher: wc}, nil
default:
ev, err := s.store.Get(r.Path, r.Recursive, r.Sorted)
if err != nil {
return Response{}, err
}
return Response{Event: ev}, nil
}
case "HEAD":
ev, err := s.store.Get(r.Path, r.Recursive, r.Sorted)
if err != nil {
return Response{}, err
}
return Response{Event: ev}, nil
default:
return Response{}, ErrUnknownMethod
}
}
func (s *EtcdServer) SelfStats() []byte { return s.stats.JSON() }
func (s *EtcdServer) LeaderStats() []byte {
@@ -794,7 +806,7 @@ func (s *EtcdServer) LeaderStats() []byte {
func (s *EtcdServer) StoreStats() []byte { return s.store.JsonStats() }
func (s *EtcdServer) AddMember(ctx context.Context, memb membership.Member) error {
if s.cfg.StrictReconfigCheck && !s.cluster.IsReadyToAddNewMember() {
if s.Cfg.StrictReconfigCheck && !s.cluster.IsReadyToAddNewMember() {
// If s.cfg.StrictReconfigCheck is false, it means the option --strict-reconfig-check isn't passed to etcd.
// In such a case, adding a new member is allowed unconditionally.
return ErrNotEnoughStartedMembers
@@ -814,7 +826,7 @@ func (s *EtcdServer) AddMember(ctx context.Context, memb membership.Member) erro
}
func (s *EtcdServer) RemoveMember(ctx context.Context, id uint64) error {
if s.cfg.StrictReconfigCheck && !s.cluster.IsReadyToRemoveMember(id) {
if s.Cfg.StrictReconfigCheck && !s.cluster.IsReadyToRemoveMember(id) {
// If s.cfg.StrictReconfigCheck is false, it means the option --strict-reconfig-check isn't passed to etcd.
// In such a case, removing a member is allowed unconditionally.
return ErrNotEnoughStartedMembers
@@ -853,7 +865,7 @@ func (s *EtcdServer) Lead() uint64 { return atomic.LoadUint64(&s.r.lead) }
func (s *EtcdServer) Leader() types.ID { return types.ID(s.Lead()) }
func (s *EtcdServer) IsPprofEnabled() bool { return s.cfg.EnablePprof }
func (s *EtcdServer) IsPprofEnabled() bool { return s.Cfg.EnablePprof }
// configure sends a configuration change through consensus and
// then waits for it to be applied to the server. It
@@ -925,6 +937,7 @@ func (s *EtcdServer) publish(timeout time.Duration) {
cancel()
switch err {
case nil:
close(s.readych)
plog.Infof("published %+v to cluster %s", s.attributes, s.cluster.ID())
return
case ErrStopped:
@@ -938,11 +951,20 @@ func (s *EtcdServer) publish(timeout time.Duration) {
// TODO: move this function into raft.go
func (s *EtcdServer) send(ms []raftpb.Message) {
for i := range ms {
sentAppResp := false
for i := len(ms) - 1; i >= 0; i-- {
if s.cluster.IsIDRemoved(types.ID(ms[i].To)) {
ms[i].To = 0
}
if ms[i].Type == raftpb.MsgAppResp {
if sentAppResp {
ms[i].To = 0
} else {
sentAppResp = true
}
}
if ms[i].Type == raftpb.MsgSnap {
// There are two separate data stores: the store for v2, and the KV for v3.
// The msgSnap only contains the most recent snapshot of the store, without the KV.
@@ -959,7 +981,7 @@ func (s *EtcdServer) send(ms []raftpb.Message) {
ok, exceed := s.r.td.Observe(ms[i].To)
if !ok {
// TODO: limit request rate.
plog.Warningf("failed to send out heartbeat on time (deadline exceeded for %v)", exceed)
plog.Warningf("failed to send out heartbeat on time (exceeded the %dms timeout for %v)", s.Cfg.TickMs, exceed)
plog.Warningf("server is likely overloaded")
}
}
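The sentAppResp loop added above walks the outgoing messages from newest to oldest and keeps only the latest MsgAppResp, since an append response acknowledges everything up to its index; older ones are dropped by zeroing To, the transport's convention for skipping a message. A self-contained sketch with stand-in types (message and msgAppResp are hypothetical; the real code uses raftpb.Message):

package main

import "fmt"

type msgType int

const (
	msgApp msgType = iota
	msgAppResp
)

type message struct {
	typ msgType
	to  uint64
	idx uint64 // log index acknowledged, for illustration
}

// dedupAppResp walks messages newest-to-oldest and keeps only the
// latest MsgAppResp; older ones are dropped by clearing To, the same
// convention the transport uses to skip a message.
func dedupAppResp(ms []message) {
	sentAppResp := false
	for i := len(ms) - 1; i >= 0; i-- {
		if ms[i].typ == msgAppResp {
			if sentAppResp {
				ms[i].to = 0 // drop: superseded by a newer response
			} else {
				sentAppResp = true
			}
		}
	}
}

func main() {
	ms := []message{
		{typ: msgAppResp, to: 1, idx: 10},
		{typ: msgApp, to: 2, idx: 11},
		{typ: msgAppResp, to: 1, idx: 12},
	}
	dedupAppResp(ms)
	for _, m := range ms {
		fmt.Printf("%+v\n", m)
	}
	// Only the MsgAppResp with idx 12 keeps its destination.
}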
@@ -1002,48 +1024,7 @@ func (s *EtcdServer) apply(es []raftpb.Entry, confState *raftpb.ConfState) (uint
e := es[i]
switch e.Type {
case raftpb.EntryNormal:
// the raft state machine may generate a noop entry upon leader confirmation.
// skip it in advance to avoid some potential bug in the future
if len(e.Data) == 0 {
select {
case s.forceVersionC <- struct{}{}:
default:
}
break
}
var raftReq pb.InternalRaftRequest
if !pbutil.MaybeUnmarshal(&raftReq, e.Data) { // backward compatible
var r pb.Request
pbutil.MustUnmarshal(&r, e.Data)
s.w.Trigger(r.ID, s.applyRequest(r))
} else if raftReq.V2 != nil {
req := raftReq.V2
s.w.Trigger(req.ID, s.applyRequest(*req))
} else {
// do not re-apply applied entries.
if e.Index <= s.consistIndex.ConsistentIndex() {
break
}
// set the consistent index of current executing entry
s.consistIndex.setConsistentIndex(e.Index)
ar := s.applyV3Request(&raftReq)
if ar.err != ErrNoSpace || len(s.alarmStore.Get(pb.AlarmType_NOSPACE)) > 0 {
s.w.Trigger(raftReq.ID, ar)
break
}
plog.Errorf("applying raft message exceeded backend quota")
go func() {
a := &pb.AlarmRequest{
MemberID: uint64(s.ID()),
Action: pb.AlarmRequest_ACTIVATE,
Alarm: pb.AlarmType_NOSPACE,
}
r := pb.InternalRaftRequest{Alarm: a}
s.processInternalRaftRequest(context.TODO(), r)
s.w.Trigger(raftReq.ID, ar)
}()
}
s.applyEntryNormal(&e)
case raftpb.EntryConfChange:
var cc raftpb.ConfChange
pbutil.MustUnmarshal(&cc, e.Data)
@@ -1060,71 +1041,66 @@ func (s *EtcdServer) apply(es []raftpb.Entry, confState *raftpb.ConfState) (uint
return applied, shouldstop
}
// applyRequest interprets r as a call to store.X and returns a Response interpreted
// from store.Event
func (s *EtcdServer) applyRequest(r pb.Request) Response {
f := func(ev *store.Event, err error) Response {
return Response{Event: ev, err: err}
// applyEntryNormal applies an EntryNormal type raftpb request to the EtcdServer
func (s *EtcdServer) applyEntryNormal(e *raftpb.Entry) {
shouldApplyV3 := false
if e.Index > s.consistIndex.ConsistentIndex() {
// set the consistent index of current executing entry
s.consistIndex.setConsistentIndex(e.Index)
shouldApplyV3 = true
}
refresh, _ := pbutil.GetBool(r.Refresh)
ttlOptions := store.TTLOptionSet{Refresh: refresh}
if r.Expiration != 0 {
ttlOptions.ExpireTime = time.Unix(0, r.Expiration)
// the raft state machine may generate a noop entry upon leader confirmation.
// skip it in advance to avoid some potential bug in the future
if len(e.Data) == 0 {
select {
case s.forceVersionC <- struct{}{}:
default:
}
return
}
switch r.Method {
case "POST":
return f(s.store.Create(r.Path, r.Dir, r.Val, true, ttlOptions))
case "PUT":
exists, existsSet := pbutil.GetBool(r.PrevExist)
switch {
case existsSet:
if exists {
if r.PrevIndex == 0 && r.PrevValue == "" {
return f(s.store.Update(r.Path, r.Val, ttlOptions))
} else {
return f(s.store.CompareAndSwap(r.Path, r.PrevValue, r.PrevIndex, r.Val, ttlOptions))
}
}
return f(s.store.Create(r.Path, r.Dir, r.Val, false, ttlOptions))
case r.PrevIndex > 0 || r.PrevValue != "":
return f(s.store.CompareAndSwap(r.Path, r.PrevValue, r.PrevIndex, r.Val, ttlOptions))
default:
// TODO (yicheng): cluster should be the owner of cluster prefix store
// we should not modify cluster store here.
if storeMemberAttributeRegexp.MatchString(r.Path) {
id := membership.MustParseMemberIDFromKey(path.Dir(r.Path))
var attr membership.Attributes
if err := json.Unmarshal([]byte(r.Val), &attr); err != nil {
plog.Panicf("unmarshal %s should never fail: %v", r.Val, err)
}
ok := s.cluster.UpdateAttributes(id, attr)
if !ok {
return Response{}
}
}
if r.Path == path.Join(StoreClusterPrefix, "version") {
s.cluster.SetVersion(semver.Must(semver.NewVersion(r.Val)))
}
return f(s.store.Set(r.Path, r.Dir, r.Val, ttlOptions))
}
case "DELETE":
switch {
case r.PrevIndex > 0 || r.PrevValue != "":
return f(s.store.CompareAndDelete(r.Path, r.PrevValue, r.PrevIndex))
default:
return f(s.store.Delete(r.Path, r.Dir, r.Recursive))
}
case "QGET":
return f(s.store.Get(r.Path, r.Recursive, r.Sorted))
case "SYNC":
s.store.DeleteExpiredKeys(time.Unix(0, r.Time))
return Response{}
default:
// This should never be reached, but just in case:
return Response{err: ErrUnknownMethod}
var raftReq pb.InternalRaftRequest
if !pbutil.MaybeUnmarshal(&raftReq, e.Data) { // backward compatible
var r pb.Request
pbutil.MustUnmarshal(&r, e.Data)
s.w.Trigger(r.ID, s.applyV2Request(&r))
return
}
if raftReq.V2 != nil {
req := raftReq.V2
s.w.Trigger(req.ID, s.applyV2Request(req))
return
}
// do not re-apply applied entries.
if !shouldApplyV3 {
return
}
id := raftReq.ID
if id == 0 {
id = raftReq.Header.ID
}
ar := s.applyV3.Apply(&raftReq)
s.setAppliedIndex(e.Index)
if ar.err != ErrNoSpace || len(s.alarmStore.Get(pb.AlarmType_NOSPACE)) > 0 {
s.w.Trigger(id, ar)
return
}
plog.Errorf("applying raft message exceeded backend quota")
go func() {
a := &pb.AlarmRequest{
MemberID: uint64(s.ID()),
Action: pb.AlarmRequest_ACTIVATE,
Alarm: pb.AlarmType_NOSPACE,
}
r := pb.InternalRaftRequest{Alarm: a}
s.processInternalRaftRequest(context.TODO(), r)
s.w.Trigger(id, ar)
}()
}
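applyEntryNormal only touches the v3 backend when the entry index is above the stored consistent index, so replaying WAL entries after a restart is a no-op for the backend. A minimal sketch of that guard:

package main

import "fmt"

// consistentIndex stands in for etcd's consistent-index tracker: the
// raft index of the last entry applied to the v3 backend.
type consistentIndex struct{ v uint64 }

func (c *consistentIndex) ConsistentIndex() uint64     { return c.v }
func (c *consistentIndex) setConsistentIndex(i uint64) { c.v = i }

// applyOnce runs fn only for entries the backend has not seen yet,
// mirroring the shouldApplyV3 guard in applyEntryNormal.
func applyOnce(ci *consistentIndex, index uint64, fn func()) {
	if index <= ci.ConsistentIndex() {
		return // already applied; WAL replay must not re-apply
	}
	ci.setConsistentIndex(index)
	fn()
}

func main() {
	ci := &consistentIndex{}
	for _, idx := range []uint64{1, 2, 2, 3} { // entry 2 is replayed
		idx := idx
		applyOnce(ci, idx, func() { fmt.Println("applied entry", idx) })
	}
}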
// applyConfChange applies a ConfChange to the server. It is only
@@ -1146,21 +1122,16 @@ func (s *EtcdServer) applyConfChange(cc raftpb.ConfChange, confState *raftpb.Con
plog.Panicf("nodeID should always be equal to member ID")
}
s.cluster.AddMember(m)
if m.ID == s.id {
plog.Noticef("added local member %s %v to cluster %s", m.ID, m.PeerURLs, s.cluster.ID())
} else {
if m.ID != s.id {
s.r.transport.AddPeer(m.ID, m.PeerURLs)
plog.Noticef("added member %s %v to cluster %s", m.ID, m.PeerURLs, s.cluster.ID())
}
case raftpb.ConfChangeRemoveNode:
id := types.ID(cc.NodeID)
s.cluster.RemoveMember(id)
if id == s.id {
return true, nil
} else {
s.r.transport.RemovePeer(id)
plog.Noticef("removed member %s from cluster %s", id, s.cluster.ID())
}
s.r.transport.RemovePeer(id)
case raftpb.ConfChangeUpdateNode:
m := new(membership.Member)
if err := json.Unmarshal(cc.Context, m); err != nil {
@@ -1170,11 +1141,8 @@ func (s *EtcdServer) applyConfChange(cc raftpb.ConfChange, confState *raftpb.Con
plog.Panicf("nodeID should always be equal to member ID")
}
s.cluster.UpdateRaftAttributes(m.ID, m.RaftAttributes)
if m.ID == s.id {
plog.Noticef("update local member %s %v in cluster %s", m.ID, m.PeerURLs, s.cluster.ID())
} else {
if m.ID != s.id {
s.r.transport.UpdatePeer(m.ID, m.PeerURLs)
plog.Noticef("update member %s %v in cluster %s", m.ID, m.PeerURLs, s.cluster.ID())
}
}
return false, nil
@@ -1184,7 +1152,10 @@ func (s *EtcdServer) applyConfChange(cc raftpb.ConfChange, confState *raftpb.Con
func (s *EtcdServer) snapshot(snapi uint64, confState raftpb.ConfState) {
clone := s.store.Clone()
s.wg.Add(1)
go func() {
defer s.wg.Done()
d, err := clone.SaveNoCopy()
// TODO: current store will never fail to do a snapshot
// what should we do if the store might fail?
@@ -1200,8 +1171,7 @@ func (s *EtcdServer) snapshot(snapi uint64, confState raftpb.ConfState) {
}
plog.Panicf("unexpected create snapshot error %v", err)
}
// commit v3 storage because WAL file before snapshot index
// could be removed after SaveSnap.
// commit kv to write metadata (for example: consistent index) to disk.
s.KV().Commit()
// SaveSnap saves the snapshot and releases the locked wal files
// to the snapshot index.
@@ -1210,6 +1180,16 @@ func (s *EtcdServer) snapshot(snapi uint64, confState raftpb.ConfState) {
}
plog.Infof("saved snapshot at index %d", snap.Metadata.Index)
// When sending a snapshot, etcd will pause compaction.
// After receiving a snapshot, the slow follower needs to get all the entries right after
// the sent snapshot in order to catch up. If we do not pause compaction, the log entries right after
// the sent snapshot might already be compacted. This happens when the snapshot takes a long time
// to send and save. Pausing compaction avoids triggering a snapshot-sending cycle.
if atomic.LoadInt64(&s.inflightSnapshots) != 0 {
plog.Infof("skip compaction since there is an inflight snapshot")
return
}
// keep some in memory log entries for slow followers.
compacti := uint64(1)
if snapi > numberOfCatchUpEntries {
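The hunk is cut off here, but the branch presumably completes by retaining the last numberOfCatchUpEntries entries, i.e. compacti = snapi - numberOfCatchUpEntries. A sketch of that computation (the constant value 5000 is illustrative, not taken from this diff):

package main

import "fmt"

// numberOfCatchUpEntries is illustrative: etcd keeps this many recent
// entries in memory so slow followers can catch up without forcing a
// snapshot transfer.
const numberOfCatchUpEntries = 5000

// compactIndex returns the first index safe to compact up to while
// still retaining a catch-up window for slow followers.
func compactIndex(snapi uint64) uint64 {
	compacti := uint64(1)
	if snapi > numberOfCatchUpEntries {
		compacti = snapi - numberOfCatchUpEntries
	}
	return compacti
}

func main() {
	fmt.Println(compactIndex(100))   // 1: too few entries to compact past
	fmt.Println(compactIndex(12000)) // 7000: keep the last 5000 entries
}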
@@ -1293,10 +1273,10 @@ func (s *EtcdServer) updateClusterVersion(ver string) {
}
req := pb.Request{
Method: "PUT",
Path: path.Join(StoreClusterPrefix, "version"),
Path: membership.StoreClusterVersionKey(),
Val: ver,
}
ctx, cancel := context.WithTimeout(context.Background(), s.cfg.ReqTimeout())
ctx, cancel := context.WithTimeout(context.Background(), s.Cfg.ReqTimeout())
_, err := s.Do(ctx, req)
cancel()
switch err {
@@ -1316,7 +1296,7 @@ func (s *EtcdServer) parseProposeCtxErr(err error, start time.Time) error {
return ErrCanceled
case context.DeadlineExceeded:
curLeadElected := s.r.leadElectedTime()
prevLeadLost := curLeadElected.Add(-2 * time.Duration(s.cfg.ElectionTicks) * time.Duration(s.cfg.TickMs) * time.Millisecond)
prevLeadLost := curLeadElected.Add(-2 * time.Duration(s.Cfg.ElectionTicks) * time.Duration(s.Cfg.TickMs) * time.Millisecond)
if start.After(prevLeadLost) && start.Before(curLeadElected) {
return ErrTimeoutDueToLeaderFail
}
@@ -1341,7 +1321,7 @@ func (s *EtcdServer) parseProposeCtxErr(err error, start time.Time) error {
}
}
func (s *EtcdServer) KV() dstorage.ConsistentWatchableKV { return s.kv }
func (s *EtcdServer) KV() mvcc.ConsistentWatchableKV { return s.kv }
func (s *EtcdServer) Backend() backend.Backend {
s.bemu.Lock()
defer s.bemu.Unlock()
@@ -1351,8 +1331,7 @@ func (s *EtcdServer) Backend() backend.Backend {
func (s *EtcdServer) AuthStore() auth.AuthStore { return s.authStore }
func (s *EtcdServer) restoreAlarms() error {
s.applyV3 = newQuotaApplierV3(s, &applierV3backend{s})
s.applyV3 = s.newApplierV3()
as, err := alarm.NewAlarmStore(s)
if err != nil {
return err
@@ -1363,3 +1342,11 @@ func (s *EtcdServer) restoreAlarms() error {
}
return nil
}
func (s *EtcdServer) getAppliedIndex() uint64 {
return atomic.LoadUint64(&s.appliedIndex)
}
func (s *EtcdServer) setAppliedIndex(v uint64) {
atomic.StoreUint64(&s.appliedIndex, v)
}

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -18,9 +18,9 @@ import (
"io"
"log"
"github.com/coreos/etcd/mvcc/backend"
"github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/snap"
"github.com/coreos/etcd/storage/backend"
)
// createMergedSnapshotMessage creates a snapshot message that contains: raft status (term, conf),
@@ -39,8 +39,11 @@ func (s *EtcdServer) createMergedSnapshotMessage(m raftpb.Message, snapi uint64,
plog.Panicf("store save should never fail: %v", err)
}
// commit kv to write metadata (for example: consistent index).
s.KV().Commit()
dbsnap := s.be.Snapshot()
// get a snapshot of v3 KV as readCloser
rc := newSnapshotReaderCloser(s.be.Snapshot())
rc := newSnapshotReaderCloser(dbsnap)
// put the []byte snapshot of store into raft snapshot and return the merged snapshot with
// KV readCloser snapshot.
@@ -54,7 +57,7 @@ func (s *EtcdServer) createMergedSnapshotMessage(m raftpb.Message, snapi uint64,
}
m.Snapshot = snapshot
return *snap.NewMessage(m, rc)
return *snap.NewMessage(m, rc, dbsnap.Size())
}
func newSnapshotReaderCloser(snapshot backend.Snapshot) io.ReadCloser {
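The body of newSnapshotReaderCloser is elided in this diff; one plausible shape for such a function is to stream the snapshot through io.Pipe so the receiver reads the v3 data incrementally instead of buffering it all. A self-contained sketch under that assumption (the snapshot interface here is a hypothetical stand-in for backend.Snapshot):

package main

import (
	"fmt"
	"io"
	"io/ioutil"
	"strings"
)

// snapshot is a hypothetical stand-in for backend.Snapshot: anything
// that can write itself to a stream and be closed afterwards.
type snapshot interface {
	WriteTo(w io.Writer) (int64, error)
	Close() error
}

type memSnapshot struct{ data string }

func (s *memSnapshot) WriteTo(w io.Writer) (int64, error) {
	return io.Copy(w, strings.NewReader(s.data))
}
func (s *memSnapshot) Close() error { return nil }

// newSnapshotReaderCloser exposes a snapshot as an io.ReadCloser by
// streaming WriteTo through a pipe in a goroutine.
func newSnapshotReaderCloser(snap snapshot) io.ReadCloser {
	pr, pw := io.Pipe()
	go func() {
		_, err := snap.WriteTo(pw)
		pw.CloseWithError(err) // nil err closes the pipe with EOF
		snap.Close()
	}()
	return pr
}

func main() {
	rc := newSnapshotReaderCloser(&memSnapshot{data: "kv-backend-bytes"})
	b, _ := ioutil.ReadAll(rc)
	rc.Close()
	fmt.Println(string(b))
}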

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -85,7 +85,7 @@ func (q *statsQueue) Rate() (float64, float64) {
return 0, 0
}
if time.Now().Sub(back.SendingTime) > time.Second {
if time.Since(back.SendingTime) > time.Second {
q.Clear()
return 0, 0
}

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -57,7 +57,7 @@ func (ss *ServerStats) JSON() []byte {
ss.Lock()
stats := *ss
ss.Unlock()
stats.LeaderInfo.Uptime = time.Now().Sub(stats.LeaderInfo.StartTime).String()
stats.LeaderInfo.Uptime = time.Since(stats.LeaderInfo.StartTime).String()
stats.SendingPkgRate, stats.SendingBandwidthRate = stats.SendRates()
stats.RecvingPkgRate, stats.RecvingBandwidthRate = stats.RecvRates()
b, err := json.Marshal(stats)
@@ -142,6 +142,9 @@ func (ss *ServerStats) SendAppendReq(reqSize int) {
}
func (ss *ServerStats) BecomeLeader() {
ss.Lock()
defer ss.Unlock()
if ss.State != raft.StateLeader {
ss.State = raft.StateLeader
ss.LeaderInfo.Name = ss.ID

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@@ -20,6 +20,7 @@ import (
"path"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/pkg/fileutil"
"github.com/coreos/etcd/pkg/pbutil"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft/raftpb"
@@ -66,11 +67,7 @@ func (st *storage) SaveSnap(snap raftpb.Snapshot) error {
if err != nil {
return err
}
err = st.WAL.ReleaseLockTo(snap.Metadata.Index)
if err != nil {
return err
}
return nil
return st.WAL.ReleaseLockTo(snap.Metadata.Index)
}
func readWAL(waldir string, snap walpb.Snapshot) (w *wal.WAL, id, cid types.ID, st raftpb.HardState, ents []raftpb.Entry) {
@@ -133,7 +130,7 @@ func makeMemberDir(dir string) error {
case !os.IsNotExist(err):
return err
}
if err := os.MkdirAll(membdir, 0700); err != nil {
if err := fileutil.CreateDirAll(membdir); err != nil {
return err
}
names := []string{"snap", "wal"}

View File

@@ -1,4 +1,4 @@
// Copyright 2015 CoreOS, Inc.
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.

125
vendor/github.com/coreos/etcd/etcdserver/v2_server.go generated vendored Normal file
View File

@@ -0,0 +1,125 @@
// Copyright 2016 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"time"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"golang.org/x/net/context"
)
type v2API interface {
Post(ctx context.Context, r *pb.Request) (Response, error)
Put(ctx context.Context, r *pb.Request) (Response, error)
Delete(ctx context.Context, r *pb.Request) (Response, error)
QGet(ctx context.Context, r *pb.Request) (Response, error)
Get(ctx context.Context, r *pb.Request) (Response, error)
Head(ctx context.Context, r *pb.Request) (Response, error)
}
type v2apiStore struct{ s *EtcdServer }
func (a *v2apiStore) Post(ctx context.Context, r *pb.Request) (Response, error) {
return a.processRaftRequest(ctx, r)
}
func (a *v2apiStore) Put(ctx context.Context, r *pb.Request) (Response, error) {
return a.processRaftRequest(ctx, r)
}
func (a *v2apiStore) Delete(ctx context.Context, r *pb.Request) (Response, error) {
return a.processRaftRequest(ctx, r)
}
func (a *v2apiStore) QGet(ctx context.Context, r *pb.Request) (Response, error) {
return a.processRaftRequest(ctx, r)
}
func (a *v2apiStore) processRaftRequest(ctx context.Context, r *pb.Request) (Response, error) {
data, err := r.Marshal()
if err != nil {
return Response{}, err
}
ch := a.s.w.Register(r.ID)
start := time.Now()
a.s.r.Propose(ctx, data)
proposalsPending.Inc()
defer proposalsPending.Dec()
select {
case x := <-ch:
resp := x.(Response)
return resp, resp.err
case <-ctx.Done():
proposalsFailed.Inc()
a.s.w.Trigger(r.ID, nil) // GC wait
return Response{}, a.s.parseProposeCtxErr(ctx.Err(), start)
case <-a.s.done:
}
return Response{}, ErrStopped
}
func (a *v2apiStore) Get(ctx context.Context, r *pb.Request) (Response, error) {
if r.Wait {
wc, err := a.s.store.Watch(r.Path, r.Recursive, r.Stream, r.Since)
if err != nil {
return Response{}, err
}
return Response{Watcher: wc}, nil
}
ev, err := a.s.store.Get(r.Path, r.Recursive, r.Sorted)
if err != nil {
return Response{}, err
}
return Response{Event: ev}, nil
}
func (a *v2apiStore) Head(ctx context.Context, r *pb.Request) (Response, error) {
ev, err := a.s.store.Get(r.Path, r.Recursive, r.Sorted)
if err != nil {
return Response{}, err
}
return Response{Event: ev}, nil
}
// Do interprets r and performs an operation on s.store according to r.Method
// and other fields. If r.Method is "POST", "PUT", "DELETE", or a "GET" with
// Quorum == true, r will be sent through consensus before performing its
// respective operation. Do will block until an action is performed or there is
// an error.
func (s *EtcdServer) Do(ctx context.Context, r pb.Request) (Response, error) {
r.ID = s.reqIDGen.Next()
if r.Method == "GET" && r.Quorum {
r.Method = "QGET"
}
v2api := (v2API)(&v2apiStore{s})
switch r.Method {
case "POST":
return v2api.Post(ctx, &r)
case "PUT":
return v2api.Put(ctx, &r)
case "DELETE":
return v2api.Delete(ctx, &r)
case "QGET":
return v2api.QGet(ctx, &r)
case "GET":
return v2api.Get(ctx, &r)
case "HEAD":
return v2api.Head(ctx, &r)
}
return Response{}, ErrUnknownMethod
}
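Do normalizes quorum GETs to QGET and then routes purely on the method string: writes and quorum reads go through raft via processRaftRequest, while plain GET and HEAD are served from the local store. A toy version of the same dispatch shape (request and dispatch are hypothetical local names):

package main

import (
	"errors"
	"fmt"
)

var errUnknownMethod = errors.New("unknown method")

// request mirrors the few pb.Request fields the dispatcher looks at
// (a hypothetical local type for illustration).
type request struct {
	Method string
	Quorum bool
}

// dispatch normalizes quorum reads to QGET and routes by method, the
// same shape as EtcdServer.Do above.
func dispatch(r request) (string, error) {
	if r.Method == "GET" && r.Quorum {
		r.Method = "QGET" // quorum reads go through raft like writes
	}
	switch r.Method {
	case "POST", "PUT", "DELETE", "QGET":
		return "proposed through raft", nil
	case "GET", "HEAD":
		return "served from the local store", nil
	}
	return "", errUnknownMethod
}

func main() {
	for _, r := range []request{
		{Method: "GET", Quorum: true},
		{Method: "GET"},
		{Method: "PATCH"},
	} {
		out, err := dispatch(r)
		fmt.Println(r.Method, out, err)
	}
}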

554
vendor/github.com/coreos/etcd/etcdserver/v3_server.go generated vendored Normal file
View File

@@ -0,0 +1,554 @@
// Copyright 2015 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"strconv"
"strings"
"time"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/lease/leasehttp"
"github.com/coreos/etcd/mvcc"
"golang.org/x/net/context"
"google.golang.org/grpc/metadata"
)
const (
// the max request size that raft accepts.
// TODO: make this a flag? But we probably do not want to
// accept large requests which might block the raft stream. Users
// specifying a large value might end up shooting themselves in the foot.
maxRequestBytes = 1.5 * 1024 * 1024
// max timeout for waiting for a v3 request to go through raft.
maxV3RequestTimeout = 5 * time.Second
)
type RaftKV interface {
Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error)
Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error)
DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error)
Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error)
Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error)
}
type Lessor interface {
// LeaseGrant sends a LeaseGrant request through raft and applies it once committed.
LeaseGrant(ctx context.Context, r *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error)
// LeaseRevoke sends a LeaseRevoke request through raft and applies it once committed.
LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error)
// LeaseRenew renews the lease with the given ID. It returns the renewed TTL, or an error.
LeaseRenew(id lease.LeaseID) (int64, error)
}
type Authenticator interface {
AuthEnable(ctx context.Context, r *pb.AuthEnableRequest) (*pb.AuthEnableResponse, error)
AuthDisable(ctx context.Context, r *pb.AuthDisableRequest) (*pb.AuthDisableResponse, error)
Authenticate(ctx context.Context, r *pb.AuthenticateRequest) (*pb.AuthenticateResponse, error)
UserAdd(ctx context.Context, r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error)
UserDelete(ctx context.Context, r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error)
UserChangePassword(ctx context.Context, r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error)
UserGrantRole(ctx context.Context, r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error)
UserGet(ctx context.Context, r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error)
UserRevokeRole(ctx context.Context, r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error)
RoleAdd(ctx context.Context, r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error)
RoleGrantPermission(ctx context.Context, r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error)
RoleGet(ctx context.Context, r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error)
RoleRevokePermission(ctx context.Context, r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error)
RoleDelete(ctx context.Context, r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error)
UserList(ctx context.Context, r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error)
RoleList(ctx context.Context, r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error)
}
func (s *EtcdServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) {
var result *applyResult
var err error
if r.Serializable {
var user string
user, err = s.usernameFromCtx(ctx)
if err != nil {
return nil, err
}
result = s.applyV3.Apply(
&pb.InternalRaftRequest{
Header: &pb.RequestHeader{Username: user},
Range: r})
} else {
result, err = s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Range: r})
}
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.RangeResponse), nil
}
func (s *EtcdServer) Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Put: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.PutResponse), nil
}
func (s *EtcdServer) DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{DeleteRange: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.DeleteRangeResponse), nil
}
func (s *EtcdServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error) {
var result *applyResult
var err error
if isTxnSerializable(r) {
user, err := s.usernameFromCtx(ctx)
if err != nil {
return nil, err
}
result = s.applyV3.Apply(
&pb.InternalRaftRequest{
Header: &pb.RequestHeader{Username: user},
Txn: r})
} else {
result, err = s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Txn: r})
}
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.TxnResponse), nil
}
func isTxnSerializable(r *pb.TxnRequest) bool {
for _, u := range r.Success {
if r := u.GetRequestRange(); r == nil || !r.Serializable {
return false
}
}
for _, u := range r.Failure {
if r := u.GetRequestRange(); r == nil || !r.Serializable {
return false
}
}
return true
}
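isTxnSerializable lets a transaction bypass raft only when every operation on both branches is a serializable range read. A sketch of the predicate over stand-in types (rangeOp, op, and txn are hypothetical; the real code inspects pb.TxnRequest):

package main

import "fmt"

// rangeOp and op are hypothetical stand-ins for the pb types: an op
// is either a serializable range read or something that must go
// through raft.
type rangeOp struct{ serializable bool }

type op struct{ rng *rangeOp } // nil rng means a write or other op

type txn struct{ success, failure []op }

// isTxnSerializable mirrors the check above: every branch of the txn
// must be a serializable range read for the whole txn to bypass raft.
func isTxnSerializable(t txn) bool {
	for _, u := range append(t.success, t.failure...) {
		if u.rng == nil || !u.rng.serializable {
			return false
		}
	}
	return true
}

func main() {
	readOnly := txn{success: []op{{rng: &rangeOp{serializable: true}}}}
	withWrite := txn{success: []op{{rng: nil}}}
	fmt.Println(isTxnSerializable(readOnly))  // true: can apply locally
	fmt.Println(isTxnSerializable(withWrite)) // false: must go through raft
}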
func (s *EtcdServer) Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Compaction: r})
if r.Physical && result != nil && result.physc != nil {
<-result.physc
// The compaction is done deleting keys; the hash is now settled
// but the data is not necessarily committed. If there's a crash,
// the hash may revert to a hash prior to compaction completing
// if the compaction resumes. Force the finished compaction to
// commit so it won't resume following a crash.
s.be.ForceCommit()
}
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
resp := result.resp.(*pb.CompactionResponse)
if resp == nil {
resp = &pb.CompactionResponse{}
}
if resp.Header == nil {
resp.Header = &pb.ResponseHeader{}
}
resp.Header.Revision = s.kv.Rev()
return resp, nil
}
func (s *EtcdServer) LeaseGrant(ctx context.Context, r *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
// no id given? choose one
for r.ID == int64(lease.NoLease) {
// only use positive int64 id's
r.ID = int64(s.reqIDGen.Next() & ((1 << 63) - 1))
}
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{LeaseGrant: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.LeaseGrantResponse), nil
}
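The ID-picking loop above masks off the top bit of a random uint64 so the resulting int64 is never negative, retrying only if the result lands on lease.NoLease (0). A small demonstration of the mask:

package main

import "fmt"

// maskToPositive clears the sign bit so a random uint64 always maps
// to a non-negative int64, the trick LeaseGrant uses to pick lease IDs.
func maskToPositive(u uint64) int64 {
	return int64(u & ((1 << 63) - 1))
}

func main() {
	samples := []uint64{0x0000000000000001, 0xFFFFFFFFFFFFFFFF}
	for _, u := range samples {
		fmt.Printf("%#016x -> %d\n", u, maskToPositive(u))
	}
	// The high bit is dropped, so the result is always >= 0.
}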
func (s *EtcdServer) LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{LeaseRevoke: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.LeaseRevokeResponse), nil
}
func (s *EtcdServer) LeaseRenew(id lease.LeaseID) (int64, error) {
ttl, err := s.lessor.Renew(id)
if err == nil {
return ttl, nil
}
if err != lease.ErrNotPrimary {
return -1, err
}
// renewals don't go through raft; forward to leader manually
leader := s.cluster.Member(s.Leader())
for i := 0; i < 5 && leader == nil; i++ {
// wait an election
dur := time.Duration(s.Cfg.ElectionTicks) * time.Duration(s.Cfg.TickMs) * time.Millisecond
select {
case <-time.After(dur):
leader = s.cluster.Member(s.Leader())
case <-s.done:
return -1, ErrStopped
}
}
if leader == nil || len(leader.PeerURLs) == 0 {
return -1, ErrNoLeader
}
for _, url := range leader.PeerURLs {
lurl := url + "/leases"
ttl, err = leasehttp.RenewHTTP(id, lurl, s.peerRt, s.Cfg.peerDialTimeout())
if err == nil {
break
}
}
return ttl, err
}
func (s *EtcdServer) Alarm(ctx context.Context, r *pb.AlarmRequest) (*pb.AlarmResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Alarm: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.AlarmResponse), nil
}
func (s *EtcdServer) AuthEnable(ctx context.Context, r *pb.AuthEnableRequest) (*pb.AuthEnableResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthEnable: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.AuthEnableResponse), nil
}
func (s *EtcdServer) AuthDisable(ctx context.Context, r *pb.AuthDisableRequest) (*pb.AuthDisableResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthDisable: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.AuthDisableResponse), nil
}
func (s *EtcdServer) Authenticate(ctx context.Context, r *pb.AuthenticateRequest) (*pb.AuthenticateResponse, error) {
st, err := s.AuthStore().GenSimpleToken()
if err != nil {
return nil, err
}
internalReq := &pb.InternalAuthenticateRequest{
Name: r.Name,
Password: r.Password,
SimpleToken: st,
}
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Authenticate: internalReq})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.AuthenticateResponse), nil
}
func (s *EtcdServer) UserAdd(ctx context.Context, r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthUserAdd: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.AuthUserAddResponse), nil
}
func (s *EtcdServer) UserDelete(ctx context.Context, r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthUserDelete: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.AuthUserDeleteResponse), nil
}
func (s *EtcdServer) UserChangePassword(ctx context.Context, r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthUserChangePassword: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.AuthUserChangePasswordResponse), nil
}
func (s *EtcdServer) UserGrantRole(ctx context.Context, r *pb.AuthUserGrantRoleRequest) (*pb.AuthUserGrantRoleResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthUserGrantRole: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.AuthUserGrantRoleResponse), nil
}
func (s *EtcdServer) UserGet(ctx context.Context, r *pb.AuthUserGetRequest) (*pb.AuthUserGetResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthUserGet: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.AuthUserGetResponse), nil
}
func (s *EtcdServer) UserList(ctx context.Context, r *pb.AuthUserListRequest) (*pb.AuthUserListResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthUserList: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.AuthUserListResponse), nil
}
func (s *EtcdServer) UserRevokeRole(ctx context.Context, r *pb.AuthUserRevokeRoleRequest) (*pb.AuthUserRevokeRoleResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthUserRevokeRole: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.AuthUserRevokeRoleResponse), nil
}
func (s *EtcdServer) RoleAdd(ctx context.Context, r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthRoleAdd: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.AuthRoleAddResponse), nil
}
func (s *EtcdServer) RoleGrantPermission(ctx context.Context, r *pb.AuthRoleGrantPermissionRequest) (*pb.AuthRoleGrantPermissionResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthRoleGrantPermission: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.AuthRoleGrantPermissionResponse), nil
}
func (s *EtcdServer) RoleGet(ctx context.Context, r *pb.AuthRoleGetRequest) (*pb.AuthRoleGetResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthRoleGet: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.AuthRoleGetResponse), nil
}
func (s *EtcdServer) RoleList(ctx context.Context, r *pb.AuthRoleListRequest) (*pb.AuthRoleListResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthRoleList: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.AuthRoleListResponse), nil
}
func (s *EtcdServer) RoleRevokePermission(ctx context.Context, r *pb.AuthRoleRevokePermissionRequest) (*pb.AuthRoleRevokePermissionResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthRoleRevokePermission: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.AuthRoleRevokePermissionResponse), nil
}
func (s *EtcdServer) RoleDelete(ctx context.Context, r *pb.AuthRoleDeleteRequest) (*pb.AuthRoleDeleteResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthRoleDelete: r})
if err != nil {
return nil, err
}
if result.err != nil {
return nil, result.err
}
return result.resp.(*pb.AuthRoleDeleteResponse), nil
}
func (s *EtcdServer) isValidSimpleToken(token string) bool {
splitted := strings.Split(token, ".")
if len(splitted) != 2 {
return false
}
index, err := strconv.Atoi(splitted[1])
if err != nil {
return false
}
// CAUTION: the index synchronization below is required because this node
// might not yet have received and applied the log entry of the Authenticate() RPC.
authApplied := false
for i := 0; i < 10; i++ {
if uint64(index) <= s.getAppliedIndex() {
authApplied = true
break
}
time.Sleep(100 * time.Millisecond)
}
if !authApplied {
plog.Errorf("timeout of waiting Authenticate() RPC")
return false
}
return true
}
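The shape check above implies that a simple token looks like "<random>.<index>", where the index records the raft log position of the Authenticate() request that minted it; whether that is the exact minting format is an assumption here. A condensed sketch of the validation:

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// validSimpleToken mirrors the shape check above, assuming tokens of
// the form "<random>.<raft-index>".
func validSimpleToken(token string, appliedIndex uint64) bool {
	parts := strings.Split(token, ".")
	if len(parts) != 2 {
		return false
	}
	index, err := strconv.Atoi(parts[1])
	if err != nil {
		return false
	}
	// The real code polls getAppliedIndex() for up to a second here,
	// because this node may lag behind the entry that created the token.
	return uint64(index) <= appliedIndex
}

func main() {
	fmt.Println(validSimpleToken("abcdefgh.42", 100)) // true
	fmt.Println(validSimpleToken("abcdefgh.42", 10))  // false: not yet applied
	fmt.Println(validSimpleToken("malformed", 100))   // false
}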
func (s *EtcdServer) usernameFromCtx(ctx context.Context) (string, error) {
md, ok := metadata.FromContext(ctx)
if !ok {
return "", nil
}
ts, tok := md["token"]
if !tok {
return "", nil
}
token := ts[0]
if !s.isValidSimpleToken(token) {
return "", ErrInvalidAuthToken
}
username, uok := s.AuthStore().UsernameFromToken(token)
if !uok {
plog.Warningf("invalid auth token: %s", token)
return "", ErrInvalidAuthToken
}
return username, nil
}
func (s *EtcdServer) processInternalRaftRequest(ctx context.Context, r pb.InternalRaftRequest) (*applyResult, error) {
r.Header = &pb.RequestHeader{
ID: s.reqIDGen.Next(),
}
username, err := s.usernameFromCtx(ctx)
if err != nil {
return nil, err
}
r.Header.Username = username
data, err := r.Marshal()
if err != nil {
return nil, err
}
if len(data) > maxRequestBytes {
return nil, ErrRequestTooLarge
}
id := r.ID
if id == 0 {
id = r.Header.ID
}
ch := s.w.Register(id)
cctx, cancel := context.WithTimeout(ctx, maxV3RequestTimeout)
defer cancel()
start := time.Now()
s.r.Propose(cctx, data)
proposalsPending.Inc()
defer proposalsPending.Dec()
select {
case x := <-ch:
return x.(*applyResult), nil
case <-cctx.Done():
proposalsFailed.Inc()
s.w.Trigger(id, nil) // GC wait
return nil, s.parseProposeCtxErr(cctx.Err(), start)
case <-s.done:
return nil, ErrStopped
}
}
// Watchable returns a watchable interface attached to the etcdserver.
func (s *EtcdServer) Watchable() mvcc.Watchable { return s.KV() }
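processInternalRaftRequest is the register/propose/wait pattern: register the request ID with the wait registry, propose the marshaled request to raft, then block until the apply loop triggers the ID or the timeout fires (triggering with nil on timeout garbage-collects the registration). A minimal stand-alone version (waitRegistry is a hypothetical stand-in for etcd's wait.Wait):

package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

// waitRegistry is a minimal stand-in for etcd's wait.Wait: callers
// register a request ID, and the apply loop triggers it with a result.
type waitRegistry struct {
	mu sync.Mutex
	m  map[uint64]chan interface{}
}

func newWaitRegistry() *waitRegistry {
	return &waitRegistry{m: make(map[uint64]chan interface{})}
}

func (w *waitRegistry) Register(id uint64) <-chan interface{} {
	w.mu.Lock()
	defer w.mu.Unlock()
	ch := make(chan interface{}, 1)
	w.m[id] = ch
	return ch
}

func (w *waitRegistry) Trigger(id uint64, v interface{}) {
	w.mu.Lock()
	ch := w.m[id]
	delete(w.m, id)
	w.mu.Unlock()
	if ch != nil {
		ch <- v
	}
}

// propose mimics processInternalRaftRequest: register, propose, then
// wait for either the apply loop's trigger or a timeout.
func propose(ctx context.Context, w *waitRegistry, id uint64) (interface{}, error) {
	ch := w.Register(id)
	cctx, cancel := context.WithTimeout(ctx, 5*time.Second)
	defer cancel()
	// ... hand the marshaled request to raft here ...
	select {
	case x := <-ch:
		return x, nil
	case <-cctx.Done():
		w.Trigger(id, nil) // GC the registration, as the real code does
		return nil, cctx.Err()
	}
}

func main() {
	w := newWaitRegistry()
	go func() { // stand-in for the apply loop
		time.Sleep(10 * time.Millisecond)
		w.Trigger(7, "applied")
	}()
	res, err := propose(context.Background(), w, 7)
	fmt.Println(res, err)
}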

View File

@@ -1,273 +0,0 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"time"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/lease"
"github.com/coreos/etcd/lease/leasehttp"
dstorage "github.com/coreos/etcd/storage"
"golang.org/x/net/context"
)
const (
// the max request size that raft accepts.
// TODO: make this a flag? But we probably do not want to
// accept large request which might block raft stream. User
// specify a large value might end up with shooting in the foot.
maxRequestBytes = 1.5 * 1024 * 1024
)
type RaftKV interface {
Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error)
Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error)
DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error)
Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error)
Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error)
}
type Lessor interface {
// LeaseGrant sends LeaseGrant request to raft and apply it after committed.
LeaseGrant(ctx context.Context, r *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error)
// LeaseRevoke sends LeaseRevoke request to raft and apply it after committed.
LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error)
// LeaseRenew renews the lease with given ID. The renewed TTL is returned. Or an error
// is returned.
LeaseRenew(id lease.LeaseID) (int64, error)
}
type Authenticator interface {
AuthEnable(ctx context.Context, r *pb.AuthEnableRequest) (*pb.AuthEnableResponse, error)
UserAdd(ctx context.Context, r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error)
UserDelete(ctx context.Context, r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error)
UserChangePassword(ctx context.Context, r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error)
RoleAdd(ctx context.Context, r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error)
}
func (s *EtcdServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) {
if r.Serializable {
return s.applyV3.Range(noTxn, r)
}
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Range: r})
if err != nil {
return nil, err
}
return result.resp.(*pb.RangeResponse), result.err
}
func (s *EtcdServer) Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Put: r})
if err != nil {
return nil, err
}
return result.resp.(*pb.PutResponse), result.err
}
func (s *EtcdServer) DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{DeleteRange: r})
if err != nil {
return nil, err
}
return result.resp.(*pb.DeleteRangeResponse), result.err
}
func (s *EtcdServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error) {
if isTxnSerializable(r) {
return s.applyV3.Txn(r)
}
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Txn: r})
if err != nil {
return nil, err
}
return result.resp.(*pb.TxnResponse), result.err
}
func isTxnSerializable(r *pb.TxnRequest) bool {
for _, u := range r.Success {
if r := u.GetRequestRange(); r == nil || !r.Serializable {
return false
}
}
for _, u := range r.Failure {
if r := u.GetRequestRange(); r == nil || !r.Serializable {
return false
}
}
return true
}
func (s *EtcdServer) Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Compaction: r})
if r.Physical && result.physc != nil {
<-result.physc
// The compaction is done deleting keys; the hash is now settled
// but the data is not necessarily committed. If there's a crash,
// the hash may revert to a hash prior to compaction completing
// if the compaction resumes. Force the finished compaction to
// commit so it won't resume following a crash.
s.be.ForceCommit()
}
if err != nil {
return nil, err
}
resp := result.resp.(*pb.CompactionResponse)
if resp == nil {
resp = &pb.CompactionResponse{}
}
if resp.Header == nil {
resp.Header = &pb.ResponseHeader{}
}
resp.Header.Revision = s.kv.Rev()
return resp, result.err
}
func (s *EtcdServer) LeaseGrant(ctx context.Context, r *pb.LeaseGrantRequest) (*pb.LeaseGrantResponse, error) {
// no id given? choose one
for r.ID == int64(lease.NoLease) {
// only use positive int64 id's
r.ID = int64(s.reqIDGen.Next() & ((1 << 63) - 1))
}
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{LeaseGrant: r})
if err != nil {
return nil, err
}
return result.resp.(*pb.LeaseGrantResponse), result.err
}
func (s *EtcdServer) LeaseRevoke(ctx context.Context, r *pb.LeaseRevokeRequest) (*pb.LeaseRevokeResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{LeaseRevoke: r})
if err != nil {
return nil, err
}
return result.resp.(*pb.LeaseRevokeResponse), result.err
}
func (s *EtcdServer) LeaseRenew(id lease.LeaseID) (int64, error) {
ttl, err := s.lessor.Renew(id)
if err == nil {
return ttl, nil
}
if err != lease.ErrNotPrimary {
return -1, err
}
// renewals don't go through raft; forward to leader manually
leader := s.cluster.Member(s.Leader())
for i := 0; i < 5 && leader == nil; i++ {
// wait an election
dur := time.Duration(s.cfg.ElectionTicks) * time.Duration(s.cfg.TickMs) * time.Millisecond
select {
case <-time.After(dur):
leader = s.cluster.Member(s.Leader())
case <-s.done:
return -1, ErrStopped
}
}
if leader == nil || len(leader.PeerURLs) == 0 {
return -1, ErrNoLeader
}
for _, url := range leader.PeerURLs {
lurl := url + "/leases"
ttl, err = leasehttp.RenewHTTP(id, lurl, s.peerRt, s.cfg.peerDialTimeout())
if err == nil {
break
}
}
return ttl, err
}
func (s *EtcdServer) Alarm(ctx context.Context, r *pb.AlarmRequest) (*pb.AlarmResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{Alarm: r})
if err != nil {
return nil, err
}
return result.resp.(*pb.AlarmResponse), result.err
}
func (s *EtcdServer) AuthEnable(ctx context.Context, r *pb.AuthEnableRequest) (*pb.AuthEnableResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthEnable: r})
if err != nil {
return nil, err
}
return result.resp.(*pb.AuthEnableResponse), result.err
}
func (s *EtcdServer) UserAdd(ctx context.Context, r *pb.AuthUserAddRequest) (*pb.AuthUserAddResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthUserAdd: r})
if err != nil {
return nil, err
}
return result.resp.(*pb.AuthUserAddResponse), result.err
}
func (s *EtcdServer) UserDelete(ctx context.Context, r *pb.AuthUserDeleteRequest) (*pb.AuthUserDeleteResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthUserDelete: r})
if err != nil {
return nil, err
}
return result.resp.(*pb.AuthUserDeleteResponse), result.err
}
func (s *EtcdServer) UserChangePassword(ctx context.Context, r *pb.AuthUserChangePasswordRequest) (*pb.AuthUserChangePasswordResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthUserChangePassword: r})
if err != nil {
return nil, err
}
return result.resp.(*pb.AuthUserChangePasswordResponse), result.err
}
func (s *EtcdServer) RoleAdd(ctx context.Context, r *pb.AuthRoleAddRequest) (*pb.AuthRoleAddResponse, error) {
result, err := s.processInternalRaftRequest(ctx, pb.InternalRaftRequest{AuthRoleAdd: r})
if err != nil {
return nil, err
}
return result.resp.(*pb.AuthRoleAddResponse), result.err
}
func (s *EtcdServer) processInternalRaftRequest(ctx context.Context, r pb.InternalRaftRequest) (*applyResult, error) {
r.ID = s.reqIDGen.Next()
data, err := r.Marshal()
if err != nil {
return nil, err
}
if len(data) > maxRequestBytes {
return nil, ErrRequestTooLarge
}
ch := s.w.Register(r.ID)
s.r.Propose(ctx, data)
select {
case x := <-ch:
return x.(*applyResult), nil
case <-ctx.Done():
s.w.Trigger(r.ID, nil) // GC wait
return nil, ctx.Err()
case <-s.done:
return nil, ErrStopped
}
}
// Watchable returns a watchable interface attached to the etcdserver.
func (s *EtcdServer) Watchable() dstorage.Watchable { return s.KV() }