Godeps commit to enable migrating tests off of FakeClient.

Stems from issues: #15349, #11962
This commit is contained in:
Timothy St. Clair
2015-10-16 08:14:03 -05:00
parent 1f758d8ca6
commit df14277e41
280 changed files with 56536 additions and 10 deletions

163
Godeps/Godeps.json generated
View File

@@ -1,6 +1,6 @@
{
"ImportPath": "k8s.io/kubernetes",
"GoVersion": "go1.4.2",
"GoVersion": "go1.5.1",
"Packages": [
"./..."
],
@@ -101,11 +101,145 @@
"ImportPath": "github.com/beorn7/perks/quantile",
"Rev": "b965b613227fddccbfffe13eae360ed3fa822f8d"
},
{
"ImportPath": "github.com/boltdb/bolt",
"Comment": "v1.0-119-g90fef38",
"Rev": "90fef389f98027ca55594edd7dbd6e7f3926fdad"
},
{
"ImportPath": "github.com/bradfitz/http2",
"Rev": "3e36af6d3af0e56fa3da71099f864933dea3d9fb"
},
{
"ImportPath": "github.com/codegangsta/negroni",
"Comment": "v0.1-62-g8d75e11",
"Rev": "8d75e11374a1928608c906fe745b538483e7aeb2"
},
{
"ImportPath": "github.com/coreos/etcd/client",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/discovery",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/error",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/etcdserver",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/pkg/crc",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/pkg/fileutil",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/pkg/httputil",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/pkg/idutil",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/pkg/ioutil",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/pkg/netutil",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/pkg/pathutil",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/pkg/pbutil",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/pkg/runtime",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/pkg/testutil",
"Comment": "v2.2.0-17-g45c86af",
"Rev": "45c86af0eb195f6f833cab6fb176a60fc8c47185"
},
{
"ImportPath": "github.com/coreos/etcd/pkg/timeutil",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/pkg/transport",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/pkg/types",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/pkg/wait",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/raft",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/rafthttp",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/snap",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/storage",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/store",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/version",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/etcd/wal",
"Comment": "v2.2.1-1-g4dc835c",
"Rev": "4dc835c718bbdbb9a1c36ef5cdf1921a423cbf70"
},
{
"ImportPath": "github.com/coreos/go-etcd/etcd",
"Comment": "v2.0.0-34-gde3514f",
@@ -133,7 +267,7 @@
},
{
"ImportPath": "github.com/coreos/go-semver/semver",
"Rev": "6fe83ccda8fb9b7549c9ab4ba47f47858bc950aa"
"Rev": "d043ae190b3202550d026daf009359bb5d761672"
},
{
"ImportPath": "github.com/coreos/go-systemd/daemon",
@@ -145,6 +279,11 @@
"Comment": "v2-27-g97e243d",
"Rev": "97e243d21a8e232e9d8af38ba2366dfcfceebeba"
},
{
"ImportPath": "github.com/coreos/go-systemd/journal",
"Comment": "v2-27-g97e243d",
"Rev": "97e243d21a8e232e9d8af38ba2366dfcfceebeba"
},
{
"ImportPath": "github.com/coreos/go-systemd/unit",
"Comment": "v2-27-g97e243d",
@@ -279,6 +418,10 @@
"ImportPath": "github.com/golang/protobuf/proto",
"Rev": "7f07925444bb51fa4cf9dfe6f7661876f8852275"
},
{
"ImportPath": "github.com/google/btree",
"Rev": "cc6329d4279e3f025a53a83c397d2339b5705c45"
},
{
"ImportPath": "github.com/google/cadvisor/api",
"Comment": "0.16.0.2",
@@ -570,11 +713,23 @@
"ImportPath": "github.com/vaughan0/go-ini",
"Rev": "a98ad7ee00ec53921f08832bc06ecf7fd600e6a1"
},
{
"ImportPath": "github.com/xiang90/probing",
"Rev": "6a0cc1ae81b4cc11db5e491e030e4b98fba79c19"
},
{
"ImportPath": "github.com/xyproto/simpleredis",
"Comment": "v1.0-13-g5292687",
"Rev": "5292687f5379e01054407da44d7c4590a61fd3de"
},
{
"ImportPath": "golang.org/x/crypto/bcrypt",
"Rev": "c84e1f8e3a7e322d497cd16c0e8a13c7e127baf3"
},
{
"ImportPath": "golang.org/x/crypto/blowfish",
"Rev": "c84e1f8e3a7e322d497cd16c0e8a13c7e127baf3"
},
{
"ImportPath": "golang.org/x/crypto/ssh",
"Rev": "c84e1f8e3a7e322d497cd16c0e8a13c7e127baf3"
@@ -631,6 +786,10 @@
"ImportPath": "google.golang.org/cloud/internal",
"Rev": "2e43671e4ad874a7bca65746ff3edb38e6e93762"
},
{
"ImportPath": "google.golang.org/grpc",
"Rev": "f5ebd86be717593ab029545492c93ddf8914832b"
},
{
"ImportPath": "gopkg.in/natefinch/lumberjack.v2",
"Comment": "v1.0-16-g20b71e5",

View File

@@ -0,0 +1,4 @@
*.prof
*.test
*.swp
/bin/

20
Godeps/_workspace/src/github.com/boltdb/bolt/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,20 @@
The MIT License (MIT)
Copyright (c) 2013 Ben Johnson
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

54
Godeps/_workspace/src/github.com/boltdb/bolt/Makefile generated vendored Normal file
View File

@@ -0,0 +1,54 @@
TEST=.
BENCH=.
COVERPROFILE=/tmp/c.out
BRANCH=`git rev-parse --abbrev-ref HEAD`
COMMIT=`git rev-parse --short HEAD`
GOLDFLAGS="-X main.branch $(BRANCH) -X main.commit $(COMMIT)"
default: build
bench:
go test -v -test.run=NOTHINCONTAINSTHIS -test.bench=$(BENCH)
# http://cloc.sourceforge.net/
cloc:
@cloc --not-match-f='Makefile|_test.go' .
cover: fmt
go test -coverprofile=$(COVERPROFILE) -test.run=$(TEST) $(COVERFLAG) .
go tool cover -html=$(COVERPROFILE)
rm $(COVERPROFILE)
cpuprofile: fmt
@go test -c
@./bolt.test -test.v -test.run=$(TEST) -test.cpuprofile cpu.prof
# go get github.com/kisielk/errcheck
errcheck:
@echo "=== errcheck ==="
@errcheck github.com/boltdb/bolt
fmt:
@go fmt ./...
get:
@go get -d ./...
build: get
@mkdir -p bin
@go build -ldflags=$(GOLDFLAGS) -a -o bin/bolt ./cmd/bolt
test: fmt
@go get github.com/stretchr/testify/assert
@echo "=== TESTS ==="
@go test -v -cover -test.run=$(TEST)
@echo ""
@echo ""
@echo "=== CLI ==="
@go test -v -test.run=$(TEST) ./cmd/bolt
@echo ""
@echo ""
@echo "=== RACE DETECTOR ==="
@go test -v -race -test.run="TestSimulate_(100op|1000op)"
.PHONY: bench cloc cover cpuprofile fmt memprofile test

621
Godeps/_workspace/src/github.com/boltdb/bolt/README.md generated vendored Normal file
View File

@@ -0,0 +1,621 @@
Bolt [![Build Status](https://drone.io/github.com/boltdb/bolt/status.png)](https://drone.io/github.com/boltdb/bolt/latest) [![Coverage Status](https://coveralls.io/repos/boltdb/bolt/badge.png?branch=master)](https://coveralls.io/r/boltdb/bolt?branch=master) [![GoDoc](https://godoc.org/github.com/boltdb/bolt?status.png)](https://godoc.org/github.com/boltdb/bolt) ![Version](http://img.shields.io/badge/version-1.0-green.png)
====
Bolt is a pure Go key/value store inspired by [Howard Chu's][hyc_symas]
[LMDB project][lmdb]. The goal of the project is to provide a simple,
fast, and reliable database for projects that don't require a full database
server such as Postgres or MySQL.
Since Bolt is meant to be used as such a low-level piece of functionality,
simplicity is key. The API will be small and only focus on getting values
and setting values. That's it.
[hyc_symas]: https://twitter.com/hyc_symas
[lmdb]: http://symas.com/mdb/
## Project Status
Bolt is stable and the API is fixed. Full unit test coverage and randomized
black box testing are used to ensure database consistency and thread safety.
Bolt is currently in high-load production environments serving databases as
large as 1TB. Many companies such as Shopify and Heroku use Bolt-backed
services every day.
## Getting Started
### Installing
To start using Bolt, install Go and run `go get`:
```sh
$ go get github.com/boltdb/bolt/...
```
This will retrieve the library and install the `bolt` command line utility into
your `$GOBIN` path.
### Opening a database
The top-level object in Bolt is a `DB`. It is represented as a single file on
your disk and represents a consistent snapshot of your data.
To open your database, simply use the `bolt.Open()` function:
```go
package main
import (
"log"
"github.com/boltdb/bolt"
)
func main() {
// Open the my.db data file in your current directory.
// It will be created if it doesn't exist.
db, err := bolt.Open("my.db", 0600, nil)
if err != nil {
log.Fatal(err)
}
defer db.Close()
...
}
```
Please note that Bolt obtains a file lock on the data file so multiple processes
cannot open the same database at the same time. Opening an already open Bolt
database will cause it to hang until the other process closes it. To prevent
an indefinite wait you can pass a timeout option to the `Open()` function:
```go
db, err := bolt.Open("my.db", 0600, &bolt.Options{Timeout: 1 * time.Second})
```
### Transactions
Bolt allows only one read-write transaction at a time but allows as many
read-only transactions as you want at a time. Each transaction has a consistent
view of the data as it existed when the transaction started.
Individual transactions and all objects created from them (e.g. buckets, keys)
are not thread safe. To work with data in multiple goroutines you must start
a transaction for each one or use locking to ensure only one goroutine accesses
a transaction at a time. Creating a transaction from the `DB` is thread safe.
Read-only transactions and read-write transactions should not depend on one
another and generally shouldn't be opened simultaneously in the same goroutine.
This can cause a deadlock as the read-write transaction needs to periodically
re-map the data file but it cannot do so while a read-only transaction is open.
#### Read-write transactions
To start a read-write transaction, you can use the `DB.Update()` function:
```go
err := db.Update(func(tx *bolt.Tx) error {
...
return nil
})
```
Inside the closure, you have a consistent view of the database. You commit the
transaction by returning `nil` at the end. You can also rollback the transaction
at any point by returning an error. All database operations are allowed inside
a read-write transaction.
Always check the return error as it will report any disk failures that can cause
your transaction to not complete. If you return an error within your closure
it will be passed through.
#### Read-only transactions
To start a read-only transaction, you can use the `DB.View()` function:
```go
err := db.View(func(tx *bolt.Tx) error {
...
return nil
})
```
You also get a consistent view of the database within this closure, however,
no mutating operations are allowed within a read-only transaction. You can only
retrieve buckets, retrieve values, and copy the database within a read-only
transaction.
#### Batch read-write transactions
Each `DB.Update()` waits for disk to commit the writes. This overhead
can be minimized by combining multiple updates with the `DB.Batch()`
function:
```go
err := db.Batch(func(tx *bolt.Tx) error {
...
return nil
})
```
Concurrent Batch calls are opportunistically combined into larger
transactions. Batch is only useful when there are multiple goroutines
calling it.
The trade-off is that `Batch` can call the given
function multiple times, if parts of the transaction fail. The
function must be idempotent and side effects must take effect only
after a successful return from `DB.Batch()`.
For example: don't display messages from inside the function, instead
set variables in the enclosing scope:
```go
var id uint64
err := db.Batch(func(tx *bolt.Tx) error {
// Find last key in bucket, decode as bigendian uint64, increment
// by one, encode back to []byte, and add new key.
...
id = newValue
return nil
})
if err != nil {
return ...
}
fmt.Printf("Allocated ID %d\n", id)
```
#### Managing transactions manually
The `DB.View()` and `DB.Update()` functions are wrappers around the `DB.Begin()`
function. These helper functions will start the transaction, execute a function,
and then safely close your transaction if an error is returned. This is the
recommended way to use Bolt transactions.
However, sometimes you may want to manually start and end your transactions.
You can use the `DB.Begin()` function directly but _please_ be sure to close the
transaction.
```go
// Start a writable transaction.
tx, err := db.Begin(true)
if err != nil {
return err
}
defer tx.Rollback()
// Use the transaction...
_, err = tx.CreateBucket([]byte("MyBucket"))
if err != nil {
return err
}
// Commit the transaction and check for error.
if err := tx.Commit(); err != nil {
return err
}
```
The first argument to `DB.Begin()` is a boolean stating if the transaction
should be writable.
### Using buckets
Buckets are collections of key/value pairs within the database. All keys in a
bucket must be unique. You can create a bucket using the `Tx.CreateBucket()`
function:
```go
db.Update(func(tx *bolt.Tx) error {
b, err := tx.CreateBucket([]byte("MyBucket"))
if err != nil {
return fmt.Errorf("create bucket: %s", err)
}
return nil
})
```
You can also create a bucket only if it doesn't exist by using the
`Tx.CreateBucketIfNotExists()` function. It's a common pattern to call this
function for all your top-level buckets after you open your database so you can
guarantee that they exist for future transactions.
To delete a bucket, simply call the `Tx.DeleteBucket()` function.
### Using key/value pairs
To save a key/value pair to a bucket, use the `Bucket.Put()` function:
```go
db.Update(func(tx *bolt.Tx) error {
b := tx.Bucket([]byte("MyBucket"))
err := b.Put([]byte("answer"), []byte("42"))
return err
})
```
This will set the value of the `"answer"` key to `"42"` in the `MyBucket`
bucket. To retrieve this value, we can use the `Bucket.Get()` function:
```go
db.View(func(tx *bolt.Tx) error {
b := tx.Bucket([]byte("MyBucket"))
v := b.Get([]byte("answer"))
fmt.Printf("The answer is: %s\n", v)
return nil
})
```
The `Get()` function does not return an error because its operation is
guaranteed to work (unless there is some kind of system failure). If the key
exists then it will return its byte slice value. If it doesn't exist then it
will return `nil`. It's important to note that you can have a zero-length value
set to a key which is different than the key not existing.
Use the `Bucket.Delete()` function to delete a key from the bucket.
Please note that values returned from `Get()` are only valid while the
transaction is open. If you need to use a value outside of the transaction
then you must use `copy()` to copy it to another byte slice.
### Iterating over keys
Bolt stores its keys in byte-sorted order within a bucket. This makes sequential
iteration over these keys extremely fast. To iterate over keys we'll use a
`Cursor`:
```go
db.View(func(tx *bolt.Tx) error {
b := tx.Bucket([]byte("MyBucket"))
c := b.Cursor()
for k, v := c.First(); k != nil; k, v = c.Next() {
fmt.Printf("key=%s, value=%s\n", k, v)
}
return nil
})
```
The cursor allows you to move to a specific point in the list of keys and move
forward or backward through the keys one at a time.
The following functions are available on the cursor:
```
First() Move to the first key.
Last() Move to the last key.
Seek() Move to a specific key.
Next() Move to the next key.
Prev() Move to the previous key.
```
When you have iterated to the end of the cursor then `Next()` will return `nil`.
You must seek to a position using `First()`, `Last()`, or `Seek()` before
calling `Next()` or `Prev()`. If you do not seek to a position then these
functions will return `nil`.
#### Prefix scans
To iterate over a key prefix, you can combine `Seek()` and `bytes.HasPrefix()`:
```go
db.View(func(tx *bolt.Tx) error {
c := tx.Bucket([]byte("MyBucket")).Cursor()
prefix := []byte("1234")
for k, v := c.Seek(prefix); bytes.HasPrefix(k, prefix); k, v = c.Next() {
fmt.Printf("key=%s, value=%s\n", k, v)
}
return nil
})
```
#### Range scans
Another common use case is scanning over a range such as a time range. If you
use a sortable time encoding such as RFC3339 then you can query a specific
date range like this:
```go
db.View(func(tx *bolt.Tx) error {
// Assume our events bucket has RFC3339 encoded time keys.
c := tx.Bucket([]byte("Events")).Cursor()
// Our time range spans the 90's decade.
min := []byte("1990-01-01T00:00:00Z")
max := []byte("2000-01-01T00:00:00Z")
// Iterate over the 90's.
for k, v := c.Seek(min); k != nil && bytes.Compare(k, max) <= 0; k, v = c.Next() {
fmt.Printf("%s: %s\n", k, v)
}
return nil
})
```
#### ForEach()
You can also use the function `ForEach()` if you know you'll be iterating over
all the keys in a bucket:
```go
db.View(func(tx *bolt.Tx) error {
b := tx.Bucket([]byte("MyBucket"))
b.ForEach(func(k, v []byte) error {
fmt.Printf("key=%s, value=%s\n", k, v)
return nil
})
return nil
})
```
### Nested buckets
You can also store a bucket in a key to create nested buckets. The API is the
same as the bucket management API on the `Tx` object:
```go
func (*Bucket) CreateBucket(key []byte) (*Bucket, error)
func (*Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error)
func (*Bucket) DeleteBucket(key []byte) error
```
### Database backups
Bolt is a single file so it's easy to backup. You can use the `Tx.WriteTo()`
function to write a consistent view of the database to a writer. If you call
this from a read-only transaction, it will perform a hot backup and not block
your other database reads and writes. It will also use `O_DIRECT` when available
to prevent page cache trashing.
One common use case is to backup over HTTP so you can use tools like `cURL` to
do database backups:
```go
func BackupHandleFunc(w http.ResponseWriter, req *http.Request) {
err := db.View(func(tx *bolt.Tx) error {
w.Header().Set("Content-Type", "application/octet-stream")
w.Header().Set("Content-Disposition", `attachment; filename="my.db"`)
w.Header().Set("Content-Length", strconv.Itoa(int(tx.Size())))
_, err := tx.WriteTo(w)
return err
})
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
}
```
Then you can backup using this command:
```sh
$ curl http://localhost/backup > my.db
```
Or you can open your browser to `http://localhost/backup` and it will download
automatically.
If you want to backup to another file you can use the `Tx.CopyFile()` helper
function.
### Statistics
The database keeps a running count of many of the internal operations it
performs so you can better understand what's going on. By grabbing a snapshot
of these stats at two points in time we can see what operations were performed
in that time range.
For example, we could start a goroutine to log stats every 10 seconds:
```go
go func() {
// Grab the initial stats.
prev := db.Stats()
for {
// Wait for 10s.
time.Sleep(10 * time.Second)
// Grab the current stats and diff them.
stats := db.Stats()
diff := stats.Sub(&prev)
// Encode stats to JSON and print to STDERR.
json.NewEncoder(os.Stderr).Encode(diff)
// Save stats for the next loop.
prev = stats
}
}()
```
It's also useful to pipe these stats to a service such as statsd for monitoring
or to provide an HTTP endpoint that will perform a fixed-length sample.
### Read-Only Mode
Sometimes it is useful to create a shared, read-only Bolt database. To do this,
set the `Options.ReadOnly` flag when opening your database. Read-only mode
uses a shared lock to allow multiple processes to read from the database but
it will block any processes from opening the database in read-write mode.
```go
db, err := bolt.Open("my.db", 0666, &bolt.Options{ReadOnly: true})
if err != nil {
log.Fatal(err)
}
```
## Resources
For more information on getting started with Bolt, check out the following articles:
* [Intro to BoltDB: Painless Performant Persistence](http://npf.io/2014/07/intro-to-boltdb-painless-performant-persistence/) by [Nate Finch](https://github.com/natefinch).
* [Bolt -- an embedded key/value database for Go](https://www.progville.com/go/bolt-embedded-db-golang/) by Progville
## Comparison with other databases
### Postgres, MySQL, & other relational databases
Relational databases structure data into rows and are only accessible through
the use of SQL. This approach provides flexibility in how you store and query
your data but also incurs overhead in parsing and planning SQL statements. Bolt
accesses all data by a byte slice key. This makes Bolt fast to read and write
data by key but provides no built-in support for joining values together.
Most relational databases (with the exception of SQLite) are standalone servers
that run separately from your application. This gives your systems
flexibility to connect multiple application servers to a single database
server but also adds overhead in serializing and transporting data over the
network. Bolt runs as a library included in your application so all data access
has to go through your application's process. This brings data closer to your
application but limits multi-process access to the data.
### LevelDB, RocksDB
LevelDB and its derivatives (RocksDB, HyperLevelDB) are similar to Bolt in that
they are libraries bundled into the application, however, their underlying
structure is a log-structured merge-tree (LSM tree). An LSM tree optimizes
random writes by using a write ahead log and multi-tiered, sorted files called
SSTables. Bolt uses a B+tree internally and only a single file. Both approaches
have trade offs.
If you require a high random write throughput (>10,000 w/sec) or you need to use
spinning disks then LevelDB could be a good choice. If your application is
read-heavy or does a lot of range scans then Bolt could be a good choice.
One other important consideration is that LevelDB does not have transactions.
It supports batch writing of key/value pairs and it supports read snapshots
but it will not give you the ability to do a compare-and-swap operation safely.
Bolt supports fully serializable ACID transactions.
### LMDB
Bolt was originally a port of LMDB so it is architecturally similar. Both use
a B+tree, have ACID semantics with fully serializable transactions, and support
lock-free MVCC using a single writer and multiple readers.
The two projects have somewhat diverged. LMDB heavily focuses on raw performance
while Bolt has focused on simplicity and ease of use. For example, LMDB allows
several unsafe actions such as direct writes for the sake of performance. Bolt
opts to disallow actions which can leave the database in a corrupted state. The
only exception to this in Bolt is `DB.NoSync`.
There are also a few differences in API. LMDB requires a maximum mmap size when
opening an `mdb_env` whereas Bolt will handle incremental mmap resizing
automatically. LMDB overloads the getter and setter functions with multiple
flags whereas Bolt splits these specialized cases into their own functions.
## Caveats & Limitations
It's important to pick the right tool for the job and Bolt is no exception.
Here are a few things to note when evaluating and using Bolt:
* Bolt is good for read intensive workloads. Sequential write performance is
also fast but random writes can be slow. You can add a write-ahead log or
[transaction coalescer](https://github.com/boltdb/coalescer) in front of Bolt
to mitigate this issue.
* Bolt uses a B+tree internally so there can be a lot of random page access.
SSDs provide a significant performance boost over spinning disks.
* Try to avoid long running read transactions. Bolt uses copy-on-write so
old pages cannot be reclaimed while an old transaction is using them.
* Byte slices returned from Bolt are only valid during a transaction. Once the
transaction has been committed or rolled back then the memory they point to
can be reused by a new page or can be unmapped from virtual memory and you'll
see an `unexpected fault address` panic when accessing it.
* Be careful when using `Bucket.FillPercent`. Setting a high fill percent for
buckets that have random inserts will cause your database to have very poor
page utilization.
* Use larger buckets in general. Smaller buckets cause poor page utilization
once they become larger than the page size (typically 4KB).
* Bulk loading a lot of random writes into a new bucket can be slow as the
page will not split until the transaction is committed. Randomly inserting
more than 100,000 key/value pairs into a single new bucket in a single
transaction is not advised.
* Bolt uses a memory-mapped file so the underlying operating system handles the
caching of the data. Typically, the OS will cache as much of the file as it
can in memory and will release memory as needed to other processes. This means
that Bolt can show very high memory usage when working with large databases.
However, this is expected and the OS will release memory as needed. Bolt can
handle databases much larger than the available physical RAM.
* The data structures in the Bolt database are memory mapped so the data file
will be endian specific. This means that you cannot copy a Bolt file from a
little endian machine to a big endian machine and have it work. For most
users this is not a concern since most modern CPUs are little endian.
* Because of the way pages are laid out on disk, Bolt cannot truncate data files
and return free pages back to the disk. Instead, Bolt maintains a free list
of unused pages within its data file. These free pages can be reused by later
transactions. This works well for many use cases as databases generally tend
to grow. However, it's important to note that deleting large chunks of data
will not allow you to reclaim that space on disk.
For more information on page allocation, [see this comment][page-allocation].
[page-allocation]: https://github.com/boltdb/bolt/issues/308#issuecomment-74811638
## Other Projects Using Bolt
Below is a list of public, open source projects that use Bolt:
* [Operation Go: A Routine Mission](http://gocode.io) - An online programming game for Golang using Bolt for user accounts and a leaderboard.
* [Bazil](https://bazil.org/) - A file system that lets your data reside where it is most convenient for it to reside.
* [DVID](https://github.com/janelia-flyem/dvid) - Added Bolt as optional storage engine and testing it against Basho-tuned leveldb.
* [Skybox Analytics](https://github.com/skybox/skybox) - A standalone funnel analysis tool for web analytics.
* [Scuttlebutt](https://github.com/benbjohnson/scuttlebutt) - Uses Bolt to store and process all Twitter mentions of GitHub projects.
* [Wiki](https://github.com/peterhellberg/wiki) - A tiny wiki using Goji, BoltDB and Blackfriday.
* [ChainStore](https://github.com/nulayer/chainstore) - Simple key-value interface to a variety of storage engines organized as a chain of operations.
* [MetricBase](https://github.com/msiebuhr/MetricBase) - Single-binary version of Graphite.
* [Gitchain](https://github.com/gitchain/gitchain) - Decentralized, peer-to-peer Git repositories aka "Git meets Bitcoin".
* [event-shuttle](https://github.com/sclasen/event-shuttle) - A Unix system service to collect and reliably deliver messages to Kafka.
* [ipxed](https://github.com/kelseyhightower/ipxed) - Web interface and api for ipxed.
* [BoltStore](https://github.com/yosssi/boltstore) - Session store using Bolt.
* [photosite/session](http://godoc.org/bitbucket.org/kardianos/photosite/session) - Sessions for a photo viewing site.
* [LedisDB](https://github.com/siddontang/ledisdb) - A high performance NoSQL, using Bolt as optional storage.
* [ipLocator](https://github.com/AndreasBriese/ipLocator) - A fast ip-geo-location-server using bolt with bloom filters.
* [cayley](https://github.com/google/cayley) - Cayley is an open-source graph database using Bolt as optional backend.
* [bleve](http://www.blevesearch.com/) - A pure Go search engine similar to ElasticSearch that uses Bolt as the default storage backend.
* [tentacool](https://github.com/optiflows/tentacool) - REST api server to manage system stuff (IP, DNS, Gateway...) on a linux server.
* [SkyDB](https://github.com/skydb/sky) - Behavioral analytics database.
* [Seaweed File System](https://github.com/chrislusf/weed-fs) - Highly scalable distributed key~file system with O(1) disk read.
* [InfluxDB](http://influxdb.com) - Scalable datastore for metrics, events, and real-time analytics.
* [Freehold](http://tshannon.bitbucket.org/freehold/) - An open, secure, and lightweight platform for your files and data.
* [Prometheus Annotation Server](https://github.com/oliver006/prom_annotation_server) - Annotation server for PromDash & Prometheus service monitoring system.
* [Consul](https://github.com/hashicorp/consul) - Consul is service discovery and configuration made easy. Distributed, highly available, and datacenter-aware.
* [Kala](https://github.com/ajvb/kala) - Kala is a modern job scheduler optimized to run on a single node. It is persistent, JSON over HTTP API, ISO 8601 duration notation, and dependent jobs.
* [drive](https://github.com/odeke-em/drive) - drive is an unofficial Google Drive command line client for \*NIX operating systems.
If you are using Bolt in a project please send a pull request to add it to the list.

138
Godeps/_workspace/src/github.com/boltdb/bolt/batch.go generated vendored Normal file
View File

@@ -0,0 +1,138 @@
package bolt
import (
"errors"
"fmt"
"sync"
"time"
)
// Batch runs fn as one member of a group of calls that may share a
// single read-write transaction. Compared to Update:
//
// 1. Batch calls made concurrently may be merged into the same Bolt
// transaction.
//
// 2. fn may be invoked more than once, whether or not it returns an
// error.
//
// Consequently any side effects of fn must be idempotent, and the
// caller should treat them as permanent only once Batch itself has
// returned successfully.
//
// DB.MaxBatchSize and DB.MaxBatchDelay tune the maximum batch size and
// the maximum delay before a batch runs, respectively.
//
// Batch only helps when several goroutines call it.
func (db *DB) Batch(fn func(*Tx) error) error {
	// Capacity 1 lets the batch deliver this call's single result without
	// waiting for the receive below.
	result := make(chan error, 1)

	db.batchMu.Lock()
	if db.batch == nil || len(db.batch.calls) >= db.MaxBatchSize {
		// Either nothing is pending or the pending batch is already full:
		// open a fresh batch and arm its delay timer.
		pending := &batch{db: db}
		db.batch = pending
		pending.timer = time.AfterFunc(db.MaxBatchDelay, pending.trigger)
	}
	db.batch.calls = append(db.batch.calls, call{fn: fn, err: result})
	if len(db.batch.calls) >= db.MaxBatchSize {
		// This call filled the batch, so kick it off without waiting for
		// the timer.
		go db.batch.trigger()
	}
	db.batchMu.Unlock()

	// Wait for the batch to report back. Anything other than trySolo is the
	// final outcome for this call.
	if err := <-result; err != trySolo {
		return err
	}

	// The batch asked for fn to be re-run by itself in its own transaction.
	return db.Update(fn)
}
// call is a single function submitted to DB.Batch together with the
// channel on which its result is delivered.
type call struct {
	fn  func(*Tx) error // function passed to DB.Batch
	err chan<- error    // receives the outcome (or trySolo) for this call
}
// batch is a group of calls collected by DB.Batch that are executed
// together by run.
type batch struct {
	db    *DB
	timer *time.Timer // created by DB.Batch to call trigger after DB.MaxBatchDelay
	start sync.Once   // guards run so that it executes at most once
	calls []call      // pending calls, appended by DB.Batch
}

// trigger runs the batch if it hasn't already been run.
func (b *batch) trigger() {
	b.start.Do(b.run)
}
// run performs the transactions in the batch and communicates results
// back to DB.Batch.
//
// All calls are attempted inside one db.Update transaction. When a call
// fails (or panics, see safelyCall) it is removed from the batch, its
// submitter is sent trySolo, and the remaining calls are retried in a
// new transaction. Once a pass completes without any call failing, the
// result of db.Update is sent to every remaining caller.
func (b *batch) run() {
	b.db.batchMu.Lock()
	b.timer.Stop()
	// Make sure no new work is added to this batch, but don't break
	// other batches.
	if b.db.batch == b {
		b.db.batch = nil
	}
	b.db.batchMu.Unlock()

retry:
	for len(b.calls) > 0 {
		// failIdx is the index of the call that failed in this pass, or -1.
		var failIdx = -1
		err := b.db.Update(func(tx *Tx) error {
			for i, c := range b.calls {
				if err := safelyCall(c.fn, tx); err != nil {
					failIdx = i
					return err
				}
			}
			return nil
		})

		if failIdx >= 0 {
			// take the failing transaction out of the batch. it's
			// safe to shorten b.calls here because db.batch no longer
			// points to us, so DB.Batch cannot append to this batch
			// any more (batchMu has already been released above).
			// The last call is swapped into the vacated slot.
			c := b.calls[failIdx]
			b.calls[failIdx], b.calls = b.calls[len(b.calls)-1], b.calls[:len(b.calls)-1]
			// tell the submitter re-run it solo, continue with the rest of the batch
			c.err <- trySolo
			continue retry
		}

		// pass success, or bolt internal errors, to all callers
		for _, c := range b.calls {
			if c.err != nil {
				c.err <- err
			}
		}
		break retry
	}
}
// trySolo is a special sentinel error value used for signaling that a
// transaction function should be re-run. It should never be seen by
// callers: DB.Batch compares the received error against it and, on a
// match, re-runs the function through DB.Update instead of returning it.
var trySolo = errors.New("batch function returned an error and should be re-run solo")
// panicked wraps a value recovered from a panic so that it can be
// returned as an ordinary error.
type panicked struct {
	reason interface{}
}

// Error returns the message of the recovered value when that value is
// itself an error; any other value is formatted as "panic: <value>".
func (p panicked) Error() string {
	switch r := p.reason.(type) {
	case error:
		return r.Error()
	default:
		return fmt.Sprintf("panic: %v", r)
	}
}
// safelyCall invokes fn with tx. If fn panics, the panic is recovered
// and returned as a panicked error instead of propagating to the caller.
func safelyCall(fn func(*Tx) error, tx *Tx) (err error) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		// Overwrite the named result with the wrapped panic value.
		err = panicked{r}
	}()
	return fn(tx)
}

View File

@@ -0,0 +1,7 @@
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
// The value is 2^31 - 1 bytes; mmap uses it as the length of the array
// type that the mapped region is cast to.
const maxMapSize = 0x7FFFFFFF // 2GB

// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF

View File

@@ -0,0 +1,7 @@
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
// The value is 2^48 - 1 bytes; mmap uses it as the length of the array
// type that the mapped region is cast to.
const maxMapSize = 0xFFFFFFFFFFFF // 256TB

// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0x7FFFFFFF

View File

@@ -0,0 +1,7 @@
package bolt
// maxMapSize represents the largest mmap size supported by Bolt.
// The value is 2^31 - 1 bytes; mmap uses it as the length of the array
// type that the mapped region is cast to.
const maxMapSize = 0x7FFFFFFF // 2GB

// maxAllocSize is the size used when creating array pointers.
const maxAllocSize = 0xFFFFFFF

View File

@@ -0,0 +1,12 @@
package bolt
import (
"syscall"
)
// odirect holds the O_DIRECT open flag from the syscall package.
var odirect = syscall.O_DIRECT

// fdatasync flushes written data to a file descriptor.
// It issues the fdatasync system call on the database file.
func fdatasync(db *DB) error {
	return syscall.Fdatasync(int(db.file.Fd()))
}

View File

@@ -0,0 +1,29 @@
package bolt
import (
"syscall"
"unsafe"
)
// Flag values for the msync system call. Only msInvalidate is used by
// msync in this file.
const (
	msAsync      = 1 << iota // perform asynchronous writes
	msSync                   // perform synchronous writes
	msInvalidate             // invalidate cached data
)

// odirect is left at its zero value in this file.
var odirect int
// msync issues the msync system call with the msInvalidate flag over the
// DB's mapped region (db.data, db.datasz bytes). It returns the errno
// reported by the call, or nil when the call succeeds.
func msync(db *DB) error {
	// The pointer-to-uintptr conversion stays inside the call expression.
	if _, _, errno := syscall.Syscall(syscall.SYS_MSYNC, uintptr(unsafe.Pointer(db.data)), uintptr(db.datasz), msInvalidate); errno != 0 {
		return errno
	}
	return nil
}
// fdatasync flushes the database. Without a mapped region it syncs the
// file directly; otherwise it delegates to msync.
func fdatasync(db *DB) error {
	if db.data == nil {
		return db.file.Sync()
	}
	return msync(db)
}

View File

@@ -0,0 +1,100 @@
// +build !windows,!plan9,!solaris
package bolt
import (
"fmt"
"os"
"syscall"
"time"
"unsafe"
)
// flock acquires an advisory lock on a file descriptor.
//
// A shared lock is requested unless exclusive is true. The lock is
// attempted without blocking and retried every 50ms while it is held
// elsewhere. With timeout > 0, ErrTimeout is returned once more than
// timeout has elapsed since the first attempt; with timeout <= 0 the
// retries continue indefinitely. Any other flock error is returned as is.
func flock(f *os.File, exclusive bool, timeout time.Duration) error {
	// Pick the lock mode once; it never changes between attempts.
	op := syscall.LOCK_SH
	if exclusive {
		op = syscall.LOCK_EX
	}
	op |= syscall.LOCK_NB

	var started time.Time
	for {
		// The deadline can only be exceeded after at least one attempt,
		// because the clock starts on the first pass through the loop.
		switch {
		case started.IsZero():
			started = time.Now()
		case timeout > 0 && time.Since(started) > timeout:
			return ErrTimeout
		}

		switch err := syscall.Flock(int(f.Fd()), op); err {
		case nil:
			return nil
		case syscall.EWOULDBLOCK:
			// Lock is held elsewhere; wait for a bit and try again.
			time.Sleep(50 * time.Millisecond)
		default:
			return err
		}
	}
}
// funlock releases an advisory lock on a file descriptor by issuing
// flock with LOCK_UN.
func funlock(f *os.File) error {
	fd := int(f.Fd())
	return syscall.Flock(fd, syscall.LOCK_UN)
}
// mmap memory maps a DB's data file.
//
// sz is the number of bytes to map. On success the mapping is recorded
// in db.dataref (the byte slice), db.data (the same memory viewed as a
// fixed-size array pointer) and db.datasz.
func mmap(db *DB, sz int) error {
	// Truncate and fsync to ensure file size metadata is flushed.
	// https://github.com/boltdb/bolt/issues/284
	if !(db.NoGrowSync || db.readOnly) {
		err := db.file.Truncate(int64(sz))
		if err != nil {
			return fmt.Errorf("file resize error: %s", err)
		}
		err = db.file.Sync()
		if err != nil {
			return fmt.Errorf("file sync error: %s", err)
		}
	}

	// Map the data file read-only and shared.
	region, err := syscall.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED)
	if err != nil {
		return err
	}

	// Advise the kernel that the mmap is accessed randomly.
	if err = madvise(region, syscall.MADV_RANDOM); err != nil {
		return fmt.Errorf("madvise: %s", err)
	}

	// Keep the original slice for munmap and expose the memory as a
	// byte array pointer.
	db.dataref = region
	db.data = (*[maxMapSize]byte)(unsafe.Pointer(&region[0]))
	db.datasz = sz
	return nil
}
// munmap unmaps a DB's data file from memory.
//
// It does nothing when no mapping is recorded. Otherwise the DB's
// mapping fields are cleared whether or not the unmap succeeds, and the
// unmap error (if any) is returned.
func munmap(db *DB) error {
	mapped := db.dataref
	if mapped == nil {
		// Nothing is mapped.
		return nil
	}

	// Unmap through the slice that the mapping call returned.
	err := syscall.Munmap(mapped)
	db.dataref, db.data, db.datasz = nil, nil, 0
	return err
}
// madvise issues the madvise system call for the memory backing b with
// the given advice value and returns the resulting errno, or nil on
// success. b must be non-empty because its first element is addressed.
//
// NOTE: This function is copied from stdlib because it is not available on darwin.
func madvise(b []byte, advice int) (err error) {
	if _, _, errno := syscall.Syscall(syscall.SYS_MADVISE, uintptr(unsafe.Pointer(&b[0])), uintptr(len(b)), uintptr(advice)); errno != 0 {
		return errno
	}
	return nil
}

View File

@@ -0,0 +1,101 @@
package bolt
import (
"fmt"
"os"
"syscall"
"time"
"unsafe"
"golang.org/x/sys/unix"
)
// flock acquires an advisory lock on a file descriptor.
//
// The lock is requested through fcntl(F_SETLK) with zero Start, Len and
// Whence: a write lock when exclusive is true, a read lock otherwise.
// While the request fails with EAGAIN it is retried every 50ms. With
// timeout > 0, ErrTimeout is returned once more than timeout has elapsed
// since the first attempt. Any other error is returned unchanged.
func flock(f *os.File, exclusive bool, timeout time.Duration) error {
	var started time.Time
	for {
		// The deadline can only be exceeded after at least one attempt,
		// because the clock starts on the first pass through the loop.
		switch {
		case started.IsZero():
			started = time.Now()
		case timeout > 0 && time.Since(started) > timeout:
			return ErrTimeout
		}

		// Every field other than Type keeps its zero value.
		lock := syscall.Flock_t{Type: syscall.F_RDLCK}
		if exclusive {
			lock.Type = syscall.F_WRLCK
		}

		switch err := syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &lock); err {
		case nil:
			return nil
		case syscall.EAGAIN:
			// Lock is held elsewhere; wait for a bit and try again.
			time.Sleep(50 * time.Millisecond)
		default:
			return err
		}
	}
}
// funlock releases an advisory lock on a file descriptor by issuing
// fcntl(F_SETLK) with an F_UNLCK request and zero Start, Len and Whence.
func funlock(f *os.File) error {
	// Every field other than Type keeps its zero value.
	unlock := syscall.Flock_t{Type: syscall.F_UNLCK}
	return syscall.FcntlFlock(uintptr(f.Fd()), syscall.F_SETLK, &unlock)
}
// mmap memory maps a DB's data file.
//
// sz is the number of bytes to map. On success the mapping is recorded
// in db.dataref (the byte slice), db.data (the same memory viewed as a
// fixed-size array pointer) and db.datasz.
func mmap(db *DB, sz int) error {
	// Truncate and fsync to ensure file size metadata is flushed.
	// https://github.com/boltdb/bolt/issues/284
	if !(db.NoGrowSync || db.readOnly) {
		err := db.file.Truncate(int64(sz))
		if err != nil {
			return fmt.Errorf("file resize error: %s", err)
		}
		err = db.file.Sync()
		if err != nil {
			return fmt.Errorf("file sync error: %s", err)
		}
	}

	// Map the data file read-only and shared.
	region, err := unix.Mmap(int(db.file.Fd()), 0, sz, syscall.PROT_READ, syscall.MAP_SHARED)
	if err != nil {
		return err
	}

	// Advise the kernel that the mmap is accessed randomly.
	if err = unix.Madvise(region, syscall.MADV_RANDOM); err != nil {
		return fmt.Errorf("madvise: %s", err)
	}

	// Keep the original slice for munmap and expose the memory as a
	// byte array pointer.
	db.dataref = region
	db.data = (*[maxMapSize]byte)(unsafe.Pointer(&region[0]))
	db.datasz = sz
	return nil
}
// munmap unmaps a DB's data file from memory.
//
// It does nothing when no mapping is recorded. Otherwise the DB's
// mapping fields are cleared whether or not the unmap succeeds, and the
// unmap error (if any) is returned.
func munmap(db *DB) error {
	mapped := db.dataref
	if mapped == nil {
		// Nothing is mapped.
		return nil
	}

	// Unmap through the slice that the mapping call returned.
	err := unix.Munmap(mapped)
	db.dataref, db.data, db.datasz = nil, nil, 0
	return err
}

View File

@@ -0,0 +1,76 @@
package bolt
import (
"fmt"
"os"
"syscall"
"time"
"unsafe"
)
// odirect is left at its zero value in this file.
var odirect int

// fdatasync flushes written data to a file descriptor.
// Here it is implemented as a full Sync of the database file.
func fdatasync(db *DB) error {
	return db.file.Sync()
}
// flock acquires an advisory lock on a file descriptor.
// In this file it is a no-op: both the exclusive flag and the timeout
// are ignored and nil is always returned, so no lock is actually taken.
func flock(f *os.File, _ bool, _ time.Duration) error {
	return nil
}
// funlock releases an advisory lock on a file descriptor.
// In this file it is a no-op that always returns nil, matching flock,
// which never takes a lock here.
func funlock(f *os.File) error {
	return nil
}
// mmap memory maps a DB's data file.
// Based on: https://github.com/edsrzf/mmap-go
//
// Unless the database is read-only, the file is first truncated to sz
// bytes. A read-only file mapping of sz bytes is then created and a view
// of it is mapped; the view's address and size are stored in db.data and
// db.datasz. The file-mapping handle is closed before returning, also
// when mapping the view fails.
func mmap(db *DB, sz int) error {
	if !db.readOnly {
		// Truncate the database to the size of the mmap.
		if err := db.file.Truncate(int64(sz)); err != nil {
			return fmt.Errorf("truncate: %s", err)
		}
	}

	// Open a file mapping handle. CreateFileMapping takes the maximum
	// size as two 32-bit halves, high-order half first.
	sizehi := uint32(sz >> 32)
	sizelo := uint32(sz) & 0xffffffff
	h, errno := syscall.CreateFileMapping(syscall.Handle(db.file.Fd()), nil, syscall.PAGE_READONLY, sizehi, sizelo, nil)
	if h == 0 {
		return os.NewSyscallError("CreateFileMapping", errno)
	}

	// Create the memory map.
	addr, errno := syscall.MapViewOfFile(h, syscall.FILE_MAP_READ, 0, 0, uintptr(sz))
	if addr == 0 {
		// Release the mapping handle so that it is not leaked. The
		// MapViewOfFile failure is the error worth reporting, so a
		// failure to close the handle is deliberately ignored here.
		_ = syscall.CloseHandle(syscall.Handle(h))
		return os.NewSyscallError("MapViewOfFile", errno)
	}

	// Close mapping handle.
	if err := syscall.CloseHandle(syscall.Handle(h)); err != nil {
		return os.NewSyscallError("CloseHandle", err)
	}

	// Convert to a byte array.
	db.data = ((*[maxMapSize]byte)(unsafe.Pointer(addr)))
	db.datasz = sz

	return nil
}
// munmap unmaps a pointer from a file.
// Based on: https://github.com/edsrzf/mmap-go
//
// It does nothing when db.data is nil. Otherwise the view starting at
// db.data is unmapped and db.data / db.datasz are cleared regardless of
// the outcome, matching the other munmap implementations in this
// package. Clearing them prevents a later call from unmapping a stale
// address. The unmap error, if any, is returned wrapped as a syscall
// error.
func munmap(db *DB) error {
	if db.data == nil {
		return nil
	}

	addr := (uintptr)(unsafe.Pointer(&db.data[0]))
	var result error
	if err := syscall.UnmapViewOfFile(addr); err != nil {
		result = os.NewSyscallError("UnmapViewOfFile", err)
	}

	// Drop the references to the (now unmapped) view.
	db.data = nil
	db.datasz = 0
	return result
}

View File

@@ -0,0 +1,10 @@
// +build !windows,!plan9,!linux,!openbsd
package bolt
// odirect is left at its zero value in this file.
var odirect int

// fdatasync flushes written data to a file descriptor.
// Here it is implemented as a full Sync of the database file.
func fdatasync(db *DB) error {
	return db.file.Sync()
}

743
Godeps/_workspace/src/github.com/boltdb/bolt/bucket.go generated vendored Normal file
View File

@@ -0,0 +1,743 @@
package bolt
import (
"bytes"
"fmt"
"unsafe"
)
const (
	// MaxKeySize is the maximum length of a key, in bytes.
	MaxKeySize = 32768

	// MaxValueSize is the maximum length of a value, in bytes.
	// The value is 2^32 - 1; Bucket.Put compares it against an int64
	// conversion of the value length.
	MaxValueSize = 4294967295
)

// Extreme values of the platform-sized uint and int types.
const (
	maxUint = ^uint(0)
	minUint = 0
	maxInt  = int(^uint(0) >> 1)
	minInt  = -maxInt - 1
)

// bucketHeaderSize is the size in bytes of the bucket header struct.
const bucketHeaderSize = int(unsafe.Sizeof(bucket{}))

// Lower and upper bounds for a fill percentage.
// NOTE(review): presumably applied to Bucket.FillPercent when nodes
// split; the code that uses them is outside this file - confirm.
const (
	minFillPercent = 0.1
	maxFillPercent = 1.0
)

// DefaultFillPercent is the percentage that split pages are filled.
// This value can be changed by setting Bucket.FillPercent.
const DefaultFillPercent = 0.5
// Bucket represents a collection of key/value pairs inside the database.
//
// The buckets and nodes caches are only allocated for writable
// transactions (see newBucket); code reading them checks for nil.
type Bucket struct {
	*bucket                     // embedded header: root page id and sequence
	tx       *Tx                // the associated transaction
	buckets  map[string]*Bucket // subbucket cache
	page     *page              // inline page reference
	rootNode *node              // materialized node for the root page.
	nodes    map[pgid]*node     // node cache

	// Sets the threshold for filling nodes when they split. By default,
	// the bucket will fill to 50% but it can be useful to increase this
	// amount if you know that your write workloads are mostly append-only.
	//
	// This is non-persisted across transactions so it must be set in every Tx.
	FillPercent float64
}
// bucket represents the on-file representation of a bucket.
// This is stored as the "value" of a bucket key. If the bucket is small enough,
// then its root page can be stored inline in the "value", after the bucket
// header. In the case of inline buckets, the "root" will be 0.
//
// The struct is read from and written to raw byte slices through unsafe
// pointer casts (see openBucket and write), so its layout is the stored
// header layout.
type bucket struct {
	root     pgid   // page id of the bucket's root-level page
	sequence uint64 // monotonically incrementing, used by NextSequence()
}
// newBucket returns a new bucket associated with a transaction, using
// the default fill percentage. The sub-bucket and node caches are only
// allocated when the transaction is writable.
func newBucket(tx *Tx) Bucket {
	b := Bucket{
		tx:          tx,
		FillPercent: DefaultFillPercent,
	}
	if !tx.writable {
		return b
	}
	b.buckets = make(map[string]*Bucket)
	b.nodes = make(map[pgid]*node)
	return b
}
// Tx returns the transaction that the bucket is associated with.
func (b *Bucket) Tx() *Tx {
	return b.tx
}
// Root returns the page id of the bucket's root-level page.
// It is 0 for inline buckets.
func (b *Bucket) Root() pgid {
	return b.root
}
// Writable returns whether the bucket is writable, i.e. whether its
// transaction is writable.
func (b *Bucket) Writable() bool {
	return b.tx.writable
}
// Cursor creates a cursor associated with the bucket.
// The cursor is only valid as long as the transaction is open.
// Do not use a cursor after the transaction is closed.
func (b *Bucket) Cursor() *Cursor {
	// Count the cursor in the transaction statistics.
	b.tx.stats.CursorCount++

	// Hand out a cursor with an empty traversal stack.
	cur := new(Cursor)
	cur.bucket = b
	cur.stack = make([]elemRef, 0)
	return cur
}
// Bucket retrieves a nested bucket by name.
// Returns nil if the bucket does not exist.
func (b *Bucket) Bucket(name []byte) *Bucket {
	// Serve the request from the sub-bucket cache when it is available.
	if b.buckets != nil {
		if cached := b.buckets[string(name)]; cached != nil {
			return cached
		}
	}

	// Position a cursor at the key.
	cur := b.Cursor()
	found, raw, flags := cur.seek(name)

	// The key must be a bucket entry and match the name exactly.
	isBucket := (flags & bucketLeafFlag) != 0
	if !isBucket || !bytes.Equal(name, found) {
		return nil
	}

	// Open the bucket and remember it when a cache exists.
	child := b.openBucket(raw)
	if b.buckets != nil {
		b.buckets[string(name)] = child
	}
	return child
}
// Helper method that re-interprets a sub-bucket value
// from a parent into a Bucket
//
// value must hold a bucket header at offset 0; for an inline bucket
// (root == 0) it must also hold the inline page at offset
// bucketHeaderSize.
// NOTE(review): value is indexed without a length check, so a value
// shorter than that would panic - confirm callers always pass a full
// bucket value.
func (b *Bucket) openBucket(value []byte) *Bucket {
	var child = newBucket(b.tx)

	// If this is a writable transaction then we need to copy the bucket entry.
	// Read-only transactions can point directly at the mmap entry.
	if b.tx.writable {
		child.bucket = &bucket{}
		*child.bucket = *(*bucket)(unsafe.Pointer(&value[0]))
	} else {
		child.bucket = (*bucket)(unsafe.Pointer(&value[0]))
	}

	// Save a reference to the inline page if the bucket is inline.
	if child.root == 0 {
		child.page = (*page)(unsafe.Pointer(&value[bucketHeaderSize]))
	}

	return &child
}
// CreateBucket creates a new bucket at the given key and returns the new bucket.
//
// It returns ErrTxClosed when the transaction is closed,
// ErrTxNotWritable when it is read-only, ErrBucketNameRequired when key
// is empty, ErrBucketExists when key already names a bucket, and
// ErrIncompatibleValue when key already holds a non-bucket value.
func (b *Bucket) CreateBucket(key []byte) (*Bucket, error) {
	switch {
	case b.tx.db == nil:
		return nil, ErrTxClosed
	case !b.tx.writable:
		return nil, ErrTxNotWritable
	case len(key) == 0:
		return nil, ErrBucketNameRequired
	}

	// Position a cursor where the key belongs.
	cur := b.Cursor()
	found, _, flags := cur.seek(key)

	// An existing entry under this key is an error either way.
	if bytes.Equal(key, found) {
		if (flags & bucketLeafFlag) == 0 {
			return nil, ErrIncompatibleValue
		}
		return nil, ErrBucketExists
	}

	// Serialize an empty, inline bucket.
	empty := Bucket{
		bucket:      &bucket{},
		rootNode:    &node{isLeaf: true},
		FillPercent: DefaultFillPercent,
	}
	value := empty.write()

	// Store it under a private copy of the key.
	key = cloneBytes(key)
	cur.node().put(key, key, value, 0, bucketLeafFlag)

	// Since subbuckets are not allowed on inline buckets, we need to
	// dereference the inline page, if it exists. This will cause the bucket
	// to be treated as a regular, non-inline bucket for the rest of the tx.
	b.page = nil

	return b.Bucket(key), nil
}
// CreateBucketIfNotExists creates a new bucket if it doesn't already exist and returns a reference to it.
// When CreateBucket reports ErrBucketExists the existing bucket is
// returned instead; any other CreateBucket error is returned unchanged.
func (b *Bucket) CreateBucketIfNotExists(key []byte) (*Bucket, error) {
	child, err := b.CreateBucket(key)
	switch {
	case err == ErrBucketExists:
		return b.Bucket(key), nil
	case err != nil:
		return nil, err
	}
	return child, nil
}
// DeleteBucket deletes a bucket at the given key.
//
// It returns ErrTxClosed when the transaction is closed,
// ErrTxNotWritable when it is read-only, ErrBucketNotFound when the key
// does not exist, and ErrIncompatibleValue when the key holds a
// non-bucket value. Nested buckets are deleted recursively first.
func (b *Bucket) DeleteBucket(key []byte) error {
	switch {
	case b.tx.db == nil:
		return ErrTxClosed
	case !b.Writable():
		return ErrTxNotWritable
	}

	// Position a cursor at the key and verify that it names a bucket.
	cur := b.Cursor()
	found, _, flags := cur.seek(key)
	switch {
	case !bytes.Equal(key, found):
		return ErrBucketNotFound
	case (flags & bucketLeafFlag) == 0:
		return ErrIncompatibleValue
	}

	// Recursively delete all child buckets. ForEach reports nested
	// buckets with a nil value.
	child := b.Bucket(key)
	deleteNested := func(k, v []byte) error {
		if v != nil {
			return nil
		}
		if err := child.DeleteBucket(k); err != nil {
			return fmt.Errorf("delete bucket: %s", err)
		}
		return nil
	}
	if err := child.ForEach(deleteNested); err != nil {
		return err
	}

	// Drop the cached copy.
	delete(b.buckets, string(key))

	// Release all bucket pages to the freelist.
	child.nodes = nil
	child.rootNode = nil
	child.free()

	// Finally remove the bucket's entry from this bucket.
	cur.node().del(key)

	return nil
}
// Get retrieves the value for a key in the bucket.
// Returns a nil value if the key does not exist or if the key is a nested bucket.
// The returned value is only valid for the life of the transaction.
func (b *Bucket) Get(key []byte) []byte {
	found, value, flags := b.Cursor().seek(key)

	// A bucket entry, or a cursor that landed on a different key, both
	// mean there is no plain value to return.
	if (flags&bucketLeafFlag) != 0 || !bytes.Equal(key, found) {
		return nil
	}
	return value
}
// Put sets the value for a key in the bucket.
// If the key exist then its previous value will be overwritten.
//
// It returns ErrTxClosed when the transaction is closed,
// ErrTxNotWritable when it is read-only, ErrKeyRequired when key is
// empty, ErrKeyTooLarge when key exceeds MaxKeySize, ErrValueTooLarge
// when value exceeds MaxValueSize, and ErrIncompatibleValue when key
// names a nested bucket.
func (b *Bucket) Put(key []byte, value []byte) error {
	switch {
	case b.tx.db == nil:
		return ErrTxClosed
	case !b.Writable():
		return ErrTxNotWritable
	case len(key) == 0:
		return ErrKeyRequired
	case len(key) > MaxKeySize:
		return ErrKeyTooLarge
	case int64(len(value)) > MaxValueSize:
		return ErrValueTooLarge
	}

	// Position a cursor where the key belongs.
	cur := b.Cursor()
	found, _, flags := cur.seek(key)

	// A nested bucket cannot be overwritten with a plain value.
	if (flags&bucketLeafFlag) != 0 && bytes.Equal(key, found) {
		return ErrIncompatibleValue
	}

	// Store the value under a private copy of the key.
	key = cloneBytes(key)
	cur.node().put(key, key, value, 0, 0)

	return nil
}
// Delete removes a key from the bucket.
// If the key does not exist then nothing is done and a nil error is returned.
//
// It returns ErrTxClosed when the transaction is closed,
// ErrTxNotWritable when the bucket was created from a read-only
// transaction, and ErrIncompatibleValue when the key names a nested
// bucket (use DeleteBucket for those).
func (b *Bucket) Delete(key []byte) error {
	if b.tx.db == nil {
		return ErrTxClosed
	} else if !b.Writable() {
		return ErrTxNotWritable
	}

	// Move cursor to correct position.
	c := b.Cursor()
	k, _, flags := c.seek(key)

	// When the key is absent the cursor may rest on a following key, so
	// the flags describe that other entry. Without this check, deleting
	// a missing key whose successor is a nested bucket would wrongly
	// fail with ErrIncompatibleValue instead of being a no-op.
	if !bytes.Equal(key, k) {
		return nil
	}

	// Return an error if there is already existing bucket value.
	if (flags & bucketLeafFlag) != 0 {
		return ErrIncompatibleValue
	}

	// Delete the node if we have a matching key.
	c.node().del(key)

	return nil
}
// NextSequence returns an autoincrementing integer for the bucket.
// It returns ErrTxClosed when the transaction is closed and
// ErrTxNotWritable when it is read-only.
func (b *Bucket) NextSequence() (uint64, error) {
	switch {
	case b.tx.db == nil:
		return 0, ErrTxClosed
	case !b.Writable():
		return 0, ErrTxNotWritable
	}

	// Materialize the root node if it hasn't been already so that the
	// bucket will be saved during commit.
	if b.rootNode == nil {
		_ = b.node(b.root, nil)
	}

	// Advance the stored sequence and hand out the new value.
	next := b.bucket.sequence + 1
	b.bucket.sequence = next
	return next, nil
}
// ForEach executes a function for each key/value pair in a bucket.
// If the provided function returns an error then the iteration is stopped and
// the error is returned to the caller. ErrTxClosed is returned when the
// transaction is closed.
func (b *Bucket) ForEach(fn func(k, v []byte) error) error {
	if b.tx.db == nil {
		return ErrTxClosed
	}

	cur := b.Cursor()
	k, v := cur.First()
	for k != nil {
		if err := fn(k, v); err != nil {
			return err
		}
		k, v = cur.Next()
	}
	return nil
}
// Stats returns stats on a bucket.
//
// It walks every page of the bucket, accumulating page counts, key
// counts and used bytes, and recursively adds the statistics of every
// nested bucket found in the leaf pages.
func (b *Bucket) Stats() BucketStats {
	// s collects this bucket's own figures; subStats collects the
	// figures of all nested buckets and is merged into s at the end.
	var s, subStats BucketStats
	pageSize := b.tx.db.pageSize
	s.BucketN += 1
	if b.root == 0 {
		s.InlineBucketN += 1
	}
	b.forEachPage(func(p *page, depth int) {
		if (p.flags & leafPageFlag) != 0 {
			s.KeyN += int(p.count)

			// used totals the used bytes for the page
			used := pageHeaderSize

			if p.count != 0 {
				// If page has any elements, add all element headers.
				used += leafPageElementSize * int(p.count-1)

				// Add all element key, value sizes.
				// The computation takes advantage of the fact that the position
				// of the last element's key/value equals to the total of the sizes
				// of all previous elements' keys and values.
				// It also includes the last element's header.
				lastElement := p.leafPageElement(p.count - 1)
				used += int(lastElement.pos + lastElement.ksize + lastElement.vsize)
			}

			if b.root == 0 {
				// For inlined bucket just update the inline stats
				s.InlineBucketInuse += used
			} else {
				// For non-inlined bucket update all the leaf stats
				s.LeafPageN++
				s.LeafInuse += used
				s.LeafOverflowN += int(p.overflow)

				// Collect stats from sub-buckets.
				// Do that by iterating over all element headers
				// looking for the ones with the bucketLeafFlag.
				for i := uint16(0); i < p.count; i++ {
					e := p.leafPageElement(i)
					if (e.flags & bucketLeafFlag) != 0 {
						// For any bucket element, open the element value
						// and recursively call Stats on the contained bucket.
						subStats.Add(b.openBucket(e.value()).Stats())
					}
				}
			}
		} else if (p.flags & branchPageFlag) != 0 {
			s.BranchPageN++
			// NOTE(review): p.count - 1 assumes a branch page always has
			// at least one element - confirm.
			lastElement := p.branchPageElement(p.count - 1)

			// used totals the used bytes for the page
			// Add header and all element headers.
			used := pageHeaderSize + (branchPageElementSize * int(p.count-1))

			// Add size of all keys and values.
			// Again, use the fact that last element's position equals to
			// the total of key, value sizes of all previous elements.
			used += int(lastElement.pos + lastElement.ksize)
			s.BranchInuse += used
			s.BranchOverflowN += int(p.overflow)
		}

		// Keep track of maximum page depth.
		if depth+1 > s.Depth {
			s.Depth = (depth + 1)
		}
	})

	// Alloc stats can be computed from page counts and pageSize.
	s.BranchAlloc = (s.BranchPageN + s.BranchOverflowN) * pageSize
	s.LeafAlloc = (s.LeafPageN + s.LeafOverflowN) * pageSize

	// Add the max depth of sub-buckets to get total nested depth.
	s.Depth += subStats.Depth
	// Add the stats for all sub-buckets
	s.Add(subStats)
	return s
}
// forEachPage iterates over every page in a bucket, including inline pages.
// fn receives each page together with its depth.
func (b *Bucket) forEachPage(fn func(*page, int)) {
	// An inline bucket consists of exactly one page, at depth 0.
	if inline := b.page; inline != nil {
		fn(inline, 0)
		return
	}

	// Otherwise let the transaction walk the page hierarchy from the root.
	b.tx.forEachPage(b.root, 0, fn)
}
// forEachPageNode iterates over every page (or node) in a bucket.
// This also includes inline pages.
func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) {
	// An inline page is visited on its own, with no node, at depth 0.
	if inline := b.page; inline != nil {
		fn(inline, nil, 0)
		return
	}

	// Otherwise walk the hierarchy recursively, starting at the root.
	b._forEachPageNode(b.root, 0, fn)
}
// _forEachPageNode calls fn for the page or node identified by pgid and
// then recurses into its children with depth+1. pageNode decides whether
// the in-memory node or the underlying page represents the id.
func (b *Bucket) _forEachPageNode(pgid pgid, depth int, fn func(*page, *node, int)) {
	p, n := b.pageNode(pgid)

	// Visit this element before its children.
	fn(p, n, depth)

	if p == nil {
		// In-memory node: descend through its inodes unless it is a leaf.
		if n.isLeaf {
			return
		}
		for _, inode := range n.inodes {
			b._forEachPageNode(inode.pgid, depth+1, fn)
		}
		return
	}

	// Page: only branch pages have children.
	if (p.flags & branchPageFlag) == 0 {
		return
	}
	for i := 0; i < int(p.count); i++ {
		elem := p.branchPageElement(uint16(i))
		b._forEachPageNode(elem.pgid, depth+1, fn)
	}
}
// spill writes all the nodes for this bucket to dirty pages.
//
// Cached child buckets are handled first: each is either serialized
// inline or spilled recursively, and its header value is then written
// back into this bucket. Afterwards this bucket's own root node is
// spilled and b.root is updated to the resulting page id.
func (b *Bucket) spill() error {
	// Spill all child buckets first.
	for name, child := range b.buckets {
		// If the child bucket is small enough and it has no child buckets then
		// write it inline into the parent bucket's page. Otherwise spill it
		// like a normal bucket and make the parent value a pointer to the page.
		var value []byte
		if child.inlineable() {
			child.free()
			value = child.write()
		} else {
			if err := child.spill(); err != nil {
				return err
			}

			// Update the child bucket header in this bucket.
			value = make([]byte, unsafe.Sizeof(bucket{}))
			var bucket = (*bucket)(unsafe.Pointer(&value[0]))
			*bucket = *child.bucket
		}

		// Skip writing the bucket if there are no materialized nodes.
		if child.rootNode == nil {
			continue
		}

		// Update parent node.
		// The child's key must already exist in this bucket and be
		// flagged as a bucket; anything else is treated as corruption.
		var c = b.Cursor()
		k, _, flags := c.seek([]byte(name))
		if !bytes.Equal([]byte(name), k) {
			panic(fmt.Sprintf("misplaced bucket header: %x -> %x", []byte(name), k))
		}
		if flags&bucketLeafFlag == 0 {
			panic(fmt.Sprintf("unexpected bucket header flag: %x", flags))
		}
		c.node().put([]byte(name), []byte(name), value, 0, bucketLeafFlag)
	}

	// Ignore if there's not a materialized root node.
	if b.rootNode == nil {
		return nil
	}

	// Spill nodes.
	if err := b.rootNode.spill(); err != nil {
		return err
	}
	// Re-resolve the root through root() after spilling.
	b.rootNode = b.rootNode.root()

	// Update the root node for this bucket.
	if b.rootNode.pgid >= b.tx.meta.pgid {
		panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", b.rootNode.pgid, b.tx.meta.pgid))
	}
	b.root = b.rootNode.pgid

	return nil
}
// inlineable returns true if a bucket is small enough to be written inline
// and if it contains no subbuckets. Otherwise returns false.
func (b *Bucket) inlineable() bool {
	root := b.rootNode

	// Only a bucket made of a single, materialized leaf node qualifies.
	if root == nil || !root.isLeaf {
		return false
	}

	// Add up the serialized size entry by entry. Give up as soon as a
	// nested bucket is found or the size passes the inline threshold.
	total := pageHeaderSize
	for _, in := range root.inodes {
		total += leafPageElementSize + len(in.key) + len(in.value)

		if in.flags&bucketLeafFlag != 0 {
			return false
		}
		if total > b.maxInlineBucketSize() {
			return false
		}
	}

	return true
}
// maxInlineBucketSize returns the maximum total size of a bucket to make
// it a candidate for inlining: one quarter of the database page size.
func (b *Bucket) maxInlineBucketSize() int {
	return b.tx.db.pageSize / 4
}
// write allocates and writes a bucket to a byte slice.
// The slice holds the bucket header followed by the root node serialized
// as a page. b.rootNode must be set.
func (b *Bucket) write() []byte {
	root := b.rootNode

	// Room for the header plus the serialized root node.
	buf := make([]byte, bucketHeaderSize+root.size())

	// Copy the bucket header to the front of the buffer.
	*(*bucket)(unsafe.Pointer(&buf[0])) = *b.bucket

	// Treat the rest of the buffer as a page and write the root node into it.
	root.write((*page)(unsafe.Pointer(&buf[bucketHeaderSize])))

	return buf
}
// rebalance attempts to balance all nodes: first every cached node of
// this bucket, then, recursively, every cached child bucket.
func (b *Bucket) rebalance() {
	for _, cached := range b.nodes {
		cached.rebalance()
	}
	for _, sub := range b.buckets {
		sub.rebalance()
	}
}
// node creates a node from a page and associates it with a given parent.
// A node already cached for pgid is returned as is. A node created with
// a nil parent becomes the bucket's root node.
func (b *Bucket) node(pgid pgid, parent *node) *node {
	_assert(b.nodes != nil, "nodes map expected")

	// Reuse the node when it has been materialized before.
	if cached := b.nodes[pgid]; cached != nil {
		return cached
	}

	// Build the node and hook it into the tree.
	n := &node{bucket: b, parent: parent}
	if parent != nil {
		parent.children = append(parent.children, n)
	} else {
		b.rootNode = n
	}

	// An inline bucket reads from its inline page; any other bucket
	// reads the page from the transaction.
	src := b.page
	if src == nil {
		src = b.tx.page(pgid)
	}

	// Load the page into the node and cache the node.
	n.read(src)
	b.nodes[pgid] = n

	// Count the node in the transaction statistics.
	b.tx.stats.NodeCount++

	return n
}
// free recursively frees all pages in the bucket and resets its root to
// 0. It does nothing when the root is already 0.
func (b *Bucket) free() {
	if b.root == 0 {
		return
	}

	tx := b.tx
	release := func(p *page, n *node, _ int) {
		if p == nil {
			// Materialized node: let the node free itself.
			n.free()
			return
		}
		// Plain page: hand it to the freelist under this transaction id.
		tx.db.freelist.free(tx.meta.txid, p)
	}
	b.forEachPageNode(release)
	b.root = 0
}
// dereference removes all references to the old mmap, starting from the
// root of this bucket's node tree and continuing through every cached
// child bucket.
func (b *Bucket) dereference() {
	if root := b.rootNode; root != nil {
		root.root().dereference()
	}

	for _, sub := range b.buckets {
		sub.dereference()
	}
}
// pageNode returns the in-memory node, if it exists.
// Otherwise returns the underlying page.
// Exactly one of the two results is non-nil, except that an inline
// bucket with neither a root node nor an inline page yields two nils.
func (b *Bucket) pageNode(id pgid) (*page, *node) {
	if b.root != 0 {
		// Regular bucket: prefer a materialized node from the cache.
		if b.nodes != nil {
			if cached := b.nodes[id]; cached != nil {
				return nil, cached
			}
		}
		// Fall back to the page held by the transaction.
		return b.tx.page(id), nil
	}

	// Inline buckets have a fake page embedded in their value, so the
	// only valid id is 0.
	if id != 0 {
		panic(fmt.Sprintf("inline bucket non-zero page access(2): %d != 0", id))
	}
	if b.rootNode != nil {
		return nil, b.rootNode
	}
	return b.page, nil
}
// BucketStats records statistics about resources used by a bucket.
type BucketStats struct {
	// Page count statistics.
	BranchPageN     int // number of logical branch pages
	BranchOverflowN int // number of physical branch overflow pages
	LeafPageN       int // number of logical leaf pages
	LeafOverflowN   int // number of physical leaf overflow pages

	// Tree statistics.
	KeyN  int // number of keys/value pairs
	Depth int // number of levels in B+tree

	// Page size utilization.
	BranchAlloc int // bytes allocated for physical branch pages
	BranchInuse int // bytes actually used for branch data
	LeafAlloc   int // bytes allocated for physical leaf pages
	LeafInuse   int // bytes actually used for leaf data

	// Bucket statistics
	BucketN           int // total number of buckets including the top bucket
	InlineBucketN     int // total number of inlined buckets
	InlineBucketInuse int // bytes used for inlined buckets (also accounted for in LeafInuse)
}

// Add merges other into s. Every counter is summed, except Depth, which
// becomes the larger of the two depths.
func (s *BucketStats) Add(other BucketStats) {
	// Depth is a maximum, not a sum.
	if other.Depth > s.Depth {
		s.Depth = other.Depth
	}

	// Branch pages.
	s.BranchPageN += other.BranchPageN
	s.BranchOverflowN += other.BranchOverflowN
	s.BranchAlloc += other.BranchAlloc
	s.BranchInuse += other.BranchInuse

	// Leaf pages.
	s.LeafPageN += other.LeafPageN
	s.LeafOverflowN += other.LeafOverflowN
	s.LeafAlloc += other.LeafAlloc
	s.LeafInuse += other.LeafInuse

	// Keys and buckets.
	s.KeyN += other.KeyN
	s.BucketN += other.BucketN
	s.InlineBucketN += other.InlineBucketN
	s.InlineBucketInuse += other.InlineBucketInuse
}
// cloneBytes returns a copy of a given slice. The copy has its own
// backing array; a nil or empty input yields an empty, non-nil slice.
func cloneBytes(v []byte) []byte {
	dup := make([]byte, len(v))
	copy(dup, v)
	return dup
}

File diff suppressed because it is too large Load Diff

384
Godeps/_workspace/src/github.com/boltdb/bolt/cursor.go generated vendored Normal file
View File

@@ -0,0 +1,384 @@
package bolt
import (
"bytes"
"fmt"
"sort"
)
// Cursor represents an iterator that can traverse over all key/value pairs in a bucket in sorted order.
// Cursors see nested buckets with value == nil.
// Cursors can be obtained from a transaction and are valid as long as the transaction is open.
//
// Keys and values returned from the cursor are only valid for the life of the transaction.
//
// Changing data while traversing with a cursor may cause it to be invalidated
// and return unexpected keys and/or values. You must reposition your cursor
// after mutating data.
type Cursor struct {
	bucket *Bucket   // bucket the cursor iterates over
	stack  []elemRef // page/node references with element indexes; the last entry is the current position
}
// Bucket returns the bucket that this cursor was created from.
func (c *Cursor) Bucket() *Bucket {
	return c.bucket
}
// First moves the cursor to the first item in the bucket and returns its key and value.
// If the bucket is empty then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) First() (key []byte, value []byte) {
	_assert(c.bucket.tx.db != nil, "tx closed")

	// Restart the traversal from the bucket's root, at its first element.
	c.stack = c.stack[:0]
	p, n := c.bucket.pageNode(c.bucket.root)
	c.stack = append(c.stack, elemRef{page: p, node: n, index: 0})
	c.first()

	k, v, flags := c.keyValue()
	if (flags & uint32(bucketLeafFlag)) == 0 {
		return k, v
	}
	// Nested buckets are reported with a nil value.
	return k, nil
}
// Last moves the cursor to the last item in the bucket and returns its key and value.
// If the bucket is empty then a nil key and value are returned.
// The returned key and value are only valid for the life of the transaction.
func (c *Cursor) Last() (key []byte, value []byte) {
	_assert(c.bucket.tx.db != nil, "tx closed")

	// Restart the traversal from the bucket's root, at its last element.
	c.stack = c.stack[:0]
	p, n := c.bucket.pageNode(c.bucket.root)
	root := elemRef{page: p, node: n}
	root.index = root.count() - 1
	c.stack = append(c.stack, root)
	c.last()

	k, v, flags := c.keyValue()
	if (flags & uint32(bucketLeafFlag)) == 0 {
		return k, v
	}
	// Nested buckets are reported with a nil value.
	return k, nil
}
// Next advances the cursor by one item and returns that item's key and value.
// When the cursor is already at the end of the bucket a nil key and value are
// returned. When the item is a nested bucket the value is reported as nil.
// The returned slices are only valid for the life of the transaction.
func (c *Cursor) Next() (key []byte, value []byte) {
	_assert(c.bucket.tx.db != nil, "tx closed")

	key, value, flags := c.next()
	if flags&uint32(bucketLeafFlag) != 0 {
		// Nested buckets are exposed with a nil value.
		value = nil
	}
	return key, value
}
// Prev moves the cursor back by one item and returns that item's key and value.
// When the cursor is already at the beginning of the bucket a nil key and
// value are returned. When the item is a nested bucket the value is reported
// as nil.
// The returned slices are only valid for the life of the transaction.
func (c *Cursor) Prev() (key []byte, value []byte) {
	_assert(c.bucket.tx.db != nil, "tx closed")

	// Step back one element on the deepest level that still has a previous
	// element, popping every level that is already at its first element.
	for len(c.stack) > 0 {
		top := &c.stack[len(c.stack)-1]
		if top.index > 0 {
			top.index--
			break
		}
		c.stack = c.stack[:len(c.stack)-1]
	}

	// Every level was at its first element: we ran off the beginning.
	if len(c.stack) == 0 {
		return nil, nil
	}

	// Descend to the last element of the last leaf under the new position.
	c.last()

	key, value, flags := c.keyValue()
	if flags&uint32(bucketLeafFlag) != 0 {
		// Nested buckets are exposed with a nil value.
		value = nil
	}
	return key, value
}
// Seek positions the cursor on the given key and returns it with its value.
// If the key is absent, the next key in order is used instead; if no key
// follows, a nil key and value are returned. When the item is a nested bucket
// the value is reported as nil.
// The returned slices are only valid for the life of the transaction.
func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) {
	foundKey, foundValue, flags := c.seek(seek)

	// The search may stop one past the last element of a leaf; the candidate
	// is then the first element of the following leaf.
	top := &c.stack[len(c.stack)-1]
	if top.index >= top.count() {
		foundKey, foundValue, flags = c.next()
	}

	switch {
	case foundKey == nil:
		return nil, nil
	case flags&uint32(bucketLeafFlag) != 0:
		// Nested buckets are exposed with a nil value.
		return foundKey, nil
	default:
		return foundKey, foundValue
	}
}
// Delete removes the key/value pair the cursor currently points at.
// It returns ErrTxClosed when the transaction is closed, ErrTxNotWritable when
// the bucket is not writable, and ErrIncompatibleValue when the current
// element is a nested bucket.
func (c *Cursor) Delete() error {
	if c.bucket.tx.db == nil {
		return ErrTxClosed
	}
	if !c.bucket.Writable() {
		return ErrTxNotWritable
	}

	key, _, flags := c.keyValue()

	// Buckets cannot be removed through the cursor.
	if flags&bucketLeafFlag != 0 {
		return ErrIncompatibleValue
	}

	c.node().del(key)
	return nil
}
// seek positions the cursor on the given key, or on the slot where the key
// would be inserted, and returns the element found there together with its
// flags. If that slot lies past the end of the leaf, nil/nil/0 is returned and
// the caller decides whether to advance.
func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) {
	_assert(c.bucket.tx.db != nil, "tx closed")

	// Rebuild the stack by searching down from the root page/node.
	c.stack = c.stack[:0]
	c.search(seek, c.bucket.root)

	// Nothing to report when the search ended past the last element.
	if leaf := &c.stack[len(c.stack)-1]; leaf.index >= leaf.count() {
		return nil, nil, 0
	}

	return c.keyValue()
}
// first descends from the element on top of the stack to a leaf, always
// entering a child at index 0.
func (c *Cursor) first() {
	for {
		top := &c.stack[len(c.stack)-1]
		if top.isLeaf() {
			// Reached a leaf page/node: done.
			return
		}

		// Resolve the child referenced by the current branch element, from
		// the in-memory node when there is one, otherwise from the page.
		var child pgid
		if top.node != nil {
			child = top.node.inodes[top.index].pgid
		} else {
			child = top.page.branchPageElement(uint16(top.index)).pgid
		}

		childPage, childNode := c.bucket.pageNode(child)
		c.stack = append(c.stack, elemRef{page: childPage, node: childNode, index: 0})
	}
}
// last descends from the element on top of the stack to a leaf, always
// entering a child at its last index.
func (c *Cursor) last() {
	for {
		top := &c.stack[len(c.stack)-1]
		if top.isLeaf() {
			// Reached a leaf page/node: done.
			return
		}

		// Resolve the child referenced by the current branch element, from
		// the in-memory node when there is one, otherwise from the page.
		var child pgid
		if top.node != nil {
			child = top.node.inodes[top.index].pgid
		} else {
			child = top.page.branchPageElement(uint16(top.index)).pgid
		}

		childPage, childNode := c.bucket.pageNode(child)
		down := elemRef{page: childPage, node: childNode}
		down.index = down.count() - 1
		c.stack = append(c.stack, down)
	}
}
// next advances to the following leaf element and returns its key, value and
// flags. When the cursor is already on the last leaf element it stays there
// and nil/nil/0 is returned.
func (c *Cursor) next() (key []byte, value []byte, flags uint32) {
	// Find the deepest level that still has an element to the right and
	// advance it by one.
	depth := len(c.stack) - 1
	for ; depth >= 0; depth-- {
		if ref := &c.stack[depth]; ref.index < ref.count()-1 {
			ref.index++
			break
		}
	}

	// No level could advance: the cursor is left on the last element of the
	// last page.
	if depth < 0 {
		return nil, nil, 0
	}

	// Drop everything below the advanced level and descend to the first
	// element of the first leaf beneath it.
	c.stack = c.stack[:depth+1]
	c.first()
	return c.keyValue()
}
// search pushes the page/node identified by pgid onto the stack and keeps
// searching towards key: leaves are handled by nsearch, branches by
// searchNode (in-memory node) or searchPage (mapped page), which recurse back
// into search. It panics if a page is neither a branch nor a leaf.
func (c *Cursor) search(key []byte, pgid pgid) {
	p, n := c.bucket.pageNode(pgid)
	if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 {
		panic(fmt.Sprintf("invalid page type: %d: %x", p.id, p.flags))
	}

	ref := elemRef{page: p, node: n}
	c.stack = append(c.stack, ref)

	switch {
	case ref.isLeaf():
		// Leaf level: locate the key's slot within the leaf.
		c.nsearch(key)
	case n != nil:
		c.searchNode(key, n)
	default:
		c.searchPage(key, p)
	}
}
// searchNode picks the child of the in-memory branch node n whose key range
// covers key, records that child's index on the top stack entry, and
// continues the search in the child.
func (c *Cursor) searchNode(key []byte, n *node) {
	// sort.Search yields the first inode whose key is >= key. Unless that is
	// an exact match, the covering child is the one just before it.
	matched := false
	pos := sort.Search(len(n.inodes), func(i int) bool {
		cmp := bytes.Compare(n.inodes[i].key, key)
		if cmp == 0 {
			matched = true
		}
		return cmp >= 0
	})
	if pos > 0 && !matched {
		pos--
	}
	c.stack[len(c.stack)-1].index = pos

	// Continue in the selected child.
	c.search(key, n.inodes[pos].pgid)
}
// searchPage picks the child of the mapped branch page p whose key range
// covers key, records that child's index on the top stack entry, and
// continues the search in the child.
func (c *Cursor) searchPage(key []byte, p *page) {
	elems := p.branchPageElements()

	// sort.Search yields the first element whose key is >= key. Unless that
	// is an exact match, the covering child is the one just before it.
	matched := false
	pos := sort.Search(int(p.count), func(i int) bool {
		cmp := bytes.Compare(elems[i].key(), key)
		if cmp == 0 {
			matched = true
		}
		return cmp >= 0
	})
	if pos > 0 && !matched {
		pos--
	}
	c.stack[len(c.stack)-1].index = pos

	// Continue in the selected child.
	c.search(key, elems[pos].pgid)
}
// nsearch sets the index of the leaf on top of the stack to the first element
// whose key is >= key. The index equals the element count when every key in
// the leaf is smaller.
func (c *Cursor) nsearch(key []byte) {
	top := &c.stack[len(c.stack)-1]

	// Prefer the in-memory node when one is present.
	if n := top.node; n != nil {
		top.index = sort.Search(len(n.inodes), func(i int) bool {
			return bytes.Compare(n.inodes[i].key, key) != -1
		})
		return
	}

	// Otherwise search the leaf elements of the mapped page.
	p := top.page
	elems := p.leafPageElements()
	top.index = sort.Search(int(p.count), func(i int) bool {
		return bytes.Compare(elems[i].key(), key) != -1
	})
}
// keyValue returns the key, value and flags of the leaf element on top of the
// stack, or nil/nil/0 when the leaf is empty or the index is past its end.
func (c *Cursor) keyValue() ([]byte, []byte, uint32) {
	top := &c.stack[len(c.stack)-1]
	if n := top.count(); n == 0 || top.index >= n {
		return nil, nil, 0
	}

	// Read from the in-memory node when there is one...
	if top.node != nil {
		in := &top.node.inodes[top.index]
		return in.key, in.value, in.flags
	}

	// ...otherwise from the mapped page.
	leaf := top.page.leafPageElement(uint16(top.index))
	return leaf.key(), leaf.value(), leaf.flags
}
// node returns the in-memory leaf node the cursor is positioned on,
// materializing nodes along the stack's path when the top of the stack is not
// already a leaf node. It panics on an empty stack.
func (c *Cursor) node() *node {
	_assert(len(c.stack) > 0, "accessing a node with a zero-length cursor stack")

	// Fast path: the top of the stack already is a leaf node.
	top := &c.stack[len(c.stack)-1]
	if top.node != nil && top.isLeaf() {
		return top.node
	}

	// Start at the root, loading it through the bucket if it is only a page.
	n := c.stack[0].node
	if n == nil {
		n = c.bucket.node(c.stack[0].page.id, nil)
	}

	// Follow the recorded child index at every level above the leaf.
	for i := 0; i < len(c.stack)-1; i++ {
		_assert(!n.isLeaf, "expected branch node")
		n = n.childAt(int(c.stack[i].index))
	}
	_assert(n.isLeaf, "expected leaf node")
	return n
}
// elemRef represents a reference to an element on a given page/node.
// When node is non-nil the element is read from the node; otherwise it is
// read from page (see isLeaf and count).
type elemRef struct {
// page is the mapped page holding the element; consulted when node is nil.
page *page
// node is the in-memory node holding the element; takes precedence over page.
node *node
// index is the position of the element within the page/node.
index int
}
// isLeaf reports whether the referenced page/node is a leaf. The node is
// consulted when present; otherwise the page's leaf flag decides.
func (r *elemRef) isLeaf() bool {
	if n := r.node; n != nil {
		return n.isLeaf
	}
	return r.page.flags&leafPageFlag != 0
}
// count returns how many elements the referenced page/node holds: the number
// of inodes when a node is present, otherwise the page's element count.
func (r *elemRef) count() int {
	if r.node == nil {
		return int(r.page.count)
	}
	return len(r.node.inodes)
}

792
Godeps/_workspace/src/github.com/boltdb/bolt/db.go generated vendored Normal file
View File

@@ -0,0 +1,792 @@
package bolt
import (
"fmt"
"hash/fnv"
"os"
"runtime"
"runtime/debug"
"strings"
"sync"
"time"
"unsafe"
)
// The largest step that can be taken when remapping the mmap.
// mmapSize rounds sizes above 1GB up to a multiple of this value.
const maxMmapStep = 1 << 30 // 1GB
// The data file format version.
// It is written into every meta page and checked by meta.validate.
const version = 2
// Represents a marker value to indicate that a file is a Bolt DB.
// It is written into every meta page and checked by meta.validate.
const magic uint32 = 0xED0CDAED
// IgnoreNoSync specifies whether the NoSync field of a DB is ignored when
// syncing changes to a file. This is required as some operating systems,
// such as OpenBSD, do not have a unified buffer cache (UBC) and writes
// must be synchronized using the msync(2) syscall.
const IgnoreNoSync = runtime.GOOS == "openbsd"
// Default values if not set in a DB instance.
// Open copies them into DB.MaxBatchSize and DB.MaxBatchDelay.
const (
DefaultMaxBatchSize int = 1000
DefaultMaxBatchDelay = 10 * time.Millisecond
)
// DB represents a collection of buckets persisted to a file on disk.
// All data access is performed through transactions which can be obtained through the DB.
// All the functions on DB will return an ErrDatabaseNotOpen if accessed before Open() is called.
type DB struct {
// When enabled, the database will perform a Check() after every commit.
// A panic is issued if the database is in an inconsistent state. This
// flag has a large performance impact so it should only be used for
// debugging purposes.
StrictMode bool
// Setting the NoSync flag will cause the database to skip fsync()
// calls after each commit. This can be useful when bulk loading data
// into a database and you can restart the bulk load in the event of
// a system failure or database corruption. Do not set this flag for
// normal use.
//
// If the package global IgnoreNoSync constant is true, this value is
// ignored. See the comment on that constant for more details.
//
// THIS IS UNSAFE. PLEASE USE WITH CAUTION.
NoSync bool
// When true, skips the truncate call when growing the database.
// Setting this to true is only safe on non-ext3/ext4 systems.
// Skipping truncation avoids preallocation of hard drive space and
// bypasses a truncate() and fsync() syscall on remapping.
//
// https://github.com/boltdb/bolt/issues/284
NoGrowSync bool
// MaxBatchSize is the maximum size of a batch. Default value is
// copied from DefaultMaxBatchSize in Open.
//
// If <=0, disables batching.
//
// Do not change concurrently with calls to Batch.
MaxBatchSize int
// MaxBatchDelay is the maximum delay before a batch starts.
// Default value is copied from DefaultMaxBatchDelay in Open.
//
// If <=0, effectively disables batching.
//
// Do not change concurrently with calls to Batch.
MaxBatchDelay time.Duration
// path is the file path given to Open; close resets it to "".
path string
// file is the open data file; close sets it back to nil.
file *os.File
dataref []byte // mmap'ed readonly, write throws SEGV
// data is the array that page() indexes to locate a page.
data *[maxMapSize]byte
// datasz is compared against the required size in allocate to decide
// whether the file has to be remapped.
datasz int
// meta0 and meta1 reference meta pages 0 and 1; mmap refreshes them after
// every (re)mapping.
meta0 *meta
meta1 *meta
// pageSize is the page size in bytes: the OS page size for a new file
// (init), or the value stored in the first meta page of an existing file.
pageSize int
// opened is set by Open and cleared by close; beginTx and beginRWTx
// return ErrDatabaseNotOpen while it is false.
opened bool
// rwtx is the write transaction most recently started by beginRWTx.
rwtx *Tx
// txs lists the open read-only transactions (added by beginTx, removed
// by removeTx).
txs []*Tx
freelist *freelist
stats Stats
batchMu sync.Mutex
batch *batch
rwlock sync.Mutex // Allows only one writer at a time.
metalock sync.Mutex // Protects meta page access.
mmaplock sync.RWMutex // Protects mmap access during remapping.
statlock sync.RWMutex // Protects stats access.
// ops holds replaceable I/O functions (test hooks). Open points writeAt
// at file.WriteAt and close clears it.
ops struct {
writeAt func(b []byte, off int64) (n int, err error)
}
// Read only mode.
// When true, Update() and Begin(true) return ErrDatabaseReadOnly immediately.
readOnly bool
}
// Path returns the path to currently open database file.
// The path is reset to the empty string when the database is closed.
func (db *DB) Path() string {
return db.path
}
// GoString returns the Go string representation of the database,
// in the form bolt.DB{path:"<quoted path>"}.
func (db *DB) GoString() string {
return fmt.Sprintf("bolt.DB{path:%q}", db.path)
}
// String returns the string representation of the database,
// in the form DB<"<quoted path>">.
func (db *DB) String() string {
return fmt.Sprintf("DB<%q>", db.path)
}
// Open creates and opens a database at the given path.
// If the file does not exist then it will be created automatically.
// Passing in nil options will cause Bolt to open the database with the default options.
//
// On any failure the partially opened database is closed again, releasing the
// file descriptor and the file lock, and a nil DB is returned with the error.
func Open(path string, mode os.FileMode, options *Options) (*DB, error) {
	var db = &DB{opened: true}

	// Set default options if no options are provided.
	if options == nil {
		options = DefaultOptions
	}
	db.NoGrowSync = options.NoGrowSync

	// Set default values for later DB operations.
	db.MaxBatchSize = DefaultMaxBatchSize
	db.MaxBatchDelay = DefaultMaxBatchDelay

	flag := os.O_RDWR
	if options.ReadOnly {
		flag = os.O_RDONLY
		db.readOnly = true
	}

	// Open data file and separate sync handler for metadata writes.
	db.path = path
	var err error
	if db.file, err = os.OpenFile(db.path, flag|os.O_CREATE, mode); err != nil {
		_ = db.close()
		return nil, err
	}

	// Lock file so that other processes using Bolt in read-write mode cannot
	// use the database at the same time. This would cause corruption since
	// the two processes would write meta pages and free pages separately.
	// The database file is locked exclusively (only one process can grab the lock)
	// if !options.ReadOnly.
	// The database file is locked using the shared lock (more than one process may
	// hold a lock at the same time) otherwise (options.ReadOnly is set).
	if err := flock(db.file, !db.readOnly, options.Timeout); err != nil {
		_ = db.close()
		return nil, err
	}

	// Default values for test hooks
	db.ops.writeAt = db.file.WriteAt

	// Initialize the database if it doesn't exist.
	// Every failure below must close the db: the file is open and locked at
	// this point, and returning without closing would leak both.
	if info, err := db.file.Stat(); err != nil {
		_ = db.close()
		return nil, fmt.Errorf("stat error: %s", err)
	} else if info.Size() == 0 {
		// Initialize new files with meta pages.
		if err := db.init(); err != nil {
			_ = db.close()
			return nil, err
		}
	} else {
		// Read the first meta page to determine the page size.
		// If the read itself fails the page size is left unset here; the meta
		// pages are validated again by mmap below.
		var buf [0x1000]byte
		if _, err := db.file.ReadAt(buf[:], 0); err == nil {
			m := db.pageInBuffer(buf[:], 0).meta()
			if err := m.validate(); err != nil {
				_ = db.close()
				return nil, fmt.Errorf("meta0 error: %s", err)
			}
			db.pageSize = int(m.pageSize)
		}
	}

	// Memory map the data file.
	if err := db.mmap(0); err != nil {
		_ = db.close()
		return nil, err
	}

	// Read in the freelist.
	db.freelist = newFreelist()
	db.freelist.read(db.page(db.meta().freelist))

	// The database was marked as opened when it was constructed above.
	return db, nil
}
// mmap (re)maps the data file into memory and refreshes the cached meta page
// references. minsz is the smallest acceptable size of the new mapping.
// The mmap write lock is held for the whole call.
func (db *DB) mmap(minsz int) error {
	db.mmaplock.Lock()
	defer db.mmaplock.Unlock()

	info, err := db.file.Stat()
	if err != nil {
		return fmt.Errorf("mmap stat error: %s", err)
	}
	fileSize := int(info.Size())
	if fileSize < db.pageSize*2 {
		// The file must hold at least the two meta pages.
		return fmt.Errorf("file size too small")
	}

	// Map at least minsz bytes, rounded up to an allowed mmap size.
	target := fileSize
	if minsz > target {
		target = minsz
	}
	if target, err = db.mmapSize(target); err != nil {
		return err
	}

	// Dereference all mmap references before unmapping.
	if db.rwtx != nil {
		db.rwtx.root.dereference()
	}

	// Drop the existing mapping, then map the file at its new size.
	if err := db.munmap(); err != nil {
		return err
	}
	if err := mmap(db, target); err != nil {
		return err
	}

	// Re-point the meta references into the new mapping and validate them.
	db.meta0 = db.page(0).meta()
	db.meta1 = db.page(1).meta()

	if err := db.meta0.validate(); err != nil {
		return fmt.Errorf("meta0 error: %s", err)
	}
	if err := db.meta1.validate(); err != nil {
		return fmt.Errorf("meta1 error: %s", err)
	}
	return nil
}
// munmap unmaps the data file from memory.
// Any error from the platform munmap is returned prefixed with "unmap error: ".
func (db *DB) munmap() error {
	if err := munmap(db); err != nil {
		// Pass the error as an argument instead of concatenating it into the
		// format string, so a '%' in the error text is not misinterpreted.
		return fmt.Errorf("unmap error: %s", err)
	}
	return nil
}
// mmapSize determines the mmap size to use for a database of the given size.
// Sizes up to 1GB are rounded up to the next power of two, with a minimum of
// 32KB. Larger sizes are rounded up to a multiple of maxMmapStep (1GB) and of
// the page size, capped at maxMapSize.
// An error is returned when size itself exceeds maxMapSize.
func (db *DB) mmapSize(size int) (int, error) {
	// Powers of two from 32KB (1<<15) to 1GB (1<<30).
	for shift := uint(15); shift <= 30; shift++ {
		if step := 1 << shift; size <= step {
			return step, nil
		}
	}

	// Reject requests above the maximum allowed mapping.
	if size > maxMapSize {
		return 0, fmt.Errorf("mmap too large")
	}

	// Above 1GB, grow in 1GB steps.
	rounded := int64(size)
	if rem := rounded % int64(maxMmapStep); rem > 0 {
		rounded += int64(maxMmapStep) - rem
	}

	// Keep the result a multiple of the page size. This should already hold
	// because the steps are whole megabytes.
	pageSize := int64(db.pageSize)
	if rounded%pageSize != 0 {
		rounded = (rounded/pageSize + 1) * pageSize
	}

	// Never grow past the maximum.
	if rounded > maxMapSize {
		rounded = maxMapSize
	}

	return int(rounded), nil
}
// init writes the initial four pages of a new database file: two meta pages
// (ids 0 and 1), an empty freelist page (id 2) and an empty leaf page (id 3)
// that serves as the root bucket. The page size is taken from the OS.
func (db *DB) init() error {
	db.pageSize = os.Getpagesize()

	// One buffer holding all four pages.
	buf := make([]byte, db.pageSize*4)

	// Meta pages 0 and 1 differ only in their page id and transaction id.
	for i := 0; i < 2; i++ {
		id := pgid(i)
		p := db.pageInBuffer(buf[:], id)
		p.id = id
		p.flags = metaPageFlag

		m := p.meta()
		m.magic = magic
		m.version = version
		m.pageSize = uint32(db.pageSize)
		m.freelist = 2
		m.root = bucket{root: 3}
		m.pgid = 4
		m.txid = txid(i)
	}

	// Empty freelist at page id 2.
	freelistPage := db.pageInBuffer(buf[:], pgid(2))
	freelistPage.id = pgid(2)
	freelistPage.flags = freelistPageFlag
	freelistPage.count = 0

	// Empty leaf at page id 3.
	leafPage := db.pageInBuffer(buf[:], pgid(3))
	leafPage.id = pgid(3)
	leafPage.flags = leafPageFlag
	leafPage.count = 0

	// Write everything out and flush it to disk.
	if _, err := db.ops.writeAt(buf, 0); err != nil {
		return err
	}
	return fdatasync(db)
}
// Close releases all database resources.
// All transactions must be closed before closing the database.
//
// Close takes the writer lock, then the meta lock, then a read lock on the
// mmap before delegating to close; all three are released on return.
func (db *DB) Close() error {
db.rwlock.Lock()
defer db.rwlock.Unlock()
db.metalock.Lock()
defer db.metalock.Unlock()
db.mmaplock.RLock()
defer db.mmaplock.RUnlock()
return db.close()
}
// close tears the database down without taking any locks: it marks the DB as
// not opened, clears the freelist, path and write hook, unmaps the data, and
// then unlocks (unless read-only) and closes the file if one is open.
func (db *DB) close() error {
	db.opened = false
	db.freelist = nil
	db.path = ""

	// Clear ops.
	db.ops.writeAt = nil

	// Drop the memory mapping first.
	if err := db.munmap(); err != nil {
		return err
	}

	// Nothing more to do when no file was ever opened.
	if db.file == nil {
		return nil
	}

	// A read-only file needs no explicit unlock.
	if !db.readOnly {
		_ = funlock(db.file)
	}

	if err := db.file.Close(); err != nil {
		return fmt.Errorf("db file close: %s", err)
	}
	db.file = nil
	return nil
}
// Begin starts a new transaction.
// Multiple read-only transactions can be used concurrently but only one
// write transaction can be used at a time. Starting multiple write transactions
// will cause the calls to block and be serialized until the current write
// transaction finishes.
//
// Transactions should not be dependent on one another. Opening a read
// transaction and a write transaction in the same goroutine can cause the
// writer to deadlock because the database periodically needs to re-mmap itself
// as it grows and it cannot do that while a read transaction is open.
//
// IMPORTANT: You must close read-only transactions after you are finished or
// else the database will not reclaim old pages.
func (db *DB) Begin(writable bool) (*Tx, error) {
	if !writable {
		return db.beginTx()
	}
	return db.beginRWTx()
}
// beginTx starts a read-only transaction.
// It returns ErrDatabaseNotOpen when the database is not open. On success the
// read lock on the mmap is kept; removeTx releases it when the transaction is
// removed.
func (db *DB) beginTx() (*Tx, error) {
// Lock the meta pages while we initialize the transaction. We obtain
// the meta lock before the mmap lock because that's the order that the
// write transaction will obtain them.
db.metalock.Lock()
// Obtain a read-only lock on the mmap. When the mmap is remapped it will
// obtain a write lock so all transactions must finish before it can be
// remapped.
db.mmaplock.RLock()
// Exit if the database is not open yet.
if !db.opened {
db.mmaplock.RUnlock()
db.metalock.Unlock()
return nil, ErrDatabaseNotOpen
}
// Create a transaction associated with the database.
t := &Tx{}
t.init(db)
// Keep track of transaction until it closes.
db.txs = append(db.txs, t)
// Capture the count while the meta lock is still held.
n := len(db.txs)
// Unlock the meta pages.
db.metalock.Unlock()
// Update the transaction stats.
db.statlock.Lock()
db.stats.TxN++
db.stats.OpenTxN = n
db.statlock.Unlock()
return t, nil
}
// beginRWTx starts the single read-write transaction.
// It returns ErrDatabaseReadOnly for a read-only database and
// ErrDatabaseNotOpen when the database is not open. On success the writer
// lock stays held when this function returns.
func (db *DB) beginRWTx() (*Tx, error) {
// If the database was opened with Options.ReadOnly, return an error.
if db.readOnly {
return nil, ErrDatabaseReadOnly
}
// Obtain writer lock. This is released by the transaction when it closes.
// This enforces only one writer transaction at a time.
db.rwlock.Lock()
// Once we have the writer lock then we can lock the meta pages so that
// we can set up the transaction.
db.metalock.Lock()
defer db.metalock.Unlock()
// Exit if the database is not open yet.
if !db.opened {
db.rwlock.Unlock()
return nil, ErrDatabaseNotOpen
}
// Create a transaction associated with the database.
t := &Tx{writable: true}
t.init(db)
db.rwtx = t
// Free any pages associated with closed read-only transactions.
// minid ends up as the smallest txid among the open read-only
// transactions, or stays at the maximum value when there are none.
var minid txid = 0xFFFFFFFFFFFFFFFF
for _, t := range db.txs {
if t.meta.txid < minid {
minid = t.meta.txid
}
}
if minid > 0 {
db.freelist.release(minid - 1)
}
return t, nil
}
// removeTx removes a transaction from the database.
// It releases the mmap read lock taken in beginTx, drops tx from the list of
// open transactions, and merges the transaction's stats into the database
// stats.
func (db *DB) removeTx(tx *Tx) {
// Release the read lock on the mmap.
db.mmaplock.RUnlock()
// Use the meta lock to restrict access to the DB object.
db.metalock.Lock()
// Remove the transaction.
for i, t := range db.txs {
if t == tx {
db.txs = append(db.txs[:i], db.txs[i+1:]...)
break
}
}
// Capture the count while the meta lock is still held.
n := len(db.txs)
// Unlock the meta pages.
db.metalock.Unlock()
// Merge statistics.
db.statlock.Lock()
db.stats.OpenTxN = n
db.stats.TxStats.add(&tx.stats)
db.statlock.Unlock()
}
// Update runs fn inside a managed read-write transaction.
// The transaction is committed when fn returns nil and rolled back when fn
// returns an error. The error from fn, or else the error from the commit, is
// what Update returns.
//
// Attempting to manually commit or rollback within the function will cause a panic.
func (db *DB) Update(fn func(*Tx) error) error {
	tx, err := db.Begin(true)
	if err != nil {
		return err
	}

	// If fn panics, make sure the transaction is still rolled back.
	defer func() {
		if tx.db != nil {
			tx.rollback()
		}
	}()

	// While managed, the transaction refuses manual commit/rollback by fn.
	tx.managed = true
	err = fn(tx)
	tx.managed = false

	if err == nil {
		return tx.Commit()
	}

	// fn failed: undo its changes and report fn's error.
	_ = tx.Rollback()
	return err
}
// View runs fn inside a managed read-only transaction.
// The transaction is always rolled back afterwards. The error from fn, or
// else the error from that rollback, is what View returns.
//
// Attempting to manually rollback within the function will cause a panic.
func (db *DB) View(fn func(*Tx) error) error {
	tx, err := db.Begin(false)
	if err != nil {
		return err
	}

	// If fn panics, make sure the transaction is still rolled back.
	defer func() {
		if tx.db != nil {
			tx.rollback()
		}
	}()

	// While managed, the transaction refuses manual rollback by fn.
	tx.managed = true
	err = fn(tx)
	tx.managed = false

	if err != nil {
		// Pass fn's error through; the rollback result is ignored.
		_ = tx.Rollback()
		return err
	}
	return tx.Rollback()
}
// Sync executes fdatasync() against the database file handle.
//
// This is not necessary under normal operation, however, if you use NoSync
// then it allows you to force the database file to sync against the disk.
// The error from fdatasync is returned unchanged.
func (db *DB) Sync() error { return fdatasync(db) }
// Stats retrieves ongoing performance stats for the database.
// This is only updated when a transaction closes.
// The returned value is a copy taken under the stats read lock.
func (db *DB) Stats() Stats {
db.statlock.RLock()
defer db.statlock.RUnlock()
return db.stats
}
// Info returns the address of the first byte of the database's data array
// together with the page size.
// This is for internal access to the raw data bytes from the C cursor, use
// carefully, or not at all.
func (db *DB) Info() *Info {
return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize}
}
// page returns a reference to the page with the given id inside the data
// array, located at byte offset id * pageSize.
func (db *DB) page(id pgid) *page {
	return (*page)(unsafe.Pointer(&db.data[id*pgid(db.pageSize)]))
}
// pageInBuffer returns a reference to the page with the given id inside the
// byte slice b, located at byte offset id * pageSize.
func (db *DB) pageInBuffer(b []byte, id pgid) *page {
	offset := id * pgid(db.pageSize)
	return (*page)(unsafe.Pointer(&b[offset]))
}
// meta returns the current meta page reference: whichever of the two meta
// pages carries the higher transaction id, with meta1 winning a tie.
func (db *DB) meta() *meta {
	if db.meta1.txid >= db.meta0.txid {
		return db.meta1
	}
	return db.meta0
}
// allocate returns a page backed by a fresh buffer of count pages. The page id
// comes from the freelist when it can supply one; otherwise it is taken from
// the write transaction's high water mark, remapping the file first if the
// required size reaches the current mapping size.
func (db *DB) allocate(count int) (*page, error) {
	// Temporary buffer that backs the page until it is written out.
	scratch := make([]byte, count*db.pageSize)
	p := (*page)(unsafe.Pointer(&scratch[0]))
	p.overflow = uint32(count - 1)

	// Prefer reusing free pages.
	p.id = db.freelist.allocate(count)
	if p.id != 0 {
		return p, nil
	}

	// Otherwise extend the file: take ids from the high water mark and grow
	// the mapping if we are at its end.
	p.id = db.rwtx.meta.pgid
	required := int((p.id+pgid(count))+1) * db.pageSize
	if required >= db.datasz {
		if err := db.mmap(required); err != nil {
			return nil, fmt.Errorf("mmap allocate error: %s", err)
		}
	}

	// Move the page id high water mark.
	db.rwtx.meta.pgid += pgid(count)

	return p, nil
}
// IsReadOnly reports whether the database was opened with Options.ReadOnly.
func (db *DB) IsReadOnly() bool {
return db.readOnly
}
// Options represents the options that can be set when opening a database.
// Open uses DefaultOptions when it is given nil options.
type Options struct {
// Timeout is the amount of time to wait to obtain a file lock.
// When set to zero it will wait indefinitely. This option is only
// available on Darwin and Linux.
Timeout time.Duration
// Sets the DB.NoGrowSync flag before memory mapping the file.
NoGrowSync bool
// Open database in read-only mode. Uses flock(..., LOCK_SH |LOCK_NB) to
// grab a shared lock (UNIX).
// Open then opens the file with os.O_RDONLY instead of os.O_RDWR.
ReadOnly bool
}
// DefaultOptions represent the options used if nil options are passed into Open().
// No timeout is used which will cause Bolt to wait indefinitely for a lock.
// ReadOnly is left at its zero value (false).
var DefaultOptions = &Options{
Timeout: 0,
NoGrowSync: false,
}
// Stats represents statistics about the database.
// A snapshot is obtained through DB.Stats; two snapshots can be compared
// with Sub.
type Stats struct {
// Freelist stats
FreePageN int // total number of free pages on the freelist
PendingPageN int // total number of pending pages on the freelist
FreeAlloc int // total bytes allocated in free pages
FreelistInuse int // total bytes used by the freelist
// Transaction stats
// TxN is incremented by beginTx; OpenTxN is refreshed by beginTx and removeTx.
TxN int // total number of started read transactions
OpenTxN int // number of currently open read transactions
TxStats TxStats // global, ongoing stats.
}
// Sub calculates and returns the difference between two sets of database stats.
// This is useful when obtaining stats at two different points in time and
// you need the performance counters that occurred within that time span.
//
// s is the later snapshot and other the earlier one. The freelist fields are
// copied from s as-is; TxN and TxStats are reported as s minus other.
// OpenTxN is not carried over and is zero in the result. A nil other yields a
// copy of s.
func (s *Stats) Sub(other *Stats) Stats {
	if other == nil {
		return *s
	}
	var diff Stats
	diff.FreePageN = s.FreePageN
	diff.PendingPageN = s.PendingPageN
	diff.FreeAlloc = s.FreeAlloc
	diff.FreelistInuse = s.FreelistInuse
	// Subtract in the same direction as TxStats below (s - other); the
	// operands were previously reversed, which negated the count.
	diff.TxN = s.TxN - other.TxN
	diff.TxStats = s.TxStats.Sub(&other.TxStats)
	return diff
}
// add merges other's transaction stats into s. Only TxStats is accumulated;
// the remaining fields of s are left untouched.
func (s *Stats) add(other *Stats) {
s.TxStats.add(&other.TxStats)
}
// Info is the value returned by DB.Info.
// The field order matters: DB.Info builds it with a positional literal.
type Info struct {
// Data is the address of the first byte of the database's data array.
Data uintptr
// PageSize is the database page size in bytes.
PageSize int
}
// meta is the content of a meta page. It is accessed in place through
// page.meta(), so the field order and types define the stored layout and
// must not be rearranged.
type meta struct {
// magic and version identify the file; validate compares them against
// the package constants.
magic uint32
version uint32
// pageSize is the page size the file was created with (set in DB.init).
pageSize uint32
flags uint32
// root is the root bucket; its root page id must stay below pgid (see write).
root bucket
// freelist is the page id of the freelist page; must stay below pgid.
freelist pgid
// pgid is the high water mark: allocate takes new page ids from it.
pgid pgid
// txid selects the meta page to write to (txid % 2) and, in DB.meta,
// which meta page is current.
txid txid
// checksum covers every field before it (see sum64). validate skips the
// comparison when it is zero.
checksum uint64
}
// validate checks that the meta page belongs to a database this binary can
// read. It returns ErrChecksum when a non-zero checksum does not match,
// ErrInvalid when the magic marker is wrong, and ErrVersionMismatch when the
// format version differs; checks are made in that order.
func (m *meta) validate() error {
	switch {
	case m.checksum != 0 && m.checksum != m.sum64():
		return ErrChecksum
	case m.magic != magic:
		return ErrInvalid
	case m.version != version:
		return ErrVersionMismatch
	}
	return nil
}
// copy copies one meta object to another.
// Every field of m, including the checksum, is assigned to dest.
func (m *meta) copy(dest *meta) {
*dest = *m
}
// write stores the meta onto page p after refreshing its checksum.
// It panics when the root bucket or the freelist page id is not below the
// high water mark.
func (m *meta) write(p *page) {
	switch {
	case m.root.root >= m.pgid:
		panic(fmt.Sprintf("root bucket pgid (%d) above high water mark (%d)", m.root.root, m.pgid))
	case m.freelist >= m.pgid:
		panic(fmt.Sprintf("freelist pgid (%d) above high water mark (%d)", m.freelist, m.pgid))
	}

	// The transaction id decides which of the two meta pages (0 or 1) is used.
	p.id = pgid(m.txid % 2)
	p.flags |= metaPageFlag

	// Refresh the checksum before copying the meta into the page.
	m.checksum = m.sum64()

	m.copy(p.meta())
}
// sum64 computes the 64-bit FNV-1a hash over the bytes of the meta that
// precede the checksum field.
func (m *meta) sum64() uint64 {
	hasher := fnv.New64a()
	// View the meta as a byte array that ends where the checksum begins.
	raw := (*[unsafe.Offsetof(meta{}.checksum)]byte)(unsafe.Pointer(m))
	_, _ = hasher.Write(raw[:])
	return hasher.Sum64()
}
// _assert panics when condition is false. The panic value is the string
// "assertion failed: " followed by msg formatted with the arguments v.
func _assert(condition bool, msg string, v ...interface{}) {
	if condition {
		return
	}
	panic(fmt.Sprintf("assertion failed: "+msg, v...))
}
// warn prints its arguments to standard error, followed by a newline.
func warn(v ...interface{}) { fmt.Fprintln(os.Stderr, v...) }
// warnf prints a formatted message to standard error, followed by a newline.
func warnf(msg string, v ...interface{}) { fmt.Fprintf(os.Stderr, msg+"\n", v...) }
// printstack writes the current goroutine's stack trace to standard error,
// with the first two lines of the trace removed.
func printstack() {
	lines := strings.Split(string(debug.Stack()), "\n")
	fmt.Fprintln(os.Stderr, strings.Join(lines[2:], "\n"))
}

44
Godeps/_workspace/src/github.com/boltdb/bolt/doc.go generated vendored Normal file
View File

@@ -0,0 +1,44 @@
/*
Package bolt implements a low-level key/value store in pure Go. It supports
fully serializable transactions, ACID semantics, and lock-free MVCC with
multiple readers and a single writer. Bolt can be used for projects that
want a simple data store without the need to add large dependencies such as
Postgres or MySQL.
Bolt is a single-level, zero-copy, B+tree data store. This means that Bolt is
optimized for fast read access and does not require recovery in the event of a
system crash. Transactions which have not finished committing will simply be
rolled back in the event of a crash.
The design of Bolt is based on Howard Chu's LMDB database project.
Bolt currently works on Windows, Mac OS X, and Linux.
Basics
There are only a few types in Bolt: DB, Bucket, Tx, and Cursor. The DB is
a collection of buckets and is represented by a single file on disk. A bucket is
a collection of unique keys that are associated with values.
Transactions provide either read-only or read-write access to the database.
Read-only transactions can retrieve key/value pairs and can use Cursors to
iterate over the dataset sequentially. Read-write transactions can create and
delete buckets and can insert and remove keys. Only one read-write transaction
is allowed at a time.
Caveats
The database uses a read-only, memory-mapped data file to ensure that
applications cannot corrupt the database. However, this means that keys and
values returned from Bolt cannot be changed. Writing to a read-only byte slice
will cause Go to panic.
Keys and values retrieved from the database are only valid for the life of
the transaction. When used outside the transaction, these byte slices can
point to different data or can point to invalid memory which will cause a panic.
*/
package bolt

70
Godeps/_workspace/src/github.com/boltdb/bolt/errors.go generated vendored Normal file
View File

@@ -0,0 +1,70 @@
package bolt
import "errors"
// These errors can be returned when opening or calling methods on a DB.
var (
// ErrDatabaseNotOpen is returned when a DB instance is accessed before it
// is opened or after it is closed.
ErrDatabaseNotOpen = errors.New("database not open")
// ErrDatabaseOpen is returned when opening a database that is
// already open.
ErrDatabaseOpen = errors.New("database already open")
// ErrInvalid is returned when a data file is not a Bolt-formatted database,
// i.e. when a meta page does not carry the expected magic marker.
ErrInvalid = errors.New("invalid database")
// ErrVersionMismatch is returned when the data file was created with a
// different version of Bolt.
ErrVersionMismatch = errors.New("version mismatch")
// ErrChecksum is returned when either meta page checksum does not match.
// A meta page whose stored checksum is zero is not checked.
ErrChecksum = errors.New("checksum error")
// ErrTimeout is returned when a database cannot obtain an exclusive lock
// on the data file after the timeout passed to Open().
ErrTimeout = errors.New("timeout")
)
// These errors can occur when beginning or committing a Tx.
// Each is a distinct sentinel value created with errors.New.
var (
// ErrTxNotWritable is returned when performing a write operation on a
// read-only transaction.
ErrTxNotWritable = errors.New("tx not writable")
// ErrTxClosed is returned when committing or rolling back a transaction
// that has already been committed or rolled back.
ErrTxClosed = errors.New("tx closed")
// ErrDatabaseReadOnly is returned when a mutating transaction is started on a
// read-only database.
ErrDatabaseReadOnly = errors.New("database is in read-only mode")
)
// These errors can occur when putting or deleting a value or a bucket.
// Each is a distinct sentinel value created with errors.New.
var (
// ErrBucketNotFound is returned when trying to access a bucket that has
// not been created yet.
ErrBucketNotFound = errors.New("bucket not found")
// ErrBucketExists is returned when creating a bucket that already exists.
ErrBucketExists = errors.New("bucket already exists")
// ErrBucketNameRequired is returned when creating a bucket with a blank name.
ErrBucketNameRequired = errors.New("bucket name required")
// ErrKeyRequired is returned when inserting a zero-length key.
ErrKeyRequired = errors.New("key required")
// ErrKeyTooLarge is returned when inserting a key that is larger than MaxKeySize.
ErrKeyTooLarge = errors.New("key too large")
// ErrValueTooLarge is returned when inserting a value that is larger than MaxValueSize.
ErrValueTooLarge = errors.New("value too large")
// ErrIncompatibleValue is returned when trying to create or delete a bucket
// on an existing non-bucket key or when trying to create or delete a
// non-bucket key on an existing bucket key.
ErrIncompatibleValue = errors.New("incompatible value")
)

View File

@@ -0,0 +1,242 @@
package bolt
import (
"fmt"
"sort"
"unsafe"
)
// freelist represents a list of all pages that are available for allocation.
// It also tracks pages that have been freed but are still in use by open transactions.
//
// ids holds page ids that allocate() may hand out; read() sorts it and
// release() merges sorted lists into it, and allocate() scans it for runs of
// consecutive ids. pending holds ids freed by a transaction (see free()) until
// release() moves them into ids. cache is a set containing every id present
// in either ids or pending (see reindex()).
type freelist struct {
ids []pgid // all free and available free page ids.
pending map[txid][]pgid // mapping of soon-to-be free page ids by tx.
cache map[pgid]bool // fast lookup of all free and pending page ids.
}
// newFreelist builds a freelist whose pending and cache maps are allocated
// and ready for writes. The list of available ids starts out nil.
func newFreelist() *freelist {
	fl := new(freelist)
	fl.pending = make(map[txid][]pgid)
	fl.cache = make(map[pgid]bool)
	return fl
}
// size returns the number of bytes the freelist occupies after
// serialization: the page header plus one pgid per free or pending page.
//
// When the list holds 0xFFFF or more ids, write() stores the real element
// count in an extra leading pgid slot, so one additional element is counted
// here. Without it the result would be one pgid too small, and a caller
// sizing an allocation from it could come up short by a page.
func (f *freelist) size() int {
	n := f.count()
	if n >= 0xFFFF {
		// Room for the count element that precedes the ids on overflow.
		n++
	}
	return pageHeaderSize + (int(unsafe.Sizeof(pgid(0))) * n)
}
// count reports how many page ids the freelist tracks in total, i.e. the
// available ids plus the ids still pending release.
func (f *freelist) count() int {
	total := f.free_count()
	total += f.pending_count()
	return total
}
// free_count returns the number of page ids in f.ids, i.e. the pages that
// are free and available for allocation. Pending ids are not included.
func (f *freelist) free_count() int {
return len(f.ids)
}
// pending_count sums the lengths of every per-transaction pending list and
// returns the total number of pending page ids.
func (f *freelist) pending_count() int {
	total := 0
	for txn := range f.pending {
		total += len(f.pending[txn])
	}
	return total
}
// all gathers the pending ids of every transaction, sorts them, and merges
// them with the available ids, returning one combined list.
func (f *freelist) all() []pgid {
	pend := make(pgids, 0)
	for txn := range f.pending {
		pend = append(pend, f.pending[txn]...)
	}
	sort.Sort(pend)

	available := pgids(f.ids)
	return available.merge(pend)
}
// allocate searches the available ids for n consecutive page ids. On success
// the run is removed from f.ids and from the cache, and the first id of the
// run is returned. When no such run exists (or the list is empty) the result
// is 0. It panics if it encounters an id of 0 or 1 in the list.
func (f *freelist) allocate(n int) pgid {
	ids := f.ids
	if len(ids) == 0 {
		return 0
	}

	var start, last pgid
	for pos := 0; pos < len(ids); pos++ {
		cur := ids[pos]
		if cur <= 1 {
			panic(fmt.Sprintf("invalid page allocation: %d", cur))
		}

		// The first id, or any id that does not directly follow the
		// previous one, begins a new candidate run.
		if last == 0 || cur-last != 1 {
			start = cur
		}
		last = cur

		// Keep scanning until the current run is exactly n ids long.
		if (cur-start)+1 != pgid(n) {
			continue
		}

		if pos+1 == n {
			// The run sits at the head of the list: just re-slice. The
			// skipped prefix stays in the backing array until a later
			// append reallocates.
			f.ids = ids[pos+1:]
		} else {
			// Close the gap by shifting the tail down over the run.
			copy(ids[pos-n+1:], ids[pos+1:])
			f.ids = ids[:len(ids)-n]
		}

		// The allocated pages are no longer free.
		for off := pgid(0); off < pgid(n); off++ {
			delete(f.cache, start+off)
		}
		return start
	}
	return 0
}
// free records page p and all of its overflow pages (p.id through
// p.id+p.overflow) as pending for the given transaction id and marks each of
// them in the cache. It panics when p.id is 0 or 1, or when any of the pages
// is already present in the cache.
func (f *freelist) free(txid txid, p *page) {
	if p.id <= 1 {
		panic(fmt.Sprintf("cannot free page 0 or 1: %d", p.id))
	}

	pend := f.pending[txid]
	last := p.id + pgid(p.overflow)
	for id := p.id; id <= last; id++ {
		// A page that is already free or pending must not be freed twice.
		if f.cache[id] {
			panic(fmt.Sprintf("page %d already freed", id))
		}
		pend = append(pend, id)
		f.cache[id] = true
	}
	f.pending[txid] = pend
}
// release makes the pending pages of every transaction whose id is less than
// or equal to txid available for allocation: their ids are removed from
// pending, sorted, and merged into f.ids. The cache is left untouched because
// those pages remain free.
func (f *freelist) release(txid txid) {
	freed := make(pgids, 0)
	for pendingTx := range f.pending {
		if pendingTx > txid {
			continue
		}
		freed = append(freed, f.pending[pendingTx]...)
		delete(f.pending, pendingTx)
	}
	sort.Sort(freed)

	available := pgids(f.ids)
	f.ids = available.merge(freed)
}
// rollback discards everything a transaction had freed: each of its pending
// ids is dropped from the cache and the transaction's pending entry is
// deleted. Unknown transaction ids are a no-op.
func (f *freelist) rollback(txid txid) {
	if ids, ok := f.pending[txid]; ok {
		for i := range ids {
			delete(f.cache, ids[i])
		}
	}
	delete(f.pending, txid)
}
// freed reports whether the given page id is present in the cache, which
// covers both available and pending page ids.
func (f *freelist) freed(pgid pgid) bool {
return f.cache[pgid]
}
// read initializes the freelist from a freelist page: it copies the page
// ids stored on p into f.ids, sorts them, and rebuilds the cache.
//
// The on-page layout mirrors write(). Normally p.count is the number of ids
// and the ids start at element 0. When p.count is 0xFFFF the list overflowed
// the 16-bit counter: element 0 then holds the real number of ids and the
// ids themselves occupy elements 1 through count inclusive.
func (f *freelist) read(p *page) {
	// If the page.count is at the max uint16 value (64k) then it's considered
	// an overflow and the size of the freelist is stored as the first element.
	idx, count := 0, int(p.count)
	if count == 0xFFFF {
		idx = 1
		count = int(((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[0])
	}

	// Copy the list of page ids from the freelist. The upper bound is
	// idx+count, not count: in the overflow layout the ids are shifted by one
	// element, so slicing to count would silently drop the last id.
	ids := ((*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr)))[idx : idx+count]
	f.ids = make([]pgid, len(ids))
	copy(f.ids, ids)

	// Make sure they're sorted.
	sort.Sort(pgids(f.ids))

	// Rebuild the page cache.
	f.reindex()
}
// write serializes the freelist onto page p. Both available and pending ids
// are written, because after a crash every pending id becomes free.
//
// Fewer than 0xFFFF ids are stored directly with p.count holding their
// number. Otherwise p.count is set to 0xFFFF, the real count goes into the
// first element, and the ids follow it. The returned error is always nil.
func (f *freelist) write(p *page) error {
	ids := f.all()

	// Mark the page as a freelist page.
	p.flags |= freelistPageFlag

	dst := (*[maxAllocSize]pgid)(unsafe.Pointer(&p.ptr))
	if n := len(ids); n >= 0xFFFF {
		// The 16-bit counter cannot hold n; spill it into element 0.
		p.count = 0xFFFF
		dst[0] = pgid(n)
		copy(dst[1:], ids)
	} else {
		p.count = uint16(n)
		copy(dst[:], ids)
	}
	return nil
}
// reload re-reads the freelist from page p and then removes from the
// available list every id that is currently pending in memory. Finally the
// cache is rebuilt so that it covers both available and pending ids.
func (f *freelist) reload(p *page) {
	f.read(p)

	// Collect the set of ids that some transaction still has pending.
	pendingSet := make(map[pgid]bool)
	for txn := range f.pending {
		for _, id := range f.pending[txn] {
			pendingSet[id] = true
		}
	}

	// Keep only the ids from the page that are not pending.
	var available []pgid
	for i := range f.ids {
		if id := f.ids[i]; !pendingSet[id] {
			available = append(available, id)
		}
	}
	f.ids = available

	f.reindex()
}
// reindex replaces the cache with a fresh set containing every available id
// and every pending id.
func (f *freelist) reindex() {
	index := make(map[pgid]bool)
	for i := range f.ids {
		index[f.ids[i]] = true
	}
	for txn := range f.pending {
		for _, id := range f.pending[txn] {
			index[id] = true
		}
	}
	f.cache = index
}

636
Godeps/_workspace/src/github.com/boltdb/bolt/node.go generated vendored Normal file
View File

@@ -0,0 +1,636 @@
package bolt
import (
"bytes"
"fmt"
"sort"
"unsafe"
)
// node represents an in-memory, deserialized page.
//
// Field notes, as used by the methods in this file:
//   - isLeaf selects the leaf or branch element layout (see pageElementSize).
//   - unbalanced is set by del() and cleared by rebalance().
//   - spilled is set once spill() has written the node to a page.
//   - key is the first inode key at the time the node was read or spilled;
//     it is how the node is located inside its parent.
//   - pgid is the page backing the node; 0 means no page is assigned
//     (see free() and spill()).
//   - children lists in-memory child nodes and is only used for spill
//     tracking; inodes holds the actual keys, values and child page ids.
type node struct {
bucket *Bucket
isLeaf bool
unbalanced bool
spilled bool
key []byte
pgid pgid
parent *node
children nodes
inodes inodes
}
// root walks the parent links upward and returns the node that has no
// parent. A node without a parent is its own root.
func (n *node) root() *node {
	top := n
	for top.parent != nil {
		top = top.parent
	}
	return top
}
// minKeys reports the smallest number of inodes the node should hold:
// 1 for a leaf node and 2 for a branch node.
func (n *node) minKeys() int {
	if !n.isLeaf {
		return 2
	}
	return 1
}
// size computes the serialized size of the node in bytes: the page header
// plus, for every inode, one page element header and the key and value bytes.
func (n *node) size() int {
	total, perElem := pageHeaderSize, n.pageElementSize()
	for i := range n.inodes {
		entry := &n.inodes[i]
		total += perElem + len(entry.key) + len(entry.value)
	}
	return total
}
// sizeLessThan reports whether the serialized node is smaller than v bytes.
// It accumulates the size inode by inode and stops as soon as the running
// total reaches v, which avoids measuring a large node in full when only the
// comparison matters. A node without inodes always yields true.
func (n *node) sizeLessThan(v int) bool {
	total, perElem := pageHeaderSize, n.pageElementSize()
	for i := range n.inodes {
		entry := &n.inodes[i]
		total += perElem + len(entry.key) + len(entry.value)
		if total >= v {
			return false
		}
	}
	return true
}
// pageElementSize gives the size of one page element header for this node:
// leafPageElementSize for leaf nodes, branchPageElementSize otherwise.
func (n *node) pageElementSize() int {
	if !n.isLeaf {
		return branchPageElementSize
	}
	return leafPageElementSize
}
// childAt looks up the child node referenced by the inode at index, via the
// owning bucket. Calling it on a leaf node panics.
func (n *node) childAt(index int) *node {
	if !n.isLeaf {
		return n.bucket.node(n.inodes[index].pgid, n)
	}
	panic(fmt.Sprintf("invalid childAt(%d) on a leaf node", index))
}
// childIndex binary-searches the inodes for the first key that is greater
// than or equal to child.key and returns its index. The result equals
// len(n.inodes) when every key is smaller.
func (n *node) childIndex(child *node) int {
	atOrAfter := func(i int) bool {
		return bytes.Compare(n.inodes[i].key, child.key) >= 0
	}
	return sort.Search(len(n.inodes), atOrAfter)
}
// numChildren returns the number of inodes held by the node. Note that this
// counts n.inodes, not the in-memory n.children list.
func (n *node) numChildren() int {
return len(n.inodes)
}
// nextSibling returns the node that follows n under the same parent, or nil
// when n has no parent or is already the last child.
func (n *node) nextSibling() *node {
	parent := n.parent
	if parent == nil {
		return nil
	}
	if pos := parent.childIndex(n); pos < parent.numChildren()-1 {
		return parent.childAt(pos + 1)
	}
	return nil
}
// prevSibling returns the node that precedes n under the same parent, or nil
// when n has no parent or is already the first child.
func (n *node) prevSibling() *node {
	parent := n.parent
	if parent == nil {
		return nil
	}
	if pos := parent.childIndex(n); pos != 0 {
		return parent.childAt(pos - 1)
	}
	return nil
}
// put stores an entry in the node. The position is found by searching for
// oldKey: if an inode with exactly that key exists it is overwritten,
// otherwise a new inode is inserted at the sorted position. The stored inode
// carries newKey, value, pgid and flags.
//
// It panics if pgid is not below the transaction's high water mark or if
// either key is empty.
func (n *node) put(oldKey, newKey, value []byte, pgid pgid, flags uint32) {
	switch {
	case pgid >= n.bucket.tx.meta.pgid:
		panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", pgid, n.bucket.tx.meta.pgid))
	case len(oldKey) <= 0:
		panic("put: zero-length old key")
	case len(newKey) <= 0:
		panic("put: zero-length new key")
	}

	// Locate the first inode whose key is >= oldKey.
	pos := sort.Search(len(n.inodes), func(i int) bool {
		return bytes.Compare(n.inodes[i].key, oldKey) >= 0
	})

	// Without an exact match, open a slot at pos by growing the slice and
	// shifting the tail one place to the right.
	found := pos < len(n.inodes) && bytes.Equal(n.inodes[pos].key, oldKey)
	if !found {
		n.inodes = append(n.inodes, inode{})
		copy(n.inodes[pos+1:], n.inodes[pos:])
	}

	slot := &n.inodes[pos]
	slot.flags = flags
	slot.key = newKey
	slot.value = value
	slot.pgid = pgid
	_assert(len(slot.key) > 0, "put: zero-length inode key")
}
// del removes the inode whose key equals key and flags the node as needing
// a rebalance. If no inode has that key the node is left unchanged.
func (n *node) del(key []byte) {
	pos := sort.Search(len(n.inodes), func(i int) bool {
		return bytes.Compare(n.inodes[i].key, key) >= 0
	})

	if pos < len(n.inodes) && bytes.Equal(n.inodes[pos].key, key) {
		n.inodes = append(n.inodes[:pos], n.inodes[pos+1:]...)
		n.unbalanced = true
	}
}
// read populates the node from page p: it takes over the page id and the
// leaf flag, then builds one inode per page element. Leaf elements supply
// flags, key and value; branch elements supply the child pgid and key.
// Afterwards n.key is set to the first inode key (nil when the page has no
// elements) so the node can later be found in its parent when it spills.
func (n *node) read(p *page) {
	n.pgid = p.id
	n.isLeaf = p.flags&leafPageFlag != 0

	total := int(p.count)
	n.inodes = make(inodes, total)
	for idx := 0; idx < total; idx++ {
		entry := &n.inodes[idx]
		switch {
		case n.isLeaf:
			src := p.leafPageElement(uint16(idx))
			entry.flags = src.flags
			entry.key = src.key()
			entry.value = src.value()
		default:
			src := p.branchPageElement(uint16(idx))
			entry.pgid = src.pgid
			entry.key = src.key()
		}
		_assert(len(entry.key) > 0, "read: zero-length inode key")
	}

	if len(n.inodes) == 0 {
		n.key = nil
		return
	}
	n.key = n.inodes[0].key
	_assert(len(n.key) > 0, "read: zero-length node key")
}
// write writes the items onto one or more pages.
//
// The page is flagged as leaf or branch, p.count is set to the number of
// inodes, and then each inode is serialized as a fixed-size element header
// followed (after all headers) by its key and value bytes. It panics when the
// node holds 0xFFFF or more inodes, since p.count is a uint16.
func (n *node) write(p *page) {
// Initialize page.
if n.isLeaf {
p.flags |= leafPageFlag
} else {
p.flags |= branchPageFlag
}
if len(n.inodes) >= 0xFFFF {
panic(fmt.Sprintf("inode overflow: %d (pgid=%d)", len(n.inodes), p.id))
}
p.count = uint16(len(n.inodes))
// Loop over each item and write it to the page.
// b is a byte view of the page data that starts right after the block of
// element headers; it is advanced as keys and values are written.
b := (*[maxAllocSize]byte)(unsafe.Pointer(&p.ptr))[n.pageElementSize()*len(n.inodes):]
for i, item := range n.inodes {
_assert(len(item.key) > 0, "write: zero-length inode key")
// Write the page element.
// elem.pos is the byte distance from the element header itself to the
// place where this item's key is about to be written.
if n.isLeaf {
elem := p.leafPageElement(uint16(i))
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
elem.flags = item.flags
elem.ksize = uint32(len(item.key))
elem.vsize = uint32(len(item.value))
} else {
elem := p.branchPageElement(uint16(i))
elem.pos = uint32(uintptr(unsafe.Pointer(&b[0])) - uintptr(unsafe.Pointer(elem)))
elem.ksize = uint32(len(item.key))
elem.pgid = item.pgid
_assert(elem.pgid != p.id, "write: circular dependency occurred")
}
// If the length of key+value is larger than the max allocation size
// then we need to reallocate the byte array pointer.
//
// See: https://github.com/boltdb/bolt/pull/335
klen, vlen := len(item.key), len(item.value)
if len(b) < klen+vlen {
b = (*[maxAllocSize]byte)(unsafe.Pointer(&b[0]))[:]
}
// Write data for the element to the end of the page.
copy(b[0:], item.key)
b = b[klen:]
copy(b[0:], item.value)
b = b[vlen:]
}
// DEBUG ONLY: n.dump()
}
// split repeatedly applies splitTwo, starting with n and continuing with
// each remainder, until no further split happens. The returned slice lists
// the resulting nodes in order; its first element is always n.
// This should only be called from the spill() function.
func (n *node) split(pageSize int) []*node {
	var parts []*node
	cur := n
	for {
		head, rest := cur.splitTwo(pageSize)
		parts = append(parts, head)

		// A nil remainder means the node could not be split any further.
		if rest == nil {
			return parts
		}
		cur = rest
	}
}
// splitTwo divides the node into two when that is worthwhile. It returns
// (n, nil) without changes if the node has at most minKeysPerPage*2 inodes or
// already fits in pageSize. Otherwise the inodes from the split position
// onward move to a newly created sibling, which is registered as a child of
// n's parent (a new parent is created when n has none), and (n, sibling) is
// returned. This should only be called from the split() function.
func (n *node) splitTwo(pageSize int) (*node, *node) {
	if len(n.inodes) <= (minKeysPerPage*2) || n.sizeLessThan(pageSize) {
		return n, nil
	}

	// Clamp the bucket's fill percentage into the permitted range and turn
	// it into a byte threshold for the first page.
	fill := n.bucket.FillPercent
	switch {
	case fill < minFillPercent:
		fill = minFillPercent
	case fill > maxFillPercent:
		fill = maxFillPercent
	}
	threshold := int(float64(pageSize) * fill)

	cut, _ := n.splitIndex(threshold)

	// A node without a parent gets a fresh one so the sibling has somewhere
	// to attach.
	if n.parent == nil {
		n.parent = &node{bucket: n.bucket, children: []*node{n}}
	}

	sibling := &node{bucket: n.bucket, isLeaf: n.isLeaf, parent: n.parent}
	n.parent.children = append(n.parent.children, sibling)

	// Hand the tail of the inodes to the sibling and keep the head.
	sibling.inodes, n.inodes = n.inodes[cut:], n.inodes[:cut]

	n.bucket.tx.stats.Split++
	return n, sibling
}
// splitIndex determines where the node should be cut so that the first part
// fills a page up to threshold bytes. It returns the index of the first inode
// that belongs to the second part together with the serialized size of the
// first part. At least minKeysPerPage inodes are kept on each side.
// This is only called from splitTwo().
func (n *node) splitIndex(threshold int) (index, sz int) {
	sz = pageHeaderSize
	perElem := n.pageElementSize()

	// Stop early enough that the second page keeps its minimum key count.
	limit := len(n.inodes) - minKeysPerPage
	for i := 0; i < limit; i++ {
		index = i
		entry := &n.inodes[i]
		cost := perElem + len(entry.key) + len(entry.value)

		// Once the first page has its minimum number of keys, cut before the
		// element that would push it past the threshold.
		if i >= minKeysPerPage && sz+cost > threshold {
			return index, sz
		}
		sz += cost
	}
	return index, sz
}
// spill writes the nodes to dirty pages and splits nodes as it goes.
// Returns an error if dirty pages cannot be allocated.
//
// Children are spilled first (depth-first), then the node itself is split
// into page-sized pieces; each piece gets freshly allocated pages, is written
// out, and is (re)inserted into its parent under its first key. A node that
// has already been spilled is skipped.
func (n *node) spill() error {
var tx = n.bucket.tx
if n.spilled {
return nil
}
// Spill child nodes first. Child nodes can materialize sibling nodes in
// the case of split-merge so we cannot use a range loop. We have to check
// the children size on every loop iteration.
sort.Sort(n.children)
for i := 0; i < len(n.children); i++ {
if err := n.children[i].spill(); err != nil {
return err
}
}
// We no longer need the child list because it's only used for spill tracking.
n.children = nil
// Split nodes into appropriate sizes. The first node will always be n.
var nodes = n.split(tx.db.pageSize)
for _, node := range nodes {
// Add node's page to the freelist if it's not new.
if node.pgid > 0 {
tx.db.freelist.free(tx.meta.txid, tx.page(node.pgid))
node.pgid = 0
}
// Allocate contiguous space for the node.
// The page count is the node size divided by the page size, plus one.
p, err := tx.allocate((node.size() / tx.db.pageSize) + 1)
if err != nil {
return err
}
// Write the node.
if p.id >= tx.meta.pgid {
panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", p.id, tx.meta.pgid))
}
node.pgid = p.id
node.write(p)
node.spilled = true
// Insert into parent inodes.
// The previous node key (if any) locates the existing entry in the parent;
// nodes created by a split have no key yet and use their first inode key.
if node.parent != nil {
var key = node.key
if key == nil {
key = node.inodes[0].key
}
node.parent.put(key, node.inodes[0].key, nil, node.pgid, 0)
node.key = node.inodes[0].key
_assert(len(node.key) > 0, "spill: zero-length node key")
}
// Update the statistics.
tx.stats.Spill++
}
// If the root node split and created a new root then we need to spill that
// as well. We'll clear out the children to make sure it doesn't try to respill.
if n.parent != nil && n.parent.pgid == 0 {
n.children = nil
return n.parent.spill()
}
return nil
}
// rebalance attempts to combine the node with sibling nodes if the node fill
// size is below a threshold or if there are not enough keys.
//
// It does nothing unless the node was marked unbalanced (see del). The cases
// handled, in order, are:
//  1. the node is large enough and has enough keys: nothing to do;
//  2. the node is a root: a branch root with a single inode is collapsed
//     into its only child;
//  3. the node has no inodes: it is removed from its parent;
//  4. a sibling has spare inodes: one inode is moved over from it;
//  5. otherwise the node and the sibling are merged.
// Cases 3 and 5 change the parent's inodes and therefore rebalance the
// parent as well.
func (n *node) rebalance() {
if !n.unbalanced {
return
}
n.unbalanced = false
// Update statistics.
n.bucket.tx.stats.Rebalance++
// Ignore if node is above threshold (25%) and has enough keys.
var threshold = n.bucket.tx.db.pageSize / 4
if n.size() > threshold && len(n.inodes) > n.minKeys() {
return
}
// Root node has special handling.
if n.parent == nil {
// If root node is a branch and only has one node then collapse it.
if !n.isLeaf && len(n.inodes) == 1 {
// Move root's child up.
child := n.bucket.node(n.inodes[0].pgid, n)
n.isLeaf = child.isLeaf
n.inodes = child.inodes[:]
n.children = child.children
// Reparent all child nodes being moved.
// Only children that are materialized in the bucket's node cache need it.
for _, inode := range n.inodes {
if child, ok := n.bucket.nodes[inode.pgid]; ok {
child.parent = n
}
}
// Remove old child.
child.parent = nil
delete(n.bucket.nodes, child.pgid)
child.free()
}
return
}
// If node has no keys then just remove it.
if n.numChildren() == 0 {
n.parent.del(n.key)
n.parent.removeChild(n)
delete(n.bucket.nodes, n.pgid)
n.free()
n.parent.rebalance()
return
}
_assert(n.parent.numChildren() > 1, "parent must have at least 2 children")
// Destination node is right sibling if idx == 0, otherwise left sibling.
var target *node
var useNextSibling = (n.parent.childIndex(n) == 0)
if useNextSibling {
target = n.nextSibling()
} else {
target = n.prevSibling()
}
// If target node has extra nodes then just move one over.
if target.numChildren() > target.minKeys() {
if useNextSibling {
// Reparent and move node.
// The right sibling's first inode is appended to the end of n.
if child, ok := n.bucket.nodes[target.inodes[0].pgid]; ok {
child.parent.removeChild(child)
child.parent = n
child.parent.children = append(child.parent.children, child)
}
n.inodes = append(n.inodes, target.inodes[0])
target.inodes = target.inodes[1:]
// Update target key on parent.
target.parent.put(target.key, target.inodes[0].key, nil, target.pgid, 0)
target.key = target.inodes[0].key
_assert(len(target.key) > 0, "rebalance(1): zero-length node key")
} else {
// Reparent and move node.
// The left sibling's last inode is inserted at the front of n.
if child, ok := n.bucket.nodes[target.inodes[len(target.inodes)-1].pgid]; ok {
child.parent.removeChild(child)
child.parent = n
child.parent.children = append(child.parent.children, child)
}
n.inodes = append(n.inodes, inode{})
copy(n.inodes[1:], n.inodes)
n.inodes[0] = target.inodes[len(target.inodes)-1]
target.inodes = target.inodes[:len(target.inodes)-1]
}
// Update parent key for node.
n.parent.put(n.key, n.inodes[0].key, nil, n.pgid, 0)
n.key = n.inodes[0].key
_assert(len(n.key) > 0, "rebalance(2): zero-length node key")
return
}
// If both this node and the target node are too small then merge them.
if useNextSibling {
// Reparent all child nodes being moved.
for _, inode := range target.inodes {
if child, ok := n.bucket.nodes[inode.pgid]; ok {
child.parent.removeChild(child)
child.parent = n
child.parent.children = append(child.parent.children, child)
}
}
// Copy over inodes from target and remove target.
n.inodes = append(n.inodes, target.inodes...)
n.parent.del(target.key)
n.parent.removeChild(target)
delete(n.bucket.nodes, target.pgid)
target.free()
} else {
// Reparent all child nodes being moved.
for _, inode := range n.inodes {
if child, ok := n.bucket.nodes[inode.pgid]; ok {
child.parent.removeChild(child)
child.parent = target
child.parent.children = append(child.parent.children, child)
}
}
// Copy over inodes to target and remove node.
target.inodes = append(target.inodes, n.inodes...)
n.parent.del(n.key)
n.parent.removeChild(n)
delete(n.bucket.nodes, n.pgid)
n.free()
}
// Either this node or the target node was deleted from the parent so rebalance it.
n.parent.rebalance()
}
// removeChild drops the first occurrence of target from the in-memory
// children list. The inodes are not touched, and nothing happens when target
// is not a child.
func (n *node) removeChild(target *node) {
	for i := range n.children {
		if n.children[i] != target {
			continue
		}
		n.children = append(n.children[:i], n.children[i+1:]...)
		return
	}
}
// dereference replaces the node's key and every inode key and value with
// freshly allocated heap copies, then does the same for all in-memory
// children. This is required when the mmap is reallocated so that inodes do
// not point to stale data. The NodeDeref statistic is incremented once per
// node visited.
func (n *node) dereference() {
	clone := func(src []byte) []byte {
		dst := make([]byte, len(src))
		copy(dst, src)
		return dst
	}

	if n.key != nil {
		n.key = clone(n.key)
		_assert(n.pgid == 0 || len(n.key) > 0, "dereference: zero-length node key on existing node")
	}

	for i := range n.inodes {
		entry := &n.inodes[i]
		entry.key = clone(entry.key)
		_assert(len(entry.key) > 0, "dereference: zero-length inode key")
		entry.value = clone(entry.value)
	}

	for _, child := range n.children {
		child.dereference()
	}

	n.bucket.tx.stats.NodeDeref++
}
// free hands the node's backing page to the freelist under the current
// transaction id and resets the node's pgid to 0. Nodes without a page
// (pgid 0) are left alone.
func (n *node) free() {
	if n.pgid == 0 {
		return
	}
	n.bucket.tx.db.freelist.free(n.bucket.tx.meta.txid, n.bucket.tx.page(n.pgid))
	n.pgid = 0
}
// dump writes the contents of the node to STDERR for debugging purposes.
/*
func (n *node) dump() {
// Write node header.
var typ = "branch"
if n.isLeaf {
typ = "leaf"
}
warnf("[NODE %d {type=%s count=%d}]", n.pgid, typ, len(n.inodes))
// Write out abbreviated version of each item.
for _, item := range n.inodes {
if n.isLeaf {
if item.flags&bucketLeafFlag != 0 {
bucket := (*bucket)(unsafe.Pointer(&item.value[0]))
warnf("+L %08x -> (bucket root=%d)", trunc(item.key, 4), bucket.root)
} else {
warnf("+L %08x -> %08x", trunc(item.key, 4), trunc(item.value, 4))
}
} else {
warnf("+B %08x -> pgid=%d", trunc(item.key, 4), item.pgid)
}
}
warn("")
}
*/
// nodes is a list of node pointers that implements sort.Interface, ordering
// nodes by the key of their first inode. Less indexes inodes[0], so every
// node being sorted must hold at least one inode.
type nodes []*node
func (s nodes) Len() int { return len(s) }
func (s nodes) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s nodes) Less(i, j int) bool { return bytes.Compare(s[i].inodes[0].key, s[j].inodes[0].key) == -1 }
// inode represents an internal node inside of a node.
// It can be used to point to elements in a page or point
// to an element which hasn't been added to a page yet.
//
// On leaf nodes flags, key and value are populated; on branch nodes pgid
// (the child page) and key are populated (see node.read).
type inode struct {
flags uint32
pgid pgid
key []byte
value []byte
}
// inodes is the list of inodes held by a node.
type inodes []inode

172
Godeps/_workspace/src/github.com/boltdb/bolt/page.go generated vendored Normal file
View File

@@ -0,0 +1,172 @@
package bolt
import (
"fmt"
"os"
"sort"
"unsafe"
)
// pageHeaderSize is the number of bytes in a page that precede its data,
// i.e. the offset of the ptr field inside the page struct.
const pageHeaderSize = int(unsafe.Offsetof(((*page)(nil)).ptr))
// minKeysPerPage is the minimum number of keys kept on each side of a split.
const minKeysPerPage = 2
// branchPageElementSize and leafPageElementSize are the sizes in bytes of the
// fixed element headers on branch and leaf pages.
const branchPageElementSize = int(unsafe.Sizeof(branchPageElement{}))
const leafPageElementSize = int(unsafe.Sizeof(leafPageElement{}))
// Page type flags stored in page.flags.
const (
branchPageFlag = 0x01
leafPageFlag = 0x02
metaPageFlag = 0x04
freelistPageFlag = 0x10
)
// Flags stored on a leaf element.
const (
// bucketLeafFlag presumably marks a leaf element whose value is a nested
// bucket — confirm against the bucket code.
bucketLeafFlag = 0x01
)
// pgid is a page identifier.
type pgid uint64
// page is the header found at the start of a page. The page's data begins
// at the address of the ptr field; the accessor methods reinterpret that
// address as meta data or as an array of page elements.
type page struct {
id pgid // page id
flags uint16 // bitmask of the *PageFlag constants
count uint16 // number of elements stored on the page
overflow uint32 // number of additional pages following this one that belong to it
ptr uintptr // marks the start of the page data; its address is used, not its value
}
// typ names the page type for debugging output. The flags are tested in the
// order branch, leaf, meta, freelist and the first match wins; when none is
// set the result is "unknown<..>" with the flags in hex.
func (p *page) typ() string {
	switch {
	case p.flags&branchPageFlag != 0:
		return "branch"
	case p.flags&leafPageFlag != 0:
		return "leaf"
	case p.flags&metaPageFlag != 0:
		return "meta"
	case p.flags&freelistPageFlag != 0:
		return "freelist"
	default:
		return fmt.Sprintf("unknown<%02x>", p.flags)
	}
}
// meta returns a pointer to the metadata section of the page, obtained by
// reinterpreting the start of the page data (the address of p.ptr) as a meta.
// No check is made that the page actually is a meta page.
func (p *page) meta() *meta {
return (*meta)(unsafe.Pointer(&p.ptr))
}
// leafPageElement returns a pointer to the leaf element at index, treating
// the page data as an array of leafPageElement. The index is not checked
// against p.count.
func (p *page) leafPageElement(index uint16) *leafPageElement {
	elems := (*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr))
	return &elems[index]
}
// leafPageElements retrieves a list of leaf nodes.
// The returned slice views the page data as leaf elements and spans the
// whole 0x7FFFFFF-element array type, not just p.count entries; callers must
// limit themselves to the valid range.
func (p *page) leafPageElements() []leafPageElement {
return ((*[0x7FFFFFF]leafPageElement)(unsafe.Pointer(&p.ptr)))[:]
}
// branchPageElement retrieves the branch node by index.
// The page data is treated as an array of branchPageElement; the index is
// not checked against p.count.
func (p *page) branchPageElement(index uint16) *branchPageElement {
return &((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[index]
}
// branchPageElements retrieves a list of branch nodes.
// The returned slice views the page data as branch elements and spans the
// whole 0x7FFFFFF-element array type, not just p.count entries; callers must
// limit themselves to the valid range.
func (p *page) branchPageElements() []branchPageElement {
return ((*[0x7FFFFFF]branchPageElement)(unsafe.Pointer(&p.ptr)))[:]
}
// hexdump writes the first n bytes of the page, starting at the page header,
// to STDERR as one line of hex digits.
func (p *page) hexdump(n int) {
	raw := (*[maxAllocSize]byte)(unsafe.Pointer(p))
	fmt.Fprintf(os.Stderr, "%x\n", raw[:n])
}
// pages is a list of page pointers that implements sort.Interface, ordering
// pages by ascending page id.
type pages []*page
func (s pages) Len() int { return len(s) }
func (s pages) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s pages) Less(i, j int) bool { return s[i].id < s[j].id }
// branchPageElement represents a node on a branch page.
type branchPageElement struct {
pos uint32 // byte offset from this element to its key data
ksize uint32 // key length in bytes
pgid pgid // id of the child page this element points to
}
// key returns a byte slice of the node key.
// The slice is a view of the ksize bytes located pos bytes after the element
// itself; no data is copied.
func (n *branchPageElement) key() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize]
}
// leafPageElement represents a node on a leaf page.
type leafPageElement struct {
flags uint32 // element flags (copied to and from inode.flags)
pos uint32 // byte offset from this element to its key data
ksize uint32 // key length in bytes
vsize uint32 // value length in bytes; the value directly follows the key
}
// key returns a byte slice of the node key.
// The slice is a view of the ksize bytes located pos bytes after the element
// itself; no data is copied.
func (n *leafPageElement) key() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos]))[:n.ksize]
}
// value returns a byte slice of the node value.
// The slice is a view of the vsize bytes that start right after the key,
// i.e. pos+ksize bytes after the element itself; no data is copied.
func (n *leafPageElement) value() []byte {
buf := (*[maxAllocSize]byte)(unsafe.Pointer(n))
return (*[maxAllocSize]byte)(unsafe.Pointer(&buf[n.pos+n.ksize]))[:n.vsize]
}
// PageInfo represents human readable information about a page.
// NOTE(review): the fields presumably mirror page.id, page.typ(), page.count
// and page.overflow; the code that fills them in is outside this file —
// confirm there.
type PageInfo struct {
ID int
Type string
Count int
OverflowCount int
}
// pgids is a list of page ids that implements sort.Interface, ordering ids
// in ascending numeric order.
type pgids []pgid
func (s pgids) Len() int { return len(s) }
func (s pgids) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
func (s pgids) Less(i, j int) bool { return s[i] < s[j] }
// merge combines two sorted id lists into one sorted list. Ids present in
// both inputs appear once per occurrence in the result. When one input is
// empty the other input is returned as-is (not copied); otherwise a new
// slice is allocated.
func (a pgids) merge(b pgids) pgids {
	switch {
	case len(a) == 0:
		return b
	case len(b) == 0:
		return a
	}

	out := make(pgids, 0, len(a)+len(b))

	// head is the list whose next id is the smaller one; tail is the other.
	head, tail := a, b
	if b[0] < a[0] {
		head, tail = b, a
	}

	for len(head) > 0 {
		// Emit every id at the front of head that does not exceed the
		// first id of tail.
		pivot := tail[0]
		cut := sort.Search(len(head), func(i int) bool { return head[i] > pivot })
		out = append(out, head[:cut]...)
		if cut >= len(head) {
			break
		}

		// The remainder of head now starts above pivot, so the two lists
		// trade roles.
		head, tail = tail, head[cut:]
	}

	// Whatever remains in tail is larger than everything emitted so far.
	return append(out, tail...)
}

611
Godeps/_workspace/src/github.com/boltdb/bolt/tx.go generated vendored Normal file
View File

@@ -0,0 +1,611 @@
package bolt
import (
"fmt"
"io"
"os"
"sort"
"time"
"unsafe"
)
// txid represents the internal transaction identifier. It is incremented by
// one each time a writable transaction is initialized (see Tx.init).
type txid uint64
// Tx represents a read-only or read/write transaction on the database.
// Read-only transactions can be used for retrieving values for keys and creating cursors.
// Read/write transactions can create and remove buckets and create and remove keys.
//
// IMPORTANT: You must commit or rollback transactions when you are done with
// them. Pages can not be reclaimed by the writer until no more transactions
// are using them. A long running read transaction can cause the database to
// quickly grow.
//
// Field notes:
//   - writable: whether write operations are allowed (see Writable).
//   - managed: Commit asserts that this is false. NOTE(review): presumably
//     set for transactions run through a DB helper that commits on the
//     caller's behalf — confirm in the DB code.
//   - db: the owning database; Commit treats a nil db as a closed transaction.
//   - meta: this transaction's private copy of the meta page (made in init).
//   - root: the root bucket, initialized from meta.root.
//   - pages: page cache, allocated in init for writable transactions only.
//   - stats: statistics gathered while the transaction runs.
//   - commitHandlers: callbacks registered through OnCommit.
type Tx struct {
writable bool
managed bool
db *DB
meta *meta
root Bucket
pages map[pgid]*page
stats TxStats
commitHandlers []func()
}
// init binds the transaction to db. It takes a private copy of the current
// meta page (the writer may change the original), sets up the root bucket
// from that copy, and, for writable transactions only, allocates the page
// cache and advances the transaction id by one.
func (tx *Tx) init(db *DB) {
	tx.db, tx.pages = db, nil

	// Snapshot the meta page.
	tx.meta = new(meta)
	db.meta().copy(tx.meta)

	// The root bucket gets its own copy of the root bucket header.
	tx.root = newBucket(tx)
	header := new(bucket)
	*header = tx.meta.root
	tx.root.bucket = header

	if !tx.writable {
		return
	}
	tx.pages = make(map[pgid]*page)
	tx.meta.txid++
}
// ID returns the transaction id, i.e. the txid stored in the transaction's
// copy of the meta page, converted to int.
func (tx *Tx) ID() int {
return int(tx.meta.txid)
}
// DB returns a reference to the database that created the transaction.
// Commit treats a nil database reference as a closed transaction.
func (tx *Tx) DB() *DB {
return tx.db
}
// Size returns current database size in bytes as seen by this transaction.
// It is computed as the meta page's pgid (the high water mark) multiplied by
// the database page size.
func (tx *Tx) Size() int64 {
return int64(tx.meta.pgid) * int64(tx.db.pageSize)
}
// Writable returns whether the transaction can perform write operations.
// Commit returns ErrTxNotWritable for transactions where this is false.
func (tx *Tx) Writable() bool {
return tx.writable
}
// Cursor creates a cursor associated with the root bucket.
// All items in the cursor will return a nil value because all root bucket keys point to buckets.
// The cursor is only valid as long as the transaction is open.
// Do not use a cursor after the transaction is closed.
// It delegates to the root bucket's Cursor method.
func (tx *Tx) Cursor() *Cursor {
return tx.root.Cursor()
}
// Stats retrieves a copy of the current transaction statistics.
// The value is returned by value, so later updates to the transaction's
// counters are not reflected in it.
func (tx *Tx) Stats() TxStats {
return tx.stats
}
// Bucket retrieves a bucket by name.
// Returns nil if the bucket does not exist.
// The lookup is delegated to the root bucket.
func (tx *Tx) Bucket(name []byte) *Bucket {
return tx.root.Bucket(name)
}
// CreateBucket creates a new bucket.
// Returns an error if the bucket already exists, if the bucket name is blank, or if the bucket name is too long.
// The bucket is created inside the root bucket.
func (tx *Tx) CreateBucket(name []byte) (*Bucket, error) {
return tx.root.CreateBucket(name)
}
// CreateBucketIfNotExists creates a new bucket if it doesn't already exist.
// Returns an error if the bucket name is blank, or if the bucket name is too long.
// The call is delegated to the root bucket.
func (tx *Tx) CreateBucketIfNotExists(name []byte) (*Bucket, error) {
return tx.root.CreateBucketIfNotExists(name)
}
// DeleteBucket deletes a bucket.
// Returns an error if the bucket cannot be found or if the key represents a non-bucket value.
// The bucket is removed from the root bucket.
func (tx *Tx) DeleteBucket(name []byte) error {
return tx.root.DeleteBucket(name)
}
// ForEach calls fn once for every key in the root bucket, passing the key and
// the bucket looked up under that key.
// Iteration stops at the first error returned by fn, and that error is
// returned to the caller.
func (tx *Tx) ForEach(fn func(name []byte, b *Bucket) error) error {
	visit := func(key, _ []byte) error {
		return fn(key, tx.root.Bucket(key))
	}
	return tx.root.ForEach(visit)
}
// OnCommit registers fn to run after the transaction commits successfully.
// Handlers are appended to the transaction's handler list and therefore run
// in registration order.
func (tx *Tx) OnCommit(fn func()) {
	handlers := append(tx.commitHandlers, fn)
	tx.commitHandlers = handlers
}
// Commit writes all changes to disk and updates the meta page.
// Returns an error if a disk write error occurs, or if Commit is
// called on a read-only transaction.
//
// It returns ErrTxClosed if the transaction has already been closed and
// ErrTxNotWritable for a read-only transaction. The steps run in a fixed
// order: rebalance, spill, rewrite the freelist, write dirty pages, optional
// strict-mode consistency check, write the meta page. A failure in any of the
// fallible steps rolls the transaction back before the error is returned.
// Commit handlers run only after the transaction has been closed.
//
// In strict mode a failed consistency check panics instead of returning.
func (tx *Tx) Commit() error {
// Managed transactions must not be committed by the caller.
_assert(!tx.managed, "managed tx commit not allowed")
if tx.db == nil {
return ErrTxClosed
} else if !tx.writable {
return ErrTxNotWritable
}
// TODO(benbjohnson): Use vectorized I/O to write out dirty pages.
// Rebalance nodes which have had deletions.
var startTime = time.Now()
tx.root.rebalance()
// Only account for the time when at least one rebalance was recorded.
if tx.stats.Rebalance > 0 {
tx.stats.RebalanceTime += time.Since(startTime)
}
// spill data onto dirty pages.
startTime = time.Now()
if err := tx.root.spill(); err != nil {
tx.rollback()
return err
}
tx.stats.SpillTime += time.Since(startTime)
// Record the root bucket's current root page id in the meta.
tx.meta.root.root = tx.root.root
// Free the freelist and allocate new pages for it. This will overestimate
// the size of the freelist but not underestimate the size (which would be bad).
tx.db.freelist.free(tx.meta.txid, tx.db.page(tx.meta.freelist))
p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1)
if err != nil {
tx.rollback()
return err
}
if err := tx.db.freelist.write(p); err != nil {
tx.rollback()
return err
}
tx.meta.freelist = p.id
// Write dirty pages to disk.
// startTime is reset here; WriteTime below covers everything up to and
// including the meta write.
startTime = time.Now()
if err := tx.write(); err != nil {
tx.rollback()
return err
}
// If strict mode is enabled then perform a consistency check.
// Only the first consistency error is reported in the panic.
if tx.db.StrictMode {
if err, ok := <-tx.Check(); ok {
panic("check fail: " + err.Error())
}
}
// Write meta to disk.
if err := tx.writeMeta(); err != nil {
tx.rollback()
return err
}
tx.stats.WriteTime += time.Since(startTime)
// Finalize the transaction.
tx.close()
// Execute commit handlers now that the locks have been removed.
for _, fn := range tx.commitHandlers {
fn()
}
return nil
}
// Rollback closes the transaction and discards all of its updates.
// Read-only transactions must be rolled back rather than committed.
// It returns ErrTxClosed if the transaction is already closed.
func (tx *Tx) Rollback() error {
	_assert(!tx.managed, "managed tx rollback not allowed")
	if tx.db != nil {
		tx.rollback()
		return nil
	}
	return ErrTxClosed
}
// rollback is the internal rollback used by Rollback and by failed commits.
// It is a no-op on a closed transaction. For a writable transaction it first
// rolls the freelist back for this txid and reloads it from the freelist page
// referenced by the database's current meta; it then closes the transaction.
func (tx *Tx) rollback() {
	db := tx.db
	if db == nil {
		return
	}
	if tx.writable {
		db.freelist.rollback(tx.meta.txid)
		db.freelist.reload(db.page(db.meta().freelist))
	}
	tx.close()
}
// close releases the transaction's hold on the database and detaches it.
// It is a no-op on an already closed transaction.
//
// A read-only transaction is simply removed from the database's list.
// A writable transaction snapshots the freelist counters, releases the writer
// lock, and then merges its statistics into the database under statlock.
func (tx *Tx) close() {
	db := tx.db
	if db == nil {
		return
	}

	if !tx.writable {
		db.removeTx(tx)
		tx.db = nil
		return
	}

	// Read the freelist counters before the writer lock is released.
	freeN := db.freelist.free_count()
	pendingN := db.freelist.pending_count()
	inuse := db.freelist.size()

	// Remove writer lock.
	db.rwlock.Unlock()

	// Merge statistics.
	db.statlock.Lock()
	db.stats.FreePageN = freeN
	db.stats.PendingPageN = pendingN
	db.stats.FreeAlloc = (freeN + pendingN) * db.pageSize
	db.stats.FreelistInuse = inuse
	db.stats.TxStats.add(&tx.stats)
	db.statlock.Unlock()

	tx.db = nil
}
// Copy writes the entire database to a writer.
// This function exists for backwards compatibility. Use WriteTo() instead.
func (tx *Tx) Copy(w io.Writer) error {
	if _, err := tx.WriteTo(w); err != nil {
		return err
	}
	return nil
}
// WriteTo streams the entire database file to w.
// If err == nil then exactly tx.Size() bytes have been written.
//
// The file is opened read-only, first with the odirect flag and then, if that
// fails, without it. The two meta pages are copied while holding the
// database's metalock; the remaining pages are copied afterwards.
func (tx *Tx) WriteTo(w io.Writer) (n int64, err error) {
	// Attempt to open reader directly; fall back to a regular open.
	src, err := os.OpenFile(tx.db.path, os.O_RDONLY|odirect, 0)
	if err != nil {
		src, err = os.OpenFile(tx.db.path, os.O_RDONLY, 0)
		if err != nil {
			return 0, err
		}
	}

	metaBytes := int64(tx.db.pageSize * 2)

	// Copy the meta pages.
	tx.db.metalock.Lock()
	n, err = io.CopyN(w, src, metaBytes)
	tx.db.metalock.Unlock()
	if err != nil {
		_ = src.Close()
		return n, fmt.Errorf("meta copy: %s", err)
	}

	// Copy data pages.
	var dataN int64
	dataN, err = io.CopyN(w, src, tx.Size()-metaBytes)
	n += dataN
	if err != nil {
		_ = src.Close()
		return n, err
	}

	return n, src.Close()
}
// CopyFile copies the entire database to the file at the given path, creating
// or truncating it with the given mode.
// A reader transaction is maintained during the copy so it is safe to continue
// using the database while a copy is in progress.
func (tx *Tx) CopyFile(path string, mode os.FileMode) error {
	const openFlags = os.O_RDWR | os.O_CREATE | os.O_TRUNC

	dst, err := os.OpenFile(path, openFlags, mode)
	if err != nil {
		return err
	}

	if copyErr := tx.Copy(dst); copyErr != nil {
		// The copy error takes precedence over any close error.
		_ = dst.Close()
		return copyErr
	}
	return dst.Close()
}
// Check runs several consistency checks on the database for this transaction.
// Every inconsistency found is sent on the returned channel, which is closed
// when the check finishes. The channel is unbuffered, so the checking
// goroutine blocks until each error is received.
//
// It can be safely run concurrently on a writable transaction. However, this
// incurs a high cost for large databases and databases with a lot of subbuckets
// because of caching. This overhead can be removed if running on a read-only
// transaction, however, it is not safe to execute other writer transactions at
// the same time.
func (tx *Tx) Check() <-chan error {
	errc := make(chan error)
	go tx.check(errc)
	return errc
}
// check performs the consistency checks behind Check, sending every problem
// it finds on ch and closing ch when done. It verifies that no page is freed
// twice, walks every bucket from the root, and finally confirms that each
// page below the high water mark is either reachable or freed.
func (tx *Tx) check(ch chan error) {
	// Check if any pages are double freed.
	freed := make(map[pgid]bool)
	for _, id := range tx.db.freelist.all() {
		if freed[id] {
			ch <- fmt.Errorf("page %d: already freed", id)
		}
		freed[id] = true
	}

	// Track every reachable page, starting with the two meta pages.
	reachable := map[pgid]*page{
		0: tx.page(0), // meta0
		1: tx.page(1), // meta1
	}

	// The freelist page and its overflow pages are reachable too.
	freelistPage := tx.page(tx.meta.freelist)
	for i := uint32(0); i <= freelistPage.overflow; i++ {
		reachable[tx.meta.freelist+pgid(i)] = freelistPage
	}

	// Recursively check buckets.
	tx.checkBucket(&tx.root, reachable, freed, ch)

	// Ensure all pages below high water mark are either reachable or freed.
	for id := pgid(0); id < tx.meta.pgid; id++ {
		if _, ok := reachable[id]; ok || freed[id] {
			continue
		}
		ch <- fmt.Errorf("page %d: unreachable unfreed", int(id))
	}

	// Close the channel to signal completion.
	close(ch)
}
// checkBucket validates every page used by bucket b and then recurses into
// each of its child buckets. Pages it visits are recorded in reachable;
// problems are reported on ch. Buckets whose root page id is 0 (inline
// buckets) are skipped.
func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool, ch chan error) {
	// Ignore inline buckets.
	if b.root == 0 {
		return
	}

	// Check every page used by this bucket.
	visit := func(p *page, _ int) {
		if p.id > tx.meta.pgid {
			ch <- fmt.Errorf("page %d: out of bounds: %d", int(p.id), int(b.tx.meta.pgid))
		}

		// Ensure each page (including its overflow pages) is only referenced once.
		for off := pgid(0); off <= pgid(p.overflow); off++ {
			id := p.id + off
			if _, seen := reachable[id]; seen {
				ch <- fmt.Errorf("page %d: multiple references", int(id))
			}
			reachable[id] = p
		}

		// We should only encounter un-freed leaf and branch pages.
		isBranch := (p.flags & branchPageFlag) != 0
		isLeaf := (p.flags & leafPageFlag) != 0
		switch {
		case freed[p.id]:
			ch <- fmt.Errorf("page %d: reachable freed", int(p.id))
		case !isBranch && !isLeaf:
			ch <- fmt.Errorf("page %d: invalid type: %s", int(p.id), p.typ())
		}
	}
	b.tx.forEachPage(b.root, 0, visit)

	// Check each bucket within this bucket.
	_ = b.ForEach(func(key, _ []byte) error {
		child := b.Bucket(key)
		if child == nil {
			return nil
		}
		tx.checkBucket(child, reachable, freed, ch)
		return nil
	})
}
// allocate asks the database for a contiguous run of count pages and returns
// the first page. The page is recorded in the transaction's dirty page cache
// and the allocation statistics are updated.
func (tx *Tx) allocate(count int) (*page, error) {
	allocated, err := tx.db.allocate(count)
	if err != nil {
		return nil, err
	}

	// Update statistics.
	tx.stats.PageCount += 1
	tx.stats.PageAlloc += count * tx.db.pageSize

	// Save to our page cache.
	tx.pages[allocated.id] = allocated

	return allocated, nil
}
// write writes any dirty pages to disk.
//
// Pages are written in ascending page id order. Each page (plus its overflow
// pages) is written in chunks of at most maxAllocSize-1 bytes. Unless syncing
// is disabled, the file is synced afterwards, and the dirty page cache is
// then replaced with an empty map. Returns the first write or sync error.
func (tx *Tx) write() error {
// Sort pages by id.
pages := make(pages, 0, len(tx.pages))
for _, p := range tx.pages {
pages = append(pages, p)
}
sort.Sort(pages)
// Write pages to disk in order.
for _, p := range pages {
// size counts the page itself plus its overflow pages, in bytes.
size := (int(p.overflow) + 1) * tx.db.pageSize
offset := int64(p.id) * int64(tx.db.pageSize)
// Write out page in "max allocation" sized chunks.
// The page is viewed as a raw byte array so it can be sliced into chunks.
ptr := (*[maxAllocSize]byte)(unsafe.Pointer(p))
for {
// Limit our write to our max allocation size.
sz := size
if sz > maxAllocSize-1 {
sz = maxAllocSize - 1
}
// Write chunk to disk.
buf := ptr[:sz]
if _, err := tx.db.ops.writeAt(buf, offset); err != nil {
return err
}
// Update statistics.
tx.stats.Write++
// Exit inner for loop if we've written all the chunks.
size -= sz
if size == 0 {
break
}
// Otherwise move offset forward and move pointer to next chunk.
offset += int64(sz)
ptr = (*[maxAllocSize]byte)(unsafe.Pointer(&ptr[sz]))
}
}
// Ignore file sync if flag is set on DB.
// IgnoreNoSync forces the sync even when NoSync is set.
if !tx.db.NoSync || IgnoreNoSync {
if err := fdatasync(tx.db); err != nil {
return err
}
}
// Clear out page cache.
tx.pages = make(map[pgid]*page)
return nil
}
// writeMeta serializes the transaction's meta into a temporary page-sized
// buffer, writes that buffer to the file at the offset of the resulting page
// id, and syncs the file unless syncing is disabled. The write counter is
// incremented only after everything succeeds.
func (tx *Tx) writeMeta() error {
	// Create a temporary buffer for the meta page.
	scratch := make([]byte, tx.db.pageSize)
	metaPage := tx.db.pageInBuffer(scratch, 0)
	tx.meta.write(metaPage)

	// Write the meta page to file.
	offset := int64(metaPage.id) * int64(tx.db.pageSize)
	if _, err := tx.db.ops.writeAt(scratch, offset); err != nil {
		return err
	}

	if syncNeeded := !tx.db.NoSync || IgnoreNoSync; syncNeeded {
		if err := fdatasync(tx.db); err != nil {
			return err
		}
	}

	// Update statistics.
	tx.stats.Write++
	return nil
}
// page returns a reference to the page with the given id.
// If the page is in this transaction's dirty page cache, the buffered copy is
// returned; otherwise the page comes straight from the database's mmap.
func (tx *Tx) page(id pgid) *page {
	// Indexing a nil map is safe and simply reports a miss.
	if dirty, ok := tx.pages[id]; ok {
		return dirty
	}
	return tx.db.page(id)
}
// forEachPage calls fn for the page with the given id and then, when that
// page is a branch page, recursively for every page its elements reference.
// depth is passed through to fn and increases by one per level.
func (tx *Tx) forEachPage(pgid pgid, depth int, fn func(*page, int)) {
	current := tx.page(pgid)

	// Execute function.
	fn(current, depth)

	// Only branch pages have children to descend into.
	if (current.flags & branchPageFlag) == 0 {
		return
	}
	for i := 0; i < int(current.count); i++ {
		child := current.branchPageElement(uint16(i)).pgid
		tx.forEachPage(child, depth+1, fn)
	}
}
// Page returns page information for a given page number.
// This is only safe for concurrent use when used by a writable transaction.
//
// It returns ErrTxClosed on a closed transaction, and (nil, nil) when id is
// at or beyond the transaction's page high water mark. A page present in the
// freelist is reported with type "free".
func (tx *Tx) Page(id int) (*PageInfo, error) {
	switch {
	case tx.db == nil:
		return nil, ErrTxClosed
	case pgid(id) >= tx.meta.pgid:
		return nil, nil
	}

	// Build the page info.
	pageID := pgid(id)
	p := tx.db.page(pageID)
	info := &PageInfo{
		ID:            id,
		Count:         int(p.count),
		OverflowCount: int(p.overflow),
	}

	// Determine the type (or if it's free).
	if !tx.db.freelist.freed(pageID) {
		info.Type = p.typ()
	} else {
		info.Type = "free"
	}
	return info, nil
}
// TxStats represents statistics about the actions performed by the transaction.
type TxStats struct {
	// Page statistics.
	PageCount int // number of page allocations
	PageAlloc int // total bytes allocated

	// Cursor statistics.
	CursorCount int // number of cursors created

	// Node statistics
	NodeCount int // number of node allocations
	NodeDeref int // number of node dereferences

	// Rebalance statistics.
	Rebalance     int           // number of node rebalances
	RebalanceTime time.Duration // total time spent rebalancing

	// Split/Spill statistics.
	Split     int           // number of nodes split
	Spill     int           // number of nodes spilled
	SpillTime time.Duration // total time spent spilling

	// Write statistics.
	Write     int           // number of writes performed
	WriteTime time.Duration // total time spent writing to disk
}

// add accumulates every counter and duration of other into s.
func (s *TxStats) add(other *TxStats) {
	*s = TxStats{
		PageCount:     s.PageCount + other.PageCount,
		PageAlloc:     s.PageAlloc + other.PageAlloc,
		CursorCount:   s.CursorCount + other.CursorCount,
		NodeCount:     s.NodeCount + other.NodeCount,
		NodeDeref:     s.NodeDeref + other.NodeDeref,
		Rebalance:     s.Rebalance + other.Rebalance,
		RebalanceTime: s.RebalanceTime + other.RebalanceTime,
		Split:         s.Split + other.Split,
		Spill:         s.Spill + other.Spill,
		SpillTime:     s.SpillTime + other.SpillTime,
		Write:         s.Write + other.Write,
		WriteTime:     s.WriteTime + other.WriteTime,
	}
}

// Sub calculates and returns the difference between two sets of transaction
// stats (s minus other), field by field.
// This is useful when obtaining stats at two different points in time and
// you need the performance counters that occurred within that time span.
func (s *TxStats) Sub(other *TxStats) TxStats {
	return TxStats{
		PageCount:     s.PageCount - other.PageCount,
		PageAlloc:     s.PageAlloc - other.PageAlloc,
		CursorCount:   s.CursorCount - other.CursorCount,
		NodeCount:     s.NodeCount - other.NodeCount,
		NodeDeref:     s.NodeDeref - other.NodeDeref,
		Rebalance:     s.Rebalance - other.Rebalance,
		RebalanceTime: s.RebalanceTime - other.RebalanceTime,
		Split:         s.Split - other.Split,
		Spill:         s.Spill - other.Spill,
		SpillTime:     s.SpillTime - other.SpillTime,
		Write:         s.Write - other.Write,
		WriteTime:     s.WriteTime - other.WriteTime,
	}
}

View File

@@ -0,0 +1 @@
*~

View File

@@ -0,0 +1,19 @@
# This file is like Go's AUTHORS file: it lists Copyright holders.
# The list of humans who have contributed is in the CONTRIBUTORS file.
#
# To contribute to this project, because it will eventually be folded
# back in to Go itself, you need to submit a CLA:
#
# http://golang.org/doc/contribute.html#copyright
#
# Then you get added to CONTRIBUTORS and you or your company get added
# to the AUTHORS file.
Blake Mizerany <blake.mizerany@gmail.com> github=bmizerany
Daniel Morsing <daniel.morsing@gmail.com> github=DanielMorsing
Gabriel Aszalos <gabriel.aszalos@gmail.com> github=gbbr
Google, Inc.
Keith Rarick <kr@xph.us> github=kr
Matthew Keenan <tank.en.mate@gmail.com> <github@mattkeenan.net> github=mattkeenan
Matt Layher <mdlayher@gmail.com> github=mdlayher
Tatsuhiro Tsujikawa <tatsuhiro.t@gmail.com> github=tatsuhiro-t

View File

@@ -0,0 +1,19 @@
# This file is like Go's CONTRIBUTORS file: it lists humans.
# The list of copyright holders (which may be companies) is in the AUTHORS file.
#
# To contribute to this project, because it will eventually be folded
# back in to Go itself, you need to submit a CLA:
#
# http://golang.org/doc/contribute.html#copyright
#
# Then you get added to CONTRIBUTORS and you or your company get added
# to the AUTHORS file.
Blake Mizerany <blake.mizerany@gmail.com> github=bmizerany
Brad Fitzpatrick <bradfitz@golang.org> github=bradfitz
Daniel Morsing <daniel.morsing@gmail.com> github=DanielMorsing
Gabriel Aszalos <gabriel.aszalos@gmail.com> github=gbbr
Keith Rarick <kr@xph.us> github=kr
Matthew Keenan <tank.en.mate@gmail.com> <github@mattkeenan.net> github=mattkeenan
Matt Layher <mdlayher@gmail.com> github=mdlayher
Tatsuhiro Tsujikawa <tatsuhiro.t@gmail.com> github=tatsuhiro-t

View File

@@ -0,0 +1,44 @@
#
# This Dockerfile builds a recent curl with HTTP/2 client support, using
# a recent nghttp2 build.
#
# See the Makefile for how to tag it. If Docker and that image are found, the
# Go tests use this curl binary for integration tests.
#
# Base image plus the basic tools needed to fetch and compile sources.
FROM ubuntu:trusty
RUN apt-get update && \
apt-get upgrade -y && \
apt-get install -y git-core build-essential wget
# Build dependencies for the nghttp2 and curl source builds below.
RUN apt-get install -y --no-install-recommends \
autotools-dev libtool pkg-config zlib1g-dev \
libcunit1-dev libssl-dev libxml2-dev libevent-dev \
automake autoconf
# Note: setting NGHTTP2_VER before the git clone, so an old git clone isn't cached:
ENV NGHTTP2_VER af24f8394e43f4
# Fetch nghttp2, pin it to NGHTTP2_VER, then build and install it.
RUN cd /root && git clone https://github.com/tatsuhiro-t/nghttp2.git
WORKDIR /root/nghttp2
RUN git reset --hard $NGHTTP2_VER
RUN autoreconf -i
RUN automake
RUN autoconf
RUN ./configure
RUN make
RUN make install
# Download curl 7.40.0 and build it against the nghttp2 installed in /usr/local.
WORKDIR /root
RUN wget http://curl.haxx.se/download/curl-7.40.0.tar.gz
RUN tar -zxvf curl-7.40.0.tar.gz
WORKDIR /root/curl-7.40.0
RUN ./configure --with-ssl --with-nghttp2=/usr/local
RUN make
RUN make install
# Refresh the shared library cache after the installs above.
RUN ldconfig
# The container runs the freshly built curl; with no arguments it is passed -h.
CMD ["-h"]
ENTRYPOINT ["/usr/local/bin/curl"]

View File

@@ -0,0 +1,5 @@
We only accept contributions from users who have gone through Go's
contribution process (signed a CLA).
Please acknowledge whether you have (and use the same email) if
sending a pull request.

View File

@@ -0,0 +1,7 @@
Copyright 2014 Google & the Go AUTHORS
Go AUTHORS are:
See https://code.google.com/p/go/source/browse/AUTHORS
Licensed under the terms of Go itself:
https://code.google.com/p/go/source/browse/LICENSE

View File

@@ -0,0 +1,3 @@
# curlimage builds the Docker image from this directory and tags it gohttp2/curl.
curlimage:
docker build -t gohttp2/curl .

17
Godeps/_workspace/src/github.com/bradfitz/http2/README generated vendored Normal file
View File

@@ -0,0 +1,17 @@
This is a work-in-progress HTTP/2 implementation for Go.
It will eventually live in the Go standard library and won't require
any changes to your code to use. It will just be automatic.
Status:
* The server support is pretty good. A few things are missing
but are being worked on.
* The client work has just started, but because it shares a lot of code
it is coming along much quicker.
Docs are at https://godoc.org/github.com/bradfitz/http2
Demo test server at https://http2.golang.org/
Help & bug reports welcome.

View File

@@ -0,0 +1,75 @@
// Copyright 2014 The Go Authors.
// See https://code.google.com/p/go/source/browse/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://code.google.com/p/go/source/browse/LICENSE
package http2
import (
"errors"
)
// buffer is a reader/writer backed by a fixed size byte slice.
// It never allocates, but moves old data as new data is written.
// The zero value has no capacity; callers supply buf.
type buffer struct {
	buf    []byte
	r, w   int   // read and write offsets into buf; unread data is buf[r:w]
	closed bool  // set by Close; further writes are rejected
	err    error // err to return to reader
}

var (
	errReadEmpty = errors.New("read from empty buffer")
	errWriteFull = errors.New("write on full buffer")
)

// Read copies bytes from the buffer into p.
// It is an error to read when no data is available: an open, empty buffer
// yields errReadEmpty. Once the buffer is closed and fully drained, Read
// returns the error that was passed to Close.
func (b *buffer) Read(p []byte) (n int, err error) {
	n = copy(p, b.buf[b.r:b.w])
	b.r += n

	drained := b.r == b.w
	switch {
	case drained && b.closed:
		return n, b.err
	case drained && n == 0:
		return n, errReadEmpty
	}
	return n, nil
}

// Len returns the number of bytes of the unread portion of the buffer.
func (b *buffer) Len() int {
	unread := b.w - b.r
	return unread
}

// Write copies bytes from p into the buffer.
// It is an error to write more data than the buffer can hold: whatever fits
// is stored and errWriteFull is returned. Writing to a closed buffer fails.
func (b *buffer) Write(p []byte) (n int, err error) {
	if b.closed {
		return 0, errors.New("closed")
	}

	// Slide existing data to beginning, but only when the tail is too small
	// for p and there is consumed space to reclaim.
	if tail := len(b.buf) - b.w; b.r > 0 && len(p) > tail {
		copy(b.buf, b.buf[b.r:b.w])
		b.w -= b.r
		b.r = 0
	}

	// Write new data.
	n = copy(b.buf[b.w:], p)
	b.w += n
	if n < len(p) {
		return n, errWriteFull
	}
	return n, nil
}

// Close marks the buffer as closed. Future calls to Write will
// return an error. Future calls to Read, once the buffer is
// empty, will return err. Only the first Close has any effect.
func (b *buffer) Close(err error) {
	if b.closed {
		return
	}
	b.closed = true
	b.err = err
}

View File

@@ -0,0 +1,78 @@
// Copyright 2014 The Go Authors.
// See https://code.google.com/p/go/source/browse/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://code.google.com/p/go/source/browse/LICENSE
package http2
import "fmt"
// An ErrCode is an unsigned 32-bit error code as defined in the HTTP/2 spec.
type ErrCode uint32

// The error codes known to this package, with their numeric values.
const (
	ErrCodeNo                 ErrCode = 0x0
	ErrCodeProtocol           ErrCode = 0x1
	ErrCodeInternal           ErrCode = 0x2
	ErrCodeFlowControl        ErrCode = 0x3
	ErrCodeSettingsTimeout    ErrCode = 0x4
	ErrCodeStreamClosed       ErrCode = 0x5
	ErrCodeFrameSize          ErrCode = 0x6
	ErrCodeRefusedStream      ErrCode = 0x7
	ErrCodeCancel             ErrCode = 0x8
	ErrCodeCompression        ErrCode = 0x9
	ErrCodeConnect            ErrCode = 0xa
	ErrCodeEnhanceYourCalm    ErrCode = 0xb
	ErrCodeInadequateSecurity ErrCode = 0xc
	ErrCodeHTTP11Required     ErrCode = 0xd
)

// errCodeName maps each known error code to the name String reports for it.
var errCodeName = map[ErrCode]string{
	ErrCodeNo:                 "NO_ERROR",
	ErrCodeProtocol:           "PROTOCOL_ERROR",
	ErrCodeInternal:           "INTERNAL_ERROR",
	ErrCodeFlowControl:        "FLOW_CONTROL_ERROR",
	ErrCodeSettingsTimeout:    "SETTINGS_TIMEOUT",
	ErrCodeStreamClosed:       "STREAM_CLOSED",
	ErrCodeFrameSize:          "FRAME_SIZE_ERROR",
	ErrCodeRefusedStream:      "REFUSED_STREAM",
	ErrCodeCancel:             "CANCEL",
	ErrCodeCompression:        "COMPRESSION_ERROR",
	ErrCodeConnect:            "CONNECT_ERROR",
	ErrCodeEnhanceYourCalm:    "ENHANCE_YOUR_CALM",
	ErrCodeInadequateSecurity: "INADEQUATE_SECURITY",
	ErrCodeHTTP11Required:     "HTTP_1_1_REQUIRED",
}

// String returns the registered name of the error code, or a description
// containing the hexadecimal value when the code is not in errCodeName.
func (e ErrCode) String() string {
	name, known := errCodeName[e]
	if !known {
		return fmt.Sprintf("unknown error code 0x%x", uint32(e))
	}
	return name
}
// ConnectionError is an error that results in the termination of the
// entire connection. Its value is the ErrCode describing the failure.
type ConnectionError ErrCode

// Error formats the underlying error code with a "connection error" prefix.
func (e ConnectionError) Error() string {
	code := ErrCode(e)
	return fmt.Sprintf("connection error: %s", code)
}
// StreamError is an error that only affects one stream within an
// HTTP/2 connection.
type StreamError struct {
	StreamID uint32  // the stream the error applies to
	Code     ErrCode // the error code for that stream
}

// Error reports the affected stream ID followed by the error code.
func (e StreamError) Error() string {
	const format = "stream error: stream ID %d; %v"
	return fmt.Sprintf(format, e.StreamID, e.Code)
}
// goAwayFlowError is the error for a connection whose flow control window
// grew past its maximum.
//
// 6.9.1 The Flow Control Window
// "If a sender receives a WINDOW_UPDATE that causes a flow control
// window to exceed this maximum it MUST terminate either the stream
// or the connection, as appropriate. For streams, [...]; for the
// connection, a GOAWAY frame with a FLOW_CONTROL_ERROR code."
type goAwayFlowError struct{}

// Error returns a fixed description of the condition.
func (goAwayFlowError) Error() string {
	const msg = "connection exceeded flow control window size"
	return msg
}

View File

@@ -0,0 +1,51 @@
// Copyright 2014 The Go Authors.
// See https://code.google.com/p/go/source/browse/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://code.google.com/p/go/source/browse/LICENSE
// Flow control
package http2
// flow is the flow control window's size.
type flow struct {
	// n is the number of DATA bytes we're allowed to send.
	// A flow is kept both on a conn and a per-stream.
	n int32

	// conn points to the shared connection-level flow that is
	// shared by all streams on that conn. It is nil for the flow
	// that's on the conn directly.
	conn *flow
}

// setConnFlow attaches the connection-level flow cf to f.
func (f *flow) setConnFlow(cf *flow) { f.conn = cf }

// available returns how many bytes may be taken from f: the smaller of f's
// own window and, when one is attached, the connection-level window.
func (f *flow) available() int32 {
	n := f.n
	if f.conn != nil && f.conn.n < n {
		n = f.conn.n
	}
	return n
}

// take consumes n bytes from f and from the attached connection-level flow,
// if any. It panics when n exceeds available().
func (f *flow) take(n int32) {
	if n > f.available() {
		panic("internal error: took too much")
	}
	f.n -= n
	if f.conn != nil {
		f.conn.n -= n
	}
}

// add adds n bytes (positive or negative) to the flow control window.
// It returns false, leaving the window unchanged, if the sum would exceed
// 2^31-1 (or could not be represented in an int32 at all).
func (f *flow) add(n int32) bool {
	const (
		maxWindow = 1<<31 - 1
		minWindow = -1 << 31
	)
	// Do the arithmetic in 64 bits. The window may legitimately be negative,
	// and computing "maxWindow - f.n" in int32 would wrap in that case and
	// wrongly reject every increment.
	sum := int64(f.n) + int64(n)
	if sum > maxWindow || sum < minWindow {
		return false
	}
	f.n = int32(sum)
	return true
}

1113
Godeps/_workspace/src/github.com/bradfitz/http2/frame.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,169 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// See https://code.google.com/p/go/source/browse/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://code.google.com/p/go/source/browse/LICENSE
// Defensive debug-only utility to track that functions run on the
// goroutine that they're supposed to.
package http2
import (
"bytes"
"errors"
"fmt"
"runtime"
"strconv"
"sync"
)
// DebugGoroutines enables the goroutine identity checks performed by
// goroutineLock. While it is false, check and checkNotOn return immediately.
var DebugGoroutines = false
// goroutineLock records the ID of the goroutine that created it so later
// calls can assert which goroutine they are running on.
type goroutineLock uint64

// newGoroutineLock returns a lock bound to the calling goroutine.
func newGoroutineLock() goroutineLock {
	id := curGoroutineID()
	return goroutineLock(id)
}

// check panics if the caller is not the goroutine the lock was created on.
// It does nothing unless DebugGoroutines is set.
func (g goroutineLock) check() {
	if DebugGoroutines && curGoroutineID() != uint64(g) {
		panic("running on the wrong goroutine")
	}
}

// checkNotOn panics if the caller is the goroutine the lock was created on.
// It does nothing unless DebugGoroutines is set.
func (g goroutineLock) checkNotOn() {
	if DebugGoroutines && curGoroutineID() == uint64(g) {
		panic("running on the wrong goroutine")
	}
}
// goroutineSpace is the prefix curGoroutineID strips from the stack dump
// before parsing the goroutine number.
var goroutineSpace = []byte("goroutine ")
// curGoroutineID returns the ID of the calling goroutine, parsed from the
// first line of its stack dump. It borrows a scratch buffer from littleBuf
// and panics if the dump does not have the expected shape.
func curGoroutineID() uint64 {
	scratch := littleBuf.Get().(*[]byte)
	defer littleBuf.Put(scratch)

	dump := *scratch
	dump = dump[:runtime.Stack(dump, false)]

	// Parse the 4707 out of "goroutine 4707 ["
	rest := bytes.TrimPrefix(dump, goroutineSpace)
	end := bytes.IndexByte(rest, ' ')
	if end < 0 {
		panic(fmt.Sprintf("No space found in %q", rest))
	}
	digits := rest[:end]

	id, err := parseUintBytes(digits, 10, 64)
	if err != nil {
		panic(fmt.Sprintf("Failed to parse goroutine ID out of %q: %v", digits, err))
	}
	return id
}
// littleBuf pools pointers to 64-byte scratch slices, used by
// curGoroutineID to hold the start of a stack dump.
var littleBuf = sync.Pool{
	New: func() interface{} {
		scratch := make([]byte, 64)
		return &scratch
	},
}
// parseUintBytes is like strconv.ParseUint, but using a []byte.
//
// base must be 0 or in the range 2..36; with base 0 the base is taken from a
// "0x"/"0X" (16) or "0" (8) prefix and defaults to 10. bitSize 0 means
// strconv.IntSize. On failure it returns a *strconv.NumError wrapping
// strconv.ErrSyntax, strconv.ErrRange or an "invalid base" error; for range
// errors n is the maximum uint64, otherwise 0.
func parseUintBytes(s []byte, base int, bitSize int) (n uint64, err error) {
// Declared up front: goto may not jump over variable declarations.
var cutoff, maxVal uint64
if bitSize == 0 {
bitSize = int(strconv.IntSize)
}
// Keep the original input for the error message.
s0 := s
switch {
case len(s) < 1:
err = strconv.ErrSyntax
goto Error
case 2 <= base && base <= 36:
// valid base; nothing to do
case base == 0:
// Look for octal, hex prefix.
switch {
case s[0] == '0' && len(s) > 1 && (s[1] == 'x' || s[1] == 'X'):
base = 16
s = s[2:]
if len(s) < 1 {
err = strconv.ErrSyntax
goto Error
}
case s[0] == '0':
base = 8
default:
base = 10
}
default:
err = errors.New("invalid base " + strconv.Itoa(base))
goto Error
}
n = 0
// cutoff is the smallest value for which multiplying by base overflows.
cutoff = cutoff64(base)
maxVal = 1<<uint(bitSize) - 1
for i := 0; i < len(s); i++ {
var v byte
d := s[i]
switch {
case '0' <= d && d <= '9':
v = d - '0'
case 'a' <= d && d <= 'z':
v = d - 'a' + 10
case 'A' <= d && d <= 'Z':
v = d - 'A' + 10
default:
n = 0
err = strconv.ErrSyntax
goto Error
}
// A digit that is not valid in this base is a syntax error.
if int(v) >= base {
n = 0
err = strconv.ErrSyntax
goto Error
}
if n >= cutoff {
// n*base overflows
n = 1<<64 - 1
err = strconv.ErrRange
goto Error
}
n *= uint64(base)
n1 := n + uint64(v)
if n1 < n || n1 > maxVal {
// n+v overflows
n = 1<<64 - 1
err = strconv.ErrRange
goto Error
}
n = n1
}
return n, nil
Error:
return n, &strconv.NumError{Func: "ParseUint", Num: string(s0), Err: err}
}
// cutoff64 returns the first number n such that n*base >= 1<<64.
// It returns 0 for any base below 2.
func cutoff64(base int) uint64 {
	const maxUint64 = 1<<64 - 1
	if base >= 2 {
		return maxUint64/uint64(base) + 1
	}
	return 0
}

View File

@@ -0,0 +1,5 @@
h2demo
h2demo.linux
client-id.dat
client-secret.dat
token.dat

View File

@@ -0,0 +1,5 @@
# h2demo.linux builds the demo server for Linux with the h2demo build tag.
h2demo.linux: h2demo.go
GOOS=linux go build --tags=h2demo -o h2demo.linux .
# upload pipes the Linux binary into launch.go with the --write_object and
# --write_object_is_public flags.
upload: h2demo.linux
cat h2demo.linux | go run launch.go --write_object=http2-demo-server-tls/h2demo --write_object_is_public

View File

@@ -0,0 +1,16 @@
Client:
-- Firefox nightly with about:config network.http.spdy.enabled.http2draft set true
-- Chrome: go to chrome://flags/#enable-spdy4, save and restart (button at bottom)
Make CA:
$ openssl genrsa -out rootCA.key 2048
$ openssl req -x509 -new -nodes -key rootCA.key -days 1024 -out rootCA.pem
... install that to Firefox
Make cert:
$ openssl genrsa -out server.key 2048
$ openssl req -new -key server.key -out server.csr
$ openssl x509 -req -in server.csr -CA rootCA.pem -CAkey rootCA.key -CAcreateserial -out server.crt -days 500

View File

@@ -0,0 +1,426 @@
// Copyright 2014 The Go Authors.
// See https://code.google.com/p/go/source/browse/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://code.google.com/p/go/source/browse/LICENSE
// +build h2demo
package main
import (
"bytes"
"crypto/tls"
"flag"
"fmt"
"hash/crc32"
"image"
"image/jpeg"
"io"
"io/ioutil"
"log"
"net"
"net/http"
"os/exec"
"path"
"regexp"
"runtime"
"strconv"
"strings"
"sync"
"time"
"camlistore.org/pkg/googlestorage"
"camlistore.org/pkg/singleflight"
"github.com/bradfitz/http2"
)
// Command-line flags for the demo server; the help text of each flag
// describes its purpose.
var (
openFirefox = flag.Bool("openff", false, "Open Firefox")
addr = flag.String("addr", "localhost:4430", "TLS address to listen on")
httpAddr = flag.String("httpaddr", "", "If non-empty, address to listen for regular HTTP on")
prod = flag.Bool("prod", false, "Whether to configure itself to be the production http2.golang.org server.")
)
// homeOldHTTP writes a static HTML page telling the visitor that they are not
// using HTTP/2 and how to enable it in their browser. registerHandlers serves
// it for TLS requests whose protocol major version is 1.
func homeOldHTTP(w http.ResponseWriter, r *http.Request) {
io.WriteString(w, `<html>
<body>
<h1>Go + HTTP/2</h1>
<p>Welcome to <a href="https://golang.org/">the Go language</a>'s <a href="https://http2.github.io/">HTTP/2</a> demo & interop server.</p>
<p>Unfortunately, you're <b>not</b> using HTTP/2 right now. To do so:</p>
<ul>
<li>Use Firefox Nightly or go to <b>about:config</b> and enable "network.http.spdy.enabled.http2draft"</li>
<li>Use Google Chrome Canary and/or go to <b>chrome://flags/#enable-spdy4</b> to <i>Enable SPDY/4</i> (Chrome's name for HTTP/2)</li>
</ul>
<p>See code & instructions for connecting at <a href="https://github.com/bradfitz/http2">https://github.com/bradfitz/http2</a>.</p>
</body></html>`)
}
// home serves the static HTML landing page, which lists the test handlers.
// Any path other than exactly "/" gets a 404.
func home(w http.ResponseWriter, r *http.Request) {
// This handler is registered on "/", so reject all other paths here.
if r.URL.Path != "/" {
http.NotFound(w, r)
return
}
io.WriteString(w, `<html>
<body>
<h1>Go + HTTP/2</h1>
<p>Welcome to <a href="https://golang.org/">the Go language</a>'s <a
href="https://http2.github.io/">HTTP/2</a> demo & interop server.</p>
<p>Congratulations, <b>you're using HTTP/2 right now</b>.</p>
<p>This server exists for others in the HTTP/2 community to test their HTTP/2 client implementations and point out flaws in our server.</p>
<p> The code is currently at <a
href="https://github.com/bradfitz/http2">github.com/bradfitz/http2</a>
but will move to the Go standard library at some point in the future
(enabled by default, without users needing to change their code).</p>
<p>Contact info: <i>bradfitz@golang.org</i>, or <a
href="https://github.com/bradfitz/http2/issues">file a bug</a>.</p>
<h2>Handlers for testing</h2>
<ul>
<li>GET <a href="/reqinfo">/reqinfo</a> to dump the request + headers received</li>
<li>GET <a href="/clockstream">/clockstream</a> streams the current time every second</li>
<li>GET <a href="/gophertiles">/gophertiles</a> to see a page with a bunch of images</li>
<li>GET <a href="/file/gopher.png">/file/gopher.png</a> for a small file (does If-Modified-Since, Content-Range, etc)</li>
<li>GET <a href="/file/go.src.tar.gz">/file/go.src.tar.gz</a> for a larger file (~10 MB)</li>
<li>GET <a href="/redirect">/redirect</a> to redirect back to / (this page)</li>
<li>GET <a href="/goroutines">/goroutines</a> to see all active goroutines in this server</li>
<li>PUT something to <a href="/crc32">/crc32</a> to get a count of number of bytes and its CRC-32</li>
</ul>
</body></html>`)
}
// reqInfoHandler dumps the request it received as plain text: one labelled
// line per request property, followed by the request headers.
func reqInfoHandler(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "text/plain")

	// Each entry is written with its own format string, in this order.
	lines := []struct {
		format string
		value  interface{}
	}{
		{"Method: %s\n", r.Method},
		{"Protocol: %s\n", r.Proto},
		{"Host: %s\n", r.Host},
		{"RemoteAddr: %s\n", r.RemoteAddr},
		{"RequestURI: %q\n", r.RequestURI},
		{"URL: %#v\n", r.URL},
		{"Body.ContentLength: %d (-1 means unknown)\n", r.ContentLength},
		{"Close: %v (relevant for HTTP/1 only)\n", r.Close},
		{"TLS: %#v\n", r.TLS},
	}
	for _, line := range lines {
		fmt.Fprintf(w, line.format, line.value)
	}

	fmt.Fprintf(w, "\nHeaders:\n")
	r.Header.Write(w)
}
// crcHandler reads the body of a PUT request and replies, as plain text, with
// the number of bytes read and their CRC-32 (IEEE) checksum in hex.
//
// Any other method is rejected with status 400. If the body cannot be read
// completely, the handler answers with status 400 and the read error instead
// of silently returning an empty 200 response.
func crcHandler(w http.ResponseWriter, r *http.Request) {
	if r.Method != "PUT" {
		http.Error(w, "PUT required.", 400)
		return
	}
	crc := crc32.NewIEEE()
	n, err := io.Copy(crc, r.Body)
	if err != nil {
		http.Error(w, "reading request body: "+err.Error(), http.StatusBadRequest)
		return
	}
	w.Header().Set("Content-Type", "text/plain")
	fmt.Fprintf(w, "bytes=%d, CRC32=%x", n, crc.Sum(nil))
}
// Shared state used by the handlers that fileServer returns.
var (
fsGrp singleflight.Group // fileServer runs its fetch through fsGrp.Do, keyed by URL
fsMu sync.Mutex // guards fsCache
fsCache = map[string]http.Handler{} // content handlers keyed by source URL
)
// fileServer returns a file-serving handler that proxies URL.
// It lazily fetches URL on the first access and caches its contents forever.
//
// The fetch goes through fsGrp.Do keyed by URL. A failed fetch, including any
// response whose status is not 200 OK, is reported to the client as a 500 and
// is not cached, so a later request will try again.
func fileServer(url string) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		hi, err := fsGrp.Do(url, func() (interface{}, error) {
			fsMu.Lock()
			cached, ok := fsCache[url]
			fsMu.Unlock()
			if ok {
				return cached, nil
			}

			res, err := http.Get(url)
			if err != nil {
				return nil, err
			}
			defer res.Body.Close()
			// http.Get does not treat non-2xx responses as errors; without
			// this check an upstream error page would be cached forever.
			if res.StatusCode != http.StatusOK {
				return nil, fmt.Errorf("fetching %s: %s", url, res.Status)
			}
			slurp, err := ioutil.ReadAll(res.Body)
			if err != nil {
				return nil, err
			}

			// The fetch time stands in for the content's modification time.
			modTime := time.Now()
			var h http.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
				http.ServeContent(w, r, path.Base(url), modTime, bytes.NewReader(slurp))
			})
			fsMu.Lock()
			fsCache[url] = h
			fsMu.Unlock()
			return h, nil
		})
		if err != nil {
			http.Error(w, err.Error(), 500)
			return
		}
		hi.(http.Handler).ServeHTTP(w, r)
	})
}
// clockStreamHandler streams the current time as plain text, one line per
// second, flushing after every line, until the client disconnects.
// The ResponseWriter must implement http.CloseNotifier and http.Flusher;
// the type assertions panic otherwise.
func clockStreamHandler(w http.ResponseWriter, r *http.Request) {
	gone := w.(http.CloseNotifier).CloseNotify()
	w.Header().Set("Content-Type", "text/plain")

	tick := time.NewTicker(1 * time.Second)
	defer tick.Stop()

	// Padding sent up front so browsers begin rendering right away.
	fmt.Fprintf(w, "# ~1KB of junk to force browsers to start rendering immediately: \n")
	io.WriteString(w, strings.Repeat("# xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\n", 13))

	for {
		fmt.Fprintf(w, "%v\n", time.Now())
		w.(http.Flusher).Flush()

		select {
		case <-gone:
			log.Printf("Client %v disconnected from the clock", r.RemoteAddr)
			return
		case <-tick.C:
			// Next tick: loop around and emit another timestamp.
		}
	}
}
// registerHandlers wires up the demo's routes.
//
// The default mux gets a single catch-all handler that dispatches on the
// connection type: plaintext requests are served the gopher tiles page or
// redirected to the HTTPS site, HTTP/1.x-over-TLS requests get the legacy
// home page (or /reqinfo), and everything else is routed through a second
// mux holding the HTTP/2 demo endpoints.
func registerHandlers() {
	tiles := newGopherTilesHandler()
	h2mux := http.NewServeMux()

	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		switch {
		case r.TLS == nil:
			if r.URL.Path == "/gophertiles" {
				tiles.ServeHTTP(w, r)
				return
			}
			http.Redirect(w, r, "https://http2.golang.org/", http.StatusFound)
		case r.ProtoMajor == 1:
			if r.URL.Path == "/reqinfo" {
				reqInfoHandler(w, r)
				return
			}
			homeOldHTTP(w, r)
		default:
			h2mux.ServeHTTP(w, r)
		}
	})

	h2mux.HandleFunc("/", home)
	h2mux.Handle("/file/gopher.png", fileServer("https://golang.org/doc/gopher/frontpage.png"))
	h2mux.Handle("/file/go.src.tar.gz", fileServer("https://storage.googleapis.com/golang/go1.4.1.src.tar.gz"))
	h2mux.HandleFunc("/reqinfo", reqInfoHandler)
	h2mux.HandleFunc("/crc32", crcHandler)
	h2mux.HandleFunc("/clockstream", clockStreamHandler)
	h2mux.Handle("/gophertiles", tiles)
	h2mux.HandleFunc("/redirect", func(w http.ResponseWriter, r *http.Request) {
		http.Redirect(w, r, "/", http.StatusFound)
	})

	// Home-directory path components are removed from the goroutine dump
	// before it is sent to the client.
	stripHomedir := regexp.MustCompile(`/(Users|home)/\w+`)
	h2mux.HandleFunc("/goroutines", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/plain; charset=utf-8")
		stack := make([]byte, 2<<20)
		used := runtime.Stack(stack, true)
		w.Write(stripHomedir.ReplaceAll(stack[:used], nil))
	})
}
// newGopherTilesHandler downloads a gopher JPEG, cuts it into tileSize x
// tileSize tiles, and returns a handler that serves either a single tile or
// an HTML page embedding every tile as a separate <img>.
//
// All of the download and tiling work happens once, when this function is
// called; any failure there terminates the process via log.Fatal.
//
// The returned handler understands these form values:
//   - x, y: tile coordinates; when x is non-empty and both are in range the
//     tile's JPEG bytes are served.
//   - latency: milliseconds to sleep before serving a tile (ignored when
//     greater than 1000).
//
// A request without x, or with out-of-range coordinates, gets the HTML page.
func newGopherTilesHandler() http.Handler {
const gopherURL = "https://blog.golang.org/go-programming-language-turns-two_gophers.jpg"
res, err := http.Get(gopherURL)
if err != nil {
log.Fatal(err)
}
if res.StatusCode != 200 {
log.Fatalf("Error fetching %s: %v", gopherURL, res.Status)
}
slurp, err := ioutil.ReadAll(res.Body)
res.Body.Close()
if err != nil {
log.Fatal(err)
}
im, err := jpeg.Decode(bytes.NewReader(slurp))
if err != nil {
// Include at most the first 1KB of the payload in the fatal message.
if len(slurp) > 1024 {
slurp = slurp[:1024]
}
log.Fatalf("Failed to decode gopher image: %v (got %q)", err, slurp)
}
// subImager is asserted on the decoded image below; the assertion panics
// if the concrete image type has no SubImage method.
type subImager interface {
SubImage(image.Rectangle) image.Image
}
const tileSize = 32
// Integer division: a partial tile at the right or bottom edge is dropped.
xt := im.Bounds().Max.X / tileSize
yt := im.Bounds().Max.Y / tileSize
var tile [][][]byte // y -> x -> jpeg bytes
for yi := 0; yi < yt; yi++ {
var row [][]byte
for xi := 0; xi < xt; xi++ {
si := im.(subImager).SubImage(image.Rectangle{
Min: image.Point{xi * tileSize, yi * tileSize},
Max: image.Point{(xi + 1) * tileSize, (yi + 1) * tileSize},
})
// Each tile is re-encoded as its own standalone JPEG.
buf := new(bytes.Buffer)
if err := jpeg.Encode(buf, si, &jpeg.Options{Quality: 90}); err != nil {
log.Fatal(err)
}
row = append(row, buf.Bytes())
}
tile = append(tile, row)
}
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Parse errors are ignored: a missing or malformed value becomes 0.
ms, _ := strconv.Atoi(r.FormValue("latency"))
const nanosPerMilli = 1e6
if r.FormValue("x") != "" {
x, _ := strconv.Atoi(r.FormValue("x"))
y, _ := strconv.Atoi(r.FormValue("y"))
// Cap the artificial delay at one second per tile.
if ms <= 1000 {
time.Sleep(time.Duration(ms) * nanosPerMilli)
}
if x >= 0 && x < xt && y >= 0 && y < yt {
http.ServeContent(w, r, "", time.Time{}, bytes.NewReader(tile[y][x]))
return
}
// Out-of-range coordinates fall through to the HTML page.
}
io.WriteString(w, "<html><body>")
fmt.Fprintf(w, "A grid of %d tiled images is below. Compare:<p>", xt*yt)
// Links to reload this page over HTTP/2 and HTTP/1 at several latencies.
for _, ms := range []int{0, 30, 200, 1000} {
d := time.Duration(ms) * nanosPerMilli
fmt.Fprintf(w, "[<a href='https://%s/gophertiles?latency=%d'>HTTP/2, %v latency</a>] [<a href='http://%s/gophertiles?latency=%d'>HTTP/1, %v latency</a>]<br>\n",
httpsHost(), ms, d,
httpHost(), ms, d,
)
}
io.WriteString(w, "<p>\n")
// A per-page-load value in every tile URL so each load uses fresh URLs.
cacheBust := time.Now().UnixNano()
for y := 0; y < yt; y++ {
for x := 0; x < xt; x++ {
fmt.Fprintf(w, "<img width=%d height=%d src='/gophertiles?x=%d&y=%d&cachebust=%d&latency=%d'>",
tileSize, tileSize, x, y, cacheBust, ms)
}
io.WriteString(w, "<br/>\n")
}
// NOTE(review): the second "&lt" below has no terminating ';'.
io.WriteString(w, "<hr><a href='/'>&lt;&lt Back to Go HTTP/2 demo server</a></body></html>")
})
}
// httpsHost returns the host (and port, if any) used when building https://
// links to this server: the production hostname when -prod is set, otherwise
// the TLS listen address with "localhost" prepended if it is port-only.
func httpsHost() string {
	if *prod {
		return "http2.golang.org"
	}
	listen := *addr
	if strings.HasPrefix(listen, ":") {
		return "localhost" + listen
	}
	return listen
}
// httpHost returns the host (and port, if any) used when building http://
// links to this server: the production hostname when -prod is set, otherwise
// the plaintext listen address with "localhost" prepended if it is port-only.
func httpHost() string {
	if *prod {
		return "http2.golang.org"
	}
	listen := *httpAddr
	if strings.HasPrefix(listen, ":") {
		return "localhost" + listen
	}
	return listen
}
// serveProdTLS serves the default mux over TLS with HTTP/2 enabled on :443.
// The certificate chain and private key are fetched from a Google Cloud
// Storage bucket at startup. It returns the first error encountered, or
// whatever srv.Serve returns.
func serveProdTLS() error {
	c, err := googlestorage.NewServiceClient()
	if err != nil {
		return err
	}

	const bucket = "http2-demo-server-tls"
	// fetch reads one whole object from the bucket.
	fetch := func(key string) ([]byte, error) {
		obj := &googlestorage.Object{
			Bucket: bucket,
			Key:    key,
		}
		rc, _, err := c.GetObject(obj)
		if err != nil {
			return nil, fmt.Errorf("Error fetching GCS object %q in bucket %q: %v", key, bucket, err)
		}
		defer rc.Close()
		return ioutil.ReadAll(rc)
	}

	certPem, err := fetch("http2.golang.org.chained.pem")
	if err != nil {
		return err
	}
	keyPem, err := fetch("http2.golang.org.key")
	if err != nil {
		return err
	}
	cert, err := tls.X509KeyPair(certPem, keyPem)
	if err != nil {
		return err
	}

	srv := &http.Server{
		TLSConfig: &tls.Config{Certificates: []tls.Certificate{cert}},
	}
	http2.ConfigureServer(srv, &http2.Server{})

	ln, err := net.Listen("tcp", ":443")
	if err != nil {
		return err
	}
	keepAlive := tcpKeepAliveListener{ln.(*net.TCPListener)}
	return srv.Serve(tls.NewListener(keepAlive, srv.TLSConfig))
}
// tcpKeepAliveListener wraps a *net.TCPListener and enables TCP keep-alive,
// with a three-minute period, on every connection it accepts.
type tcpKeepAliveListener struct {
	*net.TCPListener
}

// Accept waits for the next connection and turns keep-alive on before
// handing it back. Errors from the keep-alive setters are ignored.
func (ln tcpKeepAliveListener) Accept() (c net.Conn, err error) {
	conn, acceptErr := ln.AcceptTCP()
	if acceptErr != nil {
		return nil, acceptErr
	}
	conn.SetKeepAlive(true)
	conn.SetKeepAlivePeriod(3 * time.Minute)
	return conn, nil
}
// serveProd starts the plaintext listener on :80 and the TLS listener (via
// serveProdTLS) concurrently, and returns the first error either reports.
func serveProd() error {
	// Buffered for both servers so the one that fails second never blocks.
	failed := make(chan error, 2)
	servers := []func() error{
		func() error { return http.ListenAndServe(":80", nil) },
		serveProdTLS,
	}
	for _, serve := range servers {
		go func(serve func() error) { failed <- serve() }(serve)
	}
	return <-failed
}
// main parses flags, registers the demo handlers, and serves them.
//
// With -prod it serves on :80 and :443 via serveProd and never returns.
// Otherwise it serves TLS on *addr using server.crt/server.key from the
// working directory, optionally serves plaintext on *httpAddr, and blocks
// forever; either listener failing terminates the process.
func main() {
	var srv http.Server
	flag.BoolVar(&http2.VerboseLogs, "verbose", false, "Verbose HTTP/2 debugging.")
	flag.Parse()
	srv.Addr = *addr

	registerHandlers()

	if *prod {
		*httpAddr = "http2.golang.org"
		log.Fatal(serveProd())
	}

	url := "https://" + *addr + "/"
	// Pass url as an argument rather than as the format string, so a '%' in
	// the user-supplied address is printed verbatim.
	log.Printf("Listening on %s", url)
	http2.ConfigureServer(&srv, &http2.Server{})

	if *httpAddr != "" {
		go func() { log.Fatal(http.ListenAndServe(*httpAddr, nil)) }()
	}

	go func() {
		log.Fatal(srv.ListenAndServeTLS("server.crt", "server.key"))
	}()

	if *openFirefox && runtime.GOOS == "darwin" {
		// Give the listeners a moment to come up before opening the browser.
		time.Sleep(250 * time.Millisecond)
		exec.Command("open", "-b", "org.mozilla.nightly", "https://localhost:4430/").Run()
	}

	select {}
}

View File

@@ -0,0 +1,279 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
import (
"bufio"
"bytes"
"encoding/json"
"flag"
"fmt"
"io"
"io/ioutil"
"log"
"net/http"
"os"
"strings"
"time"
"code.google.com/p/goauth2/oauth"
compute "code.google.com/p/google-api-go-client/compute/v1"
)
// Command-line flags. Unless --write_object is set, main uses them to
// describe the GCE instance it creates.
var (
proj = flag.String("project", "symbolic-datum-552", "name of Project")
zone = flag.String("zone", "us-central1-a", "GCE zone")
mach = flag.String("machinetype", "n1-standard-1", "Machine type")
instName = flag.String("instance_name", "http2-demo", "Name of VM instance.")
sshPub = flag.String("ssh_public_key", "", "ssh public key file to authorize. Can modify later in Google's web UI anyway.")
staticIP = flag.String("static_ip", "130.211.116.44", "Static IP to use. If empty, automatic.")
// When writeObject is non-empty, main uploads stdin and returns without
// creating a VM.
writeObject = flag.String("write_object", "", "If non-empty, a VM isn't created and the flag value is Google Cloud Storage bucket/object to write. The contents from stdin.")
publicObject = flag.Bool("write_object_is_public", false, "Whether the object created by --write_object should be public.")
)
// readFile returns the contents of the file named v with leading and
// trailing white space removed. If the file cannot be read it terminates
// the program via log.Fatalf.
func readFile(v string) string {
	raw, err := ioutil.ReadFile(v)
	if err != nil {
		log.Fatalf("Error reading %s: %v", v, err)
	}
	trimmed := strings.TrimSpace(string(raw))
	return trimmed
}
// config is the OAuth configuration used to obtain a token for the Compute
// and Cloud Storage APIs.
//
// It is initialized at package load time: readFile is called for
// client-id.dat and client-secret.dat, and terminates the program if either
// file cannot be read from the working directory.
var config = &oauth.Config{
// The client-id and secret should be for an "Installed Application" when using
// the CLI. Later we'll use a web application with a callback.
ClientId: readFile("client-id.dat"),
ClientSecret: readFile("client-secret.dat"),
// Scopes are passed as one space-separated string.
Scope: strings.Join([]string{
compute.DevstorageFull_controlScope,
compute.ComputeScope,
"https://www.googleapis.com/auth/sqlservice",
"https://www.googleapis.com/auth/sqlservice.admin",
}, " "),
AuthURL: "https://accounts.google.com/o/oauth2/auth",
TokenURL: "https://accounts.google.com/o/oauth2/token",
RedirectURL: "urn:ietf:wg:oauth:2.0:oob",
}
// baseConfig is the cloud-config document passed to the new instance as its
// "user-data" metadata value. It declares an h2demo.service unit that
// downloads the h2demo binary to /opt/bin and runs it with --prod,
// restarting it whenever it exits. main may append ssh_authorized_keys
// sections to it before use.
const baseConfig = `#cloud-config
coreos:
units:
- name: h2demo.service
command: start
content: |
[Unit]
Description=HTTP2 Demo
[Service]
ExecStartPre=/bin/bash -c 'mkdir -p /opt/bin && curl -s -o /opt/bin/h2demo http://storage.googleapis.com/http2-demo-server-tls/h2demo && chmod +x /opt/bin/h2demo'
ExecStart=/opt/bin/h2demo --prod
RestartSec=5s
Restart=always
Type=simple
[Install]
WantedBy=multi-user.target
`
// main either uploads stdin to Cloud Storage (when --write_object is set) or
// creates the demo GCE instance and waits for the create operation to finish.
//
// An OAuth token is loaded from token.dat; if that fails the user is walked
// through the interactive auth-code flow and the resulting token is cached.
// Every unrecoverable error terminates the process via log.Fatal.
func main() {
	flag.Parse()
	if *proj == "" {
		log.Fatalf("Missing --project flag")
	}
	prefix := "https://www.googleapis.com/compute/v1/projects/" + *proj
	machType := prefix + "/zones/" + *zone + "/machineTypes/" + *mach

	tr := &oauth.Transport{
		Config: config,
	}

	tokenCache := oauth.CacheFile("token.dat")
	token, err := tokenCache.Token()
	if err != nil {
		// The interactive flow reads the auth code from stdin, which
		// --write_object needs for the object contents.
		if *writeObject != "" {
			log.Fatalf("Can't use --write_object without a valid token.dat file already cached.")
		}
		log.Printf("Error getting token from %s: %v", string(tokenCache), err)
		log.Printf("Get auth code from %v", config.AuthCodeURL("my-state"))
		fmt.Print("\nEnter auth code: ")
		sc := bufio.NewScanner(os.Stdin)
		sc.Scan()
		authCode := strings.TrimSpace(sc.Text())
		token, err = tr.Exchange(authCode)
		if err != nil {
			log.Fatalf("Error exchanging auth code for a token: %v", err)
		}
		tokenCache.PutToken(token)
	}

	tr.Token = token
	oauthClient := &http.Client{Transport: tr}
	if *writeObject != "" {
		writeCloudStorageObject(oauthClient)
		return
	}

	// The error was previously discarded, which would have surfaced as a
	// nil-pointer panic on the first API call below.
	computeService, err := compute.New(oauthClient)
	if err != nil {
		log.Fatalf("Error creating compute service client: %v", err)
	}

	natIP := *staticIP
	if natIP == "" {
		// Try to find it by name.
		aggAddrList, err := computeService.Addresses.AggregatedList(*proj).Do()
		if err != nil {
			log.Fatal(err)
		}
		// http://godoc.org/code.google.com/p/google-api-go-client/compute/v1#AddressAggregatedList
	IPLoop:
		for _, asl := range aggAddrList.Items {
			for _, addr := range asl.Addresses {
				if addr.Name == *instName+"-ip" && addr.Status == "RESERVED" {
					natIP = addr.Address
					break IPLoop
				}
			}
		}
	}

	cloudConfig := baseConfig
	if *sshPub != "" {
		key := strings.TrimSpace(readFile(*sshPub))
		cloudConfig += fmt.Sprintf("\nssh_authorized_keys:\n - %s\n", key)
	}
	// NOTE(review): when --ssh_public_key is also set this appends a second
	// top-level ssh_authorized_keys key to the same document.
	if os.Getenv("USER") == "bradfitz" {
		cloudConfig += fmt.Sprintf("\nssh_authorized_keys:\n - %s\n", "ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAIEAwks9dwWKlRC+73gRbvYtVg0vdCwDSuIlyt4z6xa/YU/jTDynM4R4W10hm2tPjy8iR1k8XhDv4/qdxe6m07NjG/By1tkmGpm1mGwho4Pr5kbAAy/Qg+NLCSdAYnnE00FQEcFOC15GFVMOW2AzDGKisReohwH9eIzHPzdYQNPRWXE= bradfitz@papag.bradfitz.com")
	}
	const maxCloudConfig = 32 << 10 // per compute API docs
	if len(cloudConfig) > maxCloudConfig {
		log.Fatalf("cloud config length of %d bytes is over %d byte limit", len(cloudConfig), maxCloudConfig)
	}

	instance := &compute.Instance{
		Name:        *instName,
		Description: "Go Builder",
		MachineType: machType,
		Disks:       []*compute.AttachedDisk{instanceDisk(computeService)},
		Tags: &compute.Tags{
			Items: []string{"http-server", "https-server"},
		},
		Metadata: &compute.Metadata{
			Items: []*compute.MetadataItems{
				{
					Key:   "user-data",
					Value: cloudConfig,
				},
			},
		},
		NetworkInterfaces: []*compute.NetworkInterface{
			&compute.NetworkInterface{
				AccessConfigs: []*compute.AccessConfig{
					&compute.AccessConfig{
						Type:  "ONE_TO_ONE_NAT",
						Name:  "External NAT",
						NatIP: natIP,
					},
				},
				Network: prefix + "/global/networks/default",
			},
		},
		ServiceAccounts: []*compute.ServiceAccount{
			{
				Email: "default",
				Scopes: []string{
					compute.DevstorageFull_controlScope,
					compute.ComputeScope,
				},
			},
		},
	}

	log.Printf("Creating instance...")
	op, err := computeService.Instances.Insert(*proj, *zone, instance).Do()
	if err != nil {
		log.Fatalf("Failed to create instance: %v", err)
	}
	opName := op.Name
	log.Printf("Created. Waiting on operation %v", opName)
	// Poll the zone operation every two seconds until it reports DONE.
OpLoop:
	for {
		time.Sleep(2 * time.Second)
		op, err := computeService.ZoneOperations.Get(*proj, *zone, opName).Do()
		if err != nil {
			log.Fatalf("Failed to get op %s: %v", opName, err)
		}
		switch op.Status {
		case "PENDING", "RUNNING":
			log.Printf("Waiting on operation %v", opName)
			continue
		case "DONE":
			if op.Error != nil {
				for _, operr := range op.Error.Errors {
					log.Printf("Error: %+v", operr)
				}
				log.Fatalf("Failed to start.")
			}
			log.Printf("Success. %+v", op)
			break OpLoop
		default:
			log.Fatalf("Unknown status %q: %+v", op.Status, op)
		}
	}

	inst, err := computeService.Instances.Get(*proj, *zone, *instName).Do()
	if err != nil {
		log.Fatalf("Error getting instance after creation: %v", err)
	}
	ij, _ := json.MarshalIndent(inst, "", " ")
	log.Printf("Instance: %s", ij)
}
// instanceDisk describes the boot disk for the new instance: a 50 GB
// persistent disk named "<instance_name>-disk", initialized from a fixed
// CoreOS stable image and deleted together with the instance.
// The svc argument is currently unused.
func instanceDisk(svc *compute.Service) *compute.AttachedDisk {
	const imageURL = "https://www.googleapis.com/compute/v1/projects/coreos-cloud/global/images/coreos-stable-444-5-0-v20141016"
	params := &compute.AttachedDiskInitializeParams{
		DiskName:    *instName + "-disk",
		SourceImage: imageURL,
		DiskSizeGb:  50,
	}
	return &compute.AttachedDisk{
		AutoDelete:       true,
		Boot:             true,
		Type:             "PERSISTENT",
		InitializeParams: params,
	}
}
// writeCloudStorageObject uploads everything on stdin to the Cloud Storage
// object named by --write_object using an HTTP PUT, then exits the process
// with status 0. Any failure, including a non-200 response, is fatal.
//
// Up to the first 1 MB of stdin is buffered so the Content-Type can be
// sniffed; the buffered part and the remainder of stdin are then sent as
// one request body.
func writeCloudStorageObject(httpClient *http.Client) {
	const maxSlurp = 1 << 20
	stdin := os.Stdin

	var head bytes.Buffer
	n, err := io.CopyN(&head, stdin, maxSlurp)
	if err != nil && err != io.EOF {
		log.Fatalf("Error reading from stdin: %v, %v", n, err)
	}
	contentType := http.DetectContentType(head.Bytes())

	body := io.MultiReader(&head, stdin)
	req, err := http.NewRequest("PUT", "https://storage.googleapis.com/"+*writeObject, body)
	if err != nil {
		log.Fatal(err)
	}
	req.Header.Set("x-goog-api-version", "2")
	if *publicObject {
		req.Header.Set("x-goog-acl", "public-read")
	}
	req.Header.Set("Content-Type", contentType)

	res, err := httpClient.Do(req)
	if err != nil {
		log.Fatal(err)
	}
	if res.StatusCode != 200 {
		// Dump the whole response so the failure reason is visible.
		res.Write(os.Stderr)
		log.Fatalf("Failed.")
	}
	log.Printf("Success.")
	os.Exit(0)
}

View File

@@ -0,0 +1,27 @@
-----BEGIN RSA PRIVATE KEY-----
MIIEowIBAAKCAQEAt5fAjp4fTcekWUTfzsp0kyih1OYbsGL0KX1eRbSSR8Od0+9Q
62Hyny+GFwMTb4A/KU8mssoHvcceSAAbwfbxFK/+s51TobqUnORZrOoTZjkUygby
XDSK99YBbcR1Pip8vwMTm4XKuLtCigeBBdjjAQdgUO28LENGlsMnmeYkJfODVGnV
mr5Ltb9ANA8IKyTfsnHJ4iOCS/PlPbUj2q7YnoVLposUBMlgUb/CykX3mOoLb4yJ
JQyA/iST6ZxiIEj36D4yWZ5lg7YJl+UiiBQHGCnPdGyipqV06ex0heYWcaiW8LWZ
SUQ93jQ+WVCH8hT7DQO1dmsvUmXlq/JeAlwQ/QIDAQABAoIBAFFHV7JMAqPWnMYA
nezY6J81v9+XN+7xABNWM2Q8uv4WdksbigGLTXR3/680Z2hXqJ7LMeC5XJACFT/e
/Gr0vmpgOCygnCPfjGehGKpavtfksXV3edikUlnCXsOP1C//c1bFL+sMYmFCVgTx
qYdDK8yKzXNGrKYT6q5YG7IglyRNV1rsQa8lM/5taFYiD1Ck/3tQi3YIq8Lcuser
hrxsMABcQ6mi+EIvG6Xr4mfJug0dGJMHG4RG1UGFQn6RXrQq2+q53fC8ZbVUSi0j
NQ918aKFzktwv+DouKU0ME4I9toks03gM860bAL7zCbKGmwR3hfgX/TqzVCWpG9E
LDVfvekCgYEA8fk9N53jbBRmULUGEf4qWypcLGiZnNU0OeXWpbPV9aa3H0VDytA7
8fCN2dPAVDPqlthMDdVe983NCNwp2Yo8ZimDgowyIAKhdC25s1kejuaiH9OAPj3c
0f8KbriYX4n8zNHxFwK6Ae3pQ6EqOLJVCUsziUaZX9nyKY5aZlyX6xcCgYEAwjws
K62PjC64U5wYddNLp+kNdJ4edx+a7qBb3mEgPvSFT2RO3/xafJyG8kQB30Mfstjd
bRxyUV6N0vtX1zA7VQtRUAvfGCecpMo+VQZzcHXKzoRTnQ7eZg4Lmj5fQ9tOAKAo
QCVBoSW/DI4PZL26CAMDcAba4Pa22ooLapoRIQsCgYA6pIfkkbxLNkpxpt2YwLtt
Kr/590O7UaR9n6k8sW/aQBRDXNsILR1KDl2ifAIxpf9lnXgZJiwE7HiTfCAcW7c1
nzwDCI0hWuHcMTS/NYsFYPnLsstyyjVZI3FY0h4DkYKV9Q9z3zJLQ2hz/nwoD3gy
b2pHC7giFcTts1VPV4Nt8wKBgHeFn4ihHJweg76vZz3Z78w7VNRWGFklUalVdDK7
gaQ7w2y/ROn/146mo0OhJaXFIFRlrpvdzVrU3GDf2YXJYDlM5ZRkObwbZADjksev
WInzcgDy3KDg7WnPasRXbTfMU4t/AkW2p1QKbi3DnSVYuokDkbH2Beo45vxDxhKr
C69RAoGBAIyo3+OJenoZmoNzNJl2WPW5MeBUzSh8T/bgyjFTdqFHF5WiYRD/lfHj
x9Glyw2nutuT4hlOqHvKhgTYdDMsF2oQ72fe3v8Q5FU7FuKndNPEAyvKNXZaShVA
hnlhv5DjXKb0wFWnt5PCCiQLtzG0yyHaITrrEme7FikkIcTxaX/Y
-----END RSA PRIVATE KEY-----

View File

@@ -0,0 +1,26 @@
-----BEGIN CERTIFICATE-----
MIIEWjCCA0KgAwIBAgIJALfRlWsI8YQHMA0GCSqGSIb3DQEBBQUAMHsxCzAJBgNV
BAYTAlVTMQswCQYDVQQIEwJDQTEWMBQGA1UEBxMNU2FuIEZyYW5jaXNjbzEUMBIG
A1UEChMLQnJhZGZpdHppbmMxEjAQBgNVBAMTCWxvY2FsaG9zdDEdMBsGCSqGSIb3
DQEJARYOYnJhZEBkYW5nYS5jb20wHhcNMTQwNzE1MjA0NjA1WhcNMTcwNTA0MjA0
NjA1WjB7MQswCQYDVQQGEwJVUzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDVNhbiBG
cmFuY2lzY28xFDASBgNVBAoTC0JyYWRmaXR6aW5jMRIwEAYDVQQDEwlsb2NhbGhv
c3QxHTAbBgkqhkiG9w0BCQEWDmJyYWRAZGFuZ2EuY29tMIIBIjANBgkqhkiG9w0B
AQEFAAOCAQ8AMIIBCgKCAQEAt5fAjp4fTcekWUTfzsp0kyih1OYbsGL0KX1eRbSS
R8Od0+9Q62Hyny+GFwMTb4A/KU8mssoHvcceSAAbwfbxFK/+s51TobqUnORZrOoT
ZjkUygbyXDSK99YBbcR1Pip8vwMTm4XKuLtCigeBBdjjAQdgUO28LENGlsMnmeYk
JfODVGnVmr5Ltb9ANA8IKyTfsnHJ4iOCS/PlPbUj2q7YnoVLposUBMlgUb/CykX3
mOoLb4yJJQyA/iST6ZxiIEj36D4yWZ5lg7YJl+UiiBQHGCnPdGyipqV06ex0heYW
caiW8LWZSUQ93jQ+WVCH8hT7DQO1dmsvUmXlq/JeAlwQ/QIDAQABo4HgMIHdMB0G
A1UdDgQWBBRcAROthS4P4U7vTfjByC569R7E6DCBrQYDVR0jBIGlMIGigBRcAROt
hS4P4U7vTfjByC569R7E6KF/pH0wezELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNB
MRYwFAYDVQQHEw1TYW4gRnJhbmNpc2NvMRQwEgYDVQQKEwtCcmFkZml0emluYzES
MBAGA1UEAxMJbG9jYWxob3N0MR0wGwYJKoZIhvcNAQkBFg5icmFkQGRhbmdhLmNv
bYIJALfRlWsI8YQHMAwGA1UdEwQFMAMBAf8wDQYJKoZIhvcNAQEFBQADggEBAG6h
U9f9sNH0/6oBbGGy2EVU0UgITUQIrFWo9rFkrW5k/XkDjQm+3lzjT0iGR4IxE/Ao
eU6sQhua7wrWeFEn47GL98lnCsJdD7oZNhFmQ95Tb/LnDUjs5Yj9brP0NWzXfYU4
UK2ZnINJRcJpB8iRCaCxE8DdcUF0XqIEq6pA272snoLmiXLMvNl3kYEdm+je6voD
58SNVEUsztzQyXmJEhCpwVI0A6QCjzXj+qvpmw3ZZHi8JwXei8ZZBLTSFBki8Z7n
sH9BBH38/SzUmAN4QHSPy1gjqm00OAE8NaYDkh/bzE4d7mLGGMWp/WE3KPSu82HF
kPe6XoSbiLm/kxk32T0=
-----END CERTIFICATE-----

View File

@@ -0,0 +1 @@
E2CE26BF3285059C

View File

@@ -0,0 +1,20 @@
-----BEGIN CERTIFICATE-----
MIIDPjCCAiYCCQDizia/MoUFnDANBgkqhkiG9w0BAQUFADB7MQswCQYDVQQGEwJV
UzELMAkGA1UECBMCQ0ExFjAUBgNVBAcTDVNhbiBGcmFuY2lzY28xFDASBgNVBAoT
C0JyYWRmaXR6aW5jMRIwEAYDVQQDEwlsb2NhbGhvc3QxHTAbBgkqhkiG9w0BCQEW
DmJyYWRAZGFuZ2EuY29tMB4XDTE0MDcxNTIwNTAyN1oXDTE1MTEyNzIwNTAyN1ow
RzELMAkGA1UEBhMCVVMxCzAJBgNVBAgTAkNBMQswCQYDVQQHEwJTRjEeMBwGA1UE
ChMVYnJhZGZpdHogaHR0cDIgc2VydmVyMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8A
MIIBCgKCAQEAs1Y9CyLFrdL8VQWN1WaifDqaZFnoqjHhCMlc1TfG2zA+InDifx2l
gZD3o8FeNnAcfM2sPlk3+ZleOYw9P/CklFVDlvqmpCv9ss/BEp/dDaWvy1LmJ4c2
dbQJfmTxn7CV1H3TsVJvKdwFmdoABb41NoBp6+NNO7OtDyhbIMiCI0pL3Nefb3HL
A7hIMo3DYbORTtJLTIH9W8YKrEWL0lwHLrYFx/UdutZnv+HjdmO6vCN4na55mjws
/vjKQUmc7xeY7Xe20xDEG2oDKVkL2eD7FfyrYMS3rO1ExP2KSqlXYG/1S9I/fz88
F0GK7HX55b5WjZCl2J3ERVdnv/0MQv+sYQIDAQABMA0GCSqGSIb3DQEBBQUAA4IB
AQC0zL+n/YpRZOdulSu9tS8FxrstXqGWoxfe+vIUgqfMZ5+0MkjJ/vW0FqlLDl2R
rn4XaR3e7FmWkwdDVbq/UB6lPmoAaFkCgh9/5oapMaclNVNnfF3fjCJfRr+qj/iD
EmJStTIN0ZuUjAlpiACmfnpEU55PafT5Zx+i1yE4FGjw8bJpFoyD4Hnm54nGjX19
KeCuvcYFUPnBm3lcL0FalF2AjqV02WTHYNQk7YF/oeO7NKBoEgvGvKG3x+xaOeBI
dwvdq175ZsGul30h+QjrRlXhH/twcuaT3GSdoysDl9cCYE8f1Mk8PD6gan3uBCJU
90p6/CbU71bGbfpM2PHot2fm
-----END CERTIFICATE-----

View File

@@ -0,0 +1,27 @@
-----BEGIN RSA PRIVATE KEY-----
MIIEowIBAAKCAQEAs1Y9CyLFrdL8VQWN1WaifDqaZFnoqjHhCMlc1TfG2zA+InDi
fx2lgZD3o8FeNnAcfM2sPlk3+ZleOYw9P/CklFVDlvqmpCv9ss/BEp/dDaWvy1Lm
J4c2dbQJfmTxn7CV1H3TsVJvKdwFmdoABb41NoBp6+NNO7OtDyhbIMiCI0pL3Nef
b3HLA7hIMo3DYbORTtJLTIH9W8YKrEWL0lwHLrYFx/UdutZnv+HjdmO6vCN4na55
mjws/vjKQUmc7xeY7Xe20xDEG2oDKVkL2eD7FfyrYMS3rO1ExP2KSqlXYG/1S9I/
fz88F0GK7HX55b5WjZCl2J3ERVdnv/0MQv+sYQIDAQABAoIBADQ2spUwbY+bcz4p
3M66ECrNQTBggP40gYl2XyHxGGOu2xhZ94f9ELf1hjRWU2DUKWco1rJcdZClV6q3
qwmXvcM2Q/SMS8JW0ImkNVl/0/NqPxGatEnj8zY30d/L8hGFb0orzFu/XYA5gCP4
NbN2WrXgk3ZLeqwcNxHHtSiJWGJ/fPyeDWAu/apy75u9Xf2GlzBZmV6HYD9EfK80
LTlI60f5FO487CrJnboL7ovPJrIHn+k05xRQqwma4orpz932rTXnTjs9Lg6KtbQN
a7PrqfAntIISgr11a66Mng3IYH1lYqJsWJJwX/xHT4WLEy0EH4/0+PfYemJekz2+
Co62drECgYEA6O9zVJZXrLSDsIi54cfxA7nEZWm5CAtkYWeAHa4EJ+IlZ7gIf9sL
W8oFcEfFGpvwVqWZ+AsQ70dsjXAv3zXaG0tmg9FtqWp7pzRSMPidifZcQwWkKeTO
gJnFmnVyed8h6GfjTEu4gxo1/S5U0V+mYSha01z5NTnN6ltKx1Or3b0CgYEAxRgm
S30nZxnyg/V7ys61AZhst1DG2tkZXEMcA7dYhabMoXPJAP/EfhlWwpWYYUs/u0gS
Wwmf5IivX5TlYScgmkvb/NYz0u4ZmOXkLTnLPtdKKFXhjXJcHjUP67jYmOxNlJLp
V4vLRnFxTpffAV+OszzRxsXX6fvruwZBANYJeXUCgYBVouLFsFgfWGYp2rpr9XP4
KK25kvrBqF6JKOIDB1zjxNJ3pUMKrl8oqccCFoCyXa4oTM2kUX0yWxHfleUjrMq4
yimwQKiOZmV7fVLSSjSw6e/VfBd0h3gb82ygcplZkN0IclkwTY5SNKqwn/3y07V5
drqdhkrgdJXtmQ6O5YYECQKBgATERcDToQ1USlI4sKrB/wyv1AlG8dg/IebiVJ4e
ZAyvcQmClFzq0qS+FiQUnB/WQw9TeeYrwGs1hxBHuJh16srwhLyDrbMvQP06qh8R
48F8UXXSRec22dV9MQphaROhu2qZdv1AC0WD3tqov6L33aqmEOi+xi8JgbT/PLk5
c/c1AoGBAI1A/02ryksW6/wc7/6SP2M2rTy4m1sD/GnrTc67EHnRcVBdKO6qH2RY
nqC8YcveC2ZghgPTDsA3VGuzuBXpwY6wTyV99q6jxQJ6/xcrD9/NUG6Uwv/xfCxl
IJLeBYEqQundSSny3VtaAUK8Ul1nxpTvVRNwtcyWTo8RHAAyNPWd
-----END RSA PRIVATE KEY-----

View File

@@ -0,0 +1,80 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// See https://code.google.com/p/go/source/browse/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://code.google.com/p/go/source/browse/LICENSE
package http2
import (
"net/http"
"strings"
)
// Lookup tables between the two spellings of common header names. Both are
// filled in by init.
var (
commonLowerHeader = map[string]string{} // Go-Canonical-Case -> lower-case
commonCanonHeader = map[string]string{} // lower-case -> Go-Canonical-Case
)
// init fills commonLowerHeader and commonCanonHeader with a fixed list of
// frequently used header names, mapping each lower-case name to its
// http.CanonicalHeaderKey form and back.
func init() {
	common := []string{
		"accept",
		"accept-charset",
		"accept-encoding",
		"accept-language",
		"accept-ranges",
		"age",
		"access-control-allow-origin",
		"allow",
		"authorization",
		"cache-control",
		"content-disposition",
		"content-encoding",
		"content-language",
		"content-length",
		"content-location",
		"content-range",
		"content-type",
		"cookie",
		"date",
		"etag",
		"expect",
		"expires",
		"from",
		"host",
		"if-match",
		"if-modified-since",
		"if-none-match",
		"if-unmodified-since",
		"last-modified",
		"link",
		"location",
		"max-forwards",
		"proxy-authenticate",
		"proxy-authorization",
		"range",
		"referer",
		"refresh",
		"retry-after",
		"server",
		"set-cookie",
		"strict-transport-security",
		"transfer-encoding",
		"user-agent",
		"vary",
		"via",
		"www-authenticate",
	}
	for _, lower := range common {
		canon := http.CanonicalHeaderKey(lower)
		commonLowerHeader[canon] = lower
		commonCanonHeader[lower] = canon
	}
}
// lowerHeader returns the lower-case form of header name v. Names present
// in commonLowerHeader are answered from the table; anything else goes
// through strings.ToLower.
func lowerHeader(v string) string {
	lower, known := commonLowerHeader[v]
	if !known {
		lower = strings.ToLower(v)
	}
	return lower
}

View File

@@ -0,0 +1,252 @@
// Copyright 2014 The Go Authors.
// See https://code.google.com/p/go/source/browse/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://code.google.com/p/go/source/browse/LICENSE
package hpack
import (
"io"
)
const (
// uint32Max is the largest uint32; the Encoder stores it in minSize to
// mean that no smaller table size is pending.
uint32Max = ^uint32(0)
// initialHeaderTableSize is the dynamic table size, and the size limit,
// that NewEncoder starts with.
initialHeaderTableSize = 4096
)
// An Encoder performs HPACK encoding of header fields and writes the
// encoded bytes to an underlying io.Writer. Create one with NewEncoder.
type Encoder struct {
// dynTab is the encoder's view of the dynamic header table.
dynTab dynamicTable
// minSize is the minimum table size set by
// SetMaxDynamicTableSize after the previous Header Table Size
// Update.
minSize uint32
// maxSizeLimit is the maximum table size this encoder
// supports. This will protect the encoder from too large
// size.
maxSizeLimit uint32
// tableSizeUpdate indicates whether "Header Table Size
// Update" is required.
tableSizeUpdate bool
// w receives the encoded bytes, one Write per WriteField call.
w io.Writer
// buf is scratch space reused across WriteField calls.
buf []byte
}
// NewEncoder returns a new Encoder which performs HPACK encoding and
// writes the encoded data to w. The dynamic table size and its limit both
// start at initialHeaderTableSize, with no table size update pending.
func NewEncoder(w io.Writer) *Encoder {
	enc := new(Encoder)
	enc.w = w
	enc.minSize = uint32Max
	enc.maxSizeLimit = initialHeaderTableSize
	enc.tableSizeUpdate = false
	enc.dynTab.setMaxSize(initialHeaderTableSize)
	return enc
}
// WriteField encodes f and hands the result to e's underlying Writer in a
// single Write call. If a "Header Table Size Update" is pending, its bytes
// are emitted first, ahead of the encoding of f.
//
// It returns the Writer's error, or io.ErrShortWrite if the Writer accepted
// fewer bytes than were produced without reporting an error.
func (e *Encoder) WriteField(f HeaderField) error {
	out := e.buf[:0]

	if e.tableSizeUpdate {
		e.tableSizeUpdate = false
		// If the table was shrunk below its current maximum at some point
		// since the last update, announce that minimum first.
		if e.minSize < e.dynTab.maxSize {
			out = appendTableSize(out, e.minSize)
		}
		e.minSize = uint32Max
		out = appendTableSize(out, e.dynTab.maxSize)
	}

	idx, exact := e.searchTable(f)
	switch {
	case exact:
		out = appendIndexed(out, idx)
	default:
		indexing := e.shouldIndex(f)
		if indexing {
			e.dynTab.add(f)
		}
		if idx != 0 {
			out = appendIndexedName(out, f, idx, indexing)
		} else {
			out = appendNewName(out, f, indexing)
		}
	}

	e.buf = out
	n, err := e.w.Write(e.buf)
	if err != nil {
		return err
	}
	if n != len(e.buf) {
		return io.ErrShortWrite
	}
	return nil
}
// searchTable looks f up in the static and dynamic header tables.
//
// The static table is scanned first; an exact name and value match there is
// returned immediately. Otherwise the dynamic table is consulted. The
// result i is 0 when nothing matches, the index of a full match when
// nameValueMatch is true, and the index of a name-only match otherwise (a
// static name match takes precedence over a dynamic name-only match).
// Dynamic table indices are offset by the length of the static table.
// Values are never compared for sensitive fields.
func (e *Encoder) searchTable(f HeaderField) (i uint64, nameValueMatch bool) {
	for pos, entry := range staticTable {
		if !constantTimeStringCompare(entry.Name, f.Name) {
			continue
		}
		staticIdx := uint64(pos + 1)
		if i == 0 {
			// Remember the first name-only hit as a fallback.
			i = staticIdx
		}
		if f.Sensitive || !constantTimeStringCompare(entry.Value, f.Value) {
			continue
		}
		return staticIdx, true
	}

	dynIdx, dynExact := e.dynTab.search(f)
	if dynExact || (i == 0 && dynIdx != 0) {
		i = dynIdx + uint64(len(staticTable))
	}
	return i, dynExact
}
// SetMaxDynamicTableSize changes the dynamic header table size to v,
// clamped to the limit set with SetMaxDynamicTableSizeLimit. The smallest
// size requested since the last update is recorded, and a "Header Table
// Size Update" is scheduled for the next WriteField call.
func (e *Encoder) SetMaxDynamicTableSize(v uint32) {
	size := v
	if size > e.maxSizeLimit {
		size = e.maxSizeLimit
	}
	if size < e.minSize {
		e.minSize = size
	}
	e.tableSizeUpdate = true
	e.dynTab.setMaxSize(size)
}
// SetMaxDynamicTableSizeLimit changes the largest value that
// SetMaxDynamicTableSize will accept to v. The default is 4096, the default
// dynamic header table size in the HPACK specification. If the current
// maximum table size is strictly greater than v, the table is truncated to
// v and a "Header Table Size Update" is emitted by the next WriteField call.
func (e *Encoder) SetMaxDynamicTableSizeLimit(v uint32) {
	e.maxSizeLimit = v
	if e.dynTab.maxSize <= v {
		return
	}
	e.tableSizeUpdate = true
	e.dynTab.setMaxSize(v)
}
// shouldIndex reports whether f should be added to the dynamic table:
// never for sensitive fields, and otherwise only when the entry's size does
// not exceed the table's current maximum size.
func (e *Encoder) shouldIndex(f HeaderField) bool {
	if f.Sensitive {
		return false
	}
	return f.size() <= e.dynTab.maxSize
}
// appendIndexed appends index i to dst in the "Indexed Header Field"
// representation (7-bit prefix integer with the top bit of its first byte
// set) and returns the extended buffer.
func appendIndexed(dst []byte, i uint64) []byte {
	start := len(dst)
	out := appendVarInt(dst, 7, i)
	out[start] |= 0x80
	return out
}
// appendNewName appends f to dst using one of the "Literal Header Field -
// New Name" representation variants and returns the extended buffer.
//
// If f.Sensitive is true, the "Never Indexed" variant is used. If
// f.Sensitive is false and indexing is true, the "Incremental Indexing"
// variant is used.
func appendNewName(dst []byte, f HeaderField, indexing bool) []byte {
	typ := encodeTypeByte(indexing, f.Sensitive)
	withName := appendHpackString(append(dst, typ), f.Name)
	return appendHpackString(withName, f.Value)
}
// appendIndexedName appends f to dst using one of the "Literal Header
// Field - Indexed Name" representation variants, where i is the table index
// of the entry supplying the name, and returns the extended buffer.
//
// If f.Sensitive is true, the "Never Indexed" variant is used. If
// f.Sensitive is false and indexing is true, the "Incremental Indexing"
// variant is used.
func appendIndexedName(dst []byte, f HeaderField, i uint64, indexing bool) []byte {
	// The index uses a 6-bit prefix when indexing and a 4-bit one otherwise.
	prefixBits := byte(4)
	if indexing {
		prefixBits = 6
	}
	start := len(dst)
	dst = appendVarInt(dst, prefixBits, i)
	dst[start] |= encodeTypeByte(indexing, f.Sensitive)
	return appendHpackString(dst, f.Value)
}
// appendTableSize appends v to dst in the "Header Table Size Update"
// representation (5-bit prefix integer with 0x20 set in its first byte)
// and returns the extended buffer.
func appendTableSize(dst []byte, v uint32) []byte {
	start := len(dst)
	out := appendVarInt(dst, 5, uint64(v))
	out[start] |= 0x20
	return out
}
// appendVarInt appends i to dst as a variable-length integer with an n-bit
// prefix and returns the extended buffer. Values below 2^n-1 fit in the
// prefix byte; larger values fill the prefix and continue in 7-bit groups,
// least significant first, with the high bit marking continuation.
//
// See
// http://http2.github.io/http2-spec/compression.html#integer.representation
func appendVarInt(dst []byte, n byte, i uint64) []byte {
	prefixMax := uint64(1)<<n - 1
	if i < prefixMax {
		return append(dst, byte(i))
	}
	dst = append(dst, byte(prefixMax))
	rest := i - prefixMax
	for rest >= 128 {
		dst = append(dst, byte(0x80|(rest&0x7f)))
		rest >>= 7
	}
	return append(dst, byte(rest))
}
// appendHpackString appends s to dst in the "String Literal"
// representation and returns the extended buffer.
//
// s is Huffman coded only when that yields a strictly shorter byte string;
// in that case the top bit of the length's first byte is set.
func appendHpackString(dst []byte, s string) []byte {
	plainLen := uint64(len(s))
	huffLen := HuffmanEncodeLength(s)
	if huffLen >= plainLen {
		dst = appendVarInt(dst, 7, plainLen)
		return append(dst, s...)
	}
	start := len(dst)
	dst = appendVarInt(dst, 7, huffLen)
	dst = AppendHuffmanString(dst, s)
	dst[start] |= 0x80
	return dst
}
// encodeTypeByte returns the type byte for a literal header field
// representation: "Never Indexed" (0x10) when sensitive is true,
// "Incremental Indexing" (0x40) when sensitive is false and indexing is
// true, and "Without Indexing" (0) otherwise.
func encodeTypeByte(indexing, sensitive bool) byte {
	switch {
	case sensitive:
		return 0x10
	case indexing:
		return 0x40
	default:
		return 0
	}
}

View File

@@ -0,0 +1,445 @@
// Copyright 2014 The Go Authors.
// See https://code.google.com/p/go/source/browse/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://code.google.com/p/go/source/browse/LICENSE
// Package hpack implements HPACK, a compression format for
// efficiently representing HTTP header fields in the context of HTTP/2.
//
// See http://tools.ietf.org/html/draft-ietf-httpbis-header-compression-09
package hpack
import (
"bytes"
"errors"
"fmt"
)
// A DecodingError wraps an error that the spec classifies as a decoding
// error.
type DecodingError struct {
	Err error
}

// Error returns the wrapped error's text prefixed with "decoding error: ".
func (de DecodingError) Error() string {
	return "decoding error: " + fmt.Sprint(de.Err)
}
// An InvalidIndexError is returned when an encoder references a table
// entry before the static table or after the end of the dynamic table.
// Its value is the offending index.
type InvalidIndexError int

// Error reports the invalid index in decimal.
func (e InvalidIndexError) Error() string {
	idx := int(e)
	return fmt.Sprintf("invalid indexed representation index %d", idx)
}
// A HeaderField is a name-value pair. Both the name and value are
// treated as opaque sequences of octets.
type HeaderField struct {
	Name, Value string

	// Sensitive means that this header field should never be
	// indexed.
	Sensitive bool
}

// size returns the entry size used for dynamic table accounting.
func (hf *HeaderField) size() uint32 {
	// http://http2.github.io/http2-spec/compression.html#rfc.section.4.1
	// "The size of the dynamic table is the sum of the size of
	// its entries. The size of an entry is the sum of its name's
	// length in octets (as defined in Section 5.2), its value's
	// length in octets (see Section 5.2), plus 32. The size of
	// an entry is calculated using the length of the name and
	// value without any Huffman encoding applied."

	// This can overflow if somebody makes a large HeaderField
	// Name and/or Value by hand, but we don't care, because that
	// won't happen on the wire because the encoding doesn't allow
	// it.
	const entryOverhead = 32
	octets := len(hf.Name) + len(hf.Value)
	return uint32(octets + entryOverhead)
}
// A Decoder is the decoding context for incremental processing of
// header blocks.
type Decoder struct {
// dynTab is the decoder's dynamic header table.
dynTab dynamicTable
// emit is the callback supplied to NewDecoder.
emit func(f HeaderField)
// buf is the unparsed buffer. It's only written to
// saveBuf if it was truncated in the middle of a header
// block. Because it's usually not owned, we can only
// process it under Write.
buf []byte // usually not owned
saveBuf bytes.Buffer
}
// NewDecoder returns a Decoder that stores emitFunc as its emit callback.
// maxSize is used both as the dynamic table's initial maximum size and as
// its allowed maximum.
func NewDecoder(maxSize uint32, emitFunc func(f HeaderField)) *Decoder {
	dec := new(Decoder)
	dec.emit = emitFunc
	dec.dynTab.allowedMaxSize = maxSize
	dec.dynTab.setMaxSize(maxSize)
	return dec
}
// TODO: add method *Decoder.Reset(maxSize, emitFunc) to let callers re-use Decoders and their
// underlying buffers for garbage reasons.
// SetMaxDynamicTableSize sets the dynamic table's current maximum size to
// v, evicting entries as needed to fit. The value is applied as given; the
// allowed maximum is not consulted here.
func (d *Decoder) SetMaxDynamicTableSize(v uint32) {
d.dynTab.setMaxSize(v)
}
// SetAllowedMaxDynamicTableSize sets the upper bound that the encoded
// stream (via dynamic table size updates) may set the maximum size
// to. It only records the bound; the table's current maximum size and
// contents are left untouched.
func (d *Decoder) SetAllowedMaxDynamicTableSize(v uint32) {
d.dynTab.allowedMaxSize = v
}
// dynamicTable is the HPACK dynamic header table together with its size
// accounting.
type dynamicTable struct {
// ents is the FIFO described at
// http://http2.github.io/http2-spec/compression.html#rfc.section.2.3.2
// The newest (low index) is appended at the end, and items are
// evicted from the front.
ents []HeaderField
// size is the sum of HeaderField.size() over ents.
size uint32
maxSize uint32 // current maxSize
allowedMaxSize uint32 // maxSize may go up to this, inclusive
}
// setMaxSize sets the table's current maximum size to v and evicts the
// oldest entries until the table fits within it.
func (dt *dynamicTable) setMaxSize(v uint32) {
dt.maxSize = v
dt.evict()
}
// add appends f to the table as its newest entry, adds f's size to the
// running total, and then evicts the oldest entries until the table fits
// within maxSize again. If f on its own is larger than maxSize the eviction
// removes f as well, leaving the table empty.
//
// See http://http2.github.io/http2-spec/compression.html#rfc.section.4.1
//
// TODO: maybe add should return an ok bool if there was enough space.
//
// Later we'll need a remove operation on dynamicTable.
func (dt *dynamicTable) add(f HeaderField) {
dt.ents = append(dt.ents, f)
dt.size += f.size()
dt.evict()
}
// evict drops entries from the front of the table (the oldest ones) until
// the accounted size no longer exceeds maxSize, then slides the surviving
// entries down to the start of the same backing array.
func (dt *dynamicTable) evict() {
	all := dt.ents
	dropped := 0
	for dt.size > dt.maxSize {
		dt.size -= all[dropped].size()
		dropped++
	}
	if dropped == 0 {
		// Nothing evicted: leave the slice untouched.
		return
	}
	// Compact in place so the slice keeps its original base pointer.
	kept := copy(all, all[dropped:])
	dt.ents = all[:kept]
}
// constantTimeStringCompare reports whether a and b are equal. For inputs
// of equal length every byte pair is examined regardless of where the first
// difference is; inputs of different length are rejected immediately.
func constantTimeStringCompare(a, b string) bool {
	if len(a) != len(b) {
		return false
	}
	var diff byte
	for idx := len(a) - 1; idx >= 0; idx-- {
		// Accumulate differing bits; never branch on the data.
		diff |= a[idx] ^ b[idx]
	}
	return diff == 0
}
// search looks f up in the table, newest entry first. The return value i
// is 0 if no entry has f's name. Otherwise i is the 1-based index (1 is the
// newest entry) of the first name match, or of the first entry matching
// both name and value, in which case nameValueMatch is true. Values are
// never compared when f is marked Sensitive.
func (dt *dynamicTable) search(f HeaderField) (i uint64, nameValueMatch bool) {
	total := len(dt.ents)
	for pos := total - 1; pos >= 0; pos-- {
		ent := dt.ents[pos]
		if !constantTimeStringCompare(ent.Name, f.Name) {
			continue
		}
		idx := uint64(total - pos)
		if i == 0 {
			// Remember the newest name-only match as a fallback.
			i = idx
		}
		if !f.Sensitive && constantTimeStringCompare(ent.Value, f.Value) {
			return idx, true
		}
	}
	return i, false
}
// maxTableIndex returns the largest valid 1-based index into the combined
// table: the static table entries followed by the dynamic table entries.
func (d *Decoder) maxTableIndex() int {
return len(d.dynTab.ents) + len(staticTable)
}
// at returns the header field stored at 1-based index i of the combined
// table. Indexes 1..len(staticTable) address the static table; larger
// indexes address the dynamic table, newest entry first. ok is false when i
// is 0 or beyond the last entry.
func (d *Decoder) at(i uint64) (hf HeaderField, ok bool) {
	switch {
	case i < 1, i > uint64(d.maxTableIndex()):
		return
	case i <= uint64(len(staticTable)):
		return staticTable[i-1], true
	}
	dyn := d.dynTab.ents
	// The newest dynamic entry sits at the end of the slice.
	offset := int(i) - len(staticTable)
	return dyn[len(dyn)-offset], true
}
// DecodeFull decodes an entire header block and returns the fields it
// contains. The decoder's emit callback is swapped out for the duration of
// the call and restored before returning. A block that ends mid-field is
// reported as an error.
//
// TODO: remove this method and make it incremental later? This is
// easier for debugging now.
func (d *Decoder) DecodeFull(p []byte) ([]HeaderField, error) {
	var fields []HeaderField
	prevEmit := d.emit
	d.emit = func(f HeaderField) { fields = append(fields, f) }
	defer func() { d.emit = prevEmit }()

	_, err := d.Write(p)
	if err == nil {
		err = d.Close()
	}
	if err != nil {
		return nil, err
	}
	return fields, nil
}
// Close declares the end of the current header block. If undecoded bytes
// from a truncated field are still pending they are discarded and a
// DecodingError is returned; otherwise Close returns nil.
func (d *Decoder) Close() error {
	if d.saveBuf.Len() == 0 {
		return nil
	}
	d.saveBuf.Reset()
	return DecodingError{errors.New("truncated headers")}
}
// Write feeds p to the decoder, emitting every header field that can be
// completely decoded. If p ends in the middle of a field representation the
// unparsed tail is kept in saveBuf and decoding resumes on the next Write.
//
// Unless p is empty, the returned n is len(p) even when an error is
// returned. err is non-nil only for fatal decoding errors.
func (d *Decoder) Write(p []byte) (n int, err error) {
if len(p) == 0 {
// Prevent state machine CPU attacks (making us redo
// work up to the point of finding out we don't have
// enough data)
return
}
// Only copy the data if we have to. Optimistically assume
// that p will contain a complete header block.
if d.saveBuf.Len() == 0 {
d.buf = p
} else {
// Leftover bytes from an earlier Write: parse the concatenation.
// d.buf aliases saveBuf's storage after the Reset below, so the
// order of these three statements matters.
d.saveBuf.Write(p)
d.buf = d.saveBuf.Bytes()
d.saveBuf.Reset()
}
for len(d.buf) > 0 {
err = d.parseHeaderFieldRepr()
if err != nil {
if err == errNeedMore {
// Not an error for the caller: stash the unparsed tail.
err = nil
d.saveBuf.Write(d.buf)
}
break
}
}
return len(p), err
}
// errNeedMore is an internal sentinel error value that means the
// buffer is truncated and we need to read more data before we can
// continue parsing.
var errNeedMore = errors.New("need more data")
// indexType says how a literal header field asked to be treated with
// respect to the dynamic table.
type indexType int

const (
	indexedTrue  indexType = iota // add the field to the dynamic table
	indexedFalse                  // do not add the field
	indexedNever                  // do not add the field; mark it sensitive
)

// indexed reports whether the field should be added to the dynamic table.
func (v indexType) indexed() bool {
	switch v {
	case indexedTrue:
		return true
	}
	return false
}

// sensitive reports whether the field must be flagged as sensitive.
func (v indexType) sensitive() bool {
	switch v {
	case indexedNever:
		return true
	}
	return false
}
// parseHeaderFieldRepr decodes one field representation from the front of
// d.buf, dispatching on the bit pattern of its first byte.
//
// It returns errNeedMore if there isn't enough data available; any other
// error is fatal. d.buf is consumed iff nil is returned.
// Precondition: len(d.buf) > 0.
func (d *Decoder) parseHeaderFieldRepr() error {
	first := d.buf[0]
	switch {
	case first>>7 == 1:
		// 6.1 Indexed Header Field: 0b1xxxxxxx, high bit set.
		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.1
		return d.parseFieldIndexed()
	case first>>6 == 1:
		// 6.2.1 Literal Header Field with Incremental Indexing:
		// 0b01xxxxxx, top two bits are 01.
		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.2.1
		return d.parseFieldLiteral(6, indexedTrue)
	case first>>4 == 0:
		// 6.2.2 Literal Header Field without Indexing:
		// 0b0000xxxx, top four bits are 0000.
		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.2.2
		return d.parseFieldLiteral(4, indexedFalse)
	case first>>4 == 1:
		// 6.2.3 Literal Header Field never Indexed:
		// 0b0001xxxx, top four bits are 0001.
		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.2.3
		return d.parseFieldLiteral(4, indexedNever)
	case first>>5 == 1:
		// 6.3 Dynamic Table Size Update: 0b001xxxxx, top three bits are 001.
		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.3
		return d.parseDynamicTableSizeUpdate()
	}
	return DecodingError{errors.New("invalid encoding")}
}
// parseFieldIndexed decodes an indexed header field: a 7-bit-prefix integer
// naming an entry of the combined table, which is emitted as-is.
// (same invariants and behavior as parseHeaderFieldRepr)
func (d *Decoder) parseFieldIndexed() error {
	idx, rest, err := readVarInt(7, d.buf)
	if err != nil {
		return err
	}
	entry, found := d.at(idx)
	if !found {
		return DecodingError{InvalidIndexError(idx)}
	}
	d.emit(HeaderField{Name: entry.Name, Value: entry.Value})
	d.buf = rest
	return nil
}
// parseFieldLiteral decodes a literal header field. n is the width of the
// integer prefix carrying the name index; a name index of 0 means the name
// follows as a string literal. Depending on it, the decoded field is added
// to the dynamic table and/or flagged sensitive before being emitted.
// (same invariants and behavior as parseHeaderFieldRepr)
func (d *Decoder) parseFieldLiteral(n uint8, it indexType) error {
	nameIdx, rest, err := readVarInt(n, d.buf)
	if err != nil {
		return err
	}

	var field HeaderField
	if nameIdx == 0 {
		field.Name, rest, err = readString(rest)
		if err != nil {
			return err
		}
	} else {
		ref, found := d.at(nameIdx)
		if !found {
			return DecodingError{InvalidIndexError(nameIdx)}
		}
		field.Name = ref.Name
	}

	field.Value, rest, err = readString(rest)
	if err != nil {
		return err
	}

	// Everything parsed: commit the consumed bytes.
	d.buf = rest
	if it.indexed() {
		// Stored before Sensitive is set, so table entries are never
		// flagged sensitive.
		d.dynTab.add(field)
	}
	field.Sensitive = it.sensitive()
	d.emit(field)
	return nil
}
// parseDynamicTableSizeUpdate decodes a dynamic table size update and
// applies it. A size above the allowed maximum is a DecodingError.
// (same invariants and behavior as parseHeaderFieldRepr)
func (d *Decoder) parseDynamicTableSizeUpdate() error {
	size, rest, err := readVarInt(5, d.buf)
	if err != nil {
		return err
	}
	if limit := uint64(d.dynTab.allowedMaxSize); size > limit {
		return DecodingError{errors.New("dynamic table size update too large")}
	}
	d.dynTab.setMaxSize(uint32(size))
	d.buf = rest
	return nil
}
var errVarintOverflow = DecodingError{errors.New("varint integer overflow")}

// readVarInt reads an unsigned variable length integer off the
// beginning of p. n is the prefix width in bits, as described in
// http://http2.github.io/http2-spec/compression.html#rfc.section.5.1.
//
// n must always be between 1 and 8; any other value panics.
//
// On success remain is the suffix of p following the integer. On error
// remain is p itself; the error is errNeedMore if p doesn't contain a
// complete integer and errVarintOverflow if the encoding is too long.
func readVarInt(n byte, p []byte) (i uint64, remain []byte, err error) {
	if n < 1 || n > 8 {
		panic("bad n")
	}
	if len(p) == 0 {
		return 0, p, errNeedMore
	}

	// A prefix with all n bits set means continuation bytes follow.
	prefixMax := uint64(1)<<n - 1
	i = uint64(p[0]) & prefixMax
	if i < prefixMax {
		return i, p[1:], nil
	}

	rest := p[1:]
	for shift := uint64(0); len(rest) > 0; {
		b := rest[0]
		rest = rest[1:]
		i += uint64(b&127) << shift
		if b&128 == 0 {
			// High bit clear: this was the last byte.
			return i, rest, nil
		}
		shift += 7
		if shift >= 63 { // TODO: proper overflow check. making this up.
			return 0, p, errVarintOverflow
		}
	}
	return 0, p, errNeedMore
}
// readString reads one string literal off the beginning of p: a
// 7-bit-prefix length whose top bit flags Huffman coding, followed by that
// many bytes. It returns the decoded string and the bytes after it.
// errNeedMore is returned when p does not yet hold the whole literal.
func readString(p []byte) (s string, remain []byte, err error) {
	if len(p) == 0 {
		return "", p, errNeedMore
	}
	huffman := p[0]&128 != 0

	var size uint64
	size, p, err = readVarInt(7, p)
	if err != nil {
		return "", p, err
	}
	if uint64(len(p)) < size {
		return "", p, errNeedMore
	}

	raw, rest := p[:size], p[size:]
	if !huffman {
		return string(raw), rest, nil
	}

	// TODO: optimize this garbage:
	var decoded bytes.Buffer
	if _, err := HuffmanDecode(&decoded, raw); err != nil {
		return "", nil, err
	}
	return decoded.String(), rest, nil
}

View File

@@ -0,0 +1,159 @@
// Copyright 2014 The Go Authors.
// See https://code.google.com/p/go/source/browse/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://code.google.com/p/go/source/browse/LICENSE
package hpack
import (
	"bytes"
	"errors"
	"io"
	"sync"
)
// bufPool recycles the scratch buffers HuffmanDecode accumulates its output
// in, so that decoding does not allocate a new buffer on every call.
var bufPool = sync.Pool{
New: func() interface{} { return new(bytes.Buffer) },
}
// errInvalidHuffman is returned by HuffmanDecode when the input contains a
// bit sequence that matches no symbol in the decoding tree.
var errInvalidHuffman = errors.New("hpack: invalid Huffman-encoded data")

// HuffmanDecode decodes the string in v and writes the expanded
// result to w, returning the number of bytes written to w and the
// Write call's return value. At most one Write call is made.
//
// If v is not valid Huffman-coded data, HuffmanDecode returns an error and
// writes nothing to w.
func HuffmanDecode(w io.Writer, v []byte) (int, error) {
	buf := bufPool.Get().(*bytes.Buffer)
	buf.Reset()
	defer bufPool.Put(buf)

	n := rootHuffmanNode
	// cur accumulates input bits; its low nbits bits are not yet decoded.
	cur, nbits := uint(0), uint8(0)
	for _, b := range v {
		cur = cur<<8 | uint(b)
		nbits += 8
		for nbits >= 8 {
			n = n.children[byte(cur>>(nbits-8))]
			if n == nil {
				// The tree only holds the 256 byte symbols, so some
				// slots are empty. Reject instead of dereferencing nil.
				return 0, errInvalidHuffman
			}
			if n.children == nil {
				// Leaf: emit the symbol and consume only its own bits.
				buf.WriteByte(n.sym)
				nbits -= n.codeLen
				n = rootHuffmanNode
			} else {
				nbits -= 8
			}
		}
	}
	// Decode whatever complete symbols remain in the last, partial byte;
	// the rest is treated as padding.
	for nbits > 0 {
		n = n.children[byte(cur<<(8-nbits))]
		if n == nil {
			return 0, errInvalidHuffman
		}
		if n.children != nil || n.codeLen > nbits {
			break
		}
		buf.WriteByte(n.sym)
		nbits -= n.codeLen
		n = rootHuffmanNode
	}
	return w.Write(buf.Bytes())
}
// node is one node of the Huffman decoding tree. The tree is walked one
// input byte at a time, so every internal node has 256 child slots.
type node struct {
// children is non-nil for internal nodes
children []*node
// The following are only valid if children is nil:
codeLen uint8 // number of bits that led to the output of sym
sym byte // output symbol
}
// newInternalNode returns an internal tree node with 256 child slots, all
// initially nil.
func newInternalNode() *node {
return &node{children: make([]*node, 256)}
}
// rootHuffmanNode is the root of the decoding tree used by HuffmanDecode.
var rootHuffmanNode = newInternalNode()

// init populates the decoding tree with one code per byte value, taken
// from the huffmanCodes and huffmanCodeLen tables.
func init() {
	for sym := 0; sym < len(huffmanCodes); sym++ {
		if sym > 255 {
			panic("too many huffman codes")
		}
		addDecoderNode(byte(sym), huffmanCodes[sym], huffmanCodeLen[sym])
	}
}
// addDecoderNode inserts the codeLen-bit Huffman code for sym into the
// decoding tree. The code is consumed eight bits per tree level; at the
// last level the leaf is stored in every slot whose leading bits equal the
// remaining bits of the code, so a lookup with a full byte still finds it.
func addDecoderNode(sym byte, code uint32, codeLen uint8) {
	n := rootHuffmanNode
	for codeLen > 8 {
		codeLen -= 8
		idx := uint8(code >> codeLen)
		child := n.children[idx]
		if child == nil {
			child = newInternalNode()
			n.children[idx] = child
		}
		n = child
	}

	// codeLen (<= 8) bits remain; the other 8-codeLen bits of the slot
	// index are "don't care".
	pad := 8 - codeLen
	first := int(uint8(code << pad))
	count := 1 << pad
	for k := 0; k < count; k++ {
		n.children[first+k] = &node{sym: sym, codeLen: codeLen}
	}
}
// AppendHuffmanString appends s, as encoded in Huffman codes, to dst
// and returns the extended buffer. If the encoding does not end on a byte
// boundary, the unused low bits of the last byte are filled with the
// leading bits of the EOS symbol.
func AppendHuffmanString(dst []byte, s string) []byte {
	free := uint8(8) // unused bits in the last byte of dst
	for i := 0; i < len(s); i++ {
		if free == 8 {
			// Previous byte is full: start a new one.
			dst = append(dst, 0)
		}
		dst, free = appendByteToHuffmanCode(dst, free, s[i])
	}
	if free == 8 {
		return dst
	}

	// special EOS symbol
	const (
		eosCode = uint32(0x3fffffff)
		eosBits = uint8(30)
	)
	dst[len(dst)-1] |= uint8(eosCode >> (eosBits - free))
	return dst
}
// HuffmanEncodeLength returns the number of bytes required to encode
// s in Huffman codes. The result is rounded up to a byte boundary.
func HuffmanEncodeLength(s string) uint64 {
	var bits uint64
	for i := len(s) - 1; i >= 0; i-- {
		bits += uint64(huffmanCodeLen[s[i]])
	}
	return (bits + 7) >> 3
}
// appendByteToHuffmanCode appends Huffman code for c to dst and
// returns the extended buffer and the remaining bits in the last
// element. The appending is not byte aligned and the remaining bits
// in the last element of dst is given in rembits.
//
// dst must be non-empty and its last byte must have rembits unused low
// bits. The returned rembits is always in the range 1..8; 8 means the last
// byte is full and the caller must append a new byte before the next call.
func appendByteToHuffmanCode(dst []byte, rembits uint8, c byte) ([]byte, uint8) {
code := huffmanCodes[c]
nbits := huffmanCodeLen[c]
for {
if rembits > nbits {
// The rest of the code fits in the current byte with room to spare.
t := uint8(code << (rembits - nbits))
dst[len(dst)-1] |= t
rembits -= nbits
break
}
// Fill the current byte with the next rembits bits of the code.
t := uint8(code >> (nbits - rembits))
dst[len(dst)-1] |= t
nbits -= rembits
rembits = 8
if nbits == 0 {
break
}
// More code bits to write: continue in a fresh byte.
dst = append(dst, 0)
}
return dst, rembits
}

View File

@@ -0,0 +1,353 @@
// Copyright 2014 The Go Authors.
// See https://code.google.com/p/go/source/browse/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://code.google.com/p/go/source/browse/LICENSE
package hpack
// pair returns a HeaderField with the given name and value. It keeps the
// staticTable literal below compact.
func pair(name, value string) HeaderField {
return HeaderField{Name: name, Value: value}
}
// staticTable is the predefined HPACK header table. Wire indexes are
// 1-based, so the entry with index i is staticTable[i-1].
//
// http://tools.ietf.org/html/draft-ietf-httpbis-header-compression-07#appendix-B
var staticTable = []HeaderField{
pair(":authority", ""), // index 1 (1-based)
pair(":method", "GET"),
pair(":method", "POST"),
pair(":path", "/"),
pair(":path", "/index.html"),
pair(":scheme", "http"),
pair(":scheme", "https"),
pair(":status", "200"),
pair(":status", "204"),
pair(":status", "206"),
pair(":status", "304"),
pair(":status", "400"),
pair(":status", "404"),
pair(":status", "500"),
pair("accept-charset", ""),
pair("accept-encoding", "gzip, deflate"),
pair("accept-language", ""),
pair("accept-ranges", ""),
pair("accept", ""),
pair("access-control-allow-origin", ""),
pair("age", ""),
pair("allow", ""),
pair("authorization", ""),
pair("cache-control", ""),
pair("content-disposition", ""),
pair("content-encoding", ""),
pair("content-language", ""),
pair("content-length", ""),
pair("content-location", ""),
pair("content-range", ""),
pair("content-type", ""),
pair("cookie", ""),
pair("date", ""),
pair("etag", ""),
pair("expect", ""),
pair("expires", ""),
pair("from", ""),
pair("host", ""),
pair("if-match", ""),
pair("if-modified-since", ""),
pair("if-none-match", ""),
pair("if-range", ""),
pair("if-unmodified-since", ""),
pair("last-modified", ""),
pair("link", ""),
pair("location", ""),
pair("max-forwards", ""),
pair("proxy-authenticate", ""),
pair("proxy-authorization", ""),
pair("range", ""),
pair("referer", ""),
pair("refresh", ""),
pair("retry-after", ""),
pair("server", ""),
pair("set-cookie", ""),
pair("strict-transport-security", ""),
pair("transfer-encoding", ""),
pair("user-agent", ""),
pair("vary", ""),
pair("via", ""),
pair("www-authenticate", ""),
}
// huffmanCodes holds, indexed by byte value, the Huffman code assigned to
// that byte. Each code is right-aligned in the uint32; the number of
// significant bits is the entry at the same index of huffmanCodeLen.
var huffmanCodes = []uint32{
0x1ff8,
0x7fffd8,
0xfffffe2,
0xfffffe3,
0xfffffe4,
0xfffffe5,
0xfffffe6,
0xfffffe7,
0xfffffe8,
0xffffea,
0x3ffffffc,
0xfffffe9,
0xfffffea,
0x3ffffffd,
0xfffffeb,
0xfffffec,
0xfffffed,
0xfffffee,
0xfffffef,
0xffffff0,
0xffffff1,
0xffffff2,
0x3ffffffe,
0xffffff3,
0xffffff4,
0xffffff5,
0xffffff6,
0xffffff7,
0xffffff8,
0xffffff9,
0xffffffa,
0xffffffb,
0x14,
0x3f8,
0x3f9,
0xffa,
0x1ff9,
0x15,
0xf8,
0x7fa,
0x3fa,
0x3fb,
0xf9,
0x7fb,
0xfa,
0x16,
0x17,
0x18,
0x0,
0x1,
0x2,
0x19,
0x1a,
0x1b,
0x1c,
0x1d,
0x1e,
0x1f,
0x5c,
0xfb,
0x7ffc,
0x20,
0xffb,
0x3fc,
0x1ffa,
0x21,
0x5d,
0x5e,
0x5f,
0x60,
0x61,
0x62,
0x63,
0x64,
0x65,
0x66,
0x67,
0x68,
0x69,
0x6a,
0x6b,
0x6c,
0x6d,
0x6e,
0x6f,
0x70,
0x71,
0x72,
0xfc,
0x73,
0xfd,
0x1ffb,
0x7fff0,
0x1ffc,
0x3ffc,
0x22,
0x7ffd,
0x3,
0x23,
0x4,
0x24,
0x5,
0x25,
0x26,
0x27,
0x6,
0x74,
0x75,
0x28,
0x29,
0x2a,
0x7,
0x2b,
0x76,
0x2c,
0x8,
0x9,
0x2d,
0x77,
0x78,
0x79,
0x7a,
0x7b,
0x7ffe,
0x7fc,
0x3ffd,
0x1ffd,
0xffffffc,
0xfffe6,
0x3fffd2,
0xfffe7,
0xfffe8,
0x3fffd3,
0x3fffd4,
0x3fffd5,
0x7fffd9,
0x3fffd6,
0x7fffda,
0x7fffdb,
0x7fffdc,
0x7fffdd,
0x7fffde,
0xffffeb,
0x7fffdf,
0xffffec,
0xffffed,
0x3fffd7,
0x7fffe0,
0xffffee,
0x7fffe1,
0x7fffe2,
0x7fffe3,
0x7fffe4,
0x1fffdc,
0x3fffd8,
0x7fffe5,
0x3fffd9,
0x7fffe6,
0x7fffe7,
0xffffef,
0x3fffda,
0x1fffdd,
0xfffe9,
0x3fffdb,
0x3fffdc,
0x7fffe8,
0x7fffe9,
0x1fffde,
0x7fffea,
0x3fffdd,
0x3fffde,
0xfffff0,
0x1fffdf,
0x3fffdf,
0x7fffeb,
0x7fffec,
0x1fffe0,
0x1fffe1,
0x3fffe0,
0x1fffe2,
0x7fffed,
0x3fffe1,
0x7fffee,
0x7fffef,
0xfffea,
0x3fffe2,
0x3fffe3,
0x3fffe4,
0x7ffff0,
0x3fffe5,
0x3fffe6,
0x7ffff1,
0x3ffffe0,
0x3ffffe1,
0xfffeb,
0x7fff1,
0x3fffe7,
0x7ffff2,
0x3fffe8,
0x1ffffec,
0x3ffffe2,
0x3ffffe3,
0x3ffffe4,
0x7ffffde,
0x7ffffdf,
0x3ffffe5,
0xfffff1,
0x1ffffed,
0x7fff2,
0x1fffe3,
0x3ffffe6,
0x7ffffe0,
0x7ffffe1,
0x3ffffe7,
0x7ffffe2,
0xfffff2,
0x1fffe4,
0x1fffe5,
0x3ffffe8,
0x3ffffe9,
0xffffffd,
0x7ffffe3,
0x7ffffe4,
0x7ffffe5,
0xfffec,
0xfffff3,
0xfffed,
0x1fffe6,
0x3fffe9,
0x1fffe7,
0x1fffe8,
0x7ffff3,
0x3fffea,
0x3fffeb,
0x1ffffee,
0x1ffffef,
0xfffff4,
0xfffff5,
0x3ffffea,
0x7ffff4,
0x3ffffeb,
0x7ffffe6,
0x3ffffec,
0x3ffffed,
0x7ffffe7,
0x7ffffe8,
0x7ffffe9,
0x7ffffea,
0x7ffffeb,
0xffffffe,
0x7ffffec,
0x7ffffed,
0x7ffffee,
0x7ffffef,
0x7fffff0,
0x3ffffee,
}
// huffmanCodeLen holds, indexed by byte value, the length in bits of the
// Huffman code stored at the same index of huffmanCodes.
var huffmanCodeLen = []uint8{
13, 23, 28, 28, 28, 28, 28, 28, 28, 24, 30, 28, 28, 30, 28, 28,
28, 28, 28, 28, 28, 28, 30, 28, 28, 28, 28, 28, 28, 28, 28, 28,
6, 10, 10, 12, 13, 6, 8, 11, 10, 10, 8, 11, 8, 6, 6, 6,
5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 7, 8, 15, 6, 12, 10,
13, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 8, 7, 8, 13, 19, 13, 14, 6,
15, 5, 6, 5, 6, 5, 6, 6, 6, 5, 7, 7, 6, 6, 6, 5,
6, 7, 6, 5, 5, 6, 7, 7, 7, 7, 7, 15, 11, 14, 13, 28,
20, 22, 20, 20, 22, 22, 22, 23, 22, 23, 23, 23, 23, 23, 24, 23,
24, 24, 22, 23, 24, 23, 23, 23, 23, 21, 22, 23, 22, 23, 23, 24,
22, 21, 20, 22, 22, 23, 23, 21, 23, 22, 22, 24, 21, 22, 23, 23,
21, 21, 22, 21, 23, 22, 23, 23, 20, 22, 22, 22, 23, 22, 22, 23,
26, 26, 20, 19, 22, 23, 22, 25, 26, 26, 26, 27, 27, 26, 24, 25,
19, 21, 26, 27, 27, 26, 27, 24, 21, 21, 26, 26, 28, 27, 27, 27,
20, 24, 20, 21, 22, 21, 21, 23, 22, 22, 25, 25, 24, 24, 26, 23,
26, 27, 26, 26, 27, 27, 27, 27, 27, 28, 27, 27, 27, 27, 27, 26,
}

View File

@@ -0,0 +1,249 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// See https://code.google.com/p/go/source/browse/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://code.google.com/p/go/source/browse/LICENSE
// Package http2 implements the HTTP/2 protocol.
//
// This is a work in progress. This package is low-level and intended
// to be used directly by very few people. Most users will use it
// indirectly through integration with the net/http package. See
// ConfigureServer. That ConfigureServer call will likely be automatic
// or available via an empty import in the future.
//
// This package currently targets draft-14. See http://http2.github.io/
package http2
import (
"bufio"
"fmt"
"io"
"net/http"
"strconv"
"sync"
)
// VerboseLogs is a package-level switch that is off by default.
// NOTE(review): presumably enables extra debug logging; the code that
// reads it is not in this part of the package, so confirm before relying
// on it.
var VerboseLogs = false
const (
// ClientPreface is the string that must be sent by new
// connections from clients.
ClientPreface = "PRI * HTTP/2.0\r\n\r\nSM\r\n\r\n"
// SETTINGS_MAX_FRAME_SIZE default
// http://http2.github.io/http2-spec/#rfc.section.6.5.2
initialMaxFrameSize = 16384
// NextProtoTLS is the NPN/ALPN protocol negotiated during
// HTTP/2's TLS setup.
NextProtoTLS = "h2"
// Defaults for the corresponding SETTINGS parameters:
// http://http2.github.io/http2-spec/#SettingValues
initialHeaderTableSize = 4096
initialWindowSize = 65535 // 6.9.2 Initial Flow Control Window Size
// defaultMaxReadFrameSize is 1 MiB.
// NOTE(review): presumably the default cap on frames accepted when
// reading; its use is outside this chunk.
defaultMaxReadFrameSize = 1 << 20
)
var (
// clientPreface is ClientPreface as a byte slice, ready to be written
// to a connection.
clientPreface = []byte(ClientPreface)
)
// streamState is the state of an HTTP/2 stream.
type streamState int

const (
	stateIdle streamState = iota
	stateOpen
	stateHalfClosedLocal
	stateHalfClosedRemote
	stateResvLocal
	stateResvRemote
	stateClosed
)

// stateName maps each declared streamState to its display name.
var stateName = [...]string{
	stateIdle:             "Idle",
	stateOpen:             "Open",
	stateHalfClosedLocal:  "HalfClosedLocal",
	stateHalfClosedRemote: "HalfClosedRemote",
	stateResvLocal:        "ResvLocal",
	stateResvRemote:       "ResvRemote",
	stateClosed:           "Closed",
}

// String returns the name of the state. A value outside the declared
// states is rendered as "streamState(N)" rather than panicking, so that
// logging a corrupt value can never crash the caller.
func (st streamState) String() string {
	if st < 0 || int(st) >= len(stateName) {
		return fmt.Sprintf("streamState(%d)", int(st))
	}
	return stateName[st]
}
// Setting is a setting parameter: which setting it is, and its value.
type Setting struct {
// ID is which setting is being set.
// See http://http2.github.io/http2-spec/#SettingValues
ID SettingID
// Val is the value.
Val uint32
}
// String formats the setting as "[NAME = value]", using the ID's String
// form for the name.
func (s Setting) String() string {
return fmt.Sprintf("[%v = %d]", s.ID, s.Val)
}
// Valid checks the setting's value against the limits of section 6.5.2,
// "Defined SETTINGS Parameters". It returns nil if the value is acceptable
// (or the ID has no limit checked here) and the ConnectionError to raise
// otherwise.
func (s Setting) Valid() error {
	const (
		maxWindowSize = 1<<31 - 1
		minFrameSize  = 16384
		maxFrameSize  = 1<<24 - 1
	)
	switch {
	case s.ID == SettingEnablePush && s.Val > 1:
		// ENABLE_PUSH is a boolean: only 0 and 1 are legal.
		return ConnectionError(ErrCodeProtocol)
	case s.ID == SettingInitialWindowSize && s.Val > maxWindowSize:
		return ConnectionError(ErrCodeFlowControl)
	case s.ID == SettingMaxFrameSize && (s.Val < minFrameSize || s.Val > maxFrameSize):
		return ConnectionError(ErrCodeProtocol)
	}
	return nil
}
// A SettingID is an HTTP/2 setting as defined in
// http://http2.github.io/http2-spec/#iana-settings
type SettingID uint16

const (
	SettingHeaderTableSize      SettingID = 0x1
	SettingEnablePush           SettingID = 0x2
	SettingMaxConcurrentStreams SettingID = 0x3
	SettingInitialWindowSize    SettingID = 0x4
	SettingMaxFrameSize         SettingID = 0x5
	SettingMaxHeaderListSize    SettingID = 0x6
)

// settingName maps each known SettingID to its spec name.
var settingName = map[SettingID]string{
	SettingHeaderTableSize:      "HEADER_TABLE_SIZE",
	SettingEnablePush:           "ENABLE_PUSH",
	SettingMaxConcurrentStreams: "MAX_CONCURRENT_STREAMS",
	SettingInitialWindowSize:    "INITIAL_WINDOW_SIZE",
	SettingMaxFrameSize:         "MAX_FRAME_SIZE",
	SettingMaxHeaderListSize:    "MAX_HEADER_LIST_SIZE",
}

// String returns the spec name of the setting, or "UNKNOWN_SETTING_<n>"
// for an ID that is not in settingName.
func (s SettingID) String() string {
	name, known := settingName[s]
	if !known {
		return fmt.Sprintf("UNKNOWN_SETTING_%d", uint16(s))
	}
	return name
}
// validHeader reports whether v is acceptable as an HTTP/2 header field
// name: non-empty, with no upper-case ASCII letters and no characters at or
// above 0x7f.
func validHeader(v string) bool {
	if v == "" {
		return false
	}
	for i := 0; i < len(v); i++ {
		// "Just as in HTTP/1.x, header field names are
		// strings of ASCII characters that are compared in a
		// case-insensitive fashion. However, header field
		// names MUST be converted to lowercase prior to their
		// encoding in HTTP/2. "
		c := v[i]
		switch {
		case c >= 127:
			// DEL, or any byte of a non-ASCII sequence.
			return false
		case 'A' <= c && c <= 'Z':
			return false
		}
	}
	return true
}
// httpCodeStringCommon caches the decimal string form of every status code
// that net/http has a status text for.
var httpCodeStringCommon = map[int]string{} // n -> strconv.Itoa(n)

func init() {
	for code := 100; code < 1000; code++ {
		if http.StatusText(code) == "" {
			continue
		}
		httpCodeStringCommon[code] = strconv.Itoa(code)
	}
}

// httpCodeString returns code formatted in decimal, served from the cache
// for well-known status codes.
func httpCodeString(code int) string {
	cached := httpCodeStringCommon[code]
	if cached == "" {
		// Not a cached code; cached values are never empty.
		return strconv.Itoa(code)
	}
	return cached
}
// stringWriter is implemented by writers that can write a string directly.
// It mirrors the interface of the same name from pkg io.
type stringWriter interface {
WriteString(s string) (n int, err error)
}
// A gate lets two goroutines coordinate their activities.
// Done sends one value on the channel and Wait receives one, so each Done
// releases exactly one Wait.
type gate chan struct{}
// Done signals the gate. It blocks until the value can be sent, i.e. until
// a Wait receives it or the channel has buffer space.
func (g gate) Done() { g <- struct{}{} }
// Wait blocks until a Done signal is received.
func (g gate) Wait() { <-g }
// A closeWaiter is like a sync.WaitGroup but only goes 1 to 0 (open to closed).
type closeWaiter chan struct{}
// Init makes a closeWaiter usable by allocating its channel.
// It exists so that a closeWaiter value can be declared as a field of a
// larger struct and initialized in place.
func (cw *closeWaiter) Init() {
*cw = make(chan struct{})
}
// Close marks the closeWaiter as closed and unblocks any waiters.
// It closes the underlying channel, so it must be called at most once.
func (cw closeWaiter) Close() {
close(cw)
}
// Wait waits for the closeWaiter to become closed.
func (cw closeWaiter) Wait() {
<-cw
}
// bufferedWriter is a buffered writer that writes to w.
// Its buffered writer is lazily allocated as needed, to minimize
// idle memory usage with many connections.
type bufferedWriter struct {
	w  io.Writer     // immutable
	bw *bufio.Writer // non-nil when data is buffered
}

// newBufferedWriter returns a bufferedWriter that writes to w. No buffer
// is taken from the pool until the first Write.
func newBufferedWriter(w io.Writer) *bufferedWriter {
	return &bufferedWriter{w: w}
}

// bufWriterPool holds idle 4 KiB bufio.Writers shared by all
// bufferedWriters.
var bufWriterPool = sync.Pool{
	New: func() interface{} {
		// TODO: pick something better? this is a bit under
		// (3 x typical 1500 byte MTU) at least.
		return bufio.NewWriterSize(nil, 4<<10)
	},
}

// Write buffers p, borrowing a bufio.Writer from the pool first if this
// writer does not currently hold one.
func (w *bufferedWriter) Write(p []byte) (n int, err error) {
	if w.bw == nil {
		w.bw = bufWriterPool.Get().(*bufio.Writer)
		w.bw.Reset(w.w)
	}
	return w.bw.Write(p)
}

// Flush writes any buffered data to the underlying writer and hands the
// buffer back to the pool. It is a no-op when nothing has been buffered.
func (w *bufferedWriter) Flush() error {
	if w.bw == nil {
		return nil
	}
	pooled := w.bw
	flushErr := pooled.Flush()
	// Drop the reference to w.w before the buffer is shared again.
	pooled.Reset(nil)
	bufWriterPool.Put(pooled)
	w.bw = nil
	return flushErr
}

View File

@@ -0,0 +1,43 @@
// Copyright 2014 The Go Authors.
// See https://code.google.com/p/go/source/browse/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://code.google.com/p/go/source/browse/LICENSE
package http2
import (
"sync"
)
// pipe couples a buffer with a condition variable so that a reader can
// block until a writer has supplied data or the pipe has been closed.
//
// All methods lock through c.L, which therefore has to be set before the
// pipe is used.
// NOTE(review): presumably the creator points c.L at &m; that wiring is
// not in this file.
type pipe struct {
	b buffer
	c sync.Cond
	m sync.Mutex
}

// Read waits until data is available and copies bytes
// from the buffer into p.
func (pp *pipe) Read(p []byte) (n int, err error) {
	pp.c.L.Lock()
	defer pp.c.L.Unlock()
	for {
		if pp.b.Len() != 0 || pp.b.closed {
			break
		}
		pp.c.Wait()
	}
	return pp.b.Read(p)
}

// Write copies bytes from p into the buffer and wakes a reader.
// It is an error to write more data than the buffer can hold.
func (pp *pipe) Write(p []byte) (n int, err error) {
	pp.c.L.Lock()
	defer func() {
		// Wake one reader, then release the lock.
		pp.c.Signal()
		pp.c.L.Unlock()
	}()
	return pp.b.Write(p)
}

// Close closes the underlying buffer with err and wakes a reader.
func (pp *pipe) Close(err error) {
	pp.c.L.Lock()
	defer func() {
		pp.c.Signal()
		pp.c.L.Unlock()
	}()
	pp.b.Close(err)
}

1777
Godeps/_workspace/src/github.com/bradfitz/http2/server.go generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,553 @@
// Copyright 2015 The Go Authors.
// See https://go.googlesource.com/go/+/master/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://go.googlesource.com/go/+/master/LICENSE
package http2
import (
"bufio"
"bytes"
"crypto/tls"
"errors"
"fmt"
"io"
"log"
"net"
"net/http"
"strconv"
"strings"
"sync"
"github.com/bradfitz/http2/hpack"
)
// Transport is an http.RoundTripper that sends "https" requests over
// HTTP/2, keeping a cache of open connections per host:port.
type Transport struct {
// Fallback handles requests whose URL scheme is not "https". If it is
// nil such requests fail.
Fallback http.RoundTripper
// TODO: remove this and make more general with a TLS dial hook, like http
// InsecureTLSDial disables certificate and hostname verification when
// dialing.
InsecureTLSDial bool
// connMu guards conns.
connMu sync.Mutex
conns map[string][]*clientConn // key is host:port
}
// clientConn is the client side of one HTTP/2 connection over TLS,
// together with the state of the streams multiplexed on it.
type clientConn struct {
t *Transport
tconn *tls.Conn
tlsState *tls.ConnectionState
connKey []string // key(s) this connection is cached in, in t.conns
readerDone chan struct{} // closed when readLoop returns
readerErr error // set before readerDone is closed
hdec *hpack.Decoder
nextRes *http.Response // response currently being assembled by readLoop
// mu is held by the methods below when they touch closed, goAway,
// streams and nextStreamID, and by roundTrip while it writes frames.
mu sync.Mutex
closed bool
goAway *GoAwayFrame // if non-nil, the GoAwayFrame we received
streams map[uint32]*clientStream
nextStreamID uint32
bw *bufio.Writer
werr error // first write error that has occurred
br *bufio.Reader
fr *Framer
// Settings from peer:
maxFrameSize uint32
maxConcurrentStreams uint32
initialWindowSize uint32
hbuf bytes.Buffer // HPACK encoder writes into this
henc *hpack.Encoder
}
// clientStream is the client-side state of a single request/response
// exchange (one HTTP/2 stream).
type clientStream struct {
// ID is the stream identifier; client-initiated streams use odd IDs.
ID uint32
// resc delivers the response, or the error that prevented it, to the
// goroutine blocked in roundTrip.
resc chan resAndError
// pw and pr are the two ends of the pipe carrying the response body;
// readLoop creates them when the HEADERS frame arrives.
pw *io.PipeWriter
pr *io.PipeReader
}
// stickyErrWriter wraps an io.Writer and records the result of every write
// in *err. Once *err is non-nil, later writes fail with that error without
// touching the underlying writer.
type stickyErrWriter struct {
	w   io.Writer
	err *error
}

// Write forwards p to the underlying writer unless an earlier write has
// already failed.
func (sew stickyErrWriter) Write(p []byte) (n int, err error) {
	if prev := *sew.err; prev != nil {
		return 0, prev
	}
	n, err = sew.w.Write(p)
	*sew.err = err
	return n, err
}
// RoundTrip sends req over HTTP/2 and returns the response. Requests whose
// scheme is not "https" are delegated to t.Fallback, or fail if there is
// none. When the URL has no explicit port, 443 is used. A request that
// lands on a connection which has just been closed is retried.
func (t *Transport) RoundTrip(req *http.Request) (*http.Response, error) {
	if req.URL.Scheme != "https" {
		if fb := t.Fallback; fb != nil {
			return fb.RoundTrip(req)
		}
		return nil, errors.New("http2: unsupported scheme and no Fallback")
	}

	host, port, splitErr := net.SplitHostPort(req.URL.Host)
	if splitErr != nil {
		// No port in the URL: use the whole Host and the default port.
		host, port = req.URL.Host, "443"
	}

	for {
		cc, err := t.getClientConn(host, port)
		if err != nil {
			return nil, err
		}
		res, err := cc.roundTrip(req)
		switch {
		case shouldRetryRequest(err): // TODO: or clientconn is overloaded (too many outstanding requests)?
			continue
		case err != nil:
			return nil, err
		default:
			return res, nil
		}
	}
}
// CloseIdleConnections closes any connections which were previously
// connected from previous requests but are now sitting idle.
// It does not interrupt any connections currently in use.
func (t *Transport) CloseIdleConnections() {
	t.connMu.Lock()
	defer t.connMu.Unlock()
	for key := range t.conns {
		pool := t.conns[key]
		for i := range pool {
			pool[i].closeIfIdle()
		}
	}
}
// errClientConnClosed is returned by clientConn.roundTrip when the
// connection was already marked closed.
var errClientConnClosed = errors.New("http2: client conn is closed")
// shouldRetryRequest reports whether a request that failed with err should
// be attempted again. Only errClientConnClosed qualifies.
func shouldRetryRequest(err error) bool {
// TODO: or GOAWAY graceful shutdown stuff
return err == errClientConnClosed
}
// removeClientConn removes cc from the connection cache under every key it
// was registered with, deleting map entries that become empty.
func (t *Transport) removeClientConn(cc *clientConn) {
	t.connMu.Lock()
	defer t.connMu.Unlock()
	for _, key := range cc.connKey {
		pool, found := t.conns[key]
		if !found {
			continue
		}
		if remaining := filterOutClientConn(pool, cc); len(remaining) == 0 {
			delete(t.conns, key)
		} else {
			t.conns[key] = remaining
		}
	}
}
// filterOutClientConn returns in with every occurrence of exclude removed.
// The filtering is done in place: the result shares in's backing array, so
// in must not be used afterwards.
func filterOutClientConn(in []*clientConn, exclude *clientConn) []*clientConn {
	kept := in[:0]
	for i := range in {
		if c := in[i]; c != exclude {
			kept = append(kept, c)
		}
	}
	return kept
}
// getClientConn returns a connection to host:port that can accept another
// request, reusing a cached one when possible and dialing a new one
// otherwise. connMu is held for the whole call, including the dial.
func (t *Transport) getClientConn(host, port string) (*clientConn, error) {
	t.connMu.Lock()
	defer t.connMu.Unlock()

	if t.conns == nil {
		t.conns = make(map[string][]*clientConn)
	}
	key := net.JoinHostPort(host, port)
	cached := t.conns[key]
	for i := range cached {
		if cached[i].canTakeNewRequest() {
			return cached[i], nil
		}
	}

	fresh, err := t.newClientConn(host, port, key)
	if err != nil {
		return nil, err
	}
	t.conns[key] = append(t.conns[key], fresh)
	return fresh, nil
}
// newClientConn dials host:port over TLS, negotiates HTTP/2, and returns a
// connection that is ready for requests and whose read loop is running.
// key is the cache key the caller will store the connection under.
//
// If any step after the dial fails, the TLS connection is closed before
// the error is returned, so no socket is leaked.
func (t *Transport) newClientConn(host, port, key string) (*clientConn, error) {
	cfg := &tls.Config{
		ServerName:         host,
		NextProtos:         []string{NextProtoTLS},
		InsecureSkipVerify: t.InsecureTLSDial,
	}
	tconn, err := tls.Dial("tcp", host+":"+port, cfg)
	if err != nil {
		return nil, err
	}
	cc, err := t.setupClientConn(tconn, cfg, key)
	if err != nil {
		tconn.Close()
		return nil, err
	}
	go cc.readLoop()
	return cc, nil
}

// setupClientConn performs the HTTP/2 connection setup on an established
// TLS connection: it verifies the negotiated protocol, sends the client
// preface and initial frames, and reads the server's first SETTINGS frame.
// It does not close tconn on failure and does not start the read loop;
// both are the caller's job.
func (t *Transport) setupClientConn(tconn *tls.Conn, cfg *tls.Config, key string) (*clientConn, error) {
	if err := tconn.Handshake(); err != nil {
		return nil, err
	}
	if !t.InsecureTLSDial {
		if err := tconn.VerifyHostname(cfg.ServerName); err != nil {
			return nil, err
		}
	}
	state := tconn.ConnectionState()
	if p := state.NegotiatedProtocol; p != NextProtoTLS {
		// TODO(bradfitz): fall back to Fallback
		return nil, fmt.Errorf("bad protocol: %v", p)
	}
	if !state.NegotiatedProtocolIsMutual {
		return nil, errors.New("could not negotiate protocol mutually")
	}
	if _, err := tconn.Write(clientPreface); err != nil {
		return nil, err
	}

	cc := &clientConn{
		t:                    t,
		tconn:                tconn,
		connKey:              []string{key}, // TODO: cert's validated hostnames too
		tlsState:             &state,
		readerDone:           make(chan struct{}),
		nextStreamID:         1,
		maxFrameSize:         16 << 10, // spec default
		initialWindowSize:    65535,    // spec default
		maxConcurrentStreams: 1000,     // "infinite", per spec. 1000 seems good enough.
		streams:              make(map[uint32]*clientStream),
	}
	// Write errors are sticky in cc.werr, so the individual frame writes
	// below are checked once after each Flush.
	cc.bw = bufio.NewWriter(stickyErrWriter{tconn, &cc.werr})
	cc.br = bufio.NewReader(tconn)
	cc.fr = NewFramer(cc.bw, cc.br)
	cc.henc = hpack.NewEncoder(&cc.hbuf)

	cc.fr.WriteSettings()
	// TODO: re-send more conn-level flow control tokens when server uses all these.
	cc.fr.WriteWindowUpdate(0, 1<<30) // um, 0x7fffffff doesn't work to Google? it hangs?
	cc.bw.Flush()
	if cc.werr != nil {
		return nil, cc.werr
	}

	// Read the obligatory SETTINGS frame
	f, err := cc.fr.ReadFrame()
	if err != nil {
		return nil, err
	}
	sf, ok := f.(*SettingsFrame)
	if !ok {
		return nil, fmt.Errorf("expected settings frame, got: %T", f)
	}
	cc.fr.WriteSettingsAck()
	cc.bw.Flush()
	if cc.werr != nil {
		// The ack could not be sent: the connection is unusable.
		return nil, cc.werr
	}

	sf.ForeachSetting(func(s Setting) error {
		switch s.ID {
		case SettingMaxFrameSize:
			cc.maxFrameSize = s.Val
		case SettingMaxConcurrentStreams:
			cc.maxConcurrentStreams = s.Val
		case SettingInitialWindowSize:
			cc.initialWindowSize = s.Val
		default:
			// TODO(bradfitz): handle more
			log.Printf("Unhandled Setting: %v", s)
		}
		return nil
	})
	// TODO: figure out henc size
	cc.hdec = hpack.NewDecoder(initialHeaderTableSize, cc.onNewHeaderField)
	return cc, nil
}
// setGoAway records the GOAWAY frame received from the peer. Once set,
// canTakeNewRequest reports false for this connection.
func (cc *clientConn) setGoAway(f *GoAwayFrame) {
cc.mu.Lock()
defer cc.mu.Unlock()
cc.goAway = f
}
// canTakeNewRequest reports whether a new request may be started on this
// connection: it has not been closed, no GOAWAY has been received, the
// concurrent-stream limit has room, and stream IDs are not exhausted.
func (cc *clientConn) canTakeNewRequest() bool {
	cc.mu.Lock()
	defer cc.mu.Unlock()
	// A connection closed by closeIfIdle stays in the cache until the read
	// loop notices. Without the closed check it would still be handed out,
	// and every request on it would fail and be retried.
	return !cc.closed &&
		cc.goAway == nil &&
		int64(len(cc.streams)+1) < int64(cc.maxConcurrentStreams) &&
		cc.nextStreamID < 2147483647
}
// closeIfIdle marks the connection closed and closes the TLS connection,
// but only if no streams are currently registered on it. The TLS close
// happens after mu has been released.
func (cc *clientConn) closeIfIdle() {
	cc.mu.Lock()
	idle := len(cc.streams) == 0
	if idle {
		cc.closed = true
	}
	cc.mu.Unlock()

	if idle {
		// TODO: do clients send GOAWAY too? maybe? Just Close:
		cc.tconn.Close()
	}
}
// roundTrip sends req on a new stream of this connection and blocks until
// the response (or an error) for that stream is delivered. It returns
// errClientConnClosed if the connection was already marked closed.
//
// The request is sent as a HEADERS frame, followed by CONTINUATION frames
// when the encoded header block exceeds the peer's maximum frame size.
// Request bodies are not sent yet.
func (cc *clientConn) roundTrip(req *http.Request) (*http.Response, error) {
	cc.mu.Lock()

	if cc.closed {
		cc.mu.Unlock()
		return nil, errClientConnClosed
	}

	cs := cc.newStream()
	hasBody := false // TODO

	// we send: HEADERS[+CONTINUATION] + (DATA?)
	hdrs := cc.encodeHeaders(req)
	first := true
	for len(hdrs) > 0 {
		chunk := hdrs
		if len(chunk) > int(cc.maxFrameSize) {
			chunk = chunk[:cc.maxFrameSize]
		}
		hdrs = hdrs[len(chunk):]
		endHeaders := len(hdrs) == 0
		if first {
			cc.fr.WriteHeaders(HeadersFrameParam{
				StreamID:      cs.ID,
				BlockFragment: chunk,
				EndStream:     !hasBody,
				EndHeaders:    endHeaders,
			})
			first = false
		} else {
			cc.fr.WriteContinuation(cs.ID, endHeaders, chunk)
		}
	}
	cc.bw.Flush()
	// Frame writes go through a sticky-error writer; one check suffices.
	werr := cc.werr
	cc.mu.Unlock()

	// TODO: when hasBody, write data. and it should probably be interleaved:
	// go ... io.Copy(dataFrameWriter{cc, cs, ...}, req.Body) ... etc

	if werr != nil {
		// Unregister the stream; otherwise it would stay in cc.streams and
		// the connection would never look idle to closeIfIdle.
		cc.streamByID(cs.ID, true)
		return nil, werr
	}

	re := <-cs.resc
	if re.err != nil {
		return nil, re.err
	}
	res := re.res
	res.Request = req
	res.TLS = cc.tlsState
	return res, nil
}
// encodeHeaders HPACK-encodes the pseudo-header and header fields of req
// into cc.hbuf and returns the encoded block. The returned slice aliases
// cc.hbuf and is only valid until the next call.
//
// The ":path" pseudo-header carries the request URI, path and query
// string, so that query parameters reach the server. Header names are
// lower-cased, and a "Host" header is omitted because ":authority" already
// carries it.
//
// requires cc.mu be held.
func (cc *clientConn) encodeHeaders(req *http.Request) []byte {
	cc.hbuf.Reset()

	// TODO(bradfitz): figure out :authority-vs-Host stuff between http2 and Go
	host := req.Host
	if host == "" {
		host = req.URL.Host
	}

	// RequestURI yields "/" for an empty path and appends "?query" when a
	// query is present; using URL.Path alone would drop the query string.
	path := req.URL.RequestURI()

	cc.writeHeader(":authority", host) // probably not right for all sites
	cc.writeHeader(":method", req.Method)
	cc.writeHeader(":path", path)
	cc.writeHeader(":scheme", "https")

	for k, vv := range req.Header {
		lowKey := strings.ToLower(k)
		if lowKey == "host" {
			continue
		}
		for _, v := range vv {
			cc.writeHeader(lowKey, v)
		}
	}
	return cc.hbuf.Bytes()
}
// writeHeader HPACK-encodes one header field through cc.henc, after
// logging it.
// NOTE(review): the log line is unconditional and includes the value, so
// credentials such as Authorization or Cookie headers end up in the log.
func (cc *clientConn) writeHeader(name, value string) {
log.Printf("sending %q = %q", name, value)
cc.henc.WriteField(hpack.HeaderField{Name: name, Value: value})
}
// resAndError is the value delivered on a clientStream's resc channel:
// either a response or the error that prevented one.
type resAndError struct {
res *http.Response
err error
}
// newStream allocates the next client stream ID (IDs advance by two, so
// they stay odd), registers a new stream under it, and returns the stream.
//
// requires cc.mu be held.
func (cc *clientConn) newStream() *clientStream {
	id := cc.nextStreamID
	cc.nextStreamID = id + 2

	cs := &clientStream{
		ID:   id,
		resc: make(chan resAndError, 1),
	}
	cc.streams[id] = cs
	return cs
}
// streamByID returns the stream registered under id, or nil if there is
// none. If andRemove is true the stream is also unregistered.
func (cc *clientConn) streamByID(id uint32, andRemove bool) *clientStream {
	cc.mu.Lock()
	defer cc.mu.Unlock()

	cs, found := cc.streams[id]
	if found && andRemove {
		delete(cc.streams, id)
	}
	return cs
}
// readLoop reads frames from cc.fr until ReadFrame returns an error or a
// CONTINUATION-ordering violation is detected. HEADERS/CONTINUATION
// fragments are fed to cc.hdec, DATA payloads are written to the stream's
// pipe, and once a header block is complete the pending response
// (cc.nextRes) is delivered on the stream's resc channel.
//
// On exit it records the cause in cc.readerErr (when set below), closes
// cc.readerDone, removes the connection from its transport, and closes the
// pipe of every stream that had an active response.
//
// runs in its own goroutine.
func (cc *clientConn) readLoop() {
defer cc.t.removeClientConn(cc)
defer close(cc.readerDone)
activeRes := map[uint32]*clientStream{} // keyed by streamID
// Close any response bodies if the server closes prematurely.
// TODO: also do this if we've written the headers but not
// gotten a response yet.
defer func() {
err := cc.readerErr
// A plain EOF while responses are still active means the peer hung up
// mid-body, so report it to body readers as an unexpected EOF.
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
for _, cs := range activeRes {
cs.pw.CloseWithError(err)
}
}()
// continueStreamID is the stream ID we're waiting for
// continuation frames for.
var continueStreamID uint32
for {
f, err := cc.fr.ReadFrame()
if err != nil {
cc.readerErr = err
return
}
log.Printf("Transport received %v: %#v", f.Header(), f)
streamID := f.Header().StreamID
_, isContinue := f.(*ContinuationFrame)
if isContinue {
if streamID != continueStreamID {
log.Printf("Protocol violation: got CONTINUATION with id %d; want %d", streamID, continueStreamID)
cc.readerErr = ConnectionError(ErrCodeProtocol)
return
}
} else if continueStreamID != 0 {
// Continue frames need to be adjacent in the stream
// and we were in the middle of headers.
log.Printf("Protocol violation: got %T for stream %d, want CONTINUATION for %d", f, streamID, continueStreamID)
cc.readerErr = ConnectionError(ErrCodeProtocol)
return
}
if streamID%2 == 0 {
// Ignore streams pushed from the server for now.
// These always have an even stream id.
// NOTE(review): stream ID 0 is also even, so frames carrying ID 0 are
// skipped here as well — confirm that is intended.
continue
}
streamEnded := false
if ff, ok := f.(streamEnder); ok {
streamEnded = ff.StreamEnded()
}
// Look the stream up, dropping it from cc.streams when this frame ends it.
cs := cc.streamByID(streamID, streamEnded)
if cs == nil {
log.Printf("Received frame for untracked stream ID %d", streamID)
continue
}
switch f := f.(type) {
case *HeadersFrame:
// Start a fresh response and body pipe for this stream.
cc.nextRes = &http.Response{
Proto: "HTTP/2.0",
ProtoMajor: 2,
Header: make(http.Header),
}
cs.pr, cs.pw = io.Pipe()
cc.hdec.Write(f.HeaderBlockFragment())
case *ContinuationFrame:
cc.hdec.Write(f.HeaderBlockFragment())
case *DataFrame:
log.Printf("DATA: %q", f.Data())
// NOTE(review): within this function cs.pw is only assigned in the
// HeadersFrame case; a DATA frame seen before HEADERS would use
// whatever cs.pw holds at that point — confirm.
cs.pw.Write(f.Data())
case *GoAwayFrame:
// NOTE(review): this case is only reached for a frame that passed the
// even-ID and untracked-stream checks above — confirm it is reachable
// for the GOAWAY frames the peer actually sends.
cc.t.removeClientConn(cc)
if f.ErrCode != 0 {
// TODO: deal with GOAWAY more. particularly the error code
log.Printf("transport got GOAWAY with error code = %v", f.ErrCode)
}
cc.setGoAway(f)
default:
log.Printf("Transport: unhandled response frame type %T", f)
}
headersEnded := false
if he, ok := f.(headersEnder); ok {
headersEnded = he.HeadersEnded()
// Track whether further CONTINUATION frames are expected for this stream.
if headersEnded {
continueStreamID = 0
} else {
continueStreamID = streamID
}
}
if streamEnded {
cs.pw.Close()
delete(activeRes, streamID)
}
if headersEnded {
// cs was already checked for nil right after the lookup above.
if cs == nil {
panic("couldn't find stream") // TODO be graceful
}
// TODO: set the Body to one which notes the
// Close and also sends the server a
// RST_STREAM
cc.nextRes.Body = cs.pr
res := cc.nextRes
activeRes[streamID] = cs
cs.resc <- resAndError{res: res}
}
}
}
// onNewHeaderField applies one decoded header field to the response being
// assembled in cc.nextRes.
//
// ":status" sets Status and StatusCode (a non-numeric value panics), any
// other pseudo-header (name starting with ':') is dropped, and everything
// else is added to the response Header under its canonical key.
func (cc *clientConn) onNewHeaderField(f hpack.HeaderField) {
	// TODO: verify pseudo headers come before non-pseudo headers
	// TODO: verify the status is set
	log.Printf("Header field: %+v", f)

	switch {
	case f.Name == ":status":
		code, err := strconv.Atoi(f.Value)
		if err != nil {
			panic("TODO: be graceful")
		}
		cc.nextRes.Status = f.Value + " " + http.StatusText(code)
		cc.nextRes.StatusCode = code
	case strings.HasPrefix(f.Name, ":"):
		// "Endpoints MUST NOT generate pseudo-header fields other than those defined in this document."
		// TODO: treat as invalid?
	default:
		cc.nextRes.Header.Add(http.CanonicalHeaderKey(f.Name), f.Value)
	}
}

View File

@@ -0,0 +1,204 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// See https://code.google.com/p/go/source/browse/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://code.google.com/p/go/source/browse/LICENSE
package http2
import (
"bytes"
"fmt"
"net/http"
"time"
"github.com/bradfitz/http2/hpack"
)
// writeFramer is implemented by any type that is used to write frames.
// writeFrame performs the write using the facilities of the supplied
// writeContext and returns the resulting error, if any.
type writeFramer interface {
writeFrame(writeContext) error
}
// writeContext is the interface needed by the various frame writer
// types below. All the writeFrame methods below are scheduled via the
// frame writing scheduler (see writeScheduler in writesched.go).
//
// This interface is implemented by *serverConn.
// TODO: use it from the client code too, once it exists.
type writeContext interface {
// Framer returns the Framer that frames are written through.
Framer() *Framer
// Flush is called by frame writers that need pending output pushed out.
Flush() error
// CloseConn is called by frame writers that hang up on the peer.
CloseConn() error
// HeaderEncoder returns an HPACK encoder that writes to the
// returned buffer.
HeaderEncoder() (*hpack.Encoder, *bytes.Buffer)
}
// endsStream reports whether writing w closes the stream from our side.
// Only *writeData and *writeResHeaders can do so, through their endStream
// field; every other frame writer yields false.
func endsStream(w writeFramer) bool {
	if data, ok := w.(*writeData); ok {
		return data.endStream
	}
	if hdrs, ok := w.(*writeResHeaders); ok {
		return hdrs.endStream
	}
	return false
}
// flushFrameWriter is a frame writer that writes nothing itself; it only
// flushes the write context.
type flushFrameWriter struct{}

// writeFrame flushes ctx and reports the flush error.
func (flushFrameWriter) writeFrame(ctx writeContext) error {
	err := ctx.Flush()
	return err
}
// writeSettings is a frame writer that sends its settings in one SETTINGS
// frame.
type writeSettings []Setting

// writeFrame passes every setting in s to the framer's WriteSettings.
func (s writeSettings) writeFrame(ctx writeContext) error {
	fr := ctx.Framer()
	settings := []Setting(s)
	return fr.WriteSettings(settings...)
}
// writeGoAway is a frame writer for a GOAWAY frame carrying the given last
// stream ID and error code.
type writeGoAway struct {
	maxStreamID uint32
	code        ErrCode
}

// writeFrame writes the GOAWAY frame and returns the write error. When the
// code is non-zero it additionally flushes, pauses for 50ms, and closes the
// connection.
func (p *writeGoAway) writeFrame(ctx writeContext) error {
	err := ctx.Framer().WriteGoAway(p.maxStreamID, p.code, nil)
	if p.code == 0 {
		return err
	}
	ctx.Flush() // ignore error: we're hanging up on them anyway
	time.Sleep(50 * time.Millisecond)
	ctx.CloseConn()
	return err
}
// writeData is a request to write a DATA frame with payload p on streamID.
// endStream marks the frame as the last one sent on the stream.
type writeData struct {
	streamID  uint32
	p         []byte
	endStream bool
}

// String describes the pending write for debugging; it reports the payload
// length rather than its contents.
func (w *writeData) String() string {
	size := len(w.p)
	return fmt.Sprintf("writeData(stream=%d, p=%d, endStream=%v)", w.streamID, size, w.endStream)
}
// writeFrame writes w.p as a single DATA frame on w.streamID, setting the
// end-of-stream flag from w.endStream.
func (w *writeData) writeFrame(ctx writeContext) error {
	fr := ctx.Framer()
	return fr.WriteData(w.streamID, w.endStream, w.p)
}
// writeFrame lets a StreamError act as a frame writer: it sends RST_STREAM
// for the error's stream with the error's code.
func (se StreamError) writeFrame(ctx writeContext) error {
	fr := ctx.Framer()
	return fr.WriteRSTStream(se.StreamID, se.Code)
}
// writePingAck is a frame writer that acknowledges the PING frame pf.
type writePingAck struct {
	pf *PingFrame
}

// writeFrame writes a PING frame with the ack flag set, echoing pf's data.
func (w writePingAck) writeFrame(ctx writeContext) error {
	fr := ctx.Framer()
	return fr.WritePing(true, w.pf.Data)
}
// writeSettingsAck is a frame writer that acknowledges a SETTINGS frame.
type writeSettingsAck struct{}

// writeFrame writes the SETTINGS acknowledgement through the framer.
func (writeSettingsAck) writeFrame(ctx writeContext) error {
	fr := ctx.Framer()
	return fr.WriteSettingsAck()
}
// writeResHeaders is a request to write a HEADERS and 0+ CONTINUATION frames
// for HTTP response headers from a server handler.
type writeResHeaders struct {
streamID uint32
httpResCode int // written as the ":status" pseudo-header
h http.Header // may be nil
endStream bool // set on the HEADERS frame to end the stream
// contentType and contentLength, when non-empty, are written as
// "content-type" / "content-length" fields after the fields from h.
contentType string
contentLength string
}
// writeFrame HPACK-encodes the response headers into the context's header
// buffer and writes them as one HEADERS frame followed by as many
// CONTINUATION frames as are needed to carry the whole block.
//
// It returns the first framer error encountered and panics if the encoded
// header block turns out empty.
func (w *writeResHeaders) writeFrame(ctx writeContext) error {
enc, buf := ctx.HeaderEncoder()
buf.Reset()
// The ":status" pseudo-header is written first, ahead of the regular fields.
enc.WriteField(hpack.HeaderField{Name: ":status", Value: httpCodeString(w.httpResCode)})
for k, vv := range w.h {
// presumably lowerHeader lower-cases the field name — helper not shown here.
k = lowerHeader(k)
for _, v := range vv {
// TODO: more of "8.1.2.2 Connection-Specific Header Fields"
// Drop transfer-encoding values other than "trailers".
if k == "transfer-encoding" && v != "trailers" {
continue
}
enc.WriteField(hpack.HeaderField{Name: k, Value: v})
}
}
if w.contentType != "" {
enc.WriteField(hpack.HeaderField{Name: "content-type", Value: w.contentType})
}
if w.contentLength != "" {
enc.WriteField(hpack.HeaderField{Name: "content-length", Value: w.contentLength})
}
headerBlock := buf.Bytes()
if len(headerBlock) == 0 {
panic("unexpected empty hpack")
}
// For now we're lazy and just pick the minimum MAX_FRAME_SIZE
// that all peers must support (16KB). Later we could care
// more and send larger frames if the peer advertised it, but
// there's little point. Most headers are small anyway (so we
// generally won't have CONTINUATION frames), and extra frames
// only waste 9 bytes anyway.
const maxFrameSize = 16384
first := true
// Emit the block in maxFrameSize-sized fragments: the first goes out as
// HEADERS, the rest as CONTINUATION; the last one carries END_HEADERS.
for len(headerBlock) > 0 {
frag := headerBlock
if len(frag) > maxFrameSize {
frag = frag[:maxFrameSize]
}
headerBlock = headerBlock[len(frag):]
endHeaders := len(headerBlock) == 0
var err error
if first {
first = false
err = ctx.Framer().WriteHeaders(HeadersFrameParam{
StreamID: w.streamID,
BlockFragment: frag,
EndStream: w.endStream,
EndHeaders: endHeaders,
})
} else {
err = ctx.Framer().WriteContinuation(w.streamID, endHeaders, frag)
}
if err != nil {
return err
}
}
return nil
}
// write100ContinueHeadersFrame is a frame writer that sends an interim
// ":status: 100" header block on streamID.
type write100ContinueHeadersFrame struct {
	streamID uint32
}

// writeFrame encodes the lone ":status" field and writes it as a single
// HEADERS frame that ends the header block but leaves the stream open.
func (w write100ContinueHeadersFrame) writeFrame(ctx writeContext) error {
	enc, buf := ctx.HeaderEncoder()
	buf.Reset()
	status := hpack.HeaderField{Name: ":status", Value: "100"}
	enc.WriteField(status)

	fr := ctx.Framer()
	return fr.WriteHeaders(HeadersFrameParam{
		StreamID:      w.streamID,
		BlockFragment: buf.Bytes(),
		EndStream:     false,
		EndHeaders:    true,
	})
}
// writeWindowUpdate is a frame writer for a WINDOW_UPDATE frame that grants
// n bytes on streamID.
type writeWindowUpdate struct {
	streamID uint32 // or 0 for conn-level
	n        uint32
}

// writeFrame writes the WINDOW_UPDATE frame through the framer.
func (wu writeWindowUpdate) writeFrame(ctx writeContext) error {
	fr := ctx.Framer()
	return fr.WriteWindowUpdate(wu.streamID, wu.n)
}

View File

@@ -0,0 +1,286 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// See https://code.google.com/p/go/source/browse/CONTRIBUTORS
// Licensed under the same terms as Go itself:
// https://code.google.com/p/go/source/browse/LICENSE
package http2
import "fmt"
// frameWriteMsg is a request to write a frame.
type frameWriteMsg struct {
// write is the interface value that does the writing, once the
// writeScheduler (below) has decided to select this frame
// to write. The write functions are all defined in write.go.
write writeFramer
stream *stream // used for prioritization. nil for non-stream frames.
// done, if non-nil, must be a buffered channel with space for
// 1 message and is sent the return value from write (or an
// earlier error) when the frame has been written.
done chan error
}
// String renders the message for debugging only: its stream ID (0 when it is
// not tied to a stream), whether a done channel is attached, and a
// description of the writer (its own String() if it has one, otherwise its
// Go type).
func (wm frameWriteMsg) String() string {
	var streamID uint32
	if wm.stream != nil {
		streamID = wm.stream.id
	}

	var des string
	switch w := wm.write.(type) {
	case fmt.Stringer:
		des = w.String()
	default:
		des = fmt.Sprintf("%T", wm.write)
	}

	hasDone := wm.done != nil
	return fmt.Sprintf("[frameWriteMsg stream=%d, ch=%v, type: %v]", streamID, hasDone, des)
}
// writeScheduler tracks pending frames to write, priorities, and decides
// the next one to use. It is not thread-safe.
type writeScheduler struct {
// zero are frames not associated with a specific stream.
// They're sent before any stream-specific frames.
zero writeQueue
// maxFrameSize is the maximum size of a DATA frame
// we'll write. Must be non-zero and between 16K-16M.
maxFrameSize uint32
// sq contains the stream-specific queues, keyed by stream ID.
// when a stream is idle, it's deleted from the map.
sq map[uint32]*writeQueue
// canSend is a slice of memory that's reused between frame
// scheduling decisions to hold the list of writeQueues (from sq)
// which have enough flow control data to send. After canSend is
// built, the best is selected.
canSend []*writeQueue
// pool of empty queues for reuse.
queuePool []*writeQueue
}
// putEmptyQueue returns q to the pool of reusable queues.
// It panics if q still holds messages.
func (ws *writeScheduler) putEmptyQueue(q *writeQueue) {
	if pending := len(q.s); pending != 0 {
		panic("queue must be empty")
	}
	ws.queuePool = append(ws.queuePool, q)
}
// getEmptyQueue pops the most recently pooled queue, or allocates a new one
// when the pool is empty.
func (ws *writeScheduler) getEmptyQueue() *writeQueue {
	last := len(ws.queuePool) - 1
	if last < 0 {
		return new(writeQueue)
	}
	pooled := ws.queuePool[last]
	ws.queuePool = ws.queuePool[:last]
	return pooled
}
// empty reports whether the scheduler holds no frames: the non-stream queue
// is empty and there are no per-stream queues.
func (ws *writeScheduler) empty() bool {
	if !ws.zero.empty() {
		return false
	}
	return len(ws.sq) == 0
}
// add queues wm: onto its stream's queue when it has a stream, otherwise
// onto the non-stream ("zero") queue.
func (ws *writeScheduler) add(wm frameWriteMsg) {
	if st := wm.stream; st != nil {
		ws.streamQueue(st.id).push(wm)
		return
	}
	ws.zero.push(wm)
}
// streamQueue returns the queue for streamID, creating and registering one
// (and lazily creating the map itself) when the stream has none yet.
func (ws *writeScheduler) streamQueue(streamID uint32) *writeQueue {
	existing, ok := ws.sq[streamID]
	if ok {
		return existing
	}
	if ws.sq == nil {
		ws.sq = make(map[uint32]*writeQueue)
	}
	fresh := ws.getEmptyQueue()
	ws.sq[streamID] = fresh
	return fresh
}
// take returns the most important frame to write and removes it from the scheduler.
// It is illegal to call this if the scheduler is empty or if there are no connection-level
// flow control bytes available.
//
// ok is false when nothing can be written right now. It panics if
// ws.maxFrameSize is zero or if ws.canSend was left non-empty.
func (ws *writeScheduler) take() (wm frameWriteMsg, ok bool) {
if ws.maxFrameSize == 0 {
panic("internal error: ws.maxFrameSize not initialized or invalid")
}
// If there are any frames not associated with streams, prefer those first.
// These are usually SETTINGS, etc.
if !ws.zero.empty() {
return ws.zero.shift(), true
}
if len(ws.sq) == 0 {
return
}
// Next, prioritize frames on streams that aren't DATA frames (no cost).
// Map iteration order decides which stream wins when several qualify.
for id, q := range ws.sq {
if q.firstIsNoCost() {
return ws.takeFrom(id, q)
}
}
// Now, all that remains are DATA frames with non-zero bytes to
// send. So pick the best one.
if len(ws.canSend) != 0 {
panic("should be empty")
}
for _, q := range ws.sq {
if n := ws.streamWritableBytes(q); n > 0 {
ws.canSend = append(ws.canSend, q)
}
}
if len(ws.canSend) == 0 {
return
}
// Reset the scratch slice once the chosen frame has been taken.
defer ws.zeroCanSend()
// TODO: find the best queue
q := ws.canSend[0]
return ws.takeFrom(q.streamID(), q)
}
// zeroCanSend is deferred from take. It nils out every slot of ws.canSend
// and truncates the slice to length zero, keeping its backing storage.
func (ws *writeScheduler) zeroCanSend() {
	for i := 0; i < len(ws.canSend); i++ {
		ws.canSend[i] = nil
	}
	ws.canSend = ws.canSend[:0]
}
// streamWritableBytes reports how many DATA bytes could be written from the
// head of q if its stream were picked: the stream's available flow-control
// quota, capped by ws.maxFrameSize and by the size of the pending payload.
// It returns 0 when the stream has no quota.
//
// The head of q must be a *writeData; anything else makes the type
// assertion panic.
func (ws *writeScheduler) streamWritableBytes(q *writeQueue) int32 {
	head := q.head()
	limit := head.stream.flow.available() // max we can write
	if limit == 0 {
		return 0
	}
	if frameMax := int32(ws.maxFrameSize); frameMax < limit {
		limit = frameMax
	}
	if limit == 0 {
		panic("internal error: ws.maxFrameSize not initialized or invalid")
	}
	data := head.write.(*writeData)
	if pending := len(data.p); pending < int(limit) {
		limit = int32(pending)
	}
	return limit
}
// takeFrom removes and returns the next frame to write from q, the queue
// registered under stream id.
//
// For a non-empty DATA write that exceeds what may be sent (stream flow
// control quota capped by ws.maxFrameSize), it returns a new message holding
// only the allowed prefix and leaves the shrunken remainder at the head of
// q. ok is false when the head is a non-empty DATA write and the stream has
// no quota at all. When q becomes empty it is returned to the pool and
// removed from ws.sq.
func (ws *writeScheduler) takeFrom(id uint32, q *writeQueue) (wm frameWriteMsg, ok bool) {
wm = q.head()
// If the first item in this queue costs flow control tokens
// and we don't have enough, write as much as we can.
if wd, ok := wm.write.(*writeData); ok && len(wd.p) > 0 {
allowed := wm.stream.flow.available() // max we can write
if allowed == 0 {
// No quota available. Caller can try the next stream.
return frameWriteMsg{}, false
}
if int32(ws.maxFrameSize) < allowed {
allowed = int32(ws.maxFrameSize)
}
// TODO: further restrict the allowed size, because even if
// the peer says it's okay to write 16MB data frames, we might
// want to write smaller ones to properly weight competing
// streams' priorities.
if len(wd.p) > int(allowed) {
// Charge the quota for the chunk, then split it off the pending payload.
wm.stream.flow.take(allowed)
chunk := wd.p[:allowed]
wd.p = wd.p[allowed:]
// Make up a new write message of a valid size, rather
// than shifting one off the queue.
return frameWriteMsg{
stream: wm.stream,
write: &writeData{
streamID: wd.streamID,
p: chunk,
// even if the original had endStream set, there
// are bytes remaining because len(wd.p) > allowed,
// so we know endStream is false:
endStream: false,
},
// our caller is blocking on the final DATA frame, not
// these intermediates, so no need to wait:
done: nil,
}, true
}
// The whole payload fits: charge its full length.
wm.stream.flow.take(int32(len(wd.p)))
}
q.shift()
if q.empty() {
ws.putEmptyQueue(q)
delete(ws.sq, id)
}
return wm, true
}
// forgetStream drops everything queued for stream id. The stream's queue is
// removed from ws.sq, its entries are zeroed, and the emptied queue is put
// back in the pool for reuse. Unknown IDs are ignored.
func (ws *writeScheduler) forgetStream(id uint32) {
	if q, ok := ws.sq[id]; ok {
		delete(ws.sq, id)
		// But keep it for others later.
		for i := 0; i < len(q.s); i++ {
			q.s[i] = frameWriteMsg{}
		}
		q.s = q.s[:0]
		ws.putEmptyQueue(q)
	}
}
// writeQueue is a FIFO of pending frame writes: push appends to s and shift
// removes from the front.
type writeQueue struct {
s []frameWriteMsg
}
// streamID returns the stream ID of the message at the head of a non-empty,
// stream-specific queue.
func (q *writeQueue) streamID() uint32 {
	first := q.s[0]
	return first.stream.id
}
// empty reports whether the queue holds no messages.
func (q *writeQueue) empty() bool {
	pending := len(q.s)
	return pending == 0
}
// push appends msg to the back of the queue.
func (q *writeQueue) push(wm frameWriteMsg) {
	grown := append(q.s, wm)
	q.s = grown
}
// head returns, without removing it, the message that shift would remove
// next. It panics on an empty queue.
func (q *writeQueue) head() frameWriteMsg {
	if q.empty() {
		panic("invalid use of queue")
	}
	return q.s[0]
}
// shift removes and returns the message at the front of the queue, sliding
// the remaining entries down one slot and zeroing the vacated tail slot.
// It panics on an empty queue.
func (q *writeQueue) shift() frameWriteMsg {
	if len(q.s) == 0 {
		panic("invalid use of queue")
	}
	first := q.s[0]
	last := len(q.s) - 1
	// TODO: less copy-happy queue.
	copy(q.s, q.s[1:])
	q.s[last] = frameWriteMsg{}
	q.s = q.s[:last]
	return first
}
// firstIsNoCost reports whether the head of the queue is anything other than
// a DATA write with a non-empty payload, i.e. a frame that is not limited by
// the payload-size checks applied to DATA writes.
func (q *writeQueue) firstIsNoCost() bool {
	data, isData := q.s[0].write.(*writeData)
	return !isData || len(data.p) == 0
}

View File

@@ -0,0 +1,94 @@
# etcd/client
etcd/client is the Go client library for etcd.
[![GoDoc](https://godoc.org/github.com/coreos/etcd/client?status.png)](https://godoc.org/github.com/coreos/etcd/client)
## Install
```bash
go get github.com/coreos/etcd/client
```
## Usage
```go
package main
import (
"log"
"time"
"github.com/coreos/etcd/Godeps/_workspace/src/golang.org/x/net/context"
"github.com/coreos/etcd/client"
)
func main() {
cfg := client.Config{
Endpoints: []string{"http://127.0.0.1:2379"},
Transport: client.DefaultTransport,
// set timeout per request to fail fast when the target endpoint is unavailable
HeaderTimeoutPerRequest: time.Second,
}
c, err := client.New(cfg)
if err != nil {
log.Fatal(err)
}
kapi := client.NewKeysAPI(c)
resp, err := kapi.Set(context.Background(), "foo", "bar", nil)
if err != nil {
log.Fatal(err)
}
}
```
## Error Handling
etcd client might return three types of errors.
- context error
Each API call takes a `context` as its first parameter. A context can be canceled or have an attached deadline. If the context is canceled or reaches its deadline, the corresponding context error will be returned no matter what internal errors the API call has already encountered.
- cluster error
Each API call tries to send its request to the cluster endpoints one by one until it successfully gets a response. If a request to an endpoint fails, due to exceeding the per-request timeout or connection issues, the error will be added to a list of errors. If all possible endpoints fail, a cluster error that includes all encountered errors will be returned.
- response error
If the response received from the cluster is invalid, a plain string error will be returned. For example, it might be an invalid JSON error.
Here is the example code to handle client errors:
```go
cfg := client.Config{Endpoints: []string{"http://etcd1:2379", "http://etcd2:2379", "http://etcd3:2379"}}
c, err := client.New(cfg)
if err != nil {
log.Fatal(err)
}
kapi := client.NewKeysAPI(c)
resp, err := kapi.Set(ctx, "test", "bar", nil)
if err != nil {
if err == context.Canceled {
// ctx is canceled by another routine
} else if err == context.DeadlineExceeded {
// ctx is attached with a deadline and it exceeded
} else if cerr, ok := err.(*client.ClusterError); ok {
// process (cerr.Errors)
} else {
// bad cluster endpoints, which are not etcd servers
}
}
```
## Caveat
1. etcd/client prefers to use the same endpoint as long as the endpoint continues to work well. This saves socket resources, and improves efficiency for both client and server side. This preference doesn't remove consistency from the data consumed by the client because data replicated to each etcd member has already passed through the consensus process.
2. etcd/client does round-robin rotation on other available endpoints if the preferred endpoint isn't functioning properly. For example, if the member that etcd/client connects to is hard killed, etcd/client will fail on the first attempt with the killed member, and succeed on the second attempt with another member. If it fails to talk to all available endpoints, it will return all the errors that occurred.
3. By default, etcd/client currently cannot handle the case where the remote server is SIGSTOPed. The TCP keepalive mechanism doesn't help in this scenario because the operating system may still send TCP keep-alive packets. Over time we'd like to improve this functionality, but solving this issue isn't high priority because a real-life case in which a server is stopped, but the connection is kept alive, hasn't been brought to our attention.
4. etcd/client cannot detect whether the member in use is healthy when doing read requests. If the member is isolated from the cluster, etcd/client may retrieve outdated data. As a workaround, users can monitor the experimental /health endpoint for member health information. We are working on improving this in [#3265](https://github.com/coreos/etcd/issues/3265).

View File

@@ -0,0 +1,235 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package client
import (
"bytes"
"encoding/json"
"net/http"
"net/url"
"golang.org/x/net/context"
)
// Role is the JSON representation of an auth role. Grant and Revoke are
// optional and omitted from the JSON when nil; the role methods in this file
// set them on PUT requests to add or remove permissions.
type Role struct {
Role string `json:"role"`
Permissions Permissions `json:"permissions"`
Grant *Permissions `json:"grant,omitempty"`
Revoke *Permissions `json:"revoke,omitempty"`
}
// Permissions holds a role's permissions; only the key-value store ("kv")
// section is defined.
type Permissions struct {
KV rwPermission `json:"kv"`
}
// rwPermission lists the entries a role may read and the entries it may
// write (the role API calls these "prefixes").
type rwPermission struct {
Read []string `json:"read"`
Write []string `json:"write"`
}
// PermissionType selects which side(s) of an rwPermission an operation
// applies to.
type PermissionType int
const (
// ReadPermission targets the Read list only.
ReadPermission PermissionType = iota
// WritePermission targets the Write list only.
WritePermission
// ReadWritePermission targets both the Read and Write lists.
ReadWritePermission
)
// NewAuthRoleAPI returns an AuthRoleAPI that talks HTTP, through c, to
// etcd's role creation and modification endpoints.
func NewAuthRoleAPI(c Client) AuthRoleAPI {
	api := &httpAuthRoleAPI{client: c}
	return api
}
// AuthRoleAPI is the set of operations available on etcd auth roles.
type AuthRoleAPI interface {
// Add a role.
AddRole(ctx context.Context, role string) error
// Remove a role.
RemoveRole(ctx context.Context, role string) error
// Get role details.
GetRole(ctx context.Context, role string) (*Role, error)
// Grant a role some permission prefixes for the KV store.
GrantRoleKV(ctx context.Context, role string, prefixes []string, permType PermissionType) (*Role, error)
// Revoke some permission prefixes for a role on the KV store.
RevokeRoleKV(ctx context.Context, role string, prefixes []string, permType PermissionType) (*Role, error)
// List roles.
ListRoles(ctx context.Context) ([]string, error)
}
// httpAuthRoleAPI implements AuthRoleAPI on top of an httpClient.
type httpAuthRoleAPI struct {
client httpClient
}
// authRoleAPIAction describes one request against a single role: the HTTP
// verb, the role name used in the URL, and an optional JSON body (role).
type authRoleAPIAction struct {
verb string
name string
role *Role
}
// authRoleAPIList describes the request that lists all roles.
type authRoleAPIList struct{}
// HTTPRequest builds the GET request for the "roles" collection under ep,
// with a JSON Content-Type header.
func (list *authRoleAPIList) HTTPRequest(ep url.URL) *http.Request {
	target := v2AuthURL(ep, "roles", "").String()
	req, _ := http.NewRequest("GET", target, nil)
	req.Header.Set("Content-Type", "application/json")
	return req
}
// HTTPRequest builds the request for this action against the role's URL
// under ep. When a role payload is attached it is sent as a JSON body with
// a matching Content-Type; otherwise the request has no body. A payload
// that cannot be marshalled panics.
func (l *authRoleAPIAction) HTTPRequest(ep url.URL) *http.Request {
	target := v2AuthURL(ep, "roles", l.name).String()
	if l.role != nil {
		payload, err := json.Marshal(l.role)
		if err != nil {
			panic(err)
		}
		req, _ := http.NewRequest(l.verb, target, bytes.NewReader(payload))
		req.Header.Set("Content-Type", "application/json")
		return req
	}
	req, _ := http.NewRequest(l.verb, target, nil)
	return req
}
// ListRoles fetches the names of all roles. It returns the transport error,
// the status-check error for any status other than 200 OK, or the JSON
// decoding error, whichever occurs first.
func (r *httpAuthRoleAPI) ListRoles(ctx context.Context) ([]string, error) {
	resp, body, err := r.client.Do(ctx, &authRoleAPIList{})
	if err != nil {
		return nil, err
	}
	if statusErr := assertStatusCode(resp.StatusCode, http.StatusOK); statusErr != nil {
		return nil, statusErr
	}

	var roleList struct {
		Roles []string `json:"roles"`
	}
	if decodeErr := json.Unmarshal(body, &roleList); decodeErr != nil {
		return nil, decodeErr
	}
	return roleList.Roles, nil
}
// AddRole creates a role named rolename by PUTting a Role that carries only
// that name.
func (r *httpAuthRoleAPI) AddRole(ctx context.Context, rolename string) error {
	action := &authRoleAPIAction{
		verb: "PUT",
		name: rolename,
		role: &Role{Role: rolename},
	}
	return r.addRemoveRole(ctx, action)
}
// RemoveRole deletes the role named rolename with a body-less DELETE.
func (r *httpAuthRoleAPI) RemoveRole(ctx context.Context, rolename string) error {
	action := &authRoleAPIAction{verb: "DELETE", name: rolename}
	return r.addRemoveRole(ctx, action)
}
// addRemoveRole executes req and accepts 200 OK or 201 Created as success.
// For any other status the response body is decoded as an authError and
// returned; if that decoding fails, the decoding error is returned instead.
func (r *httpAuthRoleAPI) addRemoveRole(ctx context.Context, req *authRoleAPIAction) error {
	resp, body, err := r.client.Do(ctx, req)
	if err != nil {
		return err
	}
	if statusErr := assertStatusCode(resp.StatusCode, http.StatusOK, http.StatusCreated); statusErr == nil {
		return nil
	}

	var sec authError
	if decodeErr := json.Unmarshal(body, &sec); decodeErr != nil {
		return decodeErr
	}
	return sec
}
// GetRole fetches the details of the role named rolename with a GET.
func (r *httpAuthRoleAPI) GetRole(ctx context.Context, rolename string) (*Role, error) {
	action := &authRoleAPIAction{verb: "GET", name: rolename}
	return r.modRole(ctx, action)
}
// buildRWPermission places prefixes in the Read list, the Write list, or
// both, according to permType. An unrecognised permType yields an empty
// rwPermission.
func buildRWPermission(prefixes []string, permType PermissionType) rwPermission {
	var out rwPermission
	if permType == ReadPermission || permType == ReadWritePermission {
		out.Read = prefixes
	}
	if permType == WritePermission || permType == ReadWritePermission {
		out.Write = prefixes
	}
	return out
}
// GrantRoleKV PUTs a Role whose Grant section contains prefixes on the
// side(s) selected by permType, and returns the role as reported back by
// the server.
func (r *httpAuthRoleAPI) GrantRoleKV(ctx context.Context, rolename string, prefixes []string, permType PermissionType) (*Role, error) {
	grant := &Permissions{KV: buildRWPermission(prefixes, permType)}
	action := &authRoleAPIAction{
		verb: "PUT",
		name: rolename,
		role: &Role{Role: rolename, Grant: grant},
	}
	return r.modRole(ctx, action)
}
// RevokeRoleKV PUTs a Role whose Revoke section contains prefixes on the
// side(s) selected by permType, and returns the role as reported back by
// the server.
func (r *httpAuthRoleAPI) RevokeRoleKV(ctx context.Context, rolename string, prefixes []string, permType PermissionType) (*Role, error) {
	revoke := &Permissions{KV: buildRWPermission(prefixes, permType)}
	action := &authRoleAPIAction{
		verb: "PUT",
		name: rolename,
		role: &Role{Role: rolename, Revoke: revoke},
	}
	return r.modRole(ctx, action)
}
// modRole executes req and decodes a 200 OK response body into a Role.
// For any other status the body is decoded as an authError and returned as
// the error; JSON decoding failures are returned as-is.
func (r *httpAuthRoleAPI) modRole(ctx context.Context, req *authRoleAPIAction) (*Role, error) {
	resp, body, err := r.client.Do(ctx, req)
	if err != nil {
		return nil, err
	}
	if statusErr := assertStatusCode(resp.StatusCode, http.StatusOK); statusErr != nil {
		var sec authError
		if decodeErr := json.Unmarshal(body, &sec); decodeErr != nil {
			return nil, decodeErr
		}
		return nil, sec
	}

	role := new(Role)
	if decodeErr := json.Unmarshal(body, role); decodeErr != nil {
		return nil, decodeErr
	}
	return role, nil
}

View File

@@ -0,0 +1,297 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package client
import (
"bytes"
"encoding/json"
"net/http"
"net/url"
"path"
"golang.org/x/net/context"
)
// defaultV2AuthPrefix is the path under which the v2 auth endpoints live.
var defaultV2AuthPrefix = "/v2/auth"

// User is the JSON representation of an auth user. Password, Grant and
// Revoke are omitted from the JSON when empty.
type User struct {
	User     string   `json:"user"`
	Password string   `json:"password,omitempty"`
	Roles    []string `json:"roles"`
	Grant    []string `json:"grant,omitempty"`
	Revoke   []string `json:"revoke,omitempty"`
}

// v2AuthURL returns a copy of ep whose path is extended with the auth
// prefix, the action, and (when non-empty) the name. ep is received by
// value, so the caller's URL is left untouched.
func v2AuthURL(ep url.URL, action string, name string) *url.URL {
	// path.Join ignores empty elements, so an empty name needs no special case.
	ep.Path = path.Join(ep.Path, defaultV2AuthPrefix, action, name)
	return &ep
}
// NewAuthAPI returns an AuthAPI that talks HTTP, through c, to etcd's
// general auth features.
func NewAuthAPI(c Client) AuthAPI {
	api := &httpAuthAPI{client: c}
	return api
}
// AuthAPI switches etcd's auth feature on and off.
type AuthAPI interface {
// Enable auth.
Enable(ctx context.Context) error
// Disable auth.
Disable(ctx context.Context) error
}
// httpAuthAPI implements AuthAPI on top of an httpClient.
type httpAuthAPI struct {
client httpClient
}
// Enable turns auth on by issuing a PUT against the "enable" endpoint.
func (s *httpAuthAPI) Enable(ctx context.Context) error {
	action := &authAPIAction{verb: "PUT"}
	return s.enableDisable(ctx, action)
}
// Disable turns auth off by issuing a DELETE against the "enable" endpoint.
func (s *httpAuthAPI) Disable(ctx context.Context) error {
	action := &authAPIAction{verb: "DELETE"}
	return s.enableDisable(ctx, action)
}
// enableDisable executes req and accepts 200 OK or 201 Created as success.
// For any other status the response body is decoded as an authError and
// returned; if that decoding fails, the decoding error is returned instead.
func (s *httpAuthAPI) enableDisable(ctx context.Context, req httpAction) error {
	resp, body, err := s.client.Do(ctx, req)
	if err != nil {
		return err
	}
	if statusErr := assertStatusCode(resp.StatusCode, http.StatusOK, http.StatusCreated); statusErr == nil {
		return nil
	}

	var sec authError
	if decodeErr := json.Unmarshal(body, &sec); decodeErr != nil {
		return decodeErr
	}
	return sec
}
// authAPIAction is a request against the auth "enable" endpoint; verb is
// the HTTP method to use.
type authAPIAction struct {
	verb string
}

// HTTPRequest builds a body-less request with the action's verb for the
// "enable" endpoint under ep.
func (l *authAPIAction) HTTPRequest(ep url.URL) *http.Request {
	target := v2AuthURL(ep, "enable", "").String()
	req, _ := http.NewRequest(l.verb, target, nil)
	return req
}
// authError is the error payload decoded from a failed auth response body.
// Only Message takes part in JSON (de)serialisation; Code is excluded.
type authError struct {
	Message string `json:"message"`
	Code    int    `json:"-"`
}

// Error implements the error interface by returning the message verbatim.
func (e authError) Error() string {
	msg := e.Message
	return msg
}
// NewAuthUserAPI returns an AuthUserAPI that talks HTTP, through c, to
// etcd's user creation and modification endpoints.
func NewAuthUserAPI(c Client) AuthUserAPI {
	api := &httpAuthUserAPI{client: c}
	return api
}
// AuthUserAPI is the set of operations available on etcd auth users.
type AuthUserAPI interface {
// Add a user.
AddUser(ctx context.Context, username string, password string) error
// Remove a user.
RemoveUser(ctx context.Context, username string) error
// Get user details.
GetUser(ctx context.Context, username string) (*User, error)
// Grant a user some permission roles.
GrantUser(ctx context.Context, username string, roles []string) (*User, error)
// Revoke some permission roles from a user.
RevokeUser(ctx context.Context, username string, roles []string) (*User, error)
// Change the user's password.
ChangePassword(ctx context.Context, username string, password string) (*User, error)
// List users.
ListUsers(ctx context.Context) ([]string, error)
}
// httpAuthUserAPI implements AuthUserAPI on top of an httpClient.
type httpAuthUserAPI struct {
client httpClient
}
// authUserAPIAction describes one request against a single user: the HTTP
// verb, the username used in the URL, and an optional JSON body (user).
type authUserAPIAction struct {
verb string
username string
user *User
}
// authUserAPIList describes the request that lists all users.
type authUserAPIList struct{}
// HTTPRequest builds the GET request for the "users" collection under ep,
// with a JSON Content-Type header.
func (list *authUserAPIList) HTTPRequest(ep url.URL) *http.Request {
	target := v2AuthURL(ep, "users", "").String()
	req, _ := http.NewRequest("GET", target, nil)
	req.Header.Set("Content-Type", "application/json")
	return req
}
// HTTPRequest builds the request for this action against the user's URL
// under ep. When a user payload is attached it is sent as a JSON body with
// a matching Content-Type; otherwise the request has no body. A payload
// that cannot be marshalled panics.
func (l *authUserAPIAction) HTTPRequest(ep url.URL) *http.Request {
	target := v2AuthURL(ep, "users", l.username).String()
	if l.user != nil {
		payload, err := json.Marshal(l.user)
		if err != nil {
			panic(err)
		}
		req, _ := http.NewRequest(l.verb, target, bytes.NewReader(payload))
		req.Header.Set("Content-Type", "application/json")
		return req
	}
	req, _ := http.NewRequest(l.verb, target, nil)
	return req
}
// ListUsers fetches the names of all users. For any status other than
// 200 OK the response body is decoded as an authError and returned as the
// error; JSON decoding failures are returned as-is.
func (u *httpAuthUserAPI) ListUsers(ctx context.Context) ([]string, error) {
	resp, body, err := u.client.Do(ctx, &authUserAPIList{})
	if err != nil {
		return nil, err
	}
	if statusErr := assertStatusCode(resp.StatusCode, http.StatusOK); statusErr != nil {
		var sec authError
		if decodeErr := json.Unmarshal(body, &sec); decodeErr != nil {
			return nil, decodeErr
		}
		return nil, sec
	}

	var listing struct {
		Users []string `json:"users"`
	}
	if decodeErr := json.Unmarshal(body, &listing); decodeErr != nil {
		return nil, decodeErr
	}
	return listing.Users, nil
}
// AddUser creates a user by PUTting a User carrying the given name and
// password.
func (u *httpAuthUserAPI) AddUser(ctx context.Context, username string, password string) error {
	action := &authUserAPIAction{
		verb:     "PUT",
		username: username,
		user:     &User{User: username, Password: password},
	}
	return u.addRemoveUser(ctx, action)
}
// RemoveUser deletes the named user with a body-less DELETE.
func (u *httpAuthUserAPI) RemoveUser(ctx context.Context, username string) error {
	action := &authUserAPIAction{verb: "DELETE", username: username}
	return u.addRemoveUser(ctx, action)
}
// addRemoveUser executes req and accepts 200 OK or 201 Created as success.
// For any other status the response body is decoded as an authError and
// returned; if that decoding fails, the decoding error is returned instead.
func (u *httpAuthUserAPI) addRemoveUser(ctx context.Context, req *authUserAPIAction) error {
	resp, body, err := u.client.Do(ctx, req)
	if err != nil {
		return err
	}
	if statusErr := assertStatusCode(resp.StatusCode, http.StatusOK, http.StatusCreated); statusErr == nil {
		return nil
	}

	var sec authError
	if decodeErr := json.Unmarshal(body, &sec); decodeErr != nil {
		return decodeErr
	}
	return sec
}
// GetUser fetches the details of the named user with a GET.
func (u *httpAuthUserAPI) GetUser(ctx context.Context, username string) (*User, error) {
	action := &authUserAPIAction{verb: "GET", username: username}
	return u.modUser(ctx, action)
}
// GrantUser PUTs a User whose Grant list is roles and returns the user as
// reported back by the server.
func (u *httpAuthUserAPI) GrantUser(ctx context.Context, username string, roles []string) (*User, error) {
	action := &authUserAPIAction{
		verb:     "PUT",
		username: username,
		user:     &User{User: username, Grant: roles},
	}
	return u.modUser(ctx, action)
}
// RevokeUser PUTs a User whose Revoke list is roles and returns the user as
// reported back by the server.
func (u *httpAuthUserAPI) RevokeUser(ctx context.Context, username string, roles []string) (*User, error) {
	action := &authUserAPIAction{
		verb:     "PUT",
		username: username,
		user:     &User{User: username, Revoke: roles},
	}
	return u.modUser(ctx, action)
}
// ChangePassword PUTs a User carrying the new password and returns the user
// as reported back by the server.
func (u *httpAuthUserAPI) ChangePassword(ctx context.Context, username string, password string) (*User, error) {
	action := &authUserAPIAction{
		verb:     "PUT",
		username: username,
		user:     &User{User: username, Password: password},
	}
	return u.modUser(ctx, action)
}
// modUser executes req and decodes a 200 OK response body into a User.
// For any other status the body is decoded as an authError and returned as
// the error; JSON decoding failures are returned as-is.
func (u *httpAuthUserAPI) modUser(ctx context.Context, req *authUserAPIAction) (*User, error) {
	resp, body, err := u.client.Do(ctx, req)
	if err != nil {
		return nil, err
	}
	if statusErr := assertStatusCode(resp.StatusCode, http.StatusOK); statusErr != nil {
		var sec authError
		if decodeErr := json.Unmarshal(body, &sec); decodeErr != nil {
			return nil, decodeErr
		}
		return nil, sec
	}

	user := new(User)
	if decodeErr := json.Unmarshal(body, user); decodeErr != nil {
		return nil, decodeErr
	}
	return user, nil
}

View File

@@ -0,0 +1,20 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// borrowed from golang/net/context/ctxhttp/cancelreq.go
// +build go1.5
package client
import "net/http"
// requestCanceler installs a fresh channel as req.Cancel and returns a
// function that closes that channel. The tr argument is not used by this
// variant. The returned function closes the channel unconditionally, so it
// must be called at most once.
func requestCanceler(tr CancelableTransport, req *http.Request) func() {
	cancel := make(chan struct{})
	req.Cancel = cancel
	return func() { close(cancel) }
}

View File

@@ -0,0 +1,17 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// borrowed from golang/net/context/ctxhttp/cancelreq_go14.go
// +build !go1.5
package client
import "net/http"
// requestCanceler returns a function that cancels req by calling
// tr.CancelRequest(req). Nothing happens until the returned function is
// invoked.
func requestCanceler(tr CancelableTransport, req *http.Request) func() {
	cancel := func() { tr.CancelRequest(req) }
	return cancel
}

View File

@@ -0,0 +1,514 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package client
import (
"errors"
"fmt"
"io/ioutil"
"math/rand"
"net"
"net/http"
"net/url"
"reflect"
"sort"
"sync"
"time"
"golang.org/x/net/context"
)
var (
// ErrNoEndpoints is returned when a client is given, or is left with, an
// empty endpoint list.
ErrNoEndpoints = errors.New("client: no endpoints available")
// ErrTooManyRedirects is returned by DefaultCheckRedirect once its
// redirect limit is exceeded.
ErrTooManyRedirects = errors.New("client: too many redirects")
// ErrClusterUnavailable supplies the message reported by ClusterError.
ErrClusterUnavailable = errors.New("client: etcd cluster is unavailable or misconfigured")
// errTooManyRedirectChecks is returned when the redirect-following client
// exhausts its hard cap on loop iterations.
errTooManyRedirectChecks = errors.New("client: too many redirect checks")
)
// DefaultRequestTimeout is a five second duration.
// NOTE(review): it is not referenced in this part of the file; presumably a
// suggested per-request timeout for callers - confirm against its users.
var DefaultRequestTimeout = 5 * time.Second
// DefaultTransport is the transport used when Config.Transport is nil. It
// honours proxy settings from the environment, dials with a 30s timeout and
// 30s keep-alive, and allows 10s for the TLS handshake.
var DefaultTransport CancelableTransport = &http.Transport{
Proxy: http.ProxyFromEnvironment,
Dial: (&net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
}).Dial,
TLSHandshakeTimeout: 10 * time.Second,
}
// Config holds the settings that New uses to build a Client.
type Config struct {
// Endpoints defines a set of URLs (schemes, hosts and ports only)
// that can be used to communicate with a logical etcd cluster. For
// example, a three-node cluster could be provided like so:
//
// Endpoints: []string{
// "http://node1.example.com:2379",
// "http://node2.example.com:2379",
// "http://node3.example.com:2379",
// }
//
// If multiple endpoints are provided, the Client will attempt to
// use them all in the event that one or more of them are unusable.
//
// If Client.Sync is ever called, the Client may cache an alternate
// set of endpoints to continue operation.
Endpoints []string
// Transport is used by the Client to drive HTTP requests. If not
// provided, DefaultTransport will be used.
Transport CancelableTransport
// CheckRedirect specifies the policy for handling HTTP redirects.
// If CheckRedirect is not nil, the Client calls it before
// following an HTTP redirect. The sole argument is the number of
// requests that have already been made. If CheckRedirect returns
// an error, Client.Do will not make any further requests and will
// return the error to the caller.
//
// If CheckRedirect is nil, the Client uses its default policy,
// which is to stop after 10 consecutive requests.
CheckRedirect CheckRedirectFunc
// Username specifies the user credential to add as an authorization header
Username string
// Password is the password for the specified user to add as an authorization header
// to the request.
Password string
// HeaderTimeoutPerRequest specifies the time limit to wait for response
// header in a single request made by the Client. The timeout includes
// connection time, any redirects, and header wait time.
//
// For non-watch GET request, server returns the response body immediately.
// For PUT/POST/DELETE request, server will attempt to commit request
// before responding, which is expected to take `100ms + 2 * RTT`.
// For watch request, server returns the header immediately to notify Client
// watch start. But if server is behind some kind of proxy, the response
// header may be cached at proxy, and Client cannot rely on this behavior.
//
// One API call may send multiple requests to different etcd servers until it
// succeeds. Use context of the API to specify the overall timeout.
//
// A HeaderTimeoutPerRequest of zero means no timeout.
HeaderTimeoutPerRequest time.Duration
}
// transport returns the configured Transport, or DefaultTransport when none
// was set.
func (cfg *Config) transport() CancelableTransport {
	if tr := cfg.Transport; tr != nil {
		return tr
	}
	return DefaultTransport
}
// checkRedirect returns the configured CheckRedirect policy, or
// DefaultCheckRedirect when none was set.
func (cfg *Config) checkRedirect() CheckRedirectFunc {
	if cr := cfg.CheckRedirect; cr != nil {
		return cr
	}
	return DefaultCheckRedirect
}
// CancelableTransport mimics net/http.Transport, but requires that
// the object also support request cancellation.
//
// It combines http.RoundTripper with a CancelRequest method that takes the
// request to be canceled.
type CancelableTransport interface {
http.RoundTripper
CancelRequest(req *http.Request)
}
// CheckRedirectFunc decides whether another redirect may be followed. It
// receives the number of requests already made and returns a non-nil error to
// stop.
type CheckRedirectFunc func(via int) error

// DefaultCheckRedirect follows up to 10 redirects, but no more: it returns
// ErrTooManyRedirects once via exceeds 10 and nil otherwise.
var DefaultCheckRedirect CheckRedirectFunc = func(via int) error {
	const maxRedirects = 10
	if via <= maxRedirects {
		return nil
	}
	return ErrTooManyRedirects
}
// Client is the handle returned by New. It can issue HTTP actions (through
// the embedded httpClient) and manages the set of endpoints those actions are
// sent to.
type Client interface {
// Sync updates the internal cache of the etcd cluster's membership.
Sync(context.Context) error
// AutoSync periodically calls Sync() every given interval.
// The recommended sync interval is 10 seconds to 1 minute, which does
// not bring too much overhead to server and makes client catch up the
// cluster change in time.
//
// The example to use it:
//
// for {
// err := client.AutoSync(ctx, 10*time.Second)
// if err == context.DeadlineExceeded || err == context.Canceled {
// break
// }
// log.Print(err)
// }
AutoSync(context.Context, time.Duration) error
// Endpoints returns a copy of the current set of API endpoints used
// by Client to resolve HTTP requests. If Sync has ever been called,
// this may differ from the initial Endpoints provided in the Config.
Endpoints() []string
httpClient
}
// New builds a Client from cfg.
//
// The client uses cfg's transport and redirect policy (or the package
// defaults), attaches basic-auth credentials when cfg.Username is non-empty,
// and starts from a shuffled copy of cfg.Endpoints. It returns an error when
// the endpoint list is empty or contains a URL that cannot be parsed.
func New(cfg Config) (Client, error) {
	factory := newHTTPClientFactory(cfg.transport(), cfg.checkRedirect(), cfg.HeaderTimeoutPerRequest)
	// Seed a private generator; it is only used to shuffle endpoints.
	seed := int64(time.Now().Nanosecond())
	c := &httpClusterClient{
		clientFactory: factory,
		rand:          rand.New(rand.NewSource(seed)),
	}
	if cfg.Username != "" {
		c.credentials = &credentials{username: cfg.Username, password: cfg.Password}
	}
	if err := c.reset(cfg.Endpoints); err != nil {
		return nil, err
	}
	return c, nil
}
// httpClient executes a single httpAction and returns the response, the body
// bytes, and any error.
type httpClient interface {
Do(context.Context, httpAction) (*http.Response, []byte, error)
}
// newHTTPClientFactory returns a factory that, for a given endpoint, builds a
// simpleHTTPClient (using tr and headerTimeout) wrapped in a
// redirectFollowingHTTPClient that applies the cr redirect policy.
func newHTTPClientFactory(tr CancelableTransport, cr CheckRedirectFunc, headerTimeout time.Duration) httpClientFactory {
	return func(ep url.URL) httpClient {
		base := &simpleHTTPClient{
			transport:     tr,
			endpoint:      ep,
			headerTimeout: headerTimeout,
		}
		return &redirectFollowingHTTPClient{client: base, checkRedirect: cr}
	}
}
// credentials holds the username and password applied to requests as HTTP
// basic authentication.
type credentials struct {
username string
password string
}
// httpClientFactory builds an httpClient bound to a single endpoint URL.
type httpClientFactory func(url.URL) httpClient
// httpAction produces the HTTP request to send to the given endpoint.
type httpAction interface {
HTTPRequest(url.URL) *http.Request
}
// httpClusterClient sends actions to a set of endpoints, trying them in turn
// starting from a preferred ("pinned") one. The embedded RWMutex guards the
// endpoint list and the pinned index.
type httpClusterClient struct {
// clientFactory builds the per-endpoint client used for each attempt.
clientFactory httpClientFactory
// endpoints is the current, shuffled list of endpoint URLs.
endpoints []url.URL
// pinned is the index in endpoints that Do tries first.
pinned int
// credentials, when non-nil, are added to every request as basic auth.
credentials *credentials
sync.RWMutex
// rand is the generator used to shuffle endpoints.
rand *rand.Rand
}
// reset replaces the client's endpoint list with a shuffled copy of eps and
// pins the first entry.
//
// It returns ErrNoEndpoints for an empty list and the parse error for the
// first URL that cannot be parsed; in both cases the client is left
// unchanged. It does no locking of its own.
func (c *httpClusterClient) reset(eps []string) error {
	if len(eps) == 0 {
		return ErrNoEndpoints
	}

	parsed := make([]url.URL, 0, len(eps))
	for _, raw := range eps {
		u, err := url.Parse(raw)
		if err != nil {
			return err
		}
		parsed = append(parsed, *u)
	}

	c.endpoints = shuffleEndpoints(c.rand, parsed)
	// TODO: pin old endpoint if possible, and rebalance when new endpoint appears
	c.pinned = 0
	return nil
}
// Do sends act to the cluster, trying each endpoint at most once and starting
// with the pinned one.
//
// An attempt fails when the per-endpoint client returns an error or the
// response status is 5xx. Failures are collected in a ClusterError, which is
// returned if every endpoint fails. A context cancellation or deadline error
// aborts immediately and is returned as-is. On success the endpoint that
// answered becomes the pinned endpoint. When credentials are configured, act
// is wrapped so that every request carries basic auth.
func (c *httpClusterClient) Do(ctx context.Context, act httpAction) (*http.Response, []byte, error) {
	// Snapshot shared state under the read lock so the attempts below run
	// without holding it.
	c.RLock()
	total := len(c.endpoints)
	snapshot := make([]url.URL, total)
	copied := copy(snapshot, c.endpoints)
	start := c.pinned
	action := act
	if creds := c.credentials; creds != nil {
		action = &authedAction{act: act, credentials: *creds}
	}
	c.RUnlock()

	switch {
	case total == 0:
		return nil, nil, ErrNoEndpoints
	case total != copied:
		return nil, nil, errors.New("unable to pick endpoint: copy failed")
	}

	cerr := &ClusterError{}
	for attempt := 0; attempt < total; attempt++ {
		// Walk the snapshot circularly, beginning at the pinned index.
		k := (start + attempt) % total
		ep := snapshot[k]

		resp, body, err := c.clientFactory(ep).Do(ctx, action)
		if err != nil {
			cerr.Errors = append(cerr.Errors, err)
			// mask previous errors with context error, which is controlled by user
			if err == context.Canceled || err == context.DeadlineExceeded {
				return nil, nil, err
			}
			continue
		}

		if code := resp.StatusCode; code/100 == 5 {
			if code == http.StatusInternalServerError || code == http.StatusServiceUnavailable {
				// TODO: make sure this is a no leader response
				cerr.Errors = append(cerr.Errors, fmt.Errorf("client: etcd member %s has no leader", ep.String()))
			} else {
				cerr.Errors = append(cerr.Errors, fmt.Errorf("client: etcd member %s returns server error [%s]", ep.String(), http.StatusText(code)))
			}
			continue
		}

		// Remember the endpoint that worked so the next call starts there.
		if k != start {
			c.Lock()
			c.pinned = k
			c.Unlock()
		}
		return resp, body, nil
	}
	return nil, nil, cerr
}
// Endpoints returns the current endpoint URLs as strings, in their current
// order. The returned slice is freshly allocated on every call.
func (c *httpClusterClient) Endpoints() []string {
	c.RLock()
	defer c.RUnlock()

	out := make([]string, 0, len(c.endpoints))
	for i := range c.endpoints {
		out = append(out, c.endpoints[i].String())
	}
	return out
}
// Sync fetches the member list through the members API and, if the set of
// advertised client URLs differs from the current endpoints, replaces the
// endpoints with the advertised ones via reset.
//
// When the two sets are equal nothing is changed, which keeps the pinned
// endpoint. Errors from listing members or from reset are returned.
func (c *httpClusterClient) Sync(ctx context.Context) error {
	members, err := NewMembersAPI(c).List(ctx)
	if err != nil {
		return err
	}

	c.Lock()
	defer c.Unlock()

	// Both slices are created non-nil so that DeepEqual below treats two
	// empty sets as equal.
	advertised := make([]string, 0)
	for _, m := range members {
		advertised = append(advertised, m.ClientURLs...)
	}
	sort.Strings(advertised)

	current := make([]string, 0, len(c.endpoints))
	for i := range c.endpoints {
		current = append(current, c.endpoints[i].String())
	}
	sort.Strings(current)

	// fast path if no change happens
	// this helps client to pin the endpoint when no cluster change
	if reflect.DeepEqual(advertised, current) {
		return nil
	}
	return c.reset(advertised)
}
// AutoSync calls Sync immediately and then once per interval until Sync
// fails or ctx is done. It returns the Sync error in the first case and
// ctx.Err() in the second; it never returns nil.
func (c *httpClusterClient) AutoSync(ctx context.Context, interval time.Duration) error {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		if err := c.Sync(ctx); err != nil {
			return err
		}
		select {
		case <-ticker.C:
			continue
		case <-ctx.Done():
			return ctx.Err()
		}
	}
}
// roundTripResponse carries the result of one RoundTrip call across a
// channel.
type roundTripResponse struct {
resp *http.Response
err error
}
// simpleHTTPClient sends actions to a single endpoint over the given
// transport. A positive headerTimeout bounds the wait for response headers.
type simpleHTTPClient struct {
transport CancelableTransport
endpoint url.URL
headerTimeout time.Duration
}
// Do builds the request for this client's endpoint from act, performs one
// round trip on the transport, and reads the entire response body.
//
// Waiting for the response headers is bounded by ctx and, when
// c.headerTimeout is positive, by that timeout as well. Reading the body is
// bounded by ctx only. If ctx ends first, ctx.Err() is returned; if only the
// header timeout fires, a "exceeded header timeout" error is returned. The
// response body is always closed before Do returns, so callers must use the
// returned byte slice rather than resp.Body.
func (c *simpleHTTPClient) Do(ctx context.Context, act httpAction) (*http.Response, []byte, error) {
	req := act.HTTPRequest(c.endpoint)

	if err := printcURL(req); err != nil {
		return nil, nil, err
	}

	// Create exactly one derived context. Creating a WithCancel context
	// first and then overwriting it with WithTimeout would drop the first
	// CancelFunc and leave that child registered with ctx until ctx ends.
	var hctx context.Context
	var hcancel context.CancelFunc
	if c.headerTimeout > 0 {
		hctx, hcancel = context.WithTimeout(ctx, c.headerTimeout)
	} else {
		hctx, hcancel = context.WithCancel(ctx)
	}
	defer hcancel()

	reqcancel := requestCanceler(c.transport, req)

	// Buffered so the round-trip goroutine can always deliver its result
	// and exit, even if nobody is receiving yet.
	rtchan := make(chan roundTripResponse, 1)
	go func() {
		resp, err := c.transport.RoundTrip(req)
		rtchan <- roundTripResponse{resp: resp, err: err}
		close(rtchan)
	}()

	var resp *http.Response
	var err error

	select {
	case rtresp := <-rtchan:
		resp, err = rtresp.resp, rtresp.err
	case <-hctx.Done():
		// cancel and wait for request to actually exit before continuing
		reqcancel()
		rtresp := <-rtchan
		resp = rtresp.resp
		switch {
		case ctx.Err() != nil:
			// The caller's context ended; report its error.
			err = ctx.Err()
		case hctx.Err() != nil:
			// Only the header timeout fired.
			err = fmt.Errorf("client: endpoint %s exceeded header timeout", c.endpoint.String())
		default:
			panic("failed to get error from context")
		}
	}

	// always check for resp nil-ness to deal with possible
	// race conditions between channels above
	defer func() {
		if resp != nil {
			resp.Body.Close()
		}
	}()

	if err != nil {
		return nil, nil, err
	}

	var body []byte
	done := make(chan struct{})
	go func() {
		body, err = ioutil.ReadAll(resp.Body)
		done <- struct{}{}
	}()

	select {
	case <-ctx.Done():
		// Closing the body unblocks the reader goroutine; wait for it so
		// body and err are no longer being written.
		resp.Body.Close()
		<-done
		return nil, nil, ctx.Err()
	case <-done:
	}

	return resp, body, err
}
// authedAction wraps another httpAction and adds HTTP basic authentication
// to the requests it produces.
type authedAction struct {
	act         httpAction
	credentials credentials
}

// HTTPRequest builds the wrapped action's request for the given endpoint and
// sets its basic-auth header from the stored credentials.
func (a *authedAction) HTTPRequest(url url.URL) *http.Request {
	req := a.act.HTTPRequest(url)
	creds := a.credentials
	req.SetBasicAuth(creds.username, creds.password)
	return req
}
// redirectFollowingHTTPClient wraps another httpClient and follows 3xx
// responses, consulting checkRedirect before each follow-up request.
type redirectFollowingHTTPClient struct {
client httpClient
checkRedirect CheckRedirectFunc
}
// Do executes act through the wrapped client and follows 3xx responses.
//
// Before every request after the first, checkRedirect is called with the
// number of requests already made; a non-nil result stops the loop and is
// returned. A 3xx response must carry a parseable Location header, which
// becomes the URL of the next request (built from the original act). The
// first non-3xx response is returned. As a hard stop independent of the
// policy, errTooManyRedirectChecks is returned after 100 requests.
func (r *redirectFollowingHTTPClient) Do(ctx context.Context, act httpAction) (*http.Response, []byte, error) {
	const maxChecks = 100

	current := act
	for made := 0; made < maxChecks; made++ {
		if made != 0 {
			if err := r.checkRedirect(made); err != nil {
				return nil, nil, err
			}
		}

		resp, body, err := r.client.Do(ctx, current)
		if err != nil {
			return nil, nil, err
		}
		if resp.StatusCode/100 != 3 {
			return resp, body, nil
		}

		target := resp.Header.Get("Location")
		if target == "" {
			return nil, nil, fmt.Errorf("Location header not set")
		}
		loc, err := url.Parse(target)
		if err != nil {
			return nil, nil, fmt.Errorf("Location header not valid URL: %s", target)
		}
		current = &redirectedHTTPAction{action: act, location: *loc}
	}

	return nil, nil, errTooManyRedirectChecks
}
// redirectedHTTPAction replays another action against a fixed redirect
// target.
type redirectedHTTPAction struct {
	action   httpAction
	location url.URL
}

// HTTPRequest builds the wrapped action's request for ep and then points its
// URL at the stored redirect location.
func (r *redirectedHTTPAction) HTTPRequest(ep url.URL) *http.Request {
	req := r.action.HTTPRequest(ep)
	req.URL = &r.location
	return req
}
// shuffleEndpoints returns a new slice holding the elements of eps in the
// order given by a permutation drawn from r. The input slice is not modified.
func shuffleEndpoints(r *rand.Rand, eps []url.URL) []url.URL {
	shuffled := make([]url.URL, 0, len(eps))
	for _, src := range r.Perm(len(eps)) {
		shuffled = append(shuffled, eps[src])
	}
	return shuffled
}

View File

@@ -0,0 +1,33 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package client
import "fmt"
// ClusterError collects the errors from the individual attempts of a failed
// cluster request.
type ClusterError struct {
	Errors []error
}

// Error returns the fixed ErrClusterUnavailable message; use Detail for the
// individual causes.
func (ce *ClusterError) Error() string {
	return ErrClusterUnavailable.Error()
}

// Detail returns one line per collected error, numbered from zero, in the
// form "error #N: <message>". It returns the empty string when no errors
// were collected.
func (ce *ClusterError) Detail() string {
	var detail string
	for idx := range ce.Errors {
		detail += fmt.Sprintf("error #%d: %s\n", idx, ce.Errors[idx])
	}
	return detail
}

View File

@@ -0,0 +1,70 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package client
import (
"bytes"
"fmt"
"io/ioutil"
"net/http"
"os"
)
var (
// cURLDebug controls whether printcURL prints requests; it is off by
// default and toggled by EnablecURLDebug / DisablecURLDebug.
cURLDebug = false
)
// EnablecURLDebug turns on cURL debug output by setting the package-level
// cURLDebug flag.
func EnablecURLDebug() {
cURLDebug = true
}
// DisablecURLDebug turns off cURL debug output by clearing the package-level
// cURLDebug flag.
func DisablecURLDebug() {
cURLDebug = false
}
// printcURL prints the cURL equivalent request to stderr.
// It does nothing unless cURL debugging is enabled.
//
// If the request has a body, the body is read in full so it can be shown,
// the original body is closed, and req.Body is replaced with an in-memory
// copy of the same bytes so the request can still be sent. A request without
// a body is left without one.
//
// It returns an error if the body of the request cannot
// be read.
// The caller MUST cancel the request if there is an error.
func printcURL(req *http.Request) error {
	if !cURLDebug {
		return nil
	}

	var command string
	if req.URL != nil {
		command = fmt.Sprintf("curl -X %s %s", req.Method, req.URL.String())
	}

	if req.Body != nil {
		b, err := ioutil.ReadAll(req.Body)
		// The original body is replaced below, so nothing else will ever
		// close it; release it here regardless of the read outcome.
		req.Body.Close()
		if err != nil {
			return err
		}
		command += fmt.Sprintf(" -d %q", string(b))

		// reset body
		req.Body = ioutil.NopCloser(bytes.NewBuffer(b))
	}

	fmt.Fprintf(os.Stderr, "cURL Command: %s\n", command)
	return nil
}

View File

@@ -0,0 +1,21 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package client
// Discoverer is an interface that wraps the Discover method.
type Discoverer interface {
// Discover looks up the etcd servers for the domain. It returns the
// servers as a slice of strings, or an error.
Discover(domain string) ([]string, error)
}

View File

@@ -0,0 +1,71 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
Package client provides bindings for the etcd APIs.
Create a Config and exchange it for a Client:
import (
"net/http"
"github.com/coreos/etcd/client"
"golang.org/x/net/context"
)
cfg := client.Config{
Endpoints: []string{"http://127.0.0.1:2379"},
Transport: client.DefaultTransport,
}
c, err := client.New(cfg)
if err != nil {
// handle error
}
Create a KeysAPI using the Client, then use it to interact with etcd:
kAPI := client.NewKeysAPI(c)
// create a new key /foo with the value "bar"
_, err = kAPI.Create(context.Background(), "/foo", "bar")
if err != nil {
// handle error
}
// delete the newly created key only if the value is still "bar"
_, err = kAPI.Delete(context.Background(), "/foo", &client.DeleteOptions{PrevValue: "bar"})
if err != nil {
// handle error
}
Use a custom context to set timeouts on your operations:
import "time"
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
// set a new key, ignoring its previous state
_, err := kAPI.Set(ctx, "/ping", "pong", nil)
if err != nil {
if err == context.DeadlineExceeded {
// request took longer than 5s
} else {
// handle error
}
}
*/
package client

View File

@@ -0,0 +1,921 @@
// ************************************************************
// DO NOT EDIT.
// THIS FILE IS AUTO-GENERATED BY codecgen.
// ************************************************************
package client
import (
"errors"
"fmt"
codec1978 "github.com/ugorji/go/codec"
"reflect"
"runtime"
time "time"
)
// Helper declarations emitted by codecgen for this file.
const (
// String-encoding selectors passed as the first argument to EncodeString.
codecSelferC_UTF81819 = 1
codecSelferC_RAW1819 = 0
// Container kinds passed to IsContainerType when decoding.
codecSelferValueTypeArray1819 = 10
codecSelferValueTypeMap1819 = 9
)
var (
// codecSelferBitsize1819 is the size in bits of the platform's uint type.
codecSelferBitsize1819 = uint8(reflect.TypeOf(uint(0)).Bits())
// codecSelferOnlyMapOrArrayEncodeToStructErr1819 is the panic value used
// when a struct is decoded from something that is neither a map nor an array.
codecSelferOnlyMapOrArrayEncodeToStructErr1819 = errors.New(`only encoded map or array can be decoded into a struct`)
)
// codecSelfer1819 is an empty receiver type for the generated slice helpers
// (encNodes / decNodes).
type codecSelfer1819 struct{}
// init panics at start-up if the codec library's GenVersion is not 4, the
// value this generated file is compared against; the panic message names this
// file so it can be re-generated.
func init() {
if codec1978.GenVersion != 4 {
_, file, _, _ := runtime.Caller(0)
err := fmt.Errorf("codecgen version mismatch: current: %v, need %v. Re-generate file: %v",
4, codec1978.GenVersion, file)
panic(err)
}
if false { // reference the types, but skip this branch at build/run time
var v0 time.Time
_ = v0
}
}
// CodecEncodeSelf writes x to the encoder e.
//
// A nil receiver is encoded as nil. Otherwise, unless a registered extension
// handles the value, x is written either as a 3-element array (when the
// handle's StructToArray option is set) or as a map with the keys "action",
// "node" and "prevNode". Nil Node / PrevNode pointers are encoded as nil.
func (x *Response) CodecEncodeSelf(e *codec1978.Encoder) {
var h codecSelfer1819
z, r := codec1978.GenHelperEncoder(e)
_, _, _ = h, z, r
if x == nil {
r.EncodeNil()
} else {
yym1 := z.EncBinary()
_ = yym1
if false {
} else if z.HasExtensions() && z.EncExt(x) {
} else {
yysep2 := !z.EncBinary()
yy2arr2 := z.EncBasicHandle().StructToArray
var yyq2 [3]bool
_, _, _ = yysep2, yyq2, yy2arr2
const yyr2 bool = false
if yyr2 || yy2arr2 {
r.EncodeArrayStart(3)
} else {
var yynn2 int = 3
for _, b := range yyq2 {
if b {
yynn2++
}
}
r.EncodeMapStart(yynn2)
}
if yyr2 || yy2arr2 {
yym4 := z.EncBinary()
_ = yym4
if false {
} else {
r.EncodeString(codecSelferC_UTF81819, string(x.Action))
}
} else {
r.EncodeString(codecSelferC_UTF81819, string("action"))
yym5 := z.EncBinary()
_ = yym5
if false {
} else {
r.EncodeString(codecSelferC_UTF81819, string(x.Action))
}
}
if yyr2 || yy2arr2 {
if x.Node == nil {
r.EncodeNil()
} else {
x.Node.CodecEncodeSelf(e)
}
} else {
r.EncodeString(codecSelferC_UTF81819, string("node"))
if x.Node == nil {
r.EncodeNil()
} else {
x.Node.CodecEncodeSelf(e)
}
}
if yyr2 || yy2arr2 {
if x.PrevNode == nil {
r.EncodeNil()
} else {
x.PrevNode.CodecEncodeSelf(e)
}
} else {
r.EncodeString(codecSelferC_UTF81819, string("prevNode"))
if x.PrevNode == nil {
r.EncodeNil()
} else {
x.PrevNode.CodecEncodeSelf(e)
}
}
if yysep2 {
r.EncodeEnd()
}
}
}
}
// CodecDecodeSelf reads x from the decoder d.
//
// Unless a registered extension handles the value, the input must be a map
// or an array; it is dispatched to codecDecodeSelfFromMap or
// codecDecodeSelfFromArray accordingly. An empty container leaves x
// untouched. Any other container type causes a panic.
func (x *Response) CodecDecodeSelf(d *codec1978.Decoder) {
var h codecSelfer1819
z, r := codec1978.GenHelperDecoder(d)
_, _, _ = h, z, r
yym8 := z.DecBinary()
_ = yym8
if false {
} else if z.HasExtensions() && z.DecExt(x) {
} else {
if r.IsContainerType(codecSelferValueTypeMap1819) {
yyl9 := r.ReadMapStart()
if yyl9 == 0 {
r.ReadEnd()
} else {
x.codecDecodeSelfFromMap(yyl9, d)
}
} else if r.IsContainerType(codecSelferValueTypeArray1819) {
yyl9 := r.ReadArrayStart()
if yyl9 == 0 {
r.ReadEnd()
} else {
x.codecDecodeSelfFromArray(yyl9, d)
}
} else {
panic(codecSelferOnlyMapOrArrayEncodeToStructErr1819)
}
}
}
// codecDecodeSelfFromMap fills x from a map-encoded value.
//
// l is the number of entries when non-negative; a negative l means the
// length is unknown and entries are read until the decoder reports a break.
// The keys "action", "node" and "prevNode" are decoded into the matching
// fields (a nil value resets the field); any other key is passed to
// DecStructFieldNotFound.
func (x *Response) codecDecodeSelfFromMap(l int, d *codec1978.Decoder) {
var h codecSelfer1819
z, r := codec1978.GenHelperDecoder(d)
_, _, _ = h, z, r
var yys10Slc = z.DecScratchBuffer() // default slice to decode into
_ = yys10Slc
var yyhl10 bool = l >= 0
for yyj10 := 0; ; yyj10++ {
if yyhl10 {
if yyj10 >= l {
break
}
} else {
if r.CheckBreak() {
break
}
}
yys10Slc = r.DecodeBytes(yys10Slc, true, true)
yys10 := string(yys10Slc)
switch yys10 {
case "action":
if r.TryDecodeAsNil() {
x.Action = ""
} else {
x.Action = string(r.DecodeString())
}
case "node":
if r.TryDecodeAsNil() {
if x.Node != nil {
x.Node = nil
}
} else {
if x.Node == nil {
x.Node = new(Node)
}
x.Node.CodecDecodeSelf(d)
}
case "prevNode":
if r.TryDecodeAsNil() {
if x.PrevNode != nil {
x.PrevNode = nil
}
} else {
if x.PrevNode == nil {
x.PrevNode = new(Node)
}
x.PrevNode.CodecDecodeSelf(d)
}
default:
z.DecStructFieldNotFound(-1, yys10)
} // end switch yys10
} // end for yyj10
if !yyhl10 {
r.ReadEnd()
}
}
// codecDecodeSelfFromArray fills x from an array-encoded value.
//
// Elements are taken positionally in the order Action, Node, PrevNode. l is
// the element count when non-negative; otherwise elements are read until the
// decoder reports a break. Decoding stops early, leaving the remaining fields
// untouched, if the array ends before all three are read. Surplus elements
// are passed to DecStructFieldNotFound.
func (x *Response) codecDecodeSelfFromArray(l int, d *codec1978.Decoder) {
var h codecSelfer1819
z, r := codec1978.GenHelperDecoder(d)
_, _, _ = h, z, r
var yyj14 int
var yyb14 bool
var yyhl14 bool = l >= 0
yyj14++
if yyhl14 {
yyb14 = yyj14 > l
} else {
yyb14 = r.CheckBreak()
}
if yyb14 {
r.ReadEnd()
return
}
if r.TryDecodeAsNil() {
x.Action = ""
} else {
x.Action = string(r.DecodeString())
}
yyj14++
if yyhl14 {
yyb14 = yyj14 > l
} else {
yyb14 = r.CheckBreak()
}
if yyb14 {
r.ReadEnd()
return
}
if r.TryDecodeAsNil() {
if x.Node != nil {
x.Node = nil
}
} else {
if x.Node == nil {
x.Node = new(Node)
}
x.Node.CodecDecodeSelf(d)
}
yyj14++
if yyhl14 {
yyb14 = yyj14 > l
} else {
yyb14 = r.CheckBreak()
}
if yyb14 {
r.ReadEnd()
return
}
if r.TryDecodeAsNil() {
if x.PrevNode != nil {
x.PrevNode = nil
}
} else {
if x.PrevNode == nil {
x.PrevNode = new(Node)
}
x.PrevNode.CodecDecodeSelf(d)
}
for {
yyj14++
if yyhl14 {
yyb14 = yyj14 > l
} else {
yyb14 = r.CheckBreak()
}
if yyb14 {
break
}
z.DecStructFieldNotFound(yyj14-1, "")
}
r.ReadEnd()
}
// CodecEncodeSelf writes x to the encoder e.
//
// A nil receiver is encoded as nil. Otherwise, unless a registered extension
// handles the value, x is written in one of two layouts:
//
//   - array (handle's StructToArray set): always 8 elements in the order Key,
//     Dir, Value, Nodes, CreatedIndex, ModifiedIndex, Expiration, TTL, with
//     false / nil / 0 written for Dir, Expiration and TTL when they are unset;
//   - map: the keys "key", "value", "nodes", "createdIndex" and
//     "modifiedIndex" are always present, while "dir", "expiration" and "ttl"
//     are emitted only when Dir is true, Expiration is non-nil, and TTL is
//     non-zero respectively.
func (x *Node) CodecEncodeSelf(e *codec1978.Encoder) {
var h codecSelfer1819
z, r := codec1978.GenHelperEncoder(e)
_, _, _ = h, z, r
if x == nil {
r.EncodeNil()
} else {
yym18 := z.EncBinary()
_ = yym18
if false {
} else if z.HasExtensions() && z.EncExt(x) {
} else {
yysep19 := !z.EncBinary()
yy2arr19 := z.EncBasicHandle().StructToArray
var yyq19 [8]bool
_, _, _ = yysep19, yyq19, yy2arr19
const yyr19 bool = false
// yyq19 marks which optional fields (by position) hold a non-zero value.
yyq19[1] = x.Dir != false
yyq19[6] = x.Expiration != nil
yyq19[7] = x.TTL != 0
if yyr19 || yy2arr19 {
r.EncodeArrayStart(8)
} else {
// Five fields are always written; add one per populated optional field.
var yynn19 int = 5
for _, b := range yyq19 {
if b {
yynn19++
}
}
r.EncodeMapStart(yynn19)
}
if yyr19 || yy2arr19 {
yym21 := z.EncBinary()
_ = yym21
if false {
} else {
r.EncodeString(codecSelferC_UTF81819, string(x.Key))
}
} else {
r.EncodeString(codecSelferC_UTF81819, string("key"))
yym22 := z.EncBinary()
_ = yym22
if false {
} else {
r.EncodeString(codecSelferC_UTF81819, string(x.Key))
}
}
if yyr19 || yy2arr19 {
if yyq19[1] {
yym24 := z.EncBinary()
_ = yym24
if false {
} else {
r.EncodeBool(bool(x.Dir))
}
} else {
r.EncodeBool(false)
}
} else {
if yyq19[1] {
r.EncodeString(codecSelferC_UTF81819, string("dir"))
yym25 := z.EncBinary()
_ = yym25
if false {
} else {
r.EncodeBool(bool(x.Dir))
}
}
}
if yyr19 || yy2arr19 {
yym27 := z.EncBinary()
_ = yym27
if false {
} else {
r.EncodeString(codecSelferC_UTF81819, string(x.Value))
}
} else {
r.EncodeString(codecSelferC_UTF81819, string("value"))
yym28 := z.EncBinary()
_ = yym28
if false {
} else {
r.EncodeString(codecSelferC_UTF81819, string(x.Value))
}
}
if yyr19 || yy2arr19 {
if x.Nodes == nil {
r.EncodeNil()
} else {
x.Nodes.CodecEncodeSelf(e)
}
} else {
r.EncodeString(codecSelferC_UTF81819, string("nodes"))
if x.Nodes == nil {
r.EncodeNil()
} else {
x.Nodes.CodecEncodeSelf(e)
}
}
if yyr19 || yy2arr19 {
yym31 := z.EncBinary()
_ = yym31
if false {
} else {
r.EncodeUint(uint64(x.CreatedIndex))
}
} else {
r.EncodeString(codecSelferC_UTF81819, string("createdIndex"))
yym32 := z.EncBinary()
_ = yym32
if false {
} else {
r.EncodeUint(uint64(x.CreatedIndex))
}
}
if yyr19 || yy2arr19 {
yym34 := z.EncBinary()
_ = yym34
if false {
} else {
r.EncodeUint(uint64(x.ModifiedIndex))
}
} else {
r.EncodeString(codecSelferC_UTF81819, string("modifiedIndex"))
yym35 := z.EncBinary()
_ = yym35
if false {
} else {
r.EncodeUint(uint64(x.ModifiedIndex))
}
}
if yyr19 || yy2arr19 {
if yyq19[6] {
if x.Expiration == nil {
r.EncodeNil()
} else {
yym37 := z.EncBinary()
_ = yym37
if false {
} else if yym38 := z.TimeRtidIfBinc(); yym38 != 0 {
r.EncodeBuiltin(yym38, x.Expiration)
} else if z.HasExtensions() && z.EncExt(x.Expiration) {
} else if yym37 {
z.EncBinaryMarshal(x.Expiration)
} else if !yym37 && z.IsJSONHandle() {
z.EncJSONMarshal(x.Expiration)
} else {
z.EncFallback(x.Expiration)
}
}
} else {
r.EncodeNil()
}
} else {
if yyq19[6] {
r.EncodeString(codecSelferC_UTF81819, string("expiration"))
if x.Expiration == nil {
r.EncodeNil()
} else {
yym39 := z.EncBinary()
_ = yym39
if false {
} else if yym40 := z.TimeRtidIfBinc(); yym40 != 0 {
r.EncodeBuiltin(yym40, x.Expiration)
} else if z.HasExtensions() && z.EncExt(x.Expiration) {
} else if yym39 {
z.EncBinaryMarshal(x.Expiration)
} else if !yym39 && z.IsJSONHandle() {
z.EncJSONMarshal(x.Expiration)
} else {
z.EncFallback(x.Expiration)
}
}
}
}
if yyr19 || yy2arr19 {
if yyq19[7] {
yym42 := z.EncBinary()
_ = yym42
if false {
} else {
r.EncodeInt(int64(x.TTL))
}
} else {
r.EncodeInt(0)
}
} else {
if yyq19[7] {
r.EncodeString(codecSelferC_UTF81819, string("ttl"))
yym43 := z.EncBinary()
_ = yym43
if false {
} else {
r.EncodeInt(int64(x.TTL))
}
}
}
if yysep19 {
r.EncodeEnd()
}
}
}
}
// CodecDecodeSelf reads x from the decoder d.
//
// Unless a registered extension handles the value, the input must be a map
// or an array; it is dispatched to codecDecodeSelfFromMap or
// codecDecodeSelfFromArray accordingly. An empty container leaves x
// untouched. Any other container type causes a panic.
func (x *Node) CodecDecodeSelf(d *codec1978.Decoder) {
var h codecSelfer1819
z, r := codec1978.GenHelperDecoder(d)
_, _, _ = h, z, r
yym44 := z.DecBinary()
_ = yym44
if false {
} else if z.HasExtensions() && z.DecExt(x) {
} else {
if r.IsContainerType(codecSelferValueTypeMap1819) {
yyl45 := r.ReadMapStart()
if yyl45 == 0 {
r.ReadEnd()
} else {
x.codecDecodeSelfFromMap(yyl45, d)
}
} else if r.IsContainerType(codecSelferValueTypeArray1819) {
yyl45 := r.ReadArrayStart()
if yyl45 == 0 {
r.ReadEnd()
} else {
x.codecDecodeSelfFromArray(yyl45, d)
}
} else {
panic(codecSelferOnlyMapOrArrayEncodeToStructErr1819)
}
}
}
// codecDecodeSelfFromMap fills x from a map-encoded value.
//
// l is the number of entries when non-negative; a negative l means the
// length is unknown and entries are read until the decoder reports a break.
// The keys "key", "dir", "value", "nodes", "createdIndex", "modifiedIndex",
// "expiration" and "ttl" are decoded into the matching fields (a nil value
// resets the field to its zero value); any other key is passed to
// DecStructFieldNotFound.
func (x *Node) codecDecodeSelfFromMap(l int, d *codec1978.Decoder) {
var h codecSelfer1819
z, r := codec1978.GenHelperDecoder(d)
_, _, _ = h, z, r
var yys46Slc = z.DecScratchBuffer() // default slice to decode into
_ = yys46Slc
var yyhl46 bool = l >= 0
for yyj46 := 0; ; yyj46++ {
if yyhl46 {
if yyj46 >= l {
break
}
} else {
if r.CheckBreak() {
break
}
}
yys46Slc = r.DecodeBytes(yys46Slc, true, true)
yys46 := string(yys46Slc)
switch yys46 {
case "key":
if r.TryDecodeAsNil() {
x.Key = ""
} else {
x.Key = string(r.DecodeString())
}
case "dir":
if r.TryDecodeAsNil() {
x.Dir = false
} else {
x.Dir = bool(r.DecodeBool())
}
case "value":
if r.TryDecodeAsNil() {
x.Value = ""
} else {
x.Value = string(r.DecodeString())
}
case "nodes":
if r.TryDecodeAsNil() {
x.Nodes = nil
} else {
yyv50 := &x.Nodes
yyv50.CodecDecodeSelf(d)
}
case "createdIndex":
if r.TryDecodeAsNil() {
x.CreatedIndex = 0
} else {
x.CreatedIndex = uint64(r.DecodeUint(64))
}
case "modifiedIndex":
if r.TryDecodeAsNil() {
x.ModifiedIndex = 0
} else {
x.ModifiedIndex = uint64(r.DecodeUint(64))
}
case "expiration":
if r.TryDecodeAsNil() {
if x.Expiration != nil {
x.Expiration = nil
}
} else {
if x.Expiration == nil {
x.Expiration = new(time.Time)
}
yym54 := z.DecBinary()
_ = yym54
if false {
} else if yym55 := z.TimeRtidIfBinc(); yym55 != 0 {
r.DecodeBuiltin(yym55, x.Expiration)
} else if z.HasExtensions() && z.DecExt(x.Expiration) {
} else if yym54 {
z.DecBinaryUnmarshal(x.Expiration)
} else if !yym54 && z.IsJSONHandle() {
z.DecJSONUnmarshal(x.Expiration)
} else {
z.DecFallback(x.Expiration, false)
}
}
case "ttl":
if r.TryDecodeAsNil() {
x.TTL = 0
} else {
x.TTL = int64(r.DecodeInt(64))
}
default:
z.DecStructFieldNotFound(-1, yys46)
} // end switch yys46
} // end for yyj46
if !yyhl46 {
r.ReadEnd()
}
}
// codecDecodeSelfFromArray fills x from an array-encoded value.
//
// Elements are taken positionally in the order Key, Dir, Value, Nodes,
// CreatedIndex, ModifiedIndex, Expiration, TTL. l is the element count when
// non-negative; otherwise elements are read until the decoder reports a
// break. Decoding stops early, leaving the remaining fields untouched, if the
// array ends before all eight are read. Surplus elements are passed to
// DecStructFieldNotFound.
func (x *Node) codecDecodeSelfFromArray(l int, d *codec1978.Decoder) {
var h codecSelfer1819
z, r := codec1978.GenHelperDecoder(d)
_, _, _ = h, z, r
var yyj57 int
var yyb57 bool
var yyhl57 bool = l >= 0
yyj57++
if yyhl57 {
yyb57 = yyj57 > l
} else {
yyb57 = r.CheckBreak()
}
if yyb57 {
r.ReadEnd()
return
}
if r.TryDecodeAsNil() {
x.Key = ""
} else {
x.Key = string(r.DecodeString())
}
yyj57++
if yyhl57 {
yyb57 = yyj57 > l
} else {
yyb57 = r.CheckBreak()
}
if yyb57 {
r.ReadEnd()
return
}
if r.TryDecodeAsNil() {
x.Dir = false
} else {
x.Dir = bool(r.DecodeBool())
}
yyj57++
if yyhl57 {
yyb57 = yyj57 > l
} else {
yyb57 = r.CheckBreak()
}
if yyb57 {
r.ReadEnd()
return
}
if r.TryDecodeAsNil() {
x.Value = ""
} else {
x.Value = string(r.DecodeString())
}
yyj57++
if yyhl57 {
yyb57 = yyj57 > l
} else {
yyb57 = r.CheckBreak()
}
if yyb57 {
r.ReadEnd()
return
}
if r.TryDecodeAsNil() {
x.Nodes = nil
} else {
yyv61 := &x.Nodes
yyv61.CodecDecodeSelf(d)
}
yyj57++
if yyhl57 {
yyb57 = yyj57 > l
} else {
yyb57 = r.CheckBreak()
}
if yyb57 {
r.ReadEnd()
return
}
if r.TryDecodeAsNil() {
x.CreatedIndex = 0
} else {
x.CreatedIndex = uint64(r.DecodeUint(64))
}
yyj57++
if yyhl57 {
yyb57 = yyj57 > l
} else {
yyb57 = r.CheckBreak()
}
if yyb57 {
r.ReadEnd()
return
}
if r.TryDecodeAsNil() {
x.ModifiedIndex = 0
} else {
x.ModifiedIndex = uint64(r.DecodeUint(64))
}
yyj57++
if yyhl57 {
yyb57 = yyj57 > l
} else {
yyb57 = r.CheckBreak()
}
if yyb57 {
r.ReadEnd()
return
}
if r.TryDecodeAsNil() {
if x.Expiration != nil {
x.Expiration = nil
}
} else {
if x.Expiration == nil {
x.Expiration = new(time.Time)
}
yym65 := z.DecBinary()
_ = yym65
if false {
} else if yym66 := z.TimeRtidIfBinc(); yym66 != 0 {
r.DecodeBuiltin(yym66, x.Expiration)
} else if z.HasExtensions() && z.DecExt(x.Expiration) {
} else if yym65 {
z.DecBinaryUnmarshal(x.Expiration)
} else if !yym65 && z.IsJSONHandle() {
z.DecJSONUnmarshal(x.Expiration)
} else {
z.DecFallback(x.Expiration, false)
}
}
yyj57++
if yyhl57 {
yyb57 = yyj57 > l
} else {
yyb57 = r.CheckBreak()
}
if yyb57 {
r.ReadEnd()
return
}
if r.TryDecodeAsNil() {
x.TTL = 0
} else {
x.TTL = int64(r.DecodeInt(64))
}
for {
yyj57++
if yyhl57 {
yyb57 = yyj57 > l
} else {
yyb57 = r.CheckBreak()
}
if yyb57 {
break
}
z.DecStructFieldNotFound(yyj57-1, "")
}
r.ReadEnd()
}
// CodecEncodeSelf writes x to the encoder e. A nil Nodes value is encoded as
// nil; otherwise, unless a registered extension handles the value, the
// elements are written by encNodes.
func (x Nodes) CodecEncodeSelf(e *codec1978.Encoder) {
var h codecSelfer1819
z, r := codec1978.GenHelperEncoder(e)
_, _, _ = h, z, r
if x == nil {
r.EncodeNil()
} else {
yym68 := z.EncBinary()
_ = yym68
if false {
} else if z.HasExtensions() && z.EncExt(x) {
} else {
h.encNodes((Nodes)(x), e)
}
}
}
// CodecDecodeSelf reads x from the decoder d. Unless a registered extension
// handles the value, the elements are read by decNodes.
func (x *Nodes) CodecDecodeSelf(d *codec1978.Decoder) {
var h codecSelfer1819
z, r := codec1978.GenHelperDecoder(d)
_, _, _ = h, z, r
yym69 := z.DecBinary()
_ = yym69
if false {
} else if z.HasExtensions() && z.DecExt(x) {
} else {
h.decNodes((*Nodes)(x), d)
}
}
// encNodes writes v as an array of len(v) elements. Each nil element is
// encoded as nil; every other element encodes itself via CodecEncodeSelf.
func (x codecSelfer1819) encNodes(v Nodes, e *codec1978.Encoder) {
var h codecSelfer1819
z, r := codec1978.GenHelperEncoder(e)
_, _, _ = h, z, r
r.EncodeArrayStart(len(v))
for _, yyv70 := range v {
if yyv70 == nil {
r.EncodeNil()
} else {
yyv70.CodecEncodeSelf(e)
}
}
r.EncodeEnd()
}
// decNodes decodes a sequence of *Node values from d into the slice that v
// points to. It works on a local copy (yyv71) and writes it back to *v only
// when yyc71 records that the slice header changed (allocated, truncated or
// grown).
// NOTE(review): this looks like codecgen output (see the go:generate
// directive in keys.go) — regenerate rather than hand-edit; confirm.
func (x codecSelfer1819) decNodes(v *Nodes, d *codec1978.Decoder) {
var h codecSelfer1819
z, r := codec1978.GenHelperDecoder(d)
_, _, _ = h, z, r
yyv71 := *v
// yyl71 is the length reported by the slice helper; the branches below
// handle 0, positive and all other values separately.
yyh71, yyl71 := z.DecSliceHelperStart()
var yyrr71, yyrl71 int
var yyc71, yyrt71 bool
_, _, _ = yyc71, yyrt71, yyrl71
yyrr71 = yyl71
if yyv71 == nil {
// No destination yet: allocate using the length inferred by DecInferLen.
// yyrt71 is the second result of DecInferLen; when it is set, only
// yyrl71 elements are pre-allocated and read in the first loop below.
if yyrl71, yyrt71 = z.DecInferLen(yyl71, z.DecBasicHandle().MaxInitLen, 8); yyrt71 {
yyrr71 = yyrl71
}
yyv71 = make(Nodes, yyrl71)
yyc71 = true
}
if yyl71 == 0 {
// Zero reported length: drop any existing contents but keep the backing array.
if len(yyv71) != 0 {
yyv71 = yyv71[:0]
yyc71 = true
}
} else if yyl71 > 0 {
// Positive reported length: size the slice, then decode element by element.
if yyl71 > cap(yyv71) {
yyrl71, yyrt71 = z.DecInferLen(yyl71, z.DecBasicHandle().MaxInitLen, 8)
yyv71 = make([]*Node, yyrl71)
yyc71 = true
yyrr71 = len(yyv71)
} else if yyl71 != len(yyv71) {
yyv71 = yyv71[:yyl71]
yyc71 = true
}
yyj71 := 0
// Fill the slots that already exist in yyv71.
for ; yyj71 < yyrr71; yyj71++ {
if r.TryDecodeAsNil() {
// A nil in the stream zeroes an existing Node in place.
if yyv71[yyj71] != nil {
*yyv71[yyj71] = Node{}
}
} else {
if yyv71[yyj71] == nil {
yyv71[yyj71] = new(Node)
}
yyw72 := yyv71[yyj71]
yyw72.CodecDecodeSelf(d)
}
}
if yyrt71 {
// Fewer slots were allocated than reported: append the remainder one at a time.
for ; yyj71 < yyl71; yyj71++ {
yyv71 = append(yyv71, nil)
if r.TryDecodeAsNil() {
if yyv71[yyj71] != nil {
*yyv71[yyj71] = Node{}
}
} else {
if yyv71[yyj71] == nil {
yyv71[yyj71] = new(Node)
}
yyw73 := yyv71[yyj71]
yyw73.CodecDecodeSelf(d)
}
}
}
} else {
// Any other reported length: keep decoding until r.CheckBreak() is true,
// growing the slice as needed.
// NOTE(review): presumably a negative length marks an indefinite-length
// stream — confirm against the codec package.
for yyj71 := 0; !r.CheckBreak(); yyj71++ {
if yyj71 >= len(yyv71) {
yyv71 = append(yyv71, nil) // var yyz71 *Node
yyc71 = true
}
if yyj71 < len(yyv71) {
if r.TryDecodeAsNil() {
if yyv71[yyj71] != nil {
*yyv71[yyj71] = Node{}
}
} else {
if yyv71[yyj71] == nil {
yyv71[yyj71] = new(Node)
}
yyw74 := yyv71[yyj71]
yyw74.CodecDecodeSelf(d)
}
} else {
z.DecSwallow()
}
}
yyh71.End()
}
// Publish the local slice only if its header changed.
if yyc71 {
*v = yyv71
}
}

View File

@@ -0,0 +1,651 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package client
//go:generate codecgen -d 1819 -r "Node|Response|Nodes" -o keys.generated.go keys.go
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"net/url"
"strconv"
"strings"
"time"
"github.com/coreos/etcd/pkg/pathutil"
"github.com/ugorji/go/codec"
"golang.org/x/net/context"
)
// Numeric error codes that may appear in Error.Code (decoded from the
// "errorCode" JSON field of a failed response).
// NOTE(review): presumably these mirror the codes defined by the etcd
// server — confirm against the server's error package.
const (
ErrorCodeKeyNotFound = 100
ErrorCodeTestFailed = 101
ErrorCodeNotFile = 102
ErrorCodeNotDir = 104
ErrorCodeNodeExist = 105
ErrorCodeRootROnly = 107
ErrorCodeDirNotEmpty = 108
ErrorCodeUnauthorized = 110
ErrorCodePrevValueRequired = 201
ErrorCodeTTLNaN = 202
ErrorCodeIndexNaN = 203
ErrorCodeInvalidField = 209
ErrorCodeInvalidForm = 210
ErrorCodeRaftInternal = 300
ErrorCodeLeaderElect = 301
ErrorCodeWatcherCleared = 400
ErrorCodeEventIndexCleared = 401
)
// Error is the error payload decoded from the JSON body of a failed keys
// API response (see unmarshalFailedKeysResponse). Code is compared against
// the ErrorCode* constants.
type Error struct {
	Code    int    `json:"errorCode"`
	Message string `json:"message"`
	Cause   string `json:"cause"`
	Index   uint64 `json:"index"`
}

// Error implements the error interface, rendering the value as
// "<code>: <message> (<cause>) [<index>]".
func (e Error) Error() string {
	const layout = "%v: %v (%v) [%v]"
	return fmt.Sprintf(layout, e.Code, e.Message, e.Cause, e.Index)
}
var (
// ErrInvalidJSON is returned when a response body cannot be decoded,
// for either a successful or a failed keys response.
ErrInvalidJSON = errors.New("client: response is invalid json. The endpoint is probably not valid etcd cluster endpoint.")
// ErrEmptyBody is returned when a 200 or 201 response carries a
// zero-length body; httpWatcher.Next retries on it.
ErrEmptyBody = errors.New("client: response body is empty")
)
// PrevExistType is used to define an existence condition when setting
// or deleting Nodes.
type PrevExistType string
const (
// PrevIgnore (the empty string) places no condition on existence; the
// "prevExist" query parameter is then omitted from the request.
PrevIgnore = PrevExistType("")
// PrevExist requires that the Node currently exists.
PrevExist = PrevExistType("true")
// PrevNoExist requires that the Node does not currently exist.
PrevNoExist = PrevExistType("false")
)
var (
// defaultV2KeysPrefix is the URL path prefix that NewKeysAPI passes to
// NewKeysAPIWithPrefix.
defaultV2KeysPrefix = "/v2/keys"
)
// NewKeysAPI returns a KeysAPI that talks to etcd's key-value API over
// HTTP through c, rooted at the default keys prefix ("/v2/keys").
func NewKeysAPI(c Client) KeysAPI {
	prefix := defaultV2KeysPrefix
	return NewKeysAPIWithPrefix(c, prefix)
}
// NewKeysAPIWithPrefix behaves like NewKeysAPI except that the caller
// chooses the base URL path p placed in front of every key. This should
// only be needed in very rare cases.
func NewKeysAPIWithPrefix(c Client, p string) KeysAPI {
	api := new(httpKeysAPI)
	api.client = c
	api.prefix = p
	return api
}
// KeysAPI is the set of operations available against etcd's key-value API.
// Values of this type are built by NewKeysAPI and NewKeysAPIWithPrefix.
type KeysAPI interface {
// Get retrieves a set of Nodes from etcd
Get(ctx context.Context, key string, opts *GetOptions) (*Response, error)
// Set assigns a new value to a Node identified by a given key. The caller
// may define a set of conditions in the SetOptions. If SetOptions.Dir=true
// then value is ignored.
Set(ctx context.Context, key, value string, opts *SetOptions) (*Response, error)
// Delete removes a Node identified by the given key, optionally destroying
// all of its children as well. The caller may define a set of required
// conditions in a DeleteOptions object.
Delete(ctx context.Context, key string, opts *DeleteOptions) (*Response, error)
// Create is an alias for Set w/ PrevExist=false
Create(ctx context.Context, key, value string) (*Response, error)
// CreateInOrder is used to atomically create in-order keys within the given directory.
CreateInOrder(ctx context.Context, dir, value string, opts *CreateInOrderOptions) (*Response, error)
// Update is an alias for Set w/ PrevExist=true
Update(ctx context.Context, key, value string) (*Response, error)
// Watcher builds a new Watcher targeted at a specific Node identified
// by the given key. The Watcher may be configured at creation time
// through a WatcherOptions object. The returned Watcher is designed
// to emit events that happen to a Node, and optionally to its children.
Watcher(key string, opts *WatcherOptions) Watcher
}
// WatcherOptions configures a Watcher built by KeysAPI.Watcher. A nil
// *WatcherOptions leaves both settings at their defaults.
type WatcherOptions struct {
// AfterIndex defines the index after-which the Watcher should
// start emitting events. For example, if a value of 5 is
// provided, the first event will have an index >= 6.
//
// Setting AfterIndex to 0 (default) means that the Watcher
// should start watching for events starting at the current
// index, whatever that may be.
AfterIndex uint64
// Recursive specifies whether or not the Watcher should emit
// events that occur in children of the given keyspace. If set
// to false (default), events will be limited to those that
// occur for the exact key.
Recursive bool
}
// CreateInOrderOptions holds the optional settings for KeysAPI.CreateInOrder.
type CreateInOrderOptions struct {
// TTL defines a period of time after-which the Node should
// expire and no longer exist. Values <= 0 are ignored. Given
// that the zero-value is ignored, TTL cannot be used to set
// a TTL of 0. The value is sent as a whole number of seconds;
// any fractional part is truncated.
TTL time.Duration
}
// SetOptions holds the optional conditions and settings for KeysAPI.Set.
type SetOptions struct {
// PrevValue specifies what the current value of the Node must
// be in order for the Set operation to succeed.
//
// Leaving this field empty means that the caller wishes to
// ignore the current value of the Node. This cannot be used
// to compare the Node's current value to an empty string.
//
// PrevValue is ignored if Dir=true
PrevValue string
// PrevIndex indicates what the current ModifiedIndex of the
// Node must be in order for the Set operation to succeed.
//
// If PrevIndex is set to 0 (default), no comparison is made.
PrevIndex uint64
// PrevExist specifies whether the Node must currently exist
// (PrevExist) or not (PrevNoExist). If the caller does not
// care about existence, set PrevExist to PrevIgnore, or simply
// leave it unset.
PrevExist PrevExistType
// TTL defines a period of time after-which the Node should
// expire and no longer exist. Values <= 0 are ignored. Given
// that the zero-value is ignored, TTL cannot be used to set
// a TTL of 0. The value is sent as a whole number of seconds;
// any fractional part is truncated.
TTL time.Duration
// Dir specifies whether or not this Node should be created as a directory.
Dir bool
}
// GetOptions holds the optional settings for KeysAPI.Get. Each field is
// sent to the server as a boolean query parameter.
type GetOptions struct {
// Recursive defines whether or not all children of the Node
// should be returned.
Recursive bool
// Sort instructs the server whether or not to sort the Nodes.
// If true, the Nodes are sorted alphabetically by key in
// ascending order (A to z). If false (default), the Nodes will
// not be sorted and the ordering used should not be considered
// predictable.
Sort bool
// Quorum specifies whether it gets the latest committed value that
// has been applied in quorum of members, which ensures external
// consistency (or linearizability).
Quorum bool
}
// DeleteOptions holds the optional conditions and settings for
// KeysAPI.Delete.
type DeleteOptions struct {
// PrevValue specifies what the current value of the Node must
// be in order for the Delete operation to succeed.
//
// Leaving this field empty means that the caller wishes to
// ignore the current value of the Node. This cannot be used
// to compare the Node's current value to an empty string.
PrevValue string
// PrevIndex indicates what the current ModifiedIndex of the
// Node must be in order for the Delete operation to succeed.
//
// If PrevIndex is set to 0 (default), no comparison is made.
PrevIndex uint64
// Recursive defines whether or not all children of the Node
// should be deleted. If set to true, all children of the Node
// identified by the given key will be deleted. If left unset
// or explicitly set to false, only a single Node will be
// deleted.
Recursive bool
// Dir specifies whether or not this Node should be removed as a directory.
Dir bool
}
// Watcher delivers a stream of etcd events, one per call to Next.
type Watcher interface {
// Next blocks until an etcd event occurs, then returns a Response
// representing that event. The behavior of Next depends on the
// WatcherOptions used to construct the Watcher. Next is designed to
// be called repeatedly, each time blocking until a subsequent event
// is available.
//
// If the provided context is cancelled, Next will return a non-nil
// error. Any other failures encountered while waiting for the next
// event (connection issues, deserialization failures, etc) will
// also result in a non-nil error.
Next(context.Context) (*Response, error)
}
// Response is the decoded result of a successful keys API request.
type Response struct {
// Action is the name of the operation that occurred. Possible values
// include get, set, delete, update, create, compareAndSwap,
// compareAndDelete and expire.
Action string `json:"action"`
// Node represents the state of the relevant etcd Node.
Node *Node `json:"node"`
// PrevNode represents the previous state of the Node. PrevNode is non-nil
// only if the Node existed before the action occurred and the action
// caused a change to the Node.
PrevNode *Node `json:"prevNode"`
// Index holds the cluster-level index at the time the Response was generated.
// This index is not tied to the Node(s) contained in this Response.
// It is not part of the JSON body; it is filled from the X-Etcd-Index
// response header when that header is present.
Index uint64 `json:"-"`
}
// Node is a single entry (key or directory) in the etcd keyspace as it
// appears in a Response.
type Node struct {
	// Key represents the unique location of this Node (e.g. "/foo/bar").
	Key string `json:"key"`
	// Dir reports whether node describes a directory.
	Dir bool `json:"dir,omitempty"`
	// Value is the current data stored on this Node. If this Node
	// is a directory, Value will be empty.
	Value string `json:"value"`
	// Nodes holds the children of this Node, only if this Node is a directory.
	// This slice will be arbitrarily deep (children, grandchildren, great-
	// grandchildren, etc.) if a recursive Get or Watch request was made.
	Nodes Nodes `json:"nodes"`
	// CreatedIndex is the etcd index at-which this Node was created.
	CreatedIndex uint64 `json:"createdIndex"`
	// ModifiedIndex is the etcd index at-which this Node was last modified.
	ModifiedIndex uint64 `json:"modifiedIndex"`
	// Expiration is the server side expiration time of the key.
	Expiration *time.Time `json:"expiration,omitempty"`
	// TTL is the time to live of the key in seconds.
	TTL int64 `json:"ttl,omitempty"`
}

// String renders the Node's key, indices and TTL in a compact,
// human-readable form. Value, Dir, Nodes and Expiration are not included.
func (n *Node) String() string {
	const layout = "{Key: %s, CreatedIndex: %d, ModifiedIndex: %d, TTL: %d}"
	return fmt.Sprintf(layout, n.Key, n.CreatedIndex, n.ModifiedIndex, n.TTL)
}

// TTLDuration converts the Node's TTL, which is a count of seconds, into a
// time.Duration.
func (n *Node) TTLDuration() time.Duration {
	seconds := time.Duration(n.TTL)
	return seconds * time.Second
}

// Nodes is a list of Node pointers that can be sorted by key.
type Nodes []*Node

// Len, Less and Swap implement sort.Interface, ordering Nodes by Key.

// Len reports the number of Nodes in the list.
func (ns Nodes) Len() int {
	return len(ns)
}

// Less reports whether the Key at position i sorts before the Key at
// position j.
func (ns Nodes) Less(i, j int) bool {
	left, right := ns[i], ns[j]
	return left.Key < right.Key
}

// Swap exchanges the Nodes at positions i and j.
func (ns Nodes) Swap(i, j int) {
	held := ns[i]
	ns[i] = ns[j]
	ns[j] = held
}
// httpKeysAPI is the KeysAPI implementation returned by
// NewKeysAPIWithPrefix.
type httpKeysAPI struct {
// client executes each action as an HTTP request.
client httpClient
// prefix is the URL path placed in front of every key (see v2KeysURL).
prefix string
}
// Set issues a set request for key with value val. When opts is non-nil its
// conditions (PrevValue, PrevIndex, PrevExist) and settings (TTL, Dir) are
// copied onto the request. Errors from the client are returned as-is; the
// HTTP response is decoded by unmarshalHTTPResponse.
func (k *httpKeysAPI) Set(ctx context.Context, key, val string, opts *SetOptions) (*Response, error) {
	act := &setAction{Prefix: k.prefix, Key: key, Value: val}
	if o := opts; o != nil {
		act.PrevValue, act.PrevIndex, act.PrevExist = o.PrevValue, o.PrevIndex, o.PrevExist
		act.TTL, act.Dir = o.TTL, o.Dir
	}

	httpResp, body, err := k.client.Do(ctx, act)
	if err != nil {
		return nil, err
	}
	return unmarshalHTTPResponse(httpResp.StatusCode, httpResp.Header, body)
}
// Create is Set with the condition that the key must not already exist
// (PrevExist = PrevNoExist).
func (k *httpKeysAPI) Create(ctx context.Context, key, val string) (*Response, error) {
	mustNotExist := &SetOptions{PrevExist: PrevNoExist}
	return k.Set(ctx, key, val, mustNotExist)
}
// CreateInOrder issues an in-order create of val under dir. When opts is
// non-nil its TTL is copied onto the request. Errors from the client are
// returned as-is; the HTTP response is decoded by unmarshalHTTPResponse.
func (k *httpKeysAPI) CreateInOrder(ctx context.Context, dir, val string, opts *CreateInOrderOptions) (*Response, error) {
	act := &createInOrderAction{Prefix: k.prefix, Dir: dir, Value: val}
	if opts != nil {
		act.TTL = opts.TTL
	}

	httpResp, body, err := k.client.Do(ctx, act)
	if err != nil {
		return nil, err
	}
	return unmarshalHTTPResponse(httpResp.StatusCode, httpResp.Header, body)
}
// Update is Set with the condition that the key must already exist
// (PrevExist = PrevExist).
func (k *httpKeysAPI) Update(ctx context.Context, key, val string) (*Response, error) {
	mustExist := &SetOptions{PrevExist: PrevExist}
	return k.Set(ctx, key, val, mustExist)
}
// Delete issues a delete request for key. When opts is non-nil its
// conditions (PrevValue, PrevIndex) and flags (Dir, Recursive) are copied
// onto the request. Errors from the client are returned as-is; the HTTP
// response is decoded by unmarshalHTTPResponse.
func (k *httpKeysAPI) Delete(ctx context.Context, key string, opts *DeleteOptions) (*Response, error) {
	act := &deleteAction{Prefix: k.prefix, Key: key}
	if o := opts; o != nil {
		act.PrevValue, act.PrevIndex = o.PrevValue, o.PrevIndex
		act.Dir, act.Recursive = o.Dir, o.Recursive
	}

	httpResp, body, err := k.client.Do(ctx, act)
	if err != nil {
		return nil, err
	}
	return unmarshalHTTPResponse(httpResp.StatusCode, httpResp.Header, body)
}
// Get issues a get request for key. When opts is non-nil its Recursive,
// Sort and Quorum flags are copied onto the request. Errors from the client
// are returned as-is; the HTTP response is decoded by
// unmarshalHTTPResponse.
func (k *httpKeysAPI) Get(ctx context.Context, key string, opts *GetOptions) (*Response, error) {
	act := &getAction{Prefix: k.prefix, Key: key}
	if o := opts; o != nil {
		act.Recursive, act.Sorted, act.Quorum = o.Recursive, o.Sort, o.Quorum
	}

	httpResp, body, err := k.client.Do(ctx, act)
	if err != nil {
		return nil, err
	}
	return unmarshalHTTPResponse(httpResp.StatusCode, httpResp.Header, body)
}
// Watcher builds a Watcher for key that shares this API's client and
// prefix. With nil opts the watch is non-recursive and its wait index is
// left at 0. A non-zero AfterIndex starts the watch at AfterIndex+1.
func (k *httpKeysAPI) Watcher(key string, opts *WatcherOptions) Watcher {
	w := &httpWatcher{client: k.client}
	w.nextWait.Prefix = k.prefix
	w.nextWait.Key = key
	if opts == nil {
		return w
	}

	w.nextWait.Recursive = opts.Recursive
	if opts.AfterIndex > 0 {
		w.nextWait.WaitIndex = opts.AfterIndex + 1
	}
	return w
}
// httpWatcher is the Watcher implementation returned by
// httpKeysAPI.Watcher. nextWait is the request issued by the next call to
// Next; its WaitIndex advances after every delivered event.
type httpWatcher struct {
	client   httpClient
	nextWait waitAction
}

// Next issues the pending wait request and returns the decoded event. An
// empty response body (ErrEmptyBody) causes the same request to be issued
// again; any other error is returned. After a successful event the next
// wait index is set to one past the event Node's ModifiedIndex.
func (hw *httpWatcher) Next(ctx context.Context) (*Response, error) {
	for {
		httpresp, body, err := hw.client.Do(ctx, &hw.nextWait)
		if err != nil {
			return nil, err
		}

		resp, err := unmarshalHTTPResponse(httpresp.StatusCode, httpresp.Header, body)
		switch err {
		case nil:
			hw.nextWait.WaitIndex = resp.Node.ModifiedIndex + 1
			return resp, nil
		case ErrEmptyBody:
			// Nothing was delivered; ask again.
			continue
		default:
			return nil, err
		}
	}
}
// v2KeysURL forms a URL representing the location of a key.
// The endpoint argument represents the base URL of an etcd
// server. The prefix is the path needed to route from the
// provided endpoint's path to the root of the keys API
// (typically "/v2/keys").
func v2KeysURL(ep url.URL, prefix, key string) *url.URL {
	// The parts are concatenated by hand because path.Join does not
	// preserve a trailing slash; CanonicalURLPath then cleans up the result.
	withLeadingSlash := func(s string) string {
		if s == "" || s[0] == '/' {
			return s
		}
		return "/" + s
	}

	joined := ep.Path + withLeadingSlash(prefix) + withLeadingSlash(key)
	ep.Path = pathutil.CanonicalURLPath(joined)
	return &ep
}
// getAction describes a get request against the keys API.
type getAction struct {
	Prefix    string
	Key       string
	Recursive bool
	Sorted    bool
	Quorum    bool
}

// HTTPRequest builds the GET request for this action against endpoint ep.
// The recursive, sorted and quorum flags are always sent as query
// parameters, whether true or false.
func (g *getAction) HTTPRequest(ep url.URL) *http.Request {
	u := v2KeysURL(ep, g.Prefix, g.Key)

	query := u.Query()
	flags := []struct {
		name string
		on   bool
	}{
		{"recursive", g.Recursive},
		{"sorted", g.Sorted},
		{"quorum", g.Quorum},
	}
	for _, f := range flags {
		query.Set(f.name, strconv.FormatBool(f.on))
	}
	u.RawQuery = query.Encode()

	req, _ := http.NewRequest("GET", u.String(), nil)
	return req
}
// waitAction describes a watch (long-poll) request against the keys API.
type waitAction struct {
	Prefix    string
	Key       string
	WaitIndex uint64
	Recursive bool
}

// HTTPRequest builds the GET request for this action against endpoint ep,
// with wait=true plus the wait index and recursive flag as query
// parameters.
func (w *waitAction) HTTPRequest(ep url.URL) *http.Request {
	u := v2KeysURL(ep, w.Prefix, w.Key)

	query := u.Query()
	query.Set("wait", "true")
	waitIndex := strconv.FormatUint(w.WaitIndex, 10)
	query.Set("waitIndex", waitIndex)
	recursive := strconv.FormatBool(w.Recursive)
	query.Set("recursive", recursive)
	u.RawQuery = query.Encode()

	req, _ := http.NewRequest("GET", u.String(), nil)
	return req
}
// setAction describes a set request (key or directory) against the keys
// API.
type setAction struct {
	Prefix    string
	Key       string
	Value     string
	PrevValue string
	PrevIndex uint64
	PrevExist PrevExistType
	TTL       time.Duration
	Dir       bool
}

// HTTPRequest builds the form-encoded PUT request for this action against
// endpoint ep. Conditions travel as query parameters; the value and TTL
// travel in the form body.
func (a *setAction) HTTPRequest(ep url.URL) *http.Request {
	u := v2KeysURL(ep, a.Prefix, a.Key)
	query := u.Query()
	form := make(url.Values)

	// A request either creates a directory or sets a key.
	switch {
	case a.Dir:
		query.Set("dir", strconv.FormatBool(a.Dir))
	default:
		// prevValue and value only make sense when setting a key.
		if a.PrevValue != "" {
			query.Set("prevValue", a.PrevValue)
		}
		form.Add("value", a.Value)
	}

	// The remaining options apply to keys and directories alike.
	if a.PrevIndex != 0 {
		query.Set("prevIndex", strconv.FormatUint(a.PrevIndex, 10))
	}
	if a.PrevExist != PrevIgnore {
		query.Set("prevExist", string(a.PrevExist))
	}
	if ttl := a.TTL; ttl > 0 {
		// Sent as whole seconds; the fractional part is truncated.
		form.Add("ttl", strconv.FormatUint(uint64(ttl.Seconds()), 10))
	}
	u.RawQuery = query.Encode()

	req, _ := http.NewRequest("PUT", u.String(), strings.NewReader(form.Encode()))
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	return req
}
// deleteAction describes a delete request against the keys API.
type deleteAction struct {
	Prefix    string
	Key       string
	PrevValue string
	PrevIndex uint64
	Dir       bool
	Recursive bool
}

// HTTPRequest builds the DELETE request for this action against endpoint
// ep. Only options that are set (non-empty, non-zero or true) are added as
// query parameters.
func (a *deleteAction) HTTPRequest(ep url.URL) *http.Request {
	u := v2KeysURL(ep, a.Prefix, a.Key)

	query := u.Query()
	if prev := a.PrevValue; prev != "" {
		query.Set("prevValue", prev)
	}
	if idx := a.PrevIndex; idx != 0 {
		query.Set("prevIndex", strconv.FormatUint(idx, 10))
	}
	if a.Dir {
		query.Set("dir", "true")
	}
	if a.Recursive {
		query.Set("recursive", "true")
	}
	u.RawQuery = query.Encode()

	req, _ := http.NewRequest("DELETE", u.String(), nil)
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	return req
}
// createInOrderAction describes an in-order create request under a
// directory of the keys API.
type createInOrderAction struct {
	Prefix string
	Dir    string
	Value  string
	TTL    time.Duration
}

// HTTPRequest builds the form-encoded POST request for this action against
// endpoint ep. The value is always sent; the TTL only when positive.
func (a *createInOrderAction) HTTPRequest(ep url.URL) *http.Request {
	u := v2KeysURL(ep, a.Prefix, a.Dir)

	form := make(url.Values)
	form.Add("value", a.Value)
	if ttl := a.TTL; ttl > 0 {
		// Sent as whole seconds; the fractional part is truncated.
		form.Add("ttl", strconv.FormatUint(uint64(ttl.Seconds()), 10))
	}

	req, _ := http.NewRequest("POST", u.String(), strings.NewReader(form.Encode()))
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	return req
}
// unmarshalHTTPResponse turns a raw HTTP result into a Response or an
// error. Status 200 and 201 are treated as success: an empty body yields
// ErrEmptyBody, otherwise the body is decoded as a Response. Every other
// status is decoded as a failure and returned as the error.
func unmarshalHTTPResponse(code int, header http.Header, body []byte) (res *Response, err error) {
	succeeded := code == http.StatusOK || code == http.StatusCreated
	if !succeeded {
		return nil, unmarshalFailedKeysResponse(body)
	}
	if len(body) == 0 {
		return nil, ErrEmptyBody
	}
	return unmarshalSuccessfulKeysResponse(header, body)
}
// unmarshalSuccessfulKeysResponse decodes body as a JSON Response. Any
// decode failure is reported as ErrInvalidJSON. When the X-Etcd-Index
// header is present it is parsed as a base-10 uint64 into Response.Index;
// a parse failure is returned as the error.
func unmarshalSuccessfulKeysResponse(header http.Header, body []byte) (*Response, error) {
	res := new(Response)
	dec := codec.NewDecoderBytes(body, new(codec.JsonHandle))
	if err := dec.Decode(res); err != nil {
		return nil, ErrInvalidJSON
	}

	if raw := header.Get("X-Etcd-Index"); raw != "" {
		index, err := strconv.ParseUint(raw, 10, 64)
		if err != nil {
			return nil, err
		}
		res.Index = index
	}
	return res, nil
}
// unmarshalFailedKeysResponse decodes body as a JSON Error and returns it
// (by value) as the error. A body that is not valid JSON yields
// ErrInvalidJSON instead.
func unmarshalFailedKeysResponse(body []byte) error {
	etcdErr := Error{}
	err := json.Unmarshal(body, &etcdErr)
	if err != nil {
		return ErrInvalidJSON
	}
	return etcdErr
}

View File

@@ -0,0 +1,272 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package client
import (
"bytes"
"encoding/json"
"fmt"
"net/http"
"net/url"
"path"
"golang.org/x/net/context"
"github.com/coreos/etcd/pkg/types"
)
var (
// defaultV2MembersPrefix is the URL path that v2MembersURL joins onto
// an endpoint to reach the members API.
defaultV2MembersPrefix = "/v2/members"
)
// Member describes one member of an etcd cluster as reported by the
// members API.
type Member struct {
// ID is the unique identifier of this Member.
ID string `json:"id"`
// Name is a human-readable, non-unique identifier of this Member.
Name string `json:"name"`
// PeerURLs represents the HTTP(S) endpoints this Member uses to
// participate in etcd's consensus protocol.
PeerURLs []string `json:"peerURLs"`
// ClientURLs represents the HTTP(S) endpoints on which this Member
// serves its client-facing APIs.
ClientURLs []string `json:"clientURLs"`
}
// memberCollection is a list of Members that decodes itself from the
// members API's JSON envelope.
type memberCollection []Member

// UnmarshalJSON decodes an object whose "Members" field holds the list.
// When that field is absent or null the collection becomes an empty,
// non-nil slice.
func (c *memberCollection) UnmarshalJSON(data []byte) error {
	var envelope struct {
		Members []Member
	}
	err := json.Unmarshal(data, &envelope)
	if err != nil {
		return err
	}

	members := envelope.Members
	if members == nil {
		members = make([]Member, 0)
	}
	*c = members
	return nil
}
// memberCreateOrUpdateRequest is the JSON body sent when adding or
// updating a member.
type memberCreateOrUpdateRequest struct {
	PeerURLs types.URLs
}

// MarshalJSON encodes the request as {"peerURLs": [...]}, with every URL
// rendered through its String method.
func (m *memberCreateOrUpdateRequest) MarshalJSON() ([]byte, error) {
	rendered := make([]string, 0, len(m.PeerURLs))
	for _, u := range m.PeerURLs {
		rendered = append(rendered, u.String())
	}

	payload := struct {
		PeerURLs []string `json:"peerURLs"`
	}{PeerURLs: rendered}
	return json.Marshal(&payload)
}
// NewMembersAPI returns a MembersAPI that reaches etcd's membership API
// over HTTP through c.
func NewMembersAPI(c Client) MembersAPI {
	api := new(httpMembersAPI)
	api.client = c
	return api
}
// MembersAPI is the set of operations available against etcd's cluster
// membership API. Values of this type are built by NewMembersAPI.
type MembersAPI interface {
// List enumerates the current cluster membership.
List(ctx context.Context) ([]Member, error)
// Add instructs etcd to accept a new Member into the cluster.
Add(ctx context.Context, peerURL string) (*Member, error)
// Remove demotes an existing Member out of the cluster.
Remove(ctx context.Context, mID string) error
// Update instructs etcd to update an existing Member in the cluster.
Update(ctx context.Context, mID string, peerURLs []string) error
}
// httpMembersAPI is the MembersAPI implementation returned by
// NewMembersAPI.
type httpMembersAPI struct {
	client httpClient
}

// List fetches the current members. Any status other than 200 is an
// error; a 200 body is decoded through memberCollection.
func (m *httpMembersAPI) List(ctx context.Context) ([]Member, error) {
	resp, body, err := m.client.Do(ctx, &membersAPIActionList{})
	if err != nil {
		return nil, err
	}
	if err = assertStatusCode(resp.StatusCode, http.StatusOK); err != nil {
		return nil, err
	}

	var members memberCollection
	if err = json.Unmarshal(body, &members); err != nil {
		return nil, err
	}
	return []Member(members), nil
}
// Add asks the cluster to accept a new member reachable at peerURL. A 201
// response is decoded into the returned Member. A 409 response is decoded
// into a membersError and returned as the error. Any other status is
// reported as unexpected.
func (m *httpMembersAPI) Add(ctx context.Context, peerURL string) (*Member, error) {
	urls, err := types.NewURLs([]string{peerURL})
	if err != nil {
		return nil, err
	}

	resp, body, err := m.client.Do(ctx, &membersAPIActionAdd{peerURLs: urls})
	if err != nil {
		return nil, err
	}
	if err := assertStatusCode(resp.StatusCode, http.StatusCreated, http.StatusConflict); err != nil {
		return nil, err
	}

	if resp.StatusCode == http.StatusCreated {
		memb := new(Member)
		if err := json.Unmarshal(body, memb); err != nil {
			return nil, err
		}
		return memb, nil
	}

	// Conflict: the body describes why the member was rejected.
	var merr membersError
	if err := json.Unmarshal(body, &merr); err != nil {
		return nil, err
	}
	return nil, merr
}
// Update replaces the peer URLs of the member identified by memberID. A
// 204 response means success. A 404 or 409 response is decoded into a
// membersError and returned as the error. Any other status is reported as
// unexpected.
func (m *httpMembersAPI) Update(ctx context.Context, memberID string, peerURLs []string) error {
	urls, err := types.NewURLs(peerURLs)
	if err != nil {
		return err
	}

	resp, body, err := m.client.Do(ctx, &membersAPIActionUpdate{peerURLs: urls, memberID: memberID})
	if err != nil {
		return err
	}
	if err := assertStatusCode(resp.StatusCode, http.StatusNoContent, http.StatusNotFound, http.StatusConflict); err != nil {
		return err
	}

	if resp.StatusCode == http.StatusNoContent {
		return nil
	}

	// Not found or conflict: the body describes the failure.
	var merr membersError
	if err := json.Unmarshal(body, &merr); err != nil {
		return err
	}
	return merr
}
// Remove asks the cluster to drop the member identified by memberID. Both
// 204 and 410 responses count as success; any other status is an error.
func (m *httpMembersAPI) Remove(ctx context.Context, memberID string) error {
	resp, _, err := m.client.Do(ctx, &membersAPIActionRemove{memberID: memberID})
	if err == nil {
		err = assertStatusCode(resp.StatusCode, http.StatusNoContent, http.StatusGone)
	}
	return err
}
// membersAPIActionList is the request that lists cluster members.
type membersAPIActionList struct{}

// HTTPRequest builds a GET request for the members URL of endpoint ep.
func (l *membersAPIActionList) HTTPRequest(ep url.URL) *http.Request {
	target := v2MembersURL(ep).String()
	req, _ := http.NewRequest("GET", target, nil)
	return req
}
// membersAPIActionRemove is the request that removes one member.
type membersAPIActionRemove struct {
	memberID string
}

// HTTPRequest builds a DELETE request for <members URL>/<memberID> on
// endpoint ep.
func (d *membersAPIActionRemove) HTTPRequest(ep url.URL) *http.Request {
	target := v2MembersURL(ep)
	target.Path = path.Join(target.Path, d.memberID)

	req, _ := http.NewRequest("DELETE", target.String(), nil)
	return req
}
// membersAPIActionAdd is the request that adds a member with the given
// peer URLs.
type membersAPIActionAdd struct {
	peerURLs types.URLs
}

// HTTPRequest builds a JSON POST request to the members URL of endpoint ep
// whose body lists the peer URLs.
func (a *membersAPIActionAdd) HTTPRequest(ep url.URL) *http.Request {
	payload := &memberCreateOrUpdateRequest{PeerURLs: a.peerURLs}
	encoded, _ := json.Marshal(payload)

	target := v2MembersURL(ep)
	req, _ := http.NewRequest("POST", target.String(), bytes.NewReader(encoded))
	req.Header.Set("Content-Type", "application/json")
	return req
}
// membersAPIActionUpdate is the request that replaces the peer URLs of an
// existing member.
type membersAPIActionUpdate struct {
	memberID string
	peerURLs types.URLs
}

// HTTPRequest builds a JSON PUT request to <members URL>/<memberID> on
// endpoint ep whose body lists the new peer URLs.
func (a *membersAPIActionUpdate) HTTPRequest(ep url.URL) *http.Request {
	target := v2MembersURL(ep)
	target.Path = path.Join(target.Path, a.memberID)

	payload := &memberCreateOrUpdateRequest{PeerURLs: a.peerURLs}
	encoded, _ := json.Marshal(payload)

	req, _ := http.NewRequest("PUT", target.String(), bytes.NewReader(encoded))
	req.Header.Set("Content-Type", "application/json")
	return req
}
// assertStatusCode returns nil when got equals any of the want codes, and
// an "unexpected status code" error otherwise (including when no want
// codes are given).
func assertStatusCode(got int, want ...int) (err error) {
	for i := 0; i < len(want); i++ {
		if want[i] == got {
			return nil
		}
	}
	err = fmt.Errorf("unexpected status code %d", got)
	return err
}
// v2MembersURL adds the necessary path to the provided endpoint
// to route requests to the default v2 members API. The endpoint is
// received by value, so the caller's URL is not modified.
func v2MembersURL(ep url.URL) *url.URL {
	u := ep
	u.Path = path.Join(u.Path, defaultV2MembersPrefix)
	return &u
}
// membersError is the error payload decoded from a members API response
// that reports a failure (see httpMembersAPI.Add and Update).
type membersError struct {
// Message is the text taken from the response body's "message" field.
Message string `json:"message"`
// Code is excluded from JSON (tag "-"), so decoding never sets it.
Code int `json:"-"`
}
// Error implements the error interface by returning Message.
func (e membersError) Error() string {
return e.Message
}

View File

@@ -0,0 +1,65 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package client
import (
"fmt"
"net"
"net/url"
)
var (
// lookupSRV is the function srvDiscover.Discover calls to look up SRV
// records. It is a variable (defaulting to net.LookupSRV) as an
// indirection for testing.
lookupSRV = net.LookupSRV
)
// srvDiscover is the Discoverer implementation returned by NewSRVDiscover.
// It carries no state.
type srvDiscover struct{}
// NewSRVDiscover constructs a new Discoverer that uses the stdlib to lookup SRV records.
func NewSRVDiscover() Discoverer {
return &srvDiscover{}
}
// Discover looks up the etcd servers for the domain. It queries the
// "etcd-server-ssl" (https) and "etcd-server" (http) TCP SRV records and
// returns one URL string per record, https entries first. An error is
// returned only when both lookups fail; if just one fails, the results of
// the other are returned.
func (d *srvDiscover) Discover(domain string) ([]string, error) {
	var found []*url.URL

	// collect appends one URL per SRV record of the given service.
	collect := func(service, scheme string) error {
		_, records, err := lookupSRV(service, "tcp", domain)
		if err != nil {
			return err
		}
		for _, rec := range records {
			hostport := net.JoinHostPort(rec.Target, fmt.Sprintf("%d", rec.Port))
			found = append(found, &url.URL{Scheme: scheme, Host: hostport})
		}
		return nil
	}

	errHTTPS := collect("etcd-server-ssl", "https")
	errHTTP := collect("etcd-server", "http")
	if errHTTPS != nil && errHTTP != nil {
		return nil, fmt.Errorf("dns lookup errors: %s and %s", errHTTPS, errHTTP)
	}

	endpoints := make([]string, 0, len(found))
	for _, u := range found {
		endpoints = append(endpoints, u.String())
	}
	return endpoints, nil
}

View File

@@ -0,0 +1,355 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package discovery
import (
"errors"
"fmt"
"math"
"net"
"net/http"
"net/url"
"path"
"sort"
"strconv"
"strings"
"time"
"github.com/coreos/etcd/client"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/pkg/capnslog"
"github.com/jonboulle/clockwork"
"golang.org/x/net/context"
)
// plog is this package's logger; the Err* values are the sentinel errors
// the discovery functions return.
var (
plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "discovery")
ErrInvalidURL = errors.New("discovery: invalid URL")
ErrBadSizeKey = errors.New("discovery: size key is bad")
ErrSizeNotFound = errors.New("discovery: size key not found")
ErrTokenNotFound = errors.New("discovery: token not found")
ErrDuplicateID = errors.New("discovery: found duplicate id")
ErrDuplicateName = errors.New("discovery: found duplicate name")
ErrFullCluster = errors.New("discovery: cluster is full")
ErrTooManyRetries = errors.New("discovery: too many retries")
ErrBadDiscoveryEndpoint = errors.New("discovery: bad discovery endpoint")
)
var (
// Number of retries discovery will attempt before giving up and erroring out.
// The default is math.MaxUint32. It is a variable rather than a constant —
// presumably so tests can lower it; confirm.
nRetries = uint(math.MaxUint32)
)
// JoinCluster connects to the discovery service at durl (optionally through
// the proxy dproxyurl), registers the server identified by id with the
// given config, and returns the result of joining the cluster.
func JoinCluster(durl, dproxyurl string, id types.ID, config string) (string, error) {
	d, err := newDiscovery(durl, dproxyurl, id)
	if err == nil {
		return d.joinCluster(config)
	}
	return "", err
}
// GetCluster connects to the discovery service at durl (optionally through
// the proxy dproxyurl) and retrieves a string describing the cluster. No
// member is registered; the discovery is created with id 0.
func GetCluster(durl, dproxyurl string) (string, error) {
	d, err := newDiscovery(durl, dproxyurl, 0)
	if err == nil {
		return d.getCluster()
	}
	return "", err
}
// discovery holds the state for one session against a discovery service.
// It is built by newDiscovery.
type discovery struct {
// cluster is the path component of the discovery URL (the token).
cluster string
// id identifies the local member; GetCluster passes 0.
id types.ID
// c is a keys API client for the discovery endpoint, created with an
// empty key prefix.
c client.KeysAPI
// retries is not set by newDiscovery, so it starts at 0.
// NOTE(review): presumably counts attempts against nRetries — confirm.
retries uint
// url is the discovery URL with its path cleared.
url *url.URL
// clock is a real clock in newDiscovery.
// NOTE(review): presumably replaceable with a fake in tests — confirm.
clock clockwork.Clock
}
// newProxyFunc builds a proxy function from the given string, which should
// represent a URL that can be used as a proxy. It performs basic
// sanitization of the URL and returns any error encountered.
//
// An empty proxy yields a nil function and a nil error. Otherwise the
// returned function always reports the sanitized proxy URL. If proxy does
// not parse, or parses with a scheme that does not start with "http", it is
// re-parsed with "http://" prepended; when that retry also fails, an error
// is returned.
func newProxyFunc(proxy string) (func(*http.Request) (*url.URL, error), error) {
	if proxy == "" {
		return nil, nil
	}
	// Do a small amount of URL sanitization to help the user
	// Derived from net/http.ProxyFromEnvironment
	proxyURL, err := url.Parse(proxy)
	if err != nil || !strings.HasPrefix(proxyURL.Scheme, "http") {
		// proxy was bogus. Try prepending "http://" to it and
		// see if that parses correctly.
		fixedURL, fixErr := url.Parse("http://" + proxy)
		switch {
		case fixErr == nil:
			proxyURL, err = fixedURL, nil
		case err == nil:
			// The original parsed, but with a non-http scheme, and the
			// retry failed. There is no original error to complain about,
			// so report the retry's error. proxyURL must not be replaced
			// by the failed parse result: it would be nil and the code
			// below would dereference it.
			err = fixErr
		}
		// Otherwise both parses failed: complain about the original error.
	}
	if err != nil {
		return nil, fmt.Errorf("invalid proxy address %q: %v", proxy, err)
	}
	plog.Infof("using proxy %q", proxyURL.String())
	return http.ProxyURL(proxyURL), nil
}
// newDiscovery builds a discovery session for the service at durl. The URL
// path is taken as the cluster token and stripped from the endpoint handed
// to the etcd client. Requests go through the proxy described by dproxyurl
// when it is non-empty. The keys API is created with an empty prefix.
func newDiscovery(durl, dproxyurl string, id types.ID) (*discovery, error) {
	u, err := url.Parse(durl)
	if err != nil {
		return nil, err
	}
	token := u.Path
	u.Path = ""

	pf, err := newProxyFunc(dproxyurl)
	if err != nil {
		return nil, err
	}

	dialer := &net.Dialer{
		Timeout:   30 * time.Second,
		KeepAlive: 30 * time.Second,
	}
	transport := &http.Transport{
		Proxy:               pf,
		Dial:                dialer.Dial,
		TLSHandshakeTimeout: 10 * time.Second,
		// TODO: add ResponseHeaderTimeout back when watch on discovery service writes header early
	}

	c, err := client.New(client.Config{
		Transport: transport,
		Endpoints: []string{u.String()},
	})
	if err != nil {
		return nil, err
	}

	return &discovery{
		cluster: token,
		c:       client.NewKeysAPIWithPrefix(c, ""),
		id:      id,
		url:     u,
		clock:   clockwork.NewRealClock(),
	}, nil
}
// joinCluster registers this member with the discovery service and returns
// the cluster string built from the registered nodes. The cluster is
// checked once before registering, so any checkCluster error (a full
// cluster, for example) fails fast without registering. It is checked again
// afterwards to obtain the node list, size and index that waitNodes needs.
func (d *discovery) joinCluster(config string) (string, error) {
	// Fast path: any checkCluster error means there is no point registering.
	_, _, _, err := d.checkCluster()
	if err != nil {
		return "", err
	}

	err = d.createSelf(config)
	if err != nil {
		// Fails, even on a timeout, if createSelf times out.
		// TODO(barakmich): Retrying the same node might want to succeed here
		// (ie, createSelf should be idempotent for discovery).
		return "", err
	}

	registered, size, index, err := d.checkCluster()
	if err != nil {
		return "", err
	}

	all, err := d.waitNodes(registered, size, index)
	if err != nil {
		return "", err
	}
	return nodesToCluster(all, size)
}
// getCluster returns the cluster string without registering this member.
// A full cluster is not an error here: the nodes returned alongside
// ErrFullCluster are converted directly.
func (d *discovery) getCluster() (string, error) {
	members, size, index, err := d.checkCluster()
	switch {
	case err == ErrFullCluster:
		return nodesToCluster(members, size)
	case err != nil:
		return "", err
	}

	complete, err := d.waitNodes(members, size, index)
	if err != nil {
		return "", err
	}
	return nodesToCluster(complete, size)
}
// createSelf creates this member's key with the given contents and then
// blocks until a watch on that key yields an event. It returns
// ErrDuplicateID when the key already exists.
func (d *discovery) createSelf(contents string) error {
	key := d.selfKey()

	ctx, cancel := context.WithTimeout(context.Background(), client.DefaultRequestTimeout)
	resp, err := d.c.Create(ctx, key, contents)
	cancel()

	if eerr, ok := err.(client.Error); ok && eerr.Code == client.ErrorCodeNodeExist {
		return ErrDuplicateID
	}
	if err != nil {
		return err
	}

	// ensure self appears on the server we connected to
	opts := &client.WatcherOptions{AfterIndex: resp.Node.CreatedIndex - 1}
	_, err = d.c.Watcher(key, opts).Next(context.Background())
	return err
}
// checkCluster reads the expected cluster size and the currently registered
// nodes from the discovery service.
//
// It returns the registered nodes (config entries excluded, ordered by
// CreatedIndex), the configured size and the index of the listing response.
// If size nodes are registered ahead of this member, it returns the first
// size nodes together with ErrFullCluster. A *client.ClusterError on either
// request is logged and handed to checkClusterRetry.
func (d *discovery) checkCluster() ([]*client.Node, int, uint64, error) {
configKey := path.Join("/", d.cluster, "_config")
ctx, cancel := context.WithTimeout(context.Background(), client.DefaultRequestTimeout)
// find cluster size
resp, err := d.c.Get(ctx, path.Join(configKey, "size"), nil)
cancel()
if err != nil {
// NOTE(review): this asserts *client.Error while createSelf asserts
// client.Error (value); confirm which form the client actually returns.
if eerr, ok := err.(*client.Error); ok && eerr.Code == client.ErrorCodeKeyNotFound {
return nil, 0, 0, ErrSizeNotFound
}
if err == client.ErrInvalidJSON {
return nil, 0, 0, ErrBadDiscoveryEndpoint
}
if ce, ok := err.(*client.ClusterError); ok {
plog.Error(ce.Detail())
return d.checkClusterRetry()
}
return nil, 0, 0, err
}
// the size key must hold a decimal integer
size, err := strconv.Atoi(resp.Node.Value)
if err != nil {
return nil, 0, 0, ErrBadSizeKey
}
// list everything registered under the cluster token
ctx, cancel = context.WithTimeout(context.Background(), client.DefaultRequestTimeout)
resp, err = d.c.Get(ctx, d.cluster, nil)
cancel()
if err != nil {
if ce, ok := err.(*client.ClusterError); ok {
plog.Error(ce.Detail())
return d.checkClusterRetry()
}
return nil, 0, 0, err
}
nodes := make([]*client.Node, 0)
// append non-config keys to nodes
for _, n := range resp.Node.Nodes {
if !(path.Base(n.Key) == path.Base(configKey)) {
nodes = append(nodes, n)
}
}
// order by creation index so that the earliest registrations come first
snodes := sortableNodes{nodes}
sort.Sort(snodes)
// find self position
for i := range nodes {
if path.Base(nodes[i].Key) == path.Base(d.selfKey()) {
break
}
// position size-1 reached without finding self: the first size
// slots are taken by other members
if i >= size-1 {
return nodes[:size], size, resp.Index, ErrFullCluster
}
}
return nodes, size, resp.Index, nil
}
// logAndBackoffForRetry counts one more retry, logs that step failed and
// sleeps for 2^retries seconds on the discovery clock.
func (d *discovery) logAndBackoffForRetry(step string) {
	d.retries++
	backoff := time.Second * (0x1 << d.retries)
	plog.Infof("%s: error connecting to %s, retrying in %s", step, d.url, backoff)
	d.clock.Sleep(backoff)
}
// checkClusterRetry backs off and runs checkCluster again, or gives up with
// ErrTooManyRetries once nRetries attempts have been used.
func (d *discovery) checkClusterRetry() ([]*client.Node, int, uint64, error) {
	if d.retries >= nRetries {
		return nil, 0, 0, ErrTooManyRetries
	}
	d.logAndBackoffForRetry("cluster status check")
	return d.checkCluster()
}
// waitNodesRetry backs off, refreshes the cluster state and resumes waiting
// for the remaining nodes, or gives up with ErrTooManyRetries once nRetries
// attempts have been used.
func (d *discovery) waitNodesRetry() ([]*client.Node, error) {
	if d.retries >= nRetries {
		return nil, ErrTooManyRetries
	}
	d.logAndBackoffForRetry("waiting for other nodes")

	members, size, index, err := d.checkCluster()
	if err != nil {
		return nil, err
	}
	return d.waitNodes(members, size, index)
}
// waitNodes starts from the already-known nodes (truncated to size) and
// watches the cluster directory after index until size nodes are known.
// A *client.ClusterError from the watcher is logged and handed to
// waitNodesRetry; any other watch error is returned as is.
func (d *discovery) waitNodes(nodes []*client.Node, size int, index uint64) ([]*client.Node, error) {
	if len(nodes) > size {
		nodes = nodes[:size]
	}
	// watch from the next index
	watcher := d.c.Watcher(d.cluster, &client.WatcherOptions{AfterIndex: index, Recursive: true})

	all := make([]*client.Node, len(nodes))
	copy(all, nodes)

	self := path.Base(d.selfKey())
	for _, known := range all {
		switch name := path.Base(known.Key); name {
		case self:
			plog.Noticef("found self %s in the cluster", self)
		default:
			plog.Noticef("found peer %s in the cluster", name)
		}
	}

	// wait for others
	for len(all) < size {
		plog.Noticef("found %d peer(s), waiting for %d more", len(all), size-len(all))
		resp, err := watcher.Next(context.Background())
		if ce, ok := err.(*client.ClusterError); ok {
			plog.Error(ce.Detail())
			return d.waitNodesRetry()
		}
		if err != nil {
			return nil, err
		}
		plog.Noticef("found peer %s in the cluster", path.Base(resp.Node.Key))
		all = append(all, resp.Node)
	}
	plog.Noticef("found %d needed peer(s)", len(all))
	return all, nil
}
// selfKey returns the key under the cluster token at which this member
// registers itself: "/<cluster>/<id>".
func (d *discovery) selfKey() string {
	member := d.id.String()
	return path.Join("/", d.cluster, member)
}
// nodesToCluster joins the node values with commas into a cluster string.
// The joined string is returned even on failure: ErrInvalidURL when it does
// not parse as a URLs map, ErrDuplicateName when the map does not contain
// exactly size entries.
func nodesToCluster(ns []*client.Node, size int) (string, error) {
	values := make([]string, 0, len(ns))
	for _, node := range ns {
		values = append(values, node.Value)
	}
	cluster := strings.Join(values, ",")

	urlsMap, err := types.NewURLsMap(cluster)
	switch {
	case err != nil:
		return cluster, ErrInvalidURL
	case urlsMap.Len() != size:
		return cluster, ErrDuplicateName
	}
	return cluster, nil
}
// sortableNodes implements sort.Interface over client nodes, ordering them
// by ascending CreatedIndex.
type sortableNodes struct {
	Nodes []*client.Node
}

// Len reports the number of nodes.
func (sn sortableNodes) Len() int {
	return len(sn.Nodes)
}

// Less reports whether node i was created before node j.
func (sn sortableNodes) Less(i, j int) bool {
	a, b := sn.Nodes[i], sn.Nodes[j]
	return a.CreatedIndex < b.CreatedIndex
}

// Swap exchanges nodes i and j.
func (sn sortableNodes) Swap(i, j int) {
	sn.Nodes[j], sn.Nodes[i] = sn.Nodes[i], sn.Nodes[j]
}

View File

@@ -0,0 +1,20 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
Package discovery provides an implementation of the cluster discovery that
is used by etcd.
*/
package discovery

View File

@@ -0,0 +1,97 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package discovery
import (
"fmt"
"net"
"strings"
"github.com/coreos/etcd/pkg/types"
)
// Package-level function variables used by SRVGetCluster in place of direct
// calls into package net.
var (
// indirection for testing
lookupSRV = net.LookupSRV
resolveTCPAddr = net.ResolveTCPAddr
)
// SRVGetCluster builds a cluster string from the DNS SRV records of the
// domain dns. It queries the "etcd-server-ssl" (https) and "etcd-server"
// (http) services; each resolvable target becomes a "<name>=<scheme><host>"
// entry. A target whose resolved address equals one of the resolved apurls is
// given name; every other target gets a numeric placeholder name.
//
// It returns the comma-joined entries and defaultToken. An error is returned
// when an advertised peer URL cannot be resolved, or when both SRV lookups
// fail (in which case the error of the second lookup is returned).
//
// TODO(barakmich): Currently ignores priority and weight (as they don't make as much sense for a bootstrap)
// Also doesn't do any lookups for the token (though it could)
// Also sees each entry as a separate instance.
func SRVGetCluster(name, dns string, defaultToken string, apurls types.URLs) (string, string, error) {
	stringParts := make([]string, 0)
	tempName := int(0)
	tcpAPUrls := make([]string, 0)

	// First, resolve the apurls
	for _, url := range apurls {
		tcpAddr, err := resolveTCPAddr("tcp", url.Host)
		if err != nil {
			plog.Errorf("couldn't resolve host %s during SRV discovery", url.Host)
			return "", "", err
		}
		tcpAPUrls = append(tcpAPUrls, tcpAddr.String())
	}

	// updateNodeMap appends one entry per resolvable SRV target of service,
	// using prefix as the URL scheme.
	updateNodeMap := func(service, prefix string) error {
		_, addrs, err := lookupSRV(service, "tcp", dns)
		if err != nil {
			return err
		}
		for _, srv := range addrs {
			target := strings.TrimSuffix(srv.Target, ".")
			host := net.JoinHostPort(target, fmt.Sprintf("%d", srv.Port))
			tcpAddr, err := resolveTCPAddr("tcp", host)
			if err != nil {
				// An unresolvable target is skipped, not fatal.
				plog.Warningf("couldn't resolve host %s during SRV discovery", host)
				continue
			}
			n := ""
			for _, url := range tcpAPUrls {
				if url == tcpAddr.String() {
					n = name
				}
			}
			if n == "" {
				n = fmt.Sprintf("%d", tempName)
				tempName++
			}
			stringParts = append(stringParts, fmt.Sprintf("%s=%s%s", n, prefix, host))
			plog.Noticef("got bootstrap from DNS for %s at %s%s", service, prefix, host)
		}
		return nil
	}

	failCount := 0
	err := updateNodeMap("etcd-server-ssl", "https://")
	srvErr := make([]string, 2)
	if err != nil {
		srvErr[0] = fmt.Sprintf("error querying DNS SRV records for _etcd-server-ssl %s", err)
		failCount++
	}
	err = updateNodeMap("etcd-server", "http://")
	if err != nil {
		srvErr[1] = fmt.Sprintf("error querying DNS SRV records for _etcd-server %s", err)
		failCount++
	}
	if failCount == 2 {
		// The messages embed arbitrary resolver errors, so they are passed
		// as arguments rather than used as the format string.
		plog.Warningf("%s", srvErr[0])
		plog.Warningf("%s", srvErr[1])
		plog.Errorf("SRV discovery failed: too many errors querying DNS SRV records")
		return "", "", err
	}
	return strings.Join(stringParts, ","), defaultToken, nil
}

View File

@@ -0,0 +1,159 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package error describes the errors used in the etcd project.
// Whenever an error code or message changes, Documentation/errorcode.md
// needs to be updated correspondingly.
package error
import (
"encoding/json"
"fmt"
"net/http"
)
// errors maps each error code to its human-readable message. NewError uses
// it to fill in Error.Message.
var errors = map[int]string{
// command related errors
EcodeKeyNotFound: "Key not found",
EcodeTestFailed: "Compare failed", // test and set
EcodeNotFile: "Not a file",
ecodeNoMorePeer: "Reached the max number of peers in the cluster",
EcodeNotDir: "Not a directory",
EcodeNodeExist: "Key already exists", // create
ecodeKeyIsPreserved: "The prefix of given key is a keyword in etcd",
EcodeRootROnly: "Root is read only",
EcodeDirNotEmpty: "Directory not empty",
ecodeExistingPeerAddr: "Peer address has existed",
EcodeUnauthorized: "The request requires user authentication",
// Post form related errors
ecodeValueRequired: "Value is Required in POST form",
EcodePrevValueRequired: "PrevValue is Required in POST form",
EcodeTTLNaN: "The given TTL in POST form is not a number",
EcodeIndexNaN: "The given index in POST form is not a number",
ecodeValueOrTTLRequired: "Value or TTL is required in POST form",
ecodeTimeoutNaN: "The given timeout in POST form is not a number",
ecodeNameRequired: "Name is required in POST form",
ecodeIndexOrValueRequired: "Index or value is required",
ecodeIndexValueMutex: "Index and value cannot both be specified",
EcodeInvalidField: "Invalid field",
EcodeInvalidForm: "Invalid POST form",
// raft related errors
EcodeRaftInternal: "Raft Internal Error",
EcodeLeaderElect: "During Leader Election",
// etcd related errors
EcodeWatcherCleared: "watcher is cleared due to etcd recovery",
EcodeEventIndexCleared: "The event in requested index is outdated and cleared",
ecodeStandbyInternal: "Standby Internal Error",
ecodeInvalidActiveSize: "Invalid active size",
ecodeInvalidRemoveDelay: "Standby remove delay",
// client related errors
ecodeClientInternal: "Client Internal Error",
}
// errorStatus maps error codes to the HTTP status used when the error is
// written to a response. Codes without an entry are reported as
// http.StatusBadRequest by Error.statusCode.
var errorStatus = map[int]int{
EcodeKeyNotFound: http.StatusNotFound,
EcodeNotFile: http.StatusForbidden,
EcodeDirNotEmpty: http.StatusForbidden,
EcodeUnauthorized: http.StatusUnauthorized,
EcodeTestFailed: http.StatusPreconditionFailed,
EcodeNodeExist: http.StatusPreconditionFailed,
EcodeRaftInternal: http.StatusInternalServerError,
EcodeLeaderElect: http.StatusInternalServerError,
}
// Error codes. The hundreds digit groups them the same way the errors map
// does: 1xx command related, 2xx POST form related, 3xx raft related,
// 4xx etcd related, 5xx client related.
const (
// command related errors
EcodeKeyNotFound = 100
EcodeTestFailed = 101
EcodeNotFile = 102
ecodeNoMorePeer = 103
EcodeNotDir = 104
EcodeNodeExist = 105
ecodeKeyIsPreserved = 106
EcodeRootROnly = 107
EcodeDirNotEmpty = 108
ecodeExistingPeerAddr = 109
EcodeUnauthorized = 110
// POST form related errors
ecodeValueRequired = 200
EcodePrevValueRequired = 201
EcodeTTLNaN = 202
EcodeIndexNaN = 203
ecodeValueOrTTLRequired = 204
ecodeTimeoutNaN = 205
ecodeNameRequired = 206
ecodeIndexOrValueRequired = 207
ecodeIndexValueMutex = 208
EcodeInvalidField = 209
EcodeInvalidForm = 210
// raft related errors
EcodeRaftInternal = 300
EcodeLeaderElect = 301
// etcd related errors
EcodeWatcherCleared = 400
EcodeEventIndexCleared = 401
ecodeStandbyInternal = 402
ecodeInvalidActiveSize = 403
ecodeInvalidRemoveDelay = 404
// client related errors
ecodeClientInternal = 500
)
// Error is an etcd error as serialized to JSON in a response body.
type Error struct {
// ErrorCode is one of the Ecode*/ecode* constants.
ErrorCode int `json:"errorCode"`
// Message is the text registered for ErrorCode in the errors map.
Message string `json:"message"`
// Cause is caller-supplied detail; omitted from JSON when empty.
Cause string `json:"cause,omitempty"`
// Index is written to the X-Etcd-Index response header by WriteTo.
Index uint64 `json:"index"`
}
// NewRequestError returns an Error for errorCode and cause with Index 0.
func NewRequestError(errorCode int, cause string) *Error {
	const noIndex = 0
	return NewError(errorCode, cause, noIndex)
}
// NewError returns an Error for errorCode with the given cause and index.
// Message is looked up in the errors map and is empty for an unregistered
// code.
func NewError(errorCode int, cause string, index uint64) *Error {
	e := new(Error)
	e.ErrorCode = errorCode
	e.Message = errors[errorCode]
	e.Cause = cause
	e.Index = index
	return e
}
// Error implements the error interface, rendering "<Message> (<Cause>)".
func (e Error) Error() string {
	return fmt.Sprintf("%s (%s)", e.Message, e.Cause)
}
// toJsonString returns the JSON encoding of e. A marshalling failure is
// ignored and yields an empty string.
func (e Error) toJsonString() string {
	encoded, _ := json.Marshal(e)
	return string(encoded)
}
// statusCode returns the HTTP status registered for e.ErrorCode in
// errorStatus, or http.StatusBadRequest when none is registered.
func (e Error) statusCode() int {
	if status, known := errorStatus[e.ErrorCode]; known {
		return status
	}
	return http.StatusBadRequest
}
// WriteTo sends e as an HTTP response: the X-Etcd-Index and Content-Type
// headers, the status from statusCode, and the JSON encoding of e followed by
// a newline as the body.
func (e Error) WriteTo(w http.ResponseWriter) {
	header := w.Header()
	header.Add("X-Etcd-Index", fmt.Sprint(e.Index))
	header.Set("Content-Type", "application/json")

	w.WriteHeader(e.statusCode())
	fmt.Fprintln(w, e.toJsonString())
}

View File

@@ -0,0 +1,53 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package v3rpc
import (
"github.com/coreos/etcd/etcdserver"
pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
"golang.org/x/net/context"
)
// handler implements pb.EtcdServer by forwarding each request to the wrapped
// etcdserver.V3DemoServer.
type handler struct {
// server executes the requests via V3DemoDo.
server etcdserver.V3DemoServer
}
// New returns a pb.EtcdServer that forwards requests to s.
func New(s etcdserver.V3DemoServer) pb.EtcdServer {
	h := &handler{server: s}
	return h
}
// Range forwards r to the server wrapped in an InternalRaftRequest. It
// panics if the response is not a *pb.RangeResponse.
func (h *handler) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeResponse, error) {
	req := pb.InternalRaftRequest{Range: r}
	return h.server.V3DemoDo(ctx, req).(*pb.RangeResponse), nil
}
// Put forwards r to the server wrapped in an InternalRaftRequest. It panics
// if the response is not a *pb.PutResponse.
func (h *handler) Put(ctx context.Context, r *pb.PutRequest) (*pb.PutResponse, error) {
	req := pb.InternalRaftRequest{Put: r}
	return h.server.V3DemoDo(ctx, req).(*pb.PutResponse), nil
}
// DeleteRange forwards r to the server wrapped in an InternalRaftRequest. It
// panics if the response is not a *pb.DeleteRangeResponse.
func (h *handler) DeleteRange(ctx context.Context, r *pb.DeleteRangeRequest) (*pb.DeleteRangeResponse, error) {
	req := pb.InternalRaftRequest{DeleteRange: r}
	return h.server.V3DemoDo(ctx, req).(*pb.DeleteRangeResponse), nil
}
// Txn forwards r to the server wrapped in an InternalRaftRequest. It panics
// if the response is not a *pb.TxnResponse.
func (h *handler) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse, error) {
	req := pb.InternalRaftRequest{Txn: r}
	return h.server.V3DemoDo(ctx, req).(*pb.TxnResponse), nil
}
// Compact is not implemented; calling it always panics.
func (h *handler) Compact(ctx context.Context, r *pb.CompactionRequest) (*pb.CompactionResponse, error) {
panic("not implemented")
}

View File

@@ -0,0 +1,636 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package auth
import (
"encoding/json"
"fmt"
"net/http"
"path"
"reflect"
"sort"
"strings"
"time"
etcderr "github.com/coreos/etcd/error"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/pkg/capnslog"
"golang.org/x/crypto/bcrypt"
"golang.org/x/net/context"
)
// Well-known names used by the auth store.
const (
// StorePermsPrefix is the internal prefix of the storage layer dedicated to storing user data.
StorePermsPrefix = "/2"
// RootRoleName is the name of the ROOT role, with privileges to manage the cluster.
RootRoleName = "root"
// GuestRoleName is the name of the role that defines the privileges of an unauthenticated user.
GuestRoleName = "guest"
)
var (
// plog is the package logger used for all auth log output.
plog = capnslog.NewPackageLogger("github.com/coreos/etcd/etcdserver", "auth")
)
// rootRole is the built-in root role with read and write access to every
// key. GetRole returns it for RootRoleName without consulting the store.
var rootRole = Role{
Role: RootRoleName,
Permissions: Permissions{
KV: RWPermission{
Read: []string{"*"},
Write: []string{"*"},
},
},
}
// guestRole is the default guest role, with read and write access to every
// key. EnableAuth creates it when no guest role can be fetched.
var guestRole = Role{
Role: GuestRoleName,
Permissions: Permissions{
KV: RWPermission{
Read: []string{"*"},
Write: []string{"*"},
},
},
}
// doer is the part of the etcd server the auth store depends on: something
// that can execute a request and return its response.
type doer interface {
Do(context.Context, etcdserverpb.Request) (etcdserver.Response, error)
}
// Store is the interface for managing users, roles and the enabled state of
// auth. NewStore returns the implementation backed by an etcd server.
type Store interface {
// User management.
AllUsers() ([]string, error)
GetUser(name string) (User, error)
CreateOrUpdateUser(user User) (out User, created bool, err error)
CreateUser(user User) (User, error)
DeleteUser(name string) error
UpdateUser(user User) (User, error)
// Role management.
AllRoles() ([]string, error)
GetRole(name string) (Role, error)
CreateRole(role Role) error
DeleteRole(name string) error
UpdateRole(role Role) (Role, error)
// Global auth switch.
AuthEnabled() bool
EnableAuth() error
DisableAuth() error
}
// store is the Store implementation that keeps auth data in the etcd server
// under StorePermsPrefix.
type store struct {
// server executes the underlying store requests.
server doer
// timeout bounds each request sent to server.
timeout time.Duration
// ensuredOnce is set once ensureAuthDirectories has completed.
ensuredOnce bool
// enabled caches the auth-enabled flag; nil means not yet determined.
enabled *bool
}
// User is a user record and, when passed to UpdateUser, also a change
// request: merge applies Grant and Revoke to the existing Roles.
type User struct {
User string `json:"user"`
// Password is plaintext in requests; createUserInternal and merge replace
// it with its bcrypt hash before storing.
Password string `json:"password,omitempty"`
Roles []string `json:"roles"`
// Grant lists role names to add on update.
Grant []string `json:"grant,omitempty"`
// Revoke lists role names to remove on update.
Revoke []string `json:"revoke,omitempty"`
}
// Role is a role record and, when passed to UpdateRole, also a change
// request: merge applies Grant and then Revoke to the existing Permissions.
type Role struct {
Role string `json:"role"`
Permissions Permissions `json:"permissions"`
// Grant holds permissions to add on update; nil means none.
Grant *Permissions `json:"grant,omitempty"`
// Revoke holds permissions to remove on update; nil means none.
Revoke *Permissions `json:"revoke,omitempty"`
}
// Permissions is the set of permissions attached to a role.
type Permissions struct {
// KV holds the read/write key patterns.
KV RWPermission `json:"kv"`
}
// IsEmpty reports whether p is nil or grants neither read nor write access
// to any key pattern.
func (p *Permissions) IsEmpty() bool {
	if p == nil {
		return true
	}
	noReads := len(p.KV.Read) == 0
	noWrites := len(p.KV.Write) == 0
	return noReads && noWrites
}
// RWPermission lists the key patterns that may be read and written. A
// pattern ending in '*' matches by prefix (see simpleMatch and prefixMatch).
type RWPermission struct {
Read []string `json:"read"`
Write []string `json:"write"`
}
// Error is an auth failure carrying the HTTP status it should be reported
// with.
type Error struct {
	Status int
	Errmsg string
}

// Error implements the error interface and returns the message.
func (ae Error) Error() string {
	return ae.Errmsg
}

// HTTPStatus returns the HTTP status associated with the error.
func (ae Error) HTTPStatus() int {
	return ae.Status
}

// authErr builds an Error with status hs whose message is "auth: " followed
// by s formatted with v.
func authErr(hs int, s string, v ...interface{}) Error {
	msg := fmt.Sprintf("auth: "+s, v...)
	return Error{Status: hs, Errmsg: msg}
}
// NewStore returns a Store that issues its requests through server, each
// bounded by timeout.
func NewStore(server doer, timeout time.Duration) Store {
	return &store{server: server, timeout: timeout}
}
// AllUsers returns the sorted names of all users. A missing users directory
// yields an empty list rather than an error.
func (s *store) AllUsers() ([]string, error) {
	resp, err := s.requestResource("/users/", false)
	if err != nil {
		if e, ok := err.(*etcderr.Error); ok && e.ErrorCode == etcderr.EcodeKeyNotFound {
			return []string{}, nil
		}
		return nil, err
	}

	var users []string
	for _, child := range resp.Event.Node.Nodes {
		// The user name is the last element of the key.
		_, name := path.Split(child.Key)
		users = append(users, name)
	}
	sort.Strings(users)
	return users, nil
}
// GetUser loads the user called name. It returns a 404 auth Error when the
// user's key is not found. The root user always carries the root role.
func (s *store) GetUser(name string) (User, error) {
	resp, err := s.requestResource("/users/"+name, false)
	if err != nil {
		if e, ok := err.(*etcderr.Error); ok && e.ErrorCode == etcderr.EcodeKeyNotFound {
			return User{}, authErr(http.StatusNotFound, "User %s does not exist.", name)
		}
		return User{}, err
	}

	var loaded User
	if err = json.Unmarshal([]byte(*resp.Event.Node.Value), &loaded); err != nil {
		return loaded, err
	}
	// Attach root role to root user.
	if loaded.User == "root" {
		return attachRootRole(loaded), nil
	}
	return loaded, nil
}
// CreateOrUpdateUser should only be used to create a new user, or when the
// caller cannot tell whether the request is a create or an update (e.g. when
// only a password is passed in). It updates the user if GetUser finds it and
// creates it otherwise; created reports which of the two happened.
func (s *store) CreateOrUpdateUser(user User) (out User, created bool, err error) {
	if _, getErr := s.GetUser(user.User); getErr != nil {
		out, err = s.CreateUser(user)
		return out, true, err
	}
	out, err = s.UpdateUser(user)
	return out, false, err
}
// CreateUser stores a new user and logs the creation on success. The root
// user is always given the root role.
func (s *store) CreateUser(user User) (User, error) {
	// Attach root role to root user.
	if user.User == "root" {
		user = attachRootRole(user)
	}
	created, err := s.createUserInternal(user)
	if err != nil {
		return created, err
	}
	plog.Noticef("created user %s", user.User)
	return created, nil
}
// createUserInternal validates that a password is present, replaces it with
// its bcrypt hash and creates the user's resource. An already existing user
// is reported as a 409 auth Error.
func (s *store) createUserInternal(user User) (User, error) {
	if user.Password == "" {
		return user, authErr(http.StatusBadRequest, "Cannot create user %s with an empty password", user.User)
	}
	hashed, err := bcrypt.GenerateFromPassword([]byte(user.Password), bcrypt.DefaultCost)
	if err != nil {
		return user, err
	}
	user.Password = string(hashed)

	if _, err = s.createResource("/users/"+user.User, user); err != nil {
		if e, ok := err.(*etcderr.Error); ok && e.ErrorCode == etcderr.EcodeNodeExist {
			return user, authErr(http.StatusConflict, "User %s already exists.", user.User)
		}
		return user, err
	}
	return user, nil
}
// DeleteUser removes the user called name. The root user cannot be deleted
// while auth is enabled; a missing user is reported as a 404 auth Error.
func (s *store) DeleteUser(name string) error {
	if s.AuthEnabled() && name == "root" {
		return authErr(http.StatusForbidden, "Cannot delete root user while auth is enabled.")
	}
	_, err := s.deleteResource("/users/" + name)
	if err == nil {
		plog.Noticef("deleted user %s", name)
		return nil
	}
	if e, ok := err.(*etcderr.Error); ok && e.ErrorCode == etcderr.EcodeKeyNotFound {
		return authErr(http.StatusNotFound, "User %s does not exist", name)
	}
	return err
}
// UpdateUser merges the changes in user (password, grants, revokes) into the
// stored user and writes the result back. An update that changes nothing is
// rejected with a 400 auth Error.
func (s *store) UpdateUser(user User) (User, error) {
	current, err := s.GetUser(user.User)
	if err != nil {
		if e, ok := err.(*etcderr.Error); ok && e.ErrorCode == etcderr.EcodeKeyNotFound {
			return user, authErr(http.StatusNotFound, "User %s doesn't exist.", user.User)
		}
		return current, err
	}

	merged, err := current.merge(user)
	if err != nil {
		return current, err
	}
	if reflect.DeepEqual(current, merged) {
		return current, authErr(http.StatusBadRequest, "User not updated. Use grant/revoke/password to update the user.")
	}

	if _, err = s.updateResource("/users/"+user.User, merged); err != nil {
		return merged, err
	}
	plog.Noticef("updated user %s", user.User)
	return merged, nil
}
// AllRoles returns the sorted names of all roles. The root role is always
// included; when the roles directory is missing it is the only entry.
func (s *store) AllRoles() ([]string, error) {
	roles := []string{RootRoleName}

	resp, err := s.requestResource("/roles/", false)
	if err != nil {
		if e, ok := err.(*etcderr.Error); ok && e.ErrorCode == etcderr.EcodeKeyNotFound {
			return roles, nil
		}
		return nil, err
	}

	for _, child := range resp.Event.Node.Nodes {
		// The role name is the last element of the key.
		_, name := path.Split(child.Key)
		roles = append(roles, name)
	}
	sort.Strings(roles)
	return roles, nil
}
// GetRole loads the role called name. The root role is built in and returned
// without a store lookup; a missing role is reported as a 404 auth Error.
func (s *store) GetRole(name string) (Role, error) {
	if name == RootRoleName {
		return rootRole, nil
	}
	resp, err := s.requestResource("/roles/"+name, false)
	if err != nil {
		if e, ok := err.(*etcderr.Error); ok && e.ErrorCode == etcderr.EcodeKeyNotFound {
			return Role{}, authErr(http.StatusNotFound, "Role %s does not exist.", name)
		}
		return Role{}, err
	}

	var loaded Role
	err = json.Unmarshal([]byte(*resp.Event.Node.Value), &loaded)
	return loaded, err
}
// CreateRole stores a new role. The root role cannot be created; an already
// existing role is reported as a 409 auth Error.
func (s *store) CreateRole(role Role) error {
	if role.Role == RootRoleName {
		return authErr(http.StatusForbidden, "Cannot modify role %s: is root role.", role.Role)
	}
	_, err := s.createResource("/roles/"+role.Role, role)
	if err == nil {
		plog.Noticef("created new role %s", role.Role)
		return nil
	}
	if e, ok := err.(*etcderr.Error); ok && e.ErrorCode == etcderr.EcodeNodeExist {
		return authErr(http.StatusConflict, "Role %s already exists.", role.Role)
	}
	return err
}
// DeleteRole removes the role called name. The root role cannot be deleted;
// a missing role is reported as a 404 auth Error.
func (s *store) DeleteRole(name string) error {
	if name == RootRoleName {
		return authErr(http.StatusForbidden, "Cannot modify role %s: is root role.", name)
	}
	_, err := s.deleteResource("/roles/" + name)
	if err == nil {
		plog.Noticef("deleted role %s", name)
		return nil
	}
	if e, ok := err.(*etcderr.Error); ok && e.ErrorCode == etcderr.EcodeKeyNotFound {
		return authErr(http.StatusNotFound, "Role %s doesn't exist.", name)
	}
	return err
}
// UpdateRole merges the grants and revokes in role into the stored role and
// writes the result back. The root role cannot be modified, and an update
// that changes nothing is rejected with a 400 auth Error.
func (s *store) UpdateRole(role Role) (Role, error) {
	if role.Role == RootRoleName {
		return Role{}, authErr(http.StatusForbidden, "Cannot modify role %s: is root role.", role.Role)
	}
	current, err := s.GetRole(role.Role)
	if err != nil {
		if e, ok := err.(*etcderr.Error); ok && e.ErrorCode == etcderr.EcodeKeyNotFound {
			return role, authErr(http.StatusNotFound, "Role %s doesn't exist.", role.Role)
		}
		return current, err
	}

	merged, err := current.merge(role)
	if err != nil {
		return current, err
	}
	if reflect.DeepEqual(current, merged) {
		return current, authErr(http.StatusBadRequest, "Role not updated. Use grant/revoke to update the role.")
	}

	if _, err = s.updateResource("/roles/"+role.Role, merged); err != nil {
		return merged, err
	}
	plog.Noticef("updated role %s", role.Role)
	return merged, nil
}
// AuthEnabled reports whether auth is currently enabled, as determined by
// detectAuth.
func (s *store) AuthEnabled() bool {
	enabled := s.detectAuth()
	return enabled
}
// EnableAuth turns auth on. It requires auth to be disabled and a root user
// to exist, creates the default guest role when none can be fetched, and
// caches the enabled state on success.
func (s *store) EnableAuth() error {
	if s.AuthEnabled() {
		return authErr(http.StatusConflict, "already enabled")
	}
	if _, err := s.GetUser("root"); err != nil {
		return authErr(http.StatusConflict, "No root user available, please create one")
	}
	if _, err := s.GetRole(GuestRoleName); err != nil {
		plog.Printf("no guest role access found, creating default")
		if err := s.CreateRole(guestRole); err != nil {
			plog.Errorf("error creating guest role. aborting auth enable.")
			return err
		}
	}

	if err := s.enableAuth(); err != nil {
		plog.Errorf("error enabling auth (%v)", err)
		return err
	}
	on := true
	s.enabled = &on
	plog.Noticef("auth: enabled auth")
	return nil
}
// DisableAuth turns auth off and caches the disabled state on success. It
// fails with a 409 auth Error when auth is already disabled.
func (s *store) DisableAuth() error {
	if !s.AuthEnabled() {
		return authErr(http.StatusConflict, "already disabled")
	}
	if err := s.disableAuth(); err != nil {
		plog.Errorf("error disabling auth (%v)", err)
		return err
	}
	off := false
	s.enabled = &off
	plog.Noticef("auth: disabled auth")
	return nil
}
// merge applies the properties of the passed-in User to the User on which it
// is called and returns a new User with these modifications applied. Think of
// all Users as immutable sets of data. Merge allows you to perform the set
// operations (desired grants and revokes) atomically.
//
// A non-empty n.Password replaces the stored hash with a fresh bcrypt hash.
// Granting a role the user already has, or revoking one it does not have, is
// rejected with a 409 auth Error and no changes are returned. The resulting
// Roles are sorted.
func (u User) merge(n User) (User, error) {
	var out User
	if u.User != n.User {
		return out, authErr(http.StatusConflict, "Merging user data with conflicting usernames: %s %s", u.User, n.User)
	}
	out.User = u.User
	if n.Password != "" {
		hash, err := bcrypt.GenerateFromPassword([]byte(n.Password), bcrypt.DefaultCost)
		if err != nil {
			return User{}, err
		}
		out.Password = string(hash)
	} else {
		out.Password = u.Password
	}

	currentRoles := types.NewUnsafeSet(u.Roles...)
	for _, g := range n.Grant {
		if currentRoles.Contains(g) {
			plog.Noticef("granting duplicate role %s for user %s", g, n.User)
			// Pass the names as arguments: authErr formats its message, so a
			// pre-formatted string would be interpreted as a format a second time.
			return User{}, authErr(http.StatusConflict, "Granting duplicate role %s for user %s", g, n.User)
		}
		currentRoles.Add(g)
	}
	for _, r := range n.Revoke {
		if !currentRoles.Contains(r) {
			plog.Noticef("revoking ungranted role %s for user %s", r, n.User)
			return User{}, authErr(http.StatusConflict, "Revoking ungranted role %s for user %s", r, n.User)
		}
		currentRoles.Remove(r)
	}
	out.Roles = currentRoles.Values()
	sort.Strings(out.Roles)
	return out, nil
}
// CheckPassword reports whether password matches the user's stored bcrypt
// hash.
func (u User) CheckPassword(password string) bool {
	return bcrypt.CompareHashAndPassword([]byte(u.Password), []byte(password)) == nil
}
// merge for a role works like merge for a User: it returns a new Role with
// n.Grant applied and then n.Revoke applied to r's permissions. The two
// roles must have the same name.
func (r Role) merge(n Role) (Role, error) {
	if r.Role != n.Role {
		return Role{}, authErr(http.StatusConflict, "Merging role with conflicting names: %s %s", r.Role, n.Role)
	}
	merged := Role{Role: r.Role}

	granted, err := r.Permissions.Grant(n.Grant)
	merged.Permissions = granted
	if err != nil {
		return merged, err
	}

	revoked, err := merged.Permissions.Revoke(n.Revoke)
	merged.Permissions = revoked
	if err != nil {
		return merged, err
	}
	return merged, nil
}
// HasKeyAccess reports whether the role may read (or, if write is set,
// write) key. The root role always has access.
func (r Role) HasKeyAccess(key string, write bool) bool {
	isRoot := r.Role == RootRoleName
	return isRoot || r.Permissions.KV.HasAccess(key, write)
}
// HasRecursiveAccess reports whether the role may recursively read (or, if
// write is set, write) under key. The root role always has access.
func (r Role) HasRecursiveAccess(key string, write bool) bool {
	isRoot := r.Role == RootRoleName
	return isRoot || r.Permissions.KV.HasRecursiveAccess(key, write)
}
// Grant returns a new Permissions with the permissions in n added to p.
// A nil n leaves p unchanged.
func (p Permissions) Grant(n *Permissions) (Permissions, error) {
	if n == nil {
		return p, nil
	}
	kv, err := p.KV.Grant(n.KV)
	return Permissions{KV: kv}, err
}
// Revoke returns a new Permissions with the permissions in n removed from p.
// A nil n leaves p unchanged.
func (p Permissions) Revoke(n *Permissions) (Permissions, error) {
	if n == nil {
		return p, nil
	}
	kv, err := p.KV.Revoke(n.KV)
	return Permissions{KV: kv}, err
}
// Grant returns a new RWPermission containing rw's patterns plus those in n,
// each list sorted. Granting a pattern that is already present is rejected
// with a 409 auth Error and an empty RWPermission.
func (rw RWPermission) Grant(n RWPermission) (RWPermission, error) {
	reads := types.NewUnsafeSet(rw.Read...)
	for _, pat := range n.Read {
		if reads.Contains(pat) {
			return RWPermission{}, authErr(http.StatusConflict, "Granting duplicate read permission %s", pat)
		}
		reads.Add(pat)
	}

	writes := types.NewUnsafeSet(rw.Write...)
	for _, pat := range n.Write {
		if writes.Contains(pat) {
			return RWPermission{}, authErr(http.StatusConflict, "Granting duplicate write permission %s", pat)
		}
		writes.Add(pat)
	}

	result := RWPermission{Read: reads.Values(), Write: writes.Values()}
	sort.Strings(result.Read)
	sort.Strings(result.Write)
	return result, nil
}
// Revoke returns a new RWPermission containing rw's patterns minus those in
// n, each list sorted. Revoking a pattern that is not present is only
// logged; it is not an error.
func (rw RWPermission) Revoke(n RWPermission) (RWPermission, error) {
	reads := types.NewUnsafeSet(rw.Read...)
	for _, pat := range n.Read {
		if reads.Contains(pat) {
			reads.Remove(pat)
			continue
		}
		plog.Noticef("revoking ungranted read permission %s", pat)
	}

	writes := types.NewUnsafeSet(rw.Write...)
	for _, pat := range n.Write {
		if writes.Contains(pat) {
			writes.Remove(pat)
			continue
		}
		plog.Noticef("revoking ungranted write permission %s", pat)
	}

	result := RWPermission{Read: reads.Values(), Write: writes.Values()}
	sort.Strings(result.Read)
	sort.Strings(result.Write)
	return result, nil
}
// HasAccess reports whether key matches (via simpleMatch) any pattern in the
// Write list when write is set, or in the Read list otherwise.
func (rw RWPermission) HasAccess(key string, write bool) bool {
	patterns := rw.Read
	if write {
		patterns = rw.Write
	}
	for _, pat := range patterns {
		if ok, err := simpleMatch(pat, key); err == nil && ok {
			return true
		}
	}
	return false
}
// HasRecursiveAccess reports whether key matches (via prefixMatch) any
// pattern in the Write list when write is set, or in the Read list
// otherwise.
func (rw RWPermission) HasRecursiveAccess(key string, write bool) bool {
	var patterns []string
	if write {
		patterns = rw.Write
	} else {
		patterns = rw.Read
	}
	for _, pat := range patterns {
		if ok, err := prefixMatch(pat, key); err == nil && ok {
			return true
		}
	}
	return false
}
// simpleMatch reports whether key matches pattern. A pattern ending in '*'
// matches any key that starts with the text before the '*'; any other
// pattern must equal key exactly. An empty pattern therefore matches only
// the empty key. The returned error is always nil.
func simpleMatch(pattern string, key string) (match bool, err error) {
	// HasSuffix is safe for an empty pattern, unlike indexing its last byte.
	if strings.HasSuffix(pattern, "*") {
		return strings.HasPrefix(key, strings.TrimSuffix(pattern, "*")), nil
	}
	return key == pattern, nil
}
// prefixMatch reports whether pattern is a prefix pattern (ends in '*') and
// key starts with the text before the '*'. Patterns without a trailing '*',
// including the empty pattern, never match. The returned error is always
// nil.
func prefixMatch(pattern string, key string) (match bool, err error) {
	// HasSuffix is safe for an empty pattern, unlike indexing its last byte.
	if !strings.HasSuffix(pattern, "*") {
		return false, nil
	}
	return strings.HasPrefix(key, strings.TrimSuffix(pattern, "*")), nil
}
// attachRootRole returns u with RootRoleName appended to its roles unless it
// is already present.
func attachRootRole(u User) User {
	for _, role := range u.Roles {
		if role == RootRoleName {
			return u
		}
	}
	u.Roles = append(u.Roles, RootRoleName)
	return u
}

View File

@@ -0,0 +1,168 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package auth
import (
"encoding/json"
"path"
etcderr "github.com/coreos/etcd/error"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/etcdserverpb"
"golang.org/x/net/context"
)
// ensureAuthDirectories creates the store directories used by the auth
// package (StorePermsPrefix and its "/users/" and "/roles/" children) and the
// "/enabled" key with the initial value "false".
//
// Each entry is created with PrevExist set to false, so an entry that already
// exists is reported as EcodeNodeExist and treated as success. Once the whole
// sequence has succeeded, s.ensuredOnce is set and later calls return
// immediately. Any other error is returned to the caller.
func (s *store) ensureAuthDirectories() error {
	if s.ensuredOnce {
		return nil
	}
	for _, res := range []string{StorePermsPrefix, StorePermsPrefix + "/users/", StorePermsPrefix + "/roles/"} {
		ctx, cancel := context.WithTimeout(context.Background(), s.timeout)
		pe := false
		rr := etcdserverpb.Request{
			Method:    "PUT",
			Path:      res,
			Dir:       true,
			PrevExist: &pe,
		}
		_, err := s.server.Do(ctx, rr)
		// Release the per-request context immediately; deferring inside the
		// loop would keep every timer alive until the function returns.
		cancel()
		if err != nil {
			if e, ok := err.(*etcderr.Error); ok {
				if e.ErrorCode == etcderr.EcodeNodeExist {
					continue
				}
			}
			plog.Errorf("failed to create auth directories in the store (%v)", err)
			return err
		}
	}
	ctx, cancel := context.WithTimeout(context.Background(), s.timeout)
	defer cancel()
	pe := false
	rr := etcdserverpb.Request{
		Method:    "PUT",
		Path:      StorePermsPrefix + "/enabled",
		Val:       "false",
		PrevExist: &pe,
	}
	_, err := s.server.Do(ctx, rr)
	if err != nil {
		if e, ok := err.(*etcderr.Error); ok {
			if e.ErrorCode == etcderr.EcodeNodeExist {
				// The flag already exists; leave its current value alone.
				s.ensuredOnce = true
				return nil
			}
		}
		return err
	}
	s.ensuredOnce = true
	return nil
}
// enableAuth updates the existing "/enabled" resource to true and returns
// any error from the update.
func (s *store) enableAuth() error {
	if _, err := s.updateResource("/enabled", true); err != nil {
		return err
	}
	return nil
}
// disableAuth updates the existing "/enabled" resource to false and returns
// any error from the update.
func (s *store) disableAuth() error {
	if _, err := s.updateResource("/enabled", false); err != nil {
		return err
	}
	return nil
}
// detectAuth reports whether auth is enabled.
//
// It returns false when the store has no server. A previously cached answer
// in s.enabled is returned directly. Otherwise the "/enabled" resource is
// read: a missing key is cached as false, a successfully decoded JSON boolean
// is cached and returned, and any other failure is logged and reported as
// false without being cached.
func (s *store) detectAuth() bool {
	switch {
	case s.server == nil:
		return false
	case s.enabled != nil:
		return *s.enabled
	}
	resp, err := s.requestResource("/enabled", false)
	if err != nil {
		if e, ok := err.(*etcderr.Error); ok && e.ErrorCode == etcderr.EcodeKeyNotFound {
			disabled := false
			s.enabled = &disabled
			return false
		}
		plog.Errorf("failed to detect auth settings (%s)", err)
		return false
	}
	var on bool
	if err = json.Unmarshal([]byte(*resp.Event.Node.Value), &on); err != nil {
		plog.Errorf("internal bookkeeping value for enabled isn't valid JSON (%v)", err)
		return false
	}
	s.enabled = &on
	return on
}
// requestResource issues a GET for the resource res under StorePermsPrefix,
// bounded by s.timeout. dir is forwarded as the request's Dir flag.
func (s *store) requestResource(res string, dir bool) (etcdserver.Response, error) {
	ctx, cancel := context.WithTimeout(context.Background(), s.timeout)
	defer cancel()
	return s.server.Do(ctx, etcdserverpb.Request{
		Method: "GET",
		Path:   path.Join(StorePermsPrefix, res),
		Dir:    dir,
	})
}
// updateResource writes value to res via setResource with PrevExist set to
// true.
func (s *store) updateResource(res string, value interface{}) (etcdserver.Response, error) {
	const prevExist = true
	return s.setResource(res, value, prevExist)
}
// createResource writes value to res via setResource with PrevExist set to
// false.
func (s *store) createResource(res string, value interface{}) (etcdserver.Response, error) {
	const prevExist = false
	return s.setResource(res, value, prevExist)
}
// setResource JSON-encodes value and PUTs it at res under StorePermsPrefix,
// bounded by s.timeout. prevexist is passed through as the request's
// PrevExist condition. The auth directories are ensured first; a failure
// there, or a failure to encode value, is returned with a zero Response.
func (s *store) setResource(res string, value interface{}, prevexist bool) (etcdserver.Response, error) {
	if err := s.ensureAuthDirectories(); err != nil {
		return etcdserver.Response{}, err
	}
	ctx, cancel := context.WithTimeout(context.Background(), s.timeout)
	defer cancel()
	encoded, err := json.Marshal(value)
	if err != nil {
		return etcdserver.Response{}, err
	}
	req := etcdserverpb.Request{
		Method:    "PUT",
		Path:      path.Join(StorePermsPrefix, res),
		Val:       string(encoded),
		PrevExist: &prevexist,
	}
	return s.server.Do(ctx, req)
}
// deleteResource issues a DELETE for res under StorePermsPrefix with
// PrevExist set to true, bounded by s.timeout. The auth directories are
// ensured first; a failure there is returned with a zero Response.
func (s *store) deleteResource(res string) (etcdserver.Response, error) {
	if err := s.ensureAuthDirectories(); err != nil {
		return etcdserver.Response{}, err
	}
	ctx, cancel := context.WithTimeout(context.Background(), s.timeout)
	defer cancel()
	mustExist := true
	req := etcdserverpb.Request{
		Method:    "DELETE",
		Path:      path.Join(StorePermsPrefix, res),
		PrevExist: &mustExist,
	}
	return s.server.Do(ctx, req)
}

View File

@@ -0,0 +1,438 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"bytes"
"crypto/sha1"
"encoding/binary"
"encoding/json"
"fmt"
"path"
"sort"
"strings"
"sync"
"github.com/coreos/etcd/pkg/netutil"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft"
"github.com/coreos/etcd/raft/raftpb"
"github.com/coreos/etcd/store"
"github.com/coreos/etcd/version"
"github.com/coreos/go-semver/semver"
)
const (
// raftAttributesSuffix is the final path element of the store key under
// which a member's RaftAttributes are written by AddMember and
// UpdateRaftAttributes.
raftAttributesSuffix = "raftAttributes"
// NOTE(review): attributesSuffix is presumably the corresponding key suffix
// for a member's Attributes; no use is visible in this section - confirm.
attributesSuffix = "attributes"
)
// Cluster is the read-only view of a cluster: its ID, its members and their
// client URLs, the set of removed member IDs, and the cluster version.
type Cluster interface {
// ID returns the cluster ID
ID() types.ID
// ClientURLs returns an aggregate set of all URLs on which this
// cluster is listening for client requests
ClientURLs() []string
// Members returns a slice of members sorted by their ID
Members() []*Member
// Member retrieves a particular member based on ID, or nil if the
// member does not exist in the cluster
Member(id types.ID) *Member
// IsIDRemoved checks whether the given ID has been removed from this
// cluster at some point in the past
IsIDRemoved(id types.ID) bool
// Version is the cluster-wide minimum major.minor version.
Version() *semver.Version
}
// cluster is a list of Members that belong to the same raft cluster, along
// with the IDs of removed members, the cluster version and the backing store.
type cluster struct {
// id is the cluster ID; it is either supplied by the caller or derived
// from the member IDs by genID.
id types.ID
token string
// store is the backing store that membership changes are written to and
// that Recover reads from.
store store.Store
sync.Mutex // guards the fields below
version *semver.Version
members map[types.ID]*Member
// removed contains the ids of removed members in the cluster.
// removed id cannot be reused.
removed map[types.ID]bool
}
// newClusterFromURLsMap builds a cluster with one member per entry of
// urlsmap and derives the cluster ID from the resulting member IDs. It
// returns an error when two entries produce the same member ID or when a
// member ID equals raft.None.
func newClusterFromURLsMap(token string, urlsmap types.URLsMap) (*cluster, error) {
	cl := newCluster(token)
	for name, urls := range urlsmap {
		memb := NewMember(name, urls, token, nil)
		if _, dup := cl.members[memb.ID]; dup {
			return nil, fmt.Errorf("member exists with identical ID %v", memb)
		}
		if raft.None == uint64(memb.ID) {
			return nil, fmt.Errorf("cannot use %x as member id", raft.None)
		}
		cl.members[memb.ID] = memb
	}
	cl.genID()
	return cl, nil
}
// newClusterFromMembers builds a cluster with the given token and ID whose
// member map holds membs keyed by their IDs.
func newClusterFromMembers(token string, id types.ID, membs []*Member) *cluster {
	cl := newCluster(token)
	cl.id = id
	for i := range membs {
		cl.members[membs[i].ID] = membs[i]
	}
	return cl
}
// newCluster returns a cluster carrying token with empty, non-nil member and
// removed-ID maps.
func newCluster(token string) *cluster {
	cl := new(cluster)
	cl.token = token
	cl.members = make(map[types.ID]*Member)
	cl.removed = make(map[types.ID]bool)
	return cl
}
// ID returns the cluster ID.
func (c *cluster) ID() types.ID {
	return c.id
}
// Members returns clones of all members, sorted with the MembersByID
// ordering. The result is nil when the cluster has no members.
func (c *cluster) Members() []*Member {
	c.Lock()
	defer c.Unlock()
	var sorted MembersByID
	for id := range c.members {
		sorted = append(sorted, c.members[id].Clone())
	}
	sort.Sort(sorted)
	return []*Member(sorted)
}
// Member returns the result of Clone on the member stored under id. For an
// unknown id the lookup yields a nil *Member, on which Clone is still called.
func (c *cluster) Member(id types.ID) *Member {
	c.Lock()
	defer c.Unlock()
	m := c.members[id]
	return m.Clone()
}
// MemberByName returns a clone of the member whose Name equals name.
// It panics if more than one member carries that name. When no member
// matches, Clone is invoked on a nil *Member and its result is returned.
func (c *cluster) MemberByName(name string) *Member {
	c.Lock()
	defer c.Unlock()
	var found *Member
	for _, m := range c.members {
		if m.Name != name {
			continue
		}
		if found != nil {
			plog.Panicf("two members with the given name %q exist", name)
		}
		found = m
	}
	return found.Clone()
}
// MemberIDs returns the IDs of all members, sorted with the types.IDSlice
// ordering. The result is nil when the cluster has no members.
func (c *cluster) MemberIDs() []types.ID {
	c.Lock()
	defer c.Unlock()
	var sorted []types.ID
	for _, memb := range c.members {
		sorted = append(sorted, memb.ID)
	}
	sort.Sort(types.IDSlice(sorted))
	return sorted
}
// IsIDRemoved reports whether id is recorded in the cluster's removed set.
func (c *cluster) IsIDRemoved(id types.ID) bool {
	c.Lock()
	defer c.Unlock()
	wasRemoved := c.removed[id]
	return wasRemoved
}
// PeerURLs collects the peer URLs of every member into one list.
// The list is never nil and is sorted in ascending lexicographical order.
func (c *cluster) PeerURLs() []string {
	c.Lock()
	defer c.Unlock()
	all := []string{}
	for _, m := range c.members {
		all = append(all, m.PeerURLs...)
	}
	sort.Strings(all)
	return all
}
// ClientURLs collects the client URLs of every member into one list.
// The list is never nil and is sorted in ascending lexicographical order.
func (c *cluster) ClientURLs() []string {
	c.Lock()
	defer c.Unlock()
	all := []string{}
	for _, m := range c.members {
		all = append(all, m.ClientURLs...)
	}
	sort.Strings(all)
	return all
}
// String renders the cluster as
// "{ClusterID:<id> Members:[...] RemovedMemberIDs:[...]}". Members and
// removed IDs appear in map iteration order and are separated by spaces.
func (c *cluster) String() string {
	c.Lock()
	defer c.Unlock()

	var membs []string
	for _, m := range c.members {
		membs = append(membs, fmt.Sprintf("%+v", m))
	}
	var gone []string
	for id := range c.removed {
		gone = append(gone, fmt.Sprintf("%s", id))
	}

	var buf bytes.Buffer
	fmt.Fprintf(&buf, "{ClusterID:%s ", c.id)
	fmt.Fprintf(&buf, "Members:[%s] ", strings.Join(membs, " "))
	fmt.Fprintf(&buf, "RemovedMemberIDs:[%s]}", strings.Join(gone, " "))
	return buf.String()
}
// genID sets the cluster ID to the first 8 bytes (big-endian) of the SHA-1
// digest of the sorted member IDs, each encoded as a big-endian uint64.
func (c *cluster) genID() {
	ids := c.MemberIDs()
	buf := make([]byte, 8*len(ids))
	for i := range ids {
		off := 8 * i
		binary.BigEndian.PutUint64(buf[off:], uint64(ids[i]))
	}
	digest := sha1.Sum(buf)
	c.id = types.ID(binary.BigEndian.Uint64(digest[:8]))
}
// SetID replaces the cluster ID with id.
func (c *cluster) SetID(id types.ID) {
	c.id = id
}
// SetStore replaces the cluster's backing store with st.
func (c *cluster) SetStore(st store.Store) {
	c.store = st
}
// Recover reloads the member map, the removed-ID set and the cluster version
// from the backing store, then runs the downgrade check on the loaded
// version.
func (c *cluster) Recover() {
	members, removed := membersFromStore(c.store)
	c.members, c.removed = members, removed

	c.version = clusterVersionFromStore(c.store)
	MustDetectDowngrade(c.version)
}
// ValidateConfigurationChange checks a proposed ConfChange against the
// membership currently recorded in the store.
//
// It returns ErrIDRemoved when the node ID was removed earlier, ErrIDExists
// when adding an ID that is already a member, ErrIDNotFound when removing or
// updating an unknown ID, and ErrPeerURLexists when an added or updated
// member would reuse a peer URL of another member. It panics on an unknown
// change type or when the change context cannot be decoded into a Member.
func (c *cluster) ValidateConfigurationChange(cc raftpb.ConfChange) error {
	members, removed := membersFromStore(c.store)
	id := types.ID(cc.NodeID)
	if removed[id] {
		return ErrIDRemoved
	}
	switch cc.Type {
	case raftpb.ConfChangeAddNode:
		if members[id] != nil {
			return ErrIDExists
		}
		// Every peer URL already in use by any member is off limits.
		taken := make(map[string]bool)
		for _, existing := range members {
			for _, peerURL := range existing.PeerURLs {
				taken[peerURL] = true
			}
		}
		proposed := new(Member)
		if err := json.Unmarshal(cc.Context, proposed); err != nil {
			plog.Panicf("unmarshal member should never fail: %v", err)
		}
		for _, peerURL := range proposed.PeerURLs {
			if taken[peerURL] {
				return ErrPeerURLexists
			}
		}
	case raftpb.ConfChangeRemoveNode:
		if members[id] == nil {
			return ErrIDNotFound
		}
	case raftpb.ConfChangeUpdateNode:
		if members[id] == nil {
			return ErrIDNotFound
		}
		// The member being updated may keep its own URLs, so only the
		// other members' URLs are collected.
		taken := make(map[string]bool)
		for _, existing := range members {
			if existing.ID == id {
				continue
			}
			for _, peerURL := range existing.PeerURLs {
				taken[peerURL] = true
			}
		}
		proposed := new(Member)
		if err := json.Unmarshal(cc.Context, proposed); err != nil {
			plog.Panicf("unmarshal member should never fail: %v", err)
		}
		for _, peerURL := range proposed.PeerURLs {
			if taken[peerURL] {
				return ErrPeerURLexists
			}
		}
	default:
		plog.Panicf("ConfChange type should be either AddNode, RemoveNode or UpdateNode")
	}
	return nil
}
// AddMember registers m in the cluster and persists its RaftAttributes, as
// JSON, under the member's raftAttributes key in the store. The given member
// should have empty attributes, and no member with the same ID may exist.
// It panics when the attributes cannot be encoded or the key cannot be
// created.
func (c *cluster) AddMember(m *Member) {
	c.Lock()
	defer c.Unlock()
	raw, err := json.Marshal(m.RaftAttributes)
	if err != nil {
		plog.Panicf("marshal raftAttributes should never fail: %v", err)
	}
	key := path.Join(memberStoreKey(m.ID), raftAttributesSuffix)
	_, err = c.store.Create(key, false, string(raw), false, store.Permanent)
	if err != nil {
		plog.Panicf("create raftAttributes should never fail: %v", err)
	}
	c.members[m.ID] = m
}
// RemoveMember deletes the member's key from the store, drops the member
// from the in-memory map, and records id as removed both in the store and in
// memory. The given id MUST exist in the store, or the function panics.
func (c *cluster) RemoveMember(id types.ID) {
	c.Lock()
	defer c.Unlock()
	_, err := c.store.Delete(memberStoreKey(id), true, true)
	if err != nil {
		plog.Panicf("delete member should never fail: %v", err)
	}
	delete(c.members, id)
	_, err = c.store.Create(removedMemberStoreKey(id), false, "", false, store.Permanent)
	if err != nil {
		plog.Panicf("create removedMember should never fail: %v", err)
	}
	c.removed[id] = true
}
// UpdateAttributes replaces the in-memory Attributes of member id with attr.
// An id found in the removed set is skipped with a debug log; an id that is
// neither a member nor removed causes a panic.
func (c *cluster) UpdateAttributes(id types.ID, attr Attributes) {
	c.Lock()
	defer c.Unlock()
	memb, known := c.members[id]
	if known {
		memb.Attributes = attr
		return
	}
	if _, wasRemoved := c.removed[id]; wasRemoved {
		plog.Debugf("skipped updating attributes of removed member %s", id)
		return
	}
	plog.Panicf("error updating attributes of unknown member %s", id)
	// TODO: update store in this function
}
// UpdateRaftAttributes writes raftAttr, as JSON, to the member's
// raftAttributes key in the store and then to the in-memory member. It
// panics when the attributes cannot be encoded or the store update fails.
func (c *cluster) UpdateRaftAttributes(id types.ID, raftAttr RaftAttributes) {
	c.Lock()
	defer c.Unlock()
	raw, err := json.Marshal(raftAttr)
	if err != nil {
		plog.Panicf("marshal raftAttributes should never fail: %v", err)
	}
	key := path.Join(memberStoreKey(id), raftAttributesSuffix)
	_, err = c.store.Update(key, string(raw), store.Permanent)
	if err != nil {
		plog.Panicf("update raftAttributes should never fail: %v", err)
	}
	c.members[id].RaftAttributes = raftAttr
}
// Version returns a fresh copy of the cluster version, re-parsed from its
// string form, or nil when no version has been set.
func (c *cluster) Version() *semver.Version {
	c.Lock()
	defer c.Unlock()
	if cur := c.version; cur != nil {
		return semver.Must(semver.NewVersion(cur.String()))
	}
	return nil
}
// SetVersion logs the transition, stores ver as the cluster version and runs
// the downgrade check on it.
func (c *cluster) SetVersion(ver *semver.Version) {
	c.Lock()
	defer c.Unlock()
	if c.version == nil {
		plog.Noticef("set the initial cluster version to %v", version.Cluster(ver.String()))
	} else {
		plog.Noticef("updated the cluster version from %v to %v", version.Cluster(c.version.String()), version.Cluster(ver.String()))
	}
	c.version = ver
	MustDetectDowngrade(c.version)
}
// membersFromStore reads the member map and the removed-ID set from st.
// A missing members prefix yields two empty maps; a missing removed-members
// prefix yields the loaded members and an empty removed set. Any other store
// error, or a node that cannot be converted to a Member, causes a panic.
func membersFromStore(st store.Store) (map[types.ID]*Member, map[types.ID]bool) {
	members := make(map[types.ID]*Member)
	removed := make(map[types.ID]bool)

	ev, err := st.Get(storeMembersPrefix, true, true)
	if err != nil {
		if isKeyNotFound(err) {
			return members, removed
		}
		plog.Panicf("get storeMembers should never fail: %v", err)
	}
	for _, node := range ev.Node.Nodes {
		memb, convErr := nodeToMember(node)
		if convErr != nil {
			plog.Panicf("nodeToMember should never fail: %v", convErr)
		}
		members[memb.ID] = memb
	}

	ev, err = st.Get(storeRemovedMembersPrefix, true, true)
	if err != nil {
		if isKeyNotFound(err) {
			return members, removed
		}
		plog.Panicf("get storeRemovedMembers should never fail: %v", err)
	}
	for _, node := range ev.Node.Nodes {
		removed[mustParseMemberIDFromKey(node.Key)] = true
	}
	return members, removed
}
// clusterVersionFromStore reads the "version" key under StoreClusterPrefix
// and parses it as a semantic version. It returns nil when the key does not
// exist and panics on any other store error or on an unparsable value.
func clusterVersionFromStore(st store.Store) *semver.Version {
	key := path.Join(StoreClusterPrefix, "version")
	ev, err := st.Get(key, false, false)
	if err != nil {
		if isKeyNotFound(err) {
			return nil
		}
		plog.Panicf("unexpected error (%v) when getting cluster version from store", err)
	}
	return semver.Must(semver.NewVersion(*ev.Node.Value))
}
// ValidateClusterAndAssignIDs checks that the local cluster has the same
// number of members as the existing one and that, once both are ordered by
// peer URLs, each pair has equal PeerURLs. On success the local members take
// over the IDs of their existing counterparts and the local member map is
// rebuilt under those IDs. On failure an error is returned and the local
// cluster is left untouched.
func ValidateClusterAndAssignIDs(local *cluster, existing *cluster) error {
	ems := existing.Members()
	lms := local.Members()
	if len(lms) != len(ems) {
		return fmt.Errorf("member count is unequal")
	}
	sort.Sort(MembersByPeerURLs(ems))
	sort.Sort(MembersByPeerURLs(lms))

	// lms holds clones, so nothing in local changes until the final
	// assignment below.
	reassigned := make(map[types.ID]*Member)
	for i := range ems {
		if !netutil.URLStringsEqual(ems[i].PeerURLs, lms[i].PeerURLs) {
			return fmt.Errorf("unmatched member while checking PeerURLs")
		}
		lms[i].ID = ems[i].ID
		reassigned[lms[i].ID] = lms[i]
	}
	local.members = reassigned
	return nil
}

View File

@@ -0,0 +1,265 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"encoding/json"
"fmt"
"io/ioutil"
"net/http"
"sort"
"time"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/version"
"github.com/coreos/go-semver/semver"
)
// isMemberBootstrapped asks the remote peers of cl (with a one-second
// timeout per request) for their view of the cluster and reports whether the
// named member is known there with at least one client URL. It returns false
// when no remote view can be obtained or the member is absent from it.
func isMemberBootstrapped(cl *cluster, member string, tr *http.Transport) bool {
	remote, err := getClusterFromRemotePeers(getRemotePeerURLs(cl, member), time.Second, false, tr)
	if err != nil {
		return false
	}
	id := cl.MemberByName(member).ID
	m := remote.Member(id)
	if m == nil {
		return false
	}
	return len(m.ClientURLs) > 0
}
// GetClusterFromRemotePeers takes a set of URLs representing etcd peers and
// tries to build a cluster from the members endpoint of one of them. The
// first URL that yields a usable response wins; if none does, an error is
// returned.
// Each request has a 10-second timeout. Because the upper limit of TTL is 5s,
// 10 seconds is enough for building the connection and finishing the request.
func GetClusterFromRemotePeers(urls []string, tr *http.Transport) (*cluster, error) {
	const (
		requestTimeout = 10 * time.Second
		logErrors      = true
	)
	return getClusterFromRemotePeers(urls, requestTimeout, logErrors, tr)
}
// getClusterFromRemotePeers fetches "<url>/members" from each URL in turn,
// using tr and the given per-request timeout, and builds a cluster from the
// first response that can be read, decoded as a JSON member list and carries
// a parsable X-Etcd-Cluster-ID header. URLs that fail at any step are
// skipped. If logerr is true, it prints a warning for each failure.
// An error is returned when no URL yields a usable response.
func getClusterFromRemotePeers(urls []string, timeout time.Duration, logerr bool, tr *http.Transport) (*cluster, error) {
	cc := &http.Client{
		Transport: tr,
		Timeout:   timeout,
	}
	for _, u := range urls {
		resp, err := cc.Get(u + "/members")
		if err != nil {
			if logerr {
				plog.Warningf("could not get cluster response from %s: %v", u, err)
			}
			continue
		}
		b, err := ioutil.ReadAll(resp.Body)
		// Close the body on every path so the connection is released; it is
		// closed here rather than deferred because this runs inside a loop.
		resp.Body.Close()
		if err != nil {
			if logerr {
				plog.Warningf("could not read the body of cluster response: %v", err)
			}
			continue
		}
		var membs []*Member
		if err := json.Unmarshal(b, &membs); err != nil {
			if logerr {
				plog.Warningf("could not unmarshal cluster response: %v", err)
			}
			continue
		}
		id, err := types.IDFromString(resp.Header.Get("X-Etcd-Cluster-ID"))
		if err != nil {
			if logerr {
				plog.Warningf("could not parse the cluster ID from cluster res: %v", err)
			}
			continue
		}
		return newClusterFromMembers("", id, membs), nil
	}
	return nil, fmt.Errorf("could not retrieve cluster information from the given urls")
}
// getRemotePeerURLs gathers the peer URLs of every member of cl whose name
// differs from local. The returned list is never nil and is sorted in
// ascending lexicographical order.
func getRemotePeerURLs(cl Cluster, local string) []string {
	remote := []string{}
	for _, m := range cl.Members() {
		if m.Name != local {
			remote = append(remote, m.PeerURLs...)
		}
	}
	sort.Strings(remote)
	return remote
}
// getVersions returns the versions of the members in the given cluster.
// The map is keyed by the string form of each member's ID; each value holds
// the member's server and cluster version strings. The local member's entry
// is built from version.Version and the cluster's own version
// ("not_decided" when unset) without any network request. A member whose
// version cannot be fetched gets a nil value.
func getVersions(cl Cluster, local types.ID, tr *http.Transport) map[string]*version.Versions {
	result := make(map[string]*version.Versions)
	for _, m := range cl.Members() {
		key := m.ID.String()
		if m.ID == local {
			clusterVer := "not_decided"
			if cl.Version() != nil {
				clusterVer = cl.Version().String()
			}
			result[key] = &version.Versions{Server: version.Version, Cluster: clusterVer}
			continue
		}
		ver, err := getVersion(m, tr)
		if err != nil {
			plog.Warningf("cannot get the version of member %s (%v)", m.ID, err)
			result[key] = nil
			continue
		}
		result[key] = ver
	}
	return result
}
// decideClusterVersion picks the cluster version from the versions map: the
// lowest server version found in it. It returns nil when the map is empty,
// when any entry is nil, or when a server version cannot be parsed. A warning
// is logged for every member that runs a higher version than the local one.
func decideClusterVersion(vers map[string]*version.Versions) *semver.Version {
	var lowest *semver.Version
	localVer := semver.Must(semver.NewVersion(version.Version))
	for mid, ver := range vers {
		if ver == nil {
			return nil
		}
		parsed, err := semver.NewVersion(ver.Server)
		if err != nil {
			plog.Errorf("cannot understand the version of member %s (%v)", mid, err)
			return nil
		}
		if localVer.LessThan(*parsed) {
			plog.Warningf("the local etcd version %s is not up-to-date", localVer.String())
			plog.Warningf("member %s has a higher version %s", mid, ver.Server)
		}
		if lowest == nil || parsed.LessThan(*lowest) {
			lowest = parsed
		}
	}
	return lowest
}
// isCompatibleWithCluster returns true if the local member has a compatible
// version with the currently running cluster.
// The version is considered compatible when at least one of the other
// members reports a cluster version in the range
// [MinClusterVersion, Version] (Version reduced to major.minor) and no known
// member reports a cluster version outside that range.
// The rule is lenient because another member might be offline when the local
// member joins.
func isCompatibleWithCluster(cl Cluster, local types.ID, tr *http.Transport) bool {
	vers := getVersions(cl, local, tr)
	minV := semver.Must(semver.NewVersion(version.MinClusterVersion))
	full := semver.Must(semver.NewVersion(version.Version))
	// Only major.minor of the local version bounds the cluster version.
	maxV := &semver.Version{
		Major: full.Major,
		Minor: full.Minor,
	}
	return isCompatibleWithVers(vers, local, minV, maxV)
}
// isCompatibleWithVers applies the compatibility rule to a versions map.
// Entries for the local member, nil entries and entries whose cluster
// version cannot be parsed are ignored. It returns false as soon as a
// remaining entry lies outside [minV, maxV], and otherwise true only if at
// least one entry was inside the range.
func isCompatibleWithVers(vers map[string]*version.Versions, local types.ID, minV, maxV *semver.Version) bool {
	localKey := local.String()
	compatible := false
	for id, v := range vers {
		// Skip the comparison with the local version and unknown versions.
		if id == localKey || v == nil {
			continue
		}
		clusterv, err := semver.NewVersion(v.Cluster)
		if err != nil {
			plog.Errorf("cannot understand the cluster version of member %s (%v)", id, err)
			continue
		}
		switch {
		case clusterv.LessThan(*minV):
			plog.Warningf("the running cluster version(%v) is lower than the minimal cluster version(%v) supported", clusterv.String(), minV.String())
			return false
		case maxV.LessThan(*clusterv):
			plog.Warningf("the running cluster version(%v) is higher than the maximum cluster version(%v) supported", clusterv.String(), maxV.String())
			return false
		}
		compatible = true
	}
	return compatible
}
// getVersion returns the Versions of the given member by requesting
// "<peerURL>/version" from each of its peer URLs in turn.
//
// The first URL that answers decides the result: a 404 is reported as
// version 2.0.0 for both server and cluster (etcd 2.0 has no version
// endpoint on the peer URL), and a body that decodes as JSON is returned as
// is. URLs that cannot be reached, read or decoded are skipped with a
// warning. If every URL fails, the last error is returned. A member without
// peer URLs yields (nil, nil).
func getVersion(m *Member, tr *http.Transport) (*version.Versions, error) {
	cc := &http.Client{
		Transport: tr,
	}
	var (
		err  error
		resp *http.Response
	)
	for _, u := range m.PeerURLs {
		resp, err = cc.Get(u + "/version")
		if err != nil {
			plog.Warningf("failed to reach the peerURL(%s) of member %s (%v)", u, m.ID, err)
			continue
		}
		// etcd 2.0 does not have version endpoint on peer url.
		if resp.StatusCode == http.StatusNotFound {
			resp.Body.Close()
			return &version.Versions{
				Server:  "2.0.0",
				Cluster: "2.0.0",
			}, nil
		}
		var b []byte
		b, err = ioutil.ReadAll(resp.Body)
		resp.Body.Close()
		if err != nil {
			plog.Warningf("failed to read out the response body from the peerURL(%s) of member %s (%v)", u, m.ID, err)
			continue
		}
		var vers version.Versions
		// Assign to the outer err (no :=) so that a decode failure on the
		// last URL is what the final return reports, instead of a nil error.
		if err = json.Unmarshal(b, &vers); err != nil {
			plog.Warningf("failed to unmarshal the response body got from the peerURL(%s) of member %s (%v)", u, m.ID, err)
			continue
		}
		return &vers, nil
	}
	return nil, err
}
// MustDetectDowngrade terminates the process through plog.Fatalf when the
// local version, reduced to major.minor, is lower than the cluster version
// cv. A nil cv is accepted and never triggers the check.
func MustDetectDowngrade(cv *semver.Version) {
	full := semver.Must(semver.NewVersion(version.Version))
	// only keep major.minor version for comparison against cluster version
	localVer := &semver.Version{Major: full.Major, Minor: full.Minor}
	if cv == nil {
		return
	}
	if localVer.LessThan(*cv) {
		plog.Fatalf("cluster cannot be downgraded (current version: %s is lower than determined cluster version: %s).", version.Version, version.Cluster(cv.String()))
	}
}

View File

@@ -0,0 +1,173 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"fmt"
"net/http"
"path"
"sort"
"strings"
"time"
"github.com/coreos/etcd/pkg/netutil"
"github.com/coreos/etcd/pkg/types"
)
// ServerConfig holds the configuration of etcd as taken from the command line or discovery.
type ServerConfig struct {
// Name is the local member's name; verifyLocalMember looks it up in
// InitialPeerURLsMap.
Name string
// DiscoveryURL, when non-empty, makes ShouldDiscover report true. It must
// be empty when joining an existing cluster (see VerifyJoinExisting).
DiscoveryURL string
DiscoveryProxy string
// ClientURLs are logged as the advertised client URLs.
ClientURLs types.URLs
// PeerURLs are the advertised peer URLs; in strict mode they must equal
// the local member's entry in InitialPeerURLsMap.
PeerURLs types.URLs
// DataDir is the parent of the member directory returned by MemberDir.
DataDir string
// DedicatedWALDir config will make the etcd to write the WAL to the WALDir
// rather than the dataDir/member/wal.
DedicatedWALDir string
SnapCount uint64
MaxSnapFiles uint
MaxWALFiles uint
// InitialPeerURLsMap maps member names to their peer URLs for the initial
// cluster.
InitialPeerURLsMap types.URLsMap
InitialClusterToken string
NewCluster bool
ForceNewCluster bool
Transport *http.Transport
// TickMs is the length of one tick in milliseconds; it is logged as the
// heartbeat interval.
TickMs uint
// ElectionTicks is the election timeout in ticks; multiplied by TickMs it
// gives the election timeout in milliseconds.
ElectionTicks int
V3demo bool
}
// VerifyBootstrap sanity-checks the initial config for the bootstrap case
// and returns an error for things that should never happen: a local member
// that fails the strict check, duplicate URLs in the initial cluster, or an
// empty initial cluster combined with an empty discovery URL.
func (c *ServerConfig) VerifyBootstrap() error {
	if err := c.verifyLocalMember(true); err != nil {
		return err
	}
	switch {
	case checkDuplicateURL(c.InitialPeerURLsMap):
		return fmt.Errorf("initial cluster %s has duplicate url", c.InitialPeerURLsMap)
	case c.InitialPeerURLsMap.String() == "" && c.DiscoveryURL == "":
		return fmt.Errorf("initial cluster unset and no discovery URL found")
	}
	return nil
}
// VerifyJoinExisting sanity-checks the initial config for the
// join-existing-cluster case and returns an error for things that should
// never happen: a local member missing from the initial cluster, duplicate
// URLs in the initial cluster, or a discovery URL being set.
func (c *ServerConfig) VerifyJoinExisting() error {
	// no need for strict checking since the member have announced its
	// peer urls to the cluster before starting and do not have to set
	// it in the configuration again.
	if err := c.verifyLocalMember(false); err != nil {
		return err
	}
	switch {
	case checkDuplicateURL(c.InitialPeerURLsMap):
		return fmt.Errorf("initial cluster %s has duplicate url", c.InitialPeerURLsMap)
	case c.DiscoveryURL != "":
		return fmt.Errorf("discovery URL should not be set when joining existing initial cluster")
	}
	return nil
}
// verifyLocalMember checks that the configured member name appears in the
// configured initial cluster. If strict is set, it additionally requires the
// member's peer URLs in the initial cluster to equal the configured
// advertised peer URLs. The member's URL list in the initial cluster is
// sorted in place as a side effect.
func (c *ServerConfig) verifyLocalMember(strict bool) error {
	clusterURLs := c.InitialPeerURLsMap[c.Name]
	// Make sure the cluster at least contains the local server.
	if clusterURLs == nil {
		return fmt.Errorf("couldn't find local name %q in the initial cluster configuration", c.Name)
	}

	// Advertised peer URLs must match those in the cluster peer list
	advertised := c.PeerURLs.StringSlice()
	sort.Strings(advertised)
	clusterURLs.Sort()
	if strict && !netutil.URLStringsEqual(advertised, clusterURLs.StringSlice()) {
		umap := map[string]types.URLs{c.Name: c.PeerURLs}
		return fmt.Errorf("--initial-cluster must include %s given --initial-advertise-peer-urls=%s", types.URLsMap(umap).String(), strings.Join(advertised, ","))
	}
	return nil
}
// MemberDir returns the "member" directory inside DataDir.
func (c *ServerConfig) MemberDir() string {
	return path.Join(c.DataDir, "member")
}
// WALDir returns DedicatedWALDir when it is set, and the "wal" directory
// inside the member directory otherwise.
func (c *ServerConfig) WALDir() string {
	if dedicated := c.DedicatedWALDir; dedicated != "" {
		return dedicated
	}
	return path.Join(c.MemberDir(), "wal")
}
// SnapDir returns the "snap" directory inside the member directory.
func (c *ServerConfig) SnapDir() string {
	return path.Join(c.MemberDir(), "snap")
}
// ShouldDiscover reports whether a discovery URL is configured.
func (c *ServerConfig) ShouldDiscover() bool {
	return c.DiscoveryURL != ""
}
// ReqTimeout returns the timeout for a request to finish.
func (c *ServerConfig) ReqTimeout() time.Duration {
	// 5s for queue waiting, computation and disk IO delay
	const base = 5 * time.Second
	// + 2 * election timeout for possible leader election
	election := time.Duration(c.ElectionTicks) * time.Duration(c.TickMs) * time.Millisecond
	return base + 2*election
}
// PrintWithInitial logs the configuration including the initial advertise
// peer URLs and the initial cluster.
func (c *ServerConfig) PrintWithInitial() {
	c.print(true)
}
// Print logs the configuration without the initial-cluster settings.
func (c *ServerConfig) Print() {
	c.print(false)
}
// print logs the configuration at info level, one setting per line.
// Optional settings (force new cluster, dedicated WAL dir, discovery URL and
// proxy) are logged only when set. The initial advertise peer URLs and the
// initial cluster are logged only when initial is true.
func (c *ServerConfig) print(initial bool) {
	plog.Infof("name = %s", c.Name)
	if c.ForceNewCluster {
		plog.Infof("force new cluster")
	}
	plog.Infof("data dir = %s", c.DataDir)
	plog.Infof("member dir = %s", c.MemberDir())
	if c.DedicatedWALDir != "" {
		plog.Infof("dedicated WAL dir = %s", c.DedicatedWALDir)
	}
	plog.Infof("heartbeat = %dms", c.TickMs)
	plog.Infof("election = %dms", c.ElectionTicks*int(c.TickMs))
	plog.Infof("snapshot count = %d", c.SnapCount)
	if c.DiscoveryURL != "" {
		plog.Infof("discovery URL= %s", c.DiscoveryURL)
		if c.DiscoveryProxy != "" {
			plog.Infof("discovery proxy = %s", c.DiscoveryProxy)
		}
	}
	plog.Infof("advertise client URLs = %s", c.ClientURLs)
	if !initial {
		return
	}
	plog.Infof("initial advertise peer URLs = %s", c.PeerURLs)
	plog.Infof("initial cluster = %s", c.InitialPeerURLsMap)
}
// checkDuplicateURL reports whether the same URL string occurs more than
// once anywhere in urlsmap, within one member's list or across members.
func checkDuplicateURL(urlsmap types.URLsMap) bool {
	seen := make(map[string]bool)
	for _, memberURLs := range urlsmap {
		for _, url := range memberURLs {
			key := url.String()
			if _, dup := seen[key]; dup {
				return true
			}
			seen[key] = true
		}
	}
	return false
}

View File

@@ -0,0 +1,39 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"errors"
etcdErr "github.com/coreos/etcd/error"
)
// Sentinel errors of the etcdserver package. Every message carries the
// "etcdserver: " prefix.
var (
ErrUnknownMethod = errors.New("etcdserver: unknown method")
ErrStopped = errors.New("etcdserver: server stopped")
// ErrIDRemoved, ErrIDExists, ErrIDNotFound and ErrPeerURLexists are the
// errors returned by ValidateConfigurationChange.
ErrIDRemoved = errors.New("etcdserver: ID removed")
ErrIDExists = errors.New("etcdserver: ID exists")
ErrIDNotFound = errors.New("etcdserver: ID not found")
ErrPeerURLexists = errors.New("etcdserver: peerURL exists")
ErrCanceled = errors.New("etcdserver: request cancelled")
ErrTimeout = errors.New("etcdserver: request timed out")
ErrTimeoutDueToLeaderFail = errors.New("etcdserver: request timed out, possibly due to previous leader failure")
ErrTimeoutDueToConnectionLost = errors.New("etcdserver: request timed out, possibly due to connection lost")
)
// isKeyNotFound reports whether err is an *etcdErr.Error whose code is
// EcodeKeyNotFound.
func isKeyNotFound(err error) bool {
	if e, ok := err.(*etcdErr.Error); ok {
		return e.ErrorCode == etcdErr.EcodeKeyNotFound
	}
	return false
}

View File

@@ -0,0 +1,83 @@
package etcdhttp
import (
"fmt"
"net/http"
"sync"
"time"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/etcdhttp/httptypes"
"github.com/coreos/go-semver/semver"
)
// capability names a feature that is enabled or disabled depending on the
// cluster version.
type capability string
const (
// authCapability is the capability name for the auth feature.
authCapability capability = "auth"
)
var (
// capabilityMaps is a static map from cluster version to the set of
// capabilities enabled at that version. A version without an entry has
// only the base capabilities, i.e. the set that 2.0 supports.
capabilityMaps = map[string]map[capability]bool{
"2.1.0": {authCapability: true},
"2.2.0": {authCapability: true},
}
// enableMapMu guards enabledMap.
enableMapMu sync.Mutex
// enabledMap points to one of the maps in capabilityMaps, or is nil when
// the current version has no entry there.
enabledMap map[capability]bool
)
// capabilityLoop polls the cluster version every 500ms and, whenever the
// returned pointer differs from the remembered one, refreshes enabledMap
// from capabilityMaps. The remembered version only moves forward: it is
// replaced when nothing is remembered yet or when the new version is higher.
// The loop ends when the server's stop notification fires.
// capabilityLoop MUST be run in a goroutine before checking capability
// or using capabilityHandler.
func capabilityLoop(s *etcdserver.EtcdServer) {
	stopped := s.StopNotify()

	var known *semver.Version
	for {
		if cur := s.ClusterVersion(); cur != known {
			if known == nil || (cur != nil && known.LessThan(*cur)) {
				known = cur
			}
			enableMapMu.Lock()
			enabledMap = capabilityMaps[known.String()]
			enableMapMu.Unlock()
		}

		select {
		case <-stopped:
			return
		case <-time.After(500 * time.Millisecond):
		}
	}
}
// isCapabilityEnabled reports whether c is set in the currently enabled
// capability map. It returns false while no map is enabled.
func isCapabilityEnabled(c capability) bool {
	enableMapMu.Lock()
	defer enableMapMu.Unlock()
	return enabledMap != nil && enabledMap[c]
}
// capabilityHandler wraps fn so that it is invoked only while capability c
// is enabled; otherwise the request is answered through notCapable.
func capabilityHandler(c capability, fn func(http.ResponseWriter, *http.Request)) http.HandlerFunc {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if isCapabilityEnabled(c) {
			fn(w, r)
			return
		}
		notCapable(w, c)
	})
}
// notCapable writes an HTTP 500 error to w stating that the feature c cannot
// be accessed during rolling upgrades.
func notCapable(w http.ResponseWriter, c capability) {
	msg := fmt.Sprintf("Not capable of accessing %s feature during rolling upgrades.", c)
	herr := httptypes.NewHTTPError(http.StatusInternalServerError, msg)
	herr.WriteTo(w)
}

View File

@@ -0,0 +1,768 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdhttp
import (
"encoding/json"
"errors"
"expvar"
"fmt"
"io/ioutil"
"net/http"
"net/url"
"path"
"strconv"
"strings"
"time"
etcdErr "github.com/coreos/etcd/error"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/auth"
"github.com/coreos/etcd/etcdserver/etcdhttp/httptypes"
"github.com/coreos/etcd/etcdserver/etcdserverpb"
"github.com/coreos/etcd/etcdserver/stats"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/raft"
"github.com/coreos/etcd/store"
"github.com/coreos/etcd/version"
"github.com/coreos/pkg/capnslog"
"github.com/jonboulle/clockwork"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/net/context"
)
// Paths and path prefixes under which NewClientHandler (and handleAuth)
// register the client-facing HTTP handlers.
const (
// authPrefix is the base of the roles/users/enable auth endpoints.
authPrefix = "/v2/auth"
// keysPrefix is the base of the key-space endpoints served by keysHandler.
keysPrefix = "/v2/keys"
// deprecatedMachinesPrefix is served by deprecatedMachinesHandler.
deprecatedMachinesPrefix = "/v2/machines"
// membersPrefix is the base of the membership endpoints served by membersHandler.
membersPrefix = "/v2/members"
// statsPrefix is the base of the /store, /self and /leader statistics endpoints.
statsPrefix = "/v2/stats"
// varsPath serves the expvar dump (serveVars).
varsPath = "/debug/vars"
// metricsPath serves the prometheus handler.
metricsPath = "/metrics"
// healthPath serves healthHandler.
healthPath = "/health"
// versionPath serves the server/cluster version document.
versionPath = "/version"
// configPath is the base of runtime configuration endpoints; only
// configPath+"/local/log" is registered by NewClientHandler.
configPath = "/config"
)
// NewClientHandler generates a muxed http.Handler with the given parameters to
// serve etcd client requests.
//
// It starts capabilityLoop for server in a new goroutine, builds one auth
// store shared by the keys, members and auth handlers, registers every client
// endpoint on a fresh ServeMux and returns that mux wrapped in requestLogger.
// timeout is handed to the auth store and bounds keys/members requests.
func NewClientHandler(server *etcdserver.EtcdServer, timeout time.Duration) http.Handler {
	go capabilityLoop(server)

	authStore := auth.NewStore(server, timeout)

	keys := &keysHandler{
		sec:     authStore,
		server:  server,
		cluster: server.Cluster(),
		timer:   server,
		timeout: timeout,
	}
	statistics := &statsHandler{
		stats: server,
	}
	members := &membersHandler{
		sec:     authStore,
		server:  server,
		cluster: server.Cluster(),
		timeout: timeout,
		clock:   clockwork.NewRealClock(),
	}
	machines := &deprecatedMachinesHandler{
		cluster: server.Cluster(),
	}
	authH := &authHandler{
		sec:     authStore,
		cluster: server.Cluster(),
	}

	router := http.NewServeMux()

	// Anything not matched by a more specific pattern below is a 404.
	router.HandleFunc("/", http.NotFound)
	router.Handle(healthPath, healthHandler(server))
	router.HandleFunc(versionPath, versionHandler(server.Cluster(), serveVersion))

	// Both the bare prefix and the subtree go to the same handler.
	router.Handle(keysPrefix, keys)
	router.Handle(keysPrefix+"/", keys)

	router.HandleFunc(statsPrefix+"/store", statistics.serveStore)
	router.HandleFunc(statsPrefix+"/self", statistics.serveSelf)
	router.HandleFunc(statsPrefix+"/leader", statistics.serveLeader)

	router.HandleFunc(varsPath, serveVars)
	router.HandleFunc(configPath+"/local/log", logHandleFunc)
	router.Handle(metricsPath, prometheus.Handler())

	router.Handle(membersPrefix, members)
	router.Handle(membersPrefix+"/", members)
	router.Handle(deprecatedMachinesPrefix, machines)

	handleAuth(router, authH)

	return requestLogger(router)
}
// keysHandler serves requests under keysPrefix.
type keysHandler struct {
// sec is consulted by hasKeyPrefixAccess to authorize each request.
sec auth.Store
// server executes the parsed request (Do).
server etcdserver.Server
// cluster supplies the ID reported in the X-Etcd-Cluster-ID header.
cluster etcdserver.Cluster
// timer supplies the raft index/term written into response headers.
timer etcdserver.RaftTimer
// timeout bounds the context passed to server.Do.
timeout time.Duration
}
// ServeHTTP handles a request on the keys endpoint: it validates the method,
// parses the request, checks key access, executes the request against the
// server and then writes either the resulting event or the stream of watch
// events. Failures at any stage are reported through writeKeyError or
// writeKeyNoAuth.
func (h *keysHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	if !allowMethod(w, r.Method, "HEAD", "GET", "PUT", "POST", "DELETE") {
		return
	}

	w.Header().Set("X-Etcd-Cluster-ID", h.cluster.ID().String())

	ctx, cancel := context.WithTimeout(context.Background(), h.timeout)
	defer cancel()

	req, err := parseKeyRequest(r, clockwork.NewRealClock())
	if err != nil {
		writeKeyError(w, err)
		return
	}

	// parseKeyRequest succeeded, so the path is known to start with keysPrefix
	// and slicing it off is safe.
	key := r.URL.Path[len(keysPrefix):]
	if !hasKeyPrefixAccess(h.sec, r, key, req.Recursive) {
		writeKeyNoAuth(w)
		return
	}

	resp, err := h.server.Do(ctx, req)
	if err != nil {
		writeKeyError(w, trimErrorPrefix(err, etcdserver.StoreKeysPrefix))
		return
	}

	if resp.Event != nil {
		if werr := writeKeyEvent(w, resp.Event, h.timer); werr != nil {
			// Should never be reached
			plog.Errorf("error writing event (%v)", werr)
		}
		return
	}

	if resp.Watcher != nil {
		// Watches get their own context, bounded by defaultWatchTimeout
		// rather than the per-request timeout.
		watchCtx, stopWatch := context.WithTimeout(context.Background(), defaultWatchTimeout)
		defer stopWatch()
		handleKeyWatch(watchCtx, w, resp.Watcher, req.Stream, h.timer)
		return
	}

	writeKeyError(w, errors.New("received response with no Event/Watcher!"))
}
// deprecatedMachinesHandler serves deprecatedMachinesPrefix with the list of
// client URLs known to the cluster.
type deprecatedMachinesHandler struct {
// cluster supplies the client URLs written in the response.
cluster etcdserver.Cluster
}
// ServeHTTP answers GET and HEAD requests with the cluster's client URLs
// joined by ", ". Other methods are rejected by allowMethod.
func (h *deprecatedMachinesHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	if allowed := allowMethod(w, r.Method, "GET", "HEAD"); !allowed {
		return
	}
	body := strings.Join(h.cluster.ClientURLs(), ", ")
	w.Write([]byte(body))
}
// membersHandler serves the cluster membership endpoints under membersPrefix.
type membersHandler struct {
// sec is consulted by hasWriteRootAccess before any request is processed.
sec auth.Store
// server performs leader lookup and member add/remove/update.
server etcdserver.Server
// cluster supplies the cluster ID header and the member listings.
cluster etcdserver.Cluster
// timeout bounds the context passed to the membership operations.
timeout time.Duration
// clock supplies the timestamp passed to etcdserver.NewMember on POST.
clock clockwork.Clock
}
// ServeHTTP dispatches membership requests by HTTP method:
//
//	GET    list all members, or the current leader for the "leader" sub-path
//	POST   add a member built from the request's peer URLs
//	DELETE remove the member whose ID is in the path
//	PUT    update the peer URLs of the member whose ID is in the path
//
// Non-GET/HEAD methods require root access (see hasWriteRootAccess). Errors
// are reported as JSON through writeError.
func (h *membersHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
if !allowMethod(w, r.Method, "GET", "POST", "DELETE", "PUT") {
return
}
if !hasWriteRootAccess(h.sec, r) {
writeNoAuth(w)
return
}
w.Header().Set("X-Etcd-Cluster-ID", h.cluster.ID().String())
// Every membership operation below shares this per-request deadline.
ctx, cancel := context.WithTimeout(context.Background(), h.timeout)
defer cancel()
switch r.Method {
case "GET":
// The remainder of the path after membersPrefix selects the view.
switch trimPrefix(r.URL.Path, membersPrefix) {
case "":
mc := newMemberCollection(h.cluster.Members())
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(mc); err != nil {
plog.Warningf("failed to encode members response (%v)", err)
}
case "leader":
id := h.server.Leader()
// A zero leader ID is reported as 503 "During election".
if id == 0 {
writeError(w, httptypes.NewHTTPError(http.StatusServiceUnavailable, "During election"))
return
}
m := newMember(h.cluster.Member(id))
w.Header().Set("Content-Type", "application/json")
if err := json.NewEncoder(w).Encode(m); err != nil {
plog.Warningf("failed to encode members response (%v)", err)
}
default:
writeError(w, httptypes.NewHTTPError(http.StatusNotFound, "Not found"))
}
case "POST":
req := httptypes.MemberCreateRequest{}
if ok := unmarshalRequest(r, &req, w); !ok {
return
}
now := h.clock.Now()
m := etcdserver.NewMember("", req.PeerURLs, "", &now)
err := h.server.AddMember(ctx, *m)
switch {
// Duplicate ID or peer URL is a conflict, not a server failure.
case err == etcdserver.ErrIDExists || err == etcdserver.ErrPeerURLexists:
writeError(w, httptypes.NewHTTPError(http.StatusConflict, err.Error()))
return
case err != nil:
plog.Errorf("error adding member %s (%v)", m.ID, err)
writeError(w, err)
return
}
res := newMember(m)
// Headers and status must be set before the body is encoded.
w.Header().Set("Content-Type", "application/json")
w.WriteHeader(http.StatusCreated)
if err := json.NewEncoder(w).Encode(res); err != nil {
plog.Warningf("failed to encode members response (%v)", err)
}
case "DELETE":
// getID writes the error response itself when the ID is missing or invalid.
id, ok := getID(r.URL.Path, w)
if !ok {
return
}
err := h.server.RemoveMember(ctx, uint64(id))
switch {
case err == etcdserver.ErrIDRemoved:
writeError(w, httptypes.NewHTTPError(http.StatusGone, fmt.Sprintf("Member permanently removed: %s", id)))
case err == etcdserver.ErrIDNotFound:
writeError(w, httptypes.NewHTTPError(http.StatusNotFound, fmt.Sprintf("No such member: %s", id)))
case err != nil:
plog.Errorf("error removing member %s (%v)", id, err)
writeError(w, err)
default:
w.WriteHeader(http.StatusNoContent)
}
case "PUT":
// getID writes the error response itself when the ID is missing or invalid.
id, ok := getID(r.URL.Path, w)
if !ok {
return
}
req := httptypes.MemberUpdateRequest{}
if ok := unmarshalRequest(r, &req, w); !ok {
return
}
// Only the ID and the peer URLs are populated for the update.
m := etcdserver.Member{
ID: id,
RaftAttributes: etcdserver.RaftAttributes{PeerURLs: req.PeerURLs.StringSlice()},
}
err := h.server.UpdateMember(ctx, m)
switch {
case err == etcdserver.ErrPeerURLexists:
writeError(w, httptypes.NewHTTPError(http.StatusConflict, err.Error()))
case err == etcdserver.ErrIDNotFound:
writeError(w, httptypes.NewHTTPError(http.StatusNotFound, fmt.Sprintf("No such member: %s", id)))
case err != nil:
plog.Errorf("error updating member %s (%v)", m.ID, err)
writeError(w, err)
default:
w.WriteHeader(http.StatusNoContent)
}
}
}
// statsHandler serves the statistics endpoints registered under statsPrefix.
type statsHandler struct {
// stats supplies the pre-serialized store, self and leader statistics.
stats stats.Stats
}
// serveStore answers GET requests with the store statistics as JSON.
func (h *statsHandler) serveStore(w http.ResponseWriter, r *http.Request) {
	if allowMethod(w, r.Method, "GET") {
		w.Header().Set("Content-Type", "application/json")
		w.Write(h.stats.StoreStats())
	}
}
// serveSelf answers GET requests with this server's own statistics as JSON.
func (h *statsHandler) serveSelf(w http.ResponseWriter, r *http.Request) {
	if allowMethod(w, r.Method, "GET") {
		w.Header().Set("Content-Type", "application/json")
		w.Write(h.stats.SelfStats())
	}
}
// serveLeader answers GET requests with the leader statistics as JSON. When no
// leader statistics are available (nil) it responds 403 "not current leader".
func (h *statsHandler) serveLeader(w http.ResponseWriter, r *http.Request) {
	if !allowMethod(w, r.Method, "GET") {
		return
	}
	payload := h.stats.LeaderStats()
	if payload != nil {
		w.Header().Set("Content-Type", "application/json")
		w.Write(payload)
		return
	}
	writeError(w, httptypes.NewHTTPError(http.StatusForbidden, "not current leader"))
}
// serveVars writes every published expvar variable to w as one JSON object,
// one `"key": value` member per line.
func serveVars(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json; charset=utf-8")

	fmt.Fprintf(w, "{\n")
	written := 0
	expvar.Do(func(kv expvar.KeyValue) {
		// Members after the first are preceded by a separator.
		if written > 0 {
			fmt.Fprintf(w, ",\n")
		}
		written++
		fmt.Fprintf(w, "%q: %s", kv.Key, kv.Value)
	})
	fmt.Fprintf(w, "\n}\n")
}
// healthHandler returns a GET-only handler reporting cluster health. It
// answers 503 `{"health": "false"}` when the server reports no leader, or when
// the server index does not advance within three 250ms polls; otherwise it
// answers 200 `{"health": "true"}`.
//
// TODO: change etcdserver to raft interface when we have it.
// Add a test for healthHandler when we have the interface ready.
func healthHandler(server *etcdserver.EtcdServer) http.HandlerFunc {
	const (
		polls        = 3
		pollInterval = 250 * time.Millisecond
	)
	unhealthy := func(w http.ResponseWriter) {
		http.Error(w, `{"health": "false"}`, http.StatusServiceUnavailable)
	}

	return func(w http.ResponseWriter, r *http.Request) {
		if !allowMethod(w, r.Method, "GET") {
			return
		}
		if uint64(server.Leader()) == raft.None {
			unhealthy(w)
			return
		}

		// wait for raft's progress
		start := server.Index()
		for attempt := 0; attempt < polls; attempt++ {
			time.Sleep(pollInterval)
			if server.Index() > start {
				w.WriteHeader(http.StatusOK)
				w.Write([]byte(`{"health": "true"}`))
				return
			}
		}
		unhealthy(w)
	}
}
// versionHandler adapts fn into an http.HandlerFunc by supplying the cluster
// version string as its third argument. While the cluster reports no version
// (nil), the placeholder "not_decided" is passed instead.
func versionHandler(c etcdserver.Cluster, fn func(http.ResponseWriter, *http.Request, string)) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		clusterV := "not_decided"
		if v := c.Version(); v != nil {
			clusterV = v.String()
		}
		fn(w, r, clusterV)
	}
}
// serveVersion answers GET requests with a JSON document holding this
// server's version and the supplied cluster version clusterV. A marshalling
// failure is treated as fatal (plog.Panicf).
func serveVersion(w http.ResponseWriter, r *http.Request, clusterV string) {
	if !allowMethod(w, r.Method, "GET") {
		return
	}

	versions := version.Versions{
		Server:  version.Version,
		Cluster: clusterV,
	}

	w.Header().Set("Content-Type", "application/json")
	payload, err := json.Marshal(&versions)
	if err != nil {
		plog.Panicf("cannot marshal versions to json (%v)", err)
	}
	w.Write(payload)
}
// logHandleFunc handles PUT requests carrying a JSON body with a Level field
// and installs that level (case-insensitively) as the global log level. It
// responds 204 on success and 400 for an undecodable body or unknown level.
func logHandleFunc(w http.ResponseWriter, r *http.Request) {
	if !allowMethod(w, r.Method, "PUT") {
		return
	}

	var body struct{ Level string }
	if err := json.NewDecoder(r.Body).Decode(&body); err != nil {
		writeError(w, httptypes.NewHTTPError(http.StatusBadRequest, "Invalid json body"))
		return
	}

	level, err := capnslog.ParseLevel(strings.ToUpper(body.Level))
	if err != nil {
		writeError(w, httptypes.NewHTTPError(http.StatusBadRequest, "Invalid log level "+body.Level))
		return
	}

	plog.Noticef("globalLogLevel set to %q", level.String())
	capnslog.SetGlobalLogLevel(level)
	w.WriteHeader(http.StatusNoContent)
}
// parseKeyRequest converts a received http.Request on keysPrefix to
// a server Request, performing validation of supplied fields as appropriate.
// If any validation fails, an empty Request and non-nil error is returned.
//
// Validation runs in a fixed order (form, key prefix, prevIndex, waitIndex,
// the boolean flags, wait/method compatibility, prevValue, ttl, prevExist) and
// the first failure determines the returned error. clock supplies "now" for
// turning a TTL into an absolute expiration.
func parseKeyRequest(r *http.Request, clock clockwork.Clock) (etcdserverpb.Request, error) {
	emptyReq := etcdserverpb.Request{}

	err := r.ParseForm()
	if err != nil {
		return emptyReq, etcdErr.NewRequestError(
			etcdErr.EcodeInvalidForm,
			err.Error(),
		)
	}

	if !strings.HasPrefix(r.URL.Path, keysPrefix) {
		return emptyReq, etcdErr.NewRequestError(
			etcdErr.EcodeInvalidForm,
			"incorrect key prefix",
		)
	}
	// Re-root the client-visible key under the store's keys prefix.
	p := path.Join(etcdserver.StoreKeysPrefix, r.URL.Path[len(keysPrefix):])

	var pIdx, wIdx uint64
	if pIdx, err = getUint64(r.Form, "prevIndex"); err != nil {
		return emptyReq, etcdErr.NewRequestError(
			etcdErr.EcodeIndexNaN,
			`invalid value for "prevIndex"`,
		)
	}
	if wIdx, err = getUint64(r.Form, "waitIndex"); err != nil {
		return emptyReq, etcdErr.NewRequestError(
			etcdErr.EcodeIndexNaN,
			`invalid value for "waitIndex"`,
		)
	}

	var rec, sort, wait, dir, quorum, stream bool
	if rec, err = getBool(r.Form, "recursive"); err != nil {
		return emptyReq, etcdErr.NewRequestError(
			etcdErr.EcodeInvalidField,
			`invalid value for "recursive"`,
		)
	}
	if sort, err = getBool(r.Form, "sorted"); err != nil {
		return emptyReq, etcdErr.NewRequestError(
			etcdErr.EcodeInvalidField,
			`invalid value for "sorted"`,
		)
	}
	if wait, err = getBool(r.Form, "wait"); err != nil {
		return emptyReq, etcdErr.NewRequestError(
			etcdErr.EcodeInvalidField,
			`invalid value for "wait"`,
		)
	}
	// TODO(jonboulle): define what parameters dir is/isn't compatible with?
	if dir, err = getBool(r.Form, "dir"); err != nil {
		return emptyReq, etcdErr.NewRequestError(
			etcdErr.EcodeInvalidField,
			`invalid value for "dir"`,
		)
	}
	if quorum, err = getBool(r.Form, "quorum"); err != nil {
		return emptyReq, etcdErr.NewRequestError(
			etcdErr.EcodeInvalidField,
			`invalid value for "quorum"`,
		)
	}
	if stream, err = getBool(r.Form, "stream"); err != nil {
		return emptyReq, etcdErr.NewRequestError(
			etcdErr.EcodeInvalidField,
			`invalid value for "stream"`,
		)
	}

	if wait && r.Method != "GET" {
		return emptyReq, etcdErr.NewRequestError(
			etcdErr.EcodeInvalidField,
			`"wait" can only be used with GET requests`,
		)
	}

	// An explicitly supplied but empty prevValue is rejected; an absent one
	// is fine and leaves pV empty.
	pV := r.FormValue("prevValue")
	if _, ok := r.Form["prevValue"]; ok && pV == "" {
		return emptyReq, etcdErr.NewRequestError(
			etcdErr.EcodePrevValueRequired,
			`"prevValue" cannot be empty`,
		)
	}

	// TTL is nullable, so leave it null if not specified
	// or an empty string
	var ttl *uint64
	if len(r.FormValue("ttl")) > 0 {
		i, err := getUint64(r.Form, "ttl")
		if err != nil {
			return emptyReq, etcdErr.NewRequestError(
				etcdErr.EcodeTTLNaN,
				`invalid value for "ttl"`,
			)
		}
		ttl = &i
	}

	// prevExist is nullable, so leave it null if not specified
	var pe *bool
	if _, ok := r.Form["prevExist"]; ok {
		bv, err := getBool(r.Form, "prevExist")
		if err != nil {
			return emptyReq, etcdErr.NewRequestError(
				etcdErr.EcodeInvalidField,
				"invalid value for prevExist",
			)
		}
		pe = &bv
	}

	// PrevExist is set here (nil when the parameter was absent); no separate
	// assignment is needed afterwards.
	rr := etcdserverpb.Request{
		Method:    r.Method,
		Path:      p,
		Val:       r.FormValue("value"),
		Dir:       dir,
		PrevValue: pV,
		PrevIndex: pIdx,
		PrevExist: pe,
		Wait:      wait,
		Since:     wIdx,
		Recursive: rec,
		Sorted:    sort,
		Quorum:    quorum,
		Stream:    stream,
	}

	// Null TTL is equivalent to unset Expiration
	if ttl != nil {
		expr := time.Duration(*ttl) * time.Second
		rr.Expiration = clock.Now().Add(expr).UnixNano()
	}

	return rr, nil
}
// writeKeyEvent serializes a single Event as the JSON response body on w.
// Before encoding it sets the content type plus the etcd index and raft
// index/term headers, sends 201 Created when the event reports a creation,
// and strips StoreKeysPrefix from the event's key paths. A nil event is an
// error.
func writeKeyEvent(w http.ResponseWriter, ev *store.Event, rt etcdserver.RaftTimer) error {
	if ev == nil {
		return errors.New("cannot write empty Event!")
	}

	w.Header().Set("Content-Type", "application/json")
	w.Header().Set("X-Etcd-Index", fmt.Sprint(ev.EtcdIndex))
	w.Header().Set("X-Raft-Index", fmt.Sprint(rt.Index()))
	w.Header().Set("X-Raft-Term", fmt.Sprint(rt.Term()))

	if created := ev.IsCreated(); created {
		w.WriteHeader(http.StatusCreated)
	}

	trimmed := trimEventPrefix(ev, etcdserver.StoreKeysPrefix)
	return json.NewEncoder(w).Encode(trimmed)
}
// writeKeyNoAuth writes an etcd "Insufficient credentials" error
// (EcodeUnauthorized) to w.
func writeKeyNoAuth(w http.ResponseWriter) {
	etcdErr.NewError(etcdErr.EcodeUnauthorized, "Insufficient credentials", 0).WriteTo(w)
}
// writeKeyError writes err to the ResponseWriter as an etcd error. An
// *etcdErr.Error is written as-is; any other error is logged and then wrapped
// in an EcodeRaftInternal etcd error. A nil err is a no-op.
func writeKeyError(w http.ResponseWriter, err error) {
	if err == nil {
		return
	}

	if e, ok := err.(*etcdErr.Error); ok {
		e.WriteTo(w)
		return
	}

	// The known timeout errors are logged verbatim; anything else is flagged
	// as unexpected.
	if err == etcdserver.ErrTimeoutDueToLeaderFail || err == etcdserver.ErrTimeoutDueToConnectionLost {
		plog.Error(err)
	} else {
		plog.Errorf("got unexpected response error (%v)", err)
	}
	etcdErr.NewError(etcdErr.EcodeRaftInternal, err.Error(), 0).WriteTo(w)
}
// handleKeyWatch writes events from watcher wa to w as JSON until the client
// disconnects, ctx is done, the event channel is closed, or (when stream is
// false) the first event has been written. The watcher is always removed on
// return.
//
// Headers (content type, etcd index, raft index/term) and a 200 status are
// sent before the first event. Flushing is best-effort: if w does not
// implement http.Flusher the flush steps are skipped instead of panicking on
// a failed type assertion.
func handleKeyWatch(ctx context.Context, w http.ResponseWriter, wa store.Watcher, stream bool, rt etcdserver.RaftTimer) {
	defer wa.Remove()
	ech := wa.EventChan()

	// nch stays nil (and therefore never fires in the select below) when w
	// cannot report client disconnects.
	var nch <-chan bool
	if x, ok := w.(http.CloseNotifier); ok {
		nch = x.CloseNotify()
	}

	flush := func() {}
	if f, ok := w.(http.Flusher); ok {
		flush = f.Flush
	}

	w.Header().Set("Content-Type", "application/json")
	w.Header().Set("X-Etcd-Index", fmt.Sprint(wa.StartIndex()))
	w.Header().Set("X-Raft-Index", fmt.Sprint(rt.Index()))
	w.Header().Set("X-Raft-Term", fmt.Sprint(rt.Term()))
	w.WriteHeader(http.StatusOK)

	// Ensure headers are flushed early, in case of long polling
	flush()

	for {
		select {
		case <-nch:
			// Client closed connection. Nothing to do.
			return
		case <-ctx.Done():
			// Timed out. net/http will close the connection for us, so nothing to do.
			return
		case ev, ok := <-ech:
			if !ok {
				// If the channel is closed this may be an indication
				// that notifications are arriving faster than we are able to
				// send them to the client. Then we simply end streaming.
				return
			}
			ev = trimEventPrefix(ev, etcdserver.StoreKeysPrefix)
			if err := json.NewEncoder(w).Encode(ev); err != nil {
				// Should never be reached
				plog.Warningf("error writing event (%v)", err)
				return
			}
			if !stream {
				return
			}
			flush()
		}
	}
}
// trimEventPrefix returns a copy of ev whose Node and PrevNode have had prefix
// trimmed from their keys. A nil ev yields nil.
func trimEventPrefix(ev *store.Event, prefix string) *store.Event {
	if ev != nil {
		// The *Event may reference one held in the store history, so work
		// on a clone rather than modifying it in place.
		trimmed := ev.Clone()
		trimmed.Node = trimNodeExternPrefix(trimmed.Node, prefix)
		trimmed.PrevNode = trimNodeExternPrefix(trimmed.PrevNode, prefix)
		return trimmed
	}
	return nil
}
// trimNodeExternPrefix strips prefix from the key of n and, recursively, from
// the keys of every node in n.Nodes. The nodes are modified in place and n is
// returned for convenience; a nil n yields nil.
func trimNodeExternPrefix(n *store.NodeExtern, prefix string) *store.NodeExtern {
	if n == nil {
		return nil
	}
	n.Key = strings.TrimPrefix(n.Key, prefix)
	for _, child := range n.Nodes {
		// The recursion mutates the child through its pointer, so the
		// returned value does not need to be stored.
		trimNodeExternPrefix(child, prefix)
	}
	return n
}
// trimErrorPrefix strips prefix from the Cause of err when err is an
// *etcdErr.Error (modifying it in place) and returns err. Other errors are
// returned untouched.
func trimErrorPrefix(err error, prefix string) error {
	etcdError, ok := err.(*etcdErr.Error)
	if !ok {
		return err
	}
	etcdError.Cause = strings.TrimPrefix(etcdError.Cause, prefix)
	return err
}
// unmarshalRequest reads the body of r and unmarshals it into req. The
// request must carry Content-Type "application/json" exactly. On any failure
// an error response is written to w (415 for a bad content type, 400 for a
// read or unmarshal error) and false is returned.
func unmarshalRequest(r *http.Request, req json.Unmarshaler, w http.ResponseWriter) bool {
	if ctype := r.Header.Get("Content-Type"); ctype != "application/json" {
		msg := fmt.Sprintf("Bad Content-Type %s, accept application/json", ctype)
		writeError(w, httptypes.NewHTTPError(http.StatusUnsupportedMediaType, msg))
		return false
	}

	body, err := ioutil.ReadAll(r.Body)
	if err == nil {
		err = req.UnmarshalJSON(body)
	}
	if err != nil {
		writeError(w, httptypes.NewHTTPError(http.StatusBadRequest, err.Error()))
		return false
	}
	return true
}
// getID extracts the member ID that follows membersPrefix in path p. When the
// ID is missing it writes 405 to w; when it cannot be parsed it writes a 404
// "No such member" error. In both cases it returns (0, false).
func getID(p string, w http.ResponseWriter) (types.ID, bool) {
	raw := trimPrefix(p, membersPrefix)
	if raw == "" {
		http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
		return 0, false
	}

	id, err := types.IDFromString(raw)
	if err == nil {
		return id, true
	}
	writeError(w, httptypes.NewHTTPError(http.StatusNotFound, fmt.Sprintf("No such member: %s", raw)))
	return 0, false
}
// getUint64 extracts a uint64 by the given key from a Form. If the key does
// not exist in the form (or is present with no values), 0 is returned. If the
// key exists but the value is badly formed, an error is returned. If multiple
// values are present only the first is considered.
func getUint64(form url.Values, key string) (i uint64, err error) {
	vals, ok := form[key]
	// Guard against a key mapped to an empty slice, which would otherwise
	// panic on vals[0].
	if !ok || len(vals) == 0 {
		return 0, nil
	}
	return strconv.ParseUint(vals[0], 10, 64)
}
// getBool extracts a bool by the given key from a Form. If the key does not
// exist in the form (or is present with no values), false is returned. If the
// key exists but the value is badly formed, an error is returned. If multiple
// values are present only the first is considered.
func getBool(form url.Values, key string) (b bool, err error) {
	vals, ok := form[key]
	// Guard against a key mapped to an empty slice, which would otherwise
	// panic on vals[0].
	if !ok || len(vals) == 0 {
		return false, nil
	}
	return strconv.ParseBool(vals[0])
}
// trimPrefix strips prefix from the front of p and then a single leading
// slash from what remains,
// e.g.: trimPrefix("foo", "foo") == trimPrefix("foo/", "foo") == ""
func trimPrefix(p, prefix string) (s string) {
	rest := strings.TrimPrefix(p, prefix)
	return strings.TrimPrefix(rest, "/")
}
// newMemberCollection converts each server-side member in ms to its HTTP API
// representation and returns the results, in the same order, as a collection.
func newMemberCollection(ms []*etcdserver.Member) *httptypes.MemberCollection {
	members := make(httptypes.MemberCollection, len(ms))
	for idx := range ms {
		members[idx] = newMember(ms[idx])
	}
	return &members
}
// newMember converts a server-side member to its HTTP API representation. The
// peer and client URL slices are copied, so the result does not share backing
// storage with m.
func newMember(m *etcdserver.Member) httptypes.Member {
	peers := make([]string, len(m.PeerURLs))
	copy(peers, m.PeerURLs)

	clients := make([]string, len(m.ClientURLs))
	copy(clients, m.ClientURLs)

	return httptypes.Member{
		ID:         m.ID.String(),
		Name:       m.Name,
		PeerURLs:   peers,
		ClientURLs: clients,
	}
}

View File

@@ -0,0 +1,435 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdhttp
import (
"encoding/json"
"net/http"
"path"
"strings"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/auth"
"github.com/coreos/etcd/etcdserver/etcdhttp/httptypes"
"github.com/coreos/etcd/pkg/netutil"
)
// authHandler serves the roles, users and enable endpoints under authPrefix.
type authHandler struct {
// sec is the auth store queried and modified by the endpoints.
sec auth.Store
// cluster supplies the ID reported in the X-Etcd-Cluster-ID header.
cluster etcdserver.Cluster
}
// hasWriteRootAccess reports whether the request may proceed: GET and HEAD
// requests are always allowed, every other method requires root access as
// determined by hasRootAccess.
func hasWriteRootAccess(sec auth.Store, r *http.Request) bool {
	switch r.Method {
	case "GET", "HEAD":
		return true
	}
	return hasRootAccess(sec, r)
}
// hasRootAccess reports whether the request is made by a user holding the
// root role. Access is granted unconditionally when there is no auth store
// or auth is disabled. Otherwise the request must carry basic-auth
// credentials of an existing user with a matching password and the root role.
func hasRootAccess(sec auth.Store, r *http.Request) bool {
	// No store means no auth available, eg, tests.
	if sec == nil || !sec.AuthEnabled() {
		return true
	}

	username, password, ok := netutil.BasicAuth(r)
	if !ok {
		return false
	}

	candidate, err := sec.GetUser(username)
	if err != nil {
		return false
	}
	if !candidate.CheckPassword(password) {
		plog.Warningf("auth: wrong password for user %s", username)
		return false
	}

	for i := range candidate.Roles {
		if candidate.Roles[i] == auth.RootRoleName {
			return true
		}
	}
	plog.Warningf("auth: user %s does not have the %s role for resource %s.", username, auth.RootRoleName, r.URL.Path)
	return false
}
// hasKeyPrefixAccess reports whether the request may access key. Access is
// granted unconditionally when there is no auth store or auth is disabled.
// Requests without an Authorization header are checked against the guest
// role. Otherwise the basic-auth user must exist, present the right password,
// and hold at least one role granting access to key: recursive access when
// recursive is set, plain key access otherwise. Any method other than GET or
// HEAD counts as a write.
func hasKeyPrefixAccess(sec auth.Store, r *http.Request, key string, recursive bool) bool {
	// No store means no auth available, eg, tests.
	if sec == nil || !sec.AuthEnabled() {
		return true
	}

	if r.Header.Get("Authorization") == "" {
		plog.Warningf("auth: no authorization provided, checking guest access")
		return hasGuestAccess(sec, r, key)
	}

	username, password, ok := netutil.BasicAuth(r)
	if !ok {
		plog.Warningf("auth: malformed basic auth encoding")
		return false
	}

	user, err := sec.GetUser(username)
	if err != nil {
		plog.Warningf("auth: no such user: %s.", username)
		return false
	}
	if !user.CheckPassword(password) {
		plog.Warningf("auth: incorrect password for user: %s.", username)
		return false
	}

	writeAccess := !(r.Method == "GET" || r.Method == "HEAD")
	for _, roleName := range user.Roles {
		role, err := sec.GetRole(roleName)
		if err != nil {
			// Roles that cannot be loaded simply grant nothing.
			continue
		}

		var granted bool
		if recursive {
			granted = role.HasRecursiveAccess(key, writeAccess)
		} else {
			granted = role.HasKeyAccess(key, writeAccess)
		}
		if granted {
			return true
		}
	}

	plog.Warningf("auth: invalid access for user %s on key %s.", username, key)
	return false
}
// hasGuestAccess reports whether the guest role grants the request access to
// key. Any method other than GET or HEAD counts as a write. It returns false
// when the guest role cannot be loaded.
func hasGuestAccess(sec auth.Store, r *http.Request, key string) bool {
	writeAccess := !(r.Method == "GET" || r.Method == "HEAD")

	guest, err := sec.GetRole(auth.GuestRoleName)
	if err != nil {
		return false
	}
	if !guest.HasKeyAccess(key, writeAccess) {
		plog.Warningf("auth: invalid access for unauthenticated user on resource %s.", key)
		return false
	}
	return true
}
// writeNoAuth writes a 401 "Insufficient credentials" JSON error to w.
func writeNoAuth(w http.ResponseWriter) {
	httptypes.NewHTTPError(http.StatusUnauthorized, "Insufficient credentials").WriteTo(w)
}
// handleAuth registers the auth endpoints of sh on mux under authPrefix. Each
// endpoint is wrapped in capabilityHandler so that it is only served while
// authCapability is enabled.
func handleAuth(mux *http.ServeMux, sh *authHandler) {
	routes := []struct {
		suffix  string
		handler func(http.ResponseWriter, *http.Request)
	}{
		{"/roles", sh.baseRoles},
		{"/roles/", sh.handleRoles},
		{"/users", sh.baseUsers},
		{"/users/", sh.handleUsers},
		{"/enable", sh.enableDisable},
	}
	for _, rt := range routes {
		mux.HandleFunc(authPrefix+rt.suffix, capabilityHandler(authCapability, rt.handler))
	}
}
// baseRoles answers GET requests from a root user with the list of all role
// names as `{"roles": [...]}`. An empty list is encoded as `[]`, never `null`.
func (sh *authHandler) baseRoles(w http.ResponseWriter, r *http.Request) {
	if !allowMethod(w, r.Method, "GET") {
		return
	}
	if !hasRootAccess(sh.sec, r) {
		writeNoAuth(w)
		return
	}

	w.Header().Set("X-Etcd-Cluster-ID", sh.cluster.ID().String())
	w.Header().Set("Content-Type", "application/json")

	names, err := sh.sec.AllRoles()
	if err != nil {
		writeError(w, err)
		return
	}
	if names == nil {
		names = []string{}
	}

	payload := struct {
		Roles []string `json:"roles"`
	}{Roles: names}
	if err := json.NewEncoder(w).Encode(payload); err != nil {
		plog.Warningf("baseRoles error encoding on %s", r.URL)
	}
}
// handleRoles routes requests under authPrefix+"/roles/". The cleaned path
// below authPrefix is split on "/": two pieces ("", "roles") mean the role
// listing, three pieces address a single role, anything else is a 400.
func (sh *authHandler) handleRoles(w http.ResponseWriter, r *http.Request) {
	subpath := path.Clean(r.URL.Path[len(authPrefix):])
	// Split "/roles/rolename/command".
	// First item is an empty string, second is "roles"
	parts := strings.Split(subpath, "/")
	switch len(parts) {
	case 2:
		sh.baseRoles(w, r)
	case 3:
		sh.forRole(w, r, parts[2])
	default:
		writeError(w, httptypes.NewHTTPError(http.StatusBadRequest, "Invalid path"))
	}
}
// forRole handles GET, PUT and DELETE for the single role named role. All
// methods require root access.
//
//	GET    returns the role as JSON
//	PUT    creates the role when the body carries no grant/revoke (201), or
//	       updates it otherwise (200); the body's role name must match role
//	DELETE removes the role
//
// Errors are reported as JSON through writeError.
func (sh *authHandler) forRole(w http.ResponseWriter, r *http.Request, role string) {
if !allowMethod(w, r.Method, "GET", "PUT", "DELETE") {
return
}
if !hasRootAccess(sh.sec, r) {
writeNoAuth(w)
return
}
w.Header().Set("X-Etcd-Cluster-ID", sh.cluster.ID().String())
w.Header().Set("Content-Type", "application/json")
switch r.Method {
case "GET":
data, err := sh.sec.GetRole(role)
if err != nil {
writeError(w, err)
return
}
err = json.NewEncoder(w).Encode(data)
if err != nil {
plog.Warningf("forRole error encoding on %s", r.URL)
return
}
return
case "PUT":
var in auth.Role
err := json.NewDecoder(r.Body).Decode(&in)
if err != nil {
writeError(w, httptypes.NewHTTPError(http.StatusBadRequest, "Invalid JSON in request body."))
return
}
// The name in the body must agree with the name in the URL.
if in.Role != role {
writeError(w, httptypes.NewHTTPError(http.StatusBadRequest, "Role JSON name does not match the name in the URL"))
return
}
var out auth.Role
// create
if in.Grant.IsEmpty() && in.Revoke.IsEmpty() {
err = sh.sec.CreateRole(in)
if err != nil {
writeError(w, err)
return
}
// The status must be written before the body is encoded below.
w.WriteHeader(http.StatusCreated)
out = in
} else {
// Update: grant/revoke and a full permissions set are mutually exclusive.
if !in.Permissions.IsEmpty() {
writeError(w, httptypes.NewHTTPError(http.StatusBadRequest, "Role JSON contains both permissions and grant/revoke"))
return
}
out, err = sh.sec.UpdateRole(in)
if err != nil {
writeError(w, err)
return
}
w.WriteHeader(http.StatusOK)
}
err = json.NewEncoder(w).Encode(out)
if err != nil {
plog.Warningf("forRole error encoding on %s", r.URL)
return
}
return
case "DELETE":
// On success nothing further is written.
err := sh.sec.DeleteRole(role)
if err != nil {
writeError(w, err)
return
}
}
}
// baseUsers answers GET requests from a root user with the list of all user
// names as `{"users": [...]}`. An empty list is encoded as `[]`, never `null`.
func (sh *authHandler) baseUsers(w http.ResponseWriter, r *http.Request) {
	if !allowMethod(w, r.Method, "GET") {
		return
	}
	if !hasRootAccess(sh.sec, r) {
		writeNoAuth(w)
		return
	}

	w.Header().Set("X-Etcd-Cluster-ID", sh.cluster.ID().String())
	w.Header().Set("Content-Type", "application/json")

	names, err := sh.sec.AllUsers()
	if err != nil {
		writeError(w, err)
		return
	}
	if names == nil {
		names = []string{}
	}

	payload := struct {
		Users []string `json:"users"`
	}{Users: names}
	if err := json.NewEncoder(w).Encode(payload); err != nil {
		plog.Warningf("baseUsers error encoding on %s", r.URL)
	}
}
// handleUsers routes requests under authPrefix+"/users/". The cleaned path
// below authPrefix is split on "/": two pieces ("", "users") mean the user
// listing, three pieces address a single user, anything else is a 400.
func (sh *authHandler) handleUsers(w http.ResponseWriter, r *http.Request) {
	subpath := path.Clean(r.URL.Path[len(authPrefix):])
	// Split "/users/username".
	// First item is an empty string, second is "users"
	parts := strings.Split(subpath, "/")
	switch len(parts) {
	case 2:
		sh.baseUsers(w, r)
	case 3:
		sh.forUser(w, r, parts[2])
	default:
		writeError(w, httptypes.NewHTTPError(http.StatusBadRequest, "Invalid path"))
	}
}
// forUser handles GET, PUT and DELETE for the single user named user. All
// methods require root access.
//
//	GET    returns the user as JSON with the password blanked
//	PUT    without grant/revoke: creates the user (CreateUser) when roles are
//	       supplied, otherwise creates or updates it (CreateOrUpdateUser);
//	       with grant/revoke: updates the user, and roles must not be
//	       supplied. Responds 201 when a user was created, 200 otherwise.
//	       The body's user name must match user.
//	DELETE removes the user
//
// Errors are reported as JSON through writeError.
func (sh *authHandler) forUser(w http.ResponseWriter, r *http.Request, user string) {
	if !allowMethod(w, r.Method, "GET", "PUT", "DELETE") {
		return
	}
	if !hasRootAccess(sh.sec, r) {
		writeNoAuth(w)
		return
	}
	w.Header().Set("X-Etcd-Cluster-ID", sh.cluster.ID().String())
	w.Header().Set("Content-Type", "application/json")

	switch r.Method {
	case "GET":
		u, err := sh.sec.GetUser(user)
		if err != nil {
			writeError(w, err)
			return
		}

		// Never expose the stored password.
		u.Password = ""

		err = json.NewEncoder(w).Encode(u)
		if err != nil {
			plog.Warningf("forUser error encoding on %s", r.URL)
			return
		}
		return
	case "PUT":
		var u auth.User
		err := json.NewDecoder(r.Body).Decode(&u)
		if err != nil {
			writeError(w, httptypes.NewHTTPError(http.StatusBadRequest, "Invalid JSON in request body."))
			return
		}
		// The name in the body must agree with the name in the URL.
		if u.User != user {
			writeError(w, httptypes.NewHTTPError(http.StatusBadRequest, "User JSON name does not match the name in the URL"))
			return
		}

		var (
			out     auth.User
			created bool
		)

		if len(u.Grant) == 0 && len(u.Revoke) == 0 {
			// create or update
			if len(u.Roles) != 0 {
				// Roles were passed in, so the intention (create vs.
				// update) is ambiguous; this path only ever creates.
				out, err = sh.sec.CreateUser(u)
				// A successful CreateUser always yields a new user, so it
				// must be reported as 201 like the create branch below.
				created = err == nil
			} else {
				out, created, err = sh.sec.CreateOrUpdateUser(u)
			}

			if err != nil {
				writeError(w, err)
				return
			}
		} else {
			// update case
			if len(u.Roles) != 0 {
				writeError(w, httptypes.NewHTTPError(http.StatusBadRequest, "User JSON contains both roles and grant/revoke"))
				return
			}
			out, err = sh.sec.UpdateUser(u)
			if err != nil {
				writeError(w, err)
				return
			}
		}

		// The status must be written before the body is encoded below.
		if created {
			w.WriteHeader(http.StatusCreated)
		} else {
			w.WriteHeader(http.StatusOK)
		}

		// Never expose the stored password.
		out.Password = ""

		err = json.NewEncoder(w).Encode(out)
		if err != nil {
			plog.Warningf("forUser error encoding on %s", r.URL)
			return
		}
		return
	case "DELETE":
		// On success nothing further is written.
		err := sh.sec.DeleteUser(user)
		if err != nil {
			writeError(w, err)
			return
		}
	}
}
// enabled is the JSON body returned by a GET on the auth enable endpoint.
type enabled struct {
// Enabled carries the value reported by the auth store's AuthEnabled.
Enabled bool `json:"enabled"`
}
// enableDisable serves the auth enable endpoint: GET reports whether auth is
// enabled, PUT enables it and DELETE disables it. PUT and DELETE require root
// access (see hasWriteRootAccess); failures are reported through writeError.
func (sh *authHandler) enableDisable(w http.ResponseWriter, r *http.Request) {
	if !allowMethod(w, r.Method, "GET", "PUT", "DELETE") {
		return
	}
	if !hasWriteRootAccess(sh.sec, r) {
		writeNoAuth(w)
		return
	}

	w.Header().Set("X-Etcd-Cluster-ID", sh.cluster.ID().String())
	w.Header().Set("Content-Type", "application/json")

	authOn := sh.sec.AuthEnabled()

	if r.Method == "GET" {
		if err := json.NewEncoder(w).Encode(enabled{authOn}); err != nil {
			plog.Warningf("error encoding auth state on %s", r.URL)
		}
	} else if r.Method == "PUT" {
		if err := sh.sec.EnableAuth(); err != nil {
			writeError(w, err)
		}
	} else if r.Method == "DELETE" {
		if err := sh.sec.DisableAuth(); err != nil {
			writeError(w, err)
		}
	}
}

View File

@@ -0,0 +1,87 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdhttp
import (
"errors"
"math"
"net/http"
"strings"
"time"
etcdErr "github.com/coreos/etcd/error"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/etcdserver/auth"
"github.com/coreos/etcd/etcdserver/etcdhttp/httptypes"
"github.com/coreos/pkg/capnslog"
)
const (
// time to wait for a Watch request
// (set to the largest representable time.Duration).
defaultWatchTimeout = time.Duration(math.MaxInt64)
)
var (
// plog is the capnslog logger used by this package.
plog = capnslog.NewPackageLogger("github.com/coreos/etcd", "etcdhttp")
// errClosed is the error value for a connection closed by the client.
errClosed = errors.New("etcdhttp: client closed connection")
)
// writeError writes err to the ResponseWriter. etcd errors and HTTP errors
// are rendered as they are, auth errors are rendered with their own HTTP
// status, and any other error is logged and reported as a generic 500
// "Internal Server Error". A nil err is a no-op.
func writeError(w http.ResponseWriter, err error) {
	if err == nil {
		return
	}

	switch e := err.(type) {
	case *etcdErr.Error:
		e.WriteTo(w)
		return
	case *httptypes.HTTPError:
		e.WriteTo(w)
		return
	case auth.Error:
		httptypes.NewHTTPError(e.HTTPStatus(), e.Error()).WriteTo(w)
		return
	}

	// The known timeout errors are logged verbatim; anything else is flagged
	// as unexpected.
	if err == etcdserver.ErrTimeoutDueToLeaderFail || err == etcdserver.ErrTimeoutDueToConnectionLost {
		plog.Error(err)
	} else {
		plog.Errorf("got unexpected response error (%v)", err)
	}
	httptypes.NewHTTPError(http.StatusInternalServerError, "Internal Server Error").WriteTo(w)
}
// allowMethod reports whether method m is one of the allowed methods ms. When
// it is not, a 405 "Method Not Allowed" response listing ms in the Allow
// header is written to w and false is returned.
func allowMethod(w http.ResponseWriter, m string, ms ...string) bool {
	for i := range ms {
		if ms[i] == m {
			return true
		}
	}

	allowed := strings.Join(ms, ",")
	w.Header().Set("Allow", allowed)
	http.Error(w, "Method Not Allowed", http.StatusMethodNotAllowed)
	return false
}
// requestLogger wraps handler so that the method, request URI and remote
// address of every request are logged at debug level before it is served.
func requestLogger(handler http.Handler) http.Handler {
	logged := func(w http.ResponseWriter, r *http.Request) {
		plog.Debugf("[%s] %s remote:%s", r.Method, r.RequestURI, r.RemoteAddr)
		handler.ServeHTTP(w, r)
	}
	return http.HandlerFunc(logged)
}

View File

@@ -0,0 +1,19 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
Package httptypes defines how etcd's HTTP API entities are serialized to and deserialized from JSON.
*/
package httptypes

View File

@@ -0,0 +1,54 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package httptypes
import (
"encoding/json"
"net/http"
"github.com/coreos/pkg/capnslog"
)
var (
	// plog is this package's logger, registered with capnslog under the
	// repository "github.com/coreos/etcd/etcdserver/etcdhttp" and the
	// package name "httptypes".
	plog = capnslog.NewPackageLogger("github.com/coreos/etcd/etcdserver/etcdhttp", "httptypes")
)
// HTTPError is an error that also carries the HTTP status code it should be
// reported with. Only Message is included when the value is encoded as JSON;
// Code is excluded by its `json:"-"` tag.
type HTTPError struct {
	Message string `json:"message"`
	// HTTP return code
	Code int `json:"-"`
}
// Error implements the error interface by returning e.Message.
func (e HTTPError) Error() string {
	return e.Message
}
// WriteTo sends e to w as a JSON document with Content-Type
// "application/json" and e.Code as the HTTP status.
//
// A failure to write the body cannot be reported to the client (the status
// line has already been sent) and WriteTo has no return value, so the failure
// is recorded in the debug log instead of being silently dropped.
func (e HTTPError) WriteTo(w http.ResponseWriter) {
	// Marshal before touching the response so that the "should never
	// happen" panic below cannot leave a half-written reply behind.
	b, err := json.Marshal(e)
	if err != nil {
		plog.Panicf("marshal HTTPError should never fail (%v)", err)
	}
	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(e.Code)
	if _, err := w.Write(b); err != nil {
		plog.Debugf("failed to write HTTPError response (%v)", err)
	}
}
// NewHTTPError returns a pointer to a new HTTPError with status code and
// message m.
func NewHTTPError(code int, m string) *HTTPError {
	herr := new(HTTPError)
	herr.Code = code
	herr.Message = m
	return herr
}

View File

@@ -0,0 +1,67 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package httptypes
import (
"encoding/json"
"github.com/coreos/etcd/pkg/types"
)
// Member is the JSON representation of a cluster member: its ID and name
// plus the lists of peer and client URLs.
type Member struct {
	ID string `json:"id"`
	Name string `json:"name"`
	PeerURLs []string `json:"peerURLs"`
	ClientURLs []string `json:"clientURLs"`
}
// MemberCreateRequest is the decoded body of a member-creation request.
// It is populated through its custom UnmarshalJSON, which parses the
// "peerURLs" string list into types.URLs.
type MemberCreateRequest struct {
	PeerURLs types.URLs
}
// MemberUpdateRequest has the same shape as MemberCreateRequest; embedding
// it promotes both the PeerURLs field and the UnmarshalJSON method.
type MemberUpdateRequest struct {
	MemberCreateRequest
}
// UnmarshalJSON decodes a JSON object of the form {"peerURLs": [...]} and
// stores the URLs, converted by types.NewURLs, in m.PeerURLs. Errors from
// JSON decoding or from types.NewURLs are returned unchanged, in which case m
// is left untouched.
func (m *MemberCreateRequest) UnmarshalJSON(data []byte) error {
	var raw struct {
		PeerURLs []string `json:"peerURLs"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	parsed, err := types.NewURLs(raw.PeerURLs)
	if err != nil {
		return err
	}
	m.PeerURLs = parsed
	return nil
}
// MemberCollection is a list of members that encodes to JSON as an object
// with a single "members" key rather than as a bare array.
type MemberCollection []Member

// MarshalJSON wraps the collection as {"members": [...]}. The method has a
// pointer receiver, so the wrapping only applies when a *MemberCollection
// (or an addressable MemberCollection) is encoded.
func (c *MemberCollection) MarshalJSON() ([]byte, error) {
	type envelope struct {
		Members []Member `json:"members"`
	}
	return json.Marshal(envelope{Members: []Member(*c)})
}

View File

@@ -0,0 +1,63 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdhttp
import (
"encoding/json"
"net/http"
"github.com/coreos/etcd/etcdserver"
"github.com/coreos/etcd/rafthttp"
)
const (
	// peerMembersPrefix is the path on which NewPeerHandler registers the
	// member-list handler.
	peerMembersPrefix = "/members"
)
// NewPeerHandler builds the http.Handler served to other cluster members.
//
// Routes:
//   - rafthttp.RaftPrefix and everything beneath it -> raftHandler
//   - peerMembersPrefix                             -> member list of cluster
//   - versionPath                                   -> version handler
//   - anything else                                 -> 404
func NewPeerHandler(cluster etcdserver.Cluster, raftHandler http.Handler) http.Handler {
	mux := http.NewServeMux()
	mux.HandleFunc("/", http.NotFound)

	// Register both the exact prefix and the subtree below it.
	for _, pattern := range []string{rafthttp.RaftPrefix, rafthttp.RaftPrefix + "/"} {
		mux.Handle(pattern, raftHandler)
	}

	mux.Handle(peerMembersPrefix, &peerMembersHandler{cluster: cluster})
	mux.HandleFunc(versionPath, versionHandler(cluster, serveVersion))
	return mux
}
// peerMembersHandler serves the member list of cluster as JSON.
type peerMembersHandler struct {
	cluster etcdserver.Cluster
}
// ServeHTTP answers GET requests on peerMembersPrefix with the cluster's
// members encoded as JSON. Other methods are rejected by allowMethod; any
// other path gets a 400 "bad path". The X-Etcd-Cluster-ID header is set on
// every response to an allowed method, including the 400.
func (h *peerMembersHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	if allowed := allowMethod(w, r.Method, "GET"); !allowed {
		return
	}

	clusterID := h.cluster.ID().String()
	w.Header().Set("X-Etcd-Cluster-ID", clusterID)

	if r.URL.Path != peerMembersPrefix {
		http.Error(w, "bad path", http.StatusBadRequest)
		return
	}

	members := h.cluster.Members()
	w.Header().Set("Content-Type", "application/json")
	err := json.NewEncoder(w).Encode(members)
	if err != nil {
		// The response has already been started; all that is left is to log.
		plog.Warningf("failed to encode members response (%v)", err)
	}
}

View File

@@ -0,0 +1,804 @@
// Code generated by protoc-gen-gogo.
// source: etcdserver.proto
// DO NOT EDIT!
/*
Package etcdserverpb is a generated protocol buffer package.
It is generated from these files:
etcdserver.proto
raft_internal.proto
rpc.proto
It has these top-level messages:
Request
Metadata
*/
package etcdserverpb
import proto "github.com/gogo/protobuf/proto"
import math "math"
// discarding unused import gogoproto "github.com/coreos/etcd/Godeps/_workspace/src/gogoproto"
import io "io"
import fmt "fmt"
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
var _ = math.Inf
// Request is the Go form of the protobuf message "Request" from
// etcdserver.proto. Every field is a plain value except PrevExist, which is a
// pointer so that "not set" can be told apart from false (MarshalTo only
// emits it when non-nil).
type Request struct {
	ID uint64 `protobuf:"varint,1,opt" json:"ID"`
	Method string `protobuf:"bytes,2,opt" json:"Method"`
	Path string `protobuf:"bytes,3,opt" json:"Path"`
	Val string `protobuf:"bytes,4,opt" json:"Val"`
	Dir bool `protobuf:"varint,5,opt" json:"Dir"`
	PrevValue string `protobuf:"bytes,6,opt" json:"PrevValue"`
	PrevIndex uint64 `protobuf:"varint,7,opt" json:"PrevIndex"`
	PrevExist *bool `protobuf:"varint,8,opt" json:"PrevExist,omitempty"`
	Expiration int64 `protobuf:"varint,9,opt" json:"Expiration"`
	Wait bool `protobuf:"varint,10,opt" json:"Wait"`
	Since uint64 `protobuf:"varint,11,opt" json:"Since"`
	Recursive bool `protobuf:"varint,12,opt" json:"Recursive"`
	Sorted bool `protobuf:"varint,13,opt" json:"Sorted"`
	Quorum bool `protobuf:"varint,14,opt" json:"Quorum"`
	Time int64 `protobuf:"varint,15,opt" json:"Time"`
	Stream bool `protobuf:"varint,16,opt" json:"Stream"`
	// XXX_unrecognized holds the raw bytes of fields Unmarshal did not
	// recognize; MarshalTo appends them after the known fields.
	XXX_unrecognized []byte `json:"-"`
}
// Reset sets m back to the zero Request.
func (m *Request) Reset() { *m = Request{} }

// String returns m formatted by proto.CompactTextString.
func (m *Request) String() string { return proto.CompactTextString(m) }

// ProtoMessage is an empty marker method.
// NOTE(review): presumably required by the proto package's Message interface — confirm.
func (*Request) ProtoMessage() {}
// Metadata is the Go form of the protobuf message "Metadata" from
// etcdserver.proto: a node ID and a cluster ID.
type Metadata struct {
	NodeID uint64 `protobuf:"varint,1,opt" json:"NodeID"`
	ClusterID uint64 `protobuf:"varint,2,opt" json:"ClusterID"`
	// XXX_unrecognized holds the raw bytes of fields Unmarshal did not
	// recognize; MarshalTo appends them after the known fields.
	XXX_unrecognized []byte `json:"-"`
}
// Reset sets m back to the zero Metadata.
func (m *Metadata) Reset() { *m = Metadata{} }

// String returns m formatted by proto.CompactTextString.
func (m *Metadata) String() string { return proto.CompactTextString(m) }

// ProtoMessage is an empty marker method.
// NOTE(review): presumably required by the proto package's Message interface — confirm.
func (*Metadata) ProtoMessage() {}
// Marshal returns the wire-format encoding of m in a freshly allocated
// buffer of m.Size() bytes.
func (m *Request) Marshal() (data []byte, err error) {
	buf := make([]byte, m.Size())
	written, err := m.MarshalTo(buf)
	if err != nil {
		return nil, err
	}
	return buf[:written], nil
}
// MarshalTo encodes m into data, starting at index 0, and returns the number
// of bytes written. Fields are written in field-number order; PrevExist is
// written only when non-nil, and any XXX_unrecognized bytes are appended at
// the end. data must be at least m.Size() bytes long; a shorter buffer causes
// an index-out-of-range panic. The returned error is always nil.
func (m *Request) MarshalTo(data []byte) (int, error) {
	pos := 0

	// put writes a single byte at the cursor.
	put := func(b byte) {
		data[pos] = b
		pos++
	}
	// uvarint writes a one-byte field key followed by v as a varint.
	uvarint := func(key byte, v uint64) {
		put(key)
		pos = encodeVarintEtcdserver(data, pos, v)
	}
	// str writes a one-byte field key, the length of s, then s itself.
	str := func(key byte, s string) {
		uvarint(key, uint64(len(s)))
		pos += copy(data[pos:], s)
	}
	// flag writes a one-byte field key followed by 1 or 0.
	flag := func(key byte, on bool) {
		put(key)
		if on {
			put(1)
		} else {
			put(0)
		}
	}

	uvarint(0x8, uint64(m.ID))
	str(0x12, m.Method)
	str(0x1a, m.Path)
	str(0x22, m.Val)
	flag(0x28, m.Dir)
	str(0x32, m.PrevValue)
	uvarint(0x38, uint64(m.PrevIndex))
	if m.PrevExist != nil {
		flag(0x40, *m.PrevExist)
	}
	uvarint(0x48, uint64(m.Expiration))
	flag(0x50, m.Wait)
	uvarint(0x58, uint64(m.Since))
	flag(0x60, m.Recursive)
	flag(0x68, m.Sorted)
	flag(0x70, m.Quorum)
	uvarint(0x78, uint64(m.Time))
	// Field 16 needs a two-byte key: 0x80 0x01.
	put(0x80)
	flag(0x1, m.Stream)

	if m.XXX_unrecognized != nil {
		pos += copy(data[pos:], m.XXX_unrecognized)
	}
	return pos, nil
}
// Marshal returns the wire-format encoding of m in a freshly allocated
// buffer of m.Size() bytes.
func (m *Metadata) Marshal() (data []byte, err error) {
	buf := make([]byte, m.Size())
	written, err := m.MarshalTo(buf)
	if err != nil {
		return nil, err
	}
	return buf[:written], nil
}
// MarshalTo encodes m into data, starting at index 0, and returns the number
// of bytes written: NodeID (key 0x8), ClusterID (key 0x10), then any
// XXX_unrecognized bytes. data must be at least m.Size() bytes long. The
// returned error is always nil.
func (m *Metadata) MarshalTo(data []byte) (int, error) {
	pos := 0

	data[pos] = 0x8
	pos = encodeVarintEtcdserver(data, pos+1, uint64(m.NodeID))

	data[pos] = 0x10
	pos = encodeVarintEtcdserver(data, pos+1, uint64(m.ClusterID))

	if m.XXX_unrecognized != nil {
		pos += copy(data[pos:], m.XXX_unrecognized)
	}
	return pos, nil
}
// encodeFixed64Etcdserver stores v in data[offset:offset+8], least
// significant byte first, and returns offset+8.
func encodeFixed64Etcdserver(data []byte, offset int, v uint64) int {
	for k := 0; k < 8; k++ {
		data[offset+k] = uint8(v >> (8 * uint(k)))
	}
	return offset + 8
}
// encodeFixed32Etcdserver stores v in data[offset:offset+4], least
// significant byte first, and returns offset+4.
func encodeFixed32Etcdserver(data []byte, offset int, v uint32) int {
	for k := 0; k < 4; k++ {
		data[offset+k] = uint8(v >> (8 * uint(k)))
	}
	return offset + 4
}
// encodeVarintEtcdserver writes v at data[offset:] as a base-128 varint
// (seven payload bits per byte, high bit set on every byte but the last) and
// returns the index just past the last byte written.
func encodeVarintEtcdserver(data []byte, offset int, v uint64) int {
	for ; v > 0x7f; v >>= 7 {
		data[offset] = uint8(v&0x7f) | 0x80
		offset++
	}
	data[offset] = uint8(v)
	return offset + 1
}
// Size returns the number of bytes MarshalTo writes for m.
func (m *Request) Size() (n int) {
	// varintField: one key byte plus the varint itself.
	varintField := func(v uint64) int { return 1 + sovEtcdserver(v) }
	// strField: one key byte, the length prefix, then the bytes.
	strField := func(s string) int { return 1 + len(s) + sovEtcdserver(uint64(len(s))) }
	// A bool with a one-byte key always takes two bytes.
	const boolField = 2

	n = varintField(uint64(m.ID)) +
		strField(m.Method) +
		strField(m.Path) +
		strField(m.Val) +
		boolField + // Dir
		strField(m.PrevValue) +
		varintField(uint64(m.PrevIndex))
	if m.PrevExist != nil {
		n += boolField
	}
	n += varintField(uint64(m.Expiration)) +
		boolField + // Wait
		varintField(uint64(m.Since)) +
		3*boolField + // Recursive, Sorted, Quorum
		varintField(uint64(m.Time)) +
		3 // Stream: two-byte key plus the value byte
	if m.XXX_unrecognized != nil {
		n += len(m.XXX_unrecognized)
	}
	return n
}
// Size returns the number of bytes MarshalTo writes for m: one key byte plus
// a varint for each of NodeID and ClusterID, plus any unrecognized bytes.
func (m *Metadata) Size() (n int) {
	n = 2 + sovEtcdserver(uint64(m.NodeID)) + sovEtcdserver(uint64(m.ClusterID))
	if m.XXX_unrecognized != nil {
		n += len(m.XXX_unrecognized)
	}
	return n
}
// sovEtcdserver returns how many bytes x occupies as a base-128 varint
// (between 1 and 10).
func sovEtcdserver(x uint64) (n int) {
	n = 1
	for x >>= 7; x != 0; x >>= 7 {
		n++
	}
	return n
}
// sozEtcdserver returns the varint size of x after zig-zag encoding, i.e. of
// (x << 1) XOR (x's sign bit replicated across all 64 bits).
func sozEtcdserver(x uint64) (n int) {
	sign := uint64(int64(x) >> 63)
	zigzag := (x << 1) ^ sign
	return sovEtcdserver(zigzag)
}
// Unmarshal decodes the protobuf wire-format bytes in data into m.
//
// Fields are applied in the order they appear in data. A field whose number
// is not one of Request's sixteen known fields is kept verbatim (key
// included) in m.XXX_unrecognized. m is not reset first, so values already
// present survive unless data overwrites them.
//
// Errors:
//   - io.ErrUnexpectedEOF when data ends in the middle of a field;
//   - ErrInvalidLengthEtcdserver when a length prefix is negative;
//   - a formatted "wrong wireType" error when a known field carries an
//     unexpected wire type;
//   - whatever skipEtcdserver reports for an unknown field.
//
// On error m may have been partially updated. Malformed input never panics:
// lengths are compared against the bytes remaining instead of being added to
// the cursor, where a hostile length could overflow.
func (m *Request) Unmarshal(data []byte) error {
	l := len(data)
	iNdEx := 0

	// readVarint decodes the varint at the cursor and advances past it.
	readVarint := func() (uint64, error) {
		var v uint64
		for shift := uint(0); ; shift += 7 {
			if iNdEx >= l {
				return 0, io.ErrUnexpectedEOF
			}
			b := data[iNdEx]
			iNdEx++
			v |= (uint64(b) & 0x7F) << shift
			if b < 0x80 {
				return v, nil
			}
		}
	}
	// wrongType builds the error for a known field with a bad wire type.
	wrongType := func(wireType int, field string) error {
		return fmt.Errorf("proto: wrong wireType = %d for field %s", wireType, field)
	}
	// The read* helpers validate the wire type, decode one value at the
	// cursor and store it in *dst only on success.
	readUint64 := func(field string, wireType int, dst *uint64) error {
		if wireType != 0 {
			return wrongType(wireType, field)
		}
		v, err := readVarint()
		if err != nil {
			return err
		}
		*dst = v
		return nil
	}
	readInt64 := func(field string, wireType int, dst *int64) error {
		if wireType != 0 {
			return wrongType(wireType, field)
		}
		v, err := readVarint()
		if err != nil {
			return err
		}
		*dst = int64(v)
		return nil
	}
	readBool := func(field string, wireType int, dst *bool) error {
		if wireType != 0 {
			return wrongType(wireType, field)
		}
		v, err := readVarint()
		if err != nil {
			return err
		}
		*dst = v != 0
		return nil
	}
	readString := func(field string, wireType int, dst *string) error {
		if wireType != 2 {
			return wrongType(wireType, field)
		}
		v, err := readVarint()
		if err != nil {
			return err
		}
		strLen := int(v)
		if strLen < 0 {
			return ErrInvalidLengthEtcdserver
		}
		// Not "iNdEx+strLen > l": that sum can wrap negative.
		if strLen > l-iNdEx {
			return io.ErrUnexpectedEOF
		}
		*dst = string(data[iNdEx : iNdEx+strLen])
		iNdEx += strLen
		return nil
	}

	for iNdEx < l {
		// Remember where the key starts so an unknown field can be
		// captured from exactly that byte, however the key was encoded.
		preIndex := iNdEx
		wire, err := readVarint()
		if err != nil {
			return err
		}
		fieldNum := int32(wire >> 3)
		wireType := int(wire & 0x7)

		switch fieldNum {
		case 1:
			err = readUint64("ID", wireType, &m.ID)
		case 2:
			err = readString("Method", wireType, &m.Method)
		case 3:
			err = readString("Path", wireType, &m.Path)
		case 4:
			err = readString("Val", wireType, &m.Val)
		case 5:
			err = readBool("Dir", wireType, &m.Dir)
		case 6:
			err = readString("PrevValue", wireType, &m.PrevValue)
		case 7:
			err = readUint64("PrevIndex", wireType, &m.PrevIndex)
		case 8:
			// PrevExist is optional: allocate a fresh bool for it.
			exist := false
			if err = readBool("PrevExist", wireType, &exist); err == nil {
				m.PrevExist = &exist
			}
		case 9:
			err = readInt64("Expiration", wireType, &m.Expiration)
		case 10:
			err = readBool("Wait", wireType, &m.Wait)
		case 11:
			err = readUint64("Since", wireType, &m.Since)
		case 12:
			err = readBool("Recursive", wireType, &m.Recursive)
		case 13:
			err = readBool("Sorted", wireType, &m.Sorted)
		case 14:
			err = readBool("Quorum", wireType, &m.Quorum)
		case 15:
			err = readInt64("Time", wireType, &m.Time)
		case 16:
			err = readBool("Stream", wireType, &m.Stream)
		default:
			iNdEx = preIndex
			skippy, skipErr := skipEtcdserver(data[iNdEx:])
			if skipErr != nil {
				return skipErr
			}
			if skippy < 0 {
				return ErrInvalidLengthEtcdserver
			}
			if skippy > l-iNdEx {
				return io.ErrUnexpectedEOF
			}
			m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
			iNdEx += skippy
		}
		if err != nil {
			return err
		}
	}
	return nil
}
// Unmarshal decodes the protobuf wire-format bytes in data into m.
//
// Field 1 sets NodeID and field 2 sets ClusterID; any other field is kept
// verbatim (key included) in m.XXX_unrecognized. m is not reset first.
//
// It returns io.ErrUnexpectedEOF when data ends inside a field, a formatted
// "wrong wireType" error when field 1 or 2 is not a varint, and
// ErrInvalidLengthEtcdserver or skipEtcdserver's error for a malformed
// unknown field. On error m may have been partially updated.
func (m *Metadata) Unmarshal(data []byte) error {
	l := len(data)
	iNdEx := 0

	// readVarint decodes the varint at the cursor and advances past it.
	readVarint := func() (uint64, error) {
		var v uint64
		for shift := uint(0); ; shift += 7 {
			if iNdEx >= l {
				return 0, io.ErrUnexpectedEOF
			}
			b := data[iNdEx]
			iNdEx++
			v |= (uint64(b) & 0x7F) << shift
			if b < 0x80 {
				return v, nil
			}
		}
	}

	for iNdEx < l {
		// Remember where the key starts so an unknown field can be
		// captured from exactly that byte, however the key was encoded.
		preIndex := iNdEx
		wire, err := readVarint()
		if err != nil {
			return err
		}
		fieldNum := int32(wire >> 3)
		wireType := int(wire & 0x7)

		switch fieldNum {
		case 1:
			if wireType != 0 {
				return fmt.Errorf("proto: wrong wireType = %d for field NodeID", wireType)
			}
			if m.NodeID, err = readVarint(); err != nil {
				return err
			}
		case 2:
			if wireType != 0 {
				return fmt.Errorf("proto: wrong wireType = %d for field ClusterID", wireType)
			}
			if m.ClusterID, err = readVarint(); err != nil {
				return err
			}
		default:
			iNdEx = preIndex
			skippy, err := skipEtcdserver(data[iNdEx:])
			if err != nil {
				return err
			}
			if skippy < 0 {
				return ErrInvalidLengthEtcdserver
			}
			// Compare against the bytes remaining; iNdEx+skippy could
			// overflow for a hostile length.
			if skippy > l-iNdEx {
				return io.ErrUnexpectedEOF
			}
			m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
			iNdEx += skippy
		}
	}
	return nil
}
// skipEtcdserver returns the number of bytes taken up by the first
// wire-format field in data, key included, without interpreting the value.
//
// Wire types 0 (varint), 1 (8 bytes), 2 (length-delimited), 3 (start group,
// skipped up to and including its matching end-group key) and 5 (4 bytes) are
// supported. For wire type 4 (a bare end-group key) only the key is counted.
//
// On success 0 < n <= len(data). Errors:
//   - io.ErrUnexpectedEOF if data is empty or ends before the field does;
//   - ErrInvalidLengthEtcdserver if a length prefix is negative or so large
//     that the end offset overflows;
//   - a formatted error for wire types 6 and 7.
//
// Malformed input never panics.
func skipEtcdserver(data []byte) (n int, err error) {
	l := len(data)

	// varintAt decodes the varint that starts at pos and returns it along
	// with the index of the byte following it.
	varintAt := func(pos int) (uint64, int, error) {
		var v uint64
		for shift := uint(0); ; shift += 7 {
			if pos >= l {
				return 0, 0, io.ErrUnexpectedEOF
			}
			b := data[pos]
			pos++
			v |= (uint64(b) & 0x7F) << shift
			if b < 0x80 {
				return v, pos, nil
			}
		}
	}

	wire, iNdEx, err := varintAt(0)
	if err != nil {
		return 0, err
	}

	switch wireType := int(wire & 0x7); wireType {
	case 0:
		if _, iNdEx, err = varintAt(iNdEx); err != nil {
			return 0, err
		}
		return iNdEx, nil
	case 1, 5:
		width := 8
		if wireType == 5 {
			width = 4
		}
		if l-iNdEx < width {
			return 0, io.ErrUnexpectedEOF
		}
		return iNdEx + width, nil
	case 2:
		v, next, err := varintAt(iNdEx)
		if err != nil {
			return 0, err
		}
		length := int(v)
		if length < 0 {
			return 0, ErrInvalidLengthEtcdserver
		}
		end := next + length
		if end < 0 {
			// next+length wrapped around.
			return 0, ErrInvalidLengthEtcdserver
		}
		if end > l {
			return 0, io.ErrUnexpectedEOF
		}
		return end, nil
	case 3:
		// Skip nested fields one at a time until the end-group key.
		for {
			start := iNdEx
			innerWire, next, err := varintAt(start)
			if err != nil {
				return 0, err
			}
			if int(innerWire&0x7) == 4 {
				return next, nil
			}
			skipped, err := skipEtcdserver(data[start:])
			if err != nil {
				return 0, err
			}
			iNdEx = start + skipped
		}
	case 4:
		return iNdEx, nil
	default:
		return 0, fmt.Errorf("proto: illegal wireType %d", wireType)
	}
}

var (
	// ErrInvalidLengthEtcdserver is returned when a length prefix read
	// during unmarshaling is negative.
	ErrInvalidLengthEtcdserver = fmt.Errorf("proto: negative length found during unmarshaling")
)

View File

@@ -0,0 +1,33 @@
syntax = "proto2";
package etcdserverpb;
import "gogoproto/gogo.proto";
option (gogoproto.marshaler_all) = true;
option (gogoproto.sizer_all) = true;
option (gogoproto.unmarshaler_all) = true;
option (gogoproto.goproto_getters_all) = false;
// Request is the source of the generated Go type etcdserverpb.Request.
// Every field except PrevExist is marked non-nullable, so it is generated as
// a plain Go value and is always written to the wire. PrevExist is nullable:
// it is generated as a pointer and is only written when set.
message Request {
  optional uint64 ID = 1 [(gogoproto.nullable) = false];
  optional string Method = 2 [(gogoproto.nullable) = false];
  optional string Path = 3 [(gogoproto.nullable) = false];
  optional string Val = 4 [(gogoproto.nullable) = false];
  optional bool Dir = 5 [(gogoproto.nullable) = false];
  optional string PrevValue = 6 [(gogoproto.nullable) = false];
  optional uint64 PrevIndex = 7 [(gogoproto.nullable) = false];
  optional bool PrevExist = 8 [(gogoproto.nullable) = true];
  optional int64 Expiration = 9 [(gogoproto.nullable) = false];
  optional bool Wait = 10 [(gogoproto.nullable) = false];
  optional uint64 Since = 11 [(gogoproto.nullable) = false];
  optional bool Recursive = 12 [(gogoproto.nullable) = false];
  optional bool Sorted = 13 [(gogoproto.nullable) = false];
  optional bool Quorum = 14 [(gogoproto.nullable) = false];
  optional int64 Time = 15 [(gogoproto.nullable) = false];
  optional bool Stream = 16 [(gogoproto.nullable) = false];
}
// Metadata pairs a node ID with a cluster ID. Both fields are non-nullable,
// so they are generated as plain uint64 values.
message Metadata {
  optional uint64 NodeID = 1 [(gogoproto.nullable) = false];
  optional uint64 ClusterID = 2 [(gogoproto.nullable) = false];
}

View File

@@ -0,0 +1,486 @@
// Code generated by protoc-gen-gogo.
// source: raft_internal.proto
// DO NOT EDIT!
package etcdserverpb
import proto "github.com/gogo/protobuf/proto"
// discarding unused import gogoproto "github.com/coreos/etcd/Godeps/_workspace/src/gogoproto"
import io "io"
import fmt "fmt"
// Reference imports to suppress errors if they are not otherwise used.
var _ = proto.Marshal
// An InternalRaftRequest is the union of all requests which can be
// sent via raft.
//
// Each field is optional and is only marshaled when non-nil. GetValue returns
// the first non-nil field in declaration order, and SetValue assigns the
// field matching the dynamic type of its argument.
type InternalRaftRequest struct {
	V2 *Request `protobuf:"bytes,1,opt,name=v2" json:"v2,omitempty"`
	Range *RangeRequest `protobuf:"bytes,2,opt,name=range" json:"range,omitempty"`
	Put *PutRequest `protobuf:"bytes,3,opt,name=put" json:"put,omitempty"`
	DeleteRange *DeleteRangeRequest `protobuf:"bytes,4,opt,name=delete_range" json:"delete_range,omitempty"`
	Txn *TxnRequest `protobuf:"bytes,5,opt,name=txn" json:"txn,omitempty"`
}
// Reset sets m back to the zero InternalRaftRequest.
func (m *InternalRaftRequest) Reset() { *m = InternalRaftRequest{} }

// String returns m formatted by proto.CompactTextString.
func (m *InternalRaftRequest) String() string { return proto.CompactTextString(m) }

// ProtoMessage is an empty marker method.
// NOTE(review): presumably required by the proto package's Message interface — confirm.
func (*InternalRaftRequest) ProtoMessage() {}
// Marshal returns the wire-format encoding of m in a freshly allocated
// buffer of m.Size() bytes. An error from MarshalTo is returned with a nil
// slice.
func (m *InternalRaftRequest) Marshal() (data []byte, err error) {
	buf := make([]byte, m.Size())
	written, err := m.MarshalTo(buf)
	if err != nil {
		return nil, err
	}
	return buf[:written], nil
}
// MarshalTo encodes m into data, starting at index 0, and returns the number
// of bytes written. Each non-nil field is written, in field-number order, as
// a one-byte key, the embedded message's Size() as a varint, and the
// embedded message itself. If an embedded MarshalTo fails, 0 and that error
// are returned. data must be at least m.Size() bytes long.
func (m *InternalRaftRequest) MarshalTo(data []byte) (int, error) {
	type embedded interface {
		Size() int
		MarshalTo([]byte) (int, error)
	}

	pos := 0
	// emit writes one length-delimited sub-message at the cursor.
	emit := func(key byte, msg embedded) error {
		data[pos] = key
		pos++
		pos = encodeVarintRaftInternal(data, pos, uint64(msg.Size()))
		written, err := msg.MarshalTo(data[pos:])
		if err != nil {
			return err
		}
		pos += written
		return nil
	}

	// The nil checks stay on the concrete pointers: a nil *T stored in an
	// interface would no longer compare equal to nil.
	if m.V2 != nil {
		if err := emit(0xa, m.V2); err != nil {
			return 0, err
		}
	}
	if m.Range != nil {
		if err := emit(0x12, m.Range); err != nil {
			return 0, err
		}
	}
	if m.Put != nil {
		if err := emit(0x1a, m.Put); err != nil {
			return 0, err
		}
	}
	if m.DeleteRange != nil {
		if err := emit(0x22, m.DeleteRange); err != nil {
			return 0, err
		}
	}
	if m.Txn != nil {
		if err := emit(0x2a, m.Txn); err != nil {
			return 0, err
		}
	}
	return pos, nil
}
// encodeFixed64RaftInternal stores v in data[offset:offset+8], least
// significant byte first, and returns offset+8.
func encodeFixed64RaftInternal(data []byte, offset int, v uint64) int {
	for k := 0; k < 8; k++ {
		data[offset+k] = uint8(v >> (8 * uint(k)))
	}
	return offset + 8
}
// encodeFixed32RaftInternal stores v in data[offset:offset+4], least
// significant byte first, and returns offset+4.
func encodeFixed32RaftInternal(data []byte, offset int, v uint32) int {
	for k := 0; k < 4; k++ {
		data[offset+k] = uint8(v >> (8 * uint(k)))
	}
	return offset + 4
}
// encodeVarintRaftInternal writes v at data[offset:] as a base-128 varint
// (seven payload bits per byte, high bit set on every byte but the last) and
// returns the index just past the last byte written.
func encodeVarintRaftInternal(data []byte, offset int, v uint64) int {
	for ; v > 0x7f; v >>= 7 {
		data[offset] = uint8(v&0x7f) | 0x80
		offset++
	}
	data[offset] = uint8(v)
	return offset + 1
}
// Size returns the number of bytes MarshalTo writes for m: for every non-nil
// field, one key byte, the varint length prefix and the embedded message.
func (m *InternalRaftRequest) Size() (n int) {
	// framed is the on-wire size of an embedded message of bodyLen bytes.
	framed := func(bodyLen int) int {
		return 1 + bodyLen + sovRaftInternal(uint64(bodyLen))
	}

	if m.V2 != nil {
		n += framed(m.V2.Size())
	}
	if m.Range != nil {
		n += framed(m.Range.Size())
	}
	if m.Put != nil {
		n += framed(m.Put.Size())
	}
	if m.DeleteRange != nil {
		n += framed(m.DeleteRange.Size())
	}
	if m.Txn != nil {
		n += framed(m.Txn.Size())
	}
	return n
}
// sovRaftInternal returns how many bytes x occupies as a base-128 varint
// (between 1 and 10).
func sovRaftInternal(x uint64) (n int) {
	n = 1
	for x >>= 7; x != 0; x >>= 7 {
		n++
	}
	return n
}
// sozRaftInternal returns the varint size of x after zig-zag encoding, i.e.
// of (x << 1) XOR (x's sign bit replicated across all 64 bits).
func sozRaftInternal(x uint64) (n int) {
	sign := uint64(int64(x) >> 63)
	zigzag := (x << 1) ^ sign
	return sovRaftInternal(zigzag)
}
// GetValue returns the first non-nil member of the union, checked in the
// order V2, Range, Put, DeleteRange, Txn, or nil if none is set.
func (m *InternalRaftRequest) GetValue() interface{} {
	switch {
	case m.V2 != nil:
		return m.V2
	case m.Range != nil:
		return m.Range
	case m.Put != nil:
		return m.Put
	case m.DeleteRange != nil:
		return m.DeleteRange
	case m.Txn != nil:
		return m.Txn
	}
	return nil
}
// SetValue stores value in the field matching its dynamic type and reports
// whether the type was one of the five request pointer types. Other fields
// are left as they were; an unsupported type changes nothing.
func (m *InternalRaftRequest) SetValue(value interface{}) bool {
	switch req := value.(type) {
	case *Request:
		m.V2 = req
		return true
	case *RangeRequest:
		m.Range = req
		return true
	case *PutRequest:
		m.Put = req
		return true
	case *DeleteRangeRequest:
		m.DeleteRange = req
		return true
	case *TxnRequest:
		m.Txn = req
		return true
	default:
		return false
	}
}
// Unmarshal decodes the protobuf wire-format bytes in data into m.
//
// Fields 1-5 are length-delimited embedded messages (V2, Range, Put,
// DeleteRange, Txn). The target field is allocated if nil and the payload is
// passed to that message's own Unmarshal. Unknown fields are skipped and
// discarded. m is not reset first.
//
// Errors:
//   - io.ErrUnexpectedEOF when data ends in the middle of a field;
//   - ErrInvalidLengthRaftInternal when a length prefix is negative;
//   - a formatted "wrong wireType" error when a known field is not
//     length-delimited;
//   - any error from an embedded Unmarshal or from skipRaftInternal.
//
// Malformed input never panics: lengths are compared against the bytes
// remaining instead of being added to the cursor, where a hostile length
// could overflow.
func (m *InternalRaftRequest) Unmarshal(data []byte) error {
	l := len(data)
	iNdEx := 0

	// readVarint decodes the varint at the cursor and advances past it.
	readVarint := func() (uint64, error) {
		var v uint64
		for shift := uint(0); ; shift += 7 {
			if iNdEx >= l {
				return 0, io.ErrUnexpectedEOF
			}
			b := data[iNdEx]
			iNdEx++
			v |= (uint64(b) & 0x7F) << shift
			if b < 0x80 {
				return v, nil
			}
		}
	}
	// readMessage validates the wire type, then returns the payload of the
	// length-delimited field at the cursor and advances past it.
	readMessage := func(field string, wireType int) ([]byte, error) {
		if wireType != 2 {
			return nil, fmt.Errorf("proto: wrong wireType = %d for field %s", wireType, field)
		}
		v, err := readVarint()
		if err != nil {
			return nil, err
		}
		msglen := int(v)
		if msglen < 0 {
			return nil, ErrInvalidLengthRaftInternal
		}
		// Not "iNdEx+msglen > l": that sum can wrap negative.
		if msglen > l-iNdEx {
			return nil, io.ErrUnexpectedEOF
		}
		body := data[iNdEx : iNdEx+msglen]
		iNdEx += msglen
		return body, nil
	}

	for iNdEx < l {
		// Remember where the key starts so an unknown field is skipped
		// from exactly that byte, however the key was encoded.
		preIndex := iNdEx
		wire, err := readVarint()
		if err != nil {
			return err
		}
		fieldNum := int32(wire >> 3)
		wireType := int(wire & 0x7)

		switch fieldNum {
		case 1:
			body, err := readMessage("V2", wireType)
			if err != nil {
				return err
			}
			if m.V2 == nil {
				m.V2 = &Request{}
			}
			if err := m.V2.Unmarshal(body); err != nil {
				return err
			}
		case 2:
			body, err := readMessage("Range", wireType)
			if err != nil {
				return err
			}
			if m.Range == nil {
				m.Range = &RangeRequest{}
			}
			if err := m.Range.Unmarshal(body); err != nil {
				return err
			}
		case 3:
			body, err := readMessage("Put", wireType)
			if err != nil {
				return err
			}
			if m.Put == nil {
				m.Put = &PutRequest{}
			}
			if err := m.Put.Unmarshal(body); err != nil {
				return err
			}
		case 4:
			body, err := readMessage("DeleteRange", wireType)
			if err != nil {
				return err
			}
			if m.DeleteRange == nil {
				m.DeleteRange = &DeleteRangeRequest{}
			}
			if err := m.DeleteRange.Unmarshal(body); err != nil {
				return err
			}
		case 5:
			body, err := readMessage("Txn", wireType)
			if err != nil {
				return err
			}
			if m.Txn == nil {
				m.Txn = &TxnRequest{}
			}
			if err := m.Txn.Unmarshal(body); err != nil {
				return err
			}
		default:
			iNdEx = preIndex
			skippy, err := skipRaftInternal(data[iNdEx:])
			if err != nil {
				return err
			}
			if skippy < 0 {
				return ErrInvalidLengthRaftInternal
			}
			if skippy > l-iNdEx {
				return io.ErrUnexpectedEOF
			}
			iNdEx += skippy
		}
	}
	return nil
}
// skipRaftInternal returns the number of bytes taken up by the first
// wire-format field in data, key included, without interpreting the value.
//
// Wire types 0 (varint), 1 (8 bytes), 2 (length-delimited), 3 (start group,
// skipped up to and including its matching end-group key) and 5 (4 bytes) are
// supported. For wire type 4 (a bare end-group key) only the key is counted.
//
// On success 0 < n <= len(data). Errors:
//   - io.ErrUnexpectedEOF if data is empty or ends before the field does;
//   - ErrInvalidLengthRaftInternal if a length prefix is negative or so large
//     that the end offset overflows;
//   - a formatted error for wire types 6 and 7.
//
// Malformed input never panics.
func skipRaftInternal(data []byte) (n int, err error) {
	l := len(data)

	// varintAt decodes the varint that starts at pos and returns it along
	// with the index of the byte following it.
	varintAt := func(pos int) (uint64, int, error) {
		var v uint64
		for shift := uint(0); ; shift += 7 {
			if pos >= l {
				return 0, 0, io.ErrUnexpectedEOF
			}
			b := data[pos]
			pos++
			v |= (uint64(b) & 0x7F) << shift
			if b < 0x80 {
				return v, pos, nil
			}
		}
	}

	wire, iNdEx, err := varintAt(0)
	if err != nil {
		return 0, err
	}

	switch wireType := int(wire & 0x7); wireType {
	case 0:
		if _, iNdEx, err = varintAt(iNdEx); err != nil {
			return 0, err
		}
		return iNdEx, nil
	case 1, 5:
		width := 8
		if wireType == 5 {
			width = 4
		}
		if l-iNdEx < width {
			return 0, io.ErrUnexpectedEOF
		}
		return iNdEx + width, nil
	case 2:
		v, next, err := varintAt(iNdEx)
		if err != nil {
			return 0, err
		}
		length := int(v)
		if length < 0 {
			return 0, ErrInvalidLengthRaftInternal
		}
		end := next + length
		if end < 0 {
			// next+length wrapped around.
			return 0, ErrInvalidLengthRaftInternal
		}
		if end > l {
			return 0, io.ErrUnexpectedEOF
		}
		return end, nil
	case 3:
		// Skip nested fields one at a time until the end-group key.
		for {
			start := iNdEx
			innerWire, next, err := varintAt(start)
			if err != nil {
				return 0, err
			}
			if int(innerWire&0x7) == 4 {
				return next, nil
			}
			skipped, err := skipRaftInternal(data[start:])
			if err != nil {
				return 0, err
			}
			iNdEx = start + skipped
		}
	case 4:
		return iNdEx, nil
	default:
		return 0, fmt.Errorf("proto: illegal wireType %d", wireType)
	}
}

var (
	// ErrInvalidLengthRaftInternal is returned when a length prefix read
	// during unmarshaling is negative.
	ErrInvalidLengthRaftInternal = fmt.Errorf("proto: negative length found during unmarshaling")
)

View File

@@ -0,0 +1,24 @@
syntax = "proto3";
package etcdserverpb;
import "gogoproto/gogo.proto";
import "etcdserver.proto";
import "rpc.proto";
option (gogoproto.marshaler_all) = true;
option (gogoproto.sizer_all) = true;
option (gogoproto.unmarshaler_all) = true;
option (gogoproto.goproto_getters_all) = false;
// An InternalRaftRequest is the union of all requests which can be
// sent via raft.
//
// In the generated Go code each alternative becomes an optional pointer
// field, and the message gets GetValue/SetValue accessors for reading and
// assigning the alternative that is set.
message InternalRaftRequest {
  option (gogoproto.onlyone) = true;
  oneof value {
    Request v2 = 1;
    RangeRequest range = 2;
    PutRequest put = 3;
    DeleteRangeRequest delete_range = 4;
    TxnRequest txn = 5;
  }
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,173 @@
syntax = "proto3";
package etcdserverpb;
import "gogoproto/gogo.proto";
import "etcd/storage/storagepb/kv.proto";
option (gogoproto.marshaler_all) = true;
option (gogoproto.unmarshaler_all) = true;
// Interface exported by the server.
service etcd {
  // Range gets the keys in the range from the store.
  rpc Range(RangeRequest) returns (RangeResponse) {}
  // Put puts the given key into the store.
  // A put request increases the revision of the store,
  // and generates one event in the event history.
  rpc Put(PutRequest) returns (PutResponse) {}
  // DeleteRange deletes the given range from the store.
  // A delete request increases the revision of the store,
  // and generates one event in the event history.
  rpc DeleteRange(DeleteRangeRequest) returns (DeleteRangeResponse) {}
  // Txn processes all the requests in one transaction.
  // A txn request increases the revision of the store,
  // and generates events with the same revision in the event history.
  rpc Txn(TxnRequest) returns (TxnResponse) {}
  // Compact compacts the event history in etcd. Users should compact the
  // event history periodically, or it will grow without bound.
  rpc Compact(CompactionRequest) returns (CompactionResponse) {}
}
// ResponseHeader is the common header carried as field 1 ("header") of every
// response message declared in this file.
message ResponseHeader {
// an error type message?
string error = 1;
// presumably the ID of the cluster that produced the response — confirm.
uint64 cluster_id = 2;
// presumably the ID of the member that produced the response — confirm.
uint64 member_id = 3;
// revision of the store when the request was applied.
int64 revision = 4;
// term of raft when the request was applied.
uint64 raft_term = 5;
}
// RangeRequest is the request message of the Range rpc. It selects either a
// single key or the half-open key range [key, range_end).
message RangeRequest {
// if the range_end is not given, the request returns the key.
bytes key = 1;
// if the range_end is given, it gets the keys in range [key, range_end).
bytes range_end = 2;
// limit the number of keys returned.
int64 limit = 3;
// range over the store at the given revision.
// if revision is less than or equal to zero, range over the newest store.
// if the revision has been compacted, ErrCompaction will be returned in
// response.
int64 revision = 4;
}
// RangeResponse is the response message of the Range rpc.
message RangeResponse {
ResponseHeader header = 1;
// kvs holds the returned key-value pairs.
repeated storagepb.KeyValue kvs = 2;
// more indicates if there are more keys to return in the requested range.
bool more = 3;
}
// PutRequest is the request message of the Put rpc: the key to put and the
// value to put under it.
message PutRequest {
bytes key = 1;
bytes value = 2;
}
// PutResponse is the response message of the Put rpc; it carries only the
// common response header.
message PutResponse {
ResponseHeader header = 1;
}
// DeleteRangeRequest is the request message of the DeleteRange rpc. It selects
// either a single key or the half-open key range [key, range_end).
message DeleteRangeRequest {
// if the range_end is not given, the request deletes the key.
bytes key = 1;
// if the range_end is given, it deletes the keys in range [key, range_end).
bytes range_end = 2;
}
// DeleteRangeResponse is the response message of the DeleteRange rpc; it
// carries only the common response header.
message DeleteRangeResponse {
ResponseHeader header = 1;
}
// RequestUnion wraps a single range, put or delete-range request. It is the
// element type of TxnRequest.success and TxnRequest.failure.
message RequestUnion {
// request is a oneof: at most one of the fields below is set at a time.
oneof request {
RangeRequest request_range = 1;
PutRequest request_put = 2;
DeleteRangeRequest request_delete_range = 3;
}
}
// ResponseUnion wraps a single range, put or delete-range response. It is the
// element type of TxnResponse.responses.
message ResponseUnion {
// response is a oneof: at most one of the fields below is set at a time.
oneof response {
RangeResponse response_range = 1;
PutResponse response_put = 2;
DeleteRangeResponse response_delete_range = 3;
}
}
// Compare describes one comparison against a key. It is the element type of
// TxnRequest.compare.
message Compare {
// CompareResult enumerates the comparison outcomes that can be requested.
enum CompareResult {
EQUAL = 0;
GREATER = 1;
LESS = 2;
}
// CompareTarget enumerates the attributes of a key that can be compared.
// NOTE(review): each value presumably pairs with the like-named member of
// target_union below (VERSION/version, CREATE/create_revision,
// MOD/mod_revision, VALUE/value) — confirm against the server code.
enum CompareTarget {
VERSION = 0;
CREATE = 1;
MOD = 2;
VALUE= 3;
}
CompareResult result = 1;
CompareTarget target = 2;
// key path
bytes key = 3;
// target_union is a oneof: at most one of the fields below is set at a time.
oneof target_union {
// version of the given key
int64 version = 4;
// create revision of the given key
int64 create_revision = 5;
// last modified revision of the given key
int64 mod_revision = 6;
// value of the given key
bytes value = 7;
}
}
// If the comparisons succeed, then the success requests will be processed in order,
// and the response will contain their respective responses in order.
// If the comparisons fail, then the failure requests will be processed in order,
// and the response will contain their respective responses in order.
// From google paxosdb paper:
// Our implementation hinges around a powerful primitive which we call MultiOp. All other database
// operations except for iteration are implemented as a single call to MultiOp. A MultiOp is applied atomically
// and consists of three components:
// 1. A list of tests called guard. Each test in guard checks a single entry in the database. It may check
// for the absence or presence of a value, or compare with a given value. Two different tests in the guard
// may apply to the same or different entries in the database. All tests in the guard are applied and
// MultiOp returns the results. If all tests are true, MultiOp executes t op (see item 2 below), otherwise
// it executes f op (see item 3 below).
// 2. A list of database operations called t op. Each operation in the list is either an insert, delete, or
// lookup operation, and applies to a single database entry. Two different operations in the list may apply
// to the same or different entries in the database. These operations are executed if guard evaluates to
// true.
// 3. A list of database operations called f op. Like t op, but executed if guard evaluates to false.
message TxnRequest {
// compare is the list of comparisons that decides which branch is processed.
repeated Compare compare = 1;
// success is the list of requests processed, in order, if the comparisons succeed.
repeated RequestUnion success = 2;
// failure is the list of requests processed, in order, if the comparisons fail.
repeated RequestUnion failure = 3;
}
// TxnResponse is the response message of the Txn rpc.
message TxnResponse {
ResponseHeader header = 1;
// presumably true when the comparisons of the TxnRequest succeeded — confirm.
bool succeeded = 2;
// responses holds the responses of the processed requests, in order.
repeated ResponseUnion responses = 3;
}
// Compaction compacts the kv store up to the given revision (inclusive).
// It removes the old versions of a key. It keeps the newest version of
// the key even if its latest modification revision is smaller than the given
// revision.
message CompactionRequest {
// revision is the revision up to which the store is compacted.
int64 revision = 1;
}
// CompactionResponse is the response message of the Compact rpc; it carries
// only the common response header.
message CompactionResponse {
ResponseHeader header = 1;
}

View File

@@ -0,0 +1,170 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"crypto/sha1"
"encoding/binary"
"encoding/json"
"fmt"
"math/rand"
"path"
"sort"
"time"
"github.com/coreos/etcd/pkg/types"
"github.com/coreos/etcd/store"
)
var (
// storeMembersPrefix is the "members" path under StoreClusterPrefix;
// memberStoreKey builds per-member keys beneath it.
storeMembersPrefix = path.Join(StoreClusterPrefix, "members")
// storeRemovedMembersPrefix is the "removed_members" path under
// StoreClusterPrefix; removedMemberStoreKey builds per-member keys beneath it.
storeRemovedMembersPrefix = path.Join(StoreClusterPrefix, "removed_members")
)
// RaftAttributes represents the raft related attributes of an etcd member.
type RaftAttributes struct {
// PeerURLs is serialized under the JSON key "peerURLs".
// TODO(philips): ensure these are URLs
PeerURLs []string `json:"peerURLs"`
}
// Attributes represents all the non-raft related attributes of an etcd member.
// Both fields are omitted from the JSON encoding when empty.
type Attributes struct {
Name string `json:"name,omitempty"`
ClientURLs []string `json:"clientURLs,omitempty"`
}
// Member describes an etcd member: its ID together with the embedded
// RaftAttributes and Attributes, whose fields are promoted onto Member.
type Member struct {
ID types.ID `json:"id"`
RaftAttributes
Attributes
}
// NewMember builds a Member for bootstrapping or for adding a new member.
// The caller does not supply an ID; it is derived from the first eight bytes
// (big-endian) of the SHA-1 digest of the sorted peer URLs, followed by
// clusterName and, when now is non-nil, the decimal Unix time of *now.
func NewMember(name string, peerURLs types.URLs, clusterName string, now *time.Time) *Member {
	member := &Member{
		RaftAttributes: RaftAttributes{PeerURLs: peerURLs.StringSlice()},
		Attributes:     Attributes{Name: name},
	}

	// Sort before hashing so the ID does not depend on the order in which
	// the peer URLs were supplied.
	sort.Strings(member.PeerURLs)

	var seed []byte
	for _, peerURL := range member.PeerURLs {
		seed = append(seed, peerURL...)
	}
	seed = append(seed, clusterName...)
	if now != nil {
		seed = append(seed, fmt.Sprintf("%d", now.Unix())...)
	}

	digest := sha1.Sum(seed)
	member.ID = types.ID(binary.BigEndian.Uint64(digest[:8]))
	return member
}
// PickPeerURL returns one of the member's PeerURLs, selected at random.
// It reports the problem through plog.Panicf when the member has no PeerURLs.
func (m *Member) PickPeerURL() string {
	count := len(m.PeerURLs)
	if count == 0 {
		plog.Panicf("member should always have some peer url")
	}
	idx := rand.Intn(count)
	return m.PeerURLs[idx]
}
// Clone returns a copy of m whose PeerURLs and ClientURLs slices do not share
// storage with the original. A nil receiver yields nil, and a nil slice in m
// stays nil in the copy.
func (m *Member) Clone() *Member {
	if m == nil {
		return nil
	}

	dup := &Member{ID: m.ID}
	dup.Name = m.Name

	if src := m.PeerURLs; src != nil {
		dup.PeerURLs = make([]string, len(src))
		copy(dup.PeerURLs, src)
	}
	if src := m.ClientURLs; src != nil {
		dup.ClientURLs = make([]string, len(src))
		copy(dup.ClientURLs, src)
	}
	return dup
}
// memberStoreKey returns the store key of the member with the given id: the
// id's string form joined under storeMembersPrefix.
func memberStoreKey(id types.ID) string {
	idStr := id.String()
	return path.Join(storeMembersPrefix, idStr)
}
// MemberAttributesStorePath returns the store path of the attributes node of
// the member with the given id: attributesSuffix joined under the member's
// store key.
func MemberAttributesStorePath(id types.ID) string {
	memberKey := memberStoreKey(id)
	return path.Join(memberKey, attributesSuffix)
}
// mustParseMemberIDFromKey parses a member ID from the last path element of
// key. A parse failure is reported through plog.Panicf.
func mustParseMemberIDFromKey(key string) types.ID {
	base := path.Base(key)
	id, err := types.IDFromString(base)
	if err != nil {
		plog.Panicf("unexpected parse member id error: %v", err)
	}
	return id
}
// removedMemberStoreKey returns the store key recording the removal of the
// member with the given id: the id's string form joined under
// storeRemovedMembersPrefix.
func removedMemberStoreKey(id types.ID) string {
	idStr := id.String()
	return path.Join(storeRemovedMembersPrefix, idStr)
}
// nodeToMember builds a Member from a key-value store node.
//
// The member ID is parsed from the last path element of n.Key by
// mustParseMemberIDFromKey. Every child of n must be either the raftAttributes
// node or the attributes node; any other child key is an error. The
// raftAttributes child is required, the attributes child is optional.
// Children are looked up by key through a map, so this function does not
// itself depend on the order of n.Nodes.
//
// A child whose Value is nil is reported as an error instead of being
// dereferenced, which would panic with a nil pointer dereference.
//
// NOTE: when unmarshaling the attributes child fails, the partially populated
// member is returned together with the error. This is kept as-is so existing
// callers see the same results.
func nodeToMember(n *store.NodeExtern) (*Member, error) {
	m := &Member{ID: mustParseMemberIDFromKey(n.Key)}
	raftAttrKey := path.Join(n.Key, raftAttributesSuffix)
	attrKey := path.Join(n.Key, attributesSuffix)

	attrs := make(map[string][]byte, len(n.Nodes))
	for _, nn := range n.Nodes {
		if nn.Key != raftAttrKey && nn.Key != attrKey {
			return nil, fmt.Errorf("unknown key %q", nn.Key)
		}
		if nn.Value == nil {
			return nil, fmt.Errorf("key %q has no value", nn.Key)
		}
		attrs[nn.Key] = []byte(*nn.Value)
	}

	raftData := attrs[raftAttrKey]
	if raftData == nil {
		return nil, fmt.Errorf("raftAttributes key doesn't exist")
	}
	if err := json.Unmarshal(raftData, &m.RaftAttributes); err != nil {
		return nil, fmt.Errorf("unmarshal raftAttributes error: %v", err)
	}

	if attrData := attrs[attrKey]; attrData != nil {
		if err := json.Unmarshal(attrData, &m.Attributes); err != nil {
			return m, fmt.Errorf("unmarshal attributes error: %v", err)
		}
	}
	return m, nil
}
// MembersByID provides the Len, Less and Swap methods needed to sort a slice
// of members by ascending ID.
type MembersByID []*Member

// Len returns the number of members.
func (ms MembersByID) Len() int {
	return len(ms)
}

// Less reports whether the member at index i has a smaller ID than the member
// at index j.
func (ms MembersByID) Less(i, j int) bool {
	left, right := ms[i], ms[j]
	return left.ID < right.ID
}

// Swap exchanges the members at indexes i and j.
func (ms MembersByID) Swap(i, j int) {
	ms[j], ms[i] = ms[i], ms[j]
}
// MembersByPeerURLs provides the Len, Less and Swap methods needed to sort a
// slice of members by their first peer URL.
type MembersByPeerURLs []*Member

// Len returns the number of members.
func (ms MembersByPeerURLs) Len() int {
	return len(ms)
}

// Less reports whether the first peer URL of the member at index i sorts
// before that of the member at index j. It indexes PeerURLs[0], so every
// member being compared must have at least one peer URL.
func (ms MembersByPeerURLs) Less(i, j int) bool {
	left, right := ms[i], ms[j]
	return left.PeerURLs[0] < right.PeerURLs[0]
}

// Swap exchanges the members at indexes i and j.
func (ms MembersByPeerURLs) Swap(i, j int) {
	ms[j], ms[i] = ms[i], ms[j]
}

View File

@@ -0,0 +1,86 @@
// Copyright 2015 CoreOS, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package etcdserver
import (
"time"
"github.com/coreos/etcd/pkg/runtime"
"github.com/prometheus/client_golang/prometheus"
)
// Metrics of the "etcd" namespace, "server" subsystem. All of them are
// registered with prometheus in this file's init function.
var (
// proposeDurations is a summary of the latency of committing proposals.
// TODO: with label in v3?
proposeDurations = prometheus.NewSummary(prometheus.SummaryOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "proposal_durations_milliseconds",
Help: "The latency distributions of committing proposal.",
})
// proposePending is a gauge of the number of pending proposals.
proposePending = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "pending_proposal_total",
Help: "The total number of pending proposals.",
})
// proposeFailed counts the proposals that failed from the client's view.
// Such a proposal might still be committed in raft later.
proposeFailed = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "proposal_failed_total",
Help: "The total number of failed proposals.",
})
// fileDescriptorUsed is a gauge of the number of file descriptors in use;
// monitorFileDescriptor updates it.
fileDescriptorUsed = prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "etcd",
Subsystem: "server",
Name: "file_descriptors_used_total",
Help: "The total number of file descriptors used.",
})
)
// init registers the four metrics declared in this file through
// prometheus.MustRegister, one call per metric and in declaration order.
func init() {
	collectors := []prometheus.Collector{
		proposeDurations,
		proposePending,
		proposeFailed,
		fileDescriptorUsed,
	}
	for _, c := range collectors {
		prometheus.MustRegister(c)
	}
}
// monitorFileDescriptor samples the file descriptor usage once immediately and
// then every five seconds. Each sample is stored in the fileDescriptorUsed
// gauge, and a warning is logged when the usage reaches limit/5*4 (integer
// arithmetic). The function returns when done is readable or closed, or as
// soon as the usage or the limit cannot be read; the latter is logged as an
// error.
func monitorFileDescriptor(done <-chan struct{}) {
	tick := time.NewTicker(5 * time.Second)
	defer tick.Stop()

	for {
		inUse, err := runtime.FDUsage()
		if err != nil {
			plog.Errorf("cannot monitor file descriptor usage (%v)", err)
			return
		}
		fileDescriptorUsed.Set(float64(inUse))

		fdLimit, err := runtime.FDLimit()
		if err != nil {
			plog.Errorf("cannot monitor file descriptor usage (%v)", err)
			return
		}

		// Divide before multiplying, exactly as the threshold has always
		// been computed.
		if threshold := fdLimit / 5 * 4; inUse >= threshold {
			plog.Warningf("80%% of the file descriptor limit is used [used = %d, limit = %d]", inUse, fdLimit)
		}

		// Wait for the next tick, or stop when asked to.
		select {
		case <-done:
			return
		case <-tick.C:
		}
	}
}

Some files were not shown because too many files have changed in this diff Show More